debian-mirror-gitlab/lib/gitlab/email/html_to_markdown_parser.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

51 lines
1.3 KiB
Ruby
Raw Normal View History

2023-04-23 21:23:45 +05:30
# frozen_string_literal: true
require 'nokogiri'
module Gitlab
module Email
class HtmlToMarkdownParser < Html2Text
2023-05-27 22:25:52 +05:30
extend Gitlab::Utils::Override
2023-04-23 21:23:45 +05:30
2023-05-27 22:25:52 +05:30
# Tags that #iterate_over renders through Kramdown instead of delegating
# to the superclass implementation.
#
# Before such a tag is rendered, all of its attributes are removed except
# for the ones listed for it here (an empty list means "keep none").
#
# <tag> => [<attribute to keep>, ...]
ALLOWED_TAG_ATTRIBUTES = {
'em' => [],
'strong' => [],
'details' => [],
'img' => %w[alt src]
}.freeze
private_constant :ALLOWED_TAG_ATTRIBUTES
# This redefinition can be removed once https://github.com/soundasleep/html2text_ruby/pull/30
# is merged and released.
2023-04-23 21:23:45 +05:30
# Entry point: converts an HTML string using this parser.
#
# The input is passed through `replace_entities` and then `fix_newlines`
# (class-level helpers not defined in this file; presumably inherited from
# Html2Text), parsed with Nokogiri, and handed to a new parser instance
# whose `convert` result is returned.
def self.convert(html)
  prepared_html = fix_newlines(replace_entities(html))
  new(Nokogiri::HTML(prepared_html)).convert
end
2023-05-27 22:25:52 +05:30
private
override :iterate_over
2023-04-23 21:23:45 +05:30
# Converts a single node.
#
# Nodes whose tag name is a key of ALLOWED_TAG_ATTRIBUTES have every
# attribute not listed for that tag removed, and are then rendered to
# CommonMark by Kramdown from their HTML serialization. Any other node is
# handled by the superclass implementation.
def iterate_over(node)
  kept_attributes = ALLOWED_TAG_ATTRIBUTES[node.name]

  if kept_attributes
    remove_attributes(node, kept_attributes)

    node_html = node.to_html
    Kramdown::Document.new(node_html, input: 'html').to_commonmark
  else
    super
  end
end
2023-05-27 22:25:52 +05:30
# Removes from +node+ every attribute whose name is not included in
# +allowed_attributes+.
#
# Returns the array of attribute names that were removed.
def remove_attributes(node, allowed_attributes)
  disallowed = node.keys.difference(allowed_attributes)

  disallowed.each do |attribute_name|
    node.remove_attribute(attribute_name)
  end
end
2023-04-23 21:23:45 +05:30
end
end
end