2023-04-23 21:23:45 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
require 'nokogiri'
|
|
|
|
|
|
|
|
module Gitlab
  module Email
    # Html2Text subclass that renders an allow-list of HTML tags through
    # Kramdown and defers every other node to the parent implementation.
    class HtmlToMarkdownParser < Html2Text
      extend Gitlab::Utils::Override

      # List of tags to be converted by Markdown.
      #
      # All attributes are removed except for the defined ones.
      #
      # <tag> => [<attribute to keep>, ...]
      ALLOWED_TAG_ATTRIBUTES = {
        'em' => [],
        'strong' => [],
        'details' => [],
        'img' => %w[alt src]
      }.freeze
      private_constant :ALLOWED_TAG_ATTRIBUTES

      # This redefinition can be removed once https://github.com/soundasleep/html2text_ruby/pull/30
      # is merged and released.
      #
      # Runs +html+ through `replace_entities` and `fix_newlines`, parses the
      # result with Nokogiri and converts the document with a new instance of
      # this class.
      #
      # NOTE(review): `fix_newlines` and `replace_entities` are not defined in
      # this file; presumably they are class methods inherited from Html2Text.
      #
      # @param html [String] HTML markup to convert
      # @return the result of calling `#convert` on the new parser instance
      def self.convert(html)
        html = fix_newlines(replace_entities(html))
        doc = Nokogiri::HTML(html)

        new(doc).convert
      end

      private

      # Handles a single node while walking the document.
      #
      # Nodes whose tag name is a key of ALLOWED_TAG_ATTRIBUTES have every
      # attribute outside their allow-list removed and are then rendered by
      # Kramdown from their HTML serialization. All other nodes are passed to
      # the parent implementation unchanged.
      #
      # @param node [#name, #keys, #remove_attribute, #to_html] node to render
      # @return the Kramdown `to_commonmark` output, or whatever `super` returns
      override :iterate_over
      def iterate_over(node)
        allowed_attributes = ALLOWED_TAG_ATTRIBUTES[node.name]
        # Tags outside the allow-list keep the parent's behaviour.
        return super unless allowed_attributes

        remove_attributes(node, allowed_attributes)

        Kramdown::Document.new(node.to_html, input: 'html').to_commonmark
      end

      # Removes, in place, every attribute of +node+ whose name is not listed
      # in +allowed_attributes+.
      #
      # @param node [#keys, #remove_attribute] node to strip
      # @param allowed_attributes [Array<String>] attribute names to keep
      # @return [Array] the attribute names that were removed
      def remove_attributes(node, allowed_attributes)
        to_remove = (node.keys - allowed_attributes)
        to_remove.each { |key| node.remove_attribute(key) }
      end
    end
  end
end
|