debian-mirror-gitlab/lib/gitlab/email/reply_parser.rb

99 lines
2.9 KiB
Ruby
Raw Normal View History

2019-02-15 15:39:39 +05:30
# frozen_string_literal: true
2015-09-25 12:07:36 +05:30
# Inspired in great part by Discourse's Email::Receiver
module Gitlab
  module Email
    # Extracts the reply text from an incoming email message.
    #
    # The message object must respond to the calls made below (`multipart?`,
    # `text_part`, `html_part`, `charset`, `content_type`, `body`), e.g. a
    # Mail::Message or Mail::Part.
    class ReplyParser
      attr_accessor :message, :allow_only_quotes

      # @param message the email message to parse
      # @param trim_reply [Boolean] when true, the body is passed through
      #   EmailReplyTrimmer.trim before further processing
      # @param append_reply [Boolean] when true, #execute returns the reply
      #   together with the text handed back by the trimmer as a second value
      # @param allow_only_quotes [Boolean] when true, a body consisting solely
      #   of lines starting with ">" is not treated as empty
      def initialize(message, trim_reply: true, append_reply: false, allow_only_quotes: false)
        @message = message
        @trim_reply = trim_reply
        @append_reply = append_reply
        @allow_only_quotes = allow_only_quotes
      end

      # Parses the message and returns the reply text.
      #
      # @return [String] an empty string when there is no usable body, i.e.
      #   the trimmer returned nothing or every line is blank (or quoted,
      #   unless allow_only_quotes is set)
      # @return [String] the UTF-8 reply when append_reply is false
      # @return [Array<String>] `[reply, stripped_text]` when append_reply is
      #   true; `stripped_text` is '' when the trimmer did not provide one
      def execute
        body = select_body(message)

        # Remember the encoding so it can be restored after trimming.
        encoding = body.encoding

        stripped_text = nil
        body, stripped_text = EmailReplyTrimmer.trim(body, @append_reply) if @trim_reply

        return '' unless body

        # Strip trailing spaces/tabs on every line.
        # Not using /\s+$/ here because that deletes empty lines.
        body = body.gsub(/[ \t]+$/, '')

        # NOTE: We currently don't support empty quotes.
        # EmailReplyTrimmer allows this as a special case,
        # so we detect it manually here.
        #
        # If allow_only_quotes is true, a message where all lines start with
        # ">" is allowed. This could happen if an email has an empty quote,
        # forwarded without any new content.
        nothing_to_keep = body.lines.all? do |line|
          line.strip.empty? || (!allow_only_quotes && line.start_with?('>'))
        end
        return '' if nothing_to_keep

        encoded_body = force_utf8(body.force_encoding(encoding))
        return encoded_body unless @append_reply

        # stripped_text is nil when trimming was skipped (trim_reply: false);
        # fall back to an empty string instead of calling a method on nil.
        encoded_stripped = stripped_text ? force_utf8(stripped_text.force_encoding(encoding)) : ''

        [encoded_body, encoded_stripped]
      end

      private

      # Picks the part of the message to read and returns its decoded text.
      #
      # For multipart messages the text part is preferred, then the HTML part,
      # then the message itself. Returns "" when the part cannot be decoded or
      # when the decoded text still contains MIME header fragments.
      def select_body(message)
        part =
          if message.multipart?
            message.text_part || message.html_part || message
          else
            message
          end

        decoded = fix_charset(part)

        return "" unless decoded

        # Certain trigger phrases that mean we didn't parse correctly
        return "" if decoded.match?(%r{Content-Type:|multipart/alternative|text/plain})

        if part.content_type.to_s.include?('text/html')
          HTMLParser.parse_reply(decoded)
        else
          decoded
        end
      end

      # Force encoding to UTF-8 on a Mail::Message or Mail::Part
      #
      # Returns nil when object is nil or when any StandardError is raised
      # while decoding (deliberate best effort: the caller treats nil as
      # "no body").
      def fix_charset(object)
        return if object.nil?

        if object.charset
          # A part of a multi-part may have a different encoding. Its encoding
          # is denoted in its header. For example:
          #
          # ```
          # ------=_Part_2192_32400445.1115745999735
          # Content-Type: text/plain; charset=ISO-8859-1
          # Content-Transfer-Encoding: 7bit
          #
          # Plain email.
          # ```
          # So, we have to force the part to its declared encoding before we
          # are able to convert it to UTF-8. A charset spelled "utf8" (any
          # case) is rewritten to "UTF-8" first.
          declared_encoding = object.charset.gsub(/utf8/i, "UTF-8")
          force_utf8(object.body.decoded.force_encoding(declared_encoding))
        else
          object.body.to_s
        end
      rescue StandardError
        nil
      end

      # Converts str to UTF-8 via Gitlab::EncodingHelper and always returns a
      # String (nil results become "").
      def force_utf8(str)
        Gitlab::EncodingHelper.encode_utf8(str).to_s
      end
    end
  end
end