debian-mirror-gitlab/lib/gitlab/untrusted_regexp.rb
2017-08-17 22:00:37 +05:30

69 lines
1.7 KiB
Ruby

module Gitlab
# An untrusted regular expression is any regexp containing patterns sourced
# from user input.
#
# Ruby's built-in regular expression library allows patterns which complete in
# exponential time, permitting denial-of-service attacks.
#
# Not all regular expression features are available in untrusted regexes, and
# there is a strict limit on total execution time. See the RE2 documentation
# at https://github.com/google/re2/wiki/Syntax for more details.
class UntrustedRegexp
  # Case-equality (`===`) is forwarded to the compiled RE2 regexp so instances
  # can be used wherever a regexp-like matcher is expected.
  delegate :===, to: :regexp

  # Compiles +pattern+ with RE2 (error logging disabled).
  #
  # pattern - String holding the regular expression source; may originate
  #           from user input.
  #
  # Raises RegexpError, carrying RE2's error message, when the compiled
  # regexp reports that it is not ok.
  def initialize(pattern)
    @regexp = RE2::Regexp.new(pattern, log_errors: false)

    raise RegexpError, regexp.error unless regexp.ok?
  end

  # Passes +text+, the compiled regexp and +rewrite+ to RE2.GlobalReplace and
  # returns its result (per the re2 gem: every match replaced).
  def replace_all(text, rewrite)
    RE2.GlobalReplace(text, regexp, rewrite)
  end

  # Collects successive matches of the regexp in +text+, mimicking the return
  # shape of String#scan: an Array of Strings when the pattern has no capture
  # groups, otherwise an Array of Arrays holding each match's groups.
  #
  # The caller's string is never modified; a private copy is consumed from the
  # front as matches are found.
  def scan(text)
    text = text.dup # sliced in place below
    whole_match_only = regexp.number_of_capturing_groups.zero?
    results = []

    loop do
      match = scan_regexp.match(text)
      break unless match

      # Ruby scan returns empty strings, not nil
      groups = match.to_a.map(&:to_s)
      results << (whole_match_only ? groups[0] : groups[1..-1])

      # Always consume at least one character. 0 is truthy in Ruby, so the
      # previous `match.end(0) || 1` removed nothing after a zero-width match
      # at the start of the text and the loop never terminated.
      consumed = [match.end(0).to_i, 1].max
      text.slice!(0, consumed)

      # Stop only when nothing is left. `present?` would also stop on a
      # whitespace-only remainder and silently drop matches inside it.
      break if text.empty?
    end

    results
  end

  # Passes +text+, the compiled regexp and +rewrite+ to RE2.Replace and
  # returns its result (per the re2 gem: only the first match replaced).
  def replace(text, rewrite)
    RE2.Replace(text, regexp, rewrite)
  end

  private

  attr_reader :regexp

  # RE2 scan operates differently to Ruby scan when there are no capture
  # groups, so work around it: a pattern without groups is wrapped in a single
  # capturing group, while a pattern that already has groups is used as-is.
  # The result is memoized.
  def scan_regexp
    @scan_regexp ||=
      if regexp.number_of_capturing_groups.zero?
        # Same options as the primary regexp so compilation stays silent.
        RE2::Regexp.new("(#{regexp.source})", log_errors: false)
      else
        regexp
      end
  end
end
end