2017-08-17 22:00:37 +05:30
|
|
|
module Gitlab
|
|
|
|
# An untrusted regular expression is any regexp containing patterns sourced
|
|
|
|
# from user input.
|
|
|
|
#
|
|
|
|
# Ruby's built-in regular expression library allows patterns which complete in
|
|
|
|
# exponential time, permitting denial-of-service attacks.
|
|
|
|
#
|
|
|
|
# Not all regular expression features are available in untrusted regexes, and
|
|
|
|
# there is a strict limit on total execution time. See the RE2 documentation
|
|
|
|
# at https://github.com/google/re2/wiki/Syntax for more details.
|
|
|
|
class UntrustedRegexp
|
2018-11-08 19:23:39 +05:30
|
|
|
require_dependency 're2'
|
|
|
|
|
|
|
|
delegate :===, :source, to: :regexp
|
|
|
|
|
|
|
|
# Compiles +pattern+ with RE2 (RE2's own error logging is switched off).
#
# When +multiline+ is true the pattern is prefixed with `(?m)` before it is
# compiled. Raises RegexpError, carrying RE2's error message, when the
# compiled expression does not report itself as ok.
def initialize(pattern, multiline: false)
  pattern = "(?m)#{pattern}" if multiline

  @regexp = RE2::Regexp.new(pattern, log_errors: false)
  return if regexp.ok?

  raise RegexpError, regexp.error
end
|
|
|
|
|
|
|
|
# Hands +text+, the compiled expression and +rewrite+ to RE2.GlobalReplace
# and returns that call's result.
def replace_all(text, rewrite)
  RE2.GlobalReplace(text, regexp, rewrite)
end
|
|
|
|
|
|
|
|
# Scans +text+ with scan_regexp and returns the results as an array.
#
# When the expression has no capturing groups, each result is replaced by
# its first element; otherwise the results are returned as produced.
def scan(text)
  results = scan_regexp.scan(text).to_a
  return results unless regexp.number_of_capturing_groups.zero?

  # map! works in place and returns the same array
  results.map!(&:first)
end
|
|
|
|
|
|
|
|
# Hands +text+, the compiled expression and +rewrite+ to RE2.Replace and
# returns that call's result.
def replace(text, rewrite)
  RE2.Replace(text, regexp, rewrite)
end
|
|
|
|
|
2018-11-08 19:23:39 +05:30
|
|
|
# Two expressions are equal when their pattern sources are equal.
#
# +other+ may be any object that responds to #source. Objects that do not
# (nil, plain strings, ...) compare as not equal instead of raising
# NoMethodError.
def ==(other)
  other.respond_to?(:source) && source == other.source
end
|
|
|
|
|
|
|
|
# Handles regular expressions with the preferred RE2 library where possible
# via UntrustedRegexp. Falls back to Ruby's built-in regular expression
# library when building the UntrustedRegexp raises RegexpError, i.e. when
# the syntax would be invalid in RE2.
#
# One difference between these is `(?m)` multi-line mode. Ruby regex enables
# this by default, but also handles `^` and `$` differently.
# See: https://www.regular-expressions.info/modifiers.html
#
# Note that +multiline+ is only passed to UntrustedRegexp; the Ruby fallback
# is built from +pattern+ alone.
def self.with_fallback(pattern, multiline: false)
  UntrustedRegexp.new(pattern, multiline: multiline)
rescue RegexpError
  Regexp.new(pattern)
end
|
|
|
|
|
|
|
|
# Reports whether +pattern+ can be turned into an expression by .fabricate.
# A RegexpError raised while fabricating is reported as false.
def self.valid?(pattern)
  !!fabricate(pattern)
rescue RegexpError
  false
end
|
|
|
|
|
|
|
|
# Builds an expression from a `/pattern/flags` literal supplied as a string.
#
# The entire string must have that shape: a leading slash, a non-empty
# pattern, a closing slash and any combination of the flags i, s, m and U.
# Flags, when given, are prepended to the pattern as a `(?flags)` group.
#
# Raises RegexpError when the string is not shaped like that.
def self.fabricate(pattern)
  # \A and \z anchor the whole string. `^` and `$` anchor each line in Ruby,
  # so a multi-line string with one `/.../` line would pass validation.
  matches = pattern.match(%r{\A/(?<regexp>.+)/(?<flags>[ismU]*)\z})

  raise RegexpError, 'Invalid regular expression!' if matches.nil?

  expression = matches[:regexp]
  flags = matches[:flags]
  expression = "(?#{flags})#{expression}" unless flags.empty?

  new(expression, multiline: false)
end
|
|
|
|
|
2017-08-17 22:00:37 +05:30
|
|
|
private
|
|
|
|
|
|
|
|
attr_reader :regexp
|
|
|
|
|
|
|
|
# RE2 scan operates differently to Ruby scan when there are no capture
# groups, so work around it: an expression without capturing groups is
# re-compiled with its whole source wrapped in one group. The result is
# memoised.
def scan_regexp
  @scan_regexp ||= begin
    groupless = regexp.number_of_capturing_groups.zero?
    groupless ? RE2::Regexp.new("(#{regexp.source})") : regexp
  end
end
|
|
|
|
end
|
|
|
|
end
|