debian-mirror-gitlab/lib/gitlab/untrusted_regexp.rb

132 lines
3.8 KiB
Ruby
Raw Normal View History

2018-12-13 13:39:08 +05:30
# frozen_string_literal: true
2017-08-17 22:00:37 +05:30
module Gitlab
# An untrusted regular expression is any regexp containing patterns sourced
# from user input.
#
# Ruby's built-in regular expression library allows patterns which complete in
# exponential time, permitting denial-of-service attacks.
#
# Not all regular expression features are available in untrusted regexes, and
# there is a strict limit on total execution time. See the RE2 documentation
# at https://github.com/google/re2/wiki/Syntax for more details.
2023-07-09 08:55:56 +05:30
#
# This class doesn't change any instance variables, which allows it to be frozen
# and set up in constants.
2017-08-17 22:00:37 +05:30
class UntrustedRegexp
2018-11-08 19:23:39 +05:30
require_dependency 're2'
2023-07-07 10:43:13 +05:30
# recreate Ruby's \R metacharacter
# https://ruby-doc.org/3.2.2/Regexp.html#class-Regexp-label-Character+Classes
#
# `\r\n` must be the first alternative: alternation prefers the leftmost
# branch that matches, so with `\r` listed earlier a CRLF pair would be
# consumed as a lone `\r` instead of as a single line break.
BACKSLASH_R = '(\r\n|\n|\v|\f|\r|\x{0085}|\x{2028}|\x{2029})'
2018-11-08 19:23:39 +05:30
delegate :===, :source, to: :regexp
# Compiles +pattern+ with RE2.
#
# pattern   - the (possibly user-supplied) regular expression source.
# multiline - when true, the pattern is prefixed with the `(?m)` flag.
#
# Raises RegexpError carrying RE2's error text when RE2 reports that the
# compiled pattern is not ok.
def initialize(pattern, multiline: false)
  source = multiline ? "(?m)#{pattern}" : pattern

  # log_errors: false - failures are surfaced through the RegexpError
  # raised below rather than through RE2's own logging.
  @regexp = RE2::Regexp.new(source, log_errors: false)
  @scan_regexp = initialize_scan_regexp

  return if regexp.ok?

  raise RegexpError, regexp.error
end
# Hands +text+, the wrapped RE2 pattern and +rewrite+ to RE2.GlobalReplace
# and returns whatever that call returns.
#
# text    - the string to run the replacement on.
# rewrite - the replacement, in whatever form RE2.GlobalReplace accepts.
def replace_all(text, rewrite)
RE2.GlobalReplace(text, regexp, rewrite)
end
2023-06-09 08:11:10 +05:30
# There is no built-in replace with block support (like `gsub`). We can accomplish
# the same thing by parsing and rebuilding the string with the substitutions.
#
# text - the string to process. It is not modified.
#
# Yields each match object and splices the block's return value into the
# result in place of the matched text.
#
# Returns a new string.
#
# Every search runs against the not-yet-processed tail of +text+, so anchors
# and boundary assertions are evaluated relative to that tail.
def replace_gsub(text)
  new_text = +''
  remainder = text
  matched = match(remainder)

  until matched.nil? || matched.to_a.compact.empty?
    match_start = matched.begin(0)
    match_end = matched.end(0)

    # Split on the match's actual offsets. Searching for the matched text
    # again could land on an earlier, non-matching occurrence of that text.
    new_text << remainder[0...match_start]
    new_text << yield(matched)
    remainder = remainder[match_end..]

    if match_end == match_start
      # A zero-length match consumes nothing. Copy one character through
      # (as `gsub` does) so the loop always makes progress and terminates.
      break if remainder.empty?

      new_text << remainder[0]
      remainder = remainder[1..]
    end

    matched = match(remainder)
  end

  new_text << remainder
end
2017-08-17 22:00:37 +05:30
# Collects every match of the pattern in +text+.
#
# Returns an array. When the pattern declares no capture groups each element
# is the first (only) entry of the scanner's row; otherwise each element is
# the row of captured groups as produced by the scanner.
def scan(text)
  rows = scan_regexp.scan(text).to_a

  # With capture groups present the rows are returned untouched.
  return rows unless regexp.number_of_capturing_groups == 0

  # The scan regexp wraps a group-less pattern in a single group, so unwrap
  # that one-element row back into a flat list.
  rows.map!(&:first)
end
2021-04-29 21:17:54 +05:30
# Runs the scan regexp (the pattern, wrapped in a capture group when it
# declares none of its own) against +text+ and returns the result of its
# `match` call. #replace_gsub treats a nil result as "no match".
def match(text)
scan_regexp.match(text)
end
2019-04-03 18:18:56 +05:30
# Reports whether the pattern matches anywhere in +text+.
#
# Text that is not `present?` is never considered a match; otherwise the
# answer is whether #scan finds anything.
def match?(text)
  return false unless text.present?

  scan(text).present?
end
2017-08-17 22:00:37 +05:30
# Hands +text+, the wrapped RE2 pattern and +rewrite+ to RE2.Replace and
# returns whatever that call returns.
# NOTE(review): presumably this replaces only the first occurrence, in
# contrast to #replace_all - confirm against the RE2 gem documentation.
def replace(text, rewrite)
RE2.Replace(text, regexp, rewrite)
end
2023-05-08 21:46:49 +05:30
# #scan hands back plain arrays of captured groups rather than MatchData, so
# a named group has to be resolved to its position in that array.
#
# name  - String or Symbol name of the capture group.
# match - an array of captures (such as an element of the #scan result).
#
# Returns the captured value, or nil when +match+ is nil or false.
# Raises RegexpError when the pattern has no capture group called +name+.
def extract_named_group(name, match)
  return unless match

  group_number = regexp.named_capturing_groups[name.to_s]

  # The group number is used one-based here; the captures array is zero-based.
  return match[group_number - 1] if group_number

  raise RegexpError, "Invalid named capture group: #{name}"
end
2018-11-08 19:23:39 +05:30
# Two patterns compare equal when their source strings are equal.
#
# other - any object. Anything that does not expose a `source` (nil, a
#         String, ...) is simply unequal instead of raising NoMethodError.
#
# Returns true or false.
def ==(other)
  other.respond_to?(:source) && source == other.source
end
2019-07-07 11:18:12 +05:30
# Builds the pattern with the preferred RE2 library, via UntrustedRegexp,
# whenever RE2 accepts it. When RE2 rejects the syntax, falls back to Ruby's
# built-in regular expression library.
#
# One difference between the two is `(?m)` multi-line mode. Ruby regex enables
# this by default, but also handles `^` and `$` differently.
# See: https://www.regular-expressions.info/modifiers.html
#
# pattern   - the regular expression source.
# multiline - forwarded to UntrustedRegexp.new; not used by the fallback.
#
# Returns an UntrustedRegexp, or a plain Regexp when falling back.
# Re-raises the RegexpError when the :disable_unsafe_regexp feature flag is
# enabled.
def self.with_fallback(pattern, multiline: false)
  UntrustedRegexp.new(pattern, multiline: multiline)
rescue RegexpError
  # With the fallback switched off, the RE2 failure is the final answer.
  raise if Feature.enabled?(:disable_unsafe_regexp)

  log_fallback = Feature.enabled?(:ci_unsafe_regexp_logger, type: :ops)
  Gitlab::AppJsonLogger.info(class: name, regexp: pattern.to_s, fabricated: 'unsafe ruby regexp') if log_fallback

  Regexp.new(pattern)
end
2017-08-17 22:00:37 +05:30
private
2023-07-09 08:55:56 +05:30
attr_reader :regexp, :scan_regexp
2017-08-17 22:00:37 +05:30
# RE2's scan behaves differently from Ruby's scan when the pattern has no
# capture groups. To work around that, such a pattern is wrapped in a single
# group; a pattern that already has groups is used as it is.
#
# Returns the RE2 regexp to use for scanning and matching.
def initialize_scan_regexp
  return regexp unless regexp.number_of_capturing_groups == 0

  RE2::Regexp.new("(#{regexp.source})")
end
end
end