debian-mirror-gitlab/lib/gitlab/utils.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

301 lines
8.8 KiB
Ruby
Raw Normal View History

2018-12-13 13:39:08 +05:30
# frozen_string_literal: true
2015-04-26 12:48:37 +05:30
module Gitlab
module Utils
extend self
2020-06-23 00:09:42 +05:30
# Raised by check_path_traversal! when a path is not a String or matches
# the traversal pattern.
# NOTE(review): `||=` presumably guards against redefining the constant when
# this file is loaded more than once — confirm.
PathTraversalAttackError ||= Class.new(StandardError)
2015-04-26 12:48:37 +05:30
2022-05-07 20:08:51 +05:30
# Memoized logger used by this module; falls back to Gitlab::AppLogger the
# first time it is requested.
def logger
  @logger ||= Gitlab::AppLogger
end
private_class_method :logger
2018-12-13 13:39:08 +05:30
# Ensure that the relative path will not traverse outside the base directory
2020-04-08 14:13:33 +05:30
# We url decode the path to avoid passing invalid paths forward in url encoded format.
# Also see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/24223#note_284122580
# It also checks for ALT_SEPARATOR aka '\' (backslash)
2020-10-24 23:57:45 +05:30
def check_path_traversal!(path)
  # Returns nil for a nil/false path, otherwise the URL-decoded path.
  # Raises PathTraversalAttackError when the path is not a String (after
  # converting Gitlab::HashedPath via to_s) or when it matches the traversal
  # pattern below.
  return unless path

  path = path.to_s if path.is_a?(Gitlab::HashedPath)
  raise PathTraversalAttackError, 'Invalid path' unless path.is_a?(String)

  # Decode before matching so URL-encoded dots and separators are caught too.
  path = decode_path(path)

  # Matches, with either '/' or '\' as separator:
  #   * a path that is exactly "." or ".."
  #   * a leading "../"
  #   * a trailing "/.."
  #   * "/../" anywhere
  #   * any newline
  path_regex = %r{(\A(\.{1,2})\z|\A\.\.[/\\]|[/\\]\.\.\z|[/\\]\.\.[/\\]|\n)}

  if path.match?(path_regex)
    logger.warn(message: "Potential path traversal attempt detected", path: path)
    raise PathTraversalAttackError, 'Invalid path'
  end

  path
end
2020-10-24 23:57:45 +05:30
# Case-insensitive prefix check: true when absolute_path starts with any
# entry of allowlist, false otherwise (including for an empty allowlist).
def allowlisted?(absolute_path, allowlist)
  candidate = absolute_path.downcase
  allowed_prefixes = allowlist.map(&:downcase)

  candidate.start_with?(*allowed_prefixes)
end
# Raises StandardError when path is absolute and not covered by allowlist.
# Relative paths and allowlisted absolute paths return nil.
def check_allowed_absolute_path!(path, allowlist)
  if Pathname.new(path).absolute? && !allowlisted?(path, allowlist)
    raise StandardError, "path #{path} is not allowed"
  end
end
2022-05-07 20:08:51 +05:30
# Runs the traversal check first, then the absolute-path allowlist check on
# the decoded path. Raises StandardError when the traversal check does not
# yield a String (e.g. for a nil path).
def check_allowed_absolute_path_and_path_traversal!(path, path_allowlist)
  decoded_path = check_path_traversal!(path)

  if decoded_path.is_a?(String)
    check_allowed_absolute_path!(decoded_path, path_allowlist)
  else
    raise StandardError, "path is not a string!"
  end
end
2020-10-24 23:57:45 +05:30
# URL-decodes encoded_path once and returns the result.
# Raises StandardError when decoding the result a second time would change
# it again, i.e. the input was encoded more than once.
def decode_path(encoded_path)
  CGI.unescape(encoded_path).tap do |once_decoded|
    unless once_decoded == CGI.unescape(once_decoded)
      raise StandardError, "path #{encoded_path} is not allowed"
    end
  end
end
2015-09-11 14:41:01 +05:30
# Returns a copy of str tagged as UTF-8. The bytes are not transcoded and
# the receiver is left untouched (hence the dup).
def force_utf8(str)
  str.dup.force_encoding(Encoding::UTF_8)
end
2016-11-24 13:41:30 +05:30
2019-02-15 15:39:39 +05:30
# Returns a string of exactly `bytes` bytes built from str: whole characters
# are kept while they fit, and the remainder is padded with '0'.
#
# Raises ArgumentError when str is empty or bytes is negative.
def ensure_utf8_size(str, bytes:)
  raise ArgumentError, 'Empty string provided!' if str.empty?
  raise ArgumentError, 'Negative string size provided!' if bytes.negative?

  truncated = +''
  str.each_char do |char|
    # Stop before a character that would overflow the budget, so a
    # multi-byte character is never split.
    break if truncated.bytesize + char.bytesize > bytes

    truncated << char
  end

  truncated + ('0' * (bytes - truncated.bytesize))
end
2018-12-13 13:39:08 +05:30
# Append path to host, making sure there's one single / in between
def append_path(host, path)
  # Strip the slashes that would otherwise double up at the join point.
  base = host.to_s.sub(%r{\/+$}, '')
  suffix = path.to_s.sub(%r{^\/+}, '')

  "#{base}/#{suffix}"
end
2018-03-17 18:26:18 +05:30
# A slugified version of the string, suitable for inclusion in URLs and
# domain names. Rules:
#
# * Lowercased
# * Anything not matching [a-z0-9-] is replaced with a -
# * Maximum length is 63 bytes
# * First/Last Character is not a hyphen
def slugify(str)
  # Everything outside [a-z0-9] becomes '-', so the result is pure ASCII and
  # 63 characters equal 63 bytes.
  dashed = str.downcase.gsub(/[^a-z0-9]/, '-')

  # Truncate before trimming hyphens: a hyphen exposed at the cut point must
  # be removed as well.
  dashed[0..62].gsub(/(\A-+|-+\z)/, '')
end
2020-03-13 15:44:24 +05:30
# Wraps ActiveSupport's Array#to_sentence to convert the given array to a
# comma-separated sentence joined with localized 'or' Strings instead of 'and'.
def to_exclusive_sentence(array)
  # Same connectors to_sentence uses for 'and', but with localized 'or'.
  connectors = {
    two_words_connector: _(' or '),
    last_word_connector: _(', or ')
  }

  array.to_sentence(**connectors)
end
2018-05-09 12:01:36 +05:30
# Converts newlines into HTML line break elements
def nlbr(str)
  # `+str` hands the sanitizer an unfrozen string (a copy only when str is
  # frozen). The sanitizer is called with an empty `tags:` list.
  sanitized = ActionView::Base.full_sanitizer.sanitize(+str, tags: [])

  # Line breaks are converted only after sanitizing, so the inserted <br>
  # elements are the only markup this method adds before marking the result
  # html_safe.
  sanitized.gsub(/\r?\n/, '<br>').html_safe
end
2018-03-17 18:26:18 +05:30
# Returns a copy of str with every LF and CRLF sequence removed. A carriage
# return that is not followed by a line feed is kept.
def remove_line_breaks(str)
  line_break = /\r?\n/
  str.gsub(line_break, '')
end
2020-05-24 23:13:21 +05:30
# Converts common truthy/falsy representations to a real boolean.
#
# * true / false are returned as-is
# * 0 / 1 and the strings true|t|yes|y|1|on / false|f|no|n|0|off
#   (case-insensitive) map to the matching boolean
# * anything else returns `default`
def to_boolean(value, default: nil)
  value = value.to_s if [0, 1].include?(value)

  return value if [true, false].include?(value)

  # Kernel#=~ was removed in Ruby 3.2, so objects without their own `=~`
  # (other Integers, Arrays, Hashes, ...) would raise NoMethodError below
  # instead of falling through to `default`.
  return default unless value.respond_to?(:=~)

  return true if value =~ /^(true|t|yes|y|1|on)$/i
  return false if value =~ /^(false|f|no|n|0|off)$/i

  default
end
2017-09-10 17:25:29 +05:30
# Returns 'Yes' for a truthy argument and 'No' for nil/false.
def boolean_to_yes_no(bool)
  bool ? 'Yes' : 'No'
end
2018-03-17 18:26:18 +05:30
2022-01-26 12:08:38 +05:30
# Behaves like `which` on Linux machines: given PATH, try to resolve the given
# executable name to an absolute path, or return nil.
2018-03-17 18:26:18 +05:30
#
# which('ruby') #=> /usr/bin/ruby
2022-01-26 12:08:38 +05:30
def which(filename)
  # Walk PATH in order and return the first hit; nil when PATH is unset or
  # nothing matches.
  ENV['PATH']&.split(File::PATH_SEPARATOR)&.each do |dir|
    candidate = File.join(dir, filename)

    # File.executable? is also true for directories, so require a regular
    # file to avoid resolving e.g. a `ruby/` directory that sits on PATH.
    return candidate if File.file?(candidate) && File.executable?(candidate)
  end

  nil
end
2018-03-27 19:54:05 +05:30
2019-07-07 11:18:12 +05:30
# Interprets size as a number of megabytes and returns the byte count.
# When size cannot be converted to an Integer it is returned unchanged.
# NOTE(review): `megabytes` is presumably ActiveSupport's Numeric extension.
def try_megabytes_to_bytes(size)
  Integer(size).megabytes
rescue ArgumentError, TypeError
  # Integer() raises ArgumentError for malformed strings and TypeError for
  # nil and other non-numeric objects; both mean "not convertible".
  size
end
2018-10-15 14:42:47 +05:30
# Converts a byte count to a (fractional) number of megabytes.
def bytes_to_megabytes(bytes)
  byte_count = bytes.to_f
  byte_count / Numeric::MEGABYTE
end
2020-04-22 19:07:51 +05:30
# Converts milliseconds to seconds, rounded to 6 decimal places.
# ms is coerced with to_f, so numeric strings are accepted as well.
def ms_to_round_sec(ms)
  seconds = ms.to_f / 1000
  seconds.round(6)
end
2018-03-27 19:54:05 +05:30
# Used in EE
# Accepts either an Array or a String and returns an array
def ensure_array_from_string(string_or_array)
  if string_or_array.is_a?(Array)
    # Arrays are handed back untouched (same object, elements not stripped).
    string_or_array
  else
    string_or_array.split(',').map(&:strip)
  end
end
2019-02-15 15:39:39 +05:30
# Returns data with every Hash converted via with_indifferent_access.
# Arrays are walked recursively; any other value is returned unchanged.
def deep_indifferent_access(data)
  case data
  when Array
    data.map { |element| deep_indifferent_access(element) }
  when Hash
    data.with_indifferent_access
  else
    data
  end
end
2019-10-12 21:52:04 +05:30
2021-09-30 23:02:18 +05:30
# Returns data with every Hash converted via deep_symbolize_keys.
# Arrays are walked recursively; any other value is returned unchanged.
def deep_symbolized_access(data)
  case data
  when Array
    data.map { |element| deep_symbolized_access(element) }
  when Hash
    data.deep_symbolize_keys
  else
    data
  end
end
2019-10-12 21:52:04 +05:30
# Parses str into an IPAddr. Returns nil when str is nil/false or when
# IPAddr rejects it with InvalidAddressError.
def string_to_ip_object(str)
  begin
    IPAddr.new(str) if str
  rescue IPAddr::InvalidAddressError
    nil
  end
end
2020-03-07 23:17:34 +05:30
2021-03-08 18:12:59 +05:30
# A safe alternative to String#downcase!
#
# This will make copies of frozen strings but downcase unfrozen
# strings in place, reducing allocations.
def safe_downcase!(str)
  # Frozen strings cannot be mutated, so they get a downcased copy.
  return str.downcase if str.frozen?

  # downcase! returns nil when nothing changed; hand back the receiver then.
  str.downcase! || str
end
2020-03-07 23:17:34 +05:30
# Converts a string to an Addressable::URI object.
# If the string is not a valid URI, it returns nil.
# Param uri_string should be a String object.
# This method returns an Addressable::URI object or nil.
def parse_url(uri_string)
  begin
    Addressable::URI.parse(uri_string)
  rescue Addressable::URI::InvalidURIError, TypeError
    # Unparseable input (or a non-String argument) yields nil.
    nil
  end
end
2020-06-23 00:09:42 +05:30
2022-04-04 11:22:00 +05:30
# Returns url (as a String) with params merged into its query string.
# Keys in params override existing query keys of the same name.
def add_url_parameters(url, params)
  uri = parse_url(url.to_s)

  existing_params = uri.query_values.to_h
  extra_params = params.to_h.stringify_keys
  uri.query_values = existing_params.merge(extra_params)

  # Reset the query to nil when no parameters remain.
  uri.query_values = nil if uri.query_values.empty?

  uri.to_s
end
2021-06-02 17:11:27 +05:30
# Replaces the value of an `access_token` parameter carried in the URL
# fragment with 'filtered'.
#
# Returns nil when uri_string cannot be parsed, uri_string itself when the
# URL has no fragment, and the (possibly rewritten) URL as a String
# otherwise.
def removes_sensitive_data_from_url(uri_string)
  uri = parse_url(uri_string)

  return unless uri
  return uri_string unless uri.fragment

  stripped_params = CGI.parse(uri.fragment)

  # CGI.parse returns a hash whose default value is an empty array, so
  # `stripped_params['access_token']` is truthy even when the key is absent.
  # Check for the key explicitly; otherwise every fragment (e.g. "#section")
  # would be rewritten with an injected access_token parameter.
  if stripped_params.key?('access_token')
    stripped_params['access_token'] = 'filtered'
    filtered_query = Addressable::URI.new
    filtered_query.query_values = stripped_params

    uri.fragment = filtered_query.query
  end

  uri.to_s
end
2020-06-23 00:09:42 +05:30
# Invert a hash, collecting all keys that map to a given value in an array.
#
# Unlike `Hash#invert`, where the last encountered pair wins, and which has the
# type `Hash[k, v] => Hash[v, k]`, `multiple_key_invert` does not lose any
# information, has the type `Hash[k, v] => Hash[v, Array[k]]`, and the original
# hash can always be reconstructed.
#
# example:
#
# multiple_key_invert({ a: 1, b: 2, c: 1 })
# # => { 1 => [:a, :c], 2 => [:b] }
#
def multiple_key_invert(hash)
  hash.each_with_object({}) do |(key, values), inverted|
    # A key may map to one value or to several; each of them points back to
    # the key, in encounter order.
    Array.wrap(values).each do |value|
      (inverted[value] ||= []) << key
    end
  end
end
2020-07-28 23:09:34 +05:30
# This sort is stable (see https://en.wikipedia.org/wiki/Sorting_algorithm#Stability)
# contrary to the bare Ruby sort_by method. Using just sort_by leads to
# instability across different platforms (e.g., x86_64-linux and x86_64-darwin18)
# which in turn leads to different sorting results for the equal elements across
# these platforms.
# This method uses a list item's original index position to break ties.
def stable_sort_by(list)
  # Pair every item with its original position so equal keys are ordered by
  # where they appeared, then drop the positions again.
  list
    .each_with_index
    .sort_by { |item, position| [yield(item), position] }
    .map(&:first)
end
2020-11-05 12:06:23 +05:30
# Check for valid brackets (`[` and `]`) in a string using these aspects:
# * open brackets count == closed brackets count
# * (optionally) reject nested brackets via `allow_nested: false`
# * open / close brackets coherence, eg. ][[] -> invalid
def valid_brackets?(string = '', allow_nested: true)
  # Single pass with a depth counter: every ']' must close an earlier '[',
  # and nothing may be left open at the end. This covers both the count and
  # the open/close coherence checks without repeatedly rewriting the string.
  depth = 0

  string.each_char do |char|
    case char
    when '['
      depth += 1
      # A second level of open brackets means nesting.
      return false if depth > 1 && !allow_nested
    when ']'
      depth -= 1
      # Closing bracket without a matching opener, e.g. "][".
      return false if depth.negative?
    end
  end

  depth.zero?
end
2015-04-26 12:48:37 +05:30
end
end