debian-mirror-gitlab/lib/gitlab/utils.rb

# frozen_string_literal: true

module Gitlab
  module Utils
    # Extending the module with itself makes every method defined below
    # callable directly on the module (e.g. Gitlab::Utils.slugify).
    extend self
    # `||=` only assigns when the constant is not already set.
    # NOTE(review): presumably this avoids "already initialized constant"
    # warnings if the file is loaded more than once - confirm.
    PathTraversalAttackError ||= Class.new(StandardError)

    # Ensure that the relative path will not traverse outside the base directory
    # We url decode the path to avoid passing invalid paths forward in url encoded format.
    # Also see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/24223#note_284122580
    # It also checks for ALT_SEPARATOR aka '\' (backslash)
    #
    # Returns nil for non-String input, otherwise the URL-decoded path.
    # Raises PathTraversalAttackError when the decoded path is exactly `.` or
    # `..`, contains a `..` segment delimited by `/` or `\`, or contains a
    # newline. Errors raised by decode_path propagate unchanged.
    def check_path_traversal!(path)
      return unless path.is_a?(String)

      decoded = decode_path(path)
      traversal_pattern = %r{(\A(\.{1,2})\z|\A\.\.[/\\]|[/\\]\.\.\z|[/\\]\.\.[/\\]|\n)}
      raise PathTraversalAttackError, 'Invalid path' if decoded.match?(traversal_pattern)

      decoded
    end

    # Case-insensitive prefix check: true when +absolute_path+ starts with any
    # entry of +allowlist+. This is a plain string comparison; the path is not
    # normalised and no segment boundary is required after the prefix.
    def allowlisted?(absolute_path, allowlist)
      normalized_path = absolute_path.downcase
      prefixes = allowlist.map(&:downcase)

      normalized_path.start_with?(*prefixes)
    end

    # Guards absolute paths: raises StandardError when +path+ is absolute and
    # allowlisted? does not accept it for +allowlist+.
    # Relative paths and allowlisted absolute paths return nil.
    def check_allowed_absolute_path!(path, allowlist)
      absolute = Pathname.new(path).absolute?
      return if !absolute || allowlisted?(path, allowlist)

      raise StandardError, "path #{path} is not allowed"
    end

    # URL-decodes +encoded_path+ once and returns the result.
    # Raises StandardError when decoding the result a second time would change
    # it again, i.e. the input still contains escape sequences after one pass
    # (for example double-encoded input such as "%252F").
    def decode_path(encoded_path)
      once = CGI.unescape(encoded_path)
      twice = CGI.unescape(once)
      raise StandardError, "path #{encoded_path} is not allowed" unless once == twice

      once
    end

    # Returns a copy of +str+ tagged as UTF-8. The bytes are not transcoded and
    # the original string is left untouched.
    def force_utf8(str)
      copy = str.dup
      copy.force_encoding(Encoding::UTF_8)
    end

    # Returns a string of exactly +bytes+ bytes built from +str+.
    #
    # Whole characters are copied from the start of +str+ while they still fit
    # into the byte budget (multi-byte characters are never split); any
    # remaining room is filled with '0' characters.
    #
    # Raises ArgumentError when +str+ is empty or +bytes+ is negative.
    def ensure_utf8_size(str, bytes:)
      raise ArgumentError, 'Empty string provided!' if str.empty?
      raise ArgumentError, 'Negative string size provided!' if bytes < 0

      truncated = +''
      str.each_char do |char|
        # Stop at the first character that would exceed the budget.
        break if truncated.bytesize + char.bytesize > bytes

        truncated.concat(char)
      end

      padding = '0' * (bytes - truncated.bytesize)
      truncated + padding
    end

    # Append path to host, making sure there's one single / in between
    #
    # Both arguments are converted with to_s. Slashes at the end of +host+ and
    # at the start of +path+ are removed before the two are joined with "/".
    # Note that the anchors used (`$` and `^`) are line-based.
    def append_path(host, path)
      base = host.to_s.sub(%r{\/+$}, '')
      suffix = path.to_s.sub(%r{^\/+}, '')

      "#{base}/#{suffix}"
    end

    # A slugified version of the string, suitable for inclusion in URLs and
    # domain names. Rules:
    #
    #   * Lowercased
    #   * Anything not matching [a-z0-9-] is replaced with a -
    #   * Maximum length is 63 bytes
    #   * First/Last Character is not a hyphen
    #
    # Returns a new String; +str+ itself is not modified.
    def slugify(str)
      dashed = str.downcase.gsub(/[^a-z0-9]/, '-')
      # Hyphens are trimmed after truncating, so a cut that lands on a hyphen
      # cannot leave one at the end of the slug.
      limited = dashed[0..62]

      limited.gsub(/(\A-+|-+\z)/, '')
    end

    # Wraps ActiveSupport's Array#to_sentence to convert the given array to a
    # comma-separated sentence joined with localized 'or' Strings instead of 'and'.
    #
    # Both connector strings are passed through the `_` helper (not defined in
    # this file) before being handed to to_sentence.
    def to_exclusive_sentence(array)
      connectors = {
        two_words_connector: _(' or '),
        last_word_connector: _(', or ')
      }

      array.to_sentence(**connectors)
    end

    # Converts newlines into HTML line break elements
    #
    # An unfrozen version of +str+ is first passed to ActionView's
    # full_sanitizer with an empty tag list; afterwards every "\n" / "\r\n" is
    # replaced with "<br>" and the result is marked html_safe.
    def nlbr(str)
      sanitized = ActionView::Base.full_sanitizer.sanitize(+str, tags: [])
      with_breaks = sanitized.gsub(/\r?\n/, '<br>')

      with_breaks.html_safe
    end

    # Returns a copy of +str+ with every "\n" and "\r\n" removed.
    # A "\r" that is not followed by "\n" is kept.
    def remove_line_breaks(str)
      line_break = /\r?\n/

      str.gsub(line_break, '')
    end

    # Converts common truthy / falsy representations to a real boolean.
    #
    #   * true and false are returned unchanged
    #   * 1 and 0 are treated like the strings "1" and "0"
    #   * the strings or symbols true, t, yes, y, 1, on map to true and
    #     false, f, no, n, 0, off map to false (case-insensitive)
    #   * anything else returns +default+
    #
    # The whole value has to match: the patterns are anchored with \A and \z,
    # so multi-line input such as "true\nfoo" is not accepted.
    def to_boolean(value, default: nil)
      value = value.to_s if [0, 1].include?(value)

      return value if [true, false].include?(value)
      # Only strings and symbols can be pattern-matched; other objects no
      # longer respond to =~ on Ruby >= 3.2 and simply yield the default.
      return default unless value.is_a?(String) || value.is_a?(Symbol)

      return true if value.match?(/\A(true|t|yes|y|1|on)\z/i)
      return false if value.match?(/\A(false|f|no|n|0|off)\z/i)

      default
    end

    # Maps a truthy value to 'Yes' and a falsy one (nil or false) to 'No'.
    def boolean_to_yes_no(bool)
      bool ? 'Yes' : 'No'
    end

    # Returns a random base-36 string (digits and lowercase letters) derived
    # from a random integer below Float::MAX.to_i. It uses Ruby's
    # general-purpose Random generator, not SecureRandom.
    def random_string
      upper_bound = Float::MAX.to_i

      Random.rand(upper_bound).to_s(36)
    end

    # See: http://stackoverflow.com/questions/2108727/which-in-ruby-checking-if-program-exists-in-path-from-ruby
    # Cross-platform way of finding an executable in the $PATH.
    #
    #   which('ruby') #=> /usr/bin/ruby
    #
    # +env+ is any object that answers ['PATH'] / ['PATHEXT'] (ENV by default).
    # Each PATH entry is combined with +cmd+ and every extension listed in
    # PATHEXT (split on ';'), or with no extension when PATHEXT is absent.
    # Returns the first candidate that is executable and not a directory, or
    # nil when there is none. A missing PATH is treated as empty.
    def which(cmd, env = ENV)
      exts = env['PATHEXT'] ? env['PATHEXT'].split(';') : ['']
      # nil.to_s is "", which splits into no directories at all.
      search_dirs = env['PATH'].to_s.split(File::PATH_SEPARATOR)

      search_dirs.each do |dir|
        exts.each do |ext|
          exe = File.join(dir, "#{cmd}#{ext}")
          return exe if File.executable?(exe) && !File.directory?(exe)
        end
      end

      nil
    end

    # Interprets +size+ as a number of megabytes and returns the byte count.
    # When +size+ cannot be converted with Integer() - a non-numeric string,
    # nil, and so on - it is returned unchanged.
    #
    # Relies on Integer#megabytes, which is not defined in this file.
    def try_megabytes_to_bytes(size)
      Integer(size).megabytes
    rescue ArgumentError, TypeError
      # Integer() raises ArgumentError for malformed strings and TypeError for
      # values it cannot convert at all, such as nil.
      size
    end

    # Converts a byte count to (possibly fractional) megabytes.
    # +bytes+ is coerced with to_f and divided by Numeric::MEGABYTE, a constant
    # that is not defined in this file.
    def bytes_to_megabytes(bytes)
      byte_count = bytes.to_f

      byte_count / Numeric::MEGABYTE
    end

    # Converts milliseconds to seconds, rounded to six decimal places.
    # +ms+ is coerced with to_f first.
    def ms_to_round_sec(ms)
      seconds = ms.to_f / 1000

      seconds.round(6)
    end

    # Used in EE
    # Accepts either an Array or a String and returns an array
    #
    # An Array is returned as-is (same object). Anything else is split on
    # commas and each piece is stripped of surrounding whitespace.
    def ensure_array_from_string(string_or_array)
      case string_or_array
      when Array then string_or_array
      else string_or_array.split(',').map(&:strip)
      end
    end

    # Converts Hashes in +data+ via with_indifferent_access.
    # Arrays are walked recursively (a new Array is returned); any value that
    # is neither an Array nor a Hash is returned unchanged.
    def deep_indifferent_access(data)
      case data
      when Array then data.map { |item| deep_indifferent_access(item) }
      when Hash then data.with_indifferent_access
      else data
      end
    end

    # Converts Hashes in +data+ via deep_symbolize_keys.
    # Arrays are walked recursively (a new Array is returned); any value that
    # is neither an Array nor a Hash is returned unchanged.
    def deep_symbolized_access(data)
      case data
      when Array then data.map { |item| deep_symbolized_access(item) }
      when Hash then data.deep_symbolize_keys
      else data
      end
    end

    # Builds an IPAddr from +str+.
    # Returns nil when +str+ is nil/false or when IPAddr rejects it with
    # IPAddr::InvalidAddressError.
    def string_to_ip_object(str)
      IPAddr.new(str) if str
    rescue IPAddr::InvalidAddressError
      nil
    end

    # A safe alternative to String#downcase!
    #
    # This will make copies of frozen strings but downcase unfrozen
    # strings in place, reducing allocations.
    #
    # Returns the downcased string: a new object for frozen input, otherwise
    # +str+ itself.
    def safe_downcase!(str)
      return str.downcase if str.frozen?

      # downcase! returns nil when nothing changed, hence the fallback to str.
      str.downcase! || str
    end

    # Converts a string to an Addressable::URI object.
    # If the string is not a valid URI, it returns nil.
    # Param uri_string should be a String object.
    # This method returns an Addressable::URI object or nil.
    def parse_url(uri_string)
      Addressable::URI.parse(uri_string)
    rescue Addressable::URI::InvalidURIError, TypeError
      # Unparseable input and arguments of the wrong type both yield nil.
      nil
    end

    # Replaces the value of an `access_token` parameter in the URL fragment
    # with the literal "filtered".
    #
    # Returns nil when parse_url cannot parse +uri_string+, the original
    # +uri_string+ when the URL has no fragment, and otherwise the URL
    # serialised with to_s. A fragment without `access_token` is left as it is.
    def removes_sensitive_data_from_url(uri_string)
      uri = parse_url(uri_string)

      return unless uri
      return uri_string unless uri.fragment

      fragment_params = CGI.parse(uri.fragment)
      # CGI.parse returns a Hash whose default value is an empty Array, which
      # is truthy - so presence has to be tested with key?, not with [].
      if fragment_params.key?('access_token')
        fragment_params['access_token'] = 'filtered'
        filtered_query = Addressable::URI.new
        filtered_query.query_values = fragment_params

        uri.fragment = filtered_query.query
      end

      uri.to_s
    end

    # Invert a hash, collecting all keys that map to a given value in an array.
    #
    # Unlike `Hash#invert`, where the last encountered pair wins, and which has the
    # type `Hash[k, v] => Hash[v, k]`, `multiple_key_invert` does not lose any
    # information, has the type `Hash[k, v] => Hash[v, Array[k]]`, and the original
    # hash can always be reconstructed.
    #
    # example:
    #
    #   multiple_key_invert({ a: 1, b: 2, c: 1 })
    #   # => { 1 => [:a, :c], 2 => [:b] }
    #
    # Each value is passed through Array.wrap, so an Array value contributes
    # one result key per element. Result keys and the keys collected under
    # them appear in the order in which they are first encountered.
    def multiple_key_invert(hash)
      inverted = {}

      hash.each do |key, value|
        Array.wrap(value).each do |item|
          (inverted[item] ||= []) << key
        end
      end

      inverted
    end

    # This sort is stable (see https://en.wikipedia.org/wiki/Sorting_algorithm#Stability)
    # contrary to the bare Ruby sort_by method. Using just sort_by leads to
    # instability across different platforms (e.g., x86_64-linux and x86_64-darwin18)
    # which in turn leads to different sorting results for the equal elements across
    # these platforms.
    # This method uses a list item's original index position to break ties.
    #
    # Yields each element to the given block to obtain its sort key and
    # returns a new sorted Array.
    def stable_sort_by(list)
      decorated = list.each_with_index.sort_by { |item, position| [yield(item), position] }

      decorated.map(&:first)
    end

    # Check for valid brackets (`[` and `]`) in a string using these aspects:
    # * open brackets count == closed brackets count
    # * (optionally) reject nested brackets via `allow_nested: false`
    # * open / close brackets coherence, eg. ][[] -> invalid
    #
    # Returns true when +string+ contains no brackets at all or when every `[`
    # is closed by a later `]`; false otherwise. With `allow_nested: false` any
    # directly adjacent `[[` or `]]` (after stripping all other characters)
    # also makes the result false.
    def valid_brackets?(string = '', allow_nested: true)
      # Keep nothing but the bracket characters.
      brackets = string.remove(/[^\[\]]/)

      return true if brackets.empty?
      # An odd number of brackets can never be balanced.
      return false if brackets.size.odd?
      return false if !allow_nested && (brackets.include?('[[') || brackets.include?(']]'))

      # Track the nesting depth: it must never drop below zero (a `]` without
      # a matching `[` before it) and must end at zero (nothing left open).
      depth = 0
      brackets.each_char do |bracket|
        depth += bracket == '[' ? 1 : -1
        return false if depth.negative?
      end

      depth.zero?
    end
  end
end
New upstream version 11.5.3+dfsg 2018-12-13 13:39:08 +05:30			`# frozen_string_literal: true`

Imported Upstream version 7.10.0 2015-04-26 12:48:37 +05:30			`module Gitlab`
			`module Utils`
			`extend self`
New upstream version 13.1.0 2020-06-23 00:09:42 +05:30			`PathTraversalAttackError \|\|= Class.new(StandardError)`
Imported Upstream version 7.10.0 2015-04-26 12:48:37 +05:30
New upstream version 11.5.3+dfsg 2018-12-13 13:39:08 +05:30			`# Ensure that the relative path will not traverse outside the base directory`
New upstream version 12.9.2 2020-04-08 14:13:33 +05:30			`# We url decode the path to avoid passing invalid paths forward in url encoded format.`
			`# Also see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/24223#note_284122580`
			`# It also checks for ALT_SEPARATOR aka '\' (forward slash)`
New upstream version 13.3.8 2020-10-24 23:57:45 +05:30			`def check_path_traversal!(path)`
New upstream version 13.3.9 2020-11-05 12:06:23 +05:30			`return unless path.is_a?(String)`

New upstream version 13.3.8 2020-10-24 23:57:45 +05:30			`path = decode_path(path)`
New upstream version 14.2.5+ds1 2021-10-27 15:23:28 +05:30			`path_regex = %r{(\A(\.{1,2})\z\|\A\.\.[/\\]\|[/\\]\.\.\z\|[/\\]\.\.[/\\]\|\n)}`
New upstream version 12.9.2 2020-04-08 14:13:33 +05:30
New upstream version 13.3.8 2020-10-24 23:57:45 +05:30			`if path.match?(path_regex)`
New upstream version 13.12.3+ds1 2021-06-08 01:23:25 +05:30			`raise PathTraversalAttackError, 'Invalid path'`
New upstream version 12.9.2 2020-04-08 14:13:33 +05:30			`end`
New upstream version 11.5.3+dfsg 2018-12-13 13:39:08 +05:30
			`path`
			`end`

New upstream version 13.3.8 2020-10-24 23:57:45 +05:30			`def allowlisted?(absolute_path, allowlist)`
			`path = absolute_path.downcase`

			`allowlist.map(&:downcase).any? do \|allowed_path\|`
			`path.start_with?(allowed_path)`
			`end`
			`end`

			`def check_allowed_absolute_path!(path, allowlist)`
			`return unless Pathname.new(path).absolute?`
			`return if allowlisted?(path, allowlist)`

			`raise StandardError, "path #{path} is not allowed"`
			`end`

			`def decode_path(encoded_path)`
			`decoded = CGI.unescape(encoded_path)`
			`if decoded != CGI.unescape(decoded)`
			`raise StandardError, "path #{encoded_path} is not allowed"`
			`end`

			`decoded`
			`end`

Imported Upstream version 7.14.3 2015-09-11 14:41:01 +05:30			`def force_utf8(str)`
New upstream version 12.2.8 2019-10-12 21:52:04 +05:30			`str.dup.force_encoding(Encoding::UTF_8)`
Imported Upstream version 7.14.3 2015-09-11 14:41:01 +05:30			`end`
New upstream version 8.13.6+dfsg1 2016-11-24 13:41:30 +05:30
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`def ensure_utf8_size(str, bytes:)`
			`raise ArgumentError, 'Empty string provided!' if str.empty?`
New upstream version 13.3.8 2020-10-24 23:57:45 +05:30			`raise ArgumentError, 'Negative string size provided!' if bytes < 0`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30
			`truncated = str.each_char.each_with_object(+'') do \|char, object\|`
			`if object.bytesize + char.bytesize > bytes`
			`break object`
			`else`
			`object.concat(char)`
			`end`
			`end`

			`truncated + ('0' * (bytes - truncated.bytesize))`
			`end`

New upstream version 11.5.3+dfsg 2018-12-13 13:39:08 +05:30			`# Append path to host, making sure there's one single / in between`
			`def append_path(host, path)`
			`"#{host.to_s.sub(%r{\/+$}, '')}/#{path.to_s.sub(%r{^\/+}, '')}"`
			`end`

New upstream version 10.5.5+dfsg 2018-03-17 18:26:18 +05:30			`# A slugified version of the string, suitable for inclusion in URLs and`
			`# domain names. Rules:`
			`#`
			`# * Lowercased`
			`# * Anything not matching [a-z0-9-] is replaced with a -`
			`# * Maximum length is 63 bytes`
			`# * First/Last Character is not a hyphen`
			`def slugify(str)`
New upstream version 13.2.1 2020-07-28 23:09:34 +05:30			`str.downcase`
New upstream version 10.5.5+dfsg 2018-03-17 18:26:18 +05:30			`.gsub(/[^a-z0-9]/, '-')[0..62]`
			`.gsub(/(\A-+\|-+\z)/, '')`
			`end`

New upstream version 12.8.6 2020-03-13 15:44:24 +05:30			`# Wraps ActiveSupport's Array#to_sentence to convert the given array to a`
			`# comma-separated sentence joined with localized 'or' Strings instead of 'and'.`
			`def to_exclusive_sentence(array)`
			`array.to_sentence(two_words_connector: _(' or '), last_word_connector: _(', or '))`
			`end`

New upstream version 10.7.3+dfsg 2018-05-09 12:01:36 +05:30			`# Converts newlines into HTML line break elements`
			`def nlbr(str)`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`ActionView::Base.full_sanitizer.sanitize(+str, tags: []).gsub(/\r?\n/, '<br>').html_safe`
New upstream version 10.7.3+dfsg 2018-05-09 12:01:36 +05:30			`end`

New upstream version 10.5.5+dfsg 2018-03-17 18:26:18 +05:30			`def remove_line_breaks(str)`
			`str.gsub(/\r?\n/, '')`
			`end`

New upstream version 13.0.0 2020-05-24 23:13:21 +05:30			`def to_boolean(value, default: nil)`
New upstream version 13.11.2+ds1 2021-04-29 21:17:54 +05:30			`value = value.to_s if [0, 1].include?(value)`

New upstream version 8.13.6+dfsg1 2016-11-24 13:41:30 +05:30			`return value if [true, false].include?(value)`
			`return true if value =~ /^(true\|t\|yes\|y\|1\|on)$/i`
			`return false if value =~ /^(false\|f\|no\|n\|0\|off)$/i`

New upstream version 13.0.0 2020-05-24 23:13:21 +05:30			`default`
New upstream version 8.13.6+dfsg1 2016-11-24 13:41:30 +05:30			`end`
New upstream version 9.5.4+dfsg 2017-09-10 17:25:29 +05:30
			`def boolean_to_yes_no(bool)`
			`if bool`
			`'Yes'`
			`else`
			`'No'`
			`end`
			`end`
New upstream version 10.5.5+dfsg 2018-03-17 18:26:18 +05:30
			`def random_string`
			`Random.rand(Float::MAX.to_i).to_s(36)`
			`end`

			`# See: http://stackoverflow.com/questions/2108727/which-in-ruby-checking-if-program-exists-in-path-from-ruby`
			`# Cross-platform way of finding an executable in the $PATH.`
			`#`
			`# which('ruby') #=> /usr/bin/ruby`
			`def which(cmd, env = ENV)`
			`exts = env['PATHEXT'] ? env['PATHEXT'].split(';') : ['']`

			`env['PATH'].split(File::PATH_SEPARATOR).each do \|path\|`
			`exts.each do \|ext\|`
			`exe = File.join(path, "#{cmd}#{ext}")`
			`return exe if File.executable?(exe) && !File.directory?(exe)`
			`end`
			`end`

			`nil`
			`end`
New upstream version 10.6.0+dfsg 2018-03-27 19:54:05 +05:30
New upstream version 11.10.8+dfsg 2019-07-07 11:18:12 +05:30			`def try_megabytes_to_bytes(size)`
			`Integer(size).megabytes`
			`rescue ArgumentError`
			`size`
			`end`

New upstream version 10.8.7+dfsg 2018-10-15 14:42:47 +05:30			`def bytes_to_megabytes(bytes)`
			`bytes.to_f / Numeric::MEGABYTE`
			`end`

New upstream version 12.10.0 2020-04-22 19:07:51 +05:30			`def ms_to_round_sec(ms)`
New upstream version 13.0.0 2020-05-24 23:13:21 +05:30			`(ms.to_f / 1000).round(6)`
New upstream version 12.10.0 2020-04-22 19:07:51 +05:30			`end`

New upstream version 10.6.0+dfsg 2018-03-27 19:54:05 +05:30			`# Used in EE`
			`# Accepts either an Array or a String and returns an array`
			`def ensure_array_from_string(string_or_array)`
			`return string_or_array if string_or_array.is_a?(Array)`

			`string_or_array.split(',').map(&:strip)`
			`end`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30
			`def deep_indifferent_access(data)`
			`if data.is_a?(Array)`
			`data.map(&method(:deep_indifferent_access))`
			`elsif data.is_a?(Hash)`
			`data.with_indifferent_access`
			`else`
			`data`
			`end`
			`end`
New upstream version 12.2.8 2019-10-12 21:52:04 +05:30
New upstream version 14.1.7+ds1 2021-09-30 23:02:18 +05:30			`def deep_symbolized_access(data)`
			`if data.is_a?(Array)`
			`data.map(&method(:deep_symbolized_access))`
			`elsif data.is_a?(Hash)`
			`data.deep_symbolize_keys`
			`else`
			`data`
			`end`
			`end`

New upstream version 12.2.8 2019-10-12 21:52:04 +05:30			`def string_to_ip_object(str)`
			`return unless str`

			`IPAddr.new(str)`
			`rescue IPAddr::InvalidAddressError`
			`end`
New upstream version 12.6.8 2020-03-07 23:17:34 +05:30
New upstream version 13.8.5+ds1 2021-03-08 18:12:59 +05:30			`# A safe alternative to String#downcase!`
			`#`
			`# This will make copies of frozen strings but downcase unfrozen`
			`# strings in place, reducing allocations.`
			`def safe_downcase!(str)`
			`if str.frozen?`
			`str.downcase`
			`else`
			`str.downcase! \|\| str`
			`end`
			`end`

New upstream version 12.6.8 2020-03-07 23:17:34 +05:30			`# Converts a string to an Addressable::URI object.`
			`# If the string is not a valid URI, it returns nil.`
			`# Param uri_string should be a String object.`
			`# This method returns an Addressable::URI object or nil.`
			`def parse_url(uri_string)`
			`Addressable::URI.parse(uri_string)`
			`rescue Addressable::URI::InvalidURIError, TypeError`
			`end`
New upstream version 13.1.0 2020-06-23 00:09:42 +05:30
New upstream version 13.11.5+ds1 2021-06-02 17:11:27 +05:30			`def removes_sensitive_data_from_url(uri_string)`
			`uri = parse_url(uri_string)`

			`return unless uri`
			`return uri_string unless uri.fragment`

			`stripped_params = CGI.parse(uri.fragment)`
			`if stripped_params['access_token']`
			`stripped_params['access_token'] = 'filtered'`
			`filtered_query = Addressable::URI.new`
			`filtered_query.query_values = stripped_params`

			`uri.fragment = filtered_query.query`
			`end`

			`uri.to_s`
			`end`

New upstream version 13.1.0 2020-06-23 00:09:42 +05:30			`# Invert a hash, collecting all keys that map to a given value in an array.`
			`#`
			# Unlike `Hash#invert`, where the last encountered pair wins, and which has the
			# type `Hash[k, v] => Hash[v, k]`, `multiple_key_invert` does not lose any
			# information, has the type `Hash[k, v] => Hash[v, Array[k]]`, and the original
			`# hash can always be reconstructed.`
			`#`
			`# example:`
			`#`
			`# multiple_key_invert({ a: 1, b: 2, c: 1 })`
			`# # => { 1 => [:a, :c], 2 => [:b] }`
			`#`
			`def multiple_key_invert(hash)`
			`hash.flat_map { \|k, v\| Array.wrap(v).zip([k].cycle) }`
			`.group_by(&:first)`
			`.transform_values { \|kvs\| kvs.map(&:last) }`
			`end`
New upstream version 13.2.1 2020-07-28 23:09:34 +05:30
			`# This sort is stable (see https://en.wikipedia.org/wiki/Sorting_algorithm#Stability)`
			`# contrary to the bare Ruby sort_by method. Using just sort_by leads to`
			`# instability across different platforms (e.g., x86_64-linux and x86_64-darwin18)`
			`# which in turn leads to different sorting results for the equal elements across`
			`# these platforms.`
			`# This method uses a list item's original index position to break ties.`
			`def stable_sort_by(list)`
			`list.sort_by.with_index { \|x, idx\| [yield(x), idx] }`
			`end`
New upstream version 13.3.9 2020-11-05 12:06:23 +05:30
			# Check for valid brackets (`[` and `]`) in a string using this aspects:
			`# * open brackets count == closed brackets count`
			# * (optionally) reject nested brackets via `allow_nested: false`
			`# * open / close brackets coherence, eg. ][[] -> invalid`
			`def valid_brackets?(string = '', allow_nested: true)`
			`# remove everything except brackets`
			`brackets = string.remove(/[^\[\]]/)`

			`return true if brackets.empty?`
			`# balanced counts check`
			`return false if brackets.size.odd?`

			`unless allow_nested`
			`# nested brackets check`
			`return false if brackets.include?('[[') \|\| brackets.include?(']]')`
			`end`

			`# open / close brackets coherence check`
			`untrimmed = brackets`
			`loop do`
			`trimmed = untrimmed.gsub('[]', '')`
			`return true if trimmed.empty?`
			`return false if trimmed == untrimmed`

			`untrimmed = trimmed`
			`end`
			`end`
Imported Upstream version 7.10.0 2015-04-26 12:48:37 +05:30			`end`
			`end`