# frozen_string_literal: true

module Gitlab
|
|
class UrlSanitizer
|
|
include Gitlab::Utils::StrongMemoize
|
|
|
|
# Schemes accepted by .valid? when the caller passes no explicit list; also
# the schemes handed to make_regexp for URI_REGEXP below.
ALLOWED_SCHEMES = %w[http https ssh git].freeze
# Schemes accepted by .valid_web?.
ALLOWED_WEB_SCHEMES = %w[http https].freeze
# Placeholder scheme that is prepended to scheme-less input before parsing
# and stripped again from every URL that is handed back to the caller.
SCHEMIFIED_SCHEME = 'glschemelessuri'
# Frozen explicitly: on Ruby 3.0+ interpolated strings are not frozen by the
# frozen_string_literal magic comment.
SCHEMIFY_PLACEHOLDER = "#{SCHEMIFIED_SCHEME}://".freeze

# URI::DEFAULT_PARSER.make_regexp only matches URLs that have a scheme, or
# relative URLs. The second alternative below additionally matches
# scheme-less URIs that carry userinfo (e.g. user:pass@gitlab.com) but does
# not match scp-style URIs (e.g. user@server:path/to/file).
#
# The userinfo part is much looser than URI's own implementation, so
# unescaped userinfo such as foo:b?r@gitlab.com (which should have been
# encoded as foo:b%3Fr@gitlab.com) is matched as well.
#
# URI::RFC2396_REGEXP is referenced directly because the URI::REGEXP alias
# is obsolete in newer releases of the uri library.
URI_REGEXP = %r{
  (?:
    #{URI::DEFAULT_PARSER.make_regexp(ALLOWED_SCHEMES)}
  |
    (?:(?:(?!@)[%#{URI::RFC2396_REGEXP::PATTERN::UNRESERVED}#{URI::RFC2396_REGEXP::PATTERN::RESERVED}])+(?:@))
    (?# negative lookahead ensures this isn't an SCP-style URL: [host]:[rel_path|abs_path] server:path/to/file)
    (?!#{URI::RFC2396_REGEXP::PATTERN::HOST}:(?:#{URI::RFC2396_REGEXP::PATTERN::REL_PATH}|#{URI::RFC2396_REGEXP::PATTERN::ABS_PATH}))
    #{URI::RFC2396_REGEXP::PATTERN::HOSTPORT}
  )
}x.freeze
|
|
|
|
# Masks the credentials of every URL found in +content+.
#
# Each substring matched by URI_REGEXP is replaced with the #masked_url of
# a sanitizer built from it. A match for which that raises
# Addressable::URI::InvalidURIError is replaced with an empty string.
#
# @param content [#gsub] text to scan
# @return the result of the gsub call
def self.sanitize(content)
  content.gsub(URI_REGEXP) do |candidate|
    new(candidate).masked_url
  rescue Addressable::URI::InvalidURIError
    # Unparseable matches are dropped rather than echoed back.
    ''
  end
end
|
|
|
|
# Tells whether +url+ is a non-blank String whose scheme is allowed.
#
# Surrounding whitespace is stripped before parsing.
#
# @param url [Object] candidate URL; anything that is blank or not a String
#   is rejected
# @param allowed_schemes [#include?] schemes to accept (defaults to
#   ALLOWED_SCHEMES)
# @return [Boolean] false as well when Addressable cannot parse the URL
def self.valid?(url, allowed_schemes: ALLOWED_SCHEMES)
  return false unless url.is_a?(String) && url.present?

  parsed = Addressable::URI.parse(url.strip)
  allowed_schemes.include?(parsed.scheme)
rescue Addressable::URI::InvalidURIError
  false
end
|
|
|
|
# Same check as .valid?, restricted to ALLOWED_WEB_SCHEMES.
#
# @param url [Object] candidate URL
# @return [Boolean]
def self.valid_web?(url)
  valid?(url, allowed_schemes: ALLOWED_WEB_SCHEMES)
end
|
|
|
|
# Wraps +url+, optionally together with explicit credentials.
#
# @param url [#to_s] the URL to wrap; it is parsed by #parse_url
# @param credentials [Hash, nil] optional hash with :user and/or :password.
#   Blank values for those keys are normalised to nil. The hash passed in
#   is never modified.
def initialize(url, credentials: nil)
  # Work on a shallow copy: both the normalisation below and #parse_url
  # write into this hash, and the caller's object must stay untouched
  # (it may also be frozen).
  credentials = credentials&.dup

  %i[user password].each do |key|
    credentials[key] = credentials[key].presence if credentials&.key?(key)
  end

  @credentials = credentials
  @url = parse_url(url)
end
|
|
|
|
# Returns the credentials in effect for this URL.
#
# When none were stored yet (neither passed to the constructor nor found
# while parsing), they are derived from the parsed URL's user and password,
# with blank values turned into nil, and remembered for later calls.
#
# @return [Hash] normally a hash with :user and :password keys
def credentials
  return @credentials if @credentials

  @credentials = {
    user: @url.user.presence,
    password: @url.password.presence
  }
end
|
|
|
|
# Convenience reader for the :user entry of #credentials.
#
# @return [Object, nil] whatever is stored under :user
def user
  credentials[:user]
end
|
|
|
|
# Returns the URL with user and password removed.
#
# The changes are made on a copy of the parsed URL, and the scheme
# placeholder (if present) is stripped from the result by #reverse_schemify.
#
# @return [String]
def sanitized_url
  stripped = @url.dup.tap do |copy|
    copy.password = nil
    copy.user = nil
  end

  reverse_schemify(stripped.to_s)
end
# NOTE(review): strong_memoize_attr comes from Gitlab::Utils::StrongMemoize,
# which is not visible here; presumably it caches the result per instance.
strong_memoize_attr :sanitized_url
|
|
|
|
# Returns the URL with any present user and/or password replaced by a
# fixed mask, leaving absent parts absent.
#
# The changes are made on a copy of the parsed URL, and the scheme
# placeholder (if present) is stripped from the result by #reverse_schemify.
#
# @return [String]
def masked_url
  mask = "*****"

  masked = @url.dup.tap do |copy|
    # Password is handled before user, as in the parsing code.
    copy.password = mask if copy.password.present?
    copy.user = mask if copy.user.present?
  end

  reverse_schemify(masked.to_s)
end
# NOTE(review): strong_memoize_attr comes from Gitlab::Utils::StrongMemoize,
# which is not visible here; presumably it caches the result per instance.
strong_memoize_attr :masked_url
|
|
|
|
# Returns the URL including credentials.
#
# When #valid_credentials? holds, the present :password and :user entries
# of #credentials are percent-encoded with #encode_percent and written onto
# a copy of the parsed URL; otherwise the parsed URL is used as is. In both
# cases the scheme placeholder is stripped by #reverse_schemify.
#
# @return [String]
def full_url
  rebuilt = @url

  if valid_credentials?
    secret = credentials[:password]
    login = credentials[:user]

    rebuilt = @url.dup
    rebuilt.password = encode_percent(secret) if secret.present?
    rebuilt.user = encode_percent(login) if login.present?
  end

  reverse_schemify(rebuilt.to_s)
end
# NOTE(review): strong_memoize_attr comes from Gitlab::Utils::StrongMemoize,
# which is not visible here; presumably it caches the result per instance.
strong_memoize_attr :full_url
|
|
|
|
private
|
|
|
|
# Turns +url+ into an Addressable::URI and records embedded credentials.
#
# Steps:
# 1. The input is stringified, stripped and passed through #schemify.
# 2. Everything between "//" and the last "@" is taken as raw userinfo,
#    cut out of the string and split at the first ":" into user and
#    password.
# 3. Those values fill @credentials only where the stored entry is blank,
#    so credentials given to the constructor take precedence.
# 4. The remaining string is parsed and the embedded user/password, when
#    present, are set on the resulting URI.
#
# @param url [#to_s]
# @return [Addressable::URI]
# NOTE(review): presumably raises Addressable::URI::InvalidURIError for
# unparseable input (callers in this class rescue it) - confirm.
def parse_url(url)
  raw_url = schemify(url.to_s.strip)
  userinfo = raw_url[%r{\A(?:(?:#{SCHEMIFIED_SCHEME}|git|ssh|http(?:s?)):)?//(?:(.+)(?:@))?(.+)}o, 1]

  if userinfo.present?
    raw_url.sub!("#{userinfo}@", '')

    embedded_user, _, embedded_password = userinfo.partition(':')

    @credentials ||= {}
    @credentials[:user] = embedded_user.presence if @credentials[:user].blank?
    @credentials[:password] = embedded_password.presence if @credentials[:password].blank?
  end

  # embedded_user / embedded_password are nil when no userinfo was found.
  Addressable::URI.parse(raw_url).tap do |uri|
    uri.password = embedded_password if embedded_password.present?
    uri.user = embedded_user if embedded_user.present?
  end
end
|
|
|
|
# Prepends SCHEMIFY_PLACEHOLDER to +url+ unless it already starts with a
# scheme followed by "//" or with a bare "//" (scheme-relative URL).
#
# The string is modified in place and the same object is returned, so the
# caller must pass a mutable String.
#
# @param url [String]
# @return [String] +url+ itself
def schemify(url)
  # URI::RFC2396_REGEXP is used instead of the obsolete URI::REGEXP alias.
  return url if url.start_with?(%r{(?:#{URI::RFC2396_REGEXP::PATTERN::SCHEME}:)?//}o)

  url.prepend(SCHEMIFY_PLACEHOLDER)
end
|
|
|
|
# Removes a leading SCHEMIFY_PLACEHOLDER from +url+, undoing #schemify.
#
# A new String is returned and +url+ itself is left untouched, so frozen
# strings are accepted and a string owned by another object (callers pass
# the result of a URI's #to_s) is never altered behind its back.
#
# @param url [String]
# @return [String] +url+ without the placeholder prefix
def reverse_schemify(url)
  url.delete_prefix(SCHEMIFY_PLACEHOLDER)
end
|
|
|
|
# Tells whether #credentials is a Hash holding at least one truthy value.
#
# @return [Boolean]
def valid_credentials?
  creds = credentials
  return false unless creds.is_a?(Hash)

  creds.values.any?
end
|
|
|
|
# Percent-encodes +string+ for use in the userinfo part of a URL.
#
# @param string [String]
# @return [String]
def encode_percent(string)
  escaped = CGI.escape(string)

  # CGI.escape turns spaces into "+", which does not work for git clone,
  # so they are rewritten as %20.
  escaped.gsub('+', '%20')
end
|
|
end
|
|
end
|