2018-12-05 23:21:45 +05:30
# frozen_string_literal: true
2017-08-17 22:00:37 +05:30
require 'resolv'
2018-11-29 20:51:05 +05:30
require 'ipaddress'
2017-08-17 22:00:37 +05:30
module Gitlab
class UrlBlocker
2018-05-09 12:01:36 +05:30
# Raised by the validations in this class when a URL must not be requested.
class BlockedUrlError < StandardError; end
2017-08-17 22:00:37 +05:30
2023-05-27 22:25:52 +05:30
# Default for the `deny_all_requests_except_allowed` argument. It is a Proc so
# that the application setting is read when the Proc is called rather than
# when this constant is defined.
DENY_ALL_REQUESTS_EXCEPT_ALLOWED_DEFAULT = proc do
  deny_all_requests_except_allowed_app_setting
end.freeze
2023-06-20 00:43:36 +05:30
# Result stores the validation result:
#
# uri       - The URI to use for the request. When DNS rebinding protection
#             rewrote the host, its hostname is the resolved IP address.
# hostname  - The original hostname when `uri` was rewritten to use the
#             resolved IP address, nil otherwise. It is kept so the caller
#             can still validate SSL against the original name.
# use_proxy - If true, this means that the proxy server specified in the
#             http_proxy/https_proxy environment variables should be used.
#
#             If false, this either means that no proxy server was specified
#             or that the hostname in the URL is exempt via the no_proxy
#             environment variable. This allows the caller to disable usage
#             of a proxy since the IP address may be used to connect.
#             Otherwise, Net::HTTP may erroneously compare the IP address
#             against the no_proxy list.
Result = Struct.new(:uri, :hostname, :use_proxy)
2018-05-09 12:01:36 +05:30
class << self
2019-06-05 12:25:43 +05:30
# Validates the given url according to the constraints specified by arguments.
#
# schemes - Required. Raises error if the URL scheme is blank or not included in the given schemes.
# ports - Raises error if the URL port is below 1024 and not included in the given ports. Not checked when empty.
# allow_localhost - Raises error if URL resolves to a localhost or loopback IP address and argument is false.
# allow_local_network - Raises error if URL resolves to a private, link-local, shared or limited broadcast address and argument is false.
# allow_object_storage - Avoid raising an error if URL matches an enabled object storage endpoint and argument is true.
# ascii_only - Raises error if URL has unicode characters and argument is true.
# enforce_user - Raises error if URL user doesn't start with alphanumeric characters and argument is true.
# enforce_sanitization - Raises error if URL includes any HTML/CSS/JS tags and argument is true.
# deny_all_requests_except_allowed - Raises error if URL is not in the allow list and argument is true. Can be Boolean or Proc. Defaults to instance app setting.
# dns_rebind_protection - When true and no proxy applies to the URL, the returned URI uses the resolved IP address as hostname.
#
# Returns a Result object. Raises BlockedUrlError when the URL is rejected
# and ArgumentError when `schemes` is blank.
# rubocop:disable Metrics/ParameterLists
def validate_url_with_proxy!(
  url,
  schemes:,
  ports: [],
  allow_localhost: false,
  allow_local_network: true,
  allow_object_storage: false,
  ascii_only: false,
  enforce_user: false,
  enforce_sanitization: false,
  deny_all_requests_except_allowed: DENY_ALL_REQUESTS_EXCEPT_ALLOWED_DEFAULT,
  dns_rebind_protection: true)
  # rubocop:enable Metrics/ParameterLists
  return Result.new(nil, nil, true) if url.nil?

  raise ArgumentError, 'The schemes is a required argument' if schemes.blank?

  # Param url can be a string, URI or Addressable::URI
  uri = parse_url(url)

  validate_uri(
    uri: uri,
    schemes: schemes,
    ports: ports,
    enforce_sanitization: enforce_sanitization,
    enforce_user: enforce_user,
    ascii_only: ascii_only
  )

  begin
    address_info = get_address_info(uri)
  rescue SocketError
    proxy_in_use = uri_under_proxy_setting?(uri, nil)

    # An unresolvable host is only an error when resolution is enforced.
    unless enforce_address_info_retrievable?(uri, dns_rebind_protection, deny_all_requests_except_allowed)
      return Result.new(uri, nil, proxy_in_use)
    end

    raise BlockedUrlError, 'Host cannot be resolved or invalid'
  end

  resolved_ip = ip_address(address_info)
  proxy_in_use = uri_under_proxy_setting?(uri, resolved_ip)

  # Ignore DNS rebind protection when a proxy is being used, as DNS
  # rebinding is expected behavior.
  dns_rebind_protection &&= !proxy_in_use
  return Result.new(uri, nil, proxy_in_use) if domain_in_allow_list?(uri)

  protected_uri_with_hostname = enforce_uri_hostname(resolved_ip, uri, dns_rebind_protection, proxy_in_use)

  return protected_uri_with_hostname if ip_in_allow_list?(resolved_ip, port: get_port(uri))

  # Allow url from the GitLab instance itself but only for the configured hostname and ports
  return protected_uri_with_hostname if internal?(uri)

  return protected_uri_with_hostname if allow_object_storage && object_storage_endpoint?(uri)

  validate_deny_all_requests_except_allowed!(deny_all_requests_except_allowed)

  validate_local_request(
    address_info: address_info,
    allow_localhost: allow_localhost,
    allow_local_network: allow_local_network
  )

  protected_uri_with_hostname
end
2021-01-29 00:20:46 +05:30
# Returns true when validate! raises BlockedUrlError for `url`, false when it
# passes. Accepts the same keyword arguments as validate!; any other error
# raised during validation propagates.
def blocked_url?(url, **kwargs)
  validate!(url, **kwargs)

  false
rescue BlockedUrlError
  true
end
2023-06-20 00:43:36 +05:30
# For backwards compatibility, returns an array with [<uri>, <original-hostname>].
# All arguments are forwarded unchanged to validate_url_with_proxy!.
# Issue for refactoring: https://gitlab.com/gitlab-org/gitlab/-/issues/410890
def validate!(...)
  result = validate_url_with_proxy!(...)

  [result.uri, result.hostname]
end
2017-08-17 22:00:37 +05:30
private
2019-06-05 12:25:43 +05:30
# Builds the Result for a validated URI.
#
# When DNS rebinding protection is on and the resolved IP address differs
# from the URI hostname, the Result carries a copy of the URI whose hostname
# is the resolved IP address, together with the original hostname. Otherwise
# the URI is returned untouched with a nil hostname.
#
# The original hostname is used to validate the SSL, given in that scenario
# we'll be making the request to the IP address, instead of using the hostname.
def enforce_uri_hostname(ip_address, uri, dns_rebind_protection, proxy_in_use)
  rewrite_host = dns_rebind_protection && ip_address && ip_address != uri.hostname
  return Result.new(uri, nil, proxy_in_use) unless rewrite_host

  # Work on a copy so the caller's URI object is left unchanged.
  new_uri = uri.dup
  new_uri.hostname = ip_address

  Result.new(new_uri, uri.hostname, proxy_in_use)
end
2019-10-12 21:52:04 +05:30
# Returns the IP address of the first entry in `address_info`, or nil when
# the list is empty.
def ip_address(address_info)
  address_info.first&.ip_address
end
2019-12-21 20:55:43 +05:30
# Runs the checks that only need the parsed URI (no DNS resolution).
#
# HTML tag sanitization is checked first, when requested. URIs pointing at
# this GitLab instance skip every remaining check. Each check raises
# BlockedUrlError on failure.
def validate_uri(uri:, schemes:, ports:, enforce_sanitization:, enforce_user:, ascii_only:)
  validate_html_tags(uri) if enforce_sanitization

  return if internal?(uri)

  validate_scheme(uri.scheme, schemes)
  validate_port(get_port(uri), ports) if ports.any?
  validate_user(uri.user) if enforce_user
  validate_hostname(uri.hostname)
  validate_unicode_restriction(uri) if ascii_only
end
2023-06-20 00:43:36 +05:30
# Tells whether requests to `uri` would go through the proxy configured via
# environment variables.
#
# ip_address - resolved IP address of the host, or nil when it could not be
#              resolved; passed on for matching against the no_proxy list.
def uri_under_proxy_setting?(uri, ip_address)
  return false unless Gitlab.http_proxy_env?
  # `no_proxy|NO_PROXY` specifies addresses for which the proxy is not
  # used. If it's empty, there are no exceptions and this URI
  # will be under proxy settings.
  return true if no_proxy_env.blank?

  # `no_proxy|NO_PROXY` is being used. We must check whether it
  # applies to this specific URI.
  ::URI::Generic.use_proxy?(uri.hostname, ip_address, get_port(uri), no_proxy_env)
end
2023-05-27 22:25:52 +05:30
# Returns addrinfo objects for the URI. IPv4-mapped IPv6 addresses are
# converted to their IPv4 form.
#
# @param uri [Addressable::URI]
#
# @raise [Gitlab::UrlBlocker::BlockedUrlError, ArgumentError] - BlockedUrlError raised if host is too long.
#
# @return [Array<Addrinfo>]
def get_address_info(uri)
  Addrinfo.getaddrinfo(uri.hostname, get_port(uri), nil, :STREAM).map do |addr|
    addr.ipv6_v4mapped? ? addr.ipv6_to_ipv4 : addr
  end
rescue ArgumentError => error
  # Addrinfo.getaddrinfo errors if the domain exceeds 1024 characters.
  # Any other ArgumentError is re-raised untouched.
  raise unless error.message.include?('hostname too long')

  raise BlockedUrlError, "Host is too long (maximum is 1024 characters)"
end
# Decides whether a host that failed to resolve must be rejected.
#
# Returns true when the failure has to be treated as an error, false when
# the unresolved URI may be returned to the caller as-is.
def enforce_address_info_retrievable?(uri, dns_rebind_protection, deny_all_requests_except_allowed)
  # Do not enforce if URI is in the allow list
  return false if domain_in_allow_list?(uri)

  # Enforce if the instance should block requests
  return true if deny_all_requests_except_allowed?(deny_all_requests_except_allowed)

  # Do not enforce if DNS rebinding protection is disabled
  return false unless dns_rebind_protection

  # Do not enforce if proxy is used
  return false if Gitlab.http_proxy_env?

  # In the test suite we use a lot of mocked urls that are either invalid or
  # don't exist. In order to avoid modifying a ton of tests and factories
  # we allow invalid urls in the test environment when the environment
  # variable RSPEC_ALLOW_INVALID_URLS is set to 'true'.
  return false if Rails.env.test? && ENV['RSPEC_ALLOW_INVALID_URLS'] == 'true'

  true
end
2019-10-12 21:52:04 +05:30
# Applies the localhost and local-network restrictions to the resolved
# addresses. Each restriction is skipped when its `allow_*` flag is true;
# the individual validators raise BlockedUrlError on a match.
def validate_local_request(address_info:, allow_localhost:, allow_local_network:)
  return if allow_local_network && allow_localhost

  unless allow_localhost
    validate_localhost(address_info)
    validate_loopback(address_info)
  end

  unless allow_local_network
    validate_local_network(address_info)
    validate_link_local(address_info)
    validate_shared_address(address_info)
    validate_limited_broadcast_address(address_info)
  end
end
2022-02-05 19:09:49 +05:30
# Raises BlockedUrlError if any address in `addrs_info` falls inside the
# 100.64.0.0/10 shared address space.
def validate_shared_address(addrs_info)
  netmask = IPAddr.new('100.64.0.0/10')
  return unless addrs_info.any? { |addr| netmask.include?(addr.ip_address) }

  raise BlockedUrlError, "Requests to the shared address space are not allowed"
end
2019-02-15 15:39:39 +05:30
# Returns the port given in the URI, or the default port for the URI when
# none was given.
def get_port(uri)
  explicit_port = uri.port
  return explicit_port if explicit_port

  uri.default_port
end
2019-09-30 21:07:59 +05:30
# Raises BlockedUrlError if sanitizing the URI string with no tags allowed
# changes it.
def validate_html_tags(uri)
  uri_str = uri.to_s
  sanitized_uri = ActionController::Base.helpers.sanitize(uri_str, tags: [])
  return if sanitized_uri == uri_str

  raise BlockedUrlError, 'HTML/CSS/JS tags are not allowed'
end
2018-11-29 20:51:05 +05:30
# Parses `url` with Addressable::URI and returns the parsed URI.
#
# Raises BlockedUrlError when the URL cannot be parsed or is rejected by
# multiline_blocked?.
def parse_url(url)
  Addressable::URI.parse(url).tap do |parsed_url|
    raise Addressable::URI::InvalidURIError if multiline_blocked?(parsed_url)
  end
rescue Addressable::URI::InvalidURIError, URI::InvalidURIError
  raise BlockedUrlError, 'URI is invalid'
end
2021-12-11 22:18:48 +05:30
# Returns true when the URL contains a line break, either literally or,
# for schemes other than http/https, after URL-decoding.
def multiline_blocked?(parsed_url)
  url = parsed_url.to_s

  return true if url.match?(/\n|\r/)
  # Google Cloud Storage uses a multi-line, encoded Signature query string
  return false if %w[http https].include?(parsed_url.scheme&.downcase)

  CGI.unescape(url).match?(/\n|\r/)
end
2019-09-30 21:07:59 +05:30
# Raises BlockedUrlError when `port` is below 1024 and not listed in
# `ports`. A blank port is accepted.
def validate_port(port, ports)
  return if port.blank?
  # Only ports under 1024 are restricted
  return if port >= 1024
  return if ports.include?(port)

  raise BlockedUrlError, "Only allowed ports are #{ports.join(', ')}, and any over 1024"
end
2019-12-21 20:55:43 +05:30
# Raises BlockedUrlError when `scheme` is blank, or when `schemes` is not
# empty and does not include it.
def validate_scheme(scheme, schemes)
  if scheme.blank? || (schemes.any? && schemes.exclude?(scheme))
    raise BlockedUrlError, "Only allowed schemes are #{schemes.join(', ')}"
  end
end
2019-09-30 21:07:59 +05:30
# Raises BlockedUrlError unless the URL user is blank or starts with an
# alphanumeric character.
def validate_user(value)
  return if value.blank?
  return if value.match?(/\A\p{Alnum}/)

  raise BlockedUrlError, "Username needs to start with an alphanumeric character"
end
2019-09-30 21:07:59 +05:30
# Raises BlockedUrlError unless the hostname is blank, is accepted by
# IPAddress.valid?, or starts with an alphanumeric character.
def validate_hostname(value)
  return if value.blank?
  return if IPAddress.valid?(value)
  return if value.match?(/\A\p{Alnum}/)

  raise BlockedUrlError, "Hostname or IP address invalid"
end
2019-09-30 21:07:59 +05:30
# Raises BlockedUrlError if the URI string contains non-ASCII characters.
# The message includes the escaped (dumped) URI so the offending characters
# are visible.
def validate_unicode_restriction(uri)
  return if uri.to_s.ascii_only?

  raise BlockedUrlError, "URI must be ascii only #{uri.to_s.dump}"
end
2019-09-30 21:07:59 +05:30
# Raises BlockedUrlError if any resolved address is `::`, `0.0.0.0`, or one
# of the addresses reported by Socket.ip_address_list.
def validate_localhost(addrs_info)
  local_ips = ["::", "0.0.0.0"]
  local_ips.concat(Socket.ip_address_list.map(&:ip_address))

  return if (local_ips & addrs_info.map(&:ip_address)).empty?

  raise BlockedUrlError, "Requests to localhost are not allowed"
end
2019-09-30 21:07:59 +05:30
# Raises BlockedUrlError if any resolved address is an IPv4 or IPv6
# loopback address.
def validate_loopback(addrs_info)
  return unless addrs_info.any? { |addr| addr.ipv4_loopback? || addr.ipv6_loopback? }

  raise BlockedUrlError, "Requests to loopback addresses are not allowed"
end
2019-09-30 21:07:59 +05:30
# Raises BlockedUrlError if any resolved address is an IPv4 private address
# or an IPv6 site-local / unique-local address.
def validate_local_network(addrs_info)
  return unless addrs_info.any? { |addr| addr.ipv4_private? || addr.ipv6_sitelocal? || addr.ipv6_unique_local? }

  raise BlockedUrlError, "Requests to the local network are not allowed"
end
2019-09-30 21:07:59 +05:30
# Raises BlockedUrlError if any resolved address is IPv6 link-local or lies
# inside 169.254.0.0/16.
def validate_link_local(addrs_info)
  netmask = IPAddr.new('169.254.0.0/16')
  return unless addrs_info.any? { |addr| addr.ipv6_linklocal? || netmask.include?(addr.ip_address) }

  raise BlockedUrlError, "Requests to the link local network are not allowed"
end
2023-05-27 22:25:52 +05:30
# Raises a BlockedUrlError if the instance is configured to deny all requests.
#
# `should_deny` may be a Boolean or a Proc, as accepted by
# deny_all_requests_except_allowed?.
#
# This should only be called after allow list checks have been made.
def validate_deny_all_requests_except_allowed!(should_deny)
  return unless deny_all_requests_except_allowed?(should_deny)

  raise BlockedUrlError, "Requests to hosts and IP addresses not on the Allow List are denied"
end
2022-05-07 20:08:51 +05:30
# Raises a BlockedUrlError if any IP in `addrs_info` is the limited
# broadcast address.
# https://datatracker.ietf.org/doc/html/rfc919#section-7
def validate_limited_broadcast_address(addrs_info)
  blocked_ips = ["255.255.255.255"]

  return if (blocked_ips & addrs_info.map(&:ip_address)).empty?

  raise BlockedUrlError, "Requests to the limited broadcast address are not allowed"
end
2017-08-17 22:00:37 +05:30
# True when the URI points at this GitLab instance, either its web
# endpoint or its SSH (GitLab Shell) endpoint.
def internal?(uri)
  internal_web?(uri) || internal_shell?(uri)
end
# True when scheme, hostname and port of the URI all match the configured
# GitLab web endpoint (config.gitlab).
def internal_web?(uri)
  uri.scheme == config.gitlab.protocol &&
    uri.hostname == config.gitlab.host &&
    get_port(uri) == config.gitlab.port
end
# True when the URI uses the ssh scheme and its hostname and port match the
# configured GitLab Shell endpoint (config.gitlab_shell).
def internal_shell?(uri)
  uri.scheme == 'ssh' &&
    uri.hostname == config.gitlab_shell.ssh_host &&
    get_port(uri) == config.gitlab_shell.ssh_port
end
2022-05-07 20:08:51 +05:30
# Collects the distinct connection endpoints of every supported object
# storage type that is enabled and has its object store enabled.
# Types without a configured endpoint are left out.
def enabled_object_storage_endpoints
  ObjectStoreSettings::SUPPORTED_TYPES.filter_map do |type|
    section_setting = config.try(type)
    next unless section_setting && section_setting['enabled']

    object_store_setting = section_setting['object_store']
    next unless object_store_setting && object_store_setting['enabled']

    object_store_setting.dig('connection', 'endpoint')
  end.uniq
end
2023-05-27 22:25:52 +05:30
# Resolves the `deny_all_requests_except_allowed` argument: a Proc is
# called and its result returned, any other value is returned as given.
def deny_all_requests_except_allowed?(should_deny)
  should_deny.is_a?(Proc) ? should_deny.call : should_deny
end
# Reads the instance-wide `deny_all_requests_except_allowed` setting.
# The setting is only consulted when
# Gitlab::CurrentSettings.current_application_settings? is truthy.
def deny_all_requests_except_allowed_app_setting
  Gitlab::CurrentSettings.current_application_settings? &&
    Gitlab::CurrentSettings.deny_all_requests_except_allowed?
end
2022-05-07 20:08:51 +05:30
# True when scheme, hostname and port of the URI match one of the enabled
# object storage endpoints.
def object_storage_endpoint?(uri)
  enabled_object_storage_endpoints.any? do |endpoint|
    endpoint_uri = URI(endpoint)

    uri.scheme == endpoint_uri.scheme &&
      uri.hostname == endpoint_uri.hostname &&
      get_port(uri) == get_port(endpoint_uri)
  end
end
2023-05-27 22:25:52 +05:30
# Asks the UrlAllowlist whether the URI's normalized host, together with
# its port, is allowed.
def domain_in_allow_list?(uri)
  Gitlab::UrlBlockers::UrlAllowlist.domain_allowed?(uri.normalized_host, port: get_port(uri))
end
2023-05-27 22:25:52 +05:30
# Asks the UrlAllowlist whether the IP address, optionally restricted to
# `port`, is allowed.
def ip_in_allow_list?(ip_address, port: nil)
  Gitlab::UrlBlockers::UrlAllowlist.ip_allowed?(ip_address, port: port)
end
2017-08-17 22:00:37 +05:30
# Shortcut for the GitLab configuration object used by the checks above.
def config
  Gitlab.config
end
2023-06-20 00:43:36 +05:30
# Value of the `no_proxy` environment variable, falling back to `NO_PROXY`
# when the lowercase variable is not set. Returns nil when neither is set.
def no_proxy_env
  ENV['no_proxy'] || ENV['NO_PROXY']
end
2017-08-17 22:00:37 +05:30
end
end
end