debian-mirror-gitlab/lib/gitlab/url_blocker.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

419 lines
15 KiB
Ruby
Raw Normal View History

2018-12-05 23:21:45 +05:30
# frozen_string_literal: true
2017-08-17 22:00:37 +05:30
require 'resolv'
2018-11-29 20:51:05 +05:30
require 'ipaddress'
2017-08-17 22:00:37 +05:30
module Gitlab
class UrlBlocker
2018-05-09 12:01:36 +05:30
# Raised by the validations in this class when a URL must not be requested.
BlockedUrlError = Class.new(StandardError)
2017-08-17 22:00:37 +05:30
2023-05-27 22:25:52 +05:30
# Default for the `deny_all_requests_except_allowed` argument. It is a Proc so
# the application setting is read when the Proc is called, not when this
# constant is defined.
DENY_ALL_REQUESTS_EXCEPT_ALLOWED_DEFAULT = proc { deny_all_requests_except_allowed_app_setting }.freeze
2023-06-20 00:43:36 +05:30
# Result stores the validation result:
# uri - The URI that should be used to connect. When DNS rebinding
# protection replaces the host (see enforce_uri_hostname), this is a
# copy of the requested URI whose hostname is the resolved IP
# address; otherwise it is the requested URI itself (nil when the
# given url was nil).
# hostname - The originally requested hostname when `uri` had its
# hostname replaced by the resolved IP address; nil otherwise.
# use_proxy -
# If true, this means that the proxy server specified in the
# http_proxy/https_proxy environment variables should be used.
#
# If false, this either means that no proxy server was specified
# or that the hostname in the URL is exempt via the no_proxy
# environment variable. This allows the caller to disable usage
# of a proxy since the IP address may be used to
# connect. Otherwise, Net::HTTP may erroneously compare the IP
# address against the no_proxy list.
Result = Struct.new(:uri, :hostname, :use_proxy)
2018-05-09 12:01:36 +05:30
class << self
2019-06-05 12:25:43 +05:30
# Validates the given url according to the constraints specified by arguments.
#
2023-06-20 00:43:36 +05:30
# ports - Raises error if the given URL port is not between given ports.
2020-04-22 19:07:51 +05:30
# allow_localhost - Raises error if URL resolves to a localhost IP address and argument is false.
# allow_local_network - Raises error if URL resolves to a local network address (private, link-local, shared address space or limited broadcast) and argument is false.
2022-05-07 20:08:51 +05:30
# allow_object_storage - Avoid raising an error if URL resolves to an object storage endpoint and argument is true.
2019-06-05 12:25:43 +05:30
# ascii_only - Raises error if URL has unicode characters and argument is true.
# enforce_user - Raises error if URL user doesn't start with alphanumeric characters and argument is true.
# enforce_sanitization - Raises error if URL includes any HTML/CSS/JS tags and argument is true.
2023-05-27 22:25:52 +05:30
# deny_all_requests_except_allowed - Raises error if URL is not in the allow list and argument is true. Can be Boolean or Proc. Defaults to instance app setting.
2019-06-05 12:25:43 +05:30
#
2023-06-20 00:43:36 +05:30
# Returns a Result object.
2019-06-05 12:25:43 +05:30
# rubocop:disable Metrics/ParameterLists
2023-06-20 00:43:36 +05:30
def validate_url_with_proxy!(
  url,
  schemes:,
  ports: [],
  allow_localhost: false,
  allow_local_network: true,
  allow_object_storage: false,
  ascii_only: false,
  enforce_user: false,
  enforce_sanitization: false,
  deny_all_requests_except_allowed: DENY_ALL_REQUESTS_EXCEPT_ALLOWED_DEFAULT,
  dns_rebind_protection: true)
  # rubocop:enable Metrics/ParameterLists
  # A nil url is not validated at all; the Result carries no URI.
  return Result.new(nil, nil, true) if url.nil?

  raise ArgumentError, 'The schemes is a required argument' if schemes.blank?

  # Param url can be a string, URI or Addressable::URI
  uri = parse_url(url)

  validate_uri(
    uri: uri,
    schemes: schemes,
    ports: ports,
    enforce_sanitization: enforce_sanitization,
    enforce_user: enforce_user,
    ascii_only: ascii_only
  )

  begin
    address_info = get_address_info(uri)
  rescue SocketError
    # The host could not be resolved. Whether that is an error depends on
    # the allow list, the deny-all setting, DNS rebinding protection and
    # proxy usage (see enforce_address_info_retrievable?).
    proxy_in_use = uri_under_proxy_setting?(uri, nil)
    return Result.new(uri, nil, proxy_in_use) unless enforce_address_info_retrievable?(uri, dns_rebind_protection, deny_all_requests_except_allowed)

    raise BlockedUrlError, 'Host cannot be resolved or invalid'
  end

  ip_address = ip_address(address_info)
  proxy_in_use = uri_under_proxy_setting?(uri, ip_address)

  # Ignore DNS rebind protection when a proxy is being used, as DNS
  # rebinding is expected behavior.
  dns_rebind_protection &&= !proxy_in_use
  return Result.new(uri, nil, proxy_in_use) if domain_in_allow_list?(uri)

  protected_uri_with_hostname = enforce_uri_hostname(ip_address, uri, dns_rebind_protection, proxy_in_use)

  return protected_uri_with_hostname if ip_in_allow_list?(ip_address, port: get_port(uri))

  # Allow url from the GitLab instance itself but only for the configured hostname and ports
  return protected_uri_with_hostname if internal?(uri)

  return protected_uri_with_hostname if allow_object_storage && object_storage_endpoint?(uri)

  # Every allow-list style exemption has been checked above, so the
  # deny-all setting and the local address checks apply from here on.
  validate_deny_all_requests_except_allowed!(deny_all_requests_except_allowed)

  validate_local_request(
    address_info: address_info,
    allow_localhost: allow_localhost,
    allow_local_network: allow_local_network
  )

  protected_uri_with_hostname
end
2021-01-29 00:20:46 +05:30
# Predicate form of validate!.
#
# Accepts the same arguments as validate!. Returns false when validation
# passes and true when it raises BlockedUrlError; any other exception
# propagates to the caller.
def blocked_url?(url, **kwargs)
  validate!(url, **kwargs)

  false
rescue BlockedUrlError
  true
end
2023-06-20 00:43:36 +05:30
# Backwards-compatible wrapper around validate_url_with_proxy!: forwards all
# arguments unchanged and returns an array with [<uri>, <original-hostname>]
# instead of the Result object.
# Issue for refactoring: https://gitlab.com/gitlab-org/gitlab/-/issues/410890
def validate!(...)
  validate_url_with_proxy!(...).then { |outcome| [outcome.uri, outcome.hostname] }
end
2017-08-17 22:00:37 +05:30
private
2019-06-05 12:25:43 +05:30
# Builds the Result for a URI, replacing its hostname with the resolved IP
# address when DNS rebinding protection applies.
#
# The hostname is only replaced when DNS rebinding protection is enabled, an
# IP address was resolved, and that IP differs from the URI's hostname. In
# that case the returned Result holds a copy of the URI pointing at the IP
# address, plus the original hostname. The original hostname is used to
# validate the SSL, given in that scenario we'll be making the request to
# the IP address, instead of using the hostname.
#
# In every other case the Result holds the given URI untouched and a nil
# hostname. The given URI object is never mutated.
#
# Returns a Result.
def enforce_uri_hostname(ip_address, uri, dns_rebind_protection, proxy_in_use)
  replace_host = dns_rebind_protection && ip_address && ip_address != uri.hostname
  return Result.new(uri, nil, proxy_in_use) unless replace_host

  new_uri = uri.dup
  new_uri.hostname = ip_address
  Result.new(new_uri, uri.hostname, proxy_in_use)
end
2019-10-12 21:52:04 +05:30
# Returns the IP address string of the first entry in `address_info`, or nil
# when the list is empty.
def ip_address(address_info)
  primary = address_info.first
  primary&.ip_address
end
2019-12-21 20:55:43 +05:30
# Runs the checks that only need the parsed URI (no DNS resolution).
#
# HTML tag sanitization, when requested, is checked first and applies even to
# URLs of this GitLab instance. URLs of the instance itself skip every other
# check. Each validator raises BlockedUrlError on failure.
def validate_uri(uri:, schemes:, ports:, enforce_sanitization:, enforce_user:, ascii_only:)
  validate_html_tags(uri) if enforce_sanitization

  return if internal?(uri)

  validate_scheme(uri.scheme, schemes)
  # An empty ports list means ports are not restricted.
  validate_port(get_port(uri), ports) if ports.any?
  validate_user(uri.user) if enforce_user
  validate_hostname(uri.hostname)
  validate_unicode_restriction(uri) if ascii_only
end
2023-06-20 00:43:36 +05:30
# Tells whether a request to `uri` would go through the proxy configured in
# the environment.
#
# Returns false when no proxy environment is configured, true when a proxy is
# configured without any `no_proxy|NO_PROXY` exceptions, and otherwise
# whatever URI::Generic.use_proxy? decides for this hostname, IP address and
# port against the exception list.
def uri_under_proxy_setting?(uri, ip_address)
  return false unless Gitlab.http_proxy_env?

  # `no_proxy|NO_PROXY` lists addresses that bypass the proxy. With no
  # exceptions at all, every URI is under the proxy settings.
  exclusions = no_proxy_env
  return true if exclusions.blank?

  # Exceptions exist, so check whether one of them covers this URI.
  ::URI::Generic.use_proxy?(uri.hostname, ip_address, get_port(uri), no_proxy_env)
end
2023-05-27 22:25:52 +05:30
# Resolves the URI's hostname and port into address information.
#
# IPv4-mapped IPv6 addresses are converted to plain IPv4 addresses; every
# other address is returned as resolved.
#
# @param uri [Addressable::URI]
#
# @raise [Gitlab::UrlBlocker::BlockedUrlError] if the host is too long.
# @raise [ArgumentError] any other ArgumentError from Addrinfo.getaddrinfo
#   is re-raised unchanged.
#
# Other exceptions are not handled here; validate_url_with_proxy! rescues
# SocketError around this call.
#
# @return [Array<Addrinfo>]
def get_address_info(uri)
  Addrinfo.getaddrinfo(uri.hostname, get_port(uri), nil, :STREAM).map do |addr|
    addr.ipv6_v4mapped? ? addr.ipv6_to_ipv4 : addr
  end
rescue ArgumentError => error
  # Addrinfo.getaddrinfo errors if the domain exceeds 1024 characters.
  raise unless error.message.include?('hostname too long')

  raise BlockedUrlError, "Host is too long (maximum is 1024 characters)"
end
# Decides whether a host that could not be resolved must be rejected.
#
# Returns true when the caller should raise for the unresolvable host and
# false when the unresolved URI may be passed through. The checks run in
# this order and the first match wins.
def enforce_address_info_retrievable?(uri, dns_rebind_protection, deny_all_requests_except_allowed)
  # Do not enforce if URI is in the allow list
  return false if domain_in_allow_list?(uri)

  # Enforce if the instance should block requests
  return true if deny_all_requests_except_allowed?(deny_all_requests_except_allowed)

  # Do not enforce if DNS rebinding protection is disabled
  return false unless dns_rebind_protection

  # Do not enforce if proxy is used
  return false if Gitlab.http_proxy_env?

  # In the test suite we use a lot of mocked urls that are either invalid or
  # don't exist. In order to avoid modifying a ton of tests and factories
  # we allow invalid urls in the test environment when the environment
  # variable RSPEC_ALLOW_INVALID_URLS is 'true'.
  return false if Rails.env.test? && ENV['RSPEC_ALLOW_INVALID_URLS'] == 'true'

  true
end
2019-10-12 21:52:04 +05:30
# Applies the local-address checks selected by the two flags.
#
# When `allow_localhost` is false the localhost and loopback checks run; when
# `allow_local_network` is false the local network, link-local, shared
# address space and limited broadcast checks run. Each check raises
# BlockedUrlError when an address in `address_info` matches.
def validate_local_request(
  address_info:,
  allow_localhost:,
  allow_local_network:)
  return if allow_local_network && allow_localhost

  unless allow_localhost
    validate_localhost(address_info)
    validate_loopback(address_info)
  end

  unless allow_local_network
    validate_local_network(address_info)
    validate_link_local(address_info)
    validate_shared_address(address_info)
    validate_limited_broadcast_address(address_info)
  end
end
2022-02-05 19:09:49 +05:30
# Raises a BlockedUrlError if any IP in `addrs_info` falls inside the
# 100.64.0.0/10 range.
def validate_shared_address(addrs_info)
  shared_range = IPAddr.new('100.64.0.0/10')

  addrs_info.each do |addr|
    next unless shared_range.include?(addr.ip_address)

    raise BlockedUrlError, "Requests to the shared address space are not allowed"
  end

  nil
end
2019-02-15 15:39:39 +05:30
# Returns the port set on the URI, falling back to the URI's default port
# when none is set.
def get_port(uri)
  explicit_port = uri.port
  return explicit_port if explicit_port

  uri.default_port
end
2019-09-30 21:07:59 +05:30
# Raises a BlockedUrlError if sanitizing the URI string with no tags allowed
# changes it.
def validate_html_tags(uri)
  uri_str = uri.to_s
  sanitized_uri = ActionController::Base.helpers.sanitize(uri_str, tags: [])
  if sanitized_uri != uri_str
    raise BlockedUrlError, 'HTML/CSS/JS tags are not allowed'
  end
end
2018-11-29 20:51:05 +05:30
# Parses `url` with Addressable::URI and returns the parsed URI.
#
# Raises BlockedUrlError ('URI is invalid') when parsing fails or when
# multiline_blocked? rejects the parsed URL.
def parse_url(url)
  Addressable::URI.parse(url).tap do |parsed_url|
    raise Addressable::URI::InvalidURIError if multiline_blocked?(parsed_url)
  end
rescue Addressable::URI::InvalidURIError, URI::InvalidURIError
  raise BlockedUrlError, 'URI is invalid'
end
2021-12-11 22:18:48 +05:30
# Tells whether the parsed URL must be rejected for containing a line break.
#
# A raw CR or LF always blocks. A percent-encoded line break blocks too,
# except for http/https URLs. Returns true or false.
def multiline_blocked?(parsed_url)
  url = parsed_url.to_s

  return true if url.match?(/\n|\r/)
  # Google Cloud Storage uses a multi-line, encoded Signature query string
  return false if %w[http https].include?(parsed_url.scheme&.downcase)

  CGI.unescape(url).match?(/\n|\r/)
end
2019-09-30 21:07:59 +05:30
# Raises a BlockedUrlError if `port` is below 1024 and not listed in `ports`.
#
# A blank port and any port of 1024 or above are always accepted.
def validate_port(port, ports)
  return if port.blank?
  # Only ports under 1024 are restricted
  return if port >= 1024
  return if ports.include?(port)

  raise BlockedUrlError, "Only allowed ports are #{ports.join(', ')}, and any over 1024"
end
2019-12-21 20:55:43 +05:30
# Raises a BlockedUrlError if `scheme` is blank, or if `schemes` is non-empty
# and does not include it.
def validate_scheme(scheme, schemes)
  if scheme.blank? || (schemes.any? && schemes.exclude?(scheme))
    raise BlockedUrlError, "Only allowed schemes are #{schemes.join(', ')}"
  end
end
2019-09-30 21:07:59 +05:30
# Raises a BlockedUrlError unless the URL user is blank or starts with an
# alphanumeric character.
def validate_user(value)
  return if value.blank?
  return if value.match?(/\A\p{Alnum}/)

  raise BlockedUrlError, "Username needs to start with an alphanumeric character"
end
2019-09-30 21:07:59 +05:30
# Raises a BlockedUrlError unless the hostname is blank, is accepted by
# IPAddress.valid?, or starts with an alphanumeric character.
def validate_hostname(value)
  return if value.blank?
  return if IPAddress.valid?(value)
  return if value.match?(/\A\p{Alnum}/)

  raise BlockedUrlError, "Hostname or IP address invalid"
end
2019-09-30 21:07:59 +05:30
# Raises a BlockedUrlError if the string form of `uri` contains non-ASCII
# characters. The message includes the escaped (String#dump) URI.
def validate_unicode_restriction(uri)
  return if uri.to_s.ascii_only?

  raise BlockedUrlError, "URI must be ascii only #{uri.to_s.dump}"
end
2019-09-30 21:07:59 +05:30
# Raises a BlockedUrlError if any IP in `addrs_info` is "::", "0.0.0.0" or one
# of the addresses reported by Socket.ip_address_list.
def validate_localhost(addrs_info)
  local_ips = ["::", "0.0.0.0"]
  local_ips.concat(Socket.ip_address_list.map(&:ip_address))

  return if (local_ips & addrs_info.map(&:ip_address)).empty?

  raise BlockedUrlError, "Requests to localhost are not allowed"
end
2019-09-30 21:07:59 +05:30
# Raises a BlockedUrlError if any address in `addrs_info` is an IPv4 or IPv6
# loopback address.
def validate_loopback(addrs_info)
  return unless addrs_info.any? { |addr| addr.ipv4_loopback? || addr.ipv6_loopback? }

  raise BlockedUrlError, "Requests to loopback addresses are not allowed"
end
2019-09-30 21:07:59 +05:30
# Raises a BlockedUrlError if any address in `addrs_info` is an IPv4 private
# address or an IPv6 site-local or unique-local address.
def validate_local_network(addrs_info)
  return unless addrs_info.any? { |addr| addr.ipv4_private? || addr.ipv6_sitelocal? || addr.ipv6_unique_local? }

  raise BlockedUrlError, "Requests to the local network are not allowed"
end
2019-09-30 21:07:59 +05:30
# Raises a BlockedUrlError if any address in `addrs_info` is an IPv6
# link-local address or falls inside 169.254.0.0/16.
def validate_link_local(addrs_info)
  netmask = IPAddr.new('169.254.0.0/16')
  return unless addrs_info.any? { |addr| addr.ipv6_linklocal? || netmask.include?(addr.ip_address) }

  raise BlockedUrlError, "Requests to the link local network are not allowed"
end
2023-05-27 22:25:52 +05:30
# Raises a BlockedUrlError when `should_deny` (a Boolean or a Proc, see
# deny_all_requests_except_allowed?) evaluates to a truthy value.
#
# Call this only once the allow list checks have already been made.
def validate_deny_all_requests_except_allowed!(should_deny)
  raise BlockedUrlError, "Requests to hosts and IP addresses not on the Allow List are denied" if deny_all_requests_except_allowed?(should_deny)
end
2022-05-07 20:08:51 +05:30
# Rejects the limited broadcast address: raises a BlockedUrlError when one of
# the IPs in `addrs_info` is 255.255.255.255.
# https://datatracker.ietf.org/doc/html/rfc919#section-7
def validate_limited_broadcast_address(addrs_info)
  resolved_ips = addrs_info.map(&:ip_address)
  return unless resolved_ips.include?("255.255.255.255")

  raise BlockedUrlError, "Requests to the limited broadcast address are not allowed"
end
2017-08-17 22:00:37 +05:30
# Tells whether the URI matches this instance's web endpoint (internal_web?)
# or its SSH endpoint (internal_shell?).
def internal?(uri)
  web_match = internal_web?(uri)
  return web_match if web_match

  internal_shell?(uri)
end
# Tells whether the URI's scheme, hostname and port all equal the protocol,
# host and port in `config.gitlab`.
def internal_web?(uri)
  uri.scheme == config.gitlab.protocol &&
    uri.hostname == config.gitlab.host &&
    get_port(uri) == config.gitlab.port
end
# Tells whether the URI uses the ssh scheme and its hostname and port equal
# the ssh_host and ssh_port in `config.gitlab_shell`.
def internal_shell?(uri)
  uri.scheme == 'ssh' &&
    uri.hostname == config.gitlab_shell.ssh_host &&
    get_port(uri) == config.gitlab_shell.ssh_port
end
2022-05-07 20:08:51 +05:30
# Collects the configured connection endpoints of every object storage type
# in ObjectStoreSettings::SUPPORTED_TYPES whose section and object store are
# both enabled. Missing (nil) endpoints and duplicates are dropped.
def enabled_object_storage_endpoints
  ObjectStoreSettings::SUPPORTED_TYPES.map do |type|
    section_setting = config.try(type)

    next unless section_setting && section_setting['enabled']

    object_store_setting = section_setting['object_store']

    next unless object_store_setting && object_store_setting['enabled']

    object_store_setting.dig('connection', 'endpoint')
  end.compact.uniq
end
2023-05-27 22:25:52 +05:30
# Resolves the `deny_all_requests_except_allowed` option: a Proc is called and
# its result returned; any other value is returned as given.
def deny_all_requests_except_allowed?(should_deny)
  return should_deny unless should_deny.is_a?(Proc)

  should_deny.call
end
# Reads the instance-wide setting: truthy only when application settings are
# available and `deny_all_requests_except_allowed?` is set on them.
def deny_all_requests_except_allowed_app_setting
  settings = Gitlab::CurrentSettings

  settings.current_application_settings? && settings.deny_all_requests_except_allowed?
end
2022-05-07 20:08:51 +05:30
# Tells whether the URI's scheme, hostname and port match one of the enabled
# object storage endpoints.
def object_storage_endpoint?(uri)
  enabled_object_storage_endpoints.any? do |endpoint|
    candidate = URI(endpoint)

    next false unless uri.scheme == candidate.scheme
    next false unless uri.hostname == candidate.hostname

    get_port(uri) == get_port(candidate)
  end
end
2023-05-27 22:25:52 +05:30
# Asks the URL allowlist whether the URI's normalized host is allowed for the
# URI's port.
def domain_in_allow_list?(uri)
  Gitlab::UrlBlockers::UrlAllowlist.domain_allowed?(uri.normalized_host, port: get_port(uri))
end
2023-05-27 22:25:52 +05:30
# Asks the URL allowlist whether the IP address is allowed for the given
# port.
def ip_in_allow_list?(ip_address, port: nil)
  Gitlab::UrlBlockers::UrlAllowlist.ip_allowed?(ip_address, port: port)
end
2017-08-17 22:00:37 +05:30
# Shortcut for Gitlab.config, used by the internal endpoint and object
# storage checks in this class.
def config
Gitlab.config
end
2023-06-20 00:43:36 +05:30
# Returns the proxy exception list from the environment: the value of
# `no_proxy` when that variable is set, otherwise the value of `NO_PROXY`
# (nil when neither is set).
def no_proxy_env
  ENV.fetch('no_proxy') { ENV['NO_PROXY'] }
end
2017-08-17 22:00:37 +05:30
end
end
end