debian-mirror-gitlab/lib/gitlab/database/load_balancing/service_discovery/sampler.rb
2023-01-12 18:35:48 +00:00

56 lines
2 KiB
Ruby

# frozen_string_literal: true
module Gitlab
  module Database
    module LoadBalancing
      class ServiceDiscovery
        # Reduces a list of discovered addresses to at most
        # `max_replica_pools` entries, spreading the picks evenly across
        # hostnames and choosing the same entries on every call for a
        # given Sampler instance.
        class Sampler
          # max_replica_pools - upper bound on the number of addresses
          #                     returned by #sample; nil disables sampling.
          # seed              - seed for the shuffle performed in #sample;
          #                     defaults to a fresh `Random.new_seed`.
          def initialize(max_replica_pools:, seed: Random.new_seed)
            @max_replica_pools = max_replica_pools

            # The seed is captured once here so that every #sample call on
            # this instance shuffles with the same seed.
            @seed = seed
          end

          # Returns `addresses` itself when no limit is configured or the
          # list already fits within the limit. Otherwise logs the event and
          # returns a new Array of exactly `max_replica_pools` addresses.
          #
          # Each address must respond to `hostname` and be sortable.
          def sample(addresses)
            limit = @max_replica_pools
            return addresses if limit.nil? || addresses.count <= limit

            ::Gitlab::Database::LoadBalancing::Logger.info(
              event: :host_list_limit_exceeded,
              message: "Host list length exceeds max_replica_pools so random hosts will be chosen.",
              max_replica_pools: limit,
              total_host_list_length: addresses.count,
              randomization_seed: @seed
            )

            # Sorting first removes any dependence on the order in which the
            # DNS server returned the records; the seeded shuffle then gives
            # a random but repeatable order. Grouping by hostname lets us
            # draw evenly from each host.
            pools = addresses.sort.shuffle(random: Random.new(@seed)).group_by(&:hostname)

            chosen = []
            room_left = -> { chosen.count < limit }

            # Round-robin: each pass takes one address from every hostname
            # that still has some left, stopping as soon as the limit is hit.
            while room_left.call
              pools.each_value do |pool|
                next if pool.empty?

                chosen << pool.pop
                break unless room_left.call
              end
            end

            chosen
          end
        end
      end
    end
  end
end