# frozen_string_literal: true

module Gitlab
  module Database
    module LoadBalancing
      class ServiceDiscovery
        # Picks a bounded, repeatable subset of replica addresses.
        #
        # When more addresses are supplied than +max_replica_pools+ allows, the
        # list is sorted, shuffled with a seed fixed at construction time and
        # then drained round-robin across hostnames. Repeated calls on the same
        # instance with the same set of addresses therefore yield the same
        # selection, regardless of the order the addresses arrive in.
        class Sampler
          # @param max_replica_pools [Integer, nil] upper bound on the number of
          #   addresses returned by {#sample}; +nil+ disables the limit
          # @param seed [Integer] seed used for the shuffle in {#sample}
          def initialize(max_replica_pools:, seed: Random.new_seed)
            # The seed is captured once so that every invocation of #sample on
            # this instance shuffles identically.
            @seed = seed
            @max_replica_pools = max_replica_pools
          end

          # Limits +addresses+ to at most +max_replica_pools+ entries.
          #
          # @param addresses [Array] objects that respond to +hostname+ and can
          #   be sorted
          # @return [Array] +addresses+ itself when no limit is configured or the
          #   limit is not exceeded; otherwise a new array holding the selection
          def sample(addresses)
            return addresses if @max_replica_pools.nil? || addresses.count <= @max_replica_pools

            ::Gitlab::Database::LoadBalancing::Logger.info(
              event: :host_list_limit_exceeded,
              message: "Host list length exceeds max_replica_pools so random hosts will be chosen.",
              max_replica_pools: @max_replica_pools,
              total_host_list_length: addresses.count,
              randomization_seed: @seed
            )

            # Sort first so that a change in the ordering returned by the DNS
            # server cannot influence the outcome, then shuffle with a fresh
            # generator built from the stored seed.
            shuffled = addresses.sort.shuffle(random: Random.new(@seed))

            # One group per hostname, in order of first appearance after the
            # shuffle, so addresses are spread evenly across hostnames.
            round_robin(shuffled.group_by(&:hostname).values, @max_replica_pools)
          end

          private

          # Takes one address per group per round, starting from the tail of
          # each group, until +limit+ addresses have been collected. Groups
          # that have run out of addresses are skipped in later rounds.
          #
          # @param groups [Array<Array>] addresses grouped by hostname
          # @param limit [Integer] maximum number of addresses to return
          # @return [Array] the selected addresses in the order they were picked
          def round_robin(groups, limit)
            deepest = groups.map(&:size).max || 0

            # Lazy so that no further rounds are computed once enough
            # addresses have been collected.
            picks = (1..deepest).lazy.flat_map do |round|
              groups.select { |group| group.size >= round }.map { |group| group[-round] }
            end

            picks.first([limit, 0].max)
          end
        end
      end
    end
  end
end