103 lines
3 KiB
Ruby
103 lines
3 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module Gitlab
  module Database
    # Computes the average of a column by walking the relation in batches
    # rather than with one aggregate query.
    #
    # Batches are ranges over the values of `column` itself: every batch
    # selects the rows whose `column` value lies in `[batch_start, batch_end)`
    # and fetches the sum and the count of `column` for those rows. The final
    # average is the grand total divided by the grand count.
    class BatchAverageCounter
      # Bound used when the relation yields no minimum/maximum for the column.
      COLUMN_FALLBACK = 0
      # Batch size used when the caller supplies none. A canceled batch is
      # only halved while its size is at least twice this value.
      DEFAULT_BATCH_SIZE = 1_000
      # Returned by #count when no average could be computed.
      FALLBACK = -1
      # NOTE(review): not referenced anywhere in this file; the batch loop in
      # #count is not capped by it.
      MAX_ALLOWED_LOOPS = 10_000
      # Added to the column maximum so the exclusive batch ranges still cover it.
      OFFSET_BY_ONE = 1
      SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep

      attr_reader :relation, :column

      # @param relation [Object] relation to aggregate over; it is used through
      #   `connection`, `unscope`, `minimum`, `maximum`, `where` and `table_name`
      # @param column [Arel::Attributes::Attribute, Object] the column to
      #   average; anything that is not already an Arel attribute is looked up
      #   in `relation.arel_table`
      def initialize(relation, column)
        @relation = relation
        @column = wrap_column(relation, column)
      end

      # Calculates the average of `column` batch by batch.
      #
      # @param batch_size [Integer, nil] width of each batch range; a blank
      #   value falls back to DEFAULT_BATCH_SIZE
      # @return [Float, Integer] the average as a Float, or FALLBACK when no
      #   records were counted or a batch query was canceled and could not be
      #   retried with a smaller batch
      # @raise [RuntimeError] when called while a transaction is open
      # @raise [ArgumentError] when `batch_size` is zero or negative
      def count(batch_size: nil)
        raise 'BatchAverageCounter can not be run inside a transaction' if transaction_open?

        batch_size = batch_size.presence || DEFAULT_BATCH_SIZE

        # A non-positive batch size never moves `batch_start` towards `finish`,
        # so the loop below would never terminate. Fail fast instead.
        raise ArgumentError, 'batch_size must be a positive number' unless batch_size.positive?

        start = column_start
        finish = column_finish

        total_sum = 0
        total_records = 0

        batch_start = start

        while batch_start < finish
          begin
            batch_end = [batch_start + batch_size, finish].min
            batch_relation = build_relation_batch(batch_start, batch_end)

            # We use `sum` and `count` instead of `average` here to not run into an "average of averages"
            # problem as batches will have different sizes, so we are essentially summing up the values for
            # each batch separately, and then dividing that result by the total number of records.
            batch_sum, batch_count = batch_relation.pick(column.sum, column.count)

            # `to_i` turns a nil sum into 0.
            # NOTE(review): it also drops any fractional part of the batch sum.
            total_sum += batch_sum.to_i
            total_records += batch_count

            batch_start = batch_end
          rescue ActiveRecord::QueryCanceled => error # rubocop:disable Database/RescueQueryCanceled
            # retry with a safe batch size & warmer cache
            # (`batch_start` was not advanced, so the same range start is retried)
            if batch_size >= 2 * DEFAULT_BATCH_SIZE
              batch_size /= 2
            else
              log_canceled_batch_fetch(batch_start, batch_relation.to_sql, error)

              return FALLBACK
            end
          end

          sleep(SLEEP_TIME_IN_SECONDS)
        end

        return FALLBACK if total_records.zero?

        total_sum.to_f / total_records
      end

      private

      # Smallest value of `column` (ignoring GROUP BY / HAVING scopes), or
      # COLUMN_FALLBACK when the query yields none.
      def column_start
        relation.unscope(:group, :having).minimum(column) || COLUMN_FALLBACK
      end

      # One past the largest value of `column` (ignoring GROUP BY / HAVING
      # scopes), so it can serve as an exclusive upper bound.
      def column_finish
        (relation.unscope(:group, :having).maximum(column) || COLUMN_FALLBACK) + OFFSET_BY_ONE
      end

      # Restricts the relation to rows whose `column` value is in `start...finish`
      # (end exclusive).
      def build_relation_batch(start, finish)
        relation.where(column.between(start...finish))
      end

      # Writes an error entry describing the canceled batch query.
      def log_canceled_batch_fetch(batch_start, query, error)
        Gitlab::AppJsonLogger
          .error(
            event: 'batch_count',
            relation: relation.table_name,
            operation: 'average',
            start: batch_start,
            query: query,
            message: "Query has been canceled with message: #{error.message}"
          )
      end

      def transaction_open?
        relation.connection.transaction_open?
      end

      # Returns `column` unchanged when it already is an Arel attribute,
      # otherwise resolves it through the relation's Arel table.
      def wrap_column(relation, column)
        return column if column.is_a?(Arel::Attributes::Attribute)

        relation.arel_table[column]
      end
    end
  end
end
|