68 lines
2.3 KiB
Ruby
68 lines
2.3 KiB
Ruby
|
# frozen_string_literal: true
|
||
|
|
||
|
module Gitlab
|
||
|
module Database
|
||
|
module BackgroundMigration
|
||
|
# This is an optimizer for throughput of batched migration jobs
|
||
|
#
|
||
|
# The underyling mechanic is based on the concept of time efficiency:
|
||
|
# time efficiency = job duration / interval
|
||
|
# Ideally, this is close but lower than 1 - so we're using time efficiently.
|
||
|
#
|
||
|
# We aim to land in the 90%-98% range, which gives the database a little breathing room
|
||
|
# in between.
|
||
|
#
|
||
|
# The optimizer is based on calculating the exponential moving average of time efficiencies
|
||
|
# for the last N jobs. If we're outside the range, we add 10% to or decrease by 20% of the batch size.
|
||
|
class BatchOptimizer
|
||
|
# Target time efficiency for a job
|
||
|
# Time efficiency is defined as: job duration / interval
|
||
|
TARGET_EFFICIENCY = (0.9..0.95).freeze
|
||
|
|
||
|
# Lower and upper bound for the batch size
|
||
|
ALLOWED_BATCH_SIZE = (1_000..2_000_000).freeze
|
||
|
|
||
|
# Limit for the multiplier of the batch size
|
||
|
MAX_MULTIPLIER = 1.2
|
||
|
|
||
|
# When smoothing time efficiency, use this many jobs
|
||
|
NUMBER_OF_JOBS = 20
|
||
|
|
||
|
# Smoothing factor for exponential moving average
|
||
|
EMA_ALPHA = 0.4
|
||
|
|
||
|
attr_reader :migration, :number_of_jobs, :ema_alpha
|
||
|
|
||
|
def initialize(migration, number_of_jobs: NUMBER_OF_JOBS, ema_alpha: EMA_ALPHA)
|
||
|
@migration = migration
|
||
|
@number_of_jobs = number_of_jobs
|
||
|
@ema_alpha = ema_alpha
|
||
|
end
|
||
|
|
||
|
def optimize!
|
||
|
return unless Feature.enabled?(:optimize_batched_migrations, type: :ops, default_enabled: :yaml)
|
||
|
|
||
|
if multiplier = batch_size_multiplier
|
||
|
migration.batch_size = (migration.batch_size * multiplier).to_i.clamp(ALLOWED_BATCH_SIZE)
|
||
|
migration.save!
|
||
|
end
|
||
|
end
|
||
|
|
||
|
private
|
||
|
|
||
|
def batch_size_multiplier
|
||
|
efficiency = migration.smoothed_time_efficiency(number_of_jobs: number_of_jobs, alpha: ema_alpha)
|
||
|
|
||
|
return if efficiency.nil? || efficiency == 0
|
||
|
|
||
|
# We hit the range - no change
|
||
|
return if TARGET_EFFICIENCY.include?(efficiency)
|
||
|
|
||
|
# Assumption: time efficiency is linear in the batch size
|
||
|
[TARGET_EFFICIENCY.max / efficiency, MAX_MULTIPLIER].min
|
||
|
end
|
||
|
end
|
||
|
end
|
||
|
end
|
||
|
end
|