2022-07-16 23:28:13 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module Gitlab
|
|
|
|
module Database
|
|
|
|
module Migrations
|
|
|
|
class TestBatchedBackgroundRunner < BaseBackgroundRunner
|
2022-10-11 01:57:18 +05:30
|
|
|
include Gitlab::Database::DynamicModelHelpers
|
2022-07-16 23:28:13 +05:30
|
|
|
|
2023-04-23 21:23:45 +05:30
|
|
|
MIGRATION_DETAILS_FILE_NAME = 'details.json'
|
|
|
|
|
2023-03-04 22:38:38 +05:30
|
|
|
# Builds a runner for sampling batched background migrations.
#
# result_dir - forwarded to the parent runner; later used via `result_dir.join`
#              when exporting migration details.
# connection - forwarded to the parent runner and also stored on this instance.
# from_id    - only migrations with an id greater than this value are considered
#              (see the `id > ?` filter in jobs_by_migration_name).
def initialize(result_dir:, connection:, from_id:)
  super(result_dir: result_dir, connection: connection)

  @from_id = from_id
  @connection = connection
end
|
|
|
|
|
|
|
|
# Builds the sampling plan for every executable batched migration whose id is
# greater than +from_id+.
#
# Returns a Hash of `job_class_name => lazy enumerator of batched jobs`. Jobs are
# only created when the enumerator is consumed. As a side effect, the details of
# each migration (interval, total_tuple_count, max_batch_size) are exported via
# export_migration_details while the Hash is being built.
#
# Sampling for a migration stops as soon as a candidate batch start falls inside
# a range that has already been sampled.
def jobs_by_migration_name
  set_shared_model_connection do
    Gitlab::Database::BackgroundMigration::BatchedMigration
      .executable
      .where('id > ?', from_id)
      .to_h do |migration|
        batching_strategy = migration.batch_class.new(connection: connection)

        smallest_batch_start = migration.next_min_value

        table_max_value = define_batchable_model(migration.table_name, connection: connection)
                            .maximum(migration.column_name)

        # The maximum may be nil (presumably for an empty table); fall back to the
        # smallest start so the subtraction below cannot raise.
        # When fewer than batch_size values remain, the naive result lies below
        # smallest_batch_start, which would make the variance negative and sample
        # values before the first unprocessed one. Clamp it instead.
        largest_batch_start = [
          (table_max_value || smallest_batch_start) - migration.batch_size,
          smallest_batch_start
        ].max

        # variance is the portion of the batch range that we shrink between variance * 0 and variance * 1
        # to pick actual batches to sample.
        variance = largest_batch_start - smallest_batch_start

        batch_starts = uniform_fractions
                         .lazy # frac varies from 0 to 1, values in smallest_batch_start..largest_batch_start
                         .map { |frac| (variance * frac).to_i + smallest_batch_start }

        # Track previously run batches so that we stop sampling if a new batch would intersect an older one
        completed_batches = []

        sampled_jobs = batch_starts
                         # Stop sampling if a batch would intersect a previous batch
                         .take_while { |start| completed_batches.none? { |batch| batch.cover?(start) } }
                         .map do |batch_start|
          # The current block is lazily evaluated as part of the jobs_to_sample enumerable
          # so it executes after the enclosing using_connection block has already executed
          # Therefore we need to re-associate with the explicit connection again
          Gitlab::Database::SharedModel.using_connection(connection) do
            next_bounds = batching_strategy.next_batch(
              migration.table_name,
              migration.column_name,
              batch_min_value: batch_start,
              batch_size: migration.batch_size,
              job_class: migration.job_class,
              job_arguments: migration.job_arguments
            )

            # The strategy may return nil bounds (presumably when no rows match from
            # this point onwards). There is nothing to sample here, but the attempted
            # range must still be recorded as completed: otherwise a migration with no
            # work anywhere would keep producing candidates forever.
            if next_bounds.nil?
              completed_batches << (batch_start..(batch_start + migration.batch_size))
              next
            end

            batch_min, batch_max = next_bounds

            job = migration.create_batched_job!(batch_min, batch_max)

            completed_batches << (batch_min..batch_max)

            job
          end
        end

        # Drop the placeholders left by empty ranges; the enumerator stays lazy.
        jobs_to_sample = sampled_jobs.reject(&:nil?)

        job_class_name = migration.job_class_name

        export_migration_details(job_class_name, migration.slice(:interval, :total_tuple_count, :max_batch_size))

        [job_class_name, jobs_to_sample]
      end
  end
end
|
|
|
|
|
|
|
|
# Performs a single batched job through a BatchedMigrationWrapper built for this
# runner's connection, inside set_shared_model_connection.
#
# Returns whatever the wrapper's `perform` returns.
def run_job(job)
  set_shared_model_connection do
    wrapper = Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper.new(connection: connection)
    wrapper.perform(job)
  end
end
|
2022-10-11 01:57:18 +05:30
|
|
|
|
|
|
|
# Returns an infinite Enumerator of fractions spread evenly over 0..1, getting
# finer the more values are pulled from it:
#
#   0, 1                 (emitted first, as Integers)
#   1/2
#   1/4, 3/4
#   1/8, 3/8, 5/8, 7/8
#   ...
#
# Each round doubles the denominator and walks through every odd numerator
# below it, so no fraction is ever emitted twice.
def uniform_fractions
  Enumerator.new do |yielder|
    # The two end points do not fit the odd-numerator pattern below
    yielder << 0 << 1

    (1..).each do |exponent|
      denominator = 2**exponent

      # Odd numerators only: even ones reduce to a fraction from an earlier round
      1.step(denominator, 2) do |numerator|
        yielder << numerator.fdiv(denominator)
      end
    end
  end
end
|
2023-03-04 22:38:38 +05:30
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
attr_reader :from_id
|
2023-03-17 16:20:25 +05:30
|
|
|
|
|
|
|
# Delegates to SharedModel.using_connection, handing it this runner's connection
# and the given block. Returns whatever that call returns.
def set_shared_model_connection(&block)
  shared_model = Gitlab::Database::SharedModel

  shared_model.using_connection(connection, &block)
end
|
|
|
|
|
2023-04-23 21:23:45 +05:30
|
|
|
# Extracts the batch boundaries and sizing attributes of a job
# (min_value, max_value, batch_size, sub_batch_size, pause_ms) via `job.slice`,
# evaluated inside set_shared_model_connection.
def job_meta(job)
  meta_attributes = %i[min_value max_value batch_size sub_batch_size pause_ms]

  set_shared_model_connection { job.slice(*meta_attributes) }
end
|
2023-04-23 21:23:45 +05:30
|
|
|
|
|
|
|
# Writes the given attributes as JSON to
# `<result_dir>/<migration_name>/details.json` (MIGRATION_DETAILS_FILE_NAME),
# creating the migration's directory when needed. An existing details file is
# overwritten.
#
# migration_name - name of the sub-directory inside result_dir.
# attributes     - any object responding to `to_json`.
#
# Returns the value of File.write.
def export_migration_details(migration_name, attributes)
  directory = result_dir.join(migration_name)

  # mkdir_p already succeeds silently for an existing directory, so a separate
  # existence check would only add a check-then-act race.
  FileUtils.mkdir_p(directory)

  File.write(directory.join(MIGRATION_DETAILS_FILE_NAME), attributes.to_json)
end
|
|
|
|
|
|
|
|
# Observers used by this runner: everything returned by Observers.all_observers,
# followed by the BatchDetails observer.
def observers
  observers_namespace = ::Gitlab::Database::Migrations::Observers

  observers_namespace.all_observers + [observers_namespace::BatchDetails]
end
|
2022-07-16 23:28:13 +05:30
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|