debian-mirror-gitlab/lib/gitlab/database/migrations/test_batched_background_runner.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

145 lines
5.3 KiB
Ruby
Raw Normal View History

2022-07-16 23:28:13 +05:30
# frozen_string_literal: true
module Gitlab
module Database
module Migrations
class TestBatchedBackgroundRunner < BaseBackgroundRunner
2022-10-11 01:57:18 +05:30
include Gitlab::Database::DynamicModelHelpers
2022-07-16 23:28:13 +05:30
2023-04-23 21:23:45 +05:30
MIGRATION_DETAILS_FILE_NAME = 'details.json'
2023-03-04 22:38:38 +05:30
# Builds a runner that samples batched background migrations.
#
# @param result_dir forwarded unchanged to the parent runner
# @param connection database connection; forwarded to the parent runner and
#   also stored on this instance
# @param from_id lower bound (exclusive) on the ids of the batched migrations
#   that jobs_by_migration_name will consider
def initialize(result_dir:, connection:, from_id:)
  super(result_dir: result_dir, connection: connection)

  @connection = connection
  @from_id = from_id
end
# Builds, for every executable batched migration whose id is greater than
# +from_id+, a lazy list of sample jobs spread over the migration's remaining
# value range.
#
# Side effect: calls export_migration_details once per migration (eagerly,
# while the hash is being built). The jobs themselves are only created when
# the returned lazy enumerators are consumed.
#
# @return [Hash] job class name => lazy enumerator of the jobs created for it
def jobs_by_migration_name
  set_shared_model_connection do
    Gitlab::Database::BackgroundMigration::BatchedMigration
      .executable
      .where('id > ?', from_id)
      .to_h do |migration|
      batching_strategy = migration.batch_class.new(connection: connection)

      smallest_batch_start = migration.next_min_value

      table_max_value = define_batchable_model(migration.table_name, connection: connection)
                          .maximum(migration.column_name)

      # The last useful sampling point is one batch before the end of the table.
      # It must never fall below smallest_batch_start: when the table holds fewer
      # values than one batch (or no rows at all, in which case the maximum is nil)
      # the sampling range collapses to the single point smallest_batch_start
      # instead of producing a negative variance or raising on nil.
      largest_batch_start =
        if table_max_value.nil?
          smallest_batch_start
        else
          [table_max_value - migration.batch_size, smallest_batch_start].max
        end

      # variance is the portion of the batch range that we shrink between variance * 0 and variance * 1
      # to pick actual batches to sample.
      variance = largest_batch_start - smallest_batch_start

      batch_starts = uniform_fractions
                       .lazy # frac varies from 0 to 1, values in smallest_batch_start..largest_batch_start
                       .map { |frac| (variance * frac).to_i + smallest_batch_start }

      # Track previously run batches so that we stop sampling if a new batch would intersect an older one
      completed_batches = []

      sampled_jobs = batch_starts
                       # Stop sampling if a batch would intersect a previous batch
                       .take_while { |start| completed_batches.none? { |batch| batch.cover?(start) } }
                       .map do |batch_start|
        # The current block is lazily evaluated as part of the jobs_to_sample enumerable
        # so it executes after the enclosing using_connection block has already executed
        # Therefore we need to re-associate with the explicit connection again
        Gitlab::Database::SharedModel.using_connection(connection) do
          next_bounds = batching_strategy.next_batch(
            migration.table_name,
            migration.column_name,
            batch_min_value: batch_start,
            batch_size: migration.batch_size,
            job_class: migration.job_class,
            job_arguments: migration.job_arguments
          )

          if next_bounds.nil?
            # NOTE(review): assumes the strategy returns nil when it finds no rows
            # from batch_start onwards — confirm against the batching strategies in use.
            # No job is created for such a range, but the attempted range is still
            # recorded so that sampling cannot keep retrying the same empty region.
            completed_batches << (batch_start..(batch_start + migration.batch_size))
            nil
          else
            batch_min, batch_max = next_bounds

            job = migration.create_batched_job!(batch_min, batch_max)

            completed_batches << (batch_min..batch_max)

            job
          end
        end
      end

      # Drop the placeholders left by skipped (empty) batches; the list stays lazy.
      jobs_to_sample = sampled_jobs.reject(&:nil?)

      job_class_name = migration.job_class_name

      export_migration_details(job_class_name, migration.slice(:interval, :total_tuple_count, :max_batch_size))

      [job_class_name, jobs_to_sample]
    end
  end
end
# Executes a single sampled job.
#
# The job is handed to a BatchedMigrationWrapper built with this runner's
# connection, and the call is made inside set_shared_model_connection.
#
# @param job the batched job to perform
# @return whatever BatchedMigrationWrapper#perform returns
def run_job(job)
  set_shared_model_connection do
    Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper.new(connection: connection).perform(job)
  end
end
2022-10-11 01:57:18 +05:30
# Returns an endless Enumerator of evenly spread fractions in 0..1 that gets
# progressively finer the more values are taken from it:
#
#   0, 1                  (emitted first, as Integers)
#   1/2
#   1/4, 3/4
#   1/8, 3/8, 5/8, 7/8
#   ...
#
# Every round doubles the denominator and emits each odd numerator below it,
# so no fraction is ever produced twice.
def uniform_fractions
  Enumerator.new do |yielder|
    # The two end points do not fit the odd-numerator pattern.
    yielder << 0
    yielder << 1

    denominator = 2

    loop do
      # Odd numerators only: the even ones were already emitted in reduced
      # form during an earlier round.
      1.step(denominator - 1, 2) do |numerator|
        yielder << numerator.fdiv(denominator)
      end

      denominator *= 2
    end
  end
end
2023-03-04 22:38:38 +05:30
private
attr_reader :from_id
2023-03-17 16:20:25 +05:30
# Runs the given block through Gitlab::Database::SharedModel.using_connection
# with this runner's connection, and returns whatever that call returns.
def set_shared_model_connection(&block)
  shared_model = Gitlab::Database::SharedModel

  shared_model.using_connection(connection, &block)
end
2023-04-23 21:23:45 +05:30
# Extracts the attributes that describe a sampled job.
#
# The attributes are read inside set_shared_model_connection.
#
# @param job the batched job to describe
# @return the result of slicing min_value, max_value, batch_size,
#   sub_batch_size and pause_ms out of the job
def job_meta(job)
  set_shared_model_connection do
    job.slice(:min_value, :max_value, :batch_size, :sub_batch_size, :pause_ms)
  end
end
2023-04-23 21:23:45 +05:30
# Writes the given attributes, serialized as JSON, to the details file inside
# the migration's own sub-directory of result_dir. The sub-directory is
# created first when it does not exist yet.
#
# @param migration_name name of the sub-directory below result_dir
# @param attributes object responding to to_json
# @return the value returned by File.write
def export_migration_details(migration_name, attributes)
  target_dir = result_dir.join(migration_name)
  FileUtils.mkdir_p(target_dir) unless Dir.exist?(target_dir)

  details_path = target_dir.join(MIGRATION_DETAILS_FILE_NAME)
  File.write(details_path, attributes.to_json)
end
# Observers used by this runner: everything returned by
# Observers.all_observers, followed by Observers::BatchDetails.
def observers
  registry = ::Gitlab::Database::Migrations::Observers
  default_observers = registry.all_observers

  default_observers + [registry::BatchDetails]
end
2022-07-16 23:28:13 +05:30
end
end
end
end