debian-mirror-gitlab/lib/gitlab/background_migration/batched_migration_job.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

127 lines
3.7 KiB
Ruby
Raw Normal View History

2022-07-16 23:28:13 +05:30
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # Abstract parent for batched background migration jobs. A concrete job subclasses this
    # class and provides `#perform`, which is the entry point for the job's execution.
    #
    # Any job arguments a subclass needs must be declared explicitly with `job_arguments`,
    # see https://docs.gitlab.com/ee/development/database/batched_background_migrations.html#job-arguments.
    class BatchedMigrationJob
      include Gitlab::Database::DynamicModelHelpers

      # Stores the batch description on the instance.
      #
      # @param start_id lower bound of the `batch_column` range selected by `base_relation`
      # @param end_id upper bound (inclusive) of that range
      # @param batch_table table handed to `define_batchable_model`
      # @param batch_column column used for the range filter and as the batching column
      # @param sub_batch_size passed as `of:` when iterating sub-batches
      # @param pause_ms pause after each sub-batch in milliseconds; negative values count as 0
      # @param job_arguments [Array] positional values read by accessors created with `job_arguments`
      # @param connection connection handed to `define_batchable_model`
      def initialize(
        start_id:, end_id:, batch_table:, batch_column:, sub_batch_size:, pause_ms:, job_arguments: [], connection:
      )
        @connection = connection
        @batch_table = batch_table
        @batch_column = batch_column
        @start_id = start_id
        @end_id = end_id
        @sub_batch_size = sub_batch_size
        @pause_ms = pause_ms
        @job_arguments = job_arguments
      end

      # Builds an instance that is not tied to a real batch: the id range, the sub-batch
      # size and the pause are all set to 0.
      def self.generic_instance(batch_table:, batch_column:, job_arguments: [], connection:)
        new(
          start_id: 0, end_id: 0, sub_batch_size: 0, pause_ms: 0,
          batch_table: batch_table, batch_column: batch_column,
          job_arguments: job_arguments, connection: connection
        )
      end

      # Number of declared job arguments. It is 0 until `job_arguments` replaces this method.
      def self.job_arguments_count
        0
      end

      # Class-level macro: defines an `operation_name` instance method returning `operation`.
      def self.operation_name(operation)
        define_method('operation_name') { operation }
      end

      # Class-level macro: for each given name, defines an instance method that returns the
      # value stored at the same position in `@job_arguments`. It also redefines
      # `job_arguments_count` on the receiving class to report how many names were given.
      def self.job_arguments(*args)
        args.each_with_index do |argument_name, position|
          define_method(argument_name) { @job_arguments[position] }
        end

        define_singleton_method(:job_arguments_count) { args.size }
      end

      # Class-level macro: overrides `#filter_batch` so that `scope` is evaluated in the
      # context of the job instance, receiving the relation as its argument.
      def self.scope_to(scope)
        define_method(:filter_batch) { |relation| instance_exec(relation, &scope) }
      end

      # Default batch filter: hands the relation back untouched. `scope_to` replaces it.
      def filter_batch(relation)
        relation
      end

      # Entry point of the job; concrete jobs have to override it.
      #
      # @raise [NotImplementedError] always, in this base implementation
      def perform
        message = "subclasses of #{self.class.name} must implement #{__method__}"

        raise NotImplementedError, message
      end

      # Lazily created, memoized metrics collector for this job instance.
      def batch_metrics
        @batch_metrics ||= Gitlab::Database::BackgroundMigration::BatchMetrics.new
      end

      private

      attr_reader :start_id, :end_id, :batch_table, :batch_column, :sub_batch_size, :pause_ms, :connection

      # Walks the filtered batch relation in sub-batches, yielding each sub-batch relation to
      # the caller's block inside `batch_metrics.instrument_operation`, and sleeps for the
      # configured pause after every sub-batch.
      #
      # @param batching_arguments [Hash] merged over the default `column:`/`of:` options
      # @param batching_scope optional callable applied to the relation before batching
      def each_sub_batch(batching_arguments: {}, batching_scope: nil)
        options = { column: batch_column, of: sub_batch_size }.merge(batching_arguments)
        scoped_relation = filter_sub_batch(filter_batch(base_relation), batching_scope)

        scoped_relation.each_batch(**options) do |sub_batch|
          batch_metrics.instrument_operation(operation_name) { yield sub_batch }

          # pause_ms is in milliseconds; sleep expects seconds.
          pause_seconds = [pause_ms, 0].max * 0.001
          sleep(pause_seconds)
        end
      end

      # Same loop as `each_sub_batch`, but driven by `distinct_each_batch` on the unfiltered
      # base relation. It refuses to run when `filter_batch` alters the base relation.
      #
      # @param batching_arguments [Hash] merged over the default `column:`/`of:` options
      # @raise [RuntimeError] when a `scope_to` filter changes the base relation
      def distinct_each_batch(batching_arguments: {})
        filter_applied = base_relation != filter_batch(base_relation)
        raise 'distinct_each_batch can not be used when additional filters are defined with scope_to' if filter_applied

        options = { column: batch_column, of: sub_batch_size }.merge(batching_arguments)

        base_relation.distinct_each_batch(**options) do |sub_batch|
          batch_metrics.instrument_operation(operation_name) { yield sub_batch }

          # pause_ms is in milliseconds; sleep expects seconds.
          pause_seconds = [pause_ms, 0].max * 0.001
          sleep(pause_seconds)
        end
      end

      # Relation over `batch_table` restricted to rows whose `batch_column` lies within
      # `start_id..end_id`. A new model is defined on every call.
      def base_relation
        batchable_model = define_batchable_model(batch_table, connection: connection)

        batchable_model.where(batch_column => start_id..end_id)
      end

      # Applies `batching_scope` to the relation when one is given, otherwise returns the
      # relation unchanged.
      def filter_sub_batch(relation, batching_scope = nil)
        batching_scope ? batching_scope.call(relation) : relation
      end

      # Fallback used when a subclass never declared its operation name with the
      # class-level `operation_name` macro.
      #
      # @raise [RuntimeError] always
      def operation_name
        raise 'Operation name is required, please define it with `operation_name`'
      end
    end
  end
end