debian-mirror-gitlab/lib/gitlab/database/background_migration/batched_migration.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

200 lines
5.9 KiB
Ruby
Raw Normal View History

2021-04-17 20:07:23 +05:30
# frozen_string_literal: true
module Gitlab
module Database
module BackgroundMigration
2022-05-07 20:08:51 +05:30
class BatchedMigration < SharedModel
2021-04-17 20:07:23 +05:30
# Namespace prefix that job_class prepends to job_class_name.
JOB_CLASS_MODULE = 'Gitlab::BackgroundMigration'

# Namespace prefix that batch_class prepends to batch_class_name.
# Interpolated strings are not covered by the frozen_string_literal magic
# comment, so the constant is frozen explicitly.
BATCH_CLASS_MODULE = "#{JOB_CLASS_MODULE}::BatchingStrategies".freeze

# should_stop? reports true once the failed/total job ratio exceeds this value.
MAXIMUM_FAILED_RATIO = 0.5

# should_stop? makes no decision while fewer jobs than this have been counted.
MINIMUM_JOBS = 50
2021-04-17 20:07:23 +05:30
self.table_name = :batched_background_migrations

# All jobs that belong to this migration.
has_many :batched_jobs, foreign_key: :batched_background_migration_id

# Single-record association ordered by max_value descending, so it resolves
# to the job with the largest max_value.
has_one :last_job, -> { order(max_value: :desc) },
  class_name: 'Gitlab::Database::BackgroundMigration::BatchedJob',
  foreign_key: :batched_background_migration_id
2021-09-04 01:27:46 +05:30
# job_arguments must be unique per (job_class_name, table_name, column_name).
validates :job_arguments, uniqueness: {
  scope: [:job_class_name, :table_name, :column_name]
}

# Only checked when the status has changed and the record is now finished.
validate :validate_batched_jobs_status, if: -> { status_changed? && finished? }
2021-04-17 20:07:23 +05:30
# Orders migrations by ascending id.
scope :queue_order, -> { order(id: :asc) }

# Migrations whose status is active or paused.
scope :queued, -> { with_statuses(:active, :paused) }

# on_hold_until is a temporary runtime status which puts execution "on hold"
scope :executable, -> { with_status(:active).where('on_hold_until IS NULL OR on_hold_until < NOW()') }

# Migrations created strictly after the given time.
scope :created_after, ->(time) { where('created_at > ?', time) }

# Migrations matching the full configuration tuple. job_arguments is compared
# against its JSON serialization.
scope :for_configuration, ->(job_class_name, table_name, column_name, job_arguments) do
  where(job_class_name: job_class_name, table_name: table_name, column_name: column_name)
    .where("job_arguments = ?", job_arguments.to_json) # rubocop:disable Rails/WhereEquals
end
2021-04-17 20:07:23 +05:30
2022-06-21 17:19:12 +05:30
# State machine over the status attribute. Records start out paused. Each
# state is stored under the integer given as its value (2 is not assigned to
# any state in this definition).
state_machine :status, initial: :paused do
  state(:paused, value: 0)
  state(:active, value: 1)
  state(:finished, value: 3)
  state(:failed, value: 4)
  state(:finalizing, value: 5)

  # Every event is allowed from any state.
  event(:pause) { transition(any => :paused) }
  event(:execute) { transition(any => :active) }
  event(:finish) { transition(any => :finished) }
  event(:failure) { transition(any => :failed) }
  event(:finalize) { transition(any => :finalizing) }

  # Stamp started_at on every transition into active. The respond_to? guard
  # skips the assignment when the record has no started_at writer.
  before_transition(any => :active) do |record|
    record.started_at = Time.current if record.respond_to?(:started_at)
  end
end
2021-04-17 20:07:23 +05:30
2021-06-08 01:23:25 +05:30
# Declares pause_ms as an integer attribute that defaults to 100.
# create_batched_job! copies this value onto each job it creates.
attribute :pause_ms, :integer, default: 100
2022-06-21 17:19:12 +05:30
# Lists the names of all states declared on the state machine.
#
# @return [Array] one name per state, in the order the state machine reports them
def self.valid_status
  state_machine.states.map { |state| state.name }
end
2021-09-30 23:02:18 +05:30
# Looks up a migration by its full configuration tuple.
#
# @return the first record of the for_configuration scope, or nil when the
#   scope is empty
def self.find_for_configuration(job_class_name, table_name, column_name, job_arguments)
  matching = for_configuration(job_class_name, table_name, column_name, job_arguments)
  matching.first
end
2021-04-29 21:17:54 +05:30
# Picks the migration to work on next: the first record of the executable
# scope when sorted by queue_order.
#
# @return the selected migration, or nil when nothing is executable
def self.active_migration
  executable.queue_order.first
end
2021-09-04 01:27:46 +05:30
# Sums batch_size over the succeeded jobs of the given migrations, grouped by
# migration.
#
# @param migrations value passed to the where clause on
#   batched_background_migration_id
# @return the grouped sum produced by the query, keyed by
#   batched_background_migration_id
def self.successful_rows_counts(migrations)
  BatchedJob
    .with_status(:succeeded)
    .where(batched_background_migration_id: migrations)
    .group(:batched_background_migration_id)
    .sum(:batch_size)
end
2021-04-29 21:17:54 +05:30
# Tells whether enough time has passed since the last job was created.
#
# @param variance amount subtracted from interval before comparing, which
#   shortens the required wait
# @return [Boolean] true when there is no last job, or when the last job was
#   created at least (interval - variance) before the current time
def interval_elapsed?(variance: 0)
  return true unless last_job

  interval_with_variance = interval - variance
  last_job.created_at <= Time.current - interval_with_variance
end
# Creates a job covering the given value range. The job copies this
# migration's current batch_size, sub_batch_size and pause_ms.
#
# @param min value stored as the job's min_value
# @param max value stored as the job's max_value
# @return the result of batched_jobs.create!
def create_batched_job!(min, max)
  batched_jobs.create!(
    min_value: min,
    max_value: max,
    batch_size: batch_size,
    sub_batch_size: sub_batch_size,
    pause_ms: pause_ms
  )
end
2021-11-11 11:23:49 +05:30
# Splits and retries every failed job, then fires the execute event.
#
# Failed jobs are processed in batches of 100. Each batch runs inside its own
# transaction: the batch is locked, split_and_retry! is called on every job,
# and the execute event is fired before the transaction block ends. The event
# is fired once more after the last batch, so it also runs when there are no
# failed jobs at all.
def retry_failed_jobs!
  batched_jobs.with_status(:failed).each_batch(of: 100) do |batch|
    self.class.transaction do
      batch.lock.each(&:split_and_retry!)
      execute!
    end
  end

  execute!
end
# Decides whether too many jobs have failed since the migration started.
#
# Only jobs returned by created_since(started_at) are counted.
#
# @return [Boolean, nil] nil when started_at is unset or fewer than
#   MINIMUM_JOBS jobs were counted; otherwise whether the failed/total ratio
#   is strictly greater than MAXIMUM_FAILED_RATIO
def should_stop?
  return unless started_at

  total_jobs = batched_jobs.created_since(started_at).count
  return if total_jobs < MINIMUM_JOBS

  failed_jobs = batched_jobs.with_status(:failed).created_since(started_at).count
  # fdiv avoids integer division truncating the ratio to 0.
  failed_jobs.fdiv(total_jobs) > MAXIMUM_FAILED_RATIO
end
2021-04-17 20:07:23 +05:30
# Lower bound for the next job: the successor of the last job's max_value,
# or this migration's min_value when there is no last job or it has no
# max_value.
def next_min_value
  previous_max = last_job&.max_value
  successor = previous_max&.next
  successor || min_value
end
# Resolves the job class by constantizing job_class_name under
# JOB_CLASS_MODULE.
def job_class
  qualified_name = "#{JOB_CLASS_MODULE}::#{job_class_name}"
  qualified_name.constantize
end
# Resolves the batching strategy class by constantizing batch_class_name
# under BATCH_CLASS_MODULE.
def batch_class
  qualified_name = "#{BATCH_CLASS_MODULE}::#{batch_class_name}"
  qualified_name.constantize
end
# Stores the job class name with a leading "::" removed, so that job_class
# can prepend JOB_CLASS_MODULE to it.
#
# @param class_name [String] class name, optionally prefixed with "::"
def job_class_name=(class_name)
  write_attribute(:job_class_name, class_name.delete_prefix("::"))
end
# Stores the batching strategy class name with a leading "::" removed, so
# that batch_class can prepend BATCH_CLASS_MODULE to it.
#
# @param class_name [String] class name, optionally prefixed with "::"
def batch_class_name=(class_name)
  write_attribute(:batch_class_name, class_name.delete_prefix("::"))
end
2021-04-29 21:17:54 +05:30
2021-06-08 01:23:25 +05:30
# Sum of batch_size over this migration's succeeded jobs.
def migrated_tuple_count
  batched_jobs.with_status(:succeeded).sum(:batch_size)
end
2021-04-29 21:17:54 +05:30
# Label hash identifying this migration, built once and memoized in
# @prometheus_labels.
#
# @return [Hash] :migration_id (the record id) and :migration_identifier,
#   formatted as "job_class_name/table_name.column_name"
def prometheus_labels
  return @prometheus_labels if @prometheus_labels

  @prometheus_labels = {
    migration_id: id,
    migration_identifier: format("%s/%s.%s", job_class_name, table_name, column_name)
  }
end
2021-06-08 01:23:25 +05:30
# Exponentially weighted average of the time_efficiency of up to
# number_of_jobs successful jobs.
#
# Jobs are read from successful_in_execution_order in reverse, so index 0 is
# presumably the most recently executed job (confirm against that scope).
# The job at index i is weighted by (1 - alpha)**i.
#
# @param number_of_jobs [Integer] how many jobs to take into account
# @param alpha [Numeric] smoothing factor; larger values discount later
#   indexes more strongly
# @return [Numeric, nil] the weighted average rounded to 2 decimals, or nil
#   when fewer than number_of_jobs jobs are available or none of them has a
#   time_efficiency
def smoothed_time_efficiency(number_of_jobs: 10, alpha: 0.2)
  jobs = batched_jobs.successful_in_execution_order.reverse_order.limit(number_of_jobs).with_preloads
  return if jobs.size < number_of_jobs

  # Jobs without a time_efficiency are dropped before weights are assigned.
  efficiencies = jobs.map(&:time_efficiency).compact
  decay = 1 - alpha

  dividend = efficiencies.each_with_index.reduce(0) do |total, (job_eff, i)|
    total + job_eff * decay**i
  end

  divisor = efficiencies.each_index.reduce(0) do |total, i|
    total + decay**i
  end

  return if divisor == 0

  (dividend / divisor).round(2)
end
# Hands this migration to a new BatchOptimizer and runs its optimize!.
#
# @return whatever BatchOptimizer#optimize! returns
def optimize!
  optimizer = BatchOptimizer.new(self)
  optimizer.optimize!
end
2022-01-26 12:08:38 +05:30
2022-06-21 17:19:12 +05:30
# Puts the migration on hold by persisting on_hold_until through update!.
# The executable scope skips records whose on_hold_until is still in the
# future.
#
# @param until_time time to hold until; defaults to ten minutes from now
def hold!(until_time: 10.minutes.from_now)
  changes = { on_hold_until: until_time }
  update!(**changes)
end
2022-01-26 12:08:38 +05:30
private
# Validation callback: adds an error on :batched_jobs when any job outside
# the succeeded status still exists.
def validate_batched_jobs_status
  return unless batched_jobs.except_succeeded.exists?

  errors.add(:batched_jobs, 'jobs need to be succeeded')
end
2021-04-17 20:07:23 +05:30
end
end
end
end