debian-mirror-gitlab/lib/gitlab/database/background_migration/batched_job.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

166 lines
6.2 KiB
Ruby
Raw Normal View History

2021-04-17 20:07:23 +05:30
# frozen_string_literal: true
module Gitlab
module Database
module BackgroundMigration
2022-05-07 20:08:51 +05:30
# Raised by BatchedJob#split_and_retry! when a job is not in the failed state
# or its batch cannot be halved any further. The failed-transition callback
# rescues it and logs the message.
class SplitAndRetryError < StandardError; end
class BatchedJob < SharedModel
2021-11-11 11:23:49 +05:30
# Query helpers mixed into the model. `from_union` (used by the `retriable`
# scope) presumably comes from FromUnion — confirm against that module.
include EachBatch
include FromUnion

# Rows of this model are stored in batched_background_migration_jobs.
self.table_name = :batched_background_migration_jobs
2021-06-08 01:23:25 +05:30
# Attempt threshold: failed jobs below it are picked up by the `retriable`
# scope, and `can_split?` only allows a split once it has been reached.
MAX_ATTEMPTS = 3

# Pending/running jobs whose updated_at is at least this old match the
# `stuck` scope.
STUCK_JOBS_TIMEOUT = 1.hour.freeze

# Exception classes that `can_split?` accepts as a reason to split a job.
TIMEOUT_EXCEPTIONS = [
  ActiveRecord::StatementTimeout,
  ActiveRecord::ConnectionTimeoutError,
  ActiveRecord::AdapterTimeout,
  ActiveRecord::LockWaitTimeout
].freeze
2021-06-08 01:23:25 +05:30
2022-03-02 08:16:31 +05:30
# Parent migration and the per-transition log rows written by the
# state machine's after_transition callback.
belongs_to :batched_migration, foreign_key: :batched_background_migration_id
has_many :batched_job_transition_logs, foreign_key: :batched_background_migration_job_id

# Jobs that are pending or running.
scope :active, -> { with_statuses(:pending, :running) }
# Active jobs not updated within the last STUCK_JOBS_TIMEOUT.
scope :stuck, -> { active.where('updated_at <= ?', STUCK_JOBS_TIMEOUT.ago) }
# Failed jobs with fewer than MAX_ATTEMPTS attempts, combined with stuck jobs.
scope :retriable, -> { from_union([with_status(:failed).where('attempts < ?', MAX_ATTEMPTS), self.stuck]) }
# Every job whose status is not succeeded.
scope :except_succeeded, -> { without_status(:succeeded) }
# Succeeded jobs that have a finished_at, ordered by finished_at.
scope :successful_in_execution_order, -> { where.not(finished_at: nil).with_status(:succeeded).order(:finished_at) }
# Preloads the parent batched_migration association.
scope :with_preloads, -> { preload(:batched_migration) }
# Jobs created at or after the given time.
scope :created_since, ->(date_time) { where('created_at >= ?', date_time) }
2021-06-08 01:23:25 +05:30
2022-04-04 11:22:00 +05:30
# Status state machine for a batched job.
#
# States are persisted as integers (pending=0, running=1, failed=2,
# succeeded=3). Each of the `succeed`, `failure` and `run` events may fire
# from any state.
state_machine :status, initial: :pending do
  state :pending, value: 0
  state :running, value: 1
  state :failed, value: 2
  state :succeeded, value: 3

  event :succeed do
    transition any => :succeeded
  end

  event :failure do
    transition any => :failed
  end

  event :run do
    transition any => :running
  end

  # Stamp the completion time before entering a terminal state.
  before_transition any => [:failed, :succeeded] do |job|
    job.finished_at = Time.current
  end

  # Starting a run counts as an attempt and clears data from a previous run.
  before_transition any => :running do |job|
    job.attempts += 1
    job.started_at = Time.current
    job.finished_at = nil
    job.metrics = {}
  end

  # After a failure, split the job when `can_split?` allows it. The exception
  # is taken from the first event argument that carries a present :error.
  # A SplitAndRetryError is logged instead of being propagated.
  after_transition any => :failed do |job, transition|
    error_hash = transition.args.find { |arg| arg[:error].present? }
    exception = error_hash&.fetch(:error)

    job.split_and_retry! if job.can_split?(exception)
  rescue SplitAndRetryError => error
    Gitlab::AppLogger.error(
      message: error.message,
      batched_job_id: job.id,
      batched_migration_id: job.batched_migration.id,
      job_class_name: job.migration_job_class_name,
      job_arguments: job.migration_job_arguments
    )
  end

  # After every transition: persist a transition log row, report the
  # exception (if one was passed to the event) and write a structured log.
  after_transition do |job, transition|
    error_hash = transition.args.find { |arg| arg[:error].present? }
    exception = error_hash&.fetch(:error)

    job.batched_job_transition_logs.create(
      previous_status: transition.from,
      next_status: transition.to,
      exception_class: exception&.class,
      exception_message: exception&.message
    )

    if exception
      Gitlab::ErrorTracking.track_exception(
        exception,
        batched_job_id: job.id,
        job_class_name: job.migration_job_class_name,
        job_arguments: job.migration_job_arguments
      )
    end

    Gitlab::AppLogger.info(
      message: 'BatchedJob transition',
      batched_job_id: job.id,
      previous_state: transition.from_name,
      new_state: transition.to_name,
      batched_migration_id: job.batched_migration.id,
      job_class_name: job.migration_job_class_name,
      job_arguments: job.migration_job_arguments,
      exception_class: exception&.class,
      exception_message: exception&.message
    )
  end
end
2022-06-21 17:19:12 +05:30
# Forwards these readers to batched_migration under a `migration_` prefix
# (e.g. migration_job_class_name, migration_job_arguments).
delegate :job_class, :table_name, :column_name, :job_arguments, :job_class_name,
  to: :batched_migration, prefix: :migration

# Integer attribute defaulting to 100; presumably a pause in milliseconds —
# confirm against the code that reads it.
attribute :pause_ms, :integer, default: 100
# Ratio between the time the job took (finished_at - started_at) and the
# interval of its batched migration.
#
# Returns nil unless the job succeeded and both timestamps are set.
def time_efficiency
  return unless succeeded? && finished_at && started_at

  # TODO: Switch to individual job interval (prereq: https://gitlab.com/gitlab-org/gitlab/-/issues/328801)
  elapsed = (finished_at - started_at).to_f
  elapsed / batched_migration.interval
end
2021-09-30 23:02:18 +05:30
2022-05-07 20:08:51 +05:30
# Whether a failed job may be split.
#
# True only when all of the following hold:
# - attempts has reached MAX_ATTEMPTS,
# - the class of `exception` is exactly one of TIMEOUT_EXCEPTIONS
#   (a nil exception never matches),
# - batch_size is larger than sub_batch_size.
def can_split?(exception)
  return false unless attempts >= MAX_ATTEMPTS
  return false unless TIMEOUT_EXCEPTIONS.include?(exception&.class)

  batch_size > sub_batch_size
end
2021-09-30 23:02:18 +05:30
# Halves the batch size of a failed job and resets its attempts so it can be
# retried. When the first half ends before the current max_value, the job is
# shrunk to that half and a duplicate job is saved for the remaining range.
#
# Runs inside `with_lock`.
#
# Raises SplitAndRetryError when the job is not failed, or when halving the
# batch size would drop it below 1.
def split_and_retry!
  with_lock do
    raise SplitAndRetryError, 'Only failed jobs can be split' unless failed?

    new_batch_size = batch_size / 2

    raise SplitAndRetryError, 'Job cannot be split further' if new_batch_size < 1

    batching_strategy = batched_migration.batch_class.new(connection: self.class.connection)
    next_batch_bounds = batching_strategy.next_batch(
      batched_migration.table_name,
      batched_migration.column_name,
      batch_min_value: min_value,
      batch_size: new_batch_size,
      job_arguments: batched_migration.job_arguments
    )

    # NOTE(review): if the strategy can return nil (e.g. no rows left from
    # min_value onwards) this raises NoMethodError — confirm its contract.
    midpoint = next_batch_bounds.last

    # We don't want the midpoint to go over the existing max_value because
    # those IDs would already be in the next batched migration job.
    # This could happen when a lot of records in the current batch are deleted.
    #
    # In this case, we just lower the batch size so that future calls to this
    # method could eventually split the job if it continues to fail.
    if midpoint >= max_value
      update!(batch_size: new_batch_size, attempts: 0)
    else
      old_max_value = max_value

      update!(
        batch_size: new_batch_size,
        max_value: midpoint,
        attempts: 0,
        started_at: nil,
        finished_at: nil,
        metrics: {}
      )

      # The duplicate is taken after the update, so it carries the reset
      # attributes; it covers the rest of the original range.
      new_record = dup
      new_record.min_value = midpoint.next
      new_record.max_value = old_max_value
      new_record.save!
    end
  end
end
2021-04-17 20:07:23 +05:30
end
end
end
end