debian-mirror-gitlab/db/post_migrate/20210226141517_dedup_issue_metrics.rb

72 lines
2.2 KiB
Ruby
Raw Normal View History

2021-04-29 21:17:54 +05:30
# frozen_string_literal: true
# Migration that collapses `issue_metrics` rows sharing the same `issue_id`
# into a single row per issue, then adds a unique index on `issue_id`
# (INDEX_NAME) and drops the index named OLD_INDEX_NAME.
class DedupIssueMetrics < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  TMP_INDEX_NAME = 'tmp_unique_issue_metrics_by_issue_id'
  OLD_INDEX_NAME = 'index_issue_metrics'
  INDEX_NAME = 'index_unique_issue_metrics_issue_id'
  BATCH_SIZE = 1_000

  # The concurrent index helpers used below run outside a DDL transaction.
  disable_ddl_transaction!

  # Minimal model bound to the `issue_metrics` table, with EachBatch mixed in
  # so the table can be walked in batches.
  class IssueMetrics < ActiveRecord::Base
    self.table_name = 'issue_metrics'

    include EachBatch
  end

  # Deduplicates existing rows and swaps in the unique index.
  def up
    IssueMetrics.reset_column_information

    last_metrics_record_id = IssueMetrics.maximum(:id) || 0

    # This index will disallow further duplicates while we're deduplicating the data.
    # It is partial: it only covers rows whose id is greater than the maximum
    # id observed above, i.e. rows inserted from now on.
    add_concurrent_index(
      :issue_metrics,
      :issue_id,
      where: "id > #{Integer(last_metrics_record_id)}",
      unique: true,
      name: TMP_INDEX_NAME
    )

    IssueMetrics.each_batch(of: BATCH_SIZE) do |relation|
      # issue_ids referenced by this batch that have more than one row
      # anywhere in the table.
      duplicated_issue_ids = IssueMetrics
        .where(issue_id: relation.select(:issue_id))
        .select(:issue_id)
        .group(:issue_id)
        .having('COUNT(issue_metrics.issue_id) > 1')
        .pluck(:issue_id)

      duplicated_issue_ids.each do |issue_id|
        deduplicate_item(issue_id)
      end
    end

    # The full unique index is created before the temporary and old indexes
    # are dropped.
    add_concurrent_index(:issue_metrics, :issue_id, unique: true, name: INDEX_NAME)
    remove_concurrent_index_by_name(:issue_metrics, TMP_INDEX_NAME)
    remove_concurrent_index_by_name(:issue_metrics, OLD_INDEX_NAME)
  end

  # Re-creates the non-unique index under OLD_INDEX_NAME and removes both
  # unique indexes. Rows deleted by `up` are not restored.
  def down
    add_concurrent_index(:issue_metrics, :issue_id, name: OLD_INDEX_NAME)
    remove_concurrent_index_by_name(:issue_metrics, TMP_INDEX_NAME)
    remove_concurrent_index_by_name(:issue_metrics, INDEX_NAME)
  end

  private

  # Merges every `issue_metrics` row for +issue_id+ into the most recently
  # updated one and deletes the others.
  #
  # Rows are read in ascending `updated_at` order and their non-nil
  # attributes (excluding `id`) are merged in that order, so a value from a
  # later row overrides the same attribute from an earlier row.
  #
  # @param issue_id [Integer] value of `issue_metrics.issue_id` to deduplicate
  # @return [void]
  def deduplicate_item(issue_id)
    issue_metrics_records = IssueMetrics.where(issue_id: issue_id).order(updated_at: :asc).to_a

    # The rows are re-queried after the duplicate was detected, so they may
    # have been removed in the meantime (presumably when the issue itself is
    # deleted). With fewer than two rows there is nothing to merge, and with
    # none there is no record to update.
    return if issue_metrics_records.size < 2

    attributes = issue_metrics_records.each_with_object({}) do |issue_metrics_record, merged|
      merged.merge!(issue_metrics_record.attributes.except('id').compact)
    end

    *records_to_delete, record_to_keep = issue_metrics_records

    # Delete and update together so a failure cannot leave the issue with the
    # older rows removed but the kept row not yet carrying the merged values.
    ActiveRecord::Base.transaction do
      IssueMetrics.where(id: records_to_delete.map(&:id)).delete_all
      record_to_keep.update!(attributes)
    end
  end
end