131 lines
4.2 KiB
Ruby
131 lines
4.2 KiB
Ruby
|
# frozen_string_literal: true
|
||
|
|
||
|
class RemoveDuplicateLabelsFromProject < ActiveRecord::Migration[6.0]
|
||
|
DOWNTIME = false
|
||
|
|
||
|
CREATE = 1
|
||
|
RENAME = 2
|
||
|
|
||
|
disable_ddl_transaction!
|
||
|
|
||
|
class BackupLabel < Label
|
||
|
self.table_name = 'backup_labels'
|
||
|
end
|
||
|
|
||
|
class Label < ApplicationRecord
|
||
|
self.table_name = 'labels'
|
||
|
end
|
||
|
|
||
|
class Project < ApplicationRecord
|
||
|
include EachBatch
|
||
|
|
||
|
self.table_name = 'projects'
|
||
|
end
|
||
|
|
||
|
BATCH_SIZE = 100_000
|
||
|
|
||
|
def up
|
||
|
# Split to smaller chunks
|
||
|
# Loop rather than background job, every 100,000
|
||
|
# there are 45,000,000 projects in total
|
||
|
Project.each_batch(of: BATCH_SIZE) do |batch|
|
||
|
range = batch.pluck('MIN(id)', 'MAX(id)').first
|
||
|
|
||
|
transaction do
|
||
|
remove_full_duplicates(*range)
|
||
|
end
|
||
|
|
||
|
transaction do
|
||
|
rename_partial_duplicates(*range)
|
||
|
end
|
||
|
end
|
||
|
end
|
||
|
|
||
|
def down
|
||
|
Project.each_batch(of: BATCH_SIZE) do |batch|
|
||
|
range = batch.pluck('MIN(id)', 'MAX(id)').first
|
||
|
|
||
|
restore_renamed_labels(*range)
|
||
|
restore_deleted_labels(*range)
|
||
|
end
|
||
|
end
|
||
|
|
||
|
def remove_full_duplicates(start_id, stop_id)
|
||
|
# Fields that are considered duplicate:
|
||
|
# project_id title template description type color
|
||
|
|
||
|
duplicate_labels = ApplicationRecord.connection.execute(<<-SQL.squish)
|
||
|
WITH data AS (
|
||
|
SELECT labels.*,
|
||
|
row_number() OVER (PARTITION BY labels.project_id, labels.title, labels.template, labels.description, labels.type, labels.color ORDER BY labels.id) AS row_number,
|
||
|
#{CREATE} AS restore_action
|
||
|
FROM labels
|
||
|
WHERE labels.project_id BETWEEN #{start_id} AND #{stop_id}
|
||
|
AND NOT EXISTS (SELECT * FROM board_labels WHERE board_labels.label_id = labels.id)
|
||
|
AND NOT EXISTS (SELECT * FROM label_links WHERE label_links.label_id = labels.id)
|
||
|
AND NOT EXISTS (SELECT * FROM label_priorities WHERE label_priorities.label_id = labels.id)
|
||
|
AND NOT EXISTS (SELECT * FROM lists WHERE lists.label_id = labels.id)
|
||
|
AND NOT EXISTS (SELECT * FROM resource_label_events WHERE resource_label_events.label_id = labels.id)
|
||
|
) SELECT * FROM data WHERE row_number > 1;
|
||
|
SQL
|
||
|
|
||
|
if duplicate_labels.any?
|
||
|
# create backup records
|
||
|
BackupLabel.insert_all!(duplicate_labels.map { |label| label.except("row_number") })
|
||
|
|
||
|
Label.where(id: duplicate_labels.pluck("id")).delete_all
|
||
|
end
|
||
|
end
|
||
|
|
||
|
def rename_partial_duplicates(start_id, stop_id)
|
||
|
# We need to ensure that the new title (with `_duplicate#{ID}`) doesn't exceed the limit.
|
||
|
# Truncate the original title (if needed) to 245 characters minus the length of the ID
|
||
|
# then add `_duplicate#{ID}`
|
||
|
|
||
|
soft_duplicates = ApplicationRecord.connection.execute(<<-SQL.squish)
|
||
|
WITH data AS (
|
||
|
SELECT
|
||
|
*,
|
||
|
substring(title from 1 for 245 - length(id::text)) || '_duplicate' || id::text as new_title,
|
||
|
#{RENAME} AS restore_action,
|
||
|
row_number() OVER (PARTITION BY project_id, title ORDER BY id) AS row_number
|
||
|
FROM labels
|
||
|
WHERE project_id BETWEEN #{start_id} AND #{stop_id}
|
||
|
) SELECT * FROM data WHERE row_number > 1;
|
||
|
SQL
|
||
|
|
||
|
if soft_duplicates.any?
|
||
|
# create backup records
|
||
|
BackupLabel.insert_all!(soft_duplicates.map { |label| label.except("row_number") })
|
||
|
|
||
|
ApplicationRecord.connection.execute(<<-SQL.squish)
|
||
|
UPDATE labels SET title = substring(title from 1 for 245 - length(id::text)) || '_duplicate' || id::text
|
||
|
WHERE labels.id IN (#{soft_duplicates.map { |dup| dup["id"] }.join(", ")});
|
||
|
SQL
|
||
|
end
|
||
|
end
|
||
|
|
||
|
def restore_renamed_labels(start_id, stop_id)
|
||
|
# the backup label IDs are not incremental, they are copied directly from the Labels table
|
||
|
ApplicationRecord.connection.execute(<<-SQL.squish)
|
||
|
WITH backups AS (
|
||
|
SELECT id, title
|
||
|
FROM backup_labels
|
||
|
WHERE project_id BETWEEN #{start_id} AND #{stop_id} AND
|
||
|
restore_action = #{RENAME}
|
||
|
) UPDATE labels SET title = backups.title
|
||
|
FROM backups
|
||
|
WHERE labels.id = backups.id;
|
||
|
SQL
|
||
|
end
|
||
|
|
||
|
def restore_deleted_labels(start_id, stop_id)
|
||
|
ActiveRecord::Base.connection.execute(<<-SQL.squish)
|
||
|
INSERT INTO labels
|
||
|
SELECT id, title, color, project_id, created_at, updated_at, template, description, description_html, type, group_id, cached_markdown_version FROM backup_labels
|
||
|
WHERE backup_labels.project_id BETWEEN #{start_id} AND #{stop_id}
|
||
|
AND backup_labels.restore_action = #{CREATE}
|
||
|
SQL
|
||
|
end
|
||
|
end
|