2022-10-11 01:57:18 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module Gitlab
|
|
|
|
module Database
|
|
|
|
# Truncates the "legacy" tables of one database: every table whose GitLab
# schema is NOT served by that database's connection (and is not listed in
# GITLAB_SCHEMAS_TO_IGNORE), plus the detached partitions matching the same
# rule.
#
# Before touching anything, the run verifies that every candidate table is
# locked for writes, and aborts otherwise.
class TablesTruncate
  # GitLab schemas whose tables are never considered for truncation.
  GITLAB_SCHEMAS_TO_IGNORE = %i[gitlab_geo].freeze

  # database_name  - name of the database to clean up; #execute only accepts
  #                  "main" or "ci".
  # min_batch_size - number of foreign-key groups of tables that are newly
  #                  added to each TRUNCATE stage.
  # logger         - optional object responding to #info; when nil nothing
  #                  is logged.
  # until_table    - optional table name; when given, truncation stops after
  #                  the foreign-key group that contains this table.
  # dry_run        - when true, SQL statements are only logged, not executed.
  def initialize(database_name:, min_batch_size:, logger: nil, until_table: nil, dry_run: false)
    @database_name = database_name
    @min_batch_size = min_batch_size
    @logger = logger
    @until_table = until_table
    @dry_run = dry_run
  end

  # Runs the truncation.
  #
  # Raises RuntimeError when:
  # - the installation is a single-database setup,
  # - database_name is neither "main" nor "ci",
  # - any table to truncate is not locked for writes,
  # - until_table is given but is not among the tables to truncate.
  def execute
    raise "Cannot truncate legacy tables in single-db setup" if single_database_setup?
    raise "database is not supported" unless %w[main ci].include?(database_name)

    logger&.info "DRY RUN:" if dry_run

    tables_sorted = Gitlab::Database::TablesSortedByForeignKeys.new(connection, tables_to_truncate).execute

    # The lock check deliberately covers ALL candidate tables, even the ones
    # that until_table excludes from the truncation below.
    verify_tables_locked_for_writes(tables_sorted.flatten)

    tables_sorted = tables_up_to_until_table(tables_sorted) if until_table

    logger&.info "Truncating legacy tables for the database #{database_name}"
    truncate_tables_in_batches(tables_sorted)
  end

  private

  attr_accessor :database_name, :min_batch_size, :logger, :dry_run, :until_table

  # Memoized connection of the base model registered for database_name.
  def connection
    @connection ||= Gitlab::Database.database_base_models[database_name].connection
  end

  # Collects the names of the tables and detached partitions to truncate.
  def tables_to_truncate
    # Computed once: these schemas are skipped for tables and partitions alike.
    schemas_to_skip = GITLAB_SCHEMAS_TO_IGNORE.union(
      Gitlab::Database.gitlab_schemas_for_connection(connection)
    )

    tables = Gitlab::Database::GitlabSchema.tables_to_schema
      .reject { |_, schema_name| schemas_to_skip.include?(schema_name) }
      .keys

    Gitlab::Database::SharedModel.using_connection(connection) do
      Postgresql::DetachedPartition.find_each do |detached_partition|
        next if schemas_to_skip.include?(detached_partition.table_schema)

        tables << detached_partition.fully_qualified_table_name
      end
    end

    tables
  end

  # Checks that all the tables have the write-lock triggers, to make sure we
  # are deleting the right tables on the right database. Raises on the first
  # table that is not locked.
  def verify_tables_locked_for_writes(table_names)
    table_names.each do |table_name|
      lock_writes_manager = Gitlab::Database::LockWritesManager.new(
        table_name: table_name,
        connection: connection,
        database_name: database_name,
        with_retries: true,
        logger: logger,
        dry_run: dry_run
      )

      next if lock_writes_manager.table_locked_for_writes?

      raise "Table '#{table_name}' is not locked for writes. Run the rake task gitlab:db:lock_writes first"
    end
  end

  # Returns the leading groups of tables_sorted, up to and including the
  # group that contains until_table. Raises when no group contains it.
  def tables_up_to_until_table(tables_sorted)
    table_index = tables_sorted.find_index { |tables_group| tables_group.include?(until_table) }
    raise "The table '#{until_table}' is not within the truncated tables" if table_index.nil?

    tables_sorted[0..table_index]
  end

  # Unlocks writes for every table, then truncates the tables stage by stage.
  def truncate_tables_in_batches(tables_sorted)
    truncated_tables = []

    tables_sorted.flatten.each { |table| unlock_writes_for_table(table) }

    # We do the truncation in stages to avoid high IO.
    # min_batch_size is the minimum number of new tables to truncate at each
    # stage. In each stage we also have to truncate again the tables that were
    # already truncated in the previous stages: PostgreSQL doesn't allow
    # truncating a table (A) without also truncating any other table (B) that
    # has a foreign key pointing to table (A), even if table (B) is already
    # empty because it was truncated in a previous stage.
    tables_sorted.in_groups_of(min_batch_size, false).each do |tables_groups|
      new_tables_to_truncate = tables_groups.flatten
      logger&.info "= New tables to truncate: #{new_tables_to_truncate.join(', ')}"
      truncated_tables.concat(new_tables_to_truncate).sort!

      sql_statements = [
        "SET LOCAL statement_timeout = 0",
        "SET LOCAL lock_timeout = 0",
        "TRUNCATE TABLE #{truncated_tables.join(', ')} RESTRICT"
      ]

      sql_statements.each { |sql_statement| logger&.info(sql_statement) }

      next if dry_run

      # A single transaction, so that the SET LOCAL settings apply to the TRUNCATE.
      connection.transaction do
        sql_statements.each { |sql_statement| connection.execute(sql_statement) }
      end
    end
  end

  # Disables the write lock for the table and for its attached partitions.
  #
  # The partitions are handled as well because in some cases they might have
  # been locked for writes too, when they used to be normal tables before
  # being converted into attached partitions.
  def unlock_writes_for_table(table)
    table_name_without_schema = name_without_schema(table)
    disable_write_lock(table_name_without_schema)

    Gitlab::Database::SharedModel.using_connection(connection) do
      table_partitions = Gitlab::Database::PostgresPartition.for_parent_table(table_name_without_schema)
      table_partitions.each do |table_partition|
        disable_write_lock(name_without_schema(table_partition.identifier))
      end
    end
  end

  # Strips the schema qualifier from a possibly schema-qualified table name.
  def name_without_schema(qualified_name)
    ActiveRecord::ConnectionAdapters::PostgreSQL::Utils
      .extract_schema_qualified_name(qualified_name)
      .identifier
  end

  # Logs and (unless dry_run) executes the statement that switches off the
  # 'lock_writes.<table>' setting. The third set_config argument (is_local)
  # is false, so per PostgreSQL's documentation the value is kept for the
  # session instead of only the current transaction.
  def disable_write_lock(table_name)
    sql_statement = "SELECT set_config('lock_writes.#{table_name}', 'false', false)"
    logger&.info(sql_statement)
    connection.execute(sql_statement) unless dry_run
  end

  # True when no :ci database is configured, or when the :ci configuration
  # shares its database with another configuration.
  def single_database_setup?
    return true unless Gitlab::Database.has_config?(:ci)

    ci_base_model = Gitlab::Database.database_base_models[:ci]
    !!Gitlab::Database.db_config_share_with(ci_base_model.connection_db_config)
  end
end
|
|
|
|
end
|
|
|
|
end
|