debian-mirror-gitlab/lib/gitlab/database/migration_helpers.rb

1247 lines
46 KiB
Ruby
Raw Normal View History

2019-02-15 15:39:39 +05:30
# frozen_string_literal: true
2016-06-02 11:05:42 +05:30
module Gitlab
module Database
module MigrationHelpers
2018-03-17 18:26:18 +05:30
BACKGROUND_MIGRATION_BATCH_SIZE = 1000 # Number of rows to process per job
BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time
2019-10-12 21:52:04 +05:30
PERMITTED_TIMESTAMP_COLUMNS = %i[created_at updated_at deleted_at].to_set.freeze
DEFAULT_TIMESTAMP_COLUMNS = %i[created_at updated_at].freeze
2017-09-10 17:25:29 +05:30
# Adds `created_at` and `updated_at` columns with timezone information.
#
# This method is an improved version of Rails' built-in method `add_timestamps`.
#
2019-10-12 21:52:04 +05:30
# By default, adds `created_at` and `updated_at` columns, but these can be specified as:
#
# add_timestamps_with_timezone(:my_table, columns: [:created_at, :deleted_at])
#
# This allows you to create just the timestamps you need, saving space.
#
2017-09-10 17:25:29 +05:30
# Available options are:
2019-10-12 21:52:04 +05:30
# :default - The default value for the column.
# :null - When set to `true` the column will allow NULL values.
2017-09-10 17:25:29 +05:30
# The default is to not allow NULL values.
2019-10-12 21:52:04 +05:30
# :columns - the column names to create. Must be one
# of `Gitlab::Database::MigrationHelpers::PERMITTED_TIMESTAMP_COLUMNS`.
# Default value: `DEFAULT_TIMESTAMP_COLUMNS`
#
# All options are optional.
2017-09-10 17:25:29 +05:30
def add_timestamps_with_timezone(table_name, options = {})
options[:null] = false if options[:null].nil?
2019-10-12 21:52:04 +05:30
columns = options.fetch(:columns, DEFAULT_TIMESTAMP_COLUMNS)
default_value = options[:default]
2017-09-10 17:25:29 +05:30
2019-10-12 21:52:04 +05:30
validate_not_in_transaction!(:add_timestamps_with_timezone, 'with default value') if default_value
columns.each do |column_name|
validate_timestamp_column_name!(column_name)
2017-09-10 17:25:29 +05:30
# If default value is presented, use `add_column_with_default` method instead.
2019-10-12 21:52:04 +05:30
if default_value
2017-09-10 17:25:29 +05:30
add_column_with_default(
table_name,
column_name,
:datetime_with_timezone,
2019-10-12 21:52:04 +05:30
default: default_value,
2017-09-10 17:25:29 +05:30
allow_null: options[:null]
)
else
add_column(table_name, column_name, :datetime_with_timezone, options)
end
end
end
2019-10-12 21:52:04 +05:30
# To be used in the `#down` method of migrations that
# use `#add_timestamps_with_timezone`.
2016-06-02 11:05:42 +05:30
#
2019-10-12 21:52:04 +05:30
# Available options are:
# :columns - the column names to remove. Must be one
# Default value: `DEFAULT_TIMESTAMP_COLUMNS`
#
# All options are optional.
def remove_timestamps(table_name, options = {})
columns = options.fetch(:columns, DEFAULT_TIMESTAMP_COLUMNS)
columns.each do |column_name|
remove_column(table_name, column_name)
end
end
# Creates a new index, concurrently
2016-06-02 11:05:42 +05:30
#
# Example:
#
# add_concurrent_index :users, :some_column
#
# See Rails' `add_index` for more info on the available arguments.
def add_concurrent_index(table_name, column_name, options = {})
2016-06-02 11:05:42 +05:30
if transaction_open?
raise 'add_concurrent_index can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
'in the body of your migration class'
end
2019-10-12 21:52:04 +05:30
options = options.merge({ algorithm: :concurrently })
2016-06-02 11:05:42 +05:30
2018-05-09 12:01:36 +05:30
if index_exists?(table_name, column_name, options)
2019-09-30 21:07:59 +05:30
Rails.logger.warn "Index not created because it already exists (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}" # rubocop:disable Gitlab/RailsLogger
2018-05-09 12:01:36 +05:30
return
end
2018-11-20 20:47:30 +05:30
disable_statement_timeout do
add_index(table_name, column_name, options)
end
2016-06-02 11:05:42 +05:30
end
2019-10-12 21:52:04 +05:30
# Removes an existed index, concurrently
2017-08-17 22:00:37 +05:30
#
# Example:
#
# remove_concurrent_index :users, :some_column
#
# See Rails' `remove_index` for more info on the available arguments.
def remove_concurrent_index(table_name, column_name, options = {})
if transaction_open?
raise 'remove_concurrent_index can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
'in the body of your migration class'
end
2019-12-26 22:10:19 +05:30
options = options.merge({ algorithm: :concurrently })
2017-08-17 22:00:37 +05:30
2018-05-09 12:01:36 +05:30
unless index_exists?(table_name, column_name, options)
2019-09-30 21:07:59 +05:30
Rails.logger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}" # rubocop:disable Gitlab/RailsLogger
2018-05-09 12:01:36 +05:30
return
end
2018-11-20 20:47:30 +05:30
disable_statement_timeout do
remove_index(table_name, options.merge({ column: column_name }))
end
2017-08-17 22:00:37 +05:30
end
2019-10-12 21:52:04 +05:30
# Removes an existing index, concurrently
2017-09-10 17:25:29 +05:30
#
# Example:
#
# remove_concurrent_index :users, "index_X_by_Y"
#
# See Rails' `remove_index` for more info on the available arguments.
def remove_concurrent_index_by_name(table_name, index_name, options = {})
if transaction_open?
raise 'remove_concurrent_index_by_name can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
'in the body of your migration class'
end
2019-12-26 22:10:19 +05:30
options = options.merge({ algorithm: :concurrently })
2017-09-10 17:25:29 +05:30
2018-05-09 12:01:36 +05:30
unless index_exists_by_name?(table_name, index_name)
2019-09-30 21:07:59 +05:30
Rails.logger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, index_name: #{index_name}" # rubocop:disable Gitlab/RailsLogger
2018-05-09 12:01:36 +05:30
return
end
2018-11-20 20:47:30 +05:30
disable_statement_timeout do
remove_index(table_name, options.merge({ name: index_name }))
end
2017-09-10 17:25:29 +05:30
end
2017-08-17 22:00:37 +05:30
# Adds a foreign key with only minimal locking on the tables involved.
#
2019-10-12 21:52:04 +05:30
# This method only requires minimal locking
2017-08-17 22:00:37 +05:30
#
# source - The source table containing the foreign key.
# target - The target table the key points to.
# column - The name of the column to create the foreign key on.
# on_delete - The action to perform when associated data is removed,
# defaults to "CASCADE".
2020-01-01 13:55:28 +05:30
# name - The name of the foreign key.
2019-09-30 21:07:59 +05:30
#
# rubocop:disable Gitlab/RailsLogger
2020-03-09 13:42:32 +05:30
def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, name: nil, validate: true)
2017-08-17 22:00:37 +05:30
# Transactions would result in ALTER TABLE locks being held for the
# duration of the transaction, defeating the purpose of this method.
if transaction_open?
raise 'add_concurrent_foreign_key can not be run inside a transaction'
end
2020-01-01 13:55:28 +05:30
options = {
column: column,
on_delete: on_delete,
name: name.presence || concurrent_foreign_key_name(source, column)
}
2017-08-17 22:00:37 +05:30
2020-01-01 13:55:28 +05:30
if foreign_key_exists?(source, target, options)
warning_message = "Foreign key not created because it exists already " \
2018-05-09 12:01:36 +05:30
"(this may be due to an aborted migration or similar): " \
2020-01-01 13:55:28 +05:30
"source: #{source}, target: #{target}, column: #{options[:column]}, "\
"name: #{options[:name]}, on_delete: #{options[:on_delete]}"
2018-05-09 12:01:36 +05:30
2020-01-01 13:55:28 +05:30
Rails.logger.warn warning_message
else
2018-05-09 12:01:36 +05:30
# Using NOT VALID allows us to create a key without immediately
# validating it. This means we keep the ALTER TABLE lock only for a
# short period of time. The key _is_ enforced for any newly created
# data.
2020-01-01 13:55:28 +05:30
2018-05-09 12:01:36 +05:30
execute <<-EOF.strip_heredoc
ALTER TABLE #{source}
2020-01-01 13:55:28 +05:30
ADD CONSTRAINT #{options[:name]}
FOREIGN KEY (#{options[:column]})
2018-05-09 12:01:36 +05:30
REFERENCES #{target} (id)
2020-01-01 13:55:28 +05:30
#{on_delete_statement(options[:on_delete])}
2018-05-09 12:01:36 +05:30
NOT VALID;
EOF
end
2017-08-17 22:00:37 +05:30
# Validate the existing constraint. This can potentially take a very
# long time to complete, but fortunately does not lock the source table
# while running.
2020-03-09 13:42:32 +05:30
# Disable this check by passing `validate: false` to the method call
# The check will be enforced for new data (inserts) coming in,
# but validating existing data is delayed.
2018-05-09 12:01:36 +05:30
#
# Note this is a no-op in case the constraint is VALID already
2020-03-09 13:42:32 +05:30
if validate
disable_statement_timeout do
execute("ALTER TABLE #{source} VALIDATE CONSTRAINT #{options[:name]};")
end
2018-11-20 20:47:30 +05:30
end
2017-08-17 22:00:37 +05:30
end
2019-09-30 21:07:59 +05:30
# rubocop:enable Gitlab/RailsLogger
2017-08-17 22:00:37 +05:30
2020-03-09 13:42:32 +05:30
def validate_foreign_key(source, column, name: nil)
fk_name = name || concurrent_foreign_key_name(source, column)
unless foreign_key_exists?(source, name: fk_name)
2020-04-08 14:13:33 +05:30
raise missing_schema_object_message(source, "foreign key", fk_name)
2020-03-09 13:42:32 +05:30
end
disable_statement_timeout do
execute("ALTER TABLE #{source} VALIDATE CONSTRAINT #{fk_name};")
end
end
2020-01-01 13:55:28 +05:30
def foreign_key_exists?(source, target = nil, **options)
foreign_keys(source).any? do |foreign_key|
tables_match?(target.to_s, foreign_key.to_table.to_s) &&
options_match?(foreign_key.options, options)
2018-05-09 12:01:36 +05:30
end
end
2017-08-17 22:00:37 +05:30
# Returns the name for a concurrent foreign key.
#
# PostgreSQL constraint names have a limit of 63 bytes. The logic used
# here is based on Rails' foreign_key_name() method, which unfortunately
# is private so we can't rely on it directly.
2020-04-08 14:13:33 +05:30
#
# prefix:
# - The default prefix is `fk_` for backward compatibility with the existing
# concurrent foreign key helpers.
# - For standard rails foreign keys the prefix is `fk_rails_`
#
def concurrent_foreign_key_name(table, column, prefix: 'fk_')
2019-09-30 21:07:59 +05:30
identifier = "#{table}_#{column}_fk"
hashed_identifier = Digest::SHA256.hexdigest(identifier).first(10)
2020-04-08 14:13:33 +05:30
"#{prefix}#{hashed_identifier}"
2017-08-17 22:00:37 +05:30
end
2016-08-24 12:49:21 +05:30
# Long-running migrations may take more than the timeout allowed by
# the database. Disable the session's statement timeout to ensure
2019-10-12 21:52:04 +05:30
# migrations don't get killed prematurely.
2018-11-20 20:47:30 +05:30
#
# There are two possible ways to disable the statement timeout:
#
# - Per transaction (this is the preferred and default mode)
# - Per connection (requires a cleanup after the execution)
#
# When using a per connection disable statement, code must be inside
# a block so we can automatically execute `RESET ALL` after block finishes
# otherwise the statement will still be disabled until connection is dropped
# or `RESET ALL` is executed
2016-08-24 12:49:21 +05:30
def disable_statement_timeout
2018-11-20 20:47:30 +05:30
if block_given?
begin
execute('SET statement_timeout TO 0')
yield
ensure
execute('RESET ALL')
end
else
unless transaction_open?
raise <<~ERROR
Cannot call disable_statement_timeout() without a transaction open or outside of a transaction block.
If you don't want to use a transaction wrap your code in a block call:
disable_statement_timeout { # code that requires disabled statement here }
This will make sure statement_timeout is disabled before and reset after the block execution is finished.
ERROR
end
execute('SET LOCAL statement_timeout TO 0')
end
2017-08-17 22:00:37 +05:30
end
2020-03-09 13:42:32 +05:30
# Executes the block with a retry mechanism that alters the +lock_timeout+ and +sleep_time+ between attempts.
# The timings can be controlled via the +timing_configuration+ parameter.
# If the lock was not acquired within the retry period, a last attempt is made without using +lock_timeout+.
#
# ==== Examples
# # Invoking without parameters
# with_lock_retries do
# drop_table :my_table
# end
#
# # Invoking with custom +timing_configuration+
# t = [
# [1.second, 1.second],
# [2.seconds, 2.seconds]
# ]
#
# with_lock_retries(timing_configuration: t) do
# drop_table :my_table # this will be retried twice
# end
#
# # Disabling the retries using an environment variable
# > export DISABLE_LOCK_RETRIES=true
#
# with_lock_retries do
# drop_table :my_table # one invocation, it will not retry at all
# end
#
# ==== Parameters
# * +timing_configuration+ - [[ActiveSupport::Duration, ActiveSupport::Duration], ...] lock timeout for the block, sleep time before the next iteration, defaults to `Gitlab::Database::WithLockRetries::DEFAULT_TIMING_CONFIGURATION`
# * +logger+ - [Gitlab::JsonLogger]
# * +env+ - [Hash] custom environment hash, see the example with `DISABLE_LOCK_RETRIES`
def with_lock_retries(**args, &block)
merged_args = {
klass: self.class,
logger: Gitlab::BackgroundMigration::Logger
}.merge(args)
Gitlab::Database::WithLockRetries.new(merged_args).run(&block)
end
2017-08-17 22:00:37 +05:30
def true_value
Database.true_value
end
def false_value
Database.false_value
2016-08-24 12:49:21 +05:30
end
2016-06-02 11:05:42 +05:30
# Updates the value of a column in batches.
#
# This method updates the table in batches of 5% of the total row count.
2019-07-07 11:18:12 +05:30
# A `batch_size` option can also be passed to set this to a fixed number.
2016-06-22 15:30:34 +05:30
# This method will continue updating rows until no rows remain.
#
# When given a block this method will yield two values to the block:
#
# 1. An instance of `Arel::Table` for the table that is being updated.
# 2. The query to run as an Arel object.
#
# By supplying a block one can add extra conditions to the queries being
# executed. Note that the same block is used for _all_ queries.
#
# Example:
#
# update_column_in_batches(:projects, :foo, 10) do |table, query|
# query.where(table[:some_column].eq('hello'))
# end
#
# This would result in this method updating only rows where
# `projects.some_column` equals "hello".
2016-06-02 11:05:42 +05:30
#
# table - The name of the table.
# column - The name of the column to update.
# value - The value for the column.
2016-06-22 15:30:34 +05:30
#
2018-03-17 18:26:18 +05:30
# The `value` argument is typically a literal. To perform a computed
# update, an Arel literal can be used instead:
#
# update_value = Arel.sql('bar * baz')
#
# update_column_in_batches(:projects, :foo, update_value) do |table, query|
# query.where(table[:some_column].eq('hello'))
# end
#
2016-06-22 15:30:34 +05:30
# Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop
# determines this method to be too complex while there's no way to make it
# less "complex" without introducing extra methods (which actually will
# make things _more_ complex).
#
# rubocop: disable Metrics/AbcSize
2019-07-07 11:18:12 +05:30
def update_column_in_batches(table, column, value, batch_size: nil)
2017-09-10 17:25:29 +05:30
if transaction_open?
raise 'update_column_in_batches can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
'in the body of your migration class'
end
2016-06-22 15:30:34 +05:30
table = Arel::Table.new(table)
count_arel = table.project(Arel.star.count.as('count'))
count_arel = yield table, count_arel if block_given?
2020-03-09 13:42:32 +05:30
total = exec_query(count_arel.to_sql).to_a.first['count'].to_i
2016-06-22 15:30:34 +05:30
return if total == 0
2016-06-02 11:05:42 +05:30
2019-07-07 11:18:12 +05:30
if batch_size.nil?
# Update in batches of 5% until we run out of any rows to update.
batch_size = ((total / 100.0) * 5.0).ceil
max_size = 1000
2017-09-10 17:25:29 +05:30
2019-07-07 11:18:12 +05:30
# The upper limit is 1000 to ensure we don't lock too many rows. For
# example, for "merge_requests" even 1% of the table is around 35 000
# rows for GitLab.com.
batch_size = max_size if batch_size > max_size
end
2016-06-02 11:05:42 +05:30
2016-06-22 15:30:34 +05:30
start_arel = table.project(table[:id]).order(table[:id].asc).take(1)
start_arel = yield table, start_arel if block_given?
2020-03-09 13:42:32 +05:30
start_id = exec_query(start_arel.to_sql).to_a.first['id'].to_i
2016-06-22 15:30:34 +05:30
loop do
2017-09-10 17:25:29 +05:30
stop_arel = table.project(table[:id])
.where(table[:id].gteq(start_id))
.order(table[:id].asc)
.take(1)
.skip(batch_size)
2016-06-22 15:30:34 +05:30
stop_arel = yield table, stop_arel if block_given?
2020-03-09 13:42:32 +05:30
stop_row = exec_query(stop_arel.to_sql).to_a.first
2016-06-22 15:30:34 +05:30
2019-02-15 15:39:39 +05:30
update_arel = Arel::UpdateManager.new
2017-09-10 17:25:29 +05:30
.table(table)
.set([[table[column], value]])
.where(table[:id].gteq(start_id))
2016-06-02 11:05:42 +05:30
if stop_row
2016-06-22 15:30:34 +05:30
stop_id = stop_row['id'].to_i
start_id = stop_id
update_arel = update_arel.where(table[:id].lt(stop_id))
2016-06-02 11:05:42 +05:30
end
2016-06-22 15:30:34 +05:30
update_arel = yield table, update_arel if block_given?
execute(update_arel.to_sql)
2016-06-02 11:05:42 +05:30
2016-06-22 15:30:34 +05:30
# There are no more rows left to update.
break unless stop_row
2016-06-02 11:05:42 +05:30
end
end
# Adds a column with a default value without locking an entire table.
#
# This method runs the following steps:
#
# 1. Add the column with a default value of NULL.
2016-06-22 15:30:34 +05:30
# 2. Change the default value of the column to the specified value.
# 3. Update all existing rows in batches.
# 4. Set a `NOT NULL` constraint on the column if desired (the default).
2016-06-02 11:05:42 +05:30
#
# These steps ensure a column can be added to a large and commonly used
# table without locking the entire table for the duration of the table
# modification.
#
# table - The name of the table to update.
# column - The name of the column to add.
# type - The column type (e.g. `:integer`).
# default - The default value for the column.
2016-09-29 09:46:39 +05:30
# limit - Sets a column limit. For example, for :integer, the default is
# 4-bytes. Set `limit: 8` to allow 8-byte integers.
2016-06-02 11:05:42 +05:30
# allow_null - When set to `true` the column will allow NULL values, the
# default is to not allow NULL values.
2016-06-22 15:30:34 +05:30
#
# This method can also take a block which is passed directly to the
# `update_column_in_batches` method.
2016-09-29 09:46:39 +05:30
def add_column_with_default(table, column, type, default:, limit: nil, allow_null: false, &block)
2016-06-02 11:05:42 +05:30
if transaction_open?
raise 'add_column_with_default can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
'in the body of your migration class'
end
2018-11-20 20:47:30 +05:30
disable_statement_timeout do
transaction do
if limit
add_column(table, column, type, default: nil, limit: limit)
else
add_column(table, column, type, default: nil)
end
# Changing the default before the update ensures any newly inserted
# rows already use the proper default value.
change_column_default(table, column, default)
2016-09-29 09:46:39 +05:30
end
2016-06-02 11:05:42 +05:30
2018-11-20 20:47:30 +05:30
begin
2019-09-30 21:07:59 +05:30
default_after_type_cast = connection.type_cast(default, column_for(table, column))
update_column_in_batches(table, column, default_after_type_cast, &block)
2018-11-20 20:47:30 +05:30
change_column_null(table, column, false) unless allow_null
# We want to rescue _all_ exceptions here, even those that don't inherit
# from StandardError.
rescue Exception => error # rubocop: disable all
remove_column(table, column)
2016-06-02 11:05:42 +05:30
2018-11-20 20:47:30 +05:30
raise error
end
2016-06-02 11:05:42 +05:30
end
end
2017-08-17 22:00:37 +05:30
# Renames a column without requiring downtime.
#
# Concurrent renames work by using database triggers to ensure both the
# old and new column are in sync. However, this method will _not_ remove
# the triggers or the old column automatically; this needs to be done
# manually in a post-deployment migration. This can be done using the
# method `cleanup_concurrent_column_rename`.
#
# table - The name of the database table containing the column.
# old - The old column name.
# new - The new column name.
# type - The type of the new column. If no type is given the old column's
# type is used.
def rename_column_concurrently(table, old, new, type: nil)
if transaction_open?
raise 'rename_column_concurrently can not be run inside a transaction'
end
2018-03-17 18:26:18 +05:30
check_trigger_permissions!(table)
2019-12-04 20:38:33 +05:30
create_column_from(table, old, new, type: type)
2017-08-17 22:00:37 +05:30
2018-03-17 18:26:18 +05:30
install_rename_triggers(table, old, new)
end
2019-12-04 20:38:33 +05:30
# Reverses operations performed by rename_column_concurrently.
#
# This method takes care of removing previously installed triggers as well
# as removing the new column.
#
# table - The name of the database table.
# old - The name of the old column.
# new - The name of the new column.
2019-10-12 21:52:04 +05:30
def undo_rename_column_concurrently(table, old, new)
trigger_name = rename_trigger_name(table, old, new)
check_trigger_permissions!(table)
remove_rename_triggers_for_postgresql(table, trigger_name)
remove_column(table, new)
end
2018-03-17 18:26:18 +05:30
# Installs triggers in a table that keep a new column in sync with an old
# one.
#
# table - The name of the table to install the trigger in.
# old_column - The name of the old column.
# new_column - The name of the new column.
def install_rename_triggers(table, old_column, new_column)
trigger_name = rename_trigger_name(table, old_column, new_column)
2017-08-17 22:00:37 +05:30
quoted_table = quote_table_name(table)
2018-03-17 18:26:18 +05:30
quoted_old = quote_column_name(old_column)
quoted_new = quote_column_name(new_column)
2017-08-17 22:00:37 +05:30
2019-10-12 21:52:04 +05:30
install_rename_triggers_for_postgresql(
trigger_name,
quoted_table,
quoted_old,
quoted_new
)
2017-08-17 22:00:37 +05:30
end
# Changes the type of a column concurrently.
#
# table - The table containing the column.
# column - The name of the column to change.
# new_type - The new column type.
def change_column_type_concurrently(table, column, new_type)
temp_column = "#{column}_for_type_change"
rename_column_concurrently(table, column, temp_column, type: new_type)
end
# Performs cleanup of a concurrent type change.
#
# table - The table containing the column.
# column - The name of the column to change.
# new_type - The new column type.
def cleanup_concurrent_column_type_change(table, column)
temp_column = "#{column}_for_type_change"
transaction do
# This has to be performed in a transaction as otherwise we might have
# inconsistent data.
cleanup_concurrent_column_rename(table, column, temp_column)
rename_column(table, temp_column, column)
end
end
# Cleans up a concurrent column name.
#
# This method takes care of removing previously installed triggers as well
# as removing the old column.
#
# table - The name of the database table.
# old - The name of the old column.
# new - The name of the new column.
def cleanup_concurrent_column_rename(table, old, new)
trigger_name = rename_trigger_name(table, old, new)
2018-03-17 18:26:18 +05:30
check_trigger_permissions!(table)
2019-10-12 21:52:04 +05:30
remove_rename_triggers_for_postgresql(table, trigger_name)
2017-08-17 22:00:37 +05:30
remove_column(table, old)
end
2019-12-04 20:38:33 +05:30
# Reverses the operations performed by cleanup_concurrent_column_rename.
#
# This method adds back the old_column removed
# by cleanup_concurrent_column_rename.
# It also adds back the (old_column > new_column) trigger that is removed
# by cleanup_concurrent_column_rename.
#
# table - The name of the database table containing the column.
# old - The old column name.
# new - The new column name.
# type - The type of the old column. If no type is given the new column's
# type is used.
2019-10-12 21:52:04 +05:30
def undo_cleanup_concurrent_column_rename(table, old, new, type: nil)
if transaction_open?
raise 'undo_cleanup_concurrent_column_rename can not be run inside a transaction'
end
check_trigger_permissions!(table)
2019-12-04 20:38:33 +05:30
create_column_from(table, new, old, type: type)
2019-10-12 21:52:04 +05:30
install_rename_triggers(table, old, new)
end
2018-03-17 18:26:18 +05:30
# Changes the column type of a table using a background migration.
#
# Because this method uses a background migration it's more suitable for
# large tables. For small tables it's better to use
# `change_column_type_concurrently` since it can complete its work in a
# much shorter amount of time and doesn't rely on Sidekiq.
#
# Example usage:
#
# class Issue < ActiveRecord::Base
# self.table_name = 'issues'
#
# include EachBatch
#
# def self.to_migrate
# where('closed_at IS NOT NULL')
# end
# end
#
# change_column_type_using_background_migration(
# Issue.to_migrate,
# :closed_at,
# :datetime_with_timezone
# )
#
# Reverting a migration like this is done exactly the same way, just with
# a different type to migrate to (e.g. `:datetime` in the above example).
#
# relation - An ActiveRecord relation to use for scheduling jobs and
# figuring out what table we're modifying. This relation _must_
# have the EachBatch module included.
#
# column - The name of the column for which the type will be changed.
#
# new_type - The new type of the column.
#
# batch_size - The number of rows to schedule in a single background
# migration.
#
# interval - The time interval between every background migration.
def change_column_type_using_background_migration(
relation,
column,
new_type,
batch_size: 10_000,
interval: 10.minutes
)
unless relation.model < EachBatch
raise TypeError, 'The relation must include the EachBatch module'
end
temp_column = "#{column}_for_type_change"
table = relation.table_name
max_index = 0
add_column(table, temp_column, new_type)
install_rename_triggers(table, column, temp_column)
# Schedule the jobs that will copy the data from the old column to the
# new one. Rows with NULL values in our source column are skipped since
# the target column is already NULL at this point.
relation.where.not(column => nil).each_batch(of: batch_size) do |batch, index|
start_id, end_id = batch.pluck('MIN(id), MAX(id)').first
max_index = index
2020-04-08 14:13:33 +05:30
migrate_in(
2018-03-17 18:26:18 +05:30
index * interval,
'CopyColumn',
[table, column, temp_column, start_id, end_id]
)
end
# Schedule the renaming of the column to happen (initially) 1 hour after
# the last batch finished.
2020-04-08 14:13:33 +05:30
migrate_in(
2018-03-17 18:26:18 +05:30
(max_index * interval) + 1.hour,
'CleanupConcurrentTypeChange',
[table, column, temp_column]
)
if perform_background_migration_inline?
# To ensure the schema is up to date immediately we perform the
# migration inline in dev / test environments.
Gitlab::BackgroundMigration.steal('CopyColumn')
Gitlab::BackgroundMigration.steal('CleanupConcurrentTypeChange')
end
end
2018-11-08 19:23:39 +05:30
# Renames a column using a background migration.
#
# Because this method uses a background migration it's more suitable for
# large tables. For small tables it's better to use
# `rename_column_concurrently` since it can complete its work in a much
# shorter amount of time and doesn't rely on Sidekiq.
#
# Example usage:
#
# rename_column_using_background_migration(
# :users,
# :feed_token,
# :rss_token
# )
#
# table - The name of the database table containing the column.
#
# old - The old column name.
#
# new - The new column name.
#
# type - The type of the new column. If no type is given the old column's
# type is used.
#
# batch_size - The number of rows to schedule in a single background
# migration.
#
# interval - The time interval between every background migration.
def rename_column_using_background_migration(
table,
old_column,
new_column,
type: nil,
batch_size: 10_000,
interval: 10.minutes
)
check_trigger_permissions!(table)
old_col = column_for(table, old_column)
new_type = type || old_col.type
max_index = 0
add_column(table, new_column, new_type,
limit: old_col.limit,
precision: old_col.precision,
scale: old_col.scale)
# We set the default value _after_ adding the column so we don't end up
# updating any existing data with the default value. This isn't
# necessary since we copy over old values further down.
change_column_default(table, new_column, old_col.default) if old_col.default
install_rename_triggers(table, old_column, new_column)
model = Class.new(ActiveRecord::Base) do
self.table_name = table
include ::EachBatch
end
# Schedule the jobs that will copy the data from the old column to the
# new one. Rows with NULL values in our source column are skipped since
# the target column is already NULL at this point.
model.where.not(old_column => nil).each_batch(of: batch_size) do |batch, index|
start_id, end_id = batch.pluck('MIN(id), MAX(id)').first
max_index = index
2020-04-08 14:13:33 +05:30
migrate_in(
2018-11-08 19:23:39 +05:30
index * interval,
'CopyColumn',
[table, old_column, new_column, start_id, end_id]
)
end
# Schedule the renaming of the column to happen (initially) 1 hour after
# the last batch finished.
2020-04-08 14:13:33 +05:30
migrate_in(
2018-11-08 19:23:39 +05:30
(max_index * interval) + 1.hour,
'CleanupConcurrentRename',
[table, old_column, new_column]
)
if perform_background_migration_inline?
# To ensure the schema is up to date immediately we perform the
# migration inline in dev / test environments.
Gitlab::BackgroundMigration.steal('CopyColumn')
Gitlab::BackgroundMigration.steal('CleanupConcurrentRename')
end
end
2018-03-17 18:26:18 +05:30
def perform_background_migration_inline?
Rails.env.test? || Rails.env.development?
end
2017-08-17 22:00:37 +05:30
# Performs a concurrent column rename when using PostgreSQL.
def install_rename_triggers_for_postgresql(trigger, table, old, new)
execute <<-EOF.strip_heredoc
CREATE OR REPLACE FUNCTION #{trigger}()
RETURNS trigger AS
$BODY$
BEGIN
NEW.#{new} := NEW.#{old};
RETURN NEW;
END;
$BODY$
LANGUAGE 'plpgsql'
VOLATILE
EOF
2019-12-04 20:38:33 +05:30
execute <<-EOF.strip_heredoc
DROP TRIGGER IF EXISTS #{trigger}
ON #{table}
EOF
2017-08-17 22:00:37 +05:30
execute <<-EOF.strip_heredoc
CREATE TRIGGER #{trigger}
BEFORE INSERT OR UPDATE
ON #{table}
FOR EACH ROW
EXECUTE PROCEDURE #{trigger}()
EOF
end
# Removes the triggers used for renaming a PostgreSQL column concurrently.
def remove_rename_triggers_for_postgresql(table, trigger)
2018-03-17 18:26:18 +05:30
execute("DROP TRIGGER IF EXISTS #{trigger} ON #{table}")
execute("DROP FUNCTION IF EXISTS #{trigger}()")
2017-08-17 22:00:37 +05:30
end
# Returns the (base) name to use for triggers when renaming columns.
def rename_trigger_name(table, old, new)
'trigger_' + Digest::SHA256.hexdigest("#{table}_#{old}_#{new}").first(12)
end
# Returns an Array containing the indexes for the given column
def indexes_for(table, column)
column = column.to_s
indexes(table).select { |index| index.columns.include?(column) }
end
# Returns an Array containing the foreign keys for the given column.
def foreign_keys_for(table, column)
column = column.to_s
foreign_keys(table).select { |fk| fk.column == column }
end
# Copies all indexes for the old column to a new column.
#
# table - The table containing the columns and indexes.
# old - The old column.
# new - The new column.
def copy_indexes(table, old, new)
old = old.to_s
new = new.to_s
indexes_for(table, old).each do |index|
new_columns = index.columns.map do |column|
column == old ? new : column
end
# This is necessary as we can't properly rename indexes such as
# "ci_taggings_idx".
unless index.name.include?(old)
raise "The index #{index.name} can not be copied as it does not "\
"mention the old column. You have to rename this index manually first."
end
name = index.name.gsub(old, new)
options = {
unique: index.unique,
name: name,
length: index.lengths,
order: index.orders
}
options[:using] = index.using if index.using
options[:where] = index.where if index.where
unless index.opclasses.blank?
opclasses = index.opclasses.dup
# Copy the operator classes for the old column (if any) to the new
# column.
opclasses[new] = opclasses.delete(old) if opclasses[old]
options[:opclasses] = opclasses
end
add_concurrent_index(table, new_columns, options)
end
end
# Copies all foreign keys for the old column to the new column.
#
# table - The table containing the columns and indexes.
# old - The old column.
# new - The new column.
def copy_foreign_keys(table, old, new)
foreign_keys_for(table, old).each do |fk|
add_concurrent_foreign_key(fk.from_table,
fk.to_table,
column: new,
on_delete: fk.on_delete)
end
end
# Returns the column for the given table and column name.
def column_for(table, name)
name = name.to_s
2020-04-08 14:13:33 +05:30
column = columns(table).find { |column| column.name == name }
raise(missing_schema_object_message(table, "column", name)) if column.nil?
column
2017-08-17 22:00:37 +05:30
end
2018-12-13 13:39:08 +05:30
# This will replace the first occurrence of a string in a column with
2019-10-12 21:52:04 +05:30
# the replacement using `regexp_replace`
2017-08-17 22:00:37 +05:30
def replace_sql(column, pattern, replacement)
quoted_pattern = Arel::Nodes::Quoted.new(pattern.to_s)
quoted_replacement = Arel::Nodes::Quoted.new(replacement.to_s)
2019-10-12 21:52:04 +05:30
replace = Arel::Nodes::NamedFunction.new(
"regexp_replace", [column, quoted_pattern, quoted_replacement]
)
2017-08-17 22:00:37 +05:30
2019-10-12 21:52:04 +05:30
Arel::Nodes::SqlLiteral.new(replace.to_sql)
2017-08-17 22:00:37 +05:30
end
2017-09-10 17:25:29 +05:30
2019-07-07 11:18:12 +05:30
def remove_foreign_key_if_exists(*args)
if foreign_key_exists?(*args)
remove_foreign_key(*args)
end
end
2017-09-10 17:25:29 +05:30
def remove_foreign_key_without_error(*args)
remove_foreign_key(*args)
rescue ArgumentError
end
2018-03-17 18:26:18 +05:30
def sidekiq_queue_migrate(queue_from, to:)
while sidekiq_queue_length(queue_from) > 0
Sidekiq.redis do |conn|
conn.rpoplpush "queue:#{queue_from}", "queue:#{to}"
end
end
end
def sidekiq_queue_length(queue_name)
Sidekiq.redis do |conn|
conn.llen("queue:#{queue_name}")
end
end
def check_trigger_permissions!(table)
unless Grant.create_and_execute_trigger?(table)
dbname = Database.database_name
user = Database.username
raise <<-EOF
Your database user is not allowed to create, drop, or execute triggers on the
table #{table}.
If you are using PostgreSQL you can solve this by logging in to the GitLab
database (#{dbname}) using a super user and running:
ALTER #{user} WITH SUPERUSER
2019-10-12 21:52:04 +05:30
This query will grant the user super user permissions, ensuring you don't run
2018-03-17 18:26:18 +05:30
into similar problems in the future (e.g. when new tables are created).
EOF
end
end
# Bulk queues background migration jobs for an entire table, batched by ID range.
# "Bulk" meaning many jobs will be pushed at a time for efficiency.
# If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`.
#
# model_class - The table being iterated over
# job_class_name - The background migration job class as a string
# batch_size - The maximum number of rows per job
#
# Example:
#
# class Route < ActiveRecord::Base
# include EachBatch
# self.table_name = 'routes'
# end
#
# bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes')
#
# Where the model_class includes EachBatch, and the background migration exists:
#
# class Gitlab::BackgroundMigration::ProcessRoutes
# def perform(start_id, end_id)
# # do something
# end
# end
def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
jobs = []
2019-02-15 15:39:39 +05:30
table_name = model_class.quoted_table_name
2018-03-17 18:26:18 +05:30
model_class.each_batch(of: batch_size) do |relation|
2019-12-26 22:10:19 +05:30
start_id, end_id = relation.pluck("MIN(#{table_name}.id)", "MAX(#{table_name}.id)").first
2018-03-17 18:26:18 +05:30
if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE
# Note: This code path generally only helps with many millions of rows
# We push multiple jobs at a time to reduce the time spent in
# Sidekiq/Redis operations. We're using this buffer based approach so we
# don't need to run additional queries for every range.
2020-04-08 14:13:33 +05:30
bulk_migrate_async(jobs)
2018-03-17 18:26:18 +05:30
jobs.clear
end
jobs << [job_class_name, [start_id, end_id]]
end
2020-04-08 14:13:33 +05:30
bulk_migrate_async(jobs) unless jobs.empty?
2018-03-17 18:26:18 +05:30
end
# Queues background migration jobs for an entire table, batched by ID range.
# Each job is scheduled with a `delay_interval` in between.
# If you use a small interval, then some jobs may run at the same time.
#
2018-04-05 14:03:07 +05:30
# model_class - The table or relation being iterated over
2018-03-17 18:26:18 +05:30
# job_class_name - The background migration job class as a string
# delay_interval - The duration between each job's scheduled time (must respond to `to_f`)
# batch_size - The maximum number of rows per job
2020-04-08 14:13:33 +05:30
# other_arguments - Other arguments to send to the job
2018-03-17 18:26:18 +05:30
#
# Example:
#
# class Route < ActiveRecord::Base
# include EachBatch
# self.table_name = 'routes'
# end
#
# queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute)
#
# Where the model_class includes EachBatch, and the background migration exists:
#
# class Gitlab::BackgroundMigration::ProcessRoutes
# def perform(start_id, end_id)
# # do something
# end
# end
2020-04-08 14:13:33 +05:30
def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE, other_arguments: [])
2018-03-17 18:26:18 +05:30
raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
# To not overload the worker too much we enforce a minimum interval both
# when scheduling and performing jobs.
2018-11-18 11:00:15 +05:30
if delay_interval < BackgroundMigrationWorker.minimum_interval
delay_interval = BackgroundMigrationWorker.minimum_interval
2018-03-17 18:26:18 +05:30
end
model_class.each_batch(of: batch_size) do |relation, index|
2019-12-21 20:55:43 +05:30
start_id, end_id = relation.pluck(Arel.sql('MIN(id), MAX(id)')).first
2018-03-17 18:26:18 +05:30
# `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
# the same time, which is not helpful in most cases where we wish to
# spread the work over time.
2020-04-08 14:13:33 +05:30
migrate_in(delay_interval * index, job_class_name, [start_id, end_id] + other_arguments)
2018-03-17 18:26:18 +05:30
end
end
2018-03-27 19:54:05 +05:30
2018-05-09 12:01:36 +05:30
# Fetches indexes on a column by name for postgres.
#
# This will include indexes using an expression on the column, for example:
# `CREATE INDEX CONCURRENTLY index_name ON table (LOWER(column));`
#
# We can remove this when upgrading to Rails 5 with an updated `index_exists?`:
# - https://github.com/rails/rails/commit/edc2b7718725016e988089b5fb6d6fb9d6e16882
#
# Or this can be removed when we no longer support postgres < 9.5, so we
# can use `CREATE INDEX IF NOT EXISTS`.
def index_exists_by_name?(table, index)
# We can't fall back to the normal `index_exists?` method because that
# does not find indexes without passing a column name.
if indexes(table).map(&:name).include?(index.to_s)
true
else
2019-10-12 21:52:04 +05:30
postgres_exists_by_name?(table, index)
2018-03-27 19:54:05 +05:30
end
end
2018-05-09 12:01:36 +05:30
def postgres_exists_by_name?(table, name)
index_sql = <<~SQL
SELECT COUNT(*)
FROM pg_index
JOIN pg_class i ON (indexrelid=i.oid)
JOIN pg_class t ON (indrelid=t.oid)
WHERE i.relname = '#{name}' AND t.relname = '#{table}'
SQL
connection.select_value(index_sql).to_i > 0
2018-03-27 19:54:05 +05:30
end
2018-12-05 23:21:45 +05:30
2020-01-01 13:55:28 +05:30
def create_or_update_plan_limit(limit_name, plan_name, limit_value)
execute <<~SQL
INSERT INTO plan_limits (plan_id, #{quote_column_name(limit_name)})
VALUES
((SELECT id FROM plans WHERE name = #{quote(plan_name)} LIMIT 1), #{quote(limit_value)})
ON CONFLICT (plan_id) DO UPDATE SET #{quote_column_name(limit_name)} = EXCLUDED.#{quote_column_name(limit_name)};
SQL
end
2020-03-09 13:42:32 +05:30
# Note this should only be used with very small tables
def backfill_iids(table)
sql = <<-END
UPDATE #{table}
SET iid = #{table}_with_calculated_iid.iid_num
FROM (
SELECT id, ROW_NUMBER() OVER (PARTITION BY project_id ORDER BY id ASC) AS iid_num FROM #{table}
) AS #{table}_with_calculated_iid
WHERE #{table}.id = #{table}_with_calculated_iid.id
END
execute(sql)
end
2020-04-08 14:13:33 +05:30
def migrate_async(*args)
with_migration_context do
BackgroundMigrationWorker.perform_async(*args)
end
end
def migrate_in(*args)
with_migration_context do
BackgroundMigrationWorker.perform_in(*args)
end
end
def bulk_migrate_in(*args)
with_migration_context do
BackgroundMigrationWorker.bulk_perform_in(*args)
end
end
def bulk_migrate_async(*args)
with_migration_context do
BackgroundMigrationWorker.bulk_perform_async(*args)
end
end
2019-10-12 21:52:04 +05:30
private
2020-04-08 14:13:33 +05:30
def missing_schema_object_message(table, type, name)
<<~MESSAGE
Could not find #{type} "#{name}" on table "#{table}" which was referenced during the migration.
This issue could be caused by the database schema straying from the expected state.
To resolve this issue, please verify:
1. all previous migrations have completed
2. the database objects used in this migration match the Rails definition in schema.rb or structure.sql
MESSAGE
end
2020-01-01 13:55:28 +05:30
def tables_match?(target_table, foreign_key_table)
target_table.blank? || foreign_key_table == target_table
end
def options_match?(foreign_key_options, options)
options.all? { |k, v| foreign_key_options[k].to_s == v.to_s }
end
def on_delete_statement(on_delete)
return '' if on_delete.blank?
return 'ON DELETE SET NULL' if on_delete == :nullify
"ON DELETE #{on_delete.upcase}"
end
2019-12-04 20:38:33 +05:30
def create_column_from(table, old, new, type: nil)
old_col = column_for(table, old)
new_type = type || old_col.type
add_column(table, new, new_type,
limit: old_col.limit,
precision: old_col.precision,
scale: old_col.scale)
# We set the default value _after_ adding the column so we don't end up
# updating any existing data with the default value. This isn't
# necessary since we copy over old values further down.
change_column_default(table, new, old_col.default) unless old_col.default.nil?
update_column_in_batches(table, new, Arel::Table.new(table)[old])
change_column_null(table, new, false) unless old_col.null
copy_indexes(table, old, new)
copy_foreign_keys(table, old, new)
end
2019-10-12 21:52:04 +05:30
def validate_timestamp_column_name!(column_name)
return if PERMITTED_TIMESTAMP_COLUMNS.member?(column_name)
raise <<~MESSAGE
Illegal timestamp column name! Got #{column_name}.
Must be one of: #{PERMITTED_TIMESTAMP_COLUMNS.to_a}
MESSAGE
end
def validate_not_in_transaction!(method_name, modifier = nil)
return unless transaction_open?
raise <<~ERROR
#{["`#{method_name}`", modifier].compact.join(' ')} cannot be run inside a transaction.
You can disable transactions by calling `disable_ddl_transaction!` in the body of
your migration class
ERROR
2018-12-05 23:21:45 +05:30
end
2020-04-08 14:13:33 +05:30
def with_migration_context(&block)
Gitlab::ApplicationContext.with_context(caller_id: self.class.to_s, &block)
end
2016-06-02 11:05:42 +05:30
end
end
end