debian-mirror-gitlab/lib/gitlab/database/migration_helpers.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1706 lines
68 KiB
Ruby
Raw Normal View History

2019-02-15 15:39:39 +05:30
# frozen_string_literal: true
2016-06-02 11:05:42 +05:30
module Gitlab
module Database
module MigrationHelpers
2022-07-16 23:28:13 +05:30
include Migrations::ReestablishedConnectionStack
2020-07-28 23:09:34 +05:30
include Migrations::BackgroundMigrationHelpers
2022-01-26 12:08:38 +05:30
include Migrations::BatchedBackgroundMigrationHelpers
2021-04-29 21:17:54 +05:30
include DynamicModelHelpers
2021-09-04 01:27:46 +05:30
include RenameTableHelpers
2021-10-27 15:23:28 +05:30
include AsyncIndexes::MigrationHelpers
2020-07-28 23:09:34 +05:30
2022-04-04 11:22:00 +05:30
# Delegates to the inherited implementation via `super`, defaulting
# `connection` to this object's own `connection` when the caller omits it.
def define_batchable_model(table_name, connection: self.connection)
super(table_name, connection: connection)
end
# Delegates to the inherited `each_batch` via `super`, defaulting
# `connection` to this object's own `connection`. Any other keyword
# arguments are forwarded unchanged.
def each_batch(table_name, connection: self.connection, **kwargs)
super(table_name, connection: connection, **kwargs)
end
# Delegates to the inherited `each_batch_range` via `super`, defaulting
# `connection` to this object's own `connection`. Any other keyword
# arguments are forwarded unchanged.
def each_batch_range(table_name, connection: self.connection, **kwargs)
super(table_name, connection: connection, **kwargs)
end
2020-06-23 00:09:42 +05:30
# https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
# Maximum length allowed for an identifier name (PostgreSQL limits
# identifiers to 63 bytes; see the link above).
MAX_IDENTIFIER_NAME_LENGTH = 63
2019-10-12 21:52:04 +05:30
# Columns the timestamp helpers below add/remove when the caller does not
# pass an explicit `:columns` option.
DEFAULT_TIMESTAMP_COLUMNS = %i[created_at updated_at].freeze
2017-09-10 17:25:29 +05:30
# Adds `created_at` and `updated_at` columns with timezone information.
#
# This method is an improved version of Rails' built-in method `add_timestamps`.
#
2019-10-12 21:52:04 +05:30
# By default, adds `created_at` and `updated_at` columns, but these can be specified as:
#
# add_timestamps_with_timezone(:my_table, columns: [:created_at, :deleted_at])
#
# This allows you to create just the timestamps you need, saving space.
#
2017-09-10 17:25:29 +05:30
# Available options are:
2019-10-12 21:52:04 +05:30
# :default - The default value for the column.
# :null - When set to `true` the column will allow NULL values.
2017-09-10 17:25:29 +05:30
# The default is to not allow NULL values.
2021-12-11 22:18:48 +05:30
# :columns - the column names to create. Must end with `_at`.
2019-10-12 21:52:04 +05:30
# Default value: `DEFAULT_TIMESTAMP_COLUMNS`
#
# All options are optional.
2017-09-10 17:25:29 +05:30
def add_timestamps_with_timezone(table_name, options = {})
  columns = options.fetch(:columns, DEFAULT_TIMESTAMP_COLUMNS)

  # Columns are NOT NULL unless the caller explicitly passes a truthy :null.
  allow_null = options[:null] || false

  columns.each do |column_name|
    # Each name is validated before its column is added (names must end
    # with `_at`, per the documentation above).
    validate_timestamp_column_name!(column_name)

    add_column(
      table_name,
      column_name,
      :datetime_with_timezone,
      default: options[:default],
      null: allow_null
    )
  end
end
2019-10-12 21:52:04 +05:30
# To be used in the `#down` method of migrations that
# use `#add_timestamps_with_timezone`.
2016-06-02 11:05:42 +05:30
#
2019-10-12 21:52:04 +05:30
# Available options are:
# :columns - the column names to remove.
# Default value: `DEFAULT_TIMESTAMP_COLUMNS`
#
# All options are optional.
def remove_timestamps(table_name, options = {})
  # Falls back to DEFAULT_TIMESTAMP_COLUMNS when no :columns option is given.
  options
    .fetch(:columns, DEFAULT_TIMESTAMP_COLUMNS)
    .each { |timestamp_column| remove_column(table_name, timestamp_column) }
end
2021-11-11 11:23:49 +05:30
# @deprecated Use `create_table` in V2 instead
2021-03-08 18:12:59 +05:30
#
# Creates a new table, optionally allowing the caller to add check constraints to the table.
# Aside from that addition, this method should behave identically to Rails' `create_table` method.
#
# Example:
#
# create_table_with_constraints :some_table do |t|
# t.integer :thing, null: false
# t.text :other_thing
#
# t.check_constraint :thing_is_not_null, 'thing IS NOT NULL'
# t.text_limit :other_thing, 255
# end
#
# See Rails' `create_table` for more info on the available arguments.
def create_table_with_constraints(table_name, **options, &block)
  # Captured because inside the singleton methods defined below `self` is
  # the table definition object, not this helper context.
  helper_context = self

  with_lock_retries do
    # Constraints are only collected while the table is being defined; they
    # are applied in a single ALTER TABLE once create_table has returned.
    check_constraints = []

    create_table(table_name, **options) do |t|
      t.define_singleton_method(:check_constraint) do |name, definition|
        helper_context.send(:validate_check_constraint_name!, name) # rubocop:disable GitlabSecurity/PublicSend

        check_constraints << { name: name, definition: definition }
      end

      t.define_singleton_method(:text_limit) do |column_name, limit, name: nil|
        # rubocop:disable GitlabSecurity/PublicSend
        name = helper_context.send(:text_limit_name, table_name, column_name, name: name)
        helper_context.send(:validate_check_constraint_name!, name)
        # rubocop:enable GitlabSecurity/PublicSend

        column_name = helper_context.quote_column_name(column_name)
        definition = "char_length(#{column_name}) <= #{limit}"

        check_constraints << { name: name, definition: definition }
      end

      t.instance_eval(&block) unless block.nil?
    end

    # No constraints were registered, so no follow-up statement is needed.
    next if check_constraints.empty?

    constraint_clauses = check_constraints.map do |constraint|
      "ADD CONSTRAINT #{quote_table_name(constraint[:name])} CHECK (#{constraint[:definition]})"
    end

    execute(<<~SQL)
      ALTER TABLE #{quote_table_name(table_name)}
      #{constraint_clauses.join(",\n")}
    SQL
  end
end
2019-10-12 21:52:04 +05:30
# Creates a new index, concurrently
2016-06-02 11:05:42 +05:30
#
# Example:
#
# add_concurrent_index :users, :some_column
#
# See Rails' `add_index` for more info on the available arguments.
def add_concurrent_index(table_name, column_name, options = {})
  if transaction_open?
    raise 'add_concurrent_index can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'
  end

  options = options.merge({ algorithm: :concurrently })

  if index_exists?(table_name, column_name, **options)
    name = options[:name] || index_name(table_name, column_name)

    # `table_name` may be schema-qualified ("schema.table"). Reversing the
    # split puts the table first and the optional schema second, so
    # `schema` is nil for an unqualified name.
    _, schema = table_name.to_s.split('.').reverse

    if index_invalid?(name, schema: schema)
      say "Index being recreated because the existing version was INVALID: table_name: #{table_name}, column_name: #{column_name}"

      remove_concurrent_index_by_name(table_name, name)
    else
      say "Index not created because it already exists (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}"

      return
    end
  end

  disable_statement_timeout do
    add_index(table_name, column_name, **options)
  end

  # We created this index. Now let's remove the queuing entry for async creation in case it's still there.
  unprepare_async_index(table_name, column_name, **options)
end
2021-12-11 22:18:48 +05:30
# Looks up `not indisvalid` for the named index in the PostgreSQL catalogs.
#
# index_name - The name of the index to look up.
# schema     - Optional schema name. When omitted, the lookup is scoped to
#              `current_schema()`.
#
# Returns whatever `connection.select_value` yields for the query.
def index_invalid?(index_name, schema: nil)
  quoted_index = connection.quote(index_name)
  # A given schema is quoted as a literal; otherwise the SQL function call
  # is embedded verbatim.
  schema_expression = schema ? connection.quote(schema) : 'current_schema()'

  lookup_sql = <<~SQL
    select not i.indisvalid
    from pg_class c
    inner join pg_index i
    on c.oid = i.indexrelid
    inner join pg_namespace n
    on n.oid = c.relnamespace
    where n.nspname = #{schema_expression}
    and c.relname = #{quoted_index}
  SQL

  connection.select_value(lookup_sql)
end
2019-10-12 21:52:04 +05:30
# Removes an existing index, concurrently
2017-08-17 22:00:37 +05:30
#
# Example:
#
# remove_concurrent_index :users, :some_column
#
# See Rails' `remove_index` for more info on the available arguments.
def remove_concurrent_index(table_name, column_name, options = {})
  if transaction_open?
    raise 'remove_concurrent_index can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'
  end

  options = options.merge({ algorithm: :concurrently })

  # A missing index is logged and skipped rather than treated as an error.
  unless index_exists?(table_name, column_name, **options)
    Gitlab::AppLogger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}"
    return
  end

  disable_statement_timeout do
    remove_index(table_name, **options.merge({ column: column_name }))
  end

  # We removed this index. Now let's make sure it's not queued for async creation.
  unprepare_async_index(table_name, column_name, **options)
end
2019-10-12 21:52:04 +05:30
# Removes an existing index, concurrently
2017-09-10 17:25:29 +05:30
#
# Example:
#
# remove_concurrent_index_by_name :users, "index_X_by_Y"
#
# See Rails' `remove_index` for more info on the available arguments.
def remove_concurrent_index_by_name(table_name, index_name, options = {})
  if transaction_open?
    raise 'remove_concurrent_index_by_name can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'
  end

  # Callers may pass `{ name: "..." }` as the second argument instead of a
  # bare name; unwrap it before validating.
  index_name = index_name[:name] if index_name.is_a?(Hash)
  raise 'remove_concurrent_index_by_name must get an index name as the second argument' if index_name.blank?

  options = options.merge({ algorithm: :concurrently })

  # A missing index is logged and skipped rather than treated as an error.
  unless index_exists_by_name?(table_name, index_name)
    Gitlab::AppLogger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, index_name: #{index_name}"
    return
  end

  disable_statement_timeout do
    remove_index(table_name, **options.merge({ name: index_name }))
  end

  # We removed this index. Now let's make sure it's not queued for async creation.
  unprepare_async_index_by_name(table_name, index_name, **options)
end
2017-08-17 22:00:37 +05:30
# Adds a foreign key with only minimal locking on the tables involved.
#
2019-10-12 21:52:04 +05:30
# This method only requires minimal locking
2017-08-17 22:00:37 +05:30
#
# source - The source table containing the foreign key.
# target - The target table the key points to.
# column - The name of the column to create the foreign key on.
2021-09-30 23:02:18 +05:30
# target_column - The name of the referenced column, defaults to "id".
2017-08-17 22:00:37 +05:30
# on_delete - The action to perform when associated data is removed,
# defaults to "CASCADE".
2020-01-01 13:55:28 +05:30
# name - The name of the foreign key.
2021-10-27 15:23:28 +05:30
# validate - Flag that controls whether the new foreign key will be validated after creation.
# If the flag is not set, the constraint will only be enforced for new data.
# reverse_lock_order - Flag that controls whether we should attempt to acquire locks in the reverse
# order of the ALTER TABLE. This can be useful in situations where the foreign
# key creation could deadlock with another process.
2019-09-30 21:07:59 +05:30
#
2021-10-27 15:23:28 +05:30
def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, target_column: :id, name: nil, validate: true, reverse_lock_order: false)
  # Transactions would result in ALTER TABLE locks being held for the
  # duration of the transaction, defeating the purpose of this method.
  if transaction_open?
    raise 'add_concurrent_foreign_key can not be run inside a transaction'
  end

  options = {
    column: column,
    on_delete: on_delete,
    name: name.presence || concurrent_foreign_key_name(source, column),
    primary_key: target_column
  }

  if foreign_key_exists?(source, target, **options)
    warning_message = "Foreign key not created because it exists already " \
      "(this may be due to an aborted migration or similar): " \
      "source: #{source}, target: #{target}, column: #{options[:column]}, "\
      "name: #{options[:name]}, on_delete: #{options[:on_delete]}"

    Gitlab::AppLogger.warn warning_message
  else
    # Using NOT VALID allows us to create a key without immediately
    # validating it. This means we keep the ALTER TABLE lock only for a
    # short period of time. The key _is_ enforced for any newly created
    # data.

    with_lock_retries do
      # With reverse_lock_order the target table is locked before the
      # source table, ahead of the ALTER TABLE below.
      execute("LOCK TABLE #{target}, #{source} IN SHARE ROW EXCLUSIVE MODE") if reverse_lock_order

      execute(<<~SQL)
        ALTER TABLE #{source}
        ADD CONSTRAINT #{options[:name]}
        FOREIGN KEY (#{options[:column]})
        REFERENCES #{target} (#{target_column})
        #{on_delete_statement(options[:on_delete])}
        NOT VALID;
      SQL
    end
  end

  # Validate the existing constraint. This can potentially take a very
  # long time to complete, but fortunately does not lock the source table
  # while running.
  # Disable this check by passing `validate: false` to the method call
  # The check will be enforced for new data (inserts) coming in,
  # but validating existing data is delayed.
  #
  # Note this is a no-op in case the constraint is VALID already
  if validate
    disable_statement_timeout do
      execute("ALTER TABLE #{source} VALIDATE CONSTRAINT #{options[:name]};")
    end
  end
end
2020-03-13 15:44:24 +05:30
# Validates an existing foreign key constraint on +source+.
#
# source - The table holding the foreign key.
# column - The column the key is defined on; used to derive the default
#          constraint name via concurrent_foreign_key_name.
# name   - Optional explicit constraint name.
#
# Raises if foreign_key_exists? does not find a key with that name.
def validate_foreign_key(source, column, name: nil)
  fk_name = name || concurrent_foreign_key_name(source, column)

  unless foreign_key_exists?(source, name: fk_name)
    raise missing_schema_object_message(source, "foreign key", fk_name)
  end

  disable_statement_timeout do
    execute("ALTER TABLE #{source} VALIDATE CONSTRAINT #{fk_name};")
  end
end
2020-01-01 13:55:28 +05:30
# Reports whether +source+ has a foreign key matching the given criteria.
#
# source  - The table whose foreign keys are inspected.
# target  - Optional referenced table; compared (as a string) against each
#           key's `to_table` via tables_match?.
# options - Remaining keyword arguments, compared against each key's
#           options via options_match?.
def foreign_key_exists?(source, target = nil, **options)
  foreign_keys(source).any? do |foreign_key|
    tables_match?(target.to_s, foreign_key.to_table.to_s) &&
      options_match?(foreign_key.options, options)
  end
end
2017-08-17 22:00:37 +05:30
# Returns the name for a concurrent foreign key.
#
# PostgreSQL constraint names have a limit of 63 bytes. The logic used
# here is based on Rails' foreign_key_name() method, which unfortunately
# is private so we can't rely on it directly.
2020-04-08 14:13:33 +05:30
#
# prefix:
# - The default prefix is `fk_` for backward compatibility with the existing
# concurrent foreign key helpers.
# - For standard rails foreign keys the prefix is `fk_rails_`
#
def concurrent_foreign_key_name(table, column, prefix: 'fk_')
  identifier = "#{table}_#{column}_fk"
  # Only the first 10 hex characters of the digest are kept, so the result
  # length depends on the prefix alone, not on the table/column names.
  hashed_identifier = Digest::SHA256.hexdigest(identifier).first(10)

  "#{prefix}#{hashed_identifier}"
end
2016-08-24 12:49:21 +05:30
# Long-running migrations may take more than the timeout allowed by
# the database. Disable the session's statement timeout to ensure
2019-10-12 21:52:04 +05:30
# migrations don't get killed prematurely.
2018-11-20 20:47:30 +05:30
#
# There are two possible ways to disable the statement timeout:
#
# - Per transaction (this is the preferred and default mode)
# - Per connection (requires a cleanup after the execution)
#
# When using a per connection disable statement, code must be inside
2021-10-27 15:23:28 +05:30
# a block so we can automatically execute `RESET statement_timeout` after block finishes
2018-11-20 20:47:30 +05:30
# otherwise the statement will still be disabled until connection is dropped
2021-10-27 15:23:28 +05:30
# or `RESET statement_timeout` is executed
2016-08-24 12:49:21 +05:30
def disable_statement_timeout
  if block_given?
    if statement_timeout_disabled?
      # Don't do anything if the statement_timeout is already disabled
      # Allows for nested calls of disable_statement_timeout without
      # resetting the timeout too early (before the outer call ends)
      yield
    else
      begin
        execute('SET statement_timeout TO 0')

        yield
      ensure
        # Runs even when the block raises, so the setting is always reset.
        execute('RESET statement_timeout')
      end
    end
  else
    unless transaction_open?
      raise <<~ERROR
        Cannot call disable_statement_timeout() without a transaction open or outside of a transaction block.
        If you don't want to use a transaction wrap your code in a block call:
        disable_statement_timeout { # code that requires disabled statement here }
        This will make sure statement_timeout is disabled before and reset after the block execution is finished.
      ERROR
    end

    # SET LOCAL only lasts until the end of the current transaction, which
    # is why an open transaction is required on this path.
    execute('SET LOCAL statement_timeout TO 0')
  end
end
2020-03-13 15:44:24 +05:30
# Executes the block with a retry mechanism that alters the +lock_timeout+ and +sleep_time+ between attempts.
# The timings can be controlled via the +timing_configuration+ parameter.
# If the lock was not acquired within the retry period, a last attempt is made without using +lock_timeout+.
#
2021-11-11 11:23:49 +05:30
# Note this helper uses subtransactions when run inside an already open transaction.
#
2020-03-13 15:44:24 +05:30
# ==== Examples
# # Invoking without parameters
# with_lock_retries do
# drop_table :my_table
# end
#
# # Invoking with custom +timing_configuration+
# t = [
# [1.second, 1.second],
# [2.seconds, 2.seconds]
# ]
#
# with_lock_retries(timing_configuration: t) do
# drop_table :my_table # this will be retried twice
# end
#
# # Disabling the retries using an environment variable
# > export DISABLE_LOCK_RETRIES=true
#
# with_lock_retries do
# drop_table :my_table # one invocation, it will not retry at all
# end
#
# ==== Parameters
# * +timing_configuration+ - [[ActiveSupport::Duration, ActiveSupport::Duration], ...] lock timeout for the block, sleep time before the next iteration, defaults to `Gitlab::Database::WithLockRetries::DEFAULT_TIMING_CONFIGURATION`
# * +logger+ - [Gitlab::JsonLogger]
# * +env+ - [Hash] custom environment hash, see the example with `DISABLE_LOCK_RETRIES`
2021-01-03 14:25:43 +05:30
def with_lock_retries(*args, **kwargs, &block)
  # :raise_on_exhaustion belongs to #run, not to the constructor, so it is
  # removed from the keyword arguments before they are merged below.
  raise_on_exhaustion = !!kwargs.delete(:raise_on_exhaustion)

  # Caller-supplied keywords take precedence over these defaults.
  merged_args = {
    connection: connection,
    klass: self.class,
    logger: Gitlab::BackgroundMigration::Logger,
    allow_savepoints: true
  }.merge(kwargs)

  Gitlab::Database::WithLockRetries.new(**merged_args)
    .run(raise_on_exhaustion: raise_on_exhaustion, &block)
end
2017-08-17 22:00:37 +05:30
# Thin delegator: returns whatever `Database.true_value` returns.
def true_value
Database.true_value
end
# Thin delegator: returns whatever `Database.false_value` returns.
def false_value
  Database.false_value
end
2016-06-02 11:05:42 +05:30
# Updates the value of a column in batches.
#
# This method updates the table in batches of 5% of the total row count.
2019-07-07 11:18:12 +05:30
# A `batch_size` option can also be passed to set this to a fixed number.
2016-06-22 15:30:34 +05:30
# This method will continue updating rows until no rows remain.
#
# When given a block this method will yield two values to the block:
#
# 1. An instance of `Arel::Table` for the table that is being updated.
# 2. The query to run as an Arel object.
#
# By supplying a block one can add extra conditions to the queries being
# executed. Note that the same block is used for _all_ queries.
#
# Example:
#
# update_column_in_batches(:projects, :foo, 10) do |table, query|
# query.where(table[:some_column].eq('hello'))
# end
#
# This would result in this method updating only rows where
# `projects.some_column` equals "hello".
2016-06-02 11:05:42 +05:30
#
# table - The name of the table.
# column - The name of the column to update.
# value - The value for the column.
2016-06-22 15:30:34 +05:30
#
2018-03-17 18:26:18 +05:30
# The `value` argument is typically a literal. To perform a computed
# update, an Arel literal can be used instead:
#
# update_value = Arel.sql('bar * baz')
#
# update_column_in_batches(:projects, :foo, update_value) do |table, query|
# query.where(table[:some_column].eq('hello'))
# end
#
2016-06-22 15:30:34 +05:30
# Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop
# determines this method to be too complex while there's no way to make it
# less "complex" without introducing extra methods (which actually will
# make things _more_ complex).
#
2020-04-22 19:07:51 +05:30
# `batch_column_name` option is for tables without primary key, in this
2020-05-24 23:13:21 +05:30
# case another unique integer column can be used. Example: :user_id
2020-04-22 19:07:51 +05:30
#
2016-06-22 15:30:34 +05:30
# rubocop: disable Metrics/AbcSize
2022-08-27 11:52:29 +05:30
def update_column_in_batches(table_name, column, value, batch_size: nil, batch_column_name: :id, disable_lock_writes: false)
  if transaction_open?
    raise 'update_column_in_batches can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'
  end

  table = Arel::Table.new(table_name)

  count_arel = table.project(Arel.star.count.as('count'))
  count_arel = yield table, count_arel if block_given?

  total = exec_query(count_arel.to_sql).to_a.first['count'].to_i

  return if total == 0

  if batch_size.nil?
    # Update in batches of 5% until we run out of any rows to update.
    batch_size = ((total / 100.0) * 5.0).ceil
    max_size = 1000

    # The upper limit is 1000 to ensure we don't lock too many rows. For
    # example, for "merge_requests" even 1% of the table is around 35 000
    # rows for GitLab.com.
    batch_size = max_size if batch_size > max_size
  end

  start_arel = table.project(table[batch_column_name]).order(table[batch_column_name].asc).take(1)
  start_arel = yield table, start_arel if block_given?
  start_row = exec_query(start_arel.to_sql).to_a.first

  # The rows counted above can disappear before this query runs (this
  # method never runs inside a transaction). With no first row there is
  # nothing to update.
  return unless start_row

  start_id = start_row[batch_column_name.to_s].to_i

  loop do
    # Finds the row `batch_size` positions after start_id; its id becomes
    # the exclusive upper bound of this batch and the start of the next.
    stop_arel = table.project(table[batch_column_name])
      .where(table[batch_column_name].gteq(start_id))
      .order(table[batch_column_name].asc)
      .take(1)
      .skip(batch_size)

    stop_arel = yield table, stop_arel if block_given?
    stop_row = exec_query(stop_arel.to_sql).to_a.first

    update_arel = Arel::UpdateManager.new
      .table(table)
      .set([[table[column], value]])
      .where(table[batch_column_name].gteq(start_id))

    if stop_row
      stop_id = stop_row[batch_column_name.to_s].to_i
      start_id = stop_id
      update_arel = update_arel.where(table[batch_column_name].lt(stop_id))
    end

    update_arel = yield table, update_arel if block_given?

    transaction do
      # The final `true` argument makes set_config apply to the current
      # transaction only, so the setting is limited to this batch.
      execute("SELECT set_config('lock_writes.#{table_name}', 'false', true)") if disable_lock_writes
      execute(update_arel.to_sql)
    end

    # There are no more rows left to update.
    break unless stop_row
  end
end
# Adds a column with a default value without locking an entire table.
#
2020-05-24 23:13:21 +05:30
# @deprecated With PostgreSQL 11, adding columns with a default does not lead to a table rewrite anymore.
# As such, this method is not needed anymore and the default `add_column` helper should be used.
# This helper is subject to be removed in a >13.0 release.
def add_column_with_default(table, column, type, default:, limit: nil, allow_null: false)
  raise 'Deprecated: add_column_with_default does not support being passed blocks anymore' if block_given?

  # Plain add_column is all that remains; `allow_null` maps to its :null option.
  add_column(table, column, type, default: default, limit: limit, null: allow_null)
end
2017-08-17 22:00:37 +05:30
# Renames a column without requiring downtime.
#
# Concurrent renames work by using database triggers to ensure both the
# old and new column are in sync. However, this method will _not_ remove
# the triggers or the old column automatically; this needs to be done
# manually in a post-deployment migration. This can be done using the
# method `cleanup_concurrent_column_rename`.
#
# table - The name of the database table containing the column.
# old - The old column name.
# new - The new column name.
# type - The type of the new column. If no type is given the old column's
# type is used.
2020-05-24 23:13:21 +05:30
# batch_column_name - option is for tables without primary key, in this
# case another unique integer column can be used. Example: :user_id
2020-07-28 23:09:34 +05:30
def rename_column_concurrently(table, old, new, type: nil, type_cast_function: nil, batch_column_name: :id)
  unless column_exists?(table, batch_column_name)
    raise "Column #{batch_column_name} does not exist on #{table}"
  end

  if transaction_open?
    raise 'rename_column_concurrently can not be run inside a transaction'
  end

  check_trigger_permissions!(table)

  # The new column is created from the old one first; the triggers are
  # installed afterwards.
  create_column_from(table, old, new, type: type, batch_column_name: batch_column_name, type_cast_function: type_cast_function)

  install_rename_triggers(table, old, new)
end
2019-12-04 20:38:33 +05:30
# Reverses operations performed by rename_column_concurrently.
#
# This method takes care of removing previously installed triggers as well
# as removing the new column.
#
# table - The name of the database table.
# old - The name of the old column.
# new - The name of the new column.
2019-10-12 21:52:04 +05:30
def undo_rename_column_concurrently(table, old, new)
  trigger_name = rename_trigger_name(table, old, new)

  check_trigger_permissions!(table)

  # Triggers are dropped before the new column is removed.
  remove_rename_triggers(table, trigger_name)

  remove_column(table, new)
end
2018-03-17 18:26:18 +05:30
# Installs triggers in a table that keep a new column in sync with an old
# one.
#
# table - The name of the table to install the trigger in.
# old_column - The name of the old column.
# new_column - The name of the new column.
2021-06-08 01:23:25 +05:30
# trigger_name - The name of the trigger to use (optional).
def install_rename_triggers(table, old, new, trigger_name: nil)
  # The trigger object is bound to this helper's connection.
  Gitlab::Database::UnidirectionalCopyTrigger
    .on_table(table, connection: connection)
    .create(old, new, trigger_name: trigger_name)
end
# Removes the triggers used for renaming a column concurrently.
def remove_rename_triggers(table, trigger)
  # The trigger object is bound to this helper's connection.
  Gitlab::Database::UnidirectionalCopyTrigger
    .on_table(table, connection: connection)
    .drop(trigger)
end
# Returns the (base) name to use for triggers when renaming columns.
def rename_trigger_name(table, old, new)
  # Naming is delegated to the trigger class.
  Gitlab::Database::UnidirectionalCopyTrigger
    .on_table(table, connection: connection)
    .name(old, new)
end
# Changes the type of a column concurrently.
#
# table - The table containing the column.
# column - The name of the column to change.
# new_type - The new column type.
2020-11-24 15:15:51 +05:30
def change_column_type_concurrently(table, column, new_type, type_cast_function: nil, batch_column_name: :id)
  # The type change is implemented as a concurrent rename into a temporary
  # column carrying the new type.
  temp_column = "#{column}_for_type_change"

  rename_column_concurrently(table, column, temp_column, type: new_type, type_cast_function: type_cast_function, batch_column_name: batch_column_name)
end
2021-01-03 14:25:43 +05:30
# Reverses operations performed by change_column_type_concurrently.
#
# table - The table containing the column.
# column - The name of the column to change.
def undo_change_column_type_concurrently(table, column)
  # Undoes the rename into the "<column>_for_type_change" temporary column.
  undo_rename_column_concurrently(table, column, "#{column}_for_type_change")
end
2017-08-17 22:00:37 +05:30
# Performs cleanup of a concurrent type change.
#
# table - The table containing the column.
# column - The name of the column to change.
# new_type - The new column type.
def cleanup_concurrent_column_type_change(table, column)
  replacement_column = "#{column}_for_type_change"

  # Both steps run in one transaction so the table is never left without a
  # column under the original name; otherwise data could be inconsistent.
  transaction do
    cleanup_concurrent_column_rename(table, column, replacement_column)
    rename_column(table, replacement_column, column)
  end
end
2021-01-03 14:25:43 +05:30
# Reverses operations performed by cleanup_concurrent_column_type_change.
#
# table - The table containing the column.
# column - The name of the column to change.
# old_type - The type of the original column used with change_column_type_concurrently.
# type_cast_function - Required if the conversion back to the original type is not automatic
# batch_column_name - option for tables without a primary key, in this case
# another unique integer column can be used. Example: :user_id
2021-02-22 17:27:13 +05:30
def undo_cleanup_concurrent_column_type_change(table, column, old_type, type_cast_function: nil, batch_column_name: :id, limit: nil)
  Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_ddl_mode!

  temp_column = "#{column}_for_type_change"

  # Using a descriptive name that includes the original column's name risks
  # taking us above the 63 character limit, so we use a hash
  identifier = "#{table}_#{column}_for_type_change"
  hashed_identifier = Digest::SHA256.hexdigest(identifier).first(10)
  temp_undo_cleanup_column = "tmp_undo_cleanup_column_#{hashed_identifier}"

  unless column_exists?(table, batch_column_name)
    raise "Column #{batch_column_name} does not exist on #{table}"
  end

  if transaction_open?
    raise 'undo_cleanup_concurrent_column_type_change can not be run inside a transaction'
  end

  check_trigger_permissions!(table)

  begin
    create_column_from(
      table,
      column,
      temp_undo_cleanup_column,
      type: old_type,
      batch_column_name: batch_column_name,
      type_cast_function: type_cast_function,
      limit: limit
    )

    transaction do
      # This has to be performed in a transaction as otherwise we might
      # have inconsistent data.
      rename_column(table, column, temp_column)
      rename_column(table, temp_undo_cleanup_column, column)

      install_rename_triggers(table, column, temp_column)
    end
  rescue StandardError
    # create_column_from can not run inside a transaction, which means
    # that there is a risk that if any of the operations that follow it
    # fail, we'll be left with an inconsistent schema
    # For those reasons, we make sure that we drop temp_undo_cleanup_column
    # if an error is caught
    if column_exists?(table, temp_undo_cleanup_column)
      remove_column(table, temp_undo_cleanup_column)
    end

    # Re-raise the original error after the cleanup.
    raise
  end
end
2017-08-17 22:00:37 +05:30
# Cleans up a concurrent column name.
#
# This method takes care of removing previously installed triggers as well
# as removing the old column.
#
# table - The name of the database table.
# old - The name of the old column.
# new - The name of the new column.
def cleanup_concurrent_column_rename(table, old, new)
  # Resolve the trigger name first, before anything is modified.
  sync_trigger = rename_trigger_name(table, old, new)

  # Raises when the database user is not allowed to manage triggers.
  check_trigger_permissions!(table)

  # Drop the triggers, then the old column itself.
  remove_rename_triggers(table, sync_trigger)
  remove_column(table, old)
end
2019-12-04 20:38:33 +05:30
# Reverses the operations performed by cleanup_concurrent_column_rename.
#
# This method adds back the old_column removed
# by cleanup_concurrent_column_rename.
# It also adds back the (old_column > new_column) trigger that is removed
# by cleanup_concurrent_column_rename.
#
# table - The name of the database table containing the column.
# old - The old column name.
# new - The new column name.
# type - The type of the old column. If no type is given the new column's
# type is used.
2020-05-24 23:13:21 +05:30
# batch_column_name - option is for tables without primary key, in this
# case another unique integer column can be used. Example: :user_id
def undo_cleanup_concurrent_column_rename(table, old, new, type: nil, batch_column_name: :id)
  # batch_column_name is forwarded to create_column_from, so make sure it
  # exists before doing any work.
  raise "Column #{batch_column_name} does not exist on #{table}" unless column_exists?(table, batch_column_name)

  raise 'undo_cleanup_concurrent_column_rename can not be run inside a transaction' if transaction_open?

  check_trigger_permissions!(table)

  # Re-create the old column from the new one, then reinstall the triggers
  # for the (old, new) pair.
  create_column_from(table, new, old, type: type, batch_column_name: batch_column_name)
  install_rename_triggers(table, old, new)
end
2021-06-08 01:23:25 +05:30
# Builds the name of the temporary column that shadows `column` while it is
# being converted: the column name followed by "_convert_to_bigint".
def convert_to_bigint_column(column)
  column.to_s + '_convert_to_bigint'
end
# Initializes the conversion of a set of integer columns to bigint
2021-03-08 18:12:59 +05:30
#
# It can be used for converting both a Primary Key and any Foreign Keys
# that may reference it or any other integer column that we may want to
# upgrade (e.g. columns that store IDs, but are not set as FKs).
#
# - For primary keys and Foreign Keys (or other columns) defined as NOT NULL,
# the new bigint column is added with a hardcoded NOT NULL DEFAULT 0
# which allows us to skip a very costly verification step once we
# are ready to switch it.
# This is crucial for Primary Key conversions, because setting a column
# as the PK converts even check constraints to NOT NULL constraints
# and forces an inline re-verification of the whole table.
2021-04-29 21:17:54 +05:30
# - It sets up a trigger to keep the two columns in sync.
#
# Note: this helper is intended to be used in a regular (pre-deployment) migration.
#
# This helper is part 1 of a multi-step migration process:
2021-06-08 01:23:25 +05:30
# 1. initialize_conversion_of_integer_to_bigint to create the new columns and database trigger
2021-04-29 21:17:54 +05:30
# 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations
# 3. remaining steps TBD, see #288005
#
# table - The name of the database table containing the column
2021-06-08 01:23:25 +05:30
# columns - The name, or array of names, of the column(s) that we want to convert to bigint.
2021-04-29 21:17:54 +05:30
# primary_key - The name of the primary key column (most often :id)
2021-06-08 01:23:25 +05:30
def initialize_conversion_of_integer_to_bigint(table, columns, primary_key: :id)
  # All the work happens in the shared helper; this entry point only fixes
  # the data type of the temporary columns to :bigint.
  create_temporary_columns_and_triggers(
    table,
    columns,
    primary_key: primary_key,
    data_type: :bigint
  )
end
2021-06-08 01:23:25 +05:30
# Reverts `initialize_conversion_of_integer_to_bigint`
#
# table - The name of the database table containing the columns
# columns - The name, or array of names, of the column(s) that we're converting to bigint.
def revert_initialize_conversion_of_integer_to_bigint(table, columns)
  source_columns = Array.wrap(columns)
  shadow_columns = source_columns.map { |name| convert_to_bigint_column(name) }

  # The trigger name is derived from the full list of source and temporary
  # columns.
  sync_trigger = rename_trigger_name(table, source_columns, shadow_columns)
  remove_rename_triggers(table, sync_trigger)

  shadow_columns.each do |shadow|
    remove_column(table, shadow)
  end
end
2021-11-11 11:23:49 +05:30
# `cleanup_conversion_of_integer_to_bigint` is a second name for
# `revert_initialize_conversion_of_integer_to_bigint`: both names run the
# same implementation.
alias_method :cleanup_conversion_of_integer_to_bigint, :revert_initialize_conversion_of_integer_to_bigint
# Reverts `cleanup_conversion_of_integer_to_bigint`
#
# table - The name of the database table containing the columns
# columns - The name, or array of names, of the column(s) that we have converted to bigint.
# primary_key - The name of the primary key column (most often :id)
def restore_conversion_of_integer_to_bigint(table, columns, primary_key: :id)
  # Same shared helper as the initialization step, but the temporary columns
  # are created with the :int data type.
  create_temporary_columns_and_triggers(
    table,
    columns,
    primary_key: primary_key,
    data_type: :int
  )
end
2021-06-08 01:23:25 +05:30
# Backfills the new columns used in an integer-to-bigint conversion using background migrations.
2021-04-29 21:17:54 +05:30
#
# - This helper should be called from a post-deployment migration.
2021-06-08 01:23:25 +05:30
# - In order for this helper to work properly, the new columns must be first initialized with
2021-04-29 21:17:54 +05:30
# the `initialize_conversion_of_integer_to_bigint` helper.
# - It tracks the scheduled background jobs through Gitlab::Database::BackgroundMigration::BatchedMigration,
2021-03-08 18:12:59 +05:30
# which allows a more thorough check that all jobs succeeded in the
# cleanup migration and is way faster for very large tables.
#
2021-04-29 21:17:54 +05:30
# Note: this helper is intended to be used in a post-deployment migration, to ensure any new code is
# deployed (including background job changes) before we begin processing the background migration.
#
# This helper is part 2 of a multi-step migration process:
2021-06-08 01:23:25 +05:30
# 1. initialize_conversion_of_integer_to_bigint to create the new columns and database trigger
2021-04-29 21:17:54 +05:30
# 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations
# 3. remaining steps TBD, see #288005
2021-03-08 18:12:59 +05:30
#
# table - The name of the database table containing the column
2021-06-08 01:23:25 +05:30
# columns - The name, or an array of names, of the column(s) we want to convert to bigint.
2021-03-08 18:12:59 +05:30
# primary_key - The name of the primary key column (most often :id)
# batch_size - The number of rows to schedule in a single background migration
# sub_batch_size - The smaller batches that will be used by each scheduled job
# to update the table. Useful to keep each update at ~100ms while executing
# more updates per interval (2.minutes)
# Note that each execution of a sub-batch adds a constant 100ms sleep
# time in between the updates, which must be taken into account
# while calculating the batch, sub_batch and interval values.
# interval - The time interval between every background migration
#
# example:
# Assume that we have figured out that updating 200 records of the events
# table takes ~100ms on average.
# We can set the sub_batch_size to 200, leave the interval to the default
# and set the batch_size to 50_000 which will require
# ~50s = (50000 / 200) * (0.1 + 0.1) to complete and leaves breathing space
# between the scheduled jobs
2021-04-29 21:17:54 +05:30
def backfill_conversion_of_integer_to_bigint(
  table,
  columns,
  primary_key: :id,
  batch_size: 20_000,
  sub_batch_size: 1000,
  interval: 2.minutes
)
  raise "Table #{table} does not exist" unless table_exists?(table)
  raise "Column #{primary_key} does not exist on #{table}" unless column_exists?(table, primary_key)

  # Maps every source column to its temporary column. Both have to exist
  # already; the temporary one is not created here.
  conversions = Array.wrap(columns).each_with_object({}) do |column, mapping|
    raise ArgumentError, "Column #{column} does not exist on #{table}" unless column_exists?(table, column)

    temporary_name = convert_to_bigint_column(column)
    raise ArgumentError, "Column #{temporary_name} does not exist on #{table}" unless column_exists?(table, temporary_name)

    mapping[column] = temporary_name
  end

  source_columns = conversions.keys
  temporary_columns = conversions.values

  queue_batched_background_migration(
    'CopyColumnUsingBackgroundMigrationJob',
    table,
    primary_key,
    source_columns,
    temporary_columns,
    job_interval: interval,
    batch_size: batch_size,
    sub_batch_size: sub_batch_size
  )
end
2021-06-08 01:23:25 +05:30
# Reverts `backfill_conversion_of_integer_to_bigint`
#
# table - The name of the database table containing the column
# columns - The name, or an array of names, of the column(s) we want to convert to bigint.
# primary_key - The name of the primary key column (most often :id)
def revert_backfill_conversion_of_integer_to_bigint(table, columns, primary_key: :id)
  source_columns = Array.wrap(columns)
  temporary_columns = source_columns.map { |name| convert_to_bigint_column(name) }

  # job_arguments is matched against the JSON form of
  # [source columns, temporary columns].
  bindings = {
    job_class_name: 'CopyColumnUsingBackgroundMigrationJob',
    table_name: table,
    column_name: primary_key,
    job_arguments: [source_columns, temporary_columns].to_json
  }

  conditions = ActiveRecord::Base.sanitize_sql([
    'job_class_name = :job_class_name AND table_name = :table_name AND column_name = :column_name AND job_arguments = :job_arguments',
    bindings
  ])

  execute("DELETE FROM batched_background_migrations WHERE #{conditions}")
end
2022-06-21 17:19:12 +05:30
# Makes sure the batched background migration identified by the given
# configuration is finished.
#
# job_class_name - The job class name of the batched migration.
# table_name - The table name of the batched migration.
# column_name - The column name of the batched migration.
# job_arguments - The job arguments of the batched migration.
# finalize - When true (the default), an unfinished migration is finalized
#            before its status is checked again.
#
# Logs a warning and returns when no matching migration record is found.
# Raises when the migration is still not finished at the end.
def ensure_batched_background_migration_is_finished(job_class_name:, table_name:, column_name:, job_arguments:, finalize: true)
  Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_dml_mode!

  configuration = {
    job_class_name: job_class_name,
    table_name: table_name,
    column_name: column_name,
    job_arguments: job_arguments
  }

  Gitlab::Database::BackgroundMigration::BatchedMigration.reset_column_information
  migration = Gitlab::Database::BackgroundMigration::BatchedMigration.find_for_configuration(
    Gitlab::Database.gitlab_schemas_for_connection(connection),
    job_class_name, table_name, column_name, job_arguments
  )

  if migration.nil?
    return Gitlab::AppLogger.warn "Could not find batched background migration for the given configuration: #{configuration}"
  end

  return if migration.finished?

  finalize_batched_background_migration(**configuration) if finalize

  # Re-read the record: finalizing may have changed its status.
  return if migration.reload.finished? # rubocop:disable Cop/ActiveRecordAssociationReload

  current_status = migration.status_name
  # Commas are escaped with a backslash in the suggested rake command.
  escaped_arguments = job_arguments.to_json.gsub(',', '\,')

  raise "Expected batched background migration for the given configuration to be marked as 'finished', " \
    "but it is '#{current_status}':" \
    "\t#{configuration}" \
    "\n\n" \
    "Finalize it manually by running the following command in a `bash` or `sh` shell:" \
    "\n\n" \
    "\tsudo gitlab-rake gitlab:background_migrations:finalize[#{job_class_name},#{table_name},#{column_name},'#{escaped_arguments}']" \
    "\n\n" \
    "For more information, check the documentation" \
    "\n\n" \
    "\thttps://docs.gitlab.com/ee/user/admin_area/monitoring/background_migrations.html#database-migrations-failing-because-of-batched-background-migration-not-finished"
end
2017-08-17 22:00:37 +05:30
# Returns an Array containing the indexes for the given column
def indexes_for(table, column)
  # Index column lists are compared against the string form of the name.
  column_name = column.to_s

  indexes(table).select do |candidate|
    candidate.columns.include?(column_name)
  end
end
# Returns an Array containing the foreign keys for the given column.
def foreign_keys_for(table, column)
  # Foreign key columns are compared against the string form of the name.
  column_name = column.to_s

  foreign_keys(table).select do |foreign_key|
    foreign_key.column == column_name
  end
end
# Copies all indexes for the old column to a new column.
#
# table - The table containing the columns and indexes.
# old - The old column.
# new - The new column.
def copy_indexes(table, old, new)
  old = old.to_s
  new = new.to_s

  indexes_for(table, old).each do |index|
    # The new index name is derived by substituting the column name, which
    # only works when the existing name mentions the old column (it would not
    # for a name such as "ci_taggings_idx").
    unless index.name.include?(old)
      raise "The index #{index.name} can not be copied as it does not "\
        "mention the old column. You have to rename this index manually first."
    end

    new_columns = index.columns.map { |column| column == old ? new : column }

    options = {
      unique: index.unique,
      name: index.name.gsub(old, new),
      length: index.lengths,
      order: index.orders
    }
    options[:using] = index.using if index.using
    options[:where] = index.where if index.where

    if index.opclasses.present?
      opclasses = index.opclasses.dup

      # Move the operator class of the old column (if any) over to the new
      # column.
      opclasses[new] = opclasses.delete(old) if opclasses[old]

      options[:opclass] = opclasses
    end

    add_concurrent_index(table, new_columns, options)
  end
end
# Copies all foreign keys for the old column to the new column.
#
# table - The table containing the columns and indexes.
# old - The old column.
# new - The new column.
def copy_foreign_keys(table, old, new)
  foreign_keys_for(table, old).each do |foreign_key|
    # Only the tables and the ON DELETE behaviour are carried over; the
    # foreign key is created on the new column.
    add_concurrent_foreign_key(
      foreign_key.from_table,
      foreign_key.to_table,
      column: new,
      on_delete: foreign_key.on_delete
    )
  end
end
# Returns the column for the given table and column name.
def column_for(table, name)
  wanted = name.to_s
  match = columns(table).find { |candidate| candidate.name == wanted }

  # A missing column is reported with the generic "missing schema object"
  # message.
  raise(missing_schema_object_message(table, "column", wanted)) if match.nil?

  match
end
2018-12-13 13:39:08 +05:30
# This will replace the first occurrence of a string in a column with
2019-10-12 21:52:04 +05:30
# the replacement using `regexp_replace`
2017-08-17 22:00:37 +05:30
def replace_sql(column, pattern, replacement)
  # Pattern and replacement are passed to the SQL function as quoted
  # strings; the column node is passed through untouched.
  arguments = [
    column,
    Arel::Nodes::Quoted.new(pattern.to_s),
    Arel::Nodes::Quoted.new(replacement.to_s)
  ]

  regexp_replace = Arel::Nodes::NamedFunction.new("regexp_replace", arguments)

  Arel::Nodes::SqlLiteral.new(regexp_replace.to_sql)
end
2017-09-10 17:25:29 +05:30
2022-04-04 11:22:00 +05:30
# Removes a foreign key, but only when it exists.
#
# source - The table holding the foreign key.
# target - The referenced table (optional).
# kwargs - Options handed to foreign_key_exists? and remove_foreign_key.
#          :reverse_lock_order is consumed here and never forwarded: when it
#          is truthy, a target is given and a transaction is open, both
#          tables are locked (target first) before the key is removed.
def remove_foreign_key_if_exists(source, target = nil, **kwargs)
  lock_target_first = kwargs.delete(:reverse_lock_order)

  return unless foreign_key_exists?(source, target, **kwargs)

  execute("LOCK TABLE #{target}, #{source} IN ACCESS EXCLUSIVE MODE") if target && lock_target_first && transaction_open?

  tables = target ? [source, target] : [source]
  remove_foreign_key(*tables, **kwargs)
end
2021-06-08 01:23:25 +05:30
# Forwards all arguments to remove_foreign_key and ignores an ArgumentError
# raised by it. Returns nil in that case; other errors propagate.
def remove_foreign_key_without_error(*args, **kwargs)
  remove_foreign_key(*args, **kwargs)
rescue ArgumentError
  # Deliberately ignored.
  nil
end
2018-03-17 18:26:18 +05:30
# Moves jobs from one Sidekiq queue to another, one entry per Redis call,
# until the source queue reports a length of zero.
#
# queue_from - Name of the source queue (without the "queue:" prefix).
# to - Name of the destination queue (without the "queue:" prefix).
def sidekiq_queue_migrate(queue_from, to:)
  source_key = "queue:#{queue_from}"
  destination_key = "queue:#{to}"

  while sidekiq_queue_length(queue_from).positive?
    Sidekiq.redis { |redis| redis.rpoplpush source_key, destination_key }
  end
end
# Returns the LLEN of the Redis list "queue:<queue_name>".
def sidekiq_queue_length(queue_name)
  redis_key = "queue:#{queue_name}"

  Sidekiq.redis { |redis| redis.llen(redis_key) }
end
# Raises unless Grant.create_and_execute_trigger? reports that triggers can
# be managed on the given table.
#
# table - The name of the table the triggers would be installed on.
#
# The error message names the database and user and includes the statement a
# superuser can run to fix the problem.
def check_trigger_permissions!(table)
  return if Grant.create_and_execute_trigger?(table)

  dbname = ApplicationRecord.database.database_name
  user = ApplicationRecord.database.username

  # The statement has to read `ALTER USER <name> ...`: without the USER
  # keyword PostgreSQL rejects it as a syntax error.
  raise <<~EOF
    Your database user is not allowed to create, drop, or execute triggers on the
    table #{table}.
    If you are using PostgreSQL you can solve this by logging in to the GitLab
    database (#{dbname}) using a super user and running:
    ALTER USER #{user} WITH SUPERUSER
    This query will grant the user super user permissions, ensuring you don't run
    into similar problems in the future (e.g. when new tables are created).
  EOF
end
2018-05-09 12:01:36 +05:30
# Fetches indexes on a column by name for postgres.
#
# This will include indexes using an expression on the column, for example:
# `CREATE INDEX CONCURRENTLY index_name ON table (LOWER(column));`
#
# We can remove this when upgrading to Rails 5 with an updated `index_exists?`:
# - https://github.com/rails/rails/commit/edc2b7718725016e988089b5fb6d6fb9d6e16882
#
# Or this can be removed when we no longer support postgres < 9.5, so we
# can use `CREATE INDEX IF NOT EXISTS`.
def index_exists_by_name?(table, index)
  # The regular `index_exists?` cannot be used here: it does not find
  # indexes unless a column name is passed. So look at the names reported by
  # `indexes` first and query the database only as a fallback.
  known_names = indexes(table).map(&:name)
  return true if known_names.include?(index.to_s)

  postgres_exists_by_name?(table, index)
end
2018-05-09 12:01:36 +05:30
# Returns true when pg_catalog.pg_indexes lists an index called `name` on
# `table` in the current schema.
def postgres_exists_by_name?(table, name)
  quoted_schema = connection.quote(current_schema)
  quoted_table = connection.quote(table)
  quoted_name = connection.quote(name)

  index_sql = <<~SQL
    SELECT COUNT(*)
    FROM pg_catalog.pg_indexes
    WHERE schemaname = #{quoted_schema}
    AND tablename = #{quoted_table}
    AND indexname = #{quoted_name}
  SQL

  connection.select_value(index_sql).to_i.positive?
end
2018-12-05 23:21:45 +05:30
2020-01-01 13:55:28 +05:30
# Upserts a single limit for the plan with the given name.
#
# limit_name - The plan_limits column to set (quoted as a column name).
# plan_name - The name of the plan, looked up in the plans table.
# limit_value - The value to store.
#
# When a plan_limits row already exists for the plan (conflict on plan_id),
# only the given column is overwritten.
def create_or_update_plan_limit(limit_name, plan_name, limit_value)
  limit_name_quoted = quote_column_name(limit_name)
  plan_name_quoted = quote(plan_name)
  limit_value_quoted = quote(limit_value)

  upsert_sql = <<~SQL
    INSERT INTO plan_limits (plan_id, #{limit_name_quoted})
    SELECT id, #{limit_value_quoted} FROM plans WHERE name = #{plan_name_quoted} LIMIT 1
    ON CONFLICT (plan_id) DO UPDATE SET #{limit_name_quoted} = EXCLUDED.#{limit_name_quoted};
  SQL

  execute(upsert_sql)
end
2020-03-13 15:44:24 +05:30
# Note this should only be used with very small tables
def backfill_iids(table)
  # The iid of a row is its position (ordered by id) within its project_id
  # partition, computed in a single UPDATE over the whole table.
  numbered_rows = "#{table}_with_calculated_iid"

  update_sql = <<~SQL
    UPDATE #{table}
    SET iid = #{numbered_rows}.iid_num
    FROM (
    SELECT id, ROW_NUMBER() OVER (PARTITION BY project_id ORDER BY id ASC) AS iid_num FROM #{table}
    ) AS #{numbered_rows}
    WHERE #{table}.id = #{numbered_rows}.id
  SQL

  execute(update_sql)
end
2020-04-22 19:07:51 +05:30
# Returns the name for a check constraint
#
# type:
# - Any value, as long as it is unique
# - Constraint names are unique per table in Postgres, and, additionally,
# we can have multiple check constraints over a column
# So we use the (table, column, type) triplet as a unique name
# - e.g. we use 'max_length' when adding checks for text limits
# or 'not_null' when adding a NOT NULL constraint
#
def check_constraint_name(table, column, type)
  # Only the first 10 hex characters of the SHA256 digest are used, which
  # keeps the name short no matter how long the inputs are (see
  # concurrent_foreign_key_name() for the reasoning behind hashing).
  digest = Digest::SHA256.hexdigest("#{table}_#{column}_check_#{type}")

  "check_#{digest.first(10)}"
end
# Returns true when a check constraint called `constraint_name` exists on
# `table` in the current schema.
def check_constraint_exists?(table, constraint_name)
  quoted_constraint = connection.quote(constraint_name)
  quoted_schema = connection.quote(current_schema)
  quoted_table = connection.quote(table)

  # Constraint names are unique per table in Postgres, not per schema: two
  # tables can own constraints with the same name, hence the filter on the
  # table name in addition to the constraint name.
  check_sql = <<~SQL
    SELECT COUNT(*)
    FROM pg_catalog.pg_constraint con
    INNER JOIN pg_catalog.pg_class rel
    ON rel.oid = con.conrelid
    INNER JOIN pg_catalog.pg_namespace nsp
    ON nsp.oid = con.connamespace
    WHERE con.contype = 'c'
    AND con.conname = #{quoted_constraint}
    AND nsp.nspname = #{quoted_schema}
    AND rel.relname = #{quoted_table}
  SQL

  matches = connection.select_value(check_sql)
  matches > 0
end
# Adds a check constraint to a table
#
# This method is the generic helper for adding any check constraint
# More specialized helpers may use it (e.g. add_text_limit or add_not_null)
#
# This method only requires minimal locking:
# - The constraint is added using NOT VALID
# This allows us to add the check constraint without validating it
# - The check will be enforced for new data (inserts) coming in
# - If `validate: true` the constraint is also validated
# Otherwise, validate_check_constraint() can be used at a later stage
# - Check comments on add_concurrent_foreign_key for more info
#
# table - The table the constraint will be added to
# check - The check clause to add
# e.g. 'char_length(name) <= 5' or 'store IS NOT NULL'
# constraint_name - The name of the check constraint (otherwise auto-generated)
# Should be unique per table (not per column)
# validate - Whether to validate the constraint in this call
#
def add_check_constraint(table, check, constraint_name, validate: true)
  # Inside a transaction the ALTER TABLE lock would be held until the
  # transaction ends, defeating the purpose of this method.
  validate_not_in_transaction!(:add_check_constraint)
  validate_check_constraint_name!(constraint_name)

  if check_constraint_exists?(table, constraint_name)
    Gitlab::AppLogger.warn(<<~MESSAGE)
      Check constraint was not created because it exists already
      (this may be due to an aborted migration or similar)
      table: #{table}, check: #{check}, constraint name: #{constraint_name}
    MESSAGE
  else
    # The constraint is only added here, not validated. ADD CONSTRAINT is
    # fast but still needs an EXCLUSIVE lock, so it runs under
    # with_lock_retries to avoid timing out on tables used by many processes.
    add_constraint_sql = <<~SQL
      ALTER TABLE #{table}
      ADD CONSTRAINT #{constraint_name}
      CHECK ( #{check} )
      NOT VALID;
    SQL

    with_lock_retries { execute(add_constraint_sql) }
  end

  validate_check_constraint(table, constraint_name) if validate
end
# Validates an existing check constraint with the statement timeout disabled.
#
# Raises when the constraint name is too long or when the constraint does not
# exist on the table.
def validate_check_constraint(table, constraint_name)
  validate_check_constraint_name!(constraint_name)

  raise missing_schema_object_message(table, "check constraint", constraint_name) unless check_constraint_exists?(table, constraint_name)

  # VALIDATE CONSTRAINT only requires a SHARE UPDATE EXCLUSIVE LOCK; it only
  # conflicts with other validations and with index creation.
  disable_statement_timeout do
    execute("ALTER TABLE #{table} VALIDATE CONSTRAINT #{constraint_name};")
  end
end
# Drops a check constraint if it exists.
def remove_check_constraint(table, constraint_name)
  # Not strictly required for dropping a constraint, but it keeps this helper
  # aligned with add_check_constraint and allows with_lock_retries below.
  validate_not_in_transaction!(:remove_check_constraint)
  validate_check_constraint_name!(constraint_name)

  drop_constraint_sql = <<~SQL
    ALTER TABLE #{table}
    DROP CONSTRAINT IF EXISTS #{constraint_name}
  SQL

  # DROP CONSTRAINT requires an EXCLUSIVE lock; retrying on lock timeouts
  # keeps this from failing on busy tables.
  with_lock_retries { execute(drop_constraint_sql) }
end
2021-01-03 14:25:43 +05:30
# Copies all check constraints for the old column to the new column.
#
# table - The table containing the columns.
# old - The old column.
# new - The new column.
# schema - The schema the table is defined for
# If it is not provided, then the current_schema is used
def copy_check_constraints(table, old, new, schema: nil)
  raise 'copy_check_constraints can not be run inside a transaction' if transaction_open?
  raise "Column #{old} does not exist on #{table}" unless column_exists?(table, old)
  raise "Column #{new} does not exist on #{table}" unless column_exists?(table, new)

  table_with_schema = schema.present? ? "#{schema}.#{table}" : table

  # Matches the old column name only as a whole identifier, so that other
  # identifiers that merely contain it (e.g. "path_id" for "path") are left
  # untouched when the definition is rewritten for the new column.
  old_identifier = /(?<!\w)#{Regexp.escape(old.to_s)}(?!\w)/

  check_constraints_for(table, old, schema: schema).each do |check_c|
    raw_definition = check_c["constraint_def"]

    # A constraint that is NOT VALID on the old column is copied without
    # validation as well.
    validate = !raw_definition.end_with?("NOT VALID")

    # Normalize:
    # - Old constraint definitions:
    #     '(char_length(entity_path) <= 5500)'
    # - Definitions from pg_get_constraintdef(oid):
    #     'CHECK ((char_length(entity_path) <= 5500))'
    # - Definitions from pg_get_constraintdef(oid, pretty_bool):
    #     'CHECK (char_length(entity_path) <= 5500)'
    # - Not valid constraints: 'CHECK (...) NOT VALID'
    # to a single format that we can use:
    #     '(char_length(entity_path) <= 5500)'
    #
    # \A and \z anchor to the whole string. With ^ and $ a definition that
    # spans several lines would get the closing parenthesis inserted at the
    # end of its first line.
    check_definition = raw_definition
      .sub(/\A\s*(CHECK)?\s*\({0,2}/, '(')
      .sub(/\){0,2}\s*(NOT VALID)?\s*\z/, ')')

    constraint_name =
      if check_definition == "(#{old} IS NOT NULL)"
        not_null_constraint_name(table_with_schema, new)
      elsif check_definition.start_with?("(char_length(#{old}) <=")
        text_limit_name(table_with_schema, new)
      else
        check_constraint_name(table_with_schema, new, 'copy_check_constraint')
      end

    add_check_constraint(
      table_with_schema,
      # Block form: the new name is inserted literally, without any
      # backreference interpretation.
      check_definition.gsub(old_identifier) { new.to_s },
      constraint_name,
      validate: validate
    )
  end
end
2020-04-22 19:07:51 +05:30
# Migration Helpers for adding limit to text columns
def add_text_limit(table, column, limit, constraint_name: nil, validate: true)
  # The limit is enforced with a char_length check constraint on the column.
  limit_check = "char_length(#{column}) <= #{limit}"
  resolved_name = text_limit_name(table, column, name: constraint_name)

  add_check_constraint(table, limit_check, resolved_name, validate: validate)
end
# Validates the text limit check constraint of a column. `constraint_name`
# overrides the generated constraint name.
def validate_text_limit(table, column, constraint_name: nil)
  resolved_name = text_limit_name(table, column, name: constraint_name)

  validate_check_constraint(table, resolved_name)
end
# Removes the text limit check constraint of a column. `constraint_name`
# overrides the generated constraint name.
def remove_text_limit(table, column, constraint_name: nil)
  resolved_name = text_limit_name(table, column, name: constraint_name)

  remove_check_constraint(table, resolved_name)
end
# Tells whether the text limit check constraint of a column exists.
# `constraint_name` overrides the generated constraint name.
def check_text_limit_exists?(table, column, constraint_name: nil)
  resolved_name = text_limit_name(table, column, name: constraint_name)

  check_constraint_exists?(table, resolved_name)
end
2020-05-24 23:13:21 +05:30
# Migration Helpers for managing not null constraints
def add_not_null_constraint(table, column, constraint_name: nil, validate: true)
  # Nothing to add when the column itself is already NOT NULL; only a
  # warning is logged in that case.
  unless column_is_nullable?(table, column)
    warning_message = <<~MESSAGE
      NOT NULL check constraint was not created:
      column #{table}.#{column} is already defined as `NOT NULL`
    MESSAGE

    return Gitlab::AppLogger.warn warning_message
  end

  add_check_constraint(
    table,
    "#{column} IS NOT NULL",
    not_null_constraint_name(table, column, name: constraint_name),
    validate: validate
  )
end
# Validates the NOT NULL check constraint of a column. `constraint_name`
# overrides the generated constraint name.
def validate_not_null_constraint(table, column, constraint_name: nil)
  resolved_name = not_null_constraint_name(table, column, name: constraint_name)

  validate_check_constraint(table, resolved_name)
end
# Removes the NOT NULL check constraint of a column. `constraint_name`
# overrides the generated constraint name.
def remove_not_null_constraint(table, column, constraint_name: nil)
  resolved_name = not_null_constraint_name(table, column, name: constraint_name)

  remove_check_constraint(table, resolved_name)
end
# Tells whether the NOT NULL check constraint of a column exists.
# `constraint_name` overrides the generated constraint name.
def check_not_null_constraint_exists?(table, column, constraint_name: nil)
  resolved_name = not_null_constraint_name(table, column, name: constraint_name)

  check_constraint_exists?(table, resolved_name)
end
2020-10-24 23:57:45 +05:30
# Installs a PostgreSQL extension unless it is already installed.
#
# extension - The name of the extension; it is interpolated into the SQL
#             statement as is.
#
# When the statement fails with a "permission denied" error, instructions
# for fixing the problem are printed with `warn`. The
# ActiveRecord::StatementInvalid error is re-raised in every case.
def create_extension(extension)
  execute('CREATE EXTENSION IF NOT EXISTS %s' % extension)
rescue ActiveRecord::StatementInvalid => e
  if e.to_s.match?(/permission denied/)
    # Only looked up when the hint is actually printed.
    dbname = ApplicationRecord.database.database_name
    user = ApplicationRecord.database.username

    # The statement has to read `ALTER USER <name> ...`: without the USER
    # keyword PostgreSQL rejects it as a syntax error.
    warn(<<~MSG)
      GitLab requires the PostgreSQL extension '#{extension}' installed in database '#{dbname}', but
      the database user is not allowed to install the extension.
      You can either install the extension manually using a database superuser:
      CREATE EXTENSION IF NOT EXISTS #{extension}
      Or, you can solve this by logging in to the GitLab
      database (#{dbname}) using a superuser and running:
      ALTER USER #{user} WITH SUPERUSER
      This query will grant the user superuser permissions, ensuring any database extensions
      can be installed through migrations.
      For more information, refer to https://docs.gitlab.com/ee/install/postgresql_extensions.html.
    MSG
  end

  raise
end
# Drops a PostgreSQL extension if it is installed.
#
# extension - The name of the extension; it is interpolated into the SQL
#             statement as is.
#
# When the statement fails with a "permission denied" error, instructions
# for fixing the problem are printed with `warn`. The
# ActiveRecord::StatementInvalid error is re-raised in every case.
def drop_extension(extension)
  execute('DROP EXTENSION IF EXISTS %s' % extension)
rescue ActiveRecord::StatementInvalid => e
  if e.to_s.match?(/permission denied/)
    # Only looked up when the hint is actually printed.
    dbname = ApplicationRecord.database.database_name
    user = ApplicationRecord.database.username

    # The statement has to read `ALTER USER <name> ...`: without the USER
    # keyword PostgreSQL rejects it as a syntax error.
    warn(<<~MSG)
      This migration attempts to drop the PostgreSQL extension '#{extension}'
      installed in database '#{dbname}', but the database user is not allowed
      to drop the extension.
      You can either drop the extension manually using a database superuser:
      DROP EXTENSION IF EXISTS #{extension}
      Or, you can solve this by logging in to the GitLab
      database (#{dbname}) using a superuser and running:
      ALTER USER #{user} WITH SUPERUSER
      This query will grant the user superuser permissions, ensuring any database extensions
      can be dropped through migrations.
      For more information, refer to https://docs.gitlab.com/ee/install/postgresql_extensions.html.
    MSG
  end

  raise
end
2021-09-30 23:02:18 +05:30
# Renames a constraint of a table. The table name and both constraint names
# are quoted as identifiers.
def rename_constraint(table_name, old_name, new_name)
  quoted_table = quote_table_name(table_name)
  quoted_old = quote_column_name(old_name)
  quoted_new = quote_column_name(new_name)

  rename_sql = <<~SQL
    ALTER TABLE #{quoted_table}
    RENAME CONSTRAINT #{quoted_old} TO #{quoted_new}
  SQL

  execute(rename_sql)
end
2022-07-23 23:45:48 +05:30
# Drops the default of a column and then the given sequence (if it exists).
def drop_sequence(table_name, column_name, sequence_name)
  quoted_table = quote_table_name(table_name)
  quoted_column = quote_column_name(column_name)
  quoted_sequence = quote_table_name(sequence_name)

  # The default is dropped before the sequence.
  drop_sql = <<~SQL
    ALTER TABLE #{quoted_table} ALTER COLUMN #{quoted_column} DROP DEFAULT;
    DROP SEQUENCE IF EXISTS #{quoted_sequence}
  SQL

  execute(drop_sql)
end
# Creates a sequence starting at `start_value` and makes it the default of
# the given column.
def add_sequence(table_name, column_name, sequence_name, start_value)
  quoted_sequence = quote_table_name(sequence_name)
  quoted_table = quote_table_name(table_name)
  quoted_column = quote_column_name(column_name)
  # nextval() takes the sequence name as a quoted value, not an identifier.
  sequence_literal = quote(sequence_name)

  create_sql = <<~SQL
    CREATE SEQUENCE #{quoted_sequence} START #{start_value};
    ALTER TABLE #{quoted_table} ALTER COLUMN #{quoted_column} SET DEFAULT nextval(#{sequence_literal})
  SQL

  execute(create_sql)
end
2019-10-12 21:52:04 +05:30
private
2021-11-11 11:23:49 +05:30
# Adds a temporary "<column>_convert_to_bigint" column of the given data type
# for each of the given columns and calls install_rename_triggers for the
# (source, temporary) column lists.
#
# table - The name of the database table.
# columns - The name, or array of names, of the source column(s).
# primary_key - The name of the primary key column.
# data_type - The type of the temporary columns.
#
# Raises when the table, the primary key or one of the columns is missing.
def create_temporary_columns_and_triggers(table, columns, primary_key: :id, data_type: :bigint)
  raise "Table #{table} does not exist" unless table_exists?(table)
  raise "Column #{primary_key} does not exist on #{table}" unless column_exists?(table, primary_key)

  columns = Array.wrap(columns)
  columns.each do |name|
    raise ArgumentError, "Column #{name} does not exist on #{table}" unless column_exists?(table, name)
  end

  check_trigger_permissions!(table)

  conversions = columns.to_h { |name| [name, convert_to_bigint_column(name)] }

  with_lock_retries do
    conversions.each do |source_column, temporary_name|
      column = column_for(table, source_column)
      not_null_required = column.name.to_s == primary_key.to_s || !column.null

      # If the column to be converted is either a PK or is defined as NOT NULL,
      # set it to `NOT NULL DEFAULT 0` and we'll copy the correct values below.
      # That way, we skip the expensive validation step required to add
      # a NOT NULL constraint at the end of the process.
      column_options =
        if not_null_required
          { default: column.default || 0, null: false }
        else
          { default: column.default }
        end

      add_column(table, temporary_name, data_type, **column_options)
    end

    install_rename_triggers(table, conversions.keys, conversions.values)
  end
end
2020-06-23 00:09:42 +05:30
# Raises when the constraint name is longer than MAX_IDENTIFIER_NAME_LENGTH
# characters.
def validate_check_constraint_name!(constraint_name)
  return if constraint_name.to_s.length <= MAX_IDENTIFIER_NAME_LENGTH

  raise "The maximum allowed constraint name is #{MAX_IDENTIFIER_NAME_LENGTH} characters"
end
2021-01-03 14:25:43 +05:30
# Returns an ActiveRecord::Result containing the check constraints
# defined for the given column.
#
# If the schema is not provided, then the current_schema is used
def check_constraints_for(table, column, schema: nil)
  # An explicitly given schema wins; otherwise the current schema is used.
  quoted_schema = connection.quote(schema.presence || current_schema)
  quoted_table = connection.quote(table)
  quoted_column = connection.quote(column)

  # contype = 'c' restricts the result to check constraints. The join on
  # constraint_column_usage ties each constraint to the columns it uses.
  check_sql = <<~SQL
    SELECT
    ccu.table_schema as schema_name,
    ccu.table_name as table_name,
    ccu.column_name as column_name,
    con.conname as constraint_name,
    pg_get_constraintdef(con.oid) as constraint_def
    FROM pg_catalog.pg_constraint con
    INNER JOIN pg_catalog.pg_class rel
    ON rel.oid = con.conrelid
    INNER JOIN pg_catalog.pg_namespace nsp
    ON nsp.oid = con.connamespace
    INNER JOIN information_schema.constraint_column_usage ccu
    ON con.conname = ccu.constraint_name
    AND nsp.nspname = ccu.constraint_schema
    AND rel.relname = ccu.table_name
    WHERE nsp.nspname = #{quoted_schema}
    AND rel.relname = #{quoted_table}
    AND ccu.column_name = #{quoted_column}
    AND con.contype = 'c'
    ORDER BY constraint_name
  SQL

  connection.exec_query(check_sql)
end
2020-05-24 23:13:21 +05:30
# Reports whether the statement timeout is disabled for the connection.
#
# `SHOW statement_timeout` yields a string of the form "100ms", or "0" when
# the timeout is disabled.
def statement_timeout_disabled?
  current_timeout = connection.select_value('SHOW statement_timeout')

  current_timeout == "0"
end
# Reports whether table.column has NOT been defined with NOT NULL.
#
# The answer is read from information_schema.columns for the current schema:
# the method returns true only when `is_nullable` is 'YES', and false
# otherwise (including when no matching column row is found).
def column_is_nullable?(table, column)
  check_sql = <<~SQL
    SELECT c.is_nullable
    FROM information_schema.columns c
    WHERE c.table_schema = #{connection.quote(current_schema)}
      AND c.table_name = #{connection.quote(table)}
      AND c.column_name = #{connection.quote(column)}
  SQL

  connection.select_value(check_sql) == 'YES'
end
2020-04-22 19:07:51 +05:30
# Name of the text limit constraint for table.column.
#
# A non-blank `name` is returned as is; otherwise the name is built by
# check_constraint_name with the 'max_length' suffix.
def text_limit_name(table, column, name: nil)
  explicit_name = name.presence
  return explicit_name if explicit_name

  check_constraint_name(table, column, 'max_length')
end
2020-05-24 23:13:21 +05:30
# Name of the NOT NULL check constraint for table.column.
#
# A non-blank `name` is returned as is; otherwise the name is built by
# check_constraint_name with the 'not_null' suffix.
def not_null_constraint_name(table, column, name: nil)
  explicit_name = name.presence

  if explicit_name
    explicit_name
  else
    check_constraint_name(table, column, 'not_null')
  end
end
2020-04-08 14:13:33 +05:30
# Builds the multi-line error text used when a schema object referenced by a
# migration cannot be found.
#
# table - the table the object was expected on
# type  - the kind of object (interpolated verbatim into the message)
# name  - the name of the missing object
#
# Returns a String in which every line, including the last, ends with "\n".
def missing_schema_object_message(table, type, name)
  message_lines = [
    "Could not find #{type} \"#{name}\" on table \"#{table}\" which was referenced during the migration.",
    'This issue could be caused by the database schema straying from the expected state.',
    'To resolve this issue, please verify:',
    '1. all previous migrations have completed',
    '2. the database objects used in this migration match the Rails definition in schema.rb or structure.sql'
  ]

  "#{message_lines.join("\n")}\n"
end
2020-01-01 13:55:28 +05:30
# Reports whether a foreign key's table satisfies the requested target table.
#
# A blank target table matches any foreign key table; otherwise both values
# must be equal.
def tables_match?(target_table, foreign_key_table)
  return true if target_table.blank?

  foreign_key_table == target_table
end
# Reports whether every requested option agrees with the foreign key's own
# options. Values are compared through their string form, so :cascade and
# 'cascade' are treated as equal. An empty `options` always matches.
def options_match?(foreign_key_options, options)
  options.none? do |option_name, expected|
    foreign_key_options[option_name].to_s != expected.to_s
  end
end
# Translates an `on_delete` option into its SQL clause.
#
# blank value -> '' (no clause)
# :nullify    -> 'ON DELETE SET NULL'
# anything else -> "ON DELETE " followed by the upcased value
def on_delete_statement(on_delete)
  if on_delete.blank?
    ''
  elsif on_delete == :nullify
    'ON DELETE SET NULL'
  else
    "ON DELETE #{on_delete.upcase}"
  end
end
2021-02-22 17:27:13 +05:30
# Creates column `new` on `table` as a copy of the existing column `old`.
#
# The steps run in this order:
#   1. add the new column, reusing the old column's precision and scale
#   2. copy the old column's default, if it has one
#   3. copy the old values into the new column in batches
#   4. add a NOT NULL constraint when the old column does not allow NULLs
#   5. copy the old column's indexes, foreign keys and check constraints
#
# table              - The table to change.
# old                - The name of the column to copy from.
# new                - The name of the column to create.
# type               - The type of the new column; defaults to the old column's type.
# batch_column_name  - Passed through to update_column_in_batches.
# type_cast_function - When present, the name of an SQL function that is
#                      wrapped around the old value while copying.
# limit              - The limit of the new column; defaults to the old column's limit.
def create_column_from(table, old, new, type: nil, batch_column_name: :id, type_cast_function: nil, limit: nil)
  old_col = column_for(table, old)
  new_type = type || old_col.type
  new_limit = limit || old_col.limit

  add_column(table, new, new_type,
             limit: new_limit,
             precision: old_col.precision,
             scale: old_col.scale)

  # We set the default value _after_ adding the column so we don't end up
  # updating any existing data with the default value. This isn't
  # necessary since we copy over old values further down.
  change_column_default(table, new, old_col.default) unless old_col.default.nil?

  old_value = Arel::Table.new(table)[old]

  if type_cast_function.present?
    old_value = Arel::Nodes::NamedFunction.new(type_cast_function, [old_value])
  end

  # Both query analyzers are suppressed for the duration of the copy.
  Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.with_suppressed do
    Gitlab::Database::QueryAnalyzers::GitlabSchemasValidateConnection.with_suppressed do
      update_column_in_batches(table, new, old_value, batch_column_name: batch_column_name, disable_lock_writes: true)
    end
  end

  add_not_null_constraint(table, new) unless old_col.null

  copy_indexes(table, old, new)
  copy_foreign_keys(table, old, new)
  copy_check_constraints(table, old, new)
end
2019-10-12 21:52:04 +05:30
# Ensures a timestamp column name follows the `_at` naming convention.
#
# Returns nil when the name (converted to a String) ends with `_at`;
# raises a RuntimeError describing the offending name otherwise.
def validate_timestamp_column_name!(column_name)
  return if column_name.to_s.end_with?('_at')

  raise <<~MESSAGE
    Illegal timestamp column name! Got #{column_name}.
    Must end with `_at`
  MESSAGE
end
# Ensures the calling helper is not running inside a transaction.
#
# method_name - The helper's name; shown in backticks in the error.
# modifier    - Optional text appended after the method name (skipped when nil).
#
# Returns nil when no transaction is open; raises a RuntimeError explaining
# how to disable DDL transactions otherwise.
def validate_not_in_transaction!(method_name, modifier = nil)
  return unless transaction_open?

  raise <<~ERROR
    #{["`#{method_name}`", modifier].compact.join(' ')} cannot be run inside a transaction.
    You can disable transactions by calling `disable_ddl_transaction!` in the body of
    your migration class
  ERROR
end
2016-06-02 11:05:42 +05:30
end
end
end