2019-02-15 15:39:39 +05:30
# frozen_string_literal: true
2016-06-02 11:05:42 +05:30
module Gitlab
module Database
module MigrationHelpers
2018-03-17 18:26:18 +05:30
BACKGROUND_MIGRATION_BATCH_SIZE = 1000 # Number of rows to process per job
BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time

# Column names that may be passed as `:columns` to `add_timestamps_with_timezone`.
PERMITTED_TIMESTAMP_COLUMNS = %i[created_at updated_at deleted_at].to_set.freeze
# Columns used by the timestamp helpers when no `:columns` option is given.
DEFAULT_TIMESTAMP_COLUMNS = %i[created_at updated_at].freeze
# Adds `created_at` and `updated_at` columns with timezone information.
#
# This method is an improved version of Rails' built-in method `add_timestamps`.
#
# By default, adds `created_at` and `updated_at` columns, but these can be specified as:
#
#   add_timestamps_with_timezone(:my_table, columns: [:created_at, :deleted_at])
#
# This allows you to create just the timestamps you need, saving space.
#
# Available options are:
#  :default - The default value for the column.
#  :null - When set to `true` the column will allow NULL values.
#        The default is to not allow NULL values.
#  :columns - the column names to create. Each must be one
#        of `Gitlab::Database::MigrationHelpers::PERMITTED_TIMESTAMP_COLUMNS`.
#        Default value: `DEFAULT_TIMESTAMP_COLUMNS`
#
# All options are optional.
def add_timestamps_with_timezone(table_name, options = {})
  # Work on a copy so the caller's hash is never modified (a frozen hash is
  # accepted too). `:null` defaults to `false` unless explicitly given.
  options = options.merge(null: false) if options[:null].nil?
  columns = options.fetch(:columns, DEFAULT_TIMESTAMP_COLUMNS)
  default_value = options[:default]

  validate_not_in_transaction!(:add_timestamps_with_timezone, 'with default value') if default_value

  columns.each do |column_name|
    validate_timestamp_column_name!(column_name)

    # If default value is presented, use `add_column_with_default` method instead.
    if default_value
      add_column_with_default(
        table_name,
        column_name,
        :datetime_with_timezone,
        default: default_value,
        allow_null: options[:null]
      )
    else
      add_column(table_name, column_name, :datetime_with_timezone, options)
    end
  end
end
# To be used in the `#down` method of migrations that
# use `#add_timestamps_with_timezone`.
#
# Available options are:
#  :columns - the column names to remove.
#        Default value: `DEFAULT_TIMESTAMP_COLUMNS`
#
# All options are optional.
def remove_timestamps(table_name, options = {})
  columns = options.fetch(:columns, DEFAULT_TIMESTAMP_COLUMNS)

  # One `remove_column` call per timestamp column.
  columns.each do |column_name|
    remove_column(table_name, column_name)
  end
end
# Creates a new index, concurrently
2016-06-02 11:05:42 +05:30
#
# Example:
#
# add_concurrent_index :users, :some_column
#
# See Rails' `add_index` for more info on the available arguments.
2016-06-16 23:09:34 +05:30
def add_concurrent_index(table_name, column_name, options = {})
  if transaction_open?
    raise 'add_concurrent_index can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'
  end

  # `merge` returns a new hash, so the caller's options are left untouched.
  options = options.merge({ algorithm: :concurrently })

  # Skip quietly when the index is already there, so the call can be re-run.
  if index_exists?(table_name, column_name, options)
    Rails.logger.warn "Index not created because it already exists (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}" # rubocop:disable Gitlab/RailsLogger
    return
  end

  disable_statement_timeout do
    add_index(table_name, column_name, options)
  end
end
# Removes an existing index, concurrently
#
# Example:
#
#     remove_concurrent_index :users, :some_column
#
# See Rails' `remove_index` for more info on the available arguments.
def remove_concurrent_index(table_name, column_name, options = {})
  if transaction_open?
    raise 'remove_concurrent_index can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'
  end

  # `merge` returns a new hash, so the caller's options are left untouched.
  options = options.merge({ algorithm: :concurrently })

  # Nothing to remove: log and return so the call can be re-run.
  unless index_exists?(table_name, column_name, options)
    Rails.logger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}" # rubocop:disable Gitlab/RailsLogger
    return
  end

  disable_statement_timeout do
    remove_index(table_name, options.merge({ column: column_name }))
  end
end
# Removes an existing index, identified by its name, concurrently
#
# Example:
#
#     remove_concurrent_index_by_name :users, "index_X_by_Y"
#
# See Rails' `remove_index` for more info on the available arguments.
def remove_concurrent_index_by_name(table_name, index_name, options = {})
  if transaction_open?
    raise 'remove_concurrent_index_by_name can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'
  end

  # `merge` returns a new hash, so the caller's options are left untouched.
  options = options.merge({ algorithm: :concurrently })

  # Nothing to remove: log and return so the call can be re-run.
  unless index_exists_by_name?(table_name, index_name)
    Rails.logger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, index_name: #{index_name}" # rubocop:disable Gitlab/RailsLogger
    return
  end

  disable_statement_timeout do
    remove_index(table_name, options.merge({ name: index_name }))
  end
end
2017-08-17 22:00:37 +05:30
# Adds a foreign key with only minimal locking on the tables involved.
#
2019-10-12 21:52:04 +05:30
# This method only requires minimal locking
2017-08-17 22:00:37 +05:30
#
# source - The source table containing the foreign key.
# target - The target table the key points to.
# column - The name of the column to create the foreign key on.
# on_delete - The action to perform when associated data is removed,
# defaults to "CASCADE".
2019-09-30 21:07:59 +05:30
#
# rubocop:disable Gitlab/RailsLogger
2019-09-04 21:01:54 +05:30
def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, name: nil)
  # Transactions would result in ALTER TABLE locks being held for the
  # duration of the transaction, defeating the purpose of this method.
  if transaction_open?
    raise 'add_concurrent_foreign_key can not be run inside a transaction'
  end

  on_delete = 'SET NULL' if on_delete == :nullify

  # An explicit `name:` wins; otherwise a hashed name is derived.
  key_name = name || concurrent_foreign_key_name(source, column)

  # Warn only when creation is actually skipped; create the key otherwise.
  if foreign_key_exists?(source, target, column: column)
    Rails.logger.warn "Foreign key not created because it exists already " \
      "(this may be due to an aborted migration or similar): " \
      "source: #{source}, target: #{target}, column: #{column}"
  else
    # Using NOT VALID allows us to create a key without immediately
    # validating it. This means we keep the ALTER TABLE lock only for a
    # short period of time. The key _is_ enforced for any newly created
    # data.
    execute <<~SQL
      ALTER TABLE #{source}
      ADD CONSTRAINT #{key_name}
      FOREIGN KEY (#{column})
      REFERENCES #{target} (id)
      #{on_delete ? "ON DELETE #{on_delete.upcase}" : ''}
      NOT VALID;
    SQL
  end

  # Validate the existing constraint. This can potentially take a very
  # long time to complete, but fortunately does not lock the source table
  # while running.
  #
  # Note this is a no-op in case the constraint is VALID already
  disable_statement_timeout do
    execute("ALTER TABLE #{source} VALIDATE CONSTRAINT #{key_name};")
  end
end
2019-09-30 21:07:59 +05:30
# rubocop:enable Gitlab/RailsLogger
2017-08-17 22:00:37 +05:30
2018-05-09 12:01:36 +05:30
# Returns true when `source` has a foreign key matching the arguments.
#
# source - The table whose foreign keys are inspected.
# target - The referenced table; only compared when `column` is not given.
# column - The foreign key column; takes precedence over `target`.
def foreign_key_exists?(source, target = nil, column: nil)
  foreign_keys(source).any? do |key|
    if column
      key.options[:column].to_s == column.to_s
    else
      key.to_table.to_s == target.to_s
    end
  end
end
2017-08-17 22:00:37 +05:30
# Returns the name for a concurrent foreign key.
#
# PostgreSQL constraint names have a limit of 63 bytes. The logic used
# here is based on Rails' foreign_key_name() method, which unfortunately
# is private so we can't rely on it directly.
def concurrent_foreign_key_name(table, column)
  identifier = "#{table}_#{column}_fk"
  # "fk_" plus the first 10 hex characters of the digest (13 characters in
  # total), which stays within the 63-byte limit for constraint names.
  hashed_identifier = Digest::SHA256.hexdigest(identifier)[0, 10]

  "fk_#{hashed_identifier}"
end
2016-08-24 12:49:21 +05:30
# Long-running migrations may take more than the timeout allowed by
# the database. Disable the session's statement timeout to ensure
2019-10-12 21:52:04 +05:30
# migrations don't get killed prematurely.
2018-11-20 20:47:30 +05:30
#
# There are two possible ways to disable the statement timeout:
#
# - Per transaction (this is the preferred and default mode)
# - Per connection (requires a cleanup after the execution)
#
# When using a per connection disable statement, code must be inside
# a block so we can automatically execute `RESET ALL` after block finishes
# otherwise the statement will still be disabled until connection is dropped
# or `RESET ALL` is executed
2016-08-24 12:49:21 +05:30
def disable_statement_timeout
  if block_given?
    # With a block, the timeout is disabled for the session and `RESET ALL`
    # runs afterwards, even when the block raises.
    begin
      execute('SET statement_timeout TO 0')

      yield
    ensure
      execute('RESET ALL')
    end
  else
    # `SET LOCAL` is issued only inside an open transaction, so refuse to
    # run without one.
    unless transaction_open?
      raise <<~ERROR
        Cannot call disable_statement_timeout() without a transaction open or outside of a transaction block.
        If you don't want to use a transaction wrap your code in a block call:
        disable_statement_timeout { # code that requires disabled statement here }
        This will make sure statement_timeout is disabled before and reset after the block execution is finished.
      ERROR
    end

    execute('SET LOCAL statement_timeout TO 0')
  end
end
# Delegates to `Database.true_value`.
def true_value
  Database.true_value
end
# Delegates to `Database.false_value`.
def false_value
  Database.false_value
end
# Updates the value of a column in batches.
#
# This method updates the table in batches of 5% of the total row count,
# capped at 1000 rows per batch.
# A `batch_size` option can also be passed to set this to a fixed number.
# This method will continue updating rows until no rows remain.
#
# When given a block this method will yield two values to the block:
#
# 1. An instance of `Arel::Table` for the table that is being updated.
# 2. The query to run as an Arel object.
#
# By supplying a block one can add extra conditions to the queries being
# executed. Note that the same block is used for _all_ queries.
#
# Example:
#
# update_column_in_batches(:projects, :foo, 10) do |table, query|
# query.where(table[:some_column].eq('hello'))
# end
#
# This would result in this method updating only rows where
# `projects.some_column` equals "hello".
2016-06-02 11:05:42 +05:30
#
# table - The name of the table.
# column - The name of the column to update.
# value - The value for the column.
2016-06-22 15:30:34 +05:30
#
2018-03-17 18:26:18 +05:30
# The `value` argument is typically a literal. To perform a computed
# update, an Arel literal can be used instead:
#
# update_value = Arel.sql('bar * baz')
#
# update_column_in_batches(:projects, :foo, update_value) do |table, query|
# query.where(table[:some_column].eq('hello'))
# end
#
2016-06-22 15:30:34 +05:30
# Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop
# determines this method to be too complex while there's no way to make it
# less "complex" without introducing extra methods (which actually will
# make things _more_ complex).
#
# rubocop: disable Metrics/AbcSize
2019-07-07 11:18:12 +05:30
def update_column_in_batches(table, column, value, batch_size: nil)
  if transaction_open?
    raise 'update_column_in_batches can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'
  end

  table = Arel::Table.new(table)

  # Every query below is passed through the caller's block (when given) so
  # the same extra conditions apply to counting, range finding and updating.
  count_arel = table.project(Arel.star.count.as('count'))
  count_arel = yield table, count_arel if block_given?

  total = exec_query(count_arel.to_sql).to_hash.first['count'].to_i

  return if total == 0

  if batch_size.nil?
    # Update in batches of 5% until we run out of any rows to update.
    batch_size = ((total / 100.0) * 5.0).ceil
    max_size = 1000

    # The upper limit is 1000 to ensure we don't lock too many rows. For
    # example, for "merge_requests" even 1% of the table is around 35 000
    # rows for GitLab.com.
    batch_size = max_size if batch_size > max_size
  end

  # The lowest matching ID is where the first batch starts.
  start_arel = table.project(table[:id]).order(table[:id].asc).take(1)
  start_arel = yield table, start_arel if block_given?
  start_id = exec_query(start_arel.to_sql).to_hash.first['id'].to_i

  loop do
    # The ID found `batch_size` rows past `start_id` is the exclusive upper
    # bound of this batch; no such row means this is the last batch.
    stop_arel = table.project(table[:id])
      .where(table[:id].gteq(start_id))
      .order(table[:id].asc)
      .take(1)
      .skip(batch_size)

    stop_arel = yield table, stop_arel if block_given?
    stop_row = exec_query(stop_arel.to_sql).to_hash.first

    update_arel = Arel::UpdateManager.new
      .table(table)
      .set([[table[column], value]])
      .where(table[:id].gteq(start_id))

    if stop_row
      stop_id = stop_row['id'].to_i
      start_id = stop_id
      update_arel = update_arel.where(table[:id].lt(stop_id))
    end

    update_arel = yield table, update_arel if block_given?

    execute(update_arel.to_sql)

    # There are no more rows left to update.
    break unless stop_row
  end
end
# Adds a column with a default value without locking an entire table.
#
# This method runs the following steps:
#
# 1. Add the column with a default value of NULL.
2016-06-22 15:30:34 +05:30
# 2. Change the default value of the column to the specified value.
# 3. Update all existing rows in batches.
# 4. Set a `NOT NULL` constraint on the column if desired (the default).
2016-06-02 11:05:42 +05:30
#
# These steps ensure a column can be added to a large and commonly used
# table without locking the entire table for the duration of the table
# modification.
#
# table - The name of the table to update.
# column - The name of the column to add.
# type - The column type (e.g. `:integer`).
# default - The default value for the column.
2016-09-29 09:46:39 +05:30
# limit - Sets a column limit. For example, for :integer, the default is
# 4-bytes. Set `limit: 8` to allow 8-byte integers.
2016-06-02 11:05:42 +05:30
# allow_null - When set to `true` the column will allow NULL values, the
# default is to not allow NULL values.
2016-06-22 15:30:34 +05:30
#
# This method can also take a block which is passed directly to the
# `update_column_in_batches` method.
2016-09-29 09:46:39 +05:30
def add_column_with_default(table, column, type, default:, limit: nil, allow_null: false, &block)
  if transaction_open?
    raise 'add_column_with_default can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'
  end

  disable_statement_timeout do
    # Add the column with a NULL default first and only then set the real
    # default, both inside one transaction.
    transaction do
      if limit
        add_column(table, column, type, default: nil, limit: limit)
      else
        add_column(table, column, type, default: nil)
      end

      # Changing the default before the update ensures any newly inserted
      # rows already use the proper default value.
      change_column_default(table, column, default)
    end

    begin
      default_after_type_cast = connection.type_cast(default, column_for(table, column))
      update_column_in_batches(table, column, default_after_type_cast, &block)

      change_column_null(table, column, false) unless allow_null
    # We want to rescue _all_ exceptions here, even those that don't inherit
    # from StandardError.
    rescue Exception => error # rubocop: disable all
      # Drop the new column on failure, then re-raise the original error.
      remove_column(table, column)

      raise error
    end
  end
end
2017-08-17 22:00:37 +05:30
# Renames a column without requiring downtime.
#
# Concurrent renames work by using database triggers to ensure both the
# old and new column are in sync. However, this method will _not_ remove
# the triggers or the old column automatically; this needs to be done
# manually in a post-deployment migration. This can be done using the
# method `cleanup_concurrent_column_rename`.
#
# table - The name of the database table containing the column.
# old - The old column name.
# new - The new column name.
# type - The type of the new column. If no type is given the old column's
# type is used.
def rename_column_concurrently(table, old, new, type: nil)
  if transaction_open?
    raise 'rename_column_concurrently can not be run inside a transaction'
  end

  check_trigger_permissions!(table)

  # Create the new column from the old one, then install the sync triggers.
  create_column_from(table, old, new, type: type)

  install_rename_triggers(table, old, new)
end
2019-12-04 20:38:33 +05:30
# Reverses operations performed by rename_column_concurrently.
#
# This method takes care of removing previously installed triggers as well
# as removing the new column.
#
# table - The name of the database table.
# old - The name of the old column.
# new - The name of the new column.
2019-10-12 21:52:04 +05:30
def undo_rename_column_concurrently(table, old, new)
  trigger_name = rename_trigger_name(table, old, new)

  check_trigger_permissions!(table)

  # Drop the triggers first, then the column that was added by the rename.
  remove_rename_triggers_for_postgresql(table, trigger_name)

  remove_column(table, new)
end
2018-03-17 18:26:18 +05:30
# Installs triggers in a table that keep a new column in sync with an old
# one.
#
# table - The name of the table to install the trigger in.
# old_column - The name of the old column.
# new_column - The name of the new column.
def install_rename_triggers(table, old_column, new_column)
  trigger_name = rename_trigger_name(table, old_column, new_column)
  # Identifiers are quoted here because the PostgreSQL helper interpolates
  # them directly into SQL.
  quoted_table = quote_table_name(table)
  quoted_old = quote_column_name(old_column)
  quoted_new = quote_column_name(new_column)

  install_rename_triggers_for_postgresql(
    trigger_name,
    quoted_table,
    quoted_old,
    quoted_new
  )
end
# Changes the type of a column concurrently.
#
# table - The table containing the column.
# column - The name of the column to change.
# new_type - The new column type.
def change_column_type_concurrently(table, column, new_type)
  # The type change is done as a concurrent rename to a temporary column.
  temp_column = "#{column}_for_type_change"

  rename_column_concurrently(table, column, temp_column, type: new_type)
end
# Performs cleanup of a concurrent type change.
#
# table - The table containing the column.
# column - The name of the column to change.
def cleanup_concurrent_column_type_change(table, column)
  temp_column = "#{column}_for_type_change"

  transaction do
    # This has to be performed in a transaction as otherwise we might have
    # inconsistent data.
    cleanup_concurrent_column_rename(table, column, temp_column)
    rename_column(table, temp_column, column)
  end
end
# Cleans up a concurrent column rename.
#
# This method takes care of removing previously installed triggers as well
# as removing the old column.
#
# table - The name of the database table.
# old - The name of the old column.
# new - The name of the new column.
def cleanup_concurrent_column_rename(table, old, new)
  trigger_name = rename_trigger_name(table, old, new)

  check_trigger_permissions!(table)

  # Drop the triggers first, then the old column.
  remove_rename_triggers_for_postgresql(table, trigger_name)

  remove_column(table, old)
end
2019-12-04 20:38:33 +05:30
# Reverses the operations performed by cleanup_concurrent_column_rename.
#
# This method adds back the old_column removed
# by cleanup_concurrent_column_rename.
# It also adds back the (old_column > new_column) trigger that is removed
# by cleanup_concurrent_column_rename.
#
# table - The name of the database table containing the column.
# old - The old column name.
# new - The new column name.
# type - The type of the old column. If no type is given the new column's
# type is used.
2019-10-12 21:52:04 +05:30
def undo_cleanup_concurrent_column_rename(table, old, new, type: nil)
  if transaction_open?
    raise 'undo_cleanup_concurrent_column_rename can not be run inside a transaction'
  end

  check_trigger_permissions!(table)

  # Note the swapped arguments: the old column is recreated from the new one.
  create_column_from(table, new, old, type: type)

  install_rename_triggers(table, old, new)
end
2018-03-17 18:26:18 +05:30
# Changes the column type of a table using a background migration.
#
# Because this method uses a background migration it's more suitable for
# large tables. For small tables it's better to use
# `change_column_type_concurrently` since it can complete its work in a
# much shorter amount of time and doesn't rely on Sidekiq.
#
# Example usage:
#
# class Issue < ActiveRecord::Base
# self.table_name = 'issues'
#
# include EachBatch
#
# def self.to_migrate
# where('closed_at IS NOT NULL')
# end
# end
#
# change_column_type_using_background_migration(
# Issue.to_migrate,
# :closed_at,
# :datetime_with_timezone
# )
#
# Reverting a migration like this is done exactly the same way, just with
# a different type to migrate to (e.g. `:datetime` in the above example).
#
# relation - An ActiveRecord relation to use for scheduling jobs and
# figuring out what table we're modifying. This relation _must_
# have the EachBatch module included.
#
# column - The name of the column for which the type will be changed.
#
# new_type - The new type of the column.
#
# batch_size - The number of rows to schedule in a single background
# migration.
#
# interval - The time interval between every background migration.
def change_column_type_using_background_migration(
  relation,
  column,
  new_type,
  batch_size: 10_000,
  interval: 10.minutes
)
  unless relation.model < EachBatch
    raise TypeError, 'The relation must include the EachBatch module'
  end

  temp_column = "#{column}_for_type_change"
  table = relation.table_name
  # Index of the last scheduled batch; stays 0 when there are no batches.
  max_index = 0

  add_column(table, temp_column, new_type)
  install_rename_triggers(table, column, temp_column)

  # Schedule the jobs that will copy the data from the old column to the
  # new one. Rows with NULL values in our source column are skipped since
  # the target column is already NULL at this point.
  relation.where.not(column => nil).each_batch(of: batch_size) do |batch, index|
    start_id, end_id = batch.pluck('MIN(id), MAX(id)').first
    max_index = index

    BackgroundMigrationWorker.perform_in(
      index * interval,
      'CopyColumn',
      [table, column, temp_column, start_id, end_id]
    )
  end

  # Schedule the renaming of the column to happen (initially) 1 hour after
  # the last batch finished.
  BackgroundMigrationWorker.perform_in(
    (max_index * interval) + 1.hour,
    'CleanupConcurrentTypeChange',
    [table, column, temp_column]
  )

  if perform_background_migration_inline?
    # To ensure the schema is up to date immediately we perform the
    # migration inline in dev / test environments.
    Gitlab::BackgroundMigration.steal('CopyColumn')
    Gitlab::BackgroundMigration.steal('CleanupConcurrentTypeChange')
  end
end
2018-11-08 19:23:39 +05:30
# Renames a column using a background migration.
#
# Because this method uses a background migration it's more suitable for
# large tables. For small tables it's better to use
# `rename_column_concurrently` since it can complete its work in a much
# shorter amount of time and doesn't rely on Sidekiq.
#
# Example usage:
#
# rename_column_using_background_migration(
# :users,
# :feed_token,
# :rss_token
# )
#
# table - The name of the database table containing the column.
#
# old - The old column name.
#
# new - The new column name.
#
# type - The type of the new column. If no type is given the old column's
# type is used.
#
# batch_size - The number of rows to schedule in a single background
# migration.
#
# interval - The time interval between every background migration.
def rename_column_using_background_migration(
  table,
  old_column,
  new_column,
  type: nil,
  batch_size: 10_000,
  interval: 10.minutes
)
  check_trigger_permissions!(table)

  old_col = column_for(table, old_column)
  new_type = type || old_col.type
  # Index of the last scheduled batch; stays 0 when there are no batches.
  max_index = 0

  add_column(table, new_column, new_type,
             limit: old_col.limit,
             precision: old_col.precision,
             scale: old_col.scale)

  # We set the default value _after_ adding the column so we don't end up
  # updating any existing data with the default value. This isn't
  # necessary since we copy over old values further down.
  change_column_default(table, new_column, old_col.default) if old_col.default

  install_rename_triggers(table, old_column, new_column)

  # Anonymous model bound to `table`, used only to iterate it in batches.
  model = Class.new(ActiveRecord::Base) do
    self.table_name = table

    include ::EachBatch
  end

  # Schedule the jobs that will copy the data from the old column to the
  # new one. Rows with NULL values in our source column are skipped since
  # the target column is already NULL at this point.
  model.where.not(old_column => nil).each_batch(of: batch_size) do |batch, index|
    start_id, end_id = batch.pluck('MIN(id), MAX(id)').first
    max_index = index

    BackgroundMigrationWorker.perform_in(
      index * interval,
      'CopyColumn',
      [table, old_column, new_column, start_id, end_id]
    )
  end

  # Schedule the renaming of the column to happen (initially) 1 hour after
  # the last batch finished.
  BackgroundMigrationWorker.perform_in(
    (max_index * interval) + 1.hour,
    'CleanupConcurrentRename',
    [table, old_column, new_column]
  )

  if perform_background_migration_inline?
    # To ensure the schema is up to date immediately we perform the
    # migration inline in dev / test environments.
    Gitlab::BackgroundMigration.steal('CopyColumn')
    Gitlab::BackgroundMigration.steal('CleanupConcurrentRename')
  end
end
2018-03-17 18:26:18 +05:30
# Returns true when the Rails environment is `test` or `development`.
def perform_background_migration_inline?
  env = Rails.env

  env.test? || env.development?
end
2017-08-17 22:00:37 +05:30
# Performs a concurrent column rename when using PostgreSQL.
def install_rename_triggers_for_postgresql(trigger, table, old, new)
  # The trigger function copies the old column's value into the new column.
  # All four arguments are interpolated verbatim into the SQL.
  execute <<~SQL
    CREATE OR REPLACE FUNCTION #{trigger}()
    RETURNS trigger AS
    $BODY$
    BEGIN
      NEW.#{new} := NEW.#{old};
      RETURN NEW;
    END;
    $BODY$
    LANGUAGE 'plpgsql'
    VOLATILE
  SQL

  # Drop any trigger of the same name first so the CREATE below cannot
  # collide with one left over from an earlier run.
  execute <<~SQL
    DROP TRIGGER IF EXISTS #{trigger}
    ON #{table}
  SQL

  execute <<~SQL
    CREATE TRIGGER #{trigger}
    BEFORE INSERT OR UPDATE
    ON #{table}
    FOR EACH ROW
    EXECUTE PROCEDURE #{trigger}()
  SQL
end
# Removes the triggers used for renaming a PostgreSQL column concurrently.
def remove_rename_triggers_for_postgresql(table, trigger)
  # Drop the trigger first, then the function of the same name.
  execute("DROP TRIGGER IF EXISTS #{trigger} ON #{table}")
  execute("DROP FUNCTION IF EXISTS #{trigger}()")
end
# Returns the (base) name to use for triggers when renaming columns.
def rename_trigger_name(table, old, new)
  # "trigger_" plus the first 12 hex characters of the SHA256 digest of
  # "<table>_<old>_<new>".
  'trigger_' + Digest::SHA256.hexdigest("#{table}_#{old}_#{new}")[0, 12]
end
# Returns an Array containing the indexes for the given column
def indexes_for(table, column)
  # Index columns are compared as strings, so symbols are accepted too.
  column = column.to_s

  indexes(table).select { |index| index.columns.include?(column) }
end
# Returns an Array containing the foreign keys for the given column.
def foreign_keys_for(table, column)
  # Foreign key columns are compared as strings, so symbols are accepted too.
  column = column.to_s

  foreign_keys(table).select { |fk| fk.column == column }
end
# Copies all indexes for the old column to a new column.
#
# table - The table containing the columns and indexes.
# old - The old column.
# new - The new column.
def copy_indexes(table, old, new)
  old = old.to_s
  new = new.to_s

  indexes_for(table, old).each do |index|
    # Same column list, with the old column swapped for the new one.
    new_columns = index.columns.map do |column|
      column == old ? new : column
    end

    # This is necessary as we can't properly rename indexes such as
    # "ci_taggings_idx".
    unless index.name.include?(old)
      raise "The index #{index.name} can not be copied as it does not " \
        "mention the old column. You have to rename this index manually first."
    end

    name = index.name.gsub(old, new)

    options = {
      unique: index.unique,
      name: name,
      length: index.lengths,
      order: index.orders
    }

    options[:using] = index.using if index.using
    options[:where] = index.where if index.where

    unless index.opclasses.blank?
      # Work on a copy so the index object's own opclasses are not modified.
      opclasses = index.opclasses.dup

      # Copy the operator classes for the old column (if any) to the new
      # column.
      opclasses[new] = opclasses.delete(old) if opclasses[old]

      options[:opclasses] = opclasses
    end

    add_concurrent_index(table, new_columns, options)
  end
end
# Copies all foreign keys for the old column to the new column.
#
# table - The table containing the columns and indexes.
# old - The old column.
# new - The new column.
def copy_foreign_keys(table, old, new)
  # Each copy keeps the original's tables and ON DELETE action and only
  # swaps the column.
  foreign_keys_for(table, old).each do |fk|
    add_concurrent_foreign_key(fk.from_table,
                               fk.to_table,
                               column: new,
                               on_delete: fk.on_delete)
  end
end
# Returns the column for the given table and column name.
def column_for(table, name)
  # Column names are compared as strings; the result is nil when no column
  # matches.
  name = name.to_s

  columns(table).find { |column| column.name == name }
end
# This will replace the first match of a pattern in a column with
# the replacement using `regexp_replace`. The pattern is handed to
# `regexp_replace` unescaped, so it is treated as a regular expression.
def replace_sql(column, pattern, replacement)
  # Both values go through `to_s` and are wrapped as quoted Arel nodes. The
  # pattern itself is not escaped.
  quoted_pattern = Arel::Nodes::Quoted.new(pattern.to_s)
  quoted_replacement = Arel::Nodes::Quoted.new(replacement.to_s)

  replace = Arel::Nodes::NamedFunction.new(
    "regexp_replace", [column, quoted_pattern, quoted_replacement]
  )

  Arel::Nodes::SqlLiteral.new(replace.to_sql)
end
2017-09-10 17:25:29 +05:30
2019-07-07 11:18:12 +05:30
# Removes a foreign key, but only when `foreign_key_exists?` reports it.
#
# args - Forwarded unchanged to both `foreign_key_exists?` and
#        `remove_foreign_key`.
#
# Returns nil when the foreign key does not exist.
def remove_foreign_key_if_exists(*args)
  return unless foreign_key_exists?(*args)

  remove_foreign_key(*args)
end
2017-09-10 17:25:29 +05:30
# Removes a foreign key, ignoring an ArgumentError raised by
# `remove_foreign_key`.
#
# args - Forwarded unchanged to `remove_foreign_key`.
#
# Returns nil when the ArgumentError was swallowed.
def remove_foreign_key_without_error(*args)
  begin
    remove_foreign_key(*args)
  rescue ArgumentError
    # Deliberately ignored: this helper is the "without error" variant.
    nil
  end
end
2018-03-17 18:26:18 +05:30
# Moves all jobs from one Sidekiq queue to another.
#
# queue_from - The name of the queue to drain.
# to         - The name of the queue receiving the jobs.
def sidekiq_queue_migrate(queue_from, to:)
  # The keys must be exactly `queue:<name>`; any padding would address a
  # different (empty) Redis list and the loop below would never finish.
  source_key = "queue:#{queue_from}"
  target_key = "queue:#{to}"

  # RPOPLPUSH moves a single element per call, so repeat until the source
  # queue reports a length of zero.
  while sidekiq_queue_length(queue_from) > 0
    Sidekiq.redis { |conn| conn.rpoplpush(source_key, target_key) }
  end
end
# Returns the length of the Redis list backing the given Sidekiq queue.
#
# queue_name - The name of the queue, without the `queue:` prefix.
def sidekiq_queue_length(queue_name)
  # The key must be exactly `queue:<name>`; padding would point at a
  # different Redis key.
  queue_key = "queue:#{queue_name}"

  Sidekiq.redis { |conn| conn.llen(queue_key) }
end
# Ensures the database user may create, drop and execute triggers on a table.
#
# table - The name of the table to check.
#
# Raises a RuntimeError with remediation instructions when
# `Grant.create_and_execute_trigger?` returns a falsy value; returns nil
# otherwise.
def check_trigger_permissions!(table)
  return if Grant.create_and_execute_trigger?(table)

  dbname = Database.database_name
  user = Database.username

  # The suggested statement needs the USER keyword to be valid SQL; it is set
  # off from the prose so it can be copied directly.
  raise <<~EOF
    Your database user is not allowed to create, drop, or execute triggers on the
    table #{table}.

    If you are using PostgreSQL you can solve this by logging in to the GitLab
    database (#{dbname}) using a super user and running:

        ALTER USER #{user} WITH SUPERUSER

    This query will grant the user super user permissions, ensuring you don't run
    into similar problems in the future (e.g. when new tables are created).
  EOF
end
# Bulk queues background migration jobs for an entire table, batched by ID range.
# "Bulk" meaning many jobs will be pushed at a time for efficiency.
# If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`.
#
# model_class - The table being iterated over
# job_class_name - The background migration job class as a string
# batch_size - The maximum number of rows per job
#
# Example:
#
# class Route < ActiveRecord::Base
# include EachBatch
# self.table_name = 'routes'
# end
#
# bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes')
#
# Where the model_class includes EachBatch, and the background migration exists:
#
# class Gitlab::BackgroundMigration::ProcessRoutes
# def perform(start_id, end_id)
# # do something
# end
# end
def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
  # Raises a RuntimeError when the model has no `id` column, since the job
  # arguments are ID ranges.
  unless model_class.column_names.include?('id')
    raise "#{model_class} does not have an ID to use for batch ranges"
  end

  # `id` is qualified with the quoted table name, presumably so it stays
  # unambiguous if the relation joins other tables.
  table_name = model_class.quoted_table_name
  id_bounds = ["MIN(#{table_name}.id)", "MAX(#{table_name}.id)"]
  jobs = []

  model_class.each_batch(of: batch_size) do |relation|
    start_id, end_id = relation.pluck(*id_bounds).first

    if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE
      # Note: This code path generally only helps with many millions of rows.
      # Jobs are pushed in bulk to reduce the time spent in Sidekiq/Redis
      # operations; buffering avoids additional queries for every range.
      BackgroundMigrationWorker.bulk_perform_async(jobs)
      jobs.clear
    end

    jobs << [job_class_name, [start_id, end_id]]
  end

  # Flush whatever is left in the buffer after the last batch.
  BackgroundMigrationWorker.bulk_perform_async(jobs) unless jobs.empty?
end
# Queues background migration jobs for an entire table, batched by ID range.
# Each job is scheduled with a `delay_interval` in between.
# If you use a small interval, then some jobs may run at the same time.
#
# model_class - The table or relation being iterated over
# job_class_name - The background migration job class as a string
# delay_interval - The duration between each job's scheduled time (must respond to `to_f`)
# batch_size - The maximum number of rows per job
#
# Example:
#
# class Route < ActiveRecord::Base
# include EachBatch
# self.table_name = 'routes'
# end
#
# queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute)
#
# Where the model_class includes EachBatch, and the background migration exists:
#
# class Gitlab::BackgroundMigration::ProcessRoutes
# def perform(start_id, end_id)
# # do something
# end
# end
def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
  # Raises a RuntimeError when the model has no `id` column, since the job
  # arguments are ID ranges.
  unless model_class.column_names.include?('id')
    raise "#{model_class} does not have an ID to use for batch ranges"
  end

  # To not overload the worker too much we enforce a minimum interval both
  # when scheduling and performing jobs.
  minimum_interval = BackgroundMigrationWorker.minimum_interval
  delay_interval = minimum_interval if delay_interval < minimum_interval

  model_class.each_batch(of: batch_size) do |relation, index|
    start_id, end_id = relation.pluck(Arel.sql('MIN(id), MAX(id)')).first

    # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for the
    # same time, which is not helpful in most cases where we wish to spread
    # the work over time, so each job is scheduled on its own with a delay
    # proportional to the batch index.
    BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id])
  end
end
2018-03-27 19:54:05 +05:30
2018-05-09 12:01:36 +05:30
# Fetches indexes on a column by name for postgres.
#
# This will include indexes using an expression on the column, for example:
# `CREATE INDEX CONCURRENTLY index_name ON table (LOWER(column));`
#
# We can remove this when upgrading to Rails 5 with an updated `index_exists?`:
# - https://github.com/rails/rails/commit/edc2b7718725016e988089b5fb6d6fb9d6e16882
#
# Or this can be removed when we no longer support postgres < 9.5, so we
# can use `CREATE INDEX IF NOT EXISTS`.
def index_exists_by_name?(table, index)
  wanted = index.to_s

  # The regular `index_exists?` helper is not usable here: it cannot find an
  # index unless a column name is passed, so the names reported by
  # `indexes(table)` are compared directly.
  return true if indexes(table).any? { |candidate| candidate.name == wanted }

  # Not reported by `indexes(table)`: ask `postgres_exists_by_name?` instead.
  postgres_exists_by_name?(table, index)
end
2018-05-09 12:01:36 +05:30
# Checks the PostgreSQL catalog for an index with the given name on the given
# table.
#
# table - The name of the table the index belongs to.
# name  - The name of the index.
#
# Returns true when the catalog query counts at least one matching row.
def postgres_exists_by_name?(table, name)
  # Both names go through `connection.quote` instead of being interpolated
  # between hand-written quotes, so a value containing a single quote cannot
  # break (or alter) the statement.
  index_sql = <<~SQL
    SELECT COUNT(*)
    FROM pg_index
    JOIN pg_class i ON (indexrelid = i.oid)
    JOIN pg_class t ON (indrelid = t.oid)
    WHERE i.relname = #{connection.quote(name.to_s)} AND t.relname = #{connection.quote(table.to_s)}
  SQL

  connection.select_value(index_sql).to_i > 0
end
2018-12-05 23:21:45 +05:30
2019-10-12 21:52:04 +05:30
# Methods defined below this point are private helpers of this module.
private
2019-12-04 20:38:33 +05:30
# Adds column `new` to `table` as a copy of the existing column `old`.
#
# table - The table containing both columns.
# old   - The name of the column to copy from.
# new   - The name of the column to create.
# type  - Optional type for the new column; the old column's type is used
#         when omitted.
#
# Limit, precision, scale, default, data, NOT NULL setting, indexes and
# foreign keys are carried over, in that order.
def create_column_from(table, old, new, type: nil)
  source = column_for(table, old)
  column_type = type || source.type
  column_options = {
    limit: source.limit,
    precision: source.precision,
    scale: source.scale
  }

  add_column(table, new, column_type, column_options)

  # The default is applied only once the column exists, so existing rows are
  # not rewritten with it; their values are copied from `old` right below.
  default_value = source.default
  change_column_default(table, new, default_value) unless default_value.nil?

  update_column_in_batches(table, new, Arel::Table.new(table)[old])

  # Mirror NOT NULL only after the data has been copied over.
  change_column_null(table, new, false) unless source.null

  copy_indexes(table, old, new)
  copy_foreign_keys(table, old, new)
end
2019-10-12 21:52:04 +05:30
# Ensures a timestamp column name is one of PERMITTED_TIMESTAMP_COLUMNS.
#
# column_name - The column name to check.
#
# Raises a RuntimeError listing the permitted names when the name is not a
# member; returns nil otherwise.
def validate_timestamp_column_name!(column_name)
  return if PERMITTED_TIMESTAMP_COLUMNS.member?(column_name)

  raise <<~MESSAGE
    Illegal timestamp column name! Got #{column_name}.
    Must be one of: #{PERMITTED_TIMESTAMP_COLUMNS.to_a}
  MESSAGE
end
# Ensures the calling helper is not running inside a transaction.
#
# method_name - The name of the helper being guarded; shown in backticks.
# modifier    - Optional text appended after the method name in the message.
#
# Raises a RuntimeError when `transaction_open?` is truthy; returns nil
# otherwise.
def validate_not_in_transaction!(method_name, modifier = nil)
  return unless transaction_open?

  # `compact` drops a nil modifier so no trailing space is emitted.
  subject = ["`#{method_name}`", modifier].compact.join(' ')

  raise <<~ERROR
    #{subject} cannot be run inside a transaction.
    You can disable transactions by calling `disable_ddl_transaction!` in the body of
    your migration class
  ERROR
end
2016-06-02 11:05:42 +05:30
end
end
end