debian-mirror-gitlab/lib/gitlab/database/migration_helpers/v2.rb

# frozen_string_literal: true

module Gitlab
  module Database
    module MigrationHelpers
      module V2
        include Gitlab::Database::MigrationHelpers
        # Creates a new table, optionally allowing the caller to add text limit constraints to the table.
        # This method only extends Rails' `create_table` method
        #
        # Example:
        #
        #    create_table :db_guides do |t|
        #      t.bigint :stars, default: 0, null: false
        #      t.text :title, limit: 128
        #      t.text :notes, limit: 1024
        #
        #      t.check_constraint 'stars > 1000', name: 'so_many_stars'
        #    end
        #
        # See Rails' `create_table` for more info on the available arguments.
        #
        # When adding foreign keys to other tables, consider wrapping the call into a with_lock_retries block
        # to avoid traffic stalls.
        def create_table(table_name, *args, **kwargs, &block)
          helper_context = self

          super do |t|
            t.define_singleton_method(:text) do |column_name, **kwargs|
              limit = kwargs.delete(:limit)

              super(column_name, **kwargs)

              if limit
                # rubocop:disable GitlabSecurity/PublicSend
                name = helper_context.send(:text_limit_name, table_name, column_name)
                # rubocop:enable GitlabSecurity/PublicSend

                column_name = helper_context.quote_column_name(column_name)
                definition = "char_length(#{column_name}) <= #{limit}"

                t.check_constraint(definition, name: name)
              end
            end

            t.instance_eval(&block) unless block.nil?
          end
        end

        # Executes the block with a retry mechanism that alters the +lock_timeout+ and +sleep_time+ between attempts.
        # The timings can be controlled via the +timing_configuration+ parameter.
        # If the lock was not acquired within the retry period, a last attempt is made without using +lock_timeout+.
        #
        # In order to retry the block, the method wraps the block into a transaction.
        #
        # When called inside an open transaction it will execute the block directly if lock retries are enabled
        # with `enable_lock_retries!` at migration level, otherwise it will raise an error.
        #
        # ==== Examples
        #   # Invoking without parameters
        #   with_lock_retries do
        #     drop_table :my_table
        #   end
        #
        #   # Invoking with custom +timing_configuration+
        #   t = [
        #     [1.second, 1.second],
        #     [2.seconds, 2.seconds]
        #   ]
        #
        #   with_lock_retries(timing_configuration: t) do
        #     drop_table :my_table # this will be retried twice
        #   end
        #
        #   # Disabling the retries using an environment variable
        #   > export DISABLE_LOCK_RETRIES=true
        #
        #   with_lock_retries do
        #     drop_table :my_table # one invocation, it will not retry at all
        #   end
        #
        # ==== Parameters
        # * +timing_configuration+ - [[ActiveSupport::Duration, ActiveSupport::Duration], ...] lock timeout for the block, sleep time before the next iteration, defaults to `Gitlab::Database::WithLockRetries::DEFAULT_TIMING_CONFIGURATION`
        # * +logger+ - [Gitlab::JsonLogger]
        # * +env+ - [Hash] custom environment hash, see the example with `DISABLE_LOCK_RETRIES`
        def with_lock_retries(*args, **kwargs, &block)
          if transaction_open?
            if enable_lock_retries?
              Gitlab::AppLogger.warn 'Lock retries already enabled, executing the block directly'
              yield
            else
              raise <<~EOF
              #{__callee__} can not be run inside an already open transaction

              Use migration-level lock retries instead, see https://docs.gitlab.com/ee/development/migration_style_guide.html#retry-mechanism-when-acquiring-database-locks
              EOF
            end
          else
            super(*args, **kwargs.merge(allow_savepoints: false), &block)
          end
        end

        # Renames a column without requiring downtime.
        #
        # Concurrent renames work by using database triggers to ensure both the
        # old and new column are in sync. However, this method will _not_ remove
        # the triggers or the old column automatically; this needs to be done
        # manually in a post-deployment migration. This can be done using the
        # method `cleanup_concurrent_column_rename`.
        #
        # table - The name of the database table containing the column.
        # old_column - The old column name.
        # new_column - The new column name.
        # type - The type of the new column. If no type is given the old column's
        #        type is used.
        # batch_column_name - option is for tables without primary key, in this
        #        case another unique integer column can be used. Example: :user_id
        def rename_column_concurrently(table, old_column, new_column, type: nil, batch_column_name: :id)
          Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_ddl_mode!

          setup_renamed_column(__callee__, table, old_column, new_column, type, batch_column_name)

          with_lock_retries do
            install_bidirectional_triggers(table, old_column, new_column)
          end
        end

        # Reverses operations performed by rename_column_concurrently.
        #
        # This method takes care of removing previously installed triggers as well
        # as removing the new column.
        #
        # table - The name of the database table.
        # old_column - The name of the old column.
        # new_column - The name of the new column.
        def undo_rename_column_concurrently(table, old_column, new_column)
          teardown_rename_mechanism(table, old_column, new_column, column_to_remove: new_column)
        end

        # Cleans up a concurrent column name.
        #
        # This method takes care of removing previously installed triggers as well
        # as removing the old column.
        #
        # table - The name of the database table.
        # old_column - The name of the old column.
        # new_column - The name of the new column.
        def cleanup_concurrent_column_rename(table, old_column, new_column)
          teardown_rename_mechanism(table, old_column, new_column, column_to_remove: old_column)
        end

        # Reverses the operations performed by cleanup_concurrent_column_rename.
        #
        # This method adds back the old_column removed
        # by cleanup_concurrent_column_rename.
        # It also adds back the triggers that are removed
        # by cleanup_concurrent_column_rename.
        #
        # table - The name of the database table containing the column.
        # old_column - The old column name.
        # new_column - The new column name.
        # type - The type of the old column. If no type is given the new column's
        #        type is used.
        # batch_column_name - option is for tables without primary key, in this
        #        case another unique integer column can be used. Example: :user_id
        #
        def undo_cleanup_concurrent_column_rename(table, old_column, new_column, type: nil, batch_column_name: :id)
          Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_ddl_mode!

          setup_renamed_column(__callee__, table, new_column, old_column, type, batch_column_name)

          with_lock_retries do
            install_bidirectional_triggers(table, old_column, new_column)
          end
        end

        # TRUNCATE is a DDL statement (it drops the table and re-creates it), so we want to run the
        # migration in DDL mode, but we also don't want to execute it against all schemas because
        # it will be prevented by the lock_writes trigger.
        #
        # For example,
        # a `gitlab_main` table on `:gitlab_main` database will be truncated,
        # and a `gitlab_main` table on `:gitlab_ci` database will be skipped.
        #
        # Note Rails already has a truncate_tables, see
        # https://github.com/rails/rails/blob/6-1-stable/activerecord/lib/active_record/connection_adapters/abstract/database_statements.rb#L193
        def truncate_tables!(*table_names, connection: self.connection)
          table_schemas = Gitlab::Database::GitlabSchema.table_schemas!(table_names)

          raise ArgumentError, "`table_names` must resolve to only one `gitlab_schema`" if table_schemas.size != 1

          return unless Gitlab::Database.gitlab_schemas_for_connection(connection).include?(table_schemas.first)

          quoted_tables = table_names.map { |table_name| quote_table_name(table_name) }.join(', ')

          execute("TRUNCATE TABLE #{quoted_tables}")
        end

        private

        def setup_renamed_column(calling_operation, table, old_column, new_column, type, batch_column_name)
          if transaction_open?
            raise "#{calling_operation} can not be run inside a transaction"
          end

          column = columns(table).find { |column| column.name == old_column.to_s }

          unless column
            raise "Column #{old_column} does not exist on #{table}"
          end

          if column.default_function
            raise "#{calling_operation} does not currently support columns with default functions"
          end

          unless column_exists?(table, batch_column_name)
            raise "Column #{batch_column_name} does not exist on #{table}"
          end

          check_trigger_permissions!(table)

          unless column_exists?(table, new_column)
            create_column_from(table, old_column, new_column, type: type, batch_column_name: batch_column_name)
          end
        end

        def teardown_rename_mechanism(table, old_column, new_column, column_to_remove:)
          return unless column_exists?(table, column_to_remove)

          with_lock_retries do
            check_trigger_permissions!(table)

            remove_bidirectional_triggers(table, old_column, new_column)

            remove_column(table, column_to_remove)
          end
        end

        def install_bidirectional_triggers(table, old_column, new_column)
          insert_trigger_name, update_old_trigger_name, update_new_trigger_name =
            bidirectional_trigger_names(table, old_column, new_column)

          quoted_table = quote_table_name(table)
          quoted_old = quote_column_name(old_column)
          quoted_new = quote_column_name(new_column)

          create_insert_trigger(insert_trigger_name, quoted_table, quoted_old, quoted_new)
          create_update_trigger(update_old_trigger_name, quoted_table, quoted_new, quoted_old)
          create_update_trigger(update_new_trigger_name, quoted_table, quoted_old, quoted_new)
        end

        def remove_bidirectional_triggers(table, old_column, new_column)
          insert_trigger_name, update_old_trigger_name, update_new_trigger_name =
            bidirectional_trigger_names(table, old_column, new_column)

          quoted_table = quote_table_name(table)

          drop_trigger(insert_trigger_name, quoted_table)
          drop_trigger(update_old_trigger_name, quoted_table)
          drop_trigger(update_new_trigger_name, quoted_table)
        end

        def bidirectional_trigger_names(table, old_column, new_column)
          %w[insert update_old update_new].map do |operation|
            'trigger_' + Digest::SHA256.hexdigest("#{table}_#{old_column}_#{new_column}_#{operation}").first(12)
          end
        end

        def function_name_for_trigger(trigger_name)
          "function_for_#{trigger_name}"
        end

        def create_insert_trigger(trigger_name, quoted_table, quoted_old_column, quoted_new_column)
          function_name = function_name_for_trigger(trigger_name)

          column = columns(quoted_table.delete('"').to_sym).find { |column| column.name == quoted_old_column.delete('"') }
          quoted_default_value = connection.quote(column.default)

          execute(<<~SQL)
            CREATE OR REPLACE FUNCTION #{function_name}()
            RETURNS trigger
            LANGUAGE plpgsql
            AS $$
            BEGIN
              IF NEW.#{quoted_old_column} IS NOT DISTINCT FROM #{quoted_default_value} AND NEW.#{quoted_new_column} IS DISTINCT FROM #{quoted_default_value} THEN
                NEW.#{quoted_old_column} = NEW.#{quoted_new_column};
              END IF;

              IF NEW.#{quoted_new_column} IS NOT DISTINCT FROM #{quoted_default_value} AND NEW.#{quoted_old_column} IS DISTINCT FROM #{quoted_default_value} THEN
                NEW.#{quoted_new_column} = NEW.#{quoted_old_column};
              END IF;

              RETURN NEW;
            END
            $$;

            DROP TRIGGER IF EXISTS #{trigger_name}
            ON #{quoted_table};

            CREATE TRIGGER #{trigger_name}
            BEFORE INSERT ON #{quoted_table}
            FOR EACH ROW EXECUTE FUNCTION #{function_name}();
          SQL
        end

        def create_update_trigger(trigger_name, quoted_table, quoted_source_column, quoted_target_column)
          function_name = function_name_for_trigger(trigger_name)

          execute(<<~SQL)
            CREATE OR REPLACE FUNCTION #{function_name}()
            RETURNS trigger
            LANGUAGE plpgsql
            AS $$
            BEGIN
              NEW.#{quoted_target_column} := NEW.#{quoted_source_column};
              RETURN NEW;
            END
            $$;

            DROP TRIGGER IF EXISTS #{trigger_name}
            ON #{quoted_table};

            CREATE TRIGGER #{trigger_name}
            BEFORE UPDATE OF #{quoted_source_column} ON #{quoted_table}
            FOR EACH ROW EXECUTE FUNCTION #{function_name}();
          SQL
        end

        def drop_trigger(trigger_name, quoted_table)
          function_name = function_name_for_trigger(trigger_name)

          execute(<<~SQL)
            DROP TRIGGER IF EXISTS #{trigger_name}
            ON #{quoted_table};

            DROP FUNCTION IF EXISTS #{function_name};
          SQL
        end
      end
    end
  end
end