debian-mirror-gitlab/lib/tasks/gitlab/db.rake

485 lines
19 KiB
Ruby
Raw Normal View History

2021-03-11 19:13:27 +05:30
# frozen_string_literal: true
2022-01-26 12:08:38 +05:30
# Evaluated while this rake file is being loaded. The result is handed to
# each_database / DatabaseTasks.for_each below to define per-database tasks and hooks.
databases = ActiveRecord::Tasks::DatabaseTasks.setup_initial_database_yaml
2022-06-21 17:19:12 +05:30
# Yields every database name produced by DatabaseTasks.for_each(databases).
# The 'geo' database is only yielded when include_geo is true.
def each_database(databases, include_geo: false)
  ActiveRecord::Tasks::DatabaseTasks.for_each(databases) do |name|
    yield name if include_geo || name != 'geo'
  end
end
2016-06-02 11:05:42 +05:30
namespace :gitlab do
namespace :db do
2022-05-07 20:08:51 +05:30
# Entry point without a database filter: forwards the :version argument
# to the mark_migration_complete helper.
desc 'GitLab | DB | Manually insert schema migration version on all configured databases'
task :mark_migration_complete, [:version] => :environment do |_task, task_args|
  mark_migration_complete(task_args[:version])
end
# One task per configured database (gitlab:db:mark_migration_complete:<database>),
# each restricting the insert to that database via only_on.
namespace :mark_migration_complete do
  each_database(databases) do |db_name|
    desc "Gitlab | DB | Manually insert schema migration version on #{db_name} database"
    task db_name, [:version] => :environment do |_task, task_args|
      mark_migration_complete(task_args[:version], only_on: db_name)
    end
  end
end
2016-06-02 11:05:42 +05:30
2022-05-07 20:08:51 +05:30
# Inserts `version` into schema_migrations on every database connection yielded by
# EachDatabase.each_database_connection (limited by `only_on` when given).
#
# Prints an error and exits with status 1 when `version.to_i` is zero.
# A duplicate insert (ActiveRecord::RecordNotUnique) is reported, not raised.
def mark_migration_complete(version, only_on: nil)
  if version.to_i.zero?
    puts 'Must give a version argument that is a non-zero integer'.color(:red)
    exit 1
  end

  Gitlab::Database::EachDatabase.each_database_connection(only: only_on) do |conn, db_name|
    quoted_version = conn.quote(version)
    conn.execute("INSERT INTO schema_migrations (version) VALUES (#{quoted_version})")

    puts "Successfully marked '#{version}' as complete on database #{db_name}".color(:green)
  rescue ActiveRecord::RecordNotUnique
    puts "Migration version '#{version}' is already marked complete on database #{db_name}".color(:yellow)
  end
end
2022-05-07 20:08:51 +05:30
# Entry point without a database filter: delegates to the drop_tables helper.
desc 'GitLab | DB | Drop all tables on all configured databases'
task(drop_tables: :environment) { drop_tables }
2016-08-24 12:49:21 +05:30
2022-05-07 20:08:51 +05:30
# One task per configured database (gitlab:db:drop_tables:<database>).
namespace :drop_tables do
  each_database(databases) do |db_name|
    desc "GitLab | DB | Drop all tables on the #{db_name} database"
    task(db_name => :environment) { drop_tables(only_on: db_name) }
  end
end
2019-07-07 11:18:12 +05:30
2022-05-07 20:08:51 +05:30
# For each database connection (limited by `only_on` when given): truncates
# schema_migrations, then drops all views, all other tables and every schema
# listed in Gitlab::Database::EXTRA_SCHEMAS.
def drop_tables(only_on: nil)
  Gitlab::Database::EachDatabase.each_database_connection(only: only_on) do |conn, _db_name|
    # In PostgreSQLAdapter, data_sources returns both views and tables, so use tables instead.
    # schema_migrations is kept out of the drop list; it is truncated rather than dropped.
    droppable_tables = conn.tables - ['schema_migrations']

    # Truncate schema_migrations to ensure migrations re-run
    conn.execute('TRUNCATE schema_migrations') if conn.table_exists?('schema_migrations')

    # Drop any views
    conn.views.each do |view_name|
      conn.execute("DROP VIEW IF EXISTS #{conn.quote_table_name(view_name)} CASCADE")
    end

    # Drop tables with cascade to avoid dependent table errors
    # PG: http://www.postgresql.org/docs/current/static/ddl-depend.html
    # Add `IF EXISTS` because cascade could have already deleted a table.
    droppable_tables.each do |table_name|
      conn.execute("DROP TABLE IF EXISTS #{conn.quote_table_name(table_name)} CASCADE")
    end

    # Drop all extra schema objects GitLab owns
    Gitlab::Database::EXTRA_SCHEMAS.each do |schema_name|
      conn.execute("DROP SCHEMA IF EXISTS #{conn.quote_table_name(schema_name)} CASCADE")
    end
  end
end
2020-03-13 15:44:24 +05:30
# Runs configure_database for the configured databases and seeds (db:seed_fu)
# only when at least one database was handled and every one of them had its schema loaded.
desc 'GitLab | DB | Configures the database by running migrate, or by loading the schema and seeding if needed'
task configure: :environment do
  db_configs = ActiveRecord::Base.configurations.configs_for(env_name: Rails.env)
  load_results = []

  if db_configs.size == 1
    # With a single configuration, only a database named 'main' is handled.
    next unless db_configs.first.name == 'main'

    main_connection = Gitlab::Database.database_base_models['main'].connection
    load_results << configure_database(main_connection)
  else
    Gitlab::Database.database_base_models_with_gitlab_shared.each do |name, model|
      next if db_configs.none? { |db_config| db_config.name == name }

      load_results << configure_database(model.connection, database_name: name)
    end
  end

  Rake::Task['db:seed_fu'].invoke if load_results.present? && load_results.all?
end
# Loads the schema when the connection has at most one table, otherwise runs migrations.
# With `database_name`, the per-database task variant (":<database_name>" suffix) is invoked.
#
# Returns true when the schema was loaded, false when migrations were run.
def configure_database(connection, database_name: nil)
  task_suffix = database_name ? ":#{database_name}" : nil
  schema_load_needed = connection.tables.count <= 1

  unless schema_load_needed
    Rake::Task["db:migrate#{task_suffix}"].invoke
    return false
  end

  Gitlab::Database.add_post_migrate_path_to_rails(force: true)
  Rake::Task["db:schema:load#{task_suffix}"].invoke

  true
end
2016-08-24 12:49:21 +05:30
2021-01-03 14:25:43 +05:30
# Invokes gitlab:db:configure when the schema migrations table is missing or
# migrations are pending, and prints which of the two outcomes happened.
desc 'GitLab | DB | Run database migrations and print `unattended_migrations_completed` if action taken'
task unattended: :environment do
  schema_table_missing = !ActiveRecord::Base.connection.schema_migration.table_exists?
  migrations_pending = ActiveRecord::Base.connection.migration_context.needs_migration?

  unless schema_table_missing || migrations_pending
    puts "unattended_migrations_static"
    next
  end

  Rake::Task['gitlab:db:configure'].invoke
  puts "unattended_migrations_completed"
end
2020-04-22 19:07:51 +05:30
# Rewrites the dump file of every database configuration in place, passing its
# contents through Gitlab::Database::SchemaCleaner.
desc 'This adjusts and cleans db/structure.sql - it runs after db:structure:dump'
task :clean_structure_sql do |current_task|
  env_name = ActiveRecord::Tasks::DatabaseTasks.env

  ActiveRecord::Base.configurations.configs_for(env_name: env_name).each do |db_config|
    dump_path = ActiveRecord::Tasks::DatabaseTasks.dump_filename(db_config.name)

    # Read the whole file first: opening with 'wb+' truncates it.
    raw_schema = File.read(dump_path)

    File.open(dump_path, 'wb+') { |io| Gitlab::Database::SchemaCleaner.new(raw_schema).clean(io) }
  end

  # Allow this task to be called multiple times, as happens when running db:migrate:redo
  Rake::Task[current_task].reenable
end
# Inform Rake that gitlab:db:clean_structure_sql should be run every time a schema
# dump task is run.
#
# Rails 6.1 deprecates db:structure:dump in favor of db:schema:dump, so both are hooked.
%w[db:structure:dump db:schema:dump].each do |dump_task|
  Rake::Task[dump_task].enhance { Rake::Task['gitlab:db:clean_structure_sql'].invoke }
end

# Same hooks for the per-database variants of both dump tasks.
ActiveRecord::Tasks::DatabaseTasks.for_each(databases) do |name|
  %W[db:structure:dump:#{name} db:schema:dump:#{name}].each do |dump_task|
    Rake::Task[dump_task].enhance { Rake::Task['gitlab:db:clean_structure_sql'].invoke }
  end
end
2020-07-28 23:09:34 +05:30
# Syncs partitions through Gitlab::Database::Partitioning, without a database filter.
desc 'Create missing dynamic database partitions'
task(create_dynamic_partitions: :environment) { Gitlab::Database::Partitioning.sync_partitions }

# Per-database variants: gitlab:db:create_dynamic_partitions:<database>.
namespace :create_dynamic_partitions do
  each_database(databases) do |db_name|
    desc "Create missing dynamic database partitions on the #{db_name} database"
    task(db_name => :environment) { Gitlab::Database::Partitioning.sync_partitions(only_on: db_name) }
  end
end
2020-07-28 23:09:34 +05:30
# This is targeted towards deploys and upgrades of GitLab.
# Since we're running migrations already at this time,
# we also check and create partitions as needed here.
Rake::Task['db:migrate'].enhance { Rake::Task['gitlab:db:create_dynamic_partitions'].invoke }

# We'll temporarily skip this enhancement for geo (each_database leaves it out
# by default), since in some situations we wish to setup the geo database before
# the other databases have been setup, and partition management attempts to
# connect to the main database.
each_database(databases) do |db_name|
  Rake::Task["db:migrate:#{db_name}"].enhance do
    Rake::Task["gitlab:db:create_dynamic_partitions:#{db_name}"].invoke
  end
end
2020-07-28 23:09:34 +05:30
# When we load the database schema from db/structure.sql
# we don't have any dynamic partitions created. We don't really need to
# because application initializers/sidekiq take care of that, too.
# However, the presence of partitions for a table has influence on their
# position in db/structure.sql (which is topologically sorted).
#
# Other than that it's helpful to create partitions early when bootstrapping
# a new installation.
Rake::Task['db:schema:load'].enhance { Rake::Task['gitlab:db:create_dynamic_partitions'].invoke }

# We'll temporarily skip this enhancement for geo (each_database leaves it out
# by default), since in some situations we wish to setup the geo database before
# the other databases have been setup, and partition management attempts to
# connect to the main database.
each_database(databases) do |db_name|
  # :nocov:
  Rake::Task["db:schema:load:#{db_name}"].enhance do
    Rake::Task["gitlab:db:create_dynamic_partitions:#{db_name}"].invoke
  end
  # :nocov:
end
2020-07-28 23:09:34 +05:30
# During testing, db:test:load restores the database schema from scratch
# which does not include dynamic partitions. We cannot rely on application
# initializers here as the application can continue to run while
# a rake task reloads the database schema.
Rake::Task['db:test:load'].enhance do |_task|
  # Re-establish the :main connection before syncing partitions.
  # Due to bug in `db:test:load` if many DBs are used
  # the `ActiveRecord::Base.connection` might be switched to another one
  # This is due to `if should_reconnect`:
  # https://github.com/rails/rails/blob/a81aeb63a007ede2fe606c50539417dada9030c7/activerecord/lib/active_record/railties/databases.rake#L622
  ActiveRecord::Base.establish_connection(:main) # rubocop: disable Database/EstablishConnection

  Rake::Task['gitlab:db:create_dynamic_partitions'].invoke
end
2020-11-24 15:15:51 +05:30
2022-01-26 12:08:38 +05:30
# Reindexes through Gitlab::Database::Reindexing. When reindexing is not enabled,
# a notice is printed and the process exits without doing any work.
desc "Reindex database without downtime to eliminate bloat"
task reindex: :environment do
  if Gitlab::Database::Reindexing.enabled?
    Gitlab::Database::Reindexing.invoke
  else
    puts "This feature (database_reindexing) is currently disabled.".color(:yellow)
    exit
  end
end

# Per-database variants: gitlab:db:reindex:<database>.
namespace :reindex do
  each_database(databases) do |db_name|
    desc "Reindex #{db_name} database without downtime to eliminate bloat"
    task db_name => :environment do
      if Gitlab::Database::Reindexing.enabled?
        Gitlab::Database::Reindexing.invoke(db_name)
      else
        puts "This feature (database_reindexing) is currently disabled.".color(:yellow)
        exit
      end
    end
  end
end
2021-02-22 17:27:13 +05:30
2021-12-11 22:18:48 +05:30
# Queues a reindexing action for the index named by :index_name on the database
# named by :database (defaults to Gitlab::Database::PRIMARY_DATABASE_NAME).
#
# Raises ArgumentError when :database does not match a known base model.
desc 'Enqueue an index for reindexing'
task :enqueue_reindexing_action, [:index_name, :database] => :environment do |_, args|
  database_name = args.fetch(:database, Gitlab::Database::PRIMARY_DATABASE_NAME)
  model = Gitlab::Database.database_base_models[database_name]

  # Fail with a readable message instead of a NoMethodError on nil.connection
  # when the database argument is mistyped.
  unless model
    known_databases = Gitlab::Database.database_base_models.keys.join(', ')
    raise ArgumentError, "Unknown database '#{database_name}' (known databases: #{known_databases})"
  end

  Gitlab::Database::SharedModel.using_connection(model.connection) do
    queued_action = Gitlab::Database::PostgresIndex.find(args[:index_name]).queued_reindexing_actions.create!

    puts "Queued reindexing action: #{queued_action}"
    puts "There are #{Gitlab::Database::Reindexing::QueuedAction.queued.size} queued actions in total."
  end

  # Queuing still succeeds while the feature flag is off; tell the operator how to enable it.
  unless Feature.enabled?(:database_reindexing, type: :ops)
    puts <<~NOTE.color(:yellow)
      Note: database_reindexing feature is currently disabled.

      Enable with: Feature.enable(:database_reindexing)
    NOTE
  end
end
2021-02-22 17:27:13 +05:30
# Exits 0 when projects exist beyond the ones GitLab seeds itself; exits 1 when
# migrations are pending or no such project exists.
desc 'Check if there have been user additions to the database'
task active: :environment do
  if ActiveRecord::Base.connection.migration_context.needs_migration?
    puts "Migrations pending. Database not active"
    exit 1
  end

  # A list of projects that GitLab creates automatically on install/upgrade
  # gc = Gitlab::CurrentSettings.current_application_settings
  seed_projects = [Gitlab::CurrentSettings.current_application_settings.self_monitoring_project]

  # Entries that are nil (no such seed project) are not subtracted.
  user_project_count = Project.count - seed_projects.compact.size

  if user_project_count.eql?(0)
    puts "No user created projects. Database not active"
    exit 1
  end

  puts "Found user created projects. Database active"
  exit 0
end
2021-03-11 19:13:27 +05:30
2021-11-18 22:05:49 +05:30
# Tasks that run or sample migrations through Gitlab::Database::Migrations::Runner.
namespace :migration_testing do
  # Not possible to import Gitlab::Database::DATABASE_NAMES here
  # Specs verify that a task exists for each entry in that array.
  all_databases = %i[main ci]

  # Sampling window taken from the :duration_s task argument (seconds);
  # 30 minutes when the argument is absent.
  sampling_window = ->(task_args) { task_args[:duration_s]&.to_i&.seconds || 30.minutes }

  task up: :environment do
    legacy_runner = Gitlab::Database::Migrations::Runner.up(database: 'main', legacy_mode: true)
    legacy_runner.run
  end

  namespace :up do
    all_databases.each do |database|
      desc "Run migrations on #{database} with instrumentation"
      task database => :environment do
        Gitlab::Database::Migrations::Runner.batched_migrations_last_id(database).store
        Gitlab::Database::Migrations::Runner.up(database: database).run
      end
    end
  end

  namespace :down do
    all_databases.each do |database|
      desc "Run down migrations on #{database} in current branch with instrumentation"
      task database => :environment do
        Gitlab::Database::Migrations::Runner.down(database: database).run
      end
    end
  end

  desc 'Sample traditional background migrations with instrumentation'
  task :sample_background_migrations, [:duration_s] => [:environment] do |_task, task_args|
    window = sampling_window.call(task_args)

    Gitlab::Database::Migrations::Runner.background_migrations.run_jobs(for_duration: window)
  end

  namespace :sample_batched_background_migrations do
    all_databases.each do |database|
      desc "Sample batched background migrations on #{database} with instrumentation"
      task database, [:duration_s] => [:environment] do |_task, task_args|
        window = sampling_window.call(task_args)
        runner = Gitlab::Database::Migrations::Runner.batched_background_migrations(for_database: database)

        runner.run_jobs(for_duration: window)
      end
    end
  end

  desc "Sample batched background migrations with instrumentation (legacy)"
  task :sample_batched_background_migrations, [:database, :duration_s] => [:environment] do |_task, task_args|
    window = sampling_window.call(task_args)
    target_database = task_args[:database] || 'main'
    runner = Gitlab::Database::Migrations::Runner.batched_background_migrations(
      for_database: target_database,
      legacy_mode: true
    )

    runner.run_jobs(for_duration: window)
  end
end
2021-06-08 01:23:25 +05:30
# For every database connection, runs each active batched migration to completion,
# in queue order, logging each one before it starts.
desc 'Run all pending batched migrations'
task execute_batched_migrations: :environment do
  Gitlab::Database::EachDatabase.each_database_connection do |connection, db_name|
    active_migrations = Gitlab::Database::BackgroundMigration::BatchedMigration.with_status(:active).queue_order

    active_migrations.each do |batched_migration|
      Gitlab::AppLogger.info("Executing batched migration #{batched_migration.id} on database #{db_name} inline")

      runner = Gitlab::Database::BackgroundMigration::BatchedMigrationRunner.new(connection: connection)
      runner.run_entire_migration(batched_migration)
    end
  end
end
2022-04-04 11:22:00 +05:30
# Drops and recreates the databases, then for each database configuration
# reconnects with the given username (default 'gitlab'), runs
# Gitlab::Database.check_for_non_superuser and invokes db:migrate.
desc 'Run migration as gitlab non-superuser'
task :reset_as_non_superuser, [:username] => :environment do |_task, task_args|
  migration_user = task_args.fetch(:username, 'gitlab')
  puts "Migrate using username #{migration_user}"

  %w[db:drop db:create].each { |task_name| Rake::Task[task_name].invoke }

  env_name = ActiveRecord::Tasks::DatabaseTasks.env
  ActiveRecord::Base.configurations.configs_for(env_name: env_name).each do |db_config|
    # Same configuration, with only the username replaced.
    config = ActiveRecord::DatabaseConfigurations::HashConfig.new(
      db_config.env_name,
      db_config.name,
      db_config.configuration_hash.merge(username: migration_user)
    )

    ActiveRecord::Base.establish_connection(config) # rubocop: disable Database/EstablishConnection
    Gitlab::Database.check_for_non_superuser
    Rake::Task['db:migrate'].invoke
  end
end
2021-06-08 01:23:25 +05:30
# Only for development environments,
# we execute pending data migrations inline for convenience.
# Nothing is invoked while the batched migrations table does not exist.
Rake::Task['db:migrate'].enhance do
  next unless Rails.env.development?
  next unless Gitlab::Database::BackgroundMigration::BatchedMigration.table_exists?

  Rake::Task['gitlab:db:execute_batched_migrations'].invoke
end
2022-08-13 15:12:31 +05:30
namespace :dictionary do
# Directory that receives one YAML file per table; view files go in a 'views' sub-directory.
DB_DOCS_PATH = File.join(Rails.root, 'db', 'docs')

# Writes or refreshes one YAML metadata file per table and view.
#
# A missing or empty file is written from scratch. For an existing file only
# the '<type>_name' and 'classes' entries are refreshed; every other entry is kept,
# and the file is rewritten only when one of those two changed.
desc 'Generate database docs yaml'
task generate: :environment do
  FileUtils.mkdir_p(DB_DOCS_PATH)

  # Load every model so that `descendants` below sees all classes.
  Rails.application.eager_load!

  tables = Gitlab::Database.database_base_models.flat_map { |_, m| m.connection.tables }
  views = Gitlab::Database.database_base_models.flat_map { |_, m| m.connection.views }

  # The same table or view can be reported by several databases; handle each name once.
  sources = (tables + views).uniq
  classes = sources.index_with { [] }

  Gitlab::Database.database_base_models.each do |_, model_class|
    model_class
      .descendants
      .reject(&:abstract_class)
      .reject { |c| c.name =~ /^(?:EE::)?Gitlab::(?:BackgroundMigration|DatabaseImporters)::/ }
      .reject { |c| c.name =~ /^HABTM_/ }
      .each { |c| classes[c.table_name] << c.name if classes.has_key?(c.table_name) }
  end

  # Milestone is the major.minor part of the release in the VERSION file.
  version = Gem::Version.new(File.read('VERSION'))
  milestone = version.release.segments[0..1].join('.')

  sources.each do |source_name|
    file = dictionary_file_path(source_name, views)
    key_name = "#{data_source_type(source_name, views)}_name"

    table_metadata = {
      key_name => source_name,
      'classes' => classes[source_name].sort.uniq,
      'feature_categories' => [],
      'description' => nil,
      'introduced_by_url' => nil,
      'milestone' => milestone
    }

    # Only DB_DOCS_PATH itself is created above; make sure the sub-directory
    # used for views exists before writing into it.
    FileUtils.mkdir_p(File.dirname(file))

    # An empty YAML document loads as nil/false and is treated like a missing file.
    existing_metadata = YAML.safe_load(File.read(file)) if File.exist?(file)

    if existing_metadata
      outdated = false

      if existing_metadata[key_name] != table_metadata[key_name]
        existing_metadata[key_name] = table_metadata[key_name]
        outdated = true
      end

      # Array() tolerates files whose 'classes' entry is missing or null.
      if Array(existing_metadata['classes']).sort != table_metadata['classes'].sort
        existing_metadata['classes'] = table_metadata['classes']
        outdated = true
      end

      File.write(file, existing_metadata.to_yaml) if outdated
    else
      File.write(file, table_metadata.to_yaml)
    end
  end
end
2023-03-17 16:20:25 +05:30
private
# Returns 'view' when source_name is listed in views, 'table' otherwise.
def data_source_type(source_name, views)
  views.include?(source_name) ? 'view' : 'table'
end
# Builds the path of the YAML file for source_name under DB_DOCS_PATH.
# Names listed in views are placed in the 'views' sub-directory.
def dictionary_file_path(source_name, views)
  file_name = "#{source_name}.yml"

  return File.join(DB_DOCS_PATH, 'views', file_name) if views.include?(source_name)

  File.join(DB_DOCS_PATH, '', file_name)
end
2022-08-13 15:12:31 +05:30
# Temporary disable this, see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/85760#note_998452069
# Rake::Task['db:migrate'].enhance do
# Rake::Task['gitlab:db:dictionary:generate'].invoke if Rails.env.development?
# end
end
2016-06-02 11:05:42 +05:30
end
end