debian-mirror-gitlab/lib/backup/database.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

281 lines
8.4 KiB
Ruby
Raw Permalink Normal View History

2018-12-05 23:21:45 +05:30
# frozen_string_literal: true
2014-09-02 18:07:02 +05:30
require 'yaml'
module Backup
2022-05-07 20:08:51 +05:30
class Database < Task
extend ::Gitlab::Utils::Override
2019-07-07 11:18:12 +05:30
include Backup::Helper
2023-04-23 21:23:45 +05:30
attr_reader :force
2014-09-02 18:07:02 +05:30
2020-11-24 15:15:51 +05:30
# Patterns for stderr lines of the restore command that are not recorded
# as restore errors (see #ignore_error?).
IGNORED_ERRORS = [
  # Ignore warnings
  /WARNING:/,
  # Ignore the DROP errors; recent database dumps will use --if-exists with pg_dump
  /does not exist$/,
  # User may not have permissions to drop extensions or schemas
  /must be owner of/
].freeze

# Single regexp that matches when any of IGNORED_ERRORS matches.
IGNORED_ERRORS_REGEXP = Regexp.union(IGNORED_ERRORS).freeze
2023-04-23 21:23:45 +05:30
# @param progress [#print, #puts, #flush] sink for progress output, passed on to the superclass
# @param force [Boolean] when truthy, #restore skips the 5-second abort
#   window and #pre_restore_warning returns nothing
def initialize(progress, force:)
  super(progress)

  @force = force
end
2022-05-07 20:08:51 +05:30
override :dump
2023-04-23 21:23:45 +05:30
# Dumps every database yielded by #each_database_snapshot_id into
# +destination_dir+ via Backup::Dump::Postgres.
#
# Connection timeouts are restored on all backup connections when the
# method exits, whether or not the dump succeeded.
#
# @param destination_dir [String] directory receiving the dump files; created if missing
# @param backup_id [Object] not used by this method
# @raise [DatabaseBackupError] when a dump reports failure
def dump(destination_dir, backup_id)
  FileUtils.mkdir_p(destination_dir)

  each_database_snapshot_id do |database_name, snapshot_id|
    base_model = base_models_for_backup[database_name]
    config = base_model.connection_db_config.configuration_hash

    db_file_name = file_name(destination_dir, database_name)
    FileUtils.rm_f(db_file_name)

    pg_database = config[:database]

    progress.print "Dumping PostgreSQL database #{pg_database} ... "
    pg_env(config)

    pgsql_args = ["--clean"] # Pass '--clean' to include 'DROP TABLE' statements in the DB dump.
    pgsql_args << '--if-exists'
    pgsql_args << "--snapshot=#{snapshot_id}" if snapshot_id

    if Gitlab.config.backup.pg_schema
      pgsql_args << '-n'
      pgsql_args << Gitlab.config.backup.pg_schema

      Gitlab::Database::EXTRA_SCHEMAS.each do |schema|
        pgsql_args << '-n'
        pgsql_args << schema.to_s
      end
    end

    success =
      begin
        Backup::Dump::Postgres.new.dump(pg_database, db_file_name, pgsql_args)
      ensure
        # The transaction opened to export the snapshot must be closed even
        # when the dump raises, otherwise it stays open on the connection.
        base_model.connection.rollback_transaction if snapshot_id
      end

    # Report the outcome before raising so a failed dump prints [FAILED],
    # the same way #restore does.
    report_success(success)
    progress.flush

    raise DatabaseBackupError.new(config, db_file_name) unless success
  end
ensure
  ::Gitlab::Database::EachDatabase.each_database_connection(
    only: base_models_for_backup.keys, include_shared: false
  ) do |connection, _|
    Gitlab::Database::TransactionTimeoutSettings.new(connection).restore_timeouts
  end
end
2022-05-07 20:08:51 +05:30
override :restore
2023-04-23 21:23:45 +05:30
# Restores every database in #base_models_for_backup from its gzipped SQL
# dump in +destination_dir+ by piping `gzip -cd` output into the command
# returned by #pg_restore_cmd.
#
# Non-ignorable stderr lines of the restore command are stored in @errors
# (overwritten for each database) and echoed to +progress+.
#
# @param destination_dir [String] directory containing the dump files
# @return [false] as soon as a non-main database has no dump file; the
#   databases that follow in the iteration are then not processed
# @raise [Backup::Error] when the main database dump is missing or a restore fails
def restore(destination_dir)
  base_models_for_backup.each do |database_name, base_model|
    config = base_model.connection_db_config.configuration_hash

    db_file_name = file_name(destination_dir, database_name)
    database = config[:database]

    unless File.exist?(db_file_name)
      raise(Backup::Error, "Source database file does not exist #{db_file_name}") if main_database?(database_name)

      progress.puts "Source backup for the database #{database_name} doesn't exist. Skipping the task"
      # NOTE(review): this leaves #restore entirely, so any databases later
      # in the iteration are skipped as well - confirm `next` is not intended.
      return false
    end

    unless force
      progress.puts 'Removing all tables. Press `Ctrl-C` within 5 seconds to abort'.color(:yellow)
      sleep(5)
    end

    # Drop all tables before loading the schema to ensure we don't have any
    # newer tables hanging out from a failed upgrade
    drop_tables(database_name)

    decompress_rd, decompress_wr = IO.pipe
    decompress_pid = spawn(*%w[gzip -cd], out: decompress_wr, in: db_file_name)
    # The child holds its own copy of the write end; ours must be closed so
    # the reader sees EOF once gzip finishes.
    decompress_wr.close

    begin
      status, @errors =
        case config[:adapter]
        when "postgresql"
          progress.print "Restoring PostgreSQL database #{database} ... "
          pg_env(config)
          execute_and_track_errors(pg_restore_cmd(database), decompress_rd)
        end
    ensure
      # Always release the pipe and reap gzip, even when the restore command
      # raised; closing the read end first lets a blocked gzip terminate.
      decompress_rd.close
      Process.waitpid(decompress_pid)
    end

    # $? holds the gzip exit status set by Process.waitpid above.
    success = $?.success? && status.success?

    if @errors.present?
      progress.print "------ BEGIN ERRORS -----\n".color(:yellow)
      progress.print @errors.join.color(:yellow)
      progress.print "------ END ERRORS -------\n".color(:yellow)
    end

    report_success(success)
    raise Backup::Error, 'Restore failed' unless success
  end
end
2020-11-24 15:15:51 +05:30
2022-05-07 20:08:51 +05:30
override :pre_restore_warning
# Warning text to show before a restore starts.
#
# @return [String, nil] the warning, or nil when +force+ is set
def pre_restore_warning
  return if force

  <<~MSG
    Be sure to stop Puma, Sidekiq, and any other process that
    connects to the database before proceeding. For Omnibus
    installs, see the following link for more information:
    https://docs.gitlab.com/ee/raketasks/backup_restore.html#restore-for-omnibus-gitlab-installations
    Before restoring the database, we will remove all existing
    tables to avoid future upgrade problems. Be aware that if you have
    custom tables in the GitLab database these tables and all data will be
    removed.
  MSG
end
2022-05-07 20:08:51 +05:30
override :post_restore_warning
# Warning text to show after a restore that recorded errors in @errors.
#
# @return [String, nil] the warning, or nil when @errors is blank
def post_restore_warning
  return unless @errors.present?

  <<~MSG
    There were errors in restoring the schema. This may cause
    issues if this results in missing indexes, constraints, or
    columns. Please record the errors above and contact GitLab
    Support if you have questions:
    https://about.gitlab.com/support/
  MSG
end
2014-09-02 18:07:02 +05:30
protected
2023-04-23 21:23:45 +05:30
# Memoized result of Gitlab::Database.database_base_models_with_gitlab_shared.
# Callers in this class treat it as a hash of database name => base model.
def base_models_for_backup
  @base_models_for_backup ||= Gitlab::Database.database_base_models_with_gitlab_shared
end
2023-04-23 21:23:45 +05:30
# @param database_name [String, Symbol]
# @return [Boolean] true when +database_name+ names the :main database
def main_database?(database_name)
  database_name.to_sym == :main
end
2023-04-23 21:23:45 +05:30
# Path of the gzipped SQL dump for +database_name+ inside +base_dir+.
# The :main database uses "database.sql.gz"; every other database gets
# its name as a prefix, e.g. "ci_database.sql.gz".
#
# @param base_dir [String]
# @param database_name [String, Symbol]
# @return [String]
def file_name(base_dir, database_name)
  prefix = database_name.to_sym == :main ? '' : "#{database_name}_"

  File.join(base_dir, "#{prefix}database.sql.gz")
end
2020-11-24 15:15:51 +05:30
# @param line [String] one line of stderr output
# @return [Boolean] true when the line matches any pattern in IGNORED_ERRORS
def ignore_error?(line)
  IGNORED_ERRORS.any? { |pattern| pattern.match?(line) }
end
# Runs +cmd+ with the current ENV, feeding it everything readable from
# +decompress_rd+ on stdin.
#
# The child's stdout is copied to $stdout once it has been read in full.
# Each stderr line is echoed with `warn` and collected unless
# #ignore_error? accepts it.
#
# @param cmd [Array<String>] command and arguments
# @param decompress_rd [IO] readable stream providing the child's stdin
# @return [Array(Process::Status, Array<String>)] exit status and collected stderr lines
def execute_and_track_errors(cmd, decompress_rd)
  tracked = []

  Open3.popen3(ENV, *cmd) do |child_in, child_out, child_err, wait_thr|
    child_in.binmode

    # Drain stdout and stderr on their own threads while stdin is being fed.
    stdout_pump = Thread.new { $stdout.write(child_out.read) }

    stderr_pump = Thread.new do
      while (line = child_err.gets)
        warn(line)
        tracked << line unless ignore_error?(line)
      end
    end

    begin
      IO.copy_stream(decompress_rd, child_in)
    rescue Errno::EPIPE
      # Deliberately ignored: the exit status below is still returned.
    end

    child_in.close

    [wait_thr, stdout_pump, stderr_pump].each(&:join)

    [wait_thr.value, tracked]
  end
end
2023-04-23 21:23:45 +05:30
# Copies connection settings into the PG* environment variables of the
# current process. For each setting, a non-blank GITLAB_BACKUP_<VAR>
# environment variable wins over the value in +config+; variables with
# no value from either source are left untouched.
#
# @param config [Hash{Symbol => Object}] database configuration hash
# @return [Hash{Symbol => String}] the option => variable-name mapping that was applied
def pg_env(config)
  args = {
    username: 'PGUSER',
    host: 'PGHOST',
    port: 'PGPORT',
    password: 'PGPASSWORD',
    # SSL
    sslmode: 'PGSSLMODE',
    sslkey: 'PGSSLKEY',
    sslcert: 'PGSSLCERT',
    sslrootcert: 'PGSSLROOTCERT',
    sslcrl: 'PGSSLCRL',
    sslcompression: 'PGSSLCOMPRESSION'
  }

  args.each do |opt, arg|
    # This enables the use of different PostgreSQL settings in
    # case PgBouncer is used. PgBouncer clears the search path,
    # which wreaks havoc on Rails if connections are reused.
    override_name = "GITLAB_BACKUP_#{arg}"
    val = ENV[override_name].presence || config[opt].to_s.presence

    ENV[arg] = val if val
  end
end
# Prints a green "[DONE]" or a red "[FAILED]" line to +progress+.
#
# @param success [Boolean] outcome to report; any falsy value reports failure
def report_success(success)
  outcome = success ? '[DONE]'.color(:green) : '[FAILED]'.color(:red)

  progress.puts outcome
end
2020-11-24 15:15:51 +05:30
private
2023-04-23 21:23:45 +05:30
# Invokes the Rake task that drops the tables of +database_name+:
# "gitlab:db:drop_tables:<database_name>" when that task is defined,
# otherwise the generic "gitlab:db:drop_tables".
#
# @param database_name [String, Symbol]
def drop_tables(database_name)
  puts_time 'Cleaning the database ... '.color(:blue)

  task_name = "gitlab:db:drop_tables:#{database_name}"
  # In single database (single or two connections)
  task_name = "gitlab:db:drop_tables" unless Rake::Task.task_defined?(task_name)

  Rake::Task[task_name].invoke

  puts_time 'done'.color(:green)
end
# Command line used to load a SQL dump into +database+.
#
# @param database [String] name of the target database
# @return [Array<String>] argv for the restore command
def pg_restore_cmd(database)
  ['psql', database]
end
2023-04-23 21:23:45 +05:30
2023-06-20 00:43:36 +05:30
# Yields each backup database name together with a snapshot id.
#
# In multiple-databases mode, timeouts are disabled on every connection, a
# repeatable-read transaction is opened and its exported snapshot id is
# recorded; the transaction is left open for the caller to roll back.
# In any other mode the snapshot id is nil and the connection is untouched.
#
# All connections are prepared before the first pair is yielded.
#
# @yieldparam database_name [String, Symbol]
# @yieldparam snapshot_id [String, nil]
def each_database_snapshot_id(&block)
  @database_to_snapshot_id = {}

  ::Gitlab::Database::EachDatabase.each_database_connection(
    only: base_models_for_backup.keys, include_shared: false
  ) do |connection, database_name|
    @database_to_snapshot_id[database_name] = nil

    next unless Gitlab::Database.database_mode == Gitlab::Database::MODE_MULTIPLE_DATABASES

    Gitlab::Database::TransactionTimeoutSettings.new(connection).disable_timeouts

    connection.begin_transaction(isolation: :repeatable_read)
    @database_to_snapshot_id[database_name] = connection.select_value("SELECT pg_export_snapshot()")
  end

  @database_to_snapshot_id.each(&block)
end
2014-09-02 18:07:02 +05:30
end
end