debian-mirror-gitlab/lib/backup/database.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

268 lines
8.1 KiB
Ruby
Raw Normal View History

2018-12-05 23:21:45 +05:30
# frozen_string_literal: true
2014-09-02 18:07:02 +05:30
require 'yaml'
module Backup
2022-05-07 20:08:51 +05:30
class Database < Task
extend ::Gitlab::Utils::Override
2019-07-07 11:18:12 +05:30
include Backup::Helper
2023-04-23 21:23:45 +05:30
attr_reader :force
2014-09-02 18:07:02 +05:30
2020-11-24 15:15:51 +05:30
# Patterns for restore stderr lines that are echoed but not recorded as
# restore errors.
IGNORED_ERRORS = [
  # Ignore warnings
  /WARNING:/,
  # Ignore the DROP errors; recent database dumps will use --if-exists with pg_dump
  /does not exist$/,
  # User may not have permissions to drop extensions or schemas
  /must be owner of/
].freeze

# One regexp that matches a line hitting any of the patterns above.
IGNORED_ERRORS_REGEXP = Regexp.union(*IGNORED_ERRORS).freeze
2023-04-23 21:23:45 +05:30
# @param progress [Object] output sink handed to the Task superclass
# @param force [Boolean] when truthy, #restore skips its 5 second abort
#   window and #pre_restore_warning returns nothing
def initialize(progress, force:)
  super(progress)

  @force = force
end
2022-05-07 20:08:51 +05:30
override :dump
2023-04-23 21:23:45 +05:30
# Dumps every database listed in #snapshot_ids into +destination_dir+.
#
# Each dump is pinned to the snapshot exported for that database, and the
# transaction holding the snapshot is rolled back once the dump returns.
# Transaction timeouts are restored for all databases on the way out,
# whether or not the dump succeeded.
#
# @param destination_dir [String] directory that receives the dump files
# @param backup_id [Object] not used by this method
# @raise [DatabaseBackupError] when a dump reports failure
def dump(destination_dir, backup_id)
  FileUtils.mkdir_p(destination_dir)

  snapshot_ids.each do |database_name, snapshot_id|
    base_model = base_models_for_backup[database_name]
    config = base_model.connection_db_config.configuration_hash
    pg_database = config[:database]

    # Start from a clean slate: remove any previous dump for this database.
    db_file_name = file_name(destination_dir, database_name)
    FileUtils.rm_f(db_file_name)

    progress.print "Dumping PostgreSQL database #{pg_database} ... "
    pg_env(config)

    # '--clean' includes 'DROP TABLE' statements in the DB dump.
    pgsql_args = ['--clean', '--if-exists', "--snapshot=#{snapshot_id}"]

    if Gitlab.config.backup.pg_schema
      # Restrict the dump to the configured schema plus GitLab's extra schemas.
      pgsql_args.push('-n', Gitlab.config.backup.pg_schema)
      Gitlab::Database::EXTRA_SCHEMAS.each { |schema| pgsql_args.push('-n', schema.to_s) }
    end

    success = Backup::Dump::Postgres.new.dump(pg_database, db_file_name, pgsql_args)
    base_model.connection.rollback_transaction

    raise DatabaseBackupError.new(config, db_file_name) unless success

    report_success(success)
    progress.flush
  end
ensure
  base_models_for_backup.each do |_database_name, base_model|
    Gitlab::Database::TransactionTimeoutSettings.new(base_model.connection).restore_timeouts
  end
end
2022-05-07 20:08:51 +05:30
override :restore
2023-04-23 21:23:45 +05:30
def restore(destination_dir)
base_models_for_backup.each do |database_name, base_model|
config = base_model.connection_db_config.configuration_hash
2022-08-27 11:52:29 +05:30
2023-04-23 21:23:45 +05:30
db_file_name = file_name(destination_dir, database_name)
database = config[:database]
2022-08-27 11:52:29 +05:30
2023-04-23 21:23:45 +05:30
unless File.exist?(db_file_name)
raise(Backup::Error, "Source database file does not exist #{db_file_name}") if main_database?(database_name)
2022-05-07 20:08:51 +05:30
2023-04-23 21:23:45 +05:30
progress.puts "Source backup for the database #{database_name} doesn't exist. Skipping the task"
return false
2017-08-17 22:00:37 +05:30
end
2015-11-26 14:37:03 +05:30
2023-04-23 21:23:45 +05:30
unless force
progress.puts 'Removing all tables. Press `Ctrl-C` within 5 seconds to abort'.color(:yellow)
sleep(5)
end
2020-11-24 15:15:51 +05:30
2023-04-23 21:23:45 +05:30
# Drop all tables Load the schema to ensure we don't have any newer tables
# hanging out from a failed upgrade
drop_tables(database_name)
2015-11-26 14:37:03 +05:30
2023-04-23 21:23:45 +05:30
decompress_rd, decompress_wr = IO.pipe
decompress_pid = spawn(*%w(gzip -cd), out: decompress_wr, in: db_file_name)
decompress_wr.close
status, @errors =
case config[:adapter]
when "postgresql" then
progress.print "Restoring PostgreSQL database #{database} ... "
pg_env(config)
execute_and_track_errors(pg_restore_cmd(database), decompress_rd)
end
decompress_rd.close
Process.waitpid(decompress_pid)
success = $?.success? && status.success?
if @errors.present?
progress.print "------ BEGIN ERRORS -----\n".color(:yellow)
progress.print @errors.join.color(:yellow)
progress.print "------ END ERRORS -------\n".color(:yellow)
end
report_success(success)
raise Backup::Error, 'Restore failed' unless success
end
2022-05-07 20:08:51 +05:30
end
2020-11-24 15:15:51 +05:30
2022-05-07 20:08:51 +05:30
override :pre_restore_warning
# Builds the warning text for the pre-restore step.
#
# @return [String, nil] the warning message, or nil when +force+ is set
def pre_restore_warning
  return if force

  <<~MSG
    Be sure to stop Puma, Sidekiq, and any other process that
    connects to the database before proceeding. For Omnibus
    installs, see the following link for more information:
    https://docs.gitlab.com/ee/raketasks/backup_restore.html#restore-for-omnibus-gitlab-installations
    Before restoring the database, we will remove all existing
    tables to avoid future upgrade problems. Be aware that if you have
    custom tables in the GitLab database these tables and all data will be
    removed.
  MSG
end
2022-05-07 20:08:51 +05:30
override :post_restore_warning
# Builds the warning text for the post-restore step.
#
# @return [String, nil] the warning message, or nil when no restore errors
#   were recorded in @errors
def post_restore_warning
  return unless @errors.present?

  <<~MSG
    There were errors in restoring the schema. This may cause
    issues if this results in missing indexes, constraints, or
    columns. Please record the errors above and contact GitLab
    Support if you have questions:
    https://about.gitlab.com/support/
  MSG
end
2014-09-02 18:07:02 +05:30
protected
2023-04-23 21:23:45 +05:30
# Memoized lookup of the base models to back up, as returned by
# Gitlab::Database.database_base_models_with_gitlab_shared.
def base_models_for_backup
  return @base_models_for_backup if @base_models_for_backup

  @base_models_for_backup = Gitlab::Database.database_base_models_with_gitlab_shared
end
2023-04-23 21:23:45 +05:30
# Whether +database_name+ (anything responding to #to_sym) names the main
# database.
#
# @return [Boolean]
def main_database?(database_name)
  database_name.to_sym == :main
end
2023-04-23 21:23:45 +05:30
# Path of the dump file for +database_name+ inside +base_dir+.
#
# The main database uses the bare "database.sql.gz"; every other database
# gets its name as a prefix, e.g. "ci_database.sql.gz".
#
# @param base_dir [String] directory holding the dump files
# @param database_name [String, Symbol]
# @return [String]
def file_name(base_dir, database_name)
  # Reuse the shared predicate rather than repeating the :main comparison.
  prefix = main_database?(database_name) ? '' : "#{database_name}_"

  File.join(base_dir, "#{prefix}database.sql.gz")
end
2020-11-24 15:15:51 +05:30
# Whether a stderr +line+ matches one of the IGNORED_ERRORS patterns and
# should therefore not be recorded as a restore error.
#
# @return [Boolean]
def ignore_error?(line)
  IGNORED_ERRORS_REGEXP.match?(line)
end
# Runs +cmd+, feeding it everything readable from +decompress_rd+ on stdin.
#
# The command's stdout is read in full and then written to $stdout. Each
# stderr line is echoed with `warn` and collected unless #ignore_error?
# accepts it.
#
# @param cmd [Array<String>] command and arguments
# @param decompress_rd [IO] source of the data to pipe into the command
# @return [Array] the command's exit status and the collected stderr lines
def execute_and_track_errors(cmd, decompress_rd)
  tracked = []

  Open3.popen3(ENV, *cmd) do |child_in, child_out, child_err, wait_thread|
    child_in.binmode

    # Drain stdout and stderr on their own threads while stdin is being fed.
    stdout_thread = Thread.new { $stdout.write(child_out.read) }

    stderr_thread = Thread.new do
      while (line = child_err.gets)
        warn(line)
        tracked << line unless ignore_error?(line)
      end
    end

    begin
      IO.copy_stream(decompress_rd, child_in)
    rescue Errno::EPIPE
      # The command stopped reading its stdin; its exit status is still
      # collected and reported below.
    end
    child_in.close

    [wait_thread, stdout_thread, stderr_thread].each(&:join)
    [wait_thread.value, tracked]
  end
end
2023-04-23 21:23:45 +05:30
# Exports PostgreSQL connection settings to the process environment.
#
# For each supported setting, a non-blank GITLAB_BACKUP_<VAR> environment
# variable takes precedence over the value in +config+. A PG* variable is
# only assigned when a value is found and is otherwise left untouched.
#
# @param config [Hash] database configuration keyed by symbol
def pg_env(config)
  env_names = {
    username: 'PGUSER',
    host: 'PGHOST',
    port: 'PGPORT',
    password: 'PGPASSWORD',
    # SSL
    sslmode: 'PGSSLMODE',
    sslkey: 'PGSSLKEY',
    sslcert: 'PGSSLCERT',
    sslrootcert: 'PGSSLROOTCERT',
    sslcrl: 'PGSSLCRL',
    sslcompression: 'PGSSLCOMPRESSION'
  }

  env_names.each do |config_key, env_name|
    # This enables the use of different PostgreSQL settings in
    # case PgBouncer is used. PgBouncer clears the search path,
    # which wreaks havoc on Rails if connections are reused.
    value = ENV["GITLAB_BACKUP_#{env_name}"].presence || config[config_key].to_s.presence
    next unless value

    ENV[env_name] = value
  end
end
# Prints a green "[DONE]" or a red "[FAILED]" marker to +progress+.
#
# @param success [Object] truthy selects the "[DONE]" marker
def report_success(success)
  marker = success ? '[DONE]'.color(:green) : '[FAILED]'.color(:red)

  progress.puts marker
end
2020-11-24 15:15:51 +05:30
private
2023-04-23 21:23:45 +05:30
# Invokes the rake task that drops the tables of +database_name+.
#
# Uses the per-database task when one is defined, and falls back to the
# generic task when there is only a single database. Does nothing when
# neither applies.
def drop_tables(database_name)
  per_database_task = "gitlab:db:drop_tables:#{database_name}"

  task_name =
    if Rake::Task.task_defined?(per_database_task)
      per_database_task
    elsif Gitlab::Database.database_base_models.one?
      # In single database, we do not have rake tasks per database
      'gitlab:db:drop_tables'
    end

  return unless task_name

  puts_time 'Cleaning the database ... '.color(:blue)
  Rake::Task[task_name].invoke
  puts_time 'done'.color(:green)
end
# Command line used to load a SQL dump into +database+.
#
# @param database [String] name of the target database
# @return [Array] the `psql` executable followed by the database name
def pg_restore_cmd(database)
  ['psql', database]
end
2023-04-23 21:23:45 +05:30
# Memoized map of database name to exported snapshot id.
#
# On first use, for every model in #base_models_for_backup this disables
# transaction timeouts, opens a repeatable-read transaction and exports a
# snapshot from it. The transactions are left open by this method.
#
# @return [Hash] database name => snapshot id
def snapshot_ids
  return @snapshot_ids if @snapshot_ids

  exported = {}

  base_models_for_backup.each do |database_name, base_model|
    connection = base_model.connection

    Gitlab::Database::TransactionTimeoutSettings.new(connection).disable_timeouts
    connection.begin_transaction(isolation: :repeatable_read)

    row = connection.execute("SELECT pg_export_snapshot() as snapshot_id;").first
    exported[database_name] = row['snapshot_id']
  end

  @snapshot_ids = exported
end
2014-09-02 18:07:02 +05:30
end
end