debian-mirror-gitlab/spec/tasks/gitlab/backup_rake_spec.rb
2023-07-09 08:55:56 +05:30

700 lines
26 KiB
Ruby

# frozen_string_literal: true
require 'rake_helper'
RSpec.describe 'gitlab:backup namespace rake tasks', :delete, feature_category: :backup_restore do
let(:enable_registry) { true }
let(:backup_restore_pid_path) { "#{Rails.application.root}/tmp/backup_restore.pid" }
let(:backup_tasks) { %w[db repo uploads builds artifacts pages lfs terraform_state registry packages] }
let(:backup_types) do
%w[db repositories uploads builds artifacts pages lfs terraform_state registry packages]
end
def tars_glob
Dir.glob(File.join(Gitlab.config.backup.path, '*_gitlab_backup.tar'))
end
def backup_tar
tars_glob.first
end
def backup_files
%w[
backup_information.yml
artifacts.tar.gz
builds.tar.gz
lfs.tar.gz
terraform_state.tar.gz
pages.tar.gz
packages.tar.gz
]
end
def backup_directories
%w[db repositories]
end
before(:all) do
Rake.application.rake_require 'active_record/railties/databases'
Rake.application.rake_require 'tasks/gitlab/backup'
Rake.application.rake_require 'tasks/gitlab/shell'
Rake.application.rake_require 'tasks/gitlab/db'
Rake.application.rake_require 'tasks/cache'
end
before do
stub_env('force', 'yes')
FileUtils.rm(tars_glob, force: true)
FileUtils.rm(backup_files, force: true)
FileUtils.rm_rf(backup_directories, secure: true)
FileUtils.mkdir_p('tmp/tests/public/uploads')
reenable_backup_sub_tasks
stub_container_registry_config(enabled: enable_registry)
end
after do
FileUtils.rm(tars_glob, force: true)
FileUtils.rm(backup_files, force: true)
FileUtils.rm(backup_restore_pid_path, force: true)
FileUtils.rm_rf(backup_directories, secure: true)
FileUtils.rm_rf('tmp/tests/public/uploads', secure: true)
end
def reenable_backup_sub_tasks
backup_tasks.each do |subtask|
Rake::Task["gitlab:backup:#{subtask}:create"].reenable
end
end
describe 'lock parallel backups' do
let(:progress) { $stdout }
let(:delete_message) { /-- Deleting backup and restore PID file/ }
let(:pid_file) do
File.open(backup_restore_pid_path, File::RDWR | File::CREAT)
end
before do
allow(Kernel).to receive(:system).and_return(true)
allow(YAML).to receive(:safe_load_file).and_return({ gitlab_version: Gitlab::VERSION })
end
context 'when a process is running in parallel' do
before do
File.open(backup_restore_pid_path, 'wb') do |file|
file.write('123456')
file.close
end
end
it 'exits the new process' do
allow(File).to receive(:open).and_call_original
allow(File).to receive(:open).with(backup_restore_pid_path, any_args).and_yield(pid_file)
allow(Process).to receive(:getpgid).with(123456).and_return(123456)
expect { run_rake_task('gitlab:backup:create') }.to raise_error(SystemExit).and output(
<<~MESSAGE
Backup and restore in progress:
There is a backup and restore task in progress (PID 123456).
Try to run the current task once the previous one ends.
MESSAGE
).to_stdout
end
end
context 'when no process is running in parallel but a PID file exists' do
let(:rewritten_message) do
<<~MESSAGE
The PID file #{backup_restore_pid_path} exists and contains 123456, but the process is not running.
The PID file will be rewritten with the current process ID #{Process.pid}.
MESSAGE
end
before do
File.open(backup_restore_pid_path, 'wb') do |file|
file.write('123456')
file.close
end
end
it 'rewrites, locks and deletes the PID file while logging a message' do
allow(File).to receive(:open).and_call_original
allow(File).to receive(:open).with(backup_restore_pid_path, any_args).and_yield(pid_file)
allow(Process).to receive(:getpgid).with(123456).and_raise(Errno::ESRCH)
allow(progress).to receive(:puts).with(delete_message).once
allow(progress).to receive(:puts).with(rewritten_message).once
allow_next_instance_of(::Backup::Manager) do |instance|
allow(instance).to receive(:run_restore_task).with('db')
end
expect(pid_file).to receive(:flock).with(File::LOCK_EX)
expect(pid_file).to receive(:flock).with(File::LOCK_UN)
expect(File).to receive(:delete).with(backup_restore_pid_path)
expect(progress).to receive(:puts).with(rewritten_message).once
expect(progress).to receive(:puts).with(delete_message).once
run_rake_task('gitlab:backup:db:restore')
end
end
context 'when no process is running in parallel' do
using RSpec::Parameterized::TableSyntax
where(:task_name, :rake_task) do
'db' | 'gitlab:backup:db:restore'
'repositories' | 'gitlab:backup:repo:restore'
'builds' | 'gitlab:backup:builds:restore'
'uploads' | 'gitlab:backup:uploads:restore'
'artifacts' | 'gitlab:backup:artifacts:restore'
'pages' | 'gitlab:backup:pages:restore'
'lfs' | 'gitlab:backup:lfs:restore'
'terraform_state' | 'gitlab:backup:terraform_state:restore'
'registry' | 'gitlab:backup:registry:restore'
'packages' | 'gitlab:backup:packages:restore'
end
with_them do
before do
allow(File).to receive(:open).and_call_original
allow(File).to receive(:open).with(backup_restore_pid_path, any_args).and_yield(pid_file)
allow(File).to receive(:delete).with(backup_restore_pid_path)
allow(progress).to receive(:puts).at_least(:once)
allow_next_instance_of(::Backup::Manager) do |instance|
Array(task_name).each do |task|
allow(instance).to receive(:run_restore_task).with(task)
end
end
end
it 'locks and deletes the PID file while logging a message' do
expect(pid_file).to receive(:flock).with(File::LOCK_EX)
expect(pid_file).to receive(:flock).with(File::LOCK_UN)
expect(File).to receive(:delete).with(backup_restore_pid_path)
expect(progress).to receive(:puts).with(delete_message)
run_rake_task(rake_task)
end
end
end
end
describe 'backup_restore' do
context 'with gitlab version' do
before do
allow(Dir).to receive(:glob).and_return(['1_gitlab_backup.tar'])
allow(File).to receive(:exist?).and_return(true)
allow(File).to receive(:exist?).with(backup_restore_pid_path).and_return(false)
allow(Kernel).to receive(:system).and_return(true)
allow(FileUtils).to receive(:cp_r).and_return(true)
allow(FileUtils).to receive(:mv).and_return(true)
allow(Rake::Task["gitlab:shell:setup"])
.to receive(:invoke).and_return(true)
end
let(:gitlab_version) { Gitlab::VERSION }
context 'when restore matches gitlab version' do
before do
allow(YAML).to receive(:safe_load_file)
.and_return({ gitlab_version: gitlab_version })
expect_next_instance_of(::Backup::Manager) do |instance|
backup_types.each do |subtask|
expect(instance).to receive(:run_restore_task).with(subtask).ordered
end
expect(instance).not_to receive(:run_restore_task)
end
expect(Rake::Task['gitlab:shell:setup']).to receive(:invoke)
end
it 'invokes restoration on match' do
expect { run_rake_task('gitlab:backup:restore') }.to output.to_stdout_from_any_process
end
end
end
context 'when the restore directory is not empty' do
before do
# We only need a backup of the repositories for this test
stub_env('SKIP', 'db,uploads,builds,artifacts,lfs,terraform_state,registry')
create(:project, :repository)
end
it 'removes stale data' do
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
excluded_project = create(:project, :repository, name: 'mepmep')
expect { run_rake_task('gitlab:backup:restore') }.to output.to_stdout_from_any_process
raw_repo = excluded_project.repository.raw
# The restore will not find the repository in the backup, but will create
# an empty one in its place
expect(raw_repo.empty?).to be(true)
end
end
context 'when the backup is restored' do
let!(:included_project) { create(:project, :repository) }
let!(:original_checksum) { included_project.repository.checksum }
before do
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
backup_tar = Dir.glob(File.join(Gitlab.config.backup.path, '*_gitlab_backup.tar')).last
allow(Dir).to receive(:glob).and_return([backup_tar])
allow(File).to receive(:exist?).and_return(true)
allow(File).to receive(:exist?).with(backup_restore_pid_path).and_return(false)
allow(Kernel).to receive(:system).and_return(true)
allow(FileUtils).to receive(:cp_r).and_return(true)
allow(FileUtils).to receive(:mv).and_return(true)
allow(YAML).to receive(:safe_load_file)
.and_return({ gitlab_version: Gitlab::VERSION })
expect_next_instance_of(::Backup::Manager) do |instance|
backup_types.each do |subtask|
expect(instance).to receive(:run_restore_task).with(subtask).ordered
end
expect(instance).not_to receive(:run_restore_task)
end
expect(Rake::Task['gitlab:shell:setup']).to receive(:invoke)
end
it 'restores the data' do
expect { run_rake_task('gitlab:backup:restore') }.to output.to_stdout_from_any_process
raw_repo = included_project.repository.raw
expect(raw_repo.empty?).to be(false)
expect(included_project.repository.checksum).to eq(original_checksum)
end
end
end
# backup_restore task
describe 'backup' do
before do
# This reconnect makes our project fixture disappear, breaking the restore. Stub it out.
allow(ApplicationRecord.connection).to receive(:reconnect!)
allow(Ci::ApplicationRecord.connection).to receive(:reconnect!)
end
let!(:project) { create(:project, :repository) }
context 'with specific backup tasks' do
before do
stub_env('SKIP', 'db')
end
it 'prints a progress message to stdout' do
backup_tasks.each do |task|
expect { run_rake_task("gitlab:backup:#{task}:create") }.to output(/Dumping /).to_stdout_from_any_process
end
end
it 'logs the progress to log file' do
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping database ... [SKIPPED]")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping repositories ... ")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping repositories ... done")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping uploads ... ")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping uploads ... done")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping builds ... ")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping builds ... done")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping artifacts ... ")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping artifacts ... done")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping pages ... ")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping pages ... done")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping lfs objects ... ")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping lfs objects ... done")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping terraform states ... ")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping terraform states ... done")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping container registry images ... ")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping container registry images ... done")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping packages ... ")
expect(Gitlab::BackupLogger).to receive(:info).with(message: "Dumping packages ... done")
backup_tasks.each do |task|
run_rake_task("gitlab:backup:#{task}:create")
end
end
end
describe 'backup create fails' do
using RSpec::Parameterized::TableSyntax
file_backup_error = Backup::FileBackupError.new('/tmp', '/tmp/backup/uploads')
config = ActiveRecord::Base.configurations.find_db_config(Rails.env).configuration_hash
db_file_name = File.join(Gitlab.config.backup.path, 'db', 'database.sql.gz')
db_backup_error = Backup::DatabaseBackupError.new(config, db_file_name)
where(:backup_class, :rake_task, :error) do
Backup::Database | 'gitlab:backup:db:create' | db_backup_error
Backup::Files | 'gitlab:backup:builds:create' | file_backup_error
Backup::Files | 'gitlab:backup:uploads:create' | file_backup_error
Backup::Files | 'gitlab:backup:artifacts:create' | file_backup_error
Backup::Files | 'gitlab:backup:pages:create' | file_backup_error
Backup::Files | 'gitlab:backup:lfs:create' | file_backup_error
Backup::Files | 'gitlab:backup:registry:create' | file_backup_error
end
with_them do
before do
allow_next_instance_of(backup_class) do |instance|
allow(instance).to receive(:dump).and_raise(error)
end
end
it "raises an error with message" do
expect { run_rake_task(rake_task) }.to output(Regexp.new(error.message)).to_stdout_from_any_process
end
end
end
context 'with tar creation' do
context 'with archive file permissions' do
it 'sets correct permissions on the tar file' do
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
expect(File).to exist(backup_tar)
expect(File::Stat.new(backup_tar).mode.to_s(8)).to eq('100600')
end
context 'with custom archive_permissions' do
before do
allow(Gitlab.config.backup).to receive(:archive_permissions).and_return(0o651)
end
it 'uses the custom permissions' do
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
expect(File::Stat.new(backup_tar).mode.to_s(8)).to eq('100651')
end
end
end
it 'sets correct permissions on the tar contents' do
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
tar_contents, exit_status = Gitlab::Popen.popen(
%W[
tar -tvf #{backup_tar}
db
uploads.tar.gz
repositories
builds.tar.gz
artifacts.tar.gz
pages.tar.gz
lfs.tar.gz
terraform_state.tar.gz
registry.tar.gz
packages.tar.gz
]
)
expect(exit_status).to eq(0)
expect(tar_contents).to match('db')
expect(tar_contents).to match('uploads.tar.gz')
expect(tar_contents).to match('repositories/')
expect(tar_contents).to match('builds.tar.gz')
expect(tar_contents).to match('artifacts.tar.gz')
expect(tar_contents).to match('pages.tar.gz')
expect(tar_contents).to match('lfs.tar.gz')
expect(tar_contents).to match('terraform_state.tar.gz')
expect(tar_contents).to match('registry.tar.gz')
expect(tar_contents).to match('packages.tar.gz')
expect(tar_contents).not_to match(%r{^.{4,9}[rwx].* (database.sql.gz|uploads.tar.gz|repositories|builds.tar.gz|
pages.tar.gz|artifacts.tar.gz|registry.tar.gz)/$})
end
it 'deletes temp directories' do
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
temp_dirs = Dir.glob(
File.join(
Gitlab.config.backup.path,
'{db,repositories,uploads,builds,artifacts,pages,lfs,terraform_state,registry,packages}'
)
)
expect(temp_dirs).to be_empty
end
context 'when registry is disabled' do
let(:enable_registry) { false }
it 'does not create registry.tar.gz' do
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
tar_contents, exit_status = Gitlab::Popen.popen(
%W[tar -tvf #{backup_tar}]
)
expect(exit_status).to eq(0)
expect(tar_contents).not_to match('registry.tar.gz')
end
end
end
context 'with multiple repository storages' do
include StubConfiguration
let(:default_storage_name) { 'default' }
let(:second_storage_name) { 'test_second_storage' }
before do
# We only need a backup of the repositories for this test
stub_env('SKIP', 'db,uploads,builds,artifacts,lfs,terraform_state,registry')
stub_storage_settings(second_storage_name => {
'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address,
'path' => TestEnv::SECOND_STORAGE_PATH
})
end
shared_examples 'includes repositories in all repository storages' do
specify :aggregate_failures do
project_a = create(:project, :repository)
project_snippet_a = create(:project_snippet, :repository, project: project_a, author: project_a.first_owner)
project_b = create(:project, :repository, repository_storage: second_storage_name)
project_snippet_b = create(
:project_snippet,
:repository,
project: project_b,
author: project_b.first_owner,
repository_storage: second_storage_name
)
create(:wiki_page, container: project_a)
create(:design, :with_file, issue: create(:issue, project: project_a))
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
tar_contents, exit_status = Gitlab::Popen.popen(
%W[tar -tvf #{backup_tar} repositories]
)
tar_lines = tar_contents.lines.grep(/\.bundle/)
expect(exit_status).to eq(0)
%W[
#{project_a.disk_path}/.+/001.bundle
#{project_a.disk_path}.wiki/.+/001.bundle
#{project_a.disk_path}.design/.+/001.bundle
#{project_b.disk_path}/.+/001.bundle
#{project_snippet_a.disk_path}/.+/001.bundle
#{project_snippet_b.disk_path}/.+/001.bundle
].each do |repo_name|
expect(tar_lines).to include(a_string_matching(repo_name))
end
end
end
context 'with no concurrency' do
it_behaves_like 'includes repositories in all repository storages'
end
context 'with concurrency' do
before do
stub_env('GITLAB_BACKUP_MAX_CONCURRENCY', 4)
end
it_behaves_like 'includes repositories in all repository storages'
end
context 'when REPOSITORIES_STORAGES is set' do
before do
stub_env('REPOSITORIES_STORAGES', default_storage_name)
end
it 'includes repositories in default repository storage', :aggregate_failures do
project_a = create(:project, :repository)
project_snippet_a = create(:project_snippet, :repository, project: project_a, author: project_a.first_owner)
project_b = create(:project, :repository, repository_storage: second_storage_name)
project_snippet_b = create(
:project_snippet,
:repository,
project: project_b,
author: project_b.first_owner,
repository_storage: second_storage_name
)
create(:wiki_page, container: project_a)
create(:design, :with_file, issue: create(:issue, project: project_a))
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
tar_contents, exit_status = Gitlab::Popen.popen(
%W[tar -tvf #{backup_tar} repositories]
)
tar_lines = tar_contents.lines.grep(/\.bundle/)
expect(exit_status).to eq(0)
%W[
#{project_a.disk_path}/.+/001.bundle
#{project_a.disk_path}.wiki/.+/001.bundle
#{project_a.disk_path}.design/.+/001.bundle
#{project_snippet_a.disk_path}/.+/001.bundle
].each do |repo_name|
expect(tar_lines).to include(a_string_matching(repo_name))
end
%W[
#{project_b.disk_path}/.+/001.bundle
#{project_snippet_b.disk_path}/.+/001.bundle
].each do |repo_name|
expect(tar_lines).not_to include(a_string_matching(repo_name))
end
end
end
end
context 'with concurrency settings' do
before do
# We only need a backup of the repositories for this test
stub_env('SKIP', 'db,uploads,builds,artifacts,lfs,terraform_state,registry')
create(:project, :repository)
end
it 'passes through concurrency environment variables' do
stub_env('GITLAB_BACKUP_MAX_CONCURRENCY', 5)
stub_env('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 2)
expect(::Backup::Repositories).to receive(:new)
.with(anything, strategy: anything, storages: [], paths: [])
.and_call_original
expect(::Backup::GitalyBackup).to receive(:new).with(
anything,
max_parallelism: 5,
storage_parallelism: 2,
incremental: false
).and_call_original
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
end
end
context 'when CRON env is set' do
before do
stub_env('CRON', '1')
end
it 'does not output to stdout' do
expect { run_rake_task('gitlab:backup:create') }.not_to output.to_stdout_from_any_process
end
end
end
# backup_create task
describe "skipping items in a backup" do
before do
stub_env('SKIP', 'an-unknown-type,repositories,uploads,anotherunknowntype')
create(:project, :repository)
end
it "does not contain repositories and uploads" do
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
tar_contents, _exit_status = Gitlab::Popen.popen(
%W[
tar -tvf #{backup_tar}
db
uploads.tar.gz
repositories
builds.tar.gz
artifacts.tar.gz
pages.tar.gz
lfs.tar.gz
terraform_state.tar.gz
registry.tar.gz
packages.tar.gz
]
)
expect(tar_contents).to match('db/')
expect(tar_contents).to match('uploads.tar.gz: Not found in archive')
expect(tar_contents).to match('builds.tar.gz')
expect(tar_contents).to match('artifacts.tar.gz')
expect(tar_contents).to match('lfs.tar.gz')
expect(tar_contents).to match('terraform_state.tar.gz')
expect(tar_contents).to match('pages.tar.gz')
expect(tar_contents).to match('registry.tar.gz')
expect(tar_contents).to match('packages.tar.gz')
expect(tar_contents).not_to match('repositories/')
expect(tar_contents).to match('repositories: Not found in archive')
end
it 'does not invoke restore of repositories and uploads' do
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
allow(Rake::Task['gitlab:shell:setup'])
.to receive(:invoke).and_return(true)
expect_next_instance_of(::Backup::Manager) do |instance|
(backup_types - %w[repositories uploads]).each do |subtask|
expect(instance).to receive(:run_restore_task).with(subtask).ordered
end
expect(instance).not_to receive(:run_restore_task)
end
expect(Rake::Task['gitlab:shell:setup']).to receive :invoke
expect { run_rake_task('gitlab:backup:restore') }.to output.to_stdout_from_any_process
end
end
describe 'skipping tar archive creation' do
before do
stub_env('SKIP', 'tar')
create(:project, :repository)
end
it 'created files with backup content and no tar archive' do
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
dir_contents = Dir.children(Gitlab.config.backup.path)
expect(dir_contents).to contain_exactly(
'backup_information.yml',
'db',
'uploads.tar.gz',
'builds.tar.gz',
'artifacts.tar.gz',
'lfs.tar.gz',
'terraform_state.tar.gz',
'pages.tar.gz',
'registry.tar.gz',
'packages.tar.gz',
'repositories'
)
end
it 'those component files can be restored from' do
expect { run_rake_task("gitlab:backup:create") }.to output.to_stdout_from_any_process
allow(Rake::Task['gitlab:shell:setup'])
.to receive(:invoke).and_return(true)
expect_next_instance_of(::Backup::Manager) do |instance|
backup_types.each do |subtask|
expect(instance).to receive(:run_restore_task).with(subtask).ordered
end
expect(instance).not_to receive(:run_restore_task)
end
expect(Rake::Task['gitlab:shell:setup']).to receive :invoke
expect { run_rake_task("gitlab:backup:restore") }.to output.to_stdout_from_any_process
end
end
describe "Human Readable Backup Name" do
it 'name has human readable time' do
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
expect(backup_tar).to match(/\d+_\d{4}_\d{2}_\d{2}_\d+\.\d+\.\d+.*_gitlab_backup.tar$/)
end
end
end
# gitlab:app namespace