# frozen_string_literal: true
require 'yaml'
module Backup
  # Backup task for Git repositories.
  #
  # Project (plus wiki and design) and snippet repositories are handed to the
  # injected +strategy+ object, either one after another or, for dumps, from
  # several threads per configured storage.
  class Repositories < Task
    extend ::Gitlab::Utils::Override

    # @param progress [#puts] output sink, forwarded to Task#initialize
    # @param strategy [#start, #enqueue, #finish!, #parallel_enqueue?]
    #   object that performs the actual repository backup/restore work
    # @param max_concurrency [Integer] permits of the semaphore shared by all
    #   worker threads during a parallel dump
    # @param max_storage_concurrency [Integer] number of worker threads
    #   started per storage during a parallel dump
    def initialize(progress, strategy:, max_concurrency: 1, max_storage_concurrency: 1)
      super(progress)

      @strategy = strategy
      @max_concurrency = max_concurrency
      @max_storage_concurrency = max_storage_concurrency
    end

    # Enqueues every repository for backup into +path+.
    #
    # Falls back to consecutive enqueueing when no concurrency above 1 was
    # requested or the strategy does not support parallel enqueueing.
    # Otherwise one thread per configured storage is started and the first
    # error collected from those threads is re-raised after all have joined.
    # +strategy.finish!+ runs in every case.
    #
    # @param path [String] backup destination passed to the strategy
    # @raise [StandardError] the first error reported by a storage thread
    override :dump
    def dump(path)
      strategy.start(:create, path)

      # gitaly-backup is designed to handle concurrency on its own. So we want
      # to avoid entering the buggy concurrency code here when gitaly-backup
      # is enabled.
      if (max_concurrency <= 1 && max_storage_concurrency <= 1) || !strategy.parallel_enqueue?
        return enqueue_consecutive
      end

      # NOTE(review): because the shortcut above is evaluated first, values
      # below 1 are only rejected when the parallel path would be taken.
      if max_concurrency < 1 || max_storage_concurrency < 1
        puts "GITLAB_BACKUP_MAX_CONCURRENCY and GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY must have a value of at least 1".color(:red)
        exit 1
      end

      check_valid_storages!

      semaphore = Concurrent::Semaphore.new(max_concurrency)
      errors = Queue.new

      threads = Gitlab.config.repositories.storages.keys.map do |storage|
        Thread.new do
          Rails.application.executor.wrap do
            enqueue_storage(storage, semaphore, max_storage_concurrency: max_storage_concurrency)
          rescue StandardError => e
            # Collected here and re-raised on the calling thread below.
            errors << e
          end
        end
      end

      # Joining blocks this thread; wrapped in permit_concurrent_loads like
      # every other blocking wait in this class.
      ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
        threads.each(&:join)
      end

      raise errors.pop unless errors.empty?
    ensure
      strategy.finish!
    end

    # Enqueues every repository for restore from +path+, always consecutively.
    #
    # The +ensure+ section finishes the strategy and then removes snippets
    # with invalid repositories and re-schedules object pools; these steps
    # therefore also run when enqueueing raised.
    #
    # @param path [String] backup source passed to the strategy
    override :restore
    def restore(path)
      strategy.start(:restore, path)
      enqueue_consecutive
    ensure
      strategy.finish!

      cleanup_snippets_without_repositories
      restore_object_pools
    end

    # @return [String] translated display name of this task
    override :human_name
    def human_name
      _('repositories')
    end

    private

    attr_reader :strategy, :max_concurrency, :max_storage_concurrency

    # Verifies that no repository record refers to a storage that is missing
    # from the configured storages.
    #
    # @raise [Error] when any class in +repository_storage_klasses+ has rows
    #   outside the configured storages
    def check_valid_storages!
      repository_storage_klasses.each do |klass|
        if klass.excluding_repository_storage(Gitlab.config.repositories.storages.keys).exists?
          raise Error, "repositories.storages in gitlab.yml does not include all storages used by #{klass}"
        end
      end
    end

    # @return [Array<Class>] repository models checked by +check_valid_storages!+
    def repository_storage_klasses
      [ProjectRepository, SnippetRepository]
    end

    # Enqueues all projects, then all snippets, on the calling thread.
    def enqueue_consecutive
      enqueue_consecutive_projects
      enqueue_consecutive_snippets
    end

    # Enqueues every project in batches of 1000.
    def enqueue_consecutive_projects
      project_relation.find_each(batch_size: 1000) do |project|
        enqueue_project(project)
      end
    end

    # Enqueues every snippet in batches of 1000.
    def enqueue_consecutive_snippets
      Snippet.find_each(batch_size: 1000) { |snippet| enqueue_snippet(snippet) }
    end

    # Enqueues all containers of one storage using +max_storage_concurrency+
    # worker threads fed through a queue of size 1.
    #
    # Each worker takes a permit from +semaphore+ (shared across storages)
    # around every container it enqueues. A worker stops at its first error
    # and records it; the first recorded error is re-raised once the producer
    # has finished. The queue is closed and all workers are joined in +ensure+.
    #
    # NOTE(review): if every worker has stopped after an error while the
    # producer is blocked in +queue.push+, nothing pops the queue again —
    # confirm this cannot hang.
    #
    # @param storage [String] storage name
    # @param semaphore [Concurrent::Semaphore] global concurrency limiter
    # @param max_storage_concurrency [Integer] number of worker threads
    # @raise [StandardError] the first error reported by a worker
    def enqueue_storage(storage, semaphore, max_storage_concurrency:)
      errors = Queue.new
      queue = InterlockSizedQueue.new(1)

      threads = Array.new(max_storage_concurrency) do
        Thread.new do
          Rails.application.executor.wrap do
            # pop returns nil once the queue is closed and drained, which
            # ends the loop.
            while container = queue.pop
              ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
                semaphore.acquire
              end

              begin
                enqueue_container(container)
              rescue StandardError => e
                errors << e
                break
              ensure
                semaphore.release
              end
            end
          end
        end
      end

      enqueue_records_for_storage(storage, queue, errors)

      raise errors.pop unless errors.empty?
    ensure
      queue.close
      ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
        threads.each(&:join)
      end
    end

    # Dispatches a container to the matching enqueue method. Containers that
    # are neither a Project nor a Snippet are ignored.
    def enqueue_container(container)
      case container
      when Project
        enqueue_project(container)
      when Snippet
        enqueue_snippet(container)
      end
    end

    # Enqueues the project, wiki and design repositories of +project+.
    def enqueue_project(project)
      strategy.enqueue(project, Gitlab::GlRepository::PROJECT)
      strategy.enqueue(project, Gitlab::GlRepository::WIKI)
      strategy.enqueue(project, Gitlab::GlRepository::DESIGN)
    end

    # Enqueues the repository of +snippet+.
    def enqueue_snippet(snippet)
      strategy.enqueue(snippet, Gitlab::GlRepository::SNIPPET)
    end

    # Pushes every record of +storage+ onto +queue+ for the worker threads.
    #
    # Stops producing altogether as soon as a worker has reported an error,
    # instead of moving on to (and querying) the next relation.
    #
    # @param storage [String] storage name
    # @param queue [InterlockSizedQueue] hand-off queue to the workers
    # @param errors [Queue] errors reported by the workers
    def enqueue_records_for_storage(storage, queue, errors)
      records_to_enqueue(storage).each do |relation|
        relation.find_each(batch_size: 100) do |container|
          return unless errors.empty?

          queue.push(container)
        end
      end
    end

    # @return [Array] relations (projects, then snippets) stored on +storage+
    def records_to_enqueue(storage)
      [projects_in_storage(storage), snippets_in_storage(storage)]
    end

    # @return projects whose ProjectRepository row is on +storage+
    def projects_in_storage(storage)
      project_relation.id_in(ProjectRepository.for_repository_storage(storage).select(:project_id))
    end

    # @return base project relation with route, group and namespace owner
    #   eager-loaded
    def project_relation
      Project.includes(:route, :group, namespace: :owner)
    end

    # @return snippets whose SnippetRepository row is on +storage+
    def snippets_in_storage(storage)
      Snippet.id_in(SnippetRepository.for_repository_storage(storage).select(:snippet_id))
    end

    # Resets every object pool to state 'none' and schedules it again.
    #
    # A pool without a source project gets one derived from its first member
    # project's fork-network root; if none can be found the pool is skipped.
    def restore_object_pools
      PoolRepository.includes(:source_project).find_each do |pool|
        progress.puts " - Object pool #{pool.disk_path}..."

        pool.source_project ||= pool.member_projects.first&.root_of_fork_network
        unless pool.source_project
          progress.puts " - Object pool #{pool.disk_path}... " + "[SKIPPED]".color(:cyan)
          next
        end

        pool.state = 'none'
        pool.save

        pool.schedule
      end
    end

    # Snippets without a repository should be removed because they failed to import
    # due to having invalid repositories
    def cleanup_snippets_without_repositories
      invalid_snippets = []

      Snippet.find_each(batch_size: 1000) do |snippet|
        response = Snippets::RepositoryValidationService.new(nil, snippet).execute
        next if response.success?

        snippet.repository.remove
        progress.puts("Snippet #{snippet.full_path} can't be restored: #{response.message}")

        invalid_snippets << snippet.id
      end

      # Rows are removed in one statement after the scan has finished.
      Snippet.id_in(invalid_snippets).delete_all
    end

    # SizedQueue whose +pop+ and +push+ run inside
    # +permit_concurrent_loads+ (presumably so a thread blocked on the queue
    # does not hold up Rails code loading — confirm).
    class InterlockSizedQueue < SizedQueue
      extend ::Gitlab::Utils::Override

      override :pop
      def pop(*)
        ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
          super
        end
      end

      override :push
      def push(*)
        ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
          super
        end
      end
    end
  end
end
# Extension hook: lets another module be prepended to Backup::Repositories
# (presumably edition-specific overrides — see `prepend_mod_with`).
Backup::Repositories.prepend_mod_with('Backup::Repositories')