debian-mirror-gitlab/lib/tasks/gitlab/import_export/import.rake
2020-03-13 15:44:24 +05:30

211 lines
5.5 KiB
Ruby

# frozen_string_literal: true
# Import large project archives
#
# This task:
# 1. Disables ObjectStorage for archive upload
# 2. Performs Sidekiq job synchronously
#
# @example
# bundle exec rake "gitlab:import_export:import[root, root, imported_project, /path/to/file.tar.gz, true]"
#
namespace :gitlab do
namespace :import_export do
desc 'GitLab | Import/Export | EXPERIMENTAL | Import large project archives'
task :import, [:username, :namespace_path, :project_path, :archive_path, :measurement_enabled] => :gitlab_environment do |_t, args|
# Load it here to avoid polluting Rake tasks with Sidekiq test warnings
require 'sidekiq/testing'
warn_user_is_not_gitlab
if ENV['IMPORT_DEBUG'].present?
ActiveRecord::Base.logger = Logger.new(STDOUT)
end
GitlabProjectImport.new(
namespace_path: args.namespace_path,
project_path: args.project_path,
username: args.username,
file_path: args.archive_path,
measurement_enabled: args.measurement_enabled == 'true'
).import
end
end
end
class GitlabProjectImport
def initialize(opts)
@project_path = opts.fetch(:project_path)
@file_path = opts.fetch(:file_path)
@namespace = Namespace.find_by_full_path(opts.fetch(:namespace_path))
@current_user = User.find_by_username(opts.fetch(:username))
@measurement_enabled = opts.fetch(:measurement_enabled)
end
def import
show_import_start_message
run_isolated_sidekiq_job
show_import_failures_count
if @project&.import_state&.last_error
puts "ERROR: #{@project.import_state.last_error}"
exit 1
elsif @project.errors.any?
puts "ERROR: #{@project.errors.full_messages.join(', ')}"
exit 1
else
puts 'Done!'
end
rescue StandardError => e
puts "Exception: #{e.message}"
puts e.backtrace
exit 1
end
private
def with_request_store
RequestStore.begin!
yield
ensure
RequestStore.end!
RequestStore.clear!
end
def with_count_queries(&block)
count = 0
counter_f = ->(name, started, finished, unique_id, payload) {
unless payload[:name].in? %w[CACHE SCHEMA]
count += 1
end
}
ActiveSupport::Notifications.subscribed(counter_f, "sql.active_record", &block)
puts "Number of sql calls: #{count}"
end
def with_gc_counter
gc_counts_before = GC.stat.select { |k, v| k =~ /count/ }
yield
gc_counts_after = GC.stat.select { |k, v| k =~ /count/ }
stats = gc_counts_before.merge(gc_counts_after) { |k, vb, va| va - vb }
puts "Total GC count: #{stats[:count]}"
puts "Minor GC count: #{stats[:minor_gc_count]}"
puts "Major GC count: #{stats[:major_gc_count]}"
end
def with_measure_time
timing = Benchmark.realtime do
yield
end
time = Time.at(timing).utc.strftime("%H:%M:%S")
puts "Time to finish: #{time}"
end
def with_measuring
puts "Measuring enabled..."
with_gc_counter do
with_count_queries do
with_measure_time do
yield
end
end
end
end
def measurement_enabled?
@measurement_enabled != false
end
# We want to ensure that all Sidekiq jobs are executed
# synchronously as part of that process.
# This ensures that all expensive operations do not escape
# to general Sidekiq clusters/nodes.
def with_isolated_sidekiq_job
Sidekiq::Testing.fake! do
with_request_store do
# If you are attempting to import a large project into a development environment,
# you may see Gitaly throw an error about too many calls or invocations.
# This is due to a n+1 calls limit being set for development setups (not enforced in production)
# https://gitlab.com/gitlab-org/gitlab/-/merge_requests/24475#note_283090635
# For development setups, this code-path will be excluded from n+1 detection.
::Gitlab::GitalyClient.allow_n_plus_1_calls do
measurement_enabled? ? with_measuring { yield } : yield
end
end
true
end
end
def run_isolated_sidekiq_job
with_isolated_sidekiq_job do
@project = create_project
execute_sidekiq_job
end
end
def create_project
# We are disabling ObjectStorage for `import`
# as it is too slow to handle big archives:
# 1. DB transaction timeouts on upload
# 2. Download of archive before unpacking
disable_upload_object_storage do
service = Projects::GitlabProjectsImportService.new(
@current_user,
{
namespace_id: @namespace.id,
path: @project_path,
file: File.open(@file_path)
}
)
service.execute
end
end
def execute_sidekiq_job
Sidekiq::Worker.drain_all
end
def disable_upload_object_storage
overwrite_uploads_setting('background_upload', false) do
overwrite_uploads_setting('direct_upload', false) do
yield
end
end
end
def overwrite_uploads_setting(key, value)
old_value = Settings.uploads.object_store[key]
Settings.uploads.object_store[key] = value
yield
ensure
Settings.uploads.object_store[key] = old_value
end
def full_path
"#{@namespace.full_path}/#{@project_path}"
end
def show_import_start_message
puts "Importing GitLab export: #{@file_path} into GitLab" \
" #{full_path}" \
" as #{@current_user.name}"
end
def show_import_failures_count
return unless @project.import_failures.exists?
puts "Total number of not imported relations: #{@project.import_failures.count}"
end
end