# debian-mirror-gitlab/lib/gitlab/github_import/bulk_importing.rb
# frozen_string_literal: true
module Gitlab
  module GithubImport
    # Mixin for GitHub importers that turn a collection of objects into
    # attribute hashes and write them to the database in bulk.
    #
    # This module calls `model`, `already_imported?(object)` and
    # `build_attributes(object)` but does not define them, and it ships
    # raising placeholders for `object_type` and `github_identifiers(object)`;
    # the including class is expected to supply all five.
    module BulkImporting
      attr_reader :project, :client

      # project - An instance of `Project`.
      # client - An instance of `Gitlab::GithubImport::Client`.
      def initialize(project, client)
        @project = project
        @client = client
        @validation_errors = []
      end

      # Builds and returns an Array of objects to bulk insert into the
      # database and array of validation errors if object is invalid.
      #
      # enum - An Enumerable that returns the objects to turn into database
      #        rows. Each element may be a bare object or an
      #        `[object, anything]` pair; only the first item is used.
      #
      # Returns a two-element Array `[rows, errors]`: `rows` holds the
      # attribute Hashes of valid objects, `errors` holds one Hash per invalid
      # object with the keys `:validation_errors` and `:github_identifiers`.
      def build_database_rows(enum)
        errors = []

        rows = enum.each_with_object([]) do |(object, _), result|
          next if already_imported?(object)

          attrs = build_attributes(object)
          build_record = model.new(attrs)

          if build_record.invalid?
            # Named `identifiers` so the local does not shadow the
            # `github_identifiers` method it is assigned from.
            identifiers = github_identifiers(object)

            log_error(identifiers, build_record.errors.full_messages)

            errors << {
              validation_errors: build_record.errors,
              github_identifiers: identifiers
            }

            next
          end

          result << attrs
        end

        log_and_increment_counter(rows.size, :fetched)

        [rows, errors]
      end

      # Bulk inserts the given rows into the database.
      #
      # rows - An Array of attribute Hashes, as returned by
      #        `build_database_rows`.
      # batch_size - The maximum number of rows written per insert.
      def bulk_insert(rows, batch_size: 100)
        rows.each_slice(batch_size) do |slice|
          ApplicationRecord.legacy_bulk_insert(model.table_name, slice) # rubocop:disable Gitlab/BulkInsert

          log_and_increment_counter(slice.size, :imported)
        end
      end

      # The kind of object being imported; used in log messages and as the
      # counter key. Must be overridden by the including class.
      def object_type
        raise NotImplementedError
      end

      # Stores one import failure row per validation error on the project.
      #
      # errors - An Array of Hashes with the keys `:validation_errors` and
      #          `:github_identifiers`, as returned by `build_database_rows`.
      #
      # Returns nil without touching the database when `errors` is empty.
      def bulk_insert_failures(errors)
        # `insert_all` raises ArgumentError for an empty list on Rails versions
        # before 7.1, and there is nothing to record anyway.
        return if errors.empty?

        rows = errors.map do |error|
          correlation_id_value = Labkit::Correlation::CorrelationId.current_or_new_id

          {
            source: self.class.name,
            exception_class: 'ActiveRecord::RecordInvalid',
            # Only the first message is kept, cut to 255 characters.
            exception_message: error[:validation_errors].full_messages.first.truncate(255),
            correlation_id_value: correlation_id_value,
            retry_count: nil,
            created_at: Time.zone.now,
            external_identifiers: error[:github_identifiers]
          }
        end

        project.import_failures.insert_all(rows)
      end

      private

      # Logs how many objects went through `operation` and adds the same
      # amount to the project's object counter.
      #
      # value - The number of objects.
      # operation - A Symbol naming the step, e.g. `:fetched` or `:imported`.
      def log_and_increment_counter(value, operation)
        Gitlab::Import::Logger.info(
          import_type: :github,
          project_id: project.id,
          importer: self.class.name,
          message: "#{value} #{object_type.to_s.pluralize} #{operation}"
        )

        Gitlab::GithubImport::ObjectCounter.increment(
          project,
          object_type,
          operation,
          value: value
        )
      end

      # Logs the validation messages of an object that could not be imported.
      #
      # github_identifiers - The value returned by `github_identifiers(object)`.
      # messages - The validation error messages to log.
      def log_error(github_identifiers, messages)
        Gitlab::Import::Logger.error(
          import_type: :github,
          project_id: project.id,
          importer: self.class.name,
          message: messages,
          github_identifiers: github_identifiers
        )
      end

      # Returns the identifiers used to point at `object` in logs and failure
      # rows. Must be overridden by the including class.
      def github_identifiers(object)
        raise NotImplementedError
      end
    end
  end
end