debian-mirror-gitlab/spec/lib/bulk_imports/pipeline/runner_spec.rb

# frozen_string_literal: true

require 'spec_helper'

RSpec.describe BulkImports::Pipeline::Runner do
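  # Anonymous classes standing in for the pipeline's extract/transform/load steps;
  # each defines only the single method the runner calls on that step.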
  let(:extractor) do
    Class.new do
      def initialize(options = {}); end

      def extract(context); end
    end
  end

  let(:transformer) do
    Class.new do
      def initialize(options = {}); end

      def transform(context, data); end
    end
  end

  let(:loader) do
    Class.new do
      def initialize(options = {}); end

      def load(context, data); end
    end
  end
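
  # Register the stub steps under the constant names referenced below and define a
  # test pipeline class, BulkImports::MyPipeline, wired to them.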
  before do
    stub_const('BulkImports::Extractor', extractor)
    stub_const('BulkImports::Transformer', transformer)
    stub_const('BulkImports::Loader', loader)

    pipeline = Class.new do
      include BulkImports::Pipeline

      extractor BulkImports::Extractor
      transformer BulkImports::Transformer
      loader BulkImports::Loader
    end

    stub_const('BulkImports::MyPipeline', pipeline)
  end

  let_it_be_with_reload(:entity) { create(:bulk_import_entity) }

  let(:tracker) { create(:bulk_import_tracker, entity: entity) }
  let(:context) { BulkImports::Pipeline::Context.new(tracker, extra: :data) }

  subject { BulkImports::MyPipeline.new(context) }
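
  # Shared assertions for failure paths: the error is logged, persisted as a
  # failure record on the entity, and entity/tracker state depends on whether
  # the pipeline is marked to abort on failure.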
  shared_examples 'failed pipeline' do |exception_class, exception_message|
    it 'logs import failure' do
      expect_next_instance_of(Gitlab::Import::Logger) do |logger|
        expect(logger).to receive(:error)
          .with(
            a_hash_including(
              'bulk_import_entity_id' => entity.id,
              'bulk_import_id' => entity.bulk_import_id,
              'bulk_import_entity_type' => entity.source_type,
              'source_full_path' => entity.source_full_path,
              'pipeline_step' => :extractor,
              'pipeline_class' => 'BulkImports::MyPipeline',
              'exception.class' => exception_class,
              'exception.message' => exception_message,
              'correlation_id' => anything,
              'class' => 'BulkImports::MyPipeline',
              'message' => "Pipeline failed",
              'importer' => 'gitlab_migration',
              'exception.backtrace' => anything,
              'source_version' => entity.bulk_import.source_version_info.to_s
            )
          )
      end

      expect { subject.run }
        .to change(entity.failures, :count).by(1)

      failure = entity.failures.first

      expect(failure).to be_present
      expect(failure.pipeline_class).to eq('BulkImports::MyPipeline')
      expect(failure.pipeline_step).to eq('extractor')
      expect(failure.exception_class).to eq(exception_class)
      expect(failure.exception_message).to eq(exception_message)
    end

    context 'when pipeline is marked to abort on failure' do
      before do
        BulkImports::MyPipeline.abort_on_failure!
      end

      it 'logs a warn message and marks entity and tracker as failed' do
        expect_next_instance_of(Gitlab::Import::Logger) do |logger|
          expect(logger).to receive(:warn)
            .with(
              log_params(
                context,
                message: 'Aborting entity migration due to pipeline failure',
                pipeline_class: 'BulkImports::MyPipeline',
                importer: 'gitlab_migration'
              )
            )
        end

        subject.run

        expect(entity.failed?).to eq(true)
        expect(tracker.failed?).to eq(true)
      end
    end

    context 'when pipeline is not marked to abort on failure' do
      it 'does not mark entity as failed' do
        subject.run

        expect(tracker.failed?).to eq(true)
        expect(entity.failed?).to eq(false)
      end
    end
  end

  describe 'pipeline runner' do
    context 'when entity is not marked as failed' do
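      # Happy path: a single extracted page flows through transform and load, and
      # every pipeline stage emits a structured info log entry.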
      it 'runs pipeline extractor, transformer, loader' do
        expect_next_instance_of(BulkImports::Extractor) do |extractor|
          expect(extractor)
            .to receive(:extract)
            .with(context)
            .and_return(extracted_data)
        end

        expect_next_instance_of(BulkImports::Transformer) do |transformer|
          expect(transformer)
            .to receive(:transform)
            .with(context, extracted_data.data.first)
            .and_return(extracted_data.data.first)
        end

        expect_next_instance_of(BulkImports::Loader) do |loader|
          expect(loader)
            .to receive(:load)
            .with(context, extracted_data.data.first)
        end

        expect_next_instance_of(Gitlab::Import::Logger) do |logger|
          expect(logger).to receive(:info)
            .with(
              log_params(
                context,
                message: 'Pipeline started',
                pipeline_class: 'BulkImports::MyPipeline'
              )
            )

          expect(logger).to receive(:info)
            .with(
              log_params(
                context,
                pipeline_class: 'BulkImports::MyPipeline',
                pipeline_step: :extractor,
                step_class: 'BulkImports::Extractor'
              )
            )

          expect(logger).to receive(:info)
            .with(
              log_params(
                context,
                pipeline_class: 'BulkImports::MyPipeline',
                pipeline_step: :transformer,
                step_class: 'BulkImports::Transformer'
              )
            )

          expect(logger).to receive(:info)
            .with(
              log_params(
                context,
                pipeline_class: 'BulkImports::MyPipeline',
                pipeline_step: :loader,
                step_class: 'BulkImports::Loader'
              )
            )

          expect(logger).to receive(:info)
            .with(
              log_params(
                context,
                pipeline_class: 'BulkImports::MyPipeline',
                pipeline_step: :after_run
              )
            )

          expect(logger).to receive(:info)
            .with(
              log_params(
                context,
                message: 'Pipeline finished',
                pipeline_class: 'BulkImports::MyPipeline'
              )
            )
        end

        subject.run
      end
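
      # Pagination: when a page reports has_next_page, the runner updates tracker
      # information and runs the ETL steps again for the following page.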
      context 'when extracted data has multiple pages' do
        it 'updates tracker information and runs pipeline again' do
          first_page = extracted_data(has_next_page: true)
          last_page = extracted_data

          expect_next_instance_of(BulkImports::Extractor) do |extractor|
            expect(extractor)
              .to receive(:extract)
              .with(context)
              .and_return(first_page, last_page)
          end

          subject.run
        end
      end
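
      # Network failures are classified by HTTP status: a retriable status (429)
      # raises BulkImports::RetryPipelineError, anything else goes through the
      # shared 'failed pipeline' path.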
      context 'when the exception BulkImports::NetworkError is raised' do
        before do
          allow_next_instance_of(BulkImports::Extractor) do |extractor|
            allow(extractor).to receive(:extract).with(context).and_raise(
              BulkImports::NetworkError.new(
                'Net::ReadTimeout',
                response: instance_double(HTTParty::Response, code: response_status_code, headers: {})
              )
            )
          end
        end

        context 'when exception is retriable' do
          let(:response_status_code) { 429 }

          it 'raises the exception BulkImports::RetryPipelineError' do
            expect { subject.run }.to raise_error(BulkImports::RetryPipelineError)
          end
        end

        context 'when exception is not retriable' do
          let(:response_status_code) { 503 }

          it_behaves_like 'failed pipeline', 'BulkImports::NetworkError', 'Net::ReadTimeout'
        end
      end
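
      # A retriable network error raised while fetching a later page (after a
      # successful first extract) should still surface as RetryPipelineError.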
      context 'when a retriable BulkImports::NetworkError exception is raised while extracting the next page' do
        before do
          call_count = 0

          allow_next_instance_of(BulkImports::Extractor) do |extractor|
            allow(extractor).to receive(:extract).with(context).twice do
              if call_count.zero?
                call_count += 1
                extracted_data(has_next_page: true)
              else
                raise(
                  BulkImports::NetworkError.new(
                    response: instance_double(HTTParty::Response, code: 429, headers: {})
                  )
                )
              end
            end
          end
        end

        it 'raises the exception BulkImports::RetryPipelineError' do
          expect { subject.run }.to raise_error(BulkImports::RetryPipelineError)
        end
      end

      context 'when the exception StandardError is raised' do
        before do
          allow_next_instance_of(BulkImports::Extractor) do |extractor|
            allow(extractor).to receive(:extract).with(context).and_raise(StandardError, 'Error!')
          end
        end

        it_behaves_like 'failed pipeline', 'StandardError', 'Error!'
      end
    end
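
    # A failed entity short-circuits the run: the runner logs a warning and
    # returns without executing the pipeline steps.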
    context 'when entity is marked as failed' do
      it 'logs and returns without execution' do
        entity.fail_op!

        expect_next_instance_of(Gitlab::Import::Logger) do |logger|
          expect(logger).to receive(:warn)
            .with(
              log_params(
                context,
                message: 'Skipping pipeline due to failed entity',
                pipeline_class: 'BulkImports::MyPipeline'
              )
            )
        end

        subject.run
      end
    end
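
    # Expected structured-log payload for a given context; individual examples
    # merge in their step- or message-specific keys.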
    def log_params(context, extra = {})
      {
        bulk_import_id: context.bulk_import_id,
        bulk_import_entity_id: context.entity.id,
        bulk_import_entity_type: context.entity.source_type,
        source_full_path: entity.source_full_path,
        source_version: context.entity.bulk_import.source_version_info.to_s,
        importer: 'gitlab_migration',
        context_extra: context.extra
      }.merge(extra)
    end
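
    # Builds a single-record ExtractedData payload; has_next_page toggles whether
    # the page_info reports another page (and a next-page cursor).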
    def extracted_data(has_next_page: false)
      BulkImports::Pipeline::ExtractedData.new(
        data: { foo: :bar },
        page_info: {
          'has_next_page' => has_next_page,
          'next_page' => has_next_page ? 'cursor' : nil
        }
      )
    end
  end
end