# frozen_string_literal: true

require 'spec_helper'

# Specs for BulkImports::PipelineWorker: the Sidekiq worker that runs a single
# migration pipeline for a bulk-import entity, re-enqueues the next stage via
# BulkImports::EntityWorker, and records tracker state transitions
# (enqueued -> started -> finished/failed).
RSpec.describe BulkImports::PipelineWorker do
  # Minimal stand-in pipeline: does nothing on #run and is not a
  # file-extraction pipeline, so the worker takes the plain execution path.
  let(:pipeline_class) do
    Class.new do
      def initialize(_); end

      def run; end

      def self.file_extraction_pipeline?
        false
      end
    end
  end

  let_it_be(:bulk_import) { create(:bulk_import) }
  let_it_be(:config) { create(:bulk_import_configuration, bulk_import: bulk_import) }
  let_it_be(:entity) { create(:bulk_import_entity, bulk_import: bulk_import) }

  before do
    stub_const('FakePipeline', pipeline_class)

    allow(entity).to receive(:pipeline_exists?).with('FakePipeline').and_return(true)

    # Stage lookup is stubbed so the worker resolves 'FakePipeline' to our stub
    # class regardless of the real stage configuration.
    allow_next_instance_of(BulkImports::Groups::Stage) do |instance|
      allow(instance).to receive(:pipelines)
        .and_return([{ stage: 0, pipeline: pipeline_class }])
    end
  end

  shared_examples 'successfully runs the pipeline' do
    it 'runs the given pipeline successfully' do
      expect_next_instance_of(Gitlab::Import::Logger) do |logger|
        expect(logger)
          .to receive(:info)
          .with(
            hash_including(
              'pipeline_name' => 'FakePipeline',
              'entity_id' => entity.id
            )
          )
      end

      # The worker hands control back to EntityWorker for the current stage.
      expect(BulkImports::EntityWorker)
        .to receive(:perform_async)
        .with(entity.id, pipeline_tracker.stage)

      allow(subject).to receive(:jid).and_return('jid')

      subject.perform(pipeline_tracker.id, pipeline_tracker.stage, entity.id)

      pipeline_tracker.reload

      expect(pipeline_tracker.status_name).to eq(:finished)
      expect(pipeline_tracker.jid).to eq('jid')
    end
  end

  it_behaves_like 'successfully runs the pipeline' do
    let(:pipeline_tracker) do
      create(
        :bulk_import_tracker,
        entity: entity,
        pipeline_name: 'FakePipeline',
        status_event: 'enqueue'
      )
    end
  end

  context 'when the pipeline cannot be found' do
    it 'logs the error' do
      # An already-finished tracker is not "unstarted", so the worker cannot
      # find a runnable pipeline for it and logs instead of raising.
      pipeline_tracker = create(
        :bulk_import_tracker,
        :finished,
        entity: entity,
        pipeline_name: 'FakePipeline'
      )

      expect_next_instance_of(Gitlab::Import::Logger) do |logger|
        expect(logger)
          .to receive(:error)
          .with(
            hash_including(
              'pipeline_tracker_id' => pipeline_tracker.id,
              'entity_id' => entity.id,
              'message' => 'Unstarted pipeline not found'
            )
          )
      end

      # Even on lookup failure the next stage is still enqueued.
      expect(BulkImports::EntityWorker)
        .to receive(:perform_async)
        .with(entity.id, pipeline_tracker.stage)

      subject.perform(pipeline_tracker.id, pipeline_tracker.stage, entity.id)
    end
  end

  context 'when the pipeline raises an exception' do
    it 'logs the error' do
      pipeline_tracker = create(
        :bulk_import_tracker,
        entity: entity,
        pipeline_name: 'FakePipeline',
        status_event: 'enqueue'
      )

      allow(subject).to receive(:jid).and_return('jid')

      expect_next_instance_of(pipeline_class) do |pipeline|
        expect(pipeline)
          .to receive(:run)
          .and_raise(StandardError, 'Error!')
      end

      expect_next_instance_of(Gitlab::Import::Logger) do |logger|
        expect(logger)
          .to receive(:error)
          .with(
            hash_including(
              'pipeline_name' => 'FakePipeline',
              'entity_id' => entity.id,
              'message' => 'Error!'
            )
          )
      end

      expect(Gitlab::ErrorTracking)
        .to receive(:track_exception)
        .with(
          instance_of(StandardError),
          entity_id: entity.id,
          pipeline_name: pipeline_tracker.pipeline_name
        )

      expect(BulkImports::EntityWorker)
        .to receive(:perform_async)
        .with(entity.id, pipeline_tracker.stage)

      # A Failure record is persisted with the exception details so the import
      # UI can surface what went wrong.
      expect(BulkImports::Failure)
        .to receive(:create)
        .with(
          a_hash_including(
            bulk_import_entity_id: entity.id,
            pipeline_class: 'FakePipeline',
            pipeline_step: 'pipeline_worker_run',
            exception_class: 'StandardError',
            exception_message: 'Error!',
            correlation_id_value: anything
          )
        )

      subject.perform(pipeline_tracker.id, pipeline_tracker.stage, entity.id)

      pipeline_tracker.reload

      expect(pipeline_tracker.status_name).to eq(:failed)
      expect(pipeline_tracker.jid).to eq('jid')
    end

    context 'when entity is failed' do
      it 'marks tracker as failed and logs the error' do
        pipeline_tracker = create(
          :bulk_import_tracker,
          entity: entity,
          pipeline_name: 'FakePipeline',
          status_event: 'enqueue'
        )

        # -1 is the failed entity status; the worker refuses to run pipelines
        # for a failed entity.
        entity.update!(status: -1)

        expect(BulkImports::Failure).to receive(:create)

        expect_next_instance_of(Gitlab::Import::Logger) do |logger|
          expect(logger)
            .to receive(:error)
            .with(
              hash_including(
                'pipeline_name' => 'FakePipeline',
                'entity_id' => entity.id,
                'message' => 'Failed entity status'
              )
            )
        end

        subject.perform(pipeline_tracker.id, pipeline_tracker.stage, entity.id)

        expect(pipeline_tracker.reload.status_name).to eq(:failed)
      end
    end

    context 'when network error is raised' do
      let(:pipeline_tracker) do
        create(
          :bulk_import_tracker,
          entity: entity,
          pipeline_name: 'FakePipeline',
          status_event: 'enqueue'
        )
      end

      # HTTP 429 (Too Many Requests) is a retriable response code.
      let(:exception) do
        BulkImports::NetworkError.new(response: instance_double(HTTParty::Response, code: 429, headers: {}))
      end

      before do
        allow(subject).to receive(:jid).and_return('jid')

        expect_next_instance_of(pipeline_class) do |pipeline|
          expect(pipeline)
            .to receive(:run)
            .and_raise(exception)
        end
      end

      context 'when error is retriable' do
        it 'reenqueues the worker' do
          expect_any_instance_of(BulkImports::Tracker) do |tracker|
            expect(tracker).to receive(:retry).and_call_original
          end

          expect_next_instance_of(Gitlab::Import::Logger) do |logger|
            expect(logger)
              .to receive(:info)
              .with(
                hash_including(
                  'pipeline_name' => 'FakePipeline',
                  'entity_id' => entity.id
                )
              )
          end

          expect(described_class)
            .to receive(:perform_in)
            .with(
              60.seconds,
              pipeline_tracker.id,
              pipeline_tracker.stage,
              pipeline_tracker.entity.id
            )

          subject.perform(pipeline_tracker.id, pipeline_tracker.stage, entity.id)

          pipeline_tracker.reload

          expect(pipeline_tracker.enqueued?).to be_truthy
        end

        # NOTE(review): this context is nested inside 'when error is retriable'
        # (the end-count in the source only balances with this nesting), though
        # it reads like it was meant to be a sibling — confirm intent upstream.
        context 'when error is not retriable' do
          # HTTP 503 is not in the retriable set, so no retry is scheduled.
          let(:exception) do
            BulkImports::NetworkError.new(response: instance_double(HTTParty::Response, code: 503, headers: {}))
          end

          it 'marks tracker as failed and logs the error' do
            expect(described_class).not_to receive(:perform_in)

            subject.perform(pipeline_tracker.id, pipeline_tracker.stage, entity.id)

            pipeline_tracker.reload

            expect(pipeline_tracker.failed?).to eq(true)
          end
        end
      end
    end
  end

  context 'when file extraction pipeline' do
    # Stand-in file-extraction pipeline: the worker must poll the source
    # instance's export status before running it.
    let(:file_extraction_pipeline) do
      Class.new do
        def initialize(_); end

        def run; end

        def self.file_extraction_pipeline?
          true
        end

        def self.relation
          'test'
        end
      end
    end

    let(:pipeline_tracker) do
      create(
        :bulk_import_tracker,
        entity: entity,
        pipeline_name: 'NdjsonPipeline',
        status_event: 'enqueue'
      )
    end

    before do
      stub_const('NdjsonPipeline', file_extraction_pipeline)

      allow_next_instance_of(BulkImports::Groups::Stage) do |instance|
        allow(instance).to receive(:pipelines)
          .and_return([{ stage: 0, pipeline: file_extraction_pipeline }])
      end
    end

    it 'runs the pipeline successfully' do
      # Export neither in progress, empty, nor failed => safe to run.
      allow_next_instance_of(BulkImports::ExportStatus) do |status|
        allow(status).to receive(:started?).and_return(false)
        allow(status).to receive(:empty?).and_return(false)
        allow(status).to receive(:failed?).and_return(false)
      end

      subject.perform(pipeline_tracker.id, pipeline_tracker.stage, entity.id)

      expect(pipeline_tracker.reload.status_name).to eq(:finished)
    end

    context 'when export status is started' do
      it 'reenqueues pipeline worker' do
        allow_next_instance_of(BulkImports::ExportStatus) do |status|
          allow(status).to receive(:started?).and_return(true)
          allow(status).to receive(:empty?).and_return(false)
          allow(status).to receive(:failed?).and_return(false)
        end

        expect(described_class)
          .to receive(:perform_in)
          .with(
            described_class::FILE_EXTRACTION_PIPELINE_PERFORM_DELAY,
            pipeline_tracker.id,
            pipeline_tracker.stage,
            entity.id
          )

        subject.perform(pipeline_tracker.id, pipeline_tracker.stage, entity.id)
      end
    end

    context 'when export status is empty' do
      it 'reenqueues pipeline worker' do
        allow_next_instance_of(BulkImports::ExportStatus) do |status|
          allow(status).to receive(:started?).and_return(false)
          allow(status).to receive(:empty?).and_return(true)
          allow(status).to receive(:failed?).and_return(false)
        end

        expect(described_class)
          .to receive(:perform_in)
          .with(
            described_class::FILE_EXTRACTION_PIPELINE_PERFORM_DELAY,
            pipeline_tracker.id,
            pipeline_tracker.stage,
            entity.id
          )

        subject.perform(pipeline_tracker.id, pipeline_tracker.stage, entity.id)
      end
    end

    context 'when job reaches timeout' do
      it 'marks as failed and logs the error' do
        # Backdate the entity past the export timeout window; restored at the
        # end because `entity` is a let_it_be record shared across examples.
        old_created_at = entity.created_at
        entity.update!(created_at: (BulkImports::Pipeline::NDJSON_EXPORT_TIMEOUT + 1.hour).ago)

        expect_next_instance_of(Gitlab::Import::Logger) do |logger|
          expect(logger)
            .to receive(:error)
            .with(
              hash_including(
                'pipeline_name' => 'NdjsonPipeline',
                'entity_id' => entity.id,
                'message' => 'Pipeline timeout'
              )
            )
        end

        subject.perform(pipeline_tracker.id, pipeline_tracker.stage, entity.id)

        expect(pipeline_tracker.reload.status_name).to eq(:failed)

        entity.update!(created_at: old_created_at)
      end
    end

    context 'when export status is failed' do
      it 'marks as failed and logs the error' do
        allow_next_instance_of(BulkImports::ExportStatus) do |status|
          allow(status).to receive(:failed?).and_return(true)
          allow(status).to receive(:error).and_return('Error!')
        end

        expect_next_instance_of(Gitlab::Import::Logger) do |logger|
          expect(logger)
            .to receive(:error)
            .with(
              hash_including(
                'pipeline_name' => 'NdjsonPipeline',
                'entity_id' => entity.id,
                'message' => 'Error!'
              )
            )
        end

        subject.perform(pipeline_tracker.id, pipeline_tracker.stage, entity.id)

        expect(pipeline_tracker.reload.status_name).to eq(:failed)
      end
    end
  end
end