debian-mirror-gitlab/lib/gitlab/jira_import/issues_importer.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

127 lines
4.5 KiB
Ruby
Raw Normal View History

2020-04-22 19:07:51 +05:30
# frozen_string_literal: true
module Gitlab
module JiraImport
class IssuesImporter < BaseImporter
# Page size requested from Jira per issue fetch (sent as `max_results`).
# Jira currently caps the number of items returned per request at 100,
# see https://jira.atlassian.com/browse/JRACLOUD-67570
# We still ask for 1000 in case they change their mind and raise that cap.
BATCH_SIZE = 1000
2022-08-27 11:52:29 +05:30
# Queue size of the issue import worker above which the importer pauses
# before scheduling further jobs.
JIRA_IMPORT_THRESHOLD = 100_000
# While paused, the importer keeps waiting as long as the queue size is at or
# above this value.
JIRA_IMPORT_PAUSE_LIMIT = 50_000
# Raised by `pause_jira_issue_importer` when waiting for the queue to drain
# gives up.
RetriesExceededError = Class.new(RuntimeError)
2020-04-22 19:07:51 +05:30
# Read-only accessors for the state assigned in #initialize.
attr_reader :imported_items_cache_key, :start_at, :job_waiter
# Prepares the importer state for the given project.
#
# Sets the pagination offset, the cache key used to remember already imported
# issues, a fresh job waiter and the id of the default issue work item type.
#
# @param project the project Jira issues are imported into; forwarded to the
#   parent initializer through the bare `super`
def initialize(project)
  super

  # get cached start_at value, or zero if not cached yet
  @start_at = Gitlab::JiraImport.get_issues_next_start_at(project.id)
  @imported_items_cache_key = JiraImport.already_imported_cache_key(:issues, project.id)
  @job_waiter = JobWaiter.new
  @issue_type_id = WorkItems::Type.default_issue_type.id
end
# Public entry point of the importer: delegates to `import_issues` and
# returns whatever that call returns.
def execute
import_issues
end
private
# Imports the next page of Jira issues.
#
# If the last page was already reached, nothing is fetched and the job waiter
# is returned as is. Otherwise one page is fetched starting at `start_at`, the
# stored offset is updated, and import workers are scheduled for that page.
#
# @return `job_waiter` when there is nothing left to import, otherwise the
#   result of `schedule_issue_import_workers`
def import_issues
  if jira_last_page_reached?
    job_waiter
  else
    page = fetch_issues(start_at)
    update_start_at_with(page)
    schedule_issue_import_workers(page)
  end
end
# Tells whether the final page has already been consumed.
# A negative `start_at` is the marker for that state.
def jira_last_page_reached?
  start_at.negative?
end
# Moves the pagination offset past the given page and persists it.
#
# @param issues the page of issues that was just fetched
# @return the result of `Gitlab::JiraImport.store_issues_next_started_at`
def update_start_at_with(issues)
  advanced_offset = @start_at + issues.size

  # An empty page means the last page was reached: store -1 so that no more
  # `ImportIssuesWorker` workers are scheduled
  # from Gitlab::JiraImport::Stage::ImportIssuesWorker#perform
  @start_at = issues.blank? ? -1 : advanced_offset

  Gitlab::JiraImport.store_issues_next_started_at(project.id, start_at)
end
# Schedules one `ImportIssueWorker` job for every Jira issue in the page.
#
# Issues that were already marked as imported are skipped. When preparing or
# scheduling a single issue raises a StandardError, the error is tracked, the
# failure counter is incremented and the loop carries on with the next issue.
#
# @param issues the page of Jira issues to schedule
# @return the job waiter, with `jobs_remaining` incremented once per
#   scheduled job
def schedule_issue_import_workers(issues)
  next_iid = project.issues.maximum(:iid).to_i + 1

  issues.each do |jira_issue|
    # Technically it's possible that the same work is performed multiple
    # times, as Sidekiq doesn't guarantee there will ever only be one
    # instance of a job or if for some reason the paginated results
    # returned from Jira include issues that were returned before.
    # For such cases we exit early if issue was already imported.
    next if already_imported?(jira_issue.id)

    begin
      issue_attrs = IssueSerializer.new(
        project,
        jira_issue,
        running_import.user_id,
        @issue_type_id,
        { iid: next_iid }
      ).execute

      # Pause the importer to allow the import to catch up and cache to drain
      pause_jira_issue_importer if jira_import_issue_worker.queue_size > JIRA_IMPORT_THRESHOLD

      Gitlab::JiraImport::ImportIssueWorker.perform_async(project.id, jira_issue.id, issue_attrs, job_waiter.key)
      job_waiter.jobs_remaining += 1
      next_iid += 1

      # Mark the issue as imported immediately so we don't end up
      # importing it multiple times within same import.
      # These ids are cleaned-up when import finishes.
      # see Gitlab::JiraImport::Stage::FinishImportWorker
      mark_as_imported(jira_issue.id)
    rescue StandardError => ex
      # Handle the exception here and skip the issue that failed to import,
      # instead of failing to import the entire batch of issues.
      # Track the issue that failed to import.
      Gitlab::ErrorTracking.track_exception(ex, project_id: project.id)
      JiraImport.increment_issue_failures(project.id)
    end
  end

  job_waiter
end
2022-08-27 11:52:29 +05:30
# Returns the worker class used to import a single issue, memoized in an
# instance variable after the first lookup.
def jira_import_issue_worker
  return @_jira_import_issue_worker if @_jira_import_issue_worker

  @_jira_import_issue_worker = Gitlab::JiraImport::ImportIssueWorker
end
# Blocks until the issue import worker queue drops below
# JIRA_IMPORT_PAUSE_LIMIT, waiting on the job waiter between checks.
#
# timeout - Set to 5 seconds per wait.
#           Time to process 100K jobs is currently ~14 seconds.
#           Source: https://github.com/mperham/sidekiq#performance
# retries - Set to 10 times to avoid pausing indefinitely.
#
# @return [nil] once the queue size is below the limit
# @raise [RetriesExceededError] if the queue is still at or above the limit
#   after 10 waits
def pause_jira_issue_importer
  10.times do
    return if jira_import_issue_worker.queue_size < JIRA_IMPORT_PAUSE_LIMIT

    job_waiter.wait(5)
  end

  # Check once more after the final wait: the queue may have drained during
  # it, in which case the pause succeeded and must not be reported as failed.
  return if jira_import_issue_worker.queue_size < JIRA_IMPORT_PAUSE_LIMIT

  raise RetriesExceededError, 'Retry failed after 10 attempts'
end
2020-04-22 19:07:51 +05:30
# Fetches one page of issues of the Jira project, oldest first.
#
# @param start_at offset of the first issue to return
# @return whatever the Jira client's `Issue.jql` call returns for the page
def fetch_issues(start_at)
  issue_resource = client.Issue
  query = "PROJECT='#{jira_project_key}' ORDER BY created ASC"
  paging = { max_results: BATCH_SIZE, start_at: start_at }

  issue_resource.jql(query, paging)
end
end
end
end