debian-mirror-gitlab/app/services/ci/register_job_service.rb

245 lines
8.8 KiB
Ruby
Raw Normal View History

2018-11-18 11:00:15 +05:30
# frozen_string_literal: true
2017-08-17 22:00:37 +05:30
module Ci
# This class is responsible for assigning
# a proper pending build to a runner on a runner API request
class RegisterJobService
attr_reader :runner
2019-12-04 20:38:33 +05:30
# Bucket boundaries passed to the `job_queue_duration_seconds` histogram.
JOB_QUEUE_DURATION_SECONDS_BUCKETS = [1, 3, 10, 30, 60, 300, 900, 1800, 3600].freeze

# Running-job counts at or above this value are reported as one "<N>+" label.
# Integers are always frozen, so no explicit `.freeze` is needed here.
JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET = 5

# An instance runner tag starting with this prefix selects the `shard` metric
# label; DEFAULT_METRICS_SHARD is used when no such tag exists.
METRICS_SHARD_TAG_PREFIX = 'metrics_shard::'.freeze
DEFAULT_METRICS_SHARD = 'default'.freeze

# Outcome of a registration attempt: the assigned build (or nil), its
# serialized JSON (or nil), and a `valid?` flag that is false when a
# concurrent-update conflict was hit while assigning.
Result = Struct.new(:build, :build_json, :valid?)
2017-08-17 22:00:37 +05:30
# runner - the runner requesting a job; stored and exposed through the
#          `runner` reader.
def initialize(runner)
@runner = runner
end
2018-12-05 23:21:45 +05:30
# rubocop: disable CodeReuse/ActiveRecord
2018-11-08 19:23:39 +05:30
# Finds a pending build this runner may take and assigns it.
#
# params - Hash of request parameters. `:job_age` (seconds), when the key is
#          present, limits the search to builds queued before that long ago.
#          The whole hash is forwarded to `process_build`.
#
# Returns a Result. On success it carries the build and its JSON. Otherwise
# build and JSON are nil, and `valid?` is false if any candidate hit a
# concurrent-update conflict.
def execute(params = {})
  queue =
    if runner.instance_type?
      builds_for_shared_runner
    elsif runner.group_type?
      builds_for_group_runner
    else
      builds_for_project_runner
    end

  # Keep only builds that have no tags other than the runner's own.
  queue = queue.matches_tag_ids(runner.tags.ids)

  # Runners that do not run untagged jobs only see builds with at least one tag.
  queue = queue.with_any_tags unless runner.run_untagged?

  # Keep only builds older than the specified age.
  queue = queue.queued_before(params[:job_age].seconds.ago) if params.key?(:job_age)

  conflict_free = true

  queue.each do |candidate|
    outcome = process_build(candidate, params)
    next unless outcome

    unless outcome.valid?
      # An invalid result means the candidate was lost to a concurrent update
      # (see the StaleObjectError handling in `process_build`); remember it
      # and keep looking.
      conflict_free = false
      next
    end

    register_success(outcome.build)
    return outcome
  end

  register_failure
  Result.new(nil, nil, conflict_free)
end
2018-12-05 23:21:45 +05:30
# rubocop: enable CodeReuse/ActiveRecord
2017-08-17 22:00:37 +05:30
private
2019-12-26 22:10:19 +05:30
# Tries to hand a single build to the runner.
#
# Returns:
# - nil when the runner cannot pick the build, when a pre-assign check
#   dropped it, or when an unexpected error occurred (the build is then
#   failed with a scheduler failure and the error is tracked);
# - a valid Result when the build was assigned and presented;
# - an invalid Result when another update won the race for this build.
def process_build(build, params)
  return unless runner.can_pick?(build)

  # When two runners try to assign the same build, the second one is declined
  # with StateMachines::InvalidTransition or StaleObjectError during run!/save.
  present_build!(build) if assign_runner!(build, params)
rescue StateMachines::InvalidTransition, ActiveRecord::StaleObjectError
  # The caller keeps looping to find another, non-conflicting build. Without
  # that, many runners would compete for one build and produce a lot of 409
  # responses, which increases the number of requests and significantly
  # reduces how many builds a runner can pick per unit of time.
  # Having hit the concurrent-access lock, a 409 still has to be returned in
  # the end so the runner handles it properly; the invalid Result signals that.
  Result.new(nil, nil, false)
rescue => error
  # An error (e.g. GRPC::DeadlineExceeded) occurred while constructing the
  # result; treat it as a failure to be retried.
  scheduler_failure!(build)
  track_exception_for_build(error, build)

  # Skip this build and let the caller move on to the next one.
  nil
end
2020-07-28 23:09:34 +05:30
# Builds the Result for an assigned build, forcing variable evaluation to
# happen now rather than later.
#
# The presenter is used here because Gitaly calls made by the presenter may
# fail, and the response must be fully generated before it is returned.
def present_build!(build)
  presenter = ::Ci::BuildRunnerPresenter.new(build) # rubocop:disable CodeReuse/Presenter
  payload = ::API::Entities::JobRequest::Response.new(presenter).to_json

  Result.new(build, payload, true)
end
2018-11-18 11:00:15 +05:30
# Attaches this runner to the build and transitions the build.
#
# The build is dropped with the reason of the first failing pre-assign check,
# or started with `run!` when every check passes.
#
# Returns true when the build was started, false when it was dropped.
def assign_runner!(build, params)
  build.runner_id = runner.id
  build.runner_session_attributes = params[:session] if params[:session].present?

  # `find` yields [reason, check] pairs and stops at the first check that
  # returns a truthy value for this build.
  vetoing_check = pre_assign_runner_checks.find { |_reason, check| check.call(build, params) }
  failure_reason = vetoing_check&.first

  if failure_reason
    build.drop!(failure_reason)
    false
  else
    build.run!
    true
  end
end
2019-12-26 22:10:19 +05:30
# Drops the build with the :scheduler_failure reason inside
# Gitlab::OptimisticLocking.retry_lock (called with 3, presumably the retry
# limit). If that raises, falls back to `build.doom!` and tracks the error.
def scheduler_failure!(build)
  Gitlab::OptimisticLocking.retry_lock(build, 3) { |locked_build| locked_build.drop!(:scheduler_failure) }
rescue => error
  build.doom!

  # Track this second exception as well; otherwise we would lose the
  # information about why `scheduler_failure` could not be performed.
  track_exception_for_build(error, build)
end
# Reports the exception to Gitlab::ErrorTracking together with identifying
# attributes of the build (id, name, stage, pipeline id, project id).
def track_exception_for_build(ex, build)
  context = {
    build_id: build.id,
    build_name: build.name,
    build_stage: build.stage,
    pipeline_id: build.pipeline_id,
    project_id: build.project_id
  }

  Gitlab::ErrorTracking.track_exception(ex, **context)
end
2018-12-05 23:21:45 +05:30
# rubocop: disable CodeReuse/ActiveRecord
2017-08-17 22:00:37 +05:30
# Pending builds an instance (shared) runner may consider, ordered for fair
# scheduling across projects.
def builds_for_shared_runner
  # Skip projects that have not enabled shared runners, are pending deletion,
  # or have builds disabled.
  eligible = new_builds
    .joins(:project).where(projects: { shared_runners_enabled: true, pending_delete: false })
    .joins('LEFT JOIN project_features ON ci_builds.project_id = project_features.project_id')
    .where('project_features.builds_access_level IS NULL or project_features.builds_access_level > 0')

  # Fair scheduling: order builds by how many builds their project already has
  # running on shared runners, so projects that use none come first.
  running_counts_sql = running_builds_for_shared_runners.to_sql

  eligible
    .joins("LEFT JOIN (#{running_counts_sql}) AS project_builds ON ci_builds.project_id=project_builds.project_id")
    .order(Arel.sql('COALESCE(project_builds.running_builds, 0) ASC'), 'ci_builds.id ASC')
end
2018-12-05 23:21:45 +05:30
# rubocop: enable CodeReuse/ActiveRecord
2017-08-17 22:00:37 +05:30
2018-12-05 23:21:45 +05:30
# rubocop: disable CodeReuse/ActiveRecord
2018-10-15 14:42:47 +05:30
# Pending builds from the runner's own projects (not deleted, builds
# enabled), oldest id first.
def builds_for_project_runner
  pending = new_builds
  eligible_projects = runner.projects.without_deleted.with_builds_enabled

  pending.where(project: eligible_projects).order('id ASC')
end
2018-12-05 23:21:45 +05:30
# rubocop: enable CodeReuse/ActiveRecord
2018-10-15 14:42:47 +05:30
2018-12-05 23:21:45 +05:30
# rubocop: disable CodeReuse/ActiveRecord
2018-10-15 14:42:47 +05:30
# Pending builds from projects in the runner's groups and their descendant
# groups, oldest id first.
def builds_for_group_runner
  # Workaround for a Rails bug that makes `runner.groups.to_sql` return
  # `runner_id = NULL`.
  runner_groups = ::Group.joins(:runner_namespaces).merge(runner.runner_namespaces)
  groups_with_descendants = Gitlab::ObjectHierarchy.new(runner_groups).base_and_descendants

  eligible_projects = Project
    .where(namespace_id: groups_with_descendants)
    .with_group_runners_enabled
    .with_builds_enabled
    .without_deleted

  new_builds.where(project: eligible_projects).order('id ASC')
end
2018-12-05 23:21:45 +05:30
# rubocop: enable CodeReuse/ActiveRecord
2017-08-17 22:00:37 +05:30
2018-12-05 23:21:45 +05:30
# rubocop: disable CodeReuse/ActiveRecord
2017-08-17 22:00:37 +05:30
# Relation yielding, per project, the number of builds currently running on
# instance runners (columns: project_id, running_builds).
def running_builds_for_shared_runners
  running_on_instance_runners = Ci::Build.running.where(runner: Ci::Runner.instance_type)

  running_on_instance_runners
    .group(:project_id)
    .select(:project_id, 'count(*) AS running_builds')
end
2018-12-05 23:21:45 +05:30
# rubocop: enable CodeReuse/ActiveRecord
2017-08-17 22:00:37 +05:30
# Base scope of candidate builds: pending and unstarted, narrowed with
# `ref_protected` when the runner itself is ref-protected.
def new_builds
  scope = Ci::Build.pending.unstarted

  runner.ref_protected? ? scope.ref_protected : scope
end
2017-09-10 17:25:29 +05:30
# Records a registration attempt that handed no job to the runner:
# increments the failed-attempts counter and the overall attempts counter.
def register_failure
failed_attempt_counter.increment
attempt_counter.increment
end
# Records metrics for a job that was successfully handed to the runner.
#
# Observes how long the job waited in the queue (skipped when the job has no
# `queued_at`) and increments the attempts counter.
#
# job - the assigned build; `queued_at` is read from it, and it is passed on
#       to `jobs_running_for_project`.
def register_success(job)
  labels = {
    shared_runner: runner.instance_type?,
    jobs_running_for_project: jobs_running_for_project(job),
    shard: DEFAULT_METRICS_SHARD
  }

  if runner.instance_type?
    # The alphabetically first tag carrying the shard prefix selects the shard.
    shard_tag = runner.tag_list.sort.find { |name| name.start_with?(METRICS_SHARD_TAG_PREFIX) }

    # Strip only the leading prefix. `gsub` would also remove any later
    # occurrence of the prefix text inside the shard name itself.
    labels[:shard] = shard_tag.delete_prefix(METRICS_SHARD_TAG_PREFIX) if shard_tag
  end

  job_queue_duration_seconds.observe(labels, Time.current - job.queued_at) unless job.queued_at.nil?

  attempt_counter.increment
end
2018-12-05 23:21:45 +05:30
# rubocop: disable CodeReuse/ActiveRecord
2018-10-15 14:42:47 +05:30
# Value for the `jobs_running_for_project` metric label.
#
# Returns '+Inf' for non-instance runners. Otherwise returns the number of
# the job's project builds running on instance runners, excluding the job
# that was just started, capped as "<max>+" once it reaches
# JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET.
def jobs_running_for_project(job)
  return '+Inf' unless runner.instance_type?

  cap = JOBS_RUNNING_FOR_PROJECT_MAX_BUCKET

  # The limit bounds the count query; subtracting one excludes the job that
  # was just started.
  others_running = job.project.builds.running
    .where(runner: Ci::Runner.instance_type)
    .limit(cap + 1)
    .count - 1

  return others_running if others_running < cap

  "#{cap}+"
end
2018-12-05 23:21:45 +05:30
# rubocop: enable CodeReuse/ActiveRecord
2018-10-15 14:42:47 +05:30
2017-09-10 17:25:29 +05:30
# Memoized counter of registration attempts that handed no job to the runner.
#
# The description used to be a verbatim copy of the one on `attempt_counter`;
# it now says that this metric counts failures.
def failed_attempt_counter
  @failed_attempt_counter ||= Gitlab::Metrics.counter(
    :job_register_attempts_failed_total,
    'Counts the times a runner fails to register a job'
  )
end
# Memoized counter of all registration attempts, successful or not.
def attempt_counter
  @attempt_counter ||= Gitlab::Metrics.counter(
    :job_register_attempts_total,
    "Counts the times a runner tries to register a job"
  )
end
# Memoized histogram that `register_success` feeds with the time a job spent
# queued, using JOB_QUEUE_DURATION_SECONDS_BUCKETS as bucket boundaries.
def job_queue_duration_seconds
  @job_queue_duration_seconds ||= Gitlab::Metrics.histogram(
    :job_queue_duration_seconds,
    'Request handling execution time',
    {},
    JOB_QUEUE_DURATION_SECONDS_BUCKETS
  )
end
2020-10-24 23:57:45 +05:30
# Checks run before a build is started, keyed by the failure reason used to
# drop the build. Each check takes (build, params) and returns a truthy value
# when the build must NOT be started. Insertion order is the evaluation order.
def pre_assign_runner_checks
  missing_dependencies = ->(build, _params) { !build.has_valid_build_dependencies? }
  unsupported_runner = ->(build, params) { !build.supported_runner?(params.dig(:info, :features)) }
  archived = ->(build, _params) { build.archived? }

  {
    missing_dependency_failure: missing_dependencies,
    runner_unsupported: unsupported_runner,
    archived_failure: archived
  }
end
2017-08-17 22:00:37 +05:30
end
end
2019-12-04 20:38:33 +05:30
Ci::RegisterJobService.prepend_if_ee('EE::Ci::RegisterJobService')