debian-mirror-gitlab/lib/gitlab/github_import/client.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

308 lines
9.4 KiB
Ruby
Raw Normal View History

2018-03-17 18:26:18 +05:30
# frozen_string_literal: true
2015-04-26 12:48:37 +05:30
module Gitlab
module GithubImport
2018-03-17 18:26:18 +05:30
# HTTP client for interacting with the GitHub API.
#
# This class is basically a fancy wrapper around Octokit while adding some
# functionality to deal with rate limiting and parallel imports. Usage is
# mostly the same as Octokit, for example:
#
# client = GithubImport::Client.new('hunter2')
#
# client.labels.each do |label|
# puts label.name
# end
2015-04-26 12:48:37 +05:30
class Client
2018-03-17 18:26:18 +05:30
include ::Gitlab::Utils::StrongMemoize

attr_reader :octokit

# Request limit that identifies the Search API: when the limit reported by
# Octokit equals this value, `requests_remaining?` applies the lower
# search threshold.
SEARCH_MAX_REQUESTS_PER_MINUTE = 30

# Page size handed to Octokit when the caller does not pass `per_page`.
DEFAULT_PER_PAGE = 100

# Smaller page size. Not referenced in this part of the file; presumably
# used by callers that need lighter requests (confirm against call sites).
LOWER_PER_PAGE = 50

# Error class that `with_retry` asks Retriable to retry on.
CLIENT_CONNECTION_ERROR = ::Faraday::ConnectionFailed # used/set in sawyer agent which octokit uses

# A single page of data and the corresponding page number.
Page = Struct.new(:objects, :number)

# The minimum number of requests we want to keep available.
#
# We don't use a value of 0 as multiple threads may be using the same
# token in parallel. This could result in all of them hitting the GitHub
# rate limit at once. The threshold is put in place to not hit the limit
# in most cases.
RATE_LIMIT_THRESHOLD = 50

# Same purpose as RATE_LIMIT_THRESHOLD, used when the reported limit is
# SEARCH_MAX_REQUESTS_PER_MINUTE.
SEARCH_RATE_LIMIT_THRESHOLD = 3
2018-03-17 18:26:18 +05:30
# Builds the client and the underlying `::Octokit::Client`.
#
# token - The GitHub API token to use.
#
# host - The GitHub hostname. If nil, the endpoints fall back to the
#        GitHub OmniAuth provider settings and then to the defaults
#        (see `api_endpoint` and `web_endpoint`).
#
# per_page - The number of objects that should be displayed per page.
#
# parallel - When set to true hitting the rate limit will result in a
#            dedicated error being raised. When set to `false` we will
#            instead just `sleep()` until the rate limit is reset. Setting
#            this value to `true` for parallel importing is crucial as
#            otherwise hitting the rate limit will result in a thread
#            being blocked in a `sleep()` call for up to an hour.
def initialize(token, host: nil, per_page: DEFAULT_PER_PAGE, parallel: true)
  # @host must be set first: api_endpoint and web_endpoint read it.
  @host = host

  @octokit = ::Octokit::Client.new(
    access_token: token,
    per_page: per_page,
    api_endpoint: api_endpoint,
    web_endpoint: web_endpoint
  )

  # SSL verification follows the provider's `verify_ssl` setting.
  @octokit.connection_options[:ssl] = { verify: verify_ssl }

  @parallel = parallel
end
2015-04-26 12:48:37 +05:30
2018-03-17 18:26:18 +05:30
# Returns the `parallel` flag this client was initialised with. When it is
# truthy, `raise_or_wait_for_rate_limit` raises instead of sleeping.
def parallel?
  @parallel
end
2016-06-02 11:05:42 +05:30
2018-03-17 18:26:18 +05:30
# Returns the details of a GitHub user as a Hash (the Octokit response
# converted with `to_h`). The request goes through `with_rate_limit`.
#
# username - The username of the user.
def user(username)
  with_rate_limit { octokit.user(username).to_h }
end
2021-02-22 17:27:13 +05:30
# Iterates over the reviews of a pull request via `each_object`.
#
# repo_name - The path (in the form `owner/repository`) of the repository.
# iid - The number of the pull request.
#
# No block is forwarded, so this returns whatever `each_object` returns
# when called without a block (an Enumerator).
def pull_request_reviews(repo_name, iid)
  each_object(:pull_request_reviews, repo_name, iid)
end
2022-10-11 01:57:18 +05:30
# Returns the repositories Octokit lists for a `nil` user, each converted
# to a Hash. This call is not wrapped in `with_rate_limit`.
#
# options - Hash of options passed straight to `octokit.repos`.
def repos(options = {})
  repositories = octokit.repos(nil, options)

  repositories.map { |repository| repository.to_h }
end
2018-03-17 18:26:18 +05:30
# Returns the details of a GitHub repository as a Hash (the Octokit
# response converted with `to_h`). The request goes through
# `with_rate_limit`.
#
# name - The path (in the form `owner/repository`) of the repository.
def repository(name)
  with_rate_limit { octokit.repo(name).to_h }
end
2016-08-24 12:49:21 +05:30
2021-02-22 17:27:13 +05:30
# Returns the details of a single pull request as a Hash (the Octokit
# response converted with `to_h`). The request goes through
# `with_rate_limit`.
#
# repo_name - The path (in the form `owner/repository`) of the repository.
# iid - The number of the pull request.
def pull_request(repo_name, iid)
  with_rate_limit { octokit.pull_request(repo_name, iid).to_h }
end
2018-03-17 18:26:18 +05:30
# Iterates over labels via `each_object`.
#
# args - Arguments passed on to the Octokit `labels` method.
#
# No block is forwarded, so this returns whatever `each_object` returns
# when called without a block (an Enumerator).
def labels(*args)
  each_object(:labels, *args)
end
2018-03-17 18:26:18 +05:30
# Iterates over milestones via `each_object`.
#
# args - Arguments passed on to the Octokit `milestones` method.
#
# No block is forwarded, so this returns whatever `each_object` returns
# when called without a block (an Enumerator).
def milestones(*args)
  each_object(:milestones, *args)
end
2018-03-17 18:26:18 +05:30
# Iterates over releases via `each_object`.
#
# args - Arguments passed on to the Octokit `releases` method.
#
# No block is forwarded, so this returns whatever `each_object` returns
# when called without a block (an Enumerator).
def releases(*args)
  each_object(:releases, *args)
end
2022-10-11 01:57:18 +05:30
# Iterates over branches via `each_object`.
#
# args - Arguments passed on to the Octokit `branches` method.
#
# No block is forwarded, so this returns whatever `each_object` returns
# when called without a block (an Enumerator).
def branches(*args)
  # `__method__` is :branches here, which is also the Octokit method name.
  each_object(__method__, *args)
end
# Returns the protection settings of a branch as a Hash (the Octokit
# response converted with `to_h`). The request goes through
# `with_rate_limit`.
#
# repo_name - The path (in the form `owner/repository`) of the repository.
# branch_name - The name of the branch.
def branch_protection(repo_name, branch_name)
  with_rate_limit { octokit.branch_protection(repo_name, branch_name).to_h }
end
2018-03-17 18:26:18 +05:30
# Fetches data from the GitHub API and yields a Page object for every page
# of data, without loading all of them into memory.
#
# Returns an Enumerator over the pages when no block is given.
#
# method - The Octokit method to use for getting the data.
# args - Arguments to pass to the Octokit method. When the last argument
#        is a Hash with a truthy `:page` entry, page numbering starts
#        from that value instead of 1.
#
# rubocop: disable GitlabSecurity/PublicSend
def each_page(method, *args, &block)
  return to_enum(__method__, method, *args) unless block

  page =
    if args.last.is_a?(Hash) && args.last[:page]
      args.last[:page]
    else
      1
    end

  collection = with_rate_limit { octokit.public_send(method, *args) }

  # The "next" relation must be read right after the request, before any
  # other call replaces Octokit's last response.
  next_url = octokit.last_response.rels[:next]

  yield Page.new(collection, page)

  # Follow the "next" relation until the API stops returning one.
  while next_url
    response = with_rate_limit { next_url.get }
    next_url = response.rels[:next]

    yield Page.new(response.data, page += 1)
  end
end
# rubocop: enable GitlabSecurity/PublicSend
2018-03-17 18:26:18 +05:30
# Iterates over all of the objects for the given method (e.g. `:labels`),
# yielding each object converted to a Hash with `to_h`.
#
# Returns an Enumerator over the objects when no block is given.
#
# method - The method to send to Octokit for querying data.
# args - Any arguments to pass to the Octokit method.
def each_object(method, *args, &block)
  return to_enum(__method__, method, *args) unless block

  each_page(method, *args) do |page|
    page.objects.each do |object|
      yield object.to_h
    end
  end
end
2018-03-17 18:26:18 +05:30
# Yields the supplied block, responding to any rate limit errors.
#
# The exact strategy used for handling rate limiting errors depends on
# whether we are running in parallel mode or not. For more information see
# `#raise_or_wait_for_rate_limit`.
#
# When rate limiting is disabled (see `rate_limiting_enabled?`) the block
# is only wrapped in `with_retry`. Returns the value of the block.
def with_rate_limit
  return with_retry { yield } unless rate_limiting_enabled?

  request_count_counter.increment

  # Back off before the request when too few requests are left.
  raise_or_wait_for_rate_limit unless requests_remaining?

  begin
    with_retry { yield }
  rescue ::Octokit::TooManyRequests
    raise_or_wait_for_rate_limit

    # This retry will only happen when running in sequential mode as we'll
    # raise an error in parallel mode.
    retry
  end
end
2021-02-22 17:27:13 +05:30
# Searches repositories by name and returns the result as a Hash (the
# Octokit response converted with `to_h`).
#
# The query is built by `search_query` with `type: :name`. The request is
# wrapped in `with_retry` only, not in `with_rate_limit`.
#
# name - The string to search for.
# options - Hash of options passed straight to `octokit.search_repositories`.
def search_repos_by_name(name, options = {})
  with_retry { octokit.search_repositories(search_query(str: name, type: :name), options).to_h }
end
# Builds the search query string for the authenticated user.
#
# str - The text to search for.
# type - The field to search in; interpolated after `in:`.
# include_collaborations - When true, appends `collaborations_subquery`.
# include_orgs - When true, appends `organizations_subquery`.
#
# Parts are joined with a single space. `octokit.user` is called without
# arguments to obtain the login used in the `user:` qualifier.
def search_query(str:, type:, include_collaborations: true, include_orgs: true)
  query = "#{str} in:#{type} is:public,private user:#{octokit.user.to_h[:login]}"

  query = [query, collaborations_subquery].join(' ') if include_collaborations
  query = [query, organizations_subquery].join(' ') if include_orgs

  query
end
2018-03-17 18:26:18 +05:30
# Returns `true` if we're still allowed to perform API calls.
#
# Search API has rate limit of 30, use lowered threshold when search is used.
# Search usage is detected by the reported limit being equal to
# SEARCH_MAX_REQUESTS_PER_MINUTE.
def requests_remaining?
  threshold =
    if requests_limit == SEARCH_MAX_REQUESTS_PER_MINUTE
      SEARCH_RATE_LIMIT_THRESHOLD
    else
      RATE_LIMIT_THRESHOLD
    end

  remaining_requests > threshold
end
# Returns the `remaining` value of Octokit's current rate limit.
def remaining_requests
  octokit.rate_limit.remaining
end
2021-01-29 00:20:46 +05:30
# Returns the `limit` value of Octokit's current rate limit.
def requests_limit
  current_rate_limit = octokit.rate_limit

  current_rate_limit.limit
end
2018-03-17 18:26:18 +05:30
# Records a rate limit hit, then either raises or waits.
#
# In parallel mode (`parallel?`) a RateLimitError is raised. Otherwise the
# current thread sleeps for `rate_limit_resets_in` seconds.
#
# Raises RateLimitError when running in parallel mode.
def raise_or_wait_for_rate_limit
  rate_limit_counter.increment

  raise RateLimitError if parallel?

  sleep(rate_limit_resets_in)
end
2018-03-17 18:26:18 +05:30
# Returns how long to wait for the rate limit to reset: Octokit's
# `resets_in` value plus a 5 second margin.
def rate_limit_resets_in
  # We add a few seconds to the rate limit so we don't _immediately_
  # resume when the rate limit resets as this may result in us performing
  # a request before GitHub has a chance to reset the limit.
  octokit.rate_limit.resets_in + 5
end
2018-03-17 18:26:18 +05:30
# Returns true when the API endpoint contains `.github.com`. The answer is
# memoized with `strong_memoize` under the `:rate_limiting_enabled` key.
def rate_limiting_enabled?
  strong_memoize(:rate_limiting_enabled) { api_endpoint.include?('.github.com') }
end
2018-03-17 18:26:18 +05:30
# Returns the API endpoint to use: the explicit host when one was given,
# then the endpoint from the provider settings, then the default.
def api_endpoint
  @host || custom_api_endpoint || default_api_endpoint
end
# Returns the web endpoint to use: the explicit host when one was given,
# then the `site` value from the provider settings (the same value
# `api_endpoint` uses), then Octokit's default web endpoint.
def web_endpoint
  return @host if @host

  configured = custom_api_endpoint
  return configured if configured

  ::Octokit::Default.web_endpoint
end
2018-03-17 18:26:18 +05:30
# Returns the `args.client_options.site` value from the GitHub OmniAuth
# provider settings, or nil when any level of that path is missing.
def custom_api_endpoint
  provider_settings = github_omniauth_provider

  provider_settings.dig('args', 'client_options', 'site')
end
2018-03-17 18:26:18 +05:30
# Returns the `site` from the OmniAuth GitHub strategy's default client
# options, falling back to Octokit's default API endpoint when it is unset.
def default_api_endpoint
  OmniAuth::Strategies::GitHub.default_options[:client_options][:site] || ::Octokit::Default.api_endpoint
end
2018-03-17 18:26:18 +05:30
# Returns the `verify_ssl` value from the GitHub OmniAuth provider
# settings, defaulting to true when the key is absent.
def verify_ssl
  github_omniauth_provider.fetch('verify_ssl') { true }
end
2018-03-17 18:26:18 +05:30
# Returns the GitHub OAuth provider configuration as a Hash.
#
# The result of `config_for('github')` is converted with `to_h` and
# memoized in `@github_omniauth_provider`.
def github_omniauth_provider
  @github_omniauth_provider ||= Gitlab::Auth::OAuth::Provider.config_for('github').to_h
end
2018-03-17 18:26:18 +05:30
# Returns the memoized metrics counter that tracks rate limit hits. It is
# incremented by `raise_or_wait_for_rate_limit`.
def rate_limit_counter
  @rate_limit_counter ||= Gitlab::Metrics.counter(
    :github_importer_rate_limit_hits,
    'The number of times we hit the GitHub rate limit when importing projects'
  )
end
2018-03-17 18:26:18 +05:30
# Returns the memoized metrics counter that tracks API requests. It is
# incremented by `with_rate_limit` when rate limiting is enabled.
def request_count_counter
  return @request_counter if @request_counter

  @request_counter = Gitlab::Metrics.counter(
    :github_importer_request_count,
    'The number of GitHub API calls performed when importing projects'
  )
end
2021-01-29 00:20:46 +05:30
private
# Builds the `repo:<full_name>` search qualifiers, separated by spaces,
# for every repository listed with `affiliation: 'collaborator'`.
#
# Returns an empty String when there are no such repositories.
def collaborations_subquery
  each_object(:repos, nil, { affiliation: 'collaborator' })
    .map { |repo| "repo:#{repo[:full_name]}" }
    .join(' ')
end
# Builds the `org:<login>` search qualifiers, separated by spaces, for
# every organization returned by `each_object(:organizations)`.
#
# Returns an empty String when there are no organizations.
def organizations_subquery
  each_object(:organizations)
    .map { |org| "org:#{org[:login]}" }
    .join(' ')
end
2022-01-26 12:08:38 +05:30
# Runs the given block through Retriable, retrying on
# CLIENT_CONNECTION_ERROR and reporting each retry via `on_retry`.
#
# Returns the value of the block.
def with_retry
  retry_options = { on: CLIENT_CONNECTION_ERROR, on_retry: on_retry }

  Retriable.retriable(**retry_options) { yield }
end
# Returns the Proc that `with_retry` passes to Retriable as `on_retry`.
#
# The Proc takes the exception, the try number, the elapsed time and the
# next interval, and writes them to the import logger.
def on_retry
  Proc.new do |exception, try, elapsed_time, next_interval|
    payload = {
      message: "GitHub connection retry triggered",
      'error.class': exception.class,
      'error.message': exception.message,
      try_count: try,
      elapsed_time_s: elapsed_time,
      wait_to_retry_s: next_interval
    }

    Gitlab::Import::Logger.info(**payload)
  end
end
2015-04-26 12:48:37 +05:30
end
end
end