debian-mirror-gitlab/lib/gitlab/github_import/user_finder.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

196 lines
6.3 KiB
Ruby
Raw Normal View History

2018-03-17 18:26:18 +05:30
# frozen_string_literal: true
module Gitlab
module GithubImport
# Class that can be used for finding a GitLab user ID based on a GitHub user
# ID or username.
#
# Any found user IDs are cached in Redis to reduce the number of SQL queries
# executed over time. Valid keys are refreshed upon access so frequently
# used keys stick around.
#
# Lookups are cached even if no ID was found to remove the need for querying
# the database when most queries are not going to return results anyway.
class UserFinder
# Readers for the two collaborators handed to `initialize`: the `Project`
# instance and the `Gitlab::GithubImport::Client` instance.
attr_reader :project, :client
# The base cache key to use for caching user IDs for a given GitHub user
# ID. The `%s` placeholder is filled with the GitHub user ID.
ID_CACHE_KEY = 'github-import/user-finder/user-id/%s'

# The base cache key to use for caching user IDs for a given GitHub email
# address. The `%s` placeholder is filled with the email address.
ID_FOR_EMAIL_CACHE_KEY = 'github-import/user-finder/id-for-email/%s'

# The base cache key to use for caching the email addresses of GitHub
# usernames. The `%s` placeholder is filled with the GitHub username.
EMAIL_FOR_USERNAME_CACHE_KEY = 'github-import/user-finder/email-for-username/%s'
# Stores the collaborators used by every lookup.
#
# project - An instance of `Project`.
# client - An instance of `Gitlab::GithubImport::Client`.
def initialize(project, client)
  @client = client
  @project = project
end
# Returns the GitLab user ID of an object's author.
#
# If the object carries no user under the requested key (or the object
# itself is nil) we'll use the ID of the GitLab ghost user. If a user is
# present but no GitLab user ID can be found for it, the ID of the
# project's creator is used instead.
#
# object - An instance of `Hash` or a `Github::Representer`
# author_key - The key under which the user is stored in `object`. One of
#              :actor, :assignee, :requested_reviewer or :review_requester;
#              any other value falls back to :author.
#
# Returns a two-element Array: the user ID, and `false` when the ID is the
# project creator's fallback ID or `true` otherwise.
def author_id_for(object, author_key: :author)
  known_keys = %i[actor assignee requested_reviewer review_requester]
  key = known_keys.include?(author_key) ? author_key : :author

  # The nil guard applies to every key, not only :author, so a missing
  # object consistently resolves to the ghost user.
  user_info = object ? object[key] : nil

  id = user_info ? user_id_for(user_info) : GithubImport.ghost_user_id

  if id
    [id, true]
  else
    [project.creator_id, false]
  end
end
# Returns the GitLab user ID of an issuable's assignee.
#
# issuable - An object that responds to `[]` and may hold a user under
#            the :assignee key.
#
# Returns nil when the issuable has no assignee.
def assignee_id_for(issuable)
  # Read the assignee once instead of indexing the issuable twice.
  assignee = issuable[:assignee]

  user_id_for(assignee) if assignee
end
# Returns the GitLab user ID for a GitHub user.
#
# user - An instance of `Gitlab::GithubImport::Representation::User` or `Hash`.
#
# Returns nil without performing a lookup when `user` is not `present?`.
def user_id_for(user)
  return unless user.present?

  find(user[:id], user[:login])
end
# Resolves a GitHub user to a GitLab user ID.
#
# id - The ID of the GitHub user.
# username - The username of the GitHub user.
#
# Returns the GitLab user ID, or nil when none is known.
def find(id, username)
  address = email_for_github_username(username)
  was_cached, gitlab_id = find_from_cache(id, address)

  if gitlab_id
    gitlab_id
  elsif !was_cached
    # Nothing is cached yet, so this is the only case that reaches the
    # database. A cached "not found" answer is trusted until its keys expire.
    find_id_from_database(id, address)
  end
end
# Looks up a cached user ID, first by GitHub ID and then by Email.
#
# id - The ID of the GitHub user.
# email - The Email address of the GitHub user, if any.
#
# Returns an Array whose first element tells whether a cache entry existed
# and whose second element (when present) is the cached ID.
def find_from_cache(id, email = nil)
  entry_present, gitlab_id = cached_id_for_github_id(id)

  if gitlab_id
    [entry_present, gitlab_id]
  elsif email
    cached_id_for_github_email(email)
  else
    # No Email address could be retrieved, so there is nothing else to check.
    [false]
  end
end
# Looks up a GitLab user ID in the database, trying the GitHub user ID
# before falling back to the Email address.
#
# id - The ID of the GitHub user.
# email - The Email address of the GitHub user.
def find_id_from_database(id, email)
  gitlab_id = id_for_github_id(id)
  return gitlab_id if gitlab_id

  id_for_github_email(email)
end
# Returns the Email address of a GitHub username, reading it from the cache
# when possible and asking the GitHub client otherwise.
#
# username - The username of the GitHub user.
#
# A fetched Email address (including a missing one) is written to the cache
# using the timeout chosen by `timeout`.
#
# Returns nil when `username` is nil or when the client returns no user.
def email_for_github_username(username)
  # Every nil username would format to the same cache key, and there is no
  # user to ask the client for, so skip the lookup entirely.
  return if username.nil?

  cache_key = EMAIL_FOR_USERNAME_CACHE_KEY % username
  email = Gitlab::Cache::Import::Caching.read(cache_key)
  return email if email

  user = client.user(username)
  return unless user

  Gitlab::Cache::Import::Caching.write(cache_key, user[:email], timeout: timeout(user[:email]))
end
# Reads the cache entry stored for a GitHub user ID.
#
# Returns the result of `read_id_from_cache` for the ID's cache key.
def cached_id_for_github_id(id)
  cache_key = ID_CACHE_KEY % id
  read_id_from_cache(cache_key)
end
# Reads the cache entry stored for a GitHub Email address.
#
# Returns the result of `read_id_from_cache` for the address's cache key.
def cached_id_for_github_email(email)
  cache_key = ID_FOR_EMAIL_CACHE_KEY % email
  read_id_from_cache(cache_key)
end
# If importing from github.com, queries and caches the GitLab user ID for
# a GitHub user ID, if one was found.
#
# When importing from GitHub Enterprise, do not query user by GitHub ID
# since we only have users' GitHub ID from github.com. In that case nil is
# written to the cache for the ID.
#
# id - The ID of the GitHub user.
#
# Returns the result of `Gitlab::Cache::Import::Caching.write` (presumably
# the written ID; callers treat a falsy result as "not found").
def id_for_github_id(id)
  gitlab_id = (query_id_for_github_id(id) unless project.github_enterprise_import?)

  Gitlab::Cache::Import::Caching.write(ID_CACHE_KEY % id, gitlab_id)
end
# Queries and caches the GitLab user ID for a GitHub email, if one was
# found. When no user is found, nil is written to the cache for the email.
#
# email - The Email address of the GitHub user.
#
# Returns the result of `Gitlab::Cache::Import::Caching.write` (presumably
# the written ID; callers treat a falsy result as "not found").
def id_for_github_email(email)
  # `|| nil` normalizes any falsy query result to nil before caching.
  gitlab_id = query_id_for_github_email(email) || nil

  Gitlab::Cache::Import::Caching.write(ID_FOR_EMAIL_CACHE_KEY % email, gitlab_id)
end
# Returns the `id` of the first `User` matched by
# `User.by_provider_and_extern_uid(:github, id)`, or nil when there is no
# match.
#
# id - The ID of the GitHub user.
#
# rubocop: disable CodeReuse/ActiveRecord
def query_id_for_github_id(id)
  User.by_provider_and_extern_uid(:github, id).select(:id).first&.id
end
# rubocop: enable CodeReuse/ActiveRecord
# Returns the `id` picked from `User.by_any_email(email)`, or nil when there
# is no match.
#
# email - The Email address of the GitHub user.
#
# rubocop: disable CodeReuse/ActiveRecord
def query_id_for_github_email(email)
  User.by_any_email(email).pick(:id)
end
# rubocop: enable CodeReuse/ActiveRecord
# Reads an ID from the cache.
#
# key - The full cache key to read.
#
# The return value is an Array with two values:
#
# 1. A boolean indicating if the key was present or not.
# 2. The ID as an Integer, or nil in case no ID could be found.
def read_id_from_cache(key)
  value = Gitlab::Cache::Import::Caching.read(key)
  number = value.to_i

  # The cache key may be empty to indicate a previously looked up user for
  # which we couldn't find an ID. Such a value converts to 0, which is
  # reported as nil.
  [!value.nil?, number.positive? ? number : nil]
end
private

# Picks the cache timeout for an Email address: the regular timeout when an
# address is given, the shorter one when it is nil or false.
def timeout(email)
  caching = Gitlab::Cache::Import::Caching

  email ? caching::TIMEOUT : caching::SHORTER_TIMEOUT
end
2018-03-17 18:26:18 +05:30
end
end
end