334 lines
11 KiB
Ruby
334 lines
11 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module Gitlab
|
|
module LegacyGithubImport
|
|
class Importer
|
|
include Gitlab::ShellAdapter
|
|
|
|
# Returns the refmap exposed by Gitlab::GithubImport.
#
# This is a pure delegation: whatever Gitlab::GithubImport.refmap returns is
# handed back unchanged.
def self.refmap
  github_import = Gitlab::GithubImport
  github_import.refmap
end
|
|
|
|
attr_reader :errors, :project, :repo, :repo_url
|
|
|
|
# Sets up the importer state for +project+.
#
# Reads the import source and import URL from the project and starts with an
# empty error list and an empty label-title => label-id cache.
#
# @param project [Object] must respond to #import_source and #import_url
def initialize(project)
  @project = project
  @repo, @repo_url = project.import_source, project.import_url
  @errors = []
  @labels = {}
end
|
|
|
|
# Returns the API client for the remote repository, building it on first use.
#
# For Gitea imports the API host is derived from the project's import URL by
# removing the trailing "<owner>/<repo>.git" segment, and API version 'v1' is
# requested. For every other import the client is built with empty options.
#
# @return [Client] the memoized client
# @raise [Projects::ImportService::Error] when no import credentials are available
def client
  return @client if defined?(@client)

  unless credentials
    raise Projects::ImportService::Error,
      "Unable to find project import data credentials for project ID: #{@project.id}"
  end

  opts = {}
  # Gitea plan to be GitHub compliant
  if project.gitea_import?
    uri = URI.parse(project.import_url)
    # Owner and repository names may contain dots (e.g. "my.repo"), so both
    # character classes accept "." as well. Without it the substitution does
    # not match and the full clone URL would be used as the API host.
    # The port is always interpolated, so the match cannot reach into the host.
    host = "#{uri.scheme}://#{uri.host}:#{uri.port}#{uri.path}".sub(%r{/?[\w.-]+/[\w.-]+\.git\z}, '')
    opts = {
      host: host,
      api_version: 'v1'
    }
  end

  @client = Client.new(credentials[:user], opts)
end
|
|
|
|
# Runs every import step in order and records any collected errors.
#
# Ordering matters because of how GitHub structures its data:
# 1. Labels come first: other items need them and they depend on nothing.
# 2. Pull requests come before issues. Every pull request is also an issue,
#    but only the issue entity carries labels in GitHub. The merge requests
#    must therefore exist before the issue pass attaches labels to them.
#
# @return [true]
def execute
  import_labels
  import_milestones
  import_pull_requests
  import_issues
  %i[issues pull_requests].each { |issuable_type| import_comments(issuable_type) }
  import_wiki

  # Gitea doesn't have a Release API yet
  # See https://github.com/go-gitea/gitea/issues/330
  import_releases unless project.gitea_import?

  handle_errors

  true
end
|
|
|
|
private
|
|
|
|
# Returns the credentials stored on the project's import data, or nil when the
# project has no import data. The result (including nil) is memoized.
def credentials
  return @credentials if defined?(@credentials)

  import_data = project.import_data
  @credentials = (import_data.credentials if import_data)
end
|
|
|
|
# Stores the collected import errors on the project's import state.
#
# Does nothing when no errors were collected. Otherwise writes a JSON document
# with a fixed message and the error list to the :last_error column.
def handle_errors
  return if errors.none?

  import_state = project.import_state
  payload = {
    message: 'The remote data could not be fully imported.',
    errors: errors
  }

  import_state.update_column(:last_error, payload.to_json)
end
|
|
|
|
# Imports all labels of the remote repository, page by page, then refreshes
# the label cache.
#
# A failure on one label is recorded in +errors+ and does not stop the
# remaining labels from being processed.
def import_labels
  fetch_resources(:labels, repo, per_page: 100) do |page|
    page.each do |raw_label|
      begin
        gh_label = LabelFormatter.new(project, raw_label)
        gh_label.create!
      rescue => e
        label_url = Gitlab::UrlSanitizer.sanitize(gh_label.url)
        errors << { type: :label, url: label_url, errors: e.message }
      end
    end
  end

  cache_labels!
end
|
|
|
|
# Imports all milestones (in every state) of the remote repository, page by page.
#
# A failure on one milestone is recorded in +errors+ and does not stop the
# remaining milestones from being processed.
def import_milestones
  fetch_resources(:milestones, repo, state: :all, per_page: 100) do |page|
    page.each do |raw_milestone|
      begin
        gh_milestone = MilestoneFormatter.new(project, raw_milestone)
        gh_milestone.create!
      rescue => e
        milestone_url = Gitlab::UrlSanitizer.sanitize(gh_milestone.url)
        errors << { type: :milestone, url: milestone_url, errors: e.message }
      end
    end
  end
end
|
|
|
|
# rubocop: disable CodeReuse/ActiveRecord
|
|
# Imports all issues (in every state, oldest first) of the remote repository
# and applies their labels.
#
# Entries that are pull requests are not created here. The merge request with
# the same iid in this project is looked up and only its labels are applied.
# A failure on one entry is recorded in +errors+ and does not stop the rest.
def import_issues
  fetch_resources(:issues, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
    issues.each do |raw|
      gh_issue = IssueFormatter.new(project, raw, client)

      begin
        issuable =
          if gh_issue.pull_request?
            MergeRequest.find_by(target_project_id: project.id, iid: gh_issue.number)
          else
            gh_issue.create!
          end

        # The lookup returns nil when no merge request with that iid exists in
        # this project. There is nothing to label then, so skip the entry
        # instead of recording a misleading NoMethodError for nil.
        next unless issuable

        apply_labels(issuable, raw)
      rescue => e
        errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(gh_issue.url), errors: e.message }
      end
    end
  end
end
|
|
# rubocop: enable CodeReuse/ActiveRecord
|
|
|
|
# Imports all pull requests (in every state, oldest first) as merge requests.
#
# Invalid pull requests are skipped. For the others, a source or target branch
# reported as missing is recreated before the merge request is created, and
# clean_up_restored_branches always runs afterwards, even on failure.
# A failure on one pull request is recorded in +errors+ and does not stop the rest.
# Once all pages are processed, the repository's after_remove_branch hook is called.
def import_pull_requests
  fetch_resources(:pull_requests, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |page|
    page.each do |raw_pull_request|
      gh_pull_request = PullRequestFormatter.new(project, raw_pull_request, client)
      next unless gh_pull_request.valid?

      begin
        unless gh_pull_request.source_branch_exists?
          restore_source_branch(gh_pull_request)
        end

        unless gh_pull_request.target_branch_exists?
          restore_target_branch(gh_pull_request)
        end

        merge_request = gh_pull_request.create!

        # Gitea doesn't return PR in the Issue API endpoint, so labels must be assigned at this stage
        apply_labels(merge_request, raw_pull_request) if project.gitea_import?
      rescue => e
        pull_request_url = Gitlab::UrlSanitizer.sanitize(gh_pull_request.url)
        errors << { type: :pull_request, url: pull_request_url, errors: e.message }
      ensure
        clean_up_restored_branches(gh_pull_request)
      end
    end
  end

  project.repository.after_remove_branch
end
|
|
|
|
# Creates the pull request's source branch in the project repository, pointing
# at the pull request's source branch SHA.
def restore_source_branch(pull_request)
  repository = project.repository
  repository.create_branch(pull_request.source_branch_name, pull_request.source_branch_sha)
end
|
|
|
|
# Creates the pull request's target branch in the project repository, pointing
# at the pull request's target branch SHA.
def restore_target_branch(pull_request)
  repository = project.repository
  repository.create_branch(pull_request.target_branch_name, pull_request.target_branch_sha)
end
|
|
|
|
# Deletes branch +name+ from the project repository.
#
# A Gitlab::Git::Repository::DeleteBranchFailed error is not propagated; it is
# recorded in +errors+ instead.
def remove_branch(name)
  repository = project.repository
  repository.delete_branch(name)
rescue Gitlab::Git::Repository::DeleteBranchFailed
  failure = { type: :remove_branch, name: name }
  errors << failure
end
|
|
|
|
# Removes the branches that were recreated for a pull request that is not open.
#
# Open pull requests keep their branches. For the others, each branch that the
# pull request reports as not existing is removed.
# NOTE(review): the *_branch_exists? answers presumably reflect the state before
# the branch was restored (i.e. they are cached by the formatter) — confirm.
def clean_up_restored_branches(pull_request)
  return if pull_request.opened?

  unless pull_request.source_branch_exists?
    remove_branch(pull_request.source_branch_name)
  end

  unless pull_request.target_branch_exists?
    remove_branch(pull_request.target_branch_name)
  end
end
|
|
|
|
# Assigns to +issuable+ the cached label ids matching the label names in +raw+.
#
# Does nothing when +raw+ carries no labels. Label names that are not present
# in the @labels cache are ignored.
def apply_labels(issuable, raw)
  return unless raw.labels.count.positive?

  label_names = raw.labels.map(&:name)
  label_ids = @labels.values_at(*label_names).compact

  issuable.update_attribute(:label_ids, label_ids)
end
|
|
|
|
# rubocop: disable CodeReuse/ActiveRecord
|
|
# Imports the comments for +issuable_type+ (:issues or :pull_requests).
#
# Two notes here:
# 1. Comments have no distinctive attribute (unlike an issue's iid). The last
#    inserted note is therefore fetched and compared against the comments of
#    the first fetched page, and everything up to the match is discarded
#    before importing.
# 2. GH returns comments for _both_ issues and PRs through the issues_comments
#    API, while pull_requests_comments returns only comments on diffs. The
#    last note is therefore selected on line_code, not on noteable_type.
def import_comments(issuable_type)
  resource_type = :"#{issuable_type}_comments"

  null_clause = issuable_type == :pull_requests ? 'NOT NULL' : 'NULL'
  last_note = project.notes.where("line_code IS #{null_clause}").last

  fetch_resources(resource_type, repo, per_page: 100) do |comments|
    # Only the first yielded page is trimmed: the marker is cleared right after.
    discard_inserted_comments(comments, last_note) if last_note
    last_note = nil

    create_comments(comments)
  end
end
|
|
# rubocop: enable CodeReuse/ActiveRecord
|
|
|
|
# rubocop: disable CodeReuse/ActiveRecord
|
|
# Creates a note for every comment in +comments+, inside
# ActiveRecord::Base.no_touching.
#
# Comments whose parent issue or merge request cannot be found are skipped.
# A failure on one comment is recorded in +errors+ and does not stop the rest.
def create_comments(comments)
  ActiveRecord::Base.no_touching do
    comments.each do |raw|
      begin
        comment = CommentFormatter.new(project, raw, client)

        # GH does not return info about comment's parent, so we guess it by checking its URL!
        # The last two path segments are "<issues|pull>/<iid>".
        parent, iid = URI(raw.html_url).path.split('/').last(2)

        issuable =
          case parent
          when 'issues'
            Issue.find_by(project_id: project.id, iid: iid)
          else
            MergeRequest.find_by(target_project_id: project.id, iid: iid)
          end

        next unless issuable

        issuable.notes.create!(comment.attributes)
      rescue => e
        comment_url = Gitlab::UrlSanitizer.sanitize(raw.url)
        errors << { type: :comment, url: comment_url, errors: e.message }
      end
    end
  end
end
|
|
# rubocop: enable CodeReuse/ActiveRecord
|
|
|
|
# Drops from the front of +comments+ everything up to and including the comment
# whose attributes match +last_note+. +comments+ is modified in place.
#
# @return [Array, nil] the removed comments, or nil when nothing matched
def discard_inserted_comments(comments, last_note)
  reference_attrs = nil

  match_position = comments.index do |raw|
    candidate_attrs = CommentFormatter.new(project, raw).attributes
    # Sliced once, using the attribute keys of the first comment inspected.
    reference_attrs ||= last_note.slice(*candidate_attrs.keys)

    candidate_attrs.with_indifferent_access == reference_attrs
  end

  # No matching resource in the collection, which means we got halted right on the end of the last page, so all good
  return unless match_position

  # Otherwise, remove the resources we've already inserted
  comments.slice!(0, match_position + 1)
end
|
|
|
|
# Imports the wiki repository unless the project's wiki repository already exists.
#
# A Gitlab::Shell::Error is recorded in +errors+, except when its message
# contains "repository not exported".
def import_wiki
  return if project.wiki.repository_exists?

  wiki_formatter = WikiFormatter.new(project)
  gitlab_shell.import_wiki_repository(project, wiki_formatter)
rescue Gitlab::Shell::Error => e
  # GitHub error message when the wiki repo has not been created,
  # this means that repo has wiki enabled, but have no pages. So,
  # we can skip the import.
  errors << { type: :wiki, errors: e.message } unless e.message.match?(/repository not exported/)
end
|
|
|
|
# Imports all releases of the remote repository, page by page.
#
# Only releases the formatter reports as valid are created. A failure on one
# release is recorded in +errors+ and does not stop the rest.
def import_releases
  fetch_resources(:releases, repo, per_page: 100) do |page|
    page.each do |raw_release|
      begin
        gh_release = ReleaseFormatter.new(project, raw_release)
        gh_release.create! if gh_release.valid?
      rescue => e
        release_url = Gitlab::UrlSanitizer.sanitize(gh_release.url)
        errors << { type: :release, url: release_url, errors: e.message }
      end
    end
  end
end
|
|
|
|
# Fills the @labels cache with a title => id entry for every label of the
# project, loading only the id and title columns.
def cache_labels!
  label_scope = project.labels.select(:id, :title)

  label_scope.find_each { |label| @labels.store(label.title, label.id) }
end
|
|
|
|
# Fetches +resource_type+ through the client and yields each batch of
# resources to the given block.
#
# Returns immediately when the resource type is already marked as imported.
# Otherwise fetching starts at the cached current page (written into the last
# element of +opts+, which must be the options hash), the page counter is
# incremented after every yielded batch, and the resource type is marked as
# imported once the client call returns.
def fetch_resources(resource_type, *opts)
  return if imported?(resource_type)

  request_options = opts.last
  request_options[:page] = current_page(resource_type)

  client.public_send(resource_type, *opts) do |batch| # rubocop:disable GitlabSecurity/PublicSend
    yield batch

    increment_page(resource_type)
  end

  imported!(resource_type)
end
|
|
|
|
# Reads the "imported" marker for +resource_type+ from the Rails cache.
#
# @return [Object, nil] whatever is stored under the marker key (nil when absent)
def imported?(resource_type)
  marker_key = "#{cache_key_prefix}:#{resource_type}:imported"

  Rails.cache.read(marker_key)
end
|
|
|
|
# Writes the "imported" marker (true) for +resource_type+ to the Rails cache.
#
# NOTE(review): the one-day lifetime is passed as `ex:`. ActiveSupport's
# documented expiry option is `expires_in:` — confirm the configured cache
# store actually honours `ex:`.
def imported!(resource_type)
  marker_key = "#{cache_key_prefix}:#{resource_type}:imported"

  Rails.cache.write(marker_key, true, ex: 1.day)
end
|
|
|
|
# Increments the cached current page for +resource_type+ by one.
#
# @return [Integer] the new page number
def increment_page(resource_type)
  page_key = "#{cache_key_prefix}:#{resource_type}:current-page"

  # Rails.cache.increment calls INCRBY directly on the value stored under the key, which is
  # a serialized ActiveSupport::Cache::Entry, so it will return an error by Redis, hence this
  # read-then-write work-around
  next_page = Rails.cache.read(page_key) + 1
  Rails.cache.write(page_key, next_page)

  next_page
end
|
|
|
|
# Returns the cached current page for +resource_type+, storing 1 when nothing
# is cached yet.
#
# NOTE(review): the one-day lifetime is passed as `ex:`. ActiveSupport's
# documented expiry option is `expires_in:` — confirm the configured cache
# store actually honours `ex:`.
def current_page(resource_type)
  page_key = "#{cache_key_prefix}:#{resource_type}:current-page"

  Rails.cache.fetch(page_key, ex: 1.day) { 1 }
end
|
|
|
|
# Returns the memoized cache key prefix for this project:
# "github-import:<project id>".
def cache_key_prefix
  return @cache_key_prefix if @cache_key_prefix

  @cache_key_prefix = "github-import:#{project.id}"
end
|
|
end
|
|
end
|
|
end
|