forgejo-federation/services/migrations/gitlab.go
Sebastian Brückner bc7a247b9e
Include resource state events in Gitlab downloads (#29382)
Some specific events on Gitlab issues and merge requests are stored
separately from comments as "resource state events". With this change,
all relevant resource state events are downloaded during issue and merge
request migration, and converted to comments.

This PR also updates the template used to render comments to add support
for migrated comments of these types.

ref: https://docs.gitlab.com/ee/api/resource_state_events.html
(cherry picked from commit 17f170ee3724d8bdf2ddaad4211b12433f78ff0e)
2024-03-06 12:10:43 +08:00

785 lines
22 KiB
Go

// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package migrations
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"path"
"regexp"
"strings"
"time"
issues_model "code.gitea.io/gitea/models/issues"
"code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/log"
base "code.gitea.io/gitea/modules/migration"
"code.gitea.io/gitea/modules/structs"
"github.com/xanzy/go-gitlab"
)
var (
_ base.Downloader = &GitlabDownloader{}
_ base.DownloaderFactory = &GitlabDownloaderFactory{}
)
func init() {
RegisterDownloaderFactory(&GitlabDownloaderFactory{})
}
// GitlabDownloaderFactory defines a gitlab downloader factory
type GitlabDownloaderFactory struct{}
// New returns a Downloader related to this factory according MigrateOptions
func (f *GitlabDownloaderFactory) New(ctx context.Context, opts base.MigrateOptions) (base.Downloader, error) {
u, err := url.Parse(opts.CloneAddr)
if err != nil {
return nil, err
}
baseURL := u.Scheme + "://" + u.Host
repoNameSpace := strings.TrimPrefix(u.Path, "/")
repoNameSpace = strings.TrimSuffix(repoNameSpace, ".git")
log.Trace("Create gitlab downloader. BaseURL: %s RepoName: %s", baseURL, repoNameSpace)
return NewGitlabDownloader(ctx, baseURL, repoNameSpace, opts.AuthUsername, opts.AuthPassword, opts.AuthToken)
}
// GitServiceType returns the type of git service
func (f *GitlabDownloaderFactory) GitServiceType() structs.GitServiceType {
return structs.GitlabService
}
type gitlabIIDResolver struct {
maxIssueIID int64
frozen bool
}
func (r *gitlabIIDResolver) recordIssueIID(issueIID int) {
if r.frozen {
panic("cannot record issue IID after pull request IID generation has started")
}
r.maxIssueIID = max(r.maxIssueIID, int64(issueIID))
}
func (r *gitlabIIDResolver) generatePullRequestNumber(mrIID int) int64 {
r.frozen = true
return r.maxIssueIID + int64(mrIID)
}
// GitlabDownloader implements a Downloader interface to get repository information
// from gitlab via go-gitlab
// - issueCount is incremented in GetIssues() to ensure PR and Issue numbers do not overlap,
// because Gitlab has individual Issue and Pull Request numbers.
type GitlabDownloader struct {
base.NullDownloader
ctx context.Context
client *gitlab.Client
baseURL string
repoID int
repoName string
iidResolver gitlabIIDResolver
maxPerPage int
}
// NewGitlabDownloader creates a gitlab Downloader via gitlab API
//
// Use either a username/password, personal token entered into the username field, or anonymous/public access
// Note: Public access only allows very basic access
func NewGitlabDownloader(ctx context.Context, baseURL, repoPath, username, password, token string) (*GitlabDownloader, error) {
gitlabClient, err := gitlab.NewClient(token, gitlab.WithBaseURL(baseURL), gitlab.WithHTTPClient(NewMigrationHTTPClient()))
// Only use basic auth if token is blank and password is NOT
// Basic auth will fail with empty strings, but empty token will allow anonymous public API usage
if token == "" && password != "" {
gitlabClient, err = gitlab.NewBasicAuthClient(username, password, gitlab.WithBaseURL(baseURL), gitlab.WithHTTPClient(NewMigrationHTTPClient()))
}
if err != nil {
log.Trace("Error logging into gitlab: %v", err)
return nil, err
}
// split namespace and subdirectory
pathParts := strings.Split(strings.Trim(repoPath, "/"), "/")
var resp *gitlab.Response
u, _ := url.Parse(baseURL)
for len(pathParts) >= 2 {
_, resp, err = gitlabClient.Version.GetVersion()
if err == nil || resp != nil && resp.StatusCode == http.StatusUnauthorized {
err = nil // if no authentication given, this still should work
break
}
u.Path = path.Join(u.Path, pathParts[0])
baseURL = u.String()
pathParts = pathParts[1:]
_ = gitlab.WithBaseURL(baseURL)(gitlabClient)
repoPath = strings.Join(pathParts, "/")
}
if err != nil {
log.Trace("Error could not get gitlab version: %v", err)
return nil, err
}
log.Trace("gitlab downloader: use BaseURL: '%s' and RepoPath: '%s'", baseURL, repoPath)
// Grab and store project/repo ID here, due to issues using the URL escaped path
gr, _, err := gitlabClient.Projects.GetProject(repoPath, nil, nil, gitlab.WithContext(ctx))
if err != nil {
log.Trace("Error retrieving project: %v", err)
return nil, err
}
if gr == nil {
log.Trace("Error getting project, project is nil")
return nil, errors.New("Error getting project, project is nil")
}
return &GitlabDownloader{
ctx: ctx,
client: gitlabClient,
baseURL: baseURL,
repoID: gr.ID,
repoName: gr.Name,
maxPerPage: 100,
}, nil
}
// String implements Stringer
func (g *GitlabDownloader) String() string {
return fmt.Sprintf("migration from gitlab server %s [%d]/%s", g.baseURL, g.repoID, g.repoName)
}
func (g *GitlabDownloader) LogString() string {
if g == nil {
return "<GitlabDownloader nil>"
}
return fmt.Sprintf("<GitlabDownloader %s [%d]/%s>", g.baseURL, g.repoID, g.repoName)
}
// SetContext set context
func (g *GitlabDownloader) SetContext(ctx context.Context) {
g.ctx = ctx
}
// GetRepoInfo returns a repository information
func (g *GitlabDownloader) GetRepoInfo() (*base.Repository, error) {
gr, _, err := g.client.Projects.GetProject(g.repoID, nil, nil, gitlab.WithContext(g.ctx))
if err != nil {
return nil, err
}
var private bool
switch gr.Visibility {
case gitlab.InternalVisibility:
private = true
case gitlab.PrivateVisibility:
private = true
}
var owner string
if gr.Owner == nil {
log.Trace("gr.Owner is nil, trying to get owner from Namespace")
if gr.Namespace != nil && gr.Namespace.Kind == "user" {
owner = gr.Namespace.Path
}
} else {
owner = gr.Owner.Username
}
// convert gitlab repo to stand Repo
return &base.Repository{
Owner: owner,
Name: gr.Name,
IsPrivate: private,
Description: gr.Description,
OriginalURL: gr.WebURL,
CloneURL: gr.HTTPURLToRepo,
DefaultBranch: gr.DefaultBranch,
}, nil
}
// GetTopics return gitlab topics
func (g *GitlabDownloader) GetTopics() ([]string, error) {
gr, _, err := g.client.Projects.GetProject(g.repoID, nil, nil, gitlab.WithContext(g.ctx))
if err != nil {
return nil, err
}
return gr.TagList, err
}
// GetMilestones returns milestones
func (g *GitlabDownloader) GetMilestones() ([]*base.Milestone, error) {
perPage := g.maxPerPage
state := "all"
milestones := make([]*base.Milestone, 0, perPage)
for i := 1; ; i++ {
ms, _, err := g.client.Milestones.ListMilestones(g.repoID, &gitlab.ListMilestonesOptions{
State: &state,
ListOptions: gitlab.ListOptions{
Page: i,
PerPage: perPage,
},
}, nil, gitlab.WithContext(g.ctx))
if err != nil {
return nil, err
}
for _, m := range ms {
var desc string
if m.Description != "" {
desc = m.Description
}
state := "open"
var closedAt *time.Time
if m.State != "" {
state = m.State
if state == "closed" {
closedAt = m.UpdatedAt
}
}
var deadline *time.Time
if m.DueDate != nil {
deadlineParsed, err := time.Parse("2006-01-02", m.DueDate.String())
if err != nil {
log.Trace("Error parsing Milestone DueDate time")
deadline = nil
} else {
deadline = &deadlineParsed
}
}
milestones = append(milestones, &base.Milestone{
Title: m.Title,
Description: desc,
Deadline: deadline,
State: state,
Created: *m.CreatedAt,
Updated: m.UpdatedAt,
Closed: closedAt,
})
}
if len(ms) < perPage {
break
}
}
return milestones, nil
}
func (g *GitlabDownloader) normalizeColor(val string) string {
val = strings.TrimLeft(val, "#")
val = strings.ToLower(val)
if len(val) == 3 {
c := []rune(val)
val = fmt.Sprintf("%c%c%c%c%c%c", c[0], c[0], c[1], c[1], c[2], c[2])
}
if len(val) != 6 {
return ""
}
return val
}
// GetLabels returns labels
func (g *GitlabDownloader) GetLabels() ([]*base.Label, error) {
perPage := g.maxPerPage
labels := make([]*base.Label, 0, perPage)
for i := 1; ; i++ {
ls, _, err := g.client.Labels.ListLabels(g.repoID, &gitlab.ListLabelsOptions{ListOptions: gitlab.ListOptions{
Page: i,
PerPage: perPage,
}}, nil, gitlab.WithContext(g.ctx))
if err != nil {
return nil, err
}
for _, label := range ls {
baseLabel := &base.Label{
Name: label.Name,
Color: g.normalizeColor(label.Color),
Description: label.Description,
}
labels = append(labels, baseLabel)
}
if len(ls) < perPage {
break
}
}
return labels, nil
}
func (g *GitlabDownloader) convertGitlabRelease(rel *gitlab.Release) *base.Release {
var zero int
r := &base.Release{
TagName: rel.TagName,
TargetCommitish: rel.Commit.ID,
Name: rel.Name,
Body: rel.Description,
Created: *rel.CreatedAt,
PublisherID: int64(rel.Author.ID),
PublisherName: rel.Author.Username,
}
httpClient := NewMigrationHTTPClient()
for k, asset := range rel.Assets.Links {
assetID := asset.ID // Don't optimize this, for closure we need a local variable
r.Assets = append(r.Assets, &base.ReleaseAsset{
ID: int64(asset.ID),
Name: asset.Name,
ContentType: &rel.Assets.Sources[k].Format,
Size: &zero,
DownloadCount: &zero,
DownloadFunc: func() (io.ReadCloser, error) {
link, _, err := g.client.ReleaseLinks.GetReleaseLink(g.repoID, rel.TagName, assetID, gitlab.WithContext(g.ctx))
if err != nil {
return nil, err
}
if !hasBaseURL(link.URL, g.baseURL) {
WarnAndNotice("Unexpected AssetURL for assetID[%d] in %s: %s", assetID, g, link.URL)
return io.NopCloser(strings.NewReader(link.URL)), nil
}
req, err := http.NewRequest("GET", link.URL, nil)
if err != nil {
return nil, err
}
req = req.WithContext(g.ctx)
resp, err := httpClient.Do(req)
if err != nil {
return nil, err
}
// resp.Body is closed by the uploader
return resp.Body, nil
},
})
}
return r
}
// GetReleases returns releases
func (g *GitlabDownloader) GetReleases() ([]*base.Release, error) {
perPage := g.maxPerPage
releases := make([]*base.Release, 0, perPage)
for i := 1; ; i++ {
ls, _, err := g.client.Releases.ListReleases(g.repoID, &gitlab.ListReleasesOptions{
ListOptions: gitlab.ListOptions{
Page: i,
PerPage: perPage,
},
}, nil, gitlab.WithContext(g.ctx))
if err != nil {
return nil, err
}
for _, release := range ls {
releases = append(releases, g.convertGitlabRelease(release))
}
if len(ls) < perPage {
break
}
}
return releases, nil
}
type gitlabIssueContext struct {
IsMergeRequest bool
}
// GetIssues returns issues according start and limit
//
// Note: issue label description and colors are not supported by the go-gitlab library at this time
func (g *GitlabDownloader) GetIssues(page, perPage int) ([]*base.Issue, bool, error) {
state := "all"
sort := "asc"
if perPage > g.maxPerPage {
perPage = g.maxPerPage
}
opt := &gitlab.ListProjectIssuesOptions{
State: &state,
Sort: &sort,
ListOptions: gitlab.ListOptions{
PerPage: perPage,
Page: page,
},
}
allIssues := make([]*base.Issue, 0, perPage)
issues, _, err := g.client.Issues.ListProjectIssues(g.repoID, opt, nil, gitlab.WithContext(g.ctx))
if err != nil {
return nil, false, fmt.Errorf("error while listing issues: %w", err)
}
for _, issue := range issues {
labels := make([]*base.Label, 0, len(issue.Labels))
for _, l := range issue.Labels {
labels = append(labels, &base.Label{
Name: l,
})
}
var milestone string
if issue.Milestone != nil {
milestone = issue.Milestone.Title
}
var reactions []*gitlab.AwardEmoji
awardPage := 1
for {
awards, _, err := g.client.AwardEmoji.ListIssueAwardEmoji(g.repoID, issue.IID, &gitlab.ListAwardEmojiOptions{Page: awardPage, PerPage: perPage}, gitlab.WithContext(g.ctx))
if err != nil {
return nil, false, fmt.Errorf("error while listing issue awards: %w", err)
}
reactions = append(reactions, awards...)
if len(awards) < perPage {
break
}
awardPage++
}
allIssues = append(allIssues, &base.Issue{
Title: issue.Title,
Number: int64(issue.IID),
PosterID: int64(issue.Author.ID),
PosterName: issue.Author.Username,
Content: issue.Description,
Milestone: milestone,
State: issue.State,
Created: *issue.CreatedAt,
Labels: labels,
Reactions: g.awardsToReactions(reactions),
Closed: issue.ClosedAt,
IsLocked: issue.DiscussionLocked,
Updated: *issue.UpdatedAt,
ForeignIndex: int64(issue.IID),
Context: gitlabIssueContext{IsMergeRequest: false},
})
// record the issue IID, to be used in GetPullRequests()
g.iidResolver.recordIssueIID(issue.IID)
}
return allIssues, len(issues) < perPage, nil
}
// GetComments returns comments according issueNumber
// TODO: figure out how to transfer comment reactions
func (g *GitlabDownloader) GetComments(commentable base.Commentable) ([]*base.Comment, bool, error) {
context, ok := commentable.GetContext().(gitlabIssueContext)
if !ok {
return nil, false, fmt.Errorf("unexpected context: %+v", commentable.GetContext())
}
allComments := make([]*base.Comment, 0, g.maxPerPage)
page := 1
for {
var comments []*gitlab.Discussion
var resp *gitlab.Response
var err error
if !context.IsMergeRequest {
comments, resp, err = g.client.Discussions.ListIssueDiscussions(g.repoID, int(commentable.GetForeignIndex()), &gitlab.ListIssueDiscussionsOptions{
Page: page,
PerPage: g.maxPerPage,
}, nil, gitlab.WithContext(g.ctx))
} else {
comments, resp, err = g.client.Discussions.ListMergeRequestDiscussions(g.repoID, int(commentable.GetForeignIndex()), &gitlab.ListMergeRequestDiscussionsOptions{
Page: page,
PerPage: g.maxPerPage,
}, nil, gitlab.WithContext(g.ctx))
}
if err != nil {
return nil, false, fmt.Errorf("error while listing comments: %v %w", g.repoID, err)
}
for _, comment := range comments {
for _, note := range comment.Notes {
allComments = append(allComments, g.convertNoteToComment(commentable.GetLocalIndex(), note))
}
}
if resp.NextPage == 0 {
break
}
page = resp.NextPage
}
page = 1
for {
var stateEvents []*gitlab.StateEvent
var resp *gitlab.Response
var err error
if context.IsMergeRequest {
stateEvents, resp, err = g.client.ResourceStateEvents.ListMergeStateEvents(g.repoID, int(commentable.GetForeignIndex()), &gitlab.ListStateEventsOptions{
ListOptions: gitlab.ListOptions{
Page: page,
PerPage: g.maxPerPage,
},
}, nil, gitlab.WithContext(g.ctx))
} else {
stateEvents, resp, err = g.client.ResourceStateEvents.ListIssueStateEvents(g.repoID, int(commentable.GetForeignIndex()), &gitlab.ListStateEventsOptions{
ListOptions: gitlab.ListOptions{
Page: page,
PerPage: g.maxPerPage,
},
}, nil, gitlab.WithContext(g.ctx))
}
if err != nil {
return nil, false, fmt.Errorf("error while listing state events: %v %w", g.repoID, err)
}
for _, stateEvent := range stateEvents {
comment := &base.Comment{
IssueIndex: commentable.GetLocalIndex(),
Index: int64(stateEvent.ID),
PosterID: int64(stateEvent.User.ID),
PosterName: stateEvent.User.Username,
Content: "",
Created: *stateEvent.CreatedAt,
}
switch stateEvent.State {
case gitlab.ClosedEventType:
comment.CommentType = issues_model.CommentTypeClose.String()
case gitlab.MergedEventType:
comment.CommentType = issues_model.CommentTypeMergePull.String()
case gitlab.ReopenedEventType:
comment.CommentType = issues_model.CommentTypeReopen.String()
default:
// Ignore other event types
continue
}
allComments = append(allComments, comment)
}
if resp.NextPage == 0 {
break
}
page = resp.NextPage
}
return allComments, true, nil
}
var targetBranchChangeRegexp = regexp.MustCompile("^changed target branch from `(.*?)` to `(.*?)`$")
func (g *GitlabDownloader) convertNoteToComment(localIndex int64, note *gitlab.Note) *base.Comment {
comment := &base.Comment{
IssueIndex: localIndex,
Index: int64(note.ID),
PosterID: int64(note.Author.ID),
PosterName: note.Author.Username,
PosterEmail: note.Author.Email,
Content: note.Body,
Created: *note.CreatedAt,
Meta: map[string]any{},
}
// Try to find the underlying event of system notes.
if note.System {
if match := targetBranchChangeRegexp.FindStringSubmatch(note.Body); match != nil {
comment.CommentType = issues_model.CommentTypeChangeTargetBranch.String()
comment.Meta["OldRef"] = match[1]
comment.Meta["NewRef"] = match[2]
} else if strings.HasPrefix(note.Body, "enabled an automatic merge") {
comment.CommentType = issues_model.CommentTypePRScheduledToAutoMerge.String()
} else if note.Body == "canceled the automatic merge" {
comment.CommentType = issues_model.CommentTypePRUnScheduledToAutoMerge.String()
}
}
return comment
}
// GetPullRequests returns pull requests according page and perPage
func (g *GitlabDownloader) GetPullRequests(page, perPage int) ([]*base.PullRequest, bool, error) {
if perPage > g.maxPerPage {
perPage = g.maxPerPage
}
view := "simple"
opt := &gitlab.ListProjectMergeRequestsOptions{
ListOptions: gitlab.ListOptions{
PerPage: perPage,
Page: page,
},
View: &view,
}
allPRs := make([]*base.PullRequest, 0, perPage)
prs, _, err := g.client.MergeRequests.ListProjectMergeRequests(g.repoID, opt, nil, gitlab.WithContext(g.ctx))
if err != nil {
return nil, false, fmt.Errorf("error while listing merge requests: %w", err)
}
for _, simplePR := range prs {
// Load merge request again by itself, as not all fields are populated in the ListProjectMergeRequests endpoint.
// See https://gitlab.com/gitlab-org/gitlab/-/issues/29620
pr, _, err := g.client.MergeRequests.GetMergeRequest(g.repoID, simplePR.IID, nil)
if err != nil {
return nil, false, fmt.Errorf("error while loading merge request: %w", err)
}
labels := make([]*base.Label, 0, len(pr.Labels))
for _, l := range pr.Labels {
labels = append(labels, &base.Label{
Name: l,
})
}
var merged bool
if pr.State == "merged" {
merged = true
pr.State = "closed"
}
mergeTime := pr.MergedAt
if merged && pr.MergedAt == nil {
mergeTime = pr.UpdatedAt
}
closeTime := pr.ClosedAt
if merged && pr.ClosedAt == nil {
closeTime = pr.UpdatedAt
}
mergeCommitSHA := pr.MergeCommitSHA
if mergeCommitSHA == "" {
mergeCommitSHA = pr.SquashCommitSHA
}
var locked bool
if pr.State == "locked" {
locked = true
}
var milestone string
if pr.Milestone != nil {
milestone = pr.Milestone.Title
}
var reactions []*gitlab.AwardEmoji
awardPage := 1
for {
awards, _, err := g.client.AwardEmoji.ListMergeRequestAwardEmoji(g.repoID, pr.IID, &gitlab.ListAwardEmojiOptions{Page: awardPage, PerPage: perPage}, gitlab.WithContext(g.ctx))
if err != nil {
return nil, false, fmt.Errorf("error while listing merge requests awards: %w", err)
}
reactions = append(reactions, awards...)
if len(awards) < perPage {
break
}
awardPage++
}
// Generate new PR Numbers by the known Issue Numbers, because they share the same number space in Gitea, but they are independent in Gitlab
newPRNumber := g.iidResolver.generatePullRequestNumber(pr.IID)
allPRs = append(allPRs, &base.PullRequest{
Title: pr.Title,
Number: newPRNumber,
PosterName: pr.Author.Username,
PosterID: int64(pr.Author.ID),
Content: pr.Description,
Milestone: milestone,
State: pr.State,
Created: *pr.CreatedAt,
Closed: closeTime,
Labels: labels,
Merged: merged,
MergeCommitSHA: mergeCommitSHA,
MergedTime: mergeTime,
IsLocked: locked,
Reactions: g.awardsToReactions(reactions),
Head: base.PullRequestBranch{
Ref: pr.SourceBranch,
SHA: pr.SHA,
RepoName: g.repoName,
OwnerName: pr.Author.Username,
CloneURL: pr.WebURL,
},
Base: base.PullRequestBranch{
Ref: pr.TargetBranch,
SHA: pr.DiffRefs.BaseSha,
RepoName: g.repoName,
OwnerName: pr.Author.Username,
},
PatchURL: pr.WebURL + ".patch",
ForeignIndex: int64(pr.IID),
Context: gitlabIssueContext{IsMergeRequest: true},
})
// SECURITY: Ensure that the PR is safe
_ = CheckAndEnsureSafePR(allPRs[len(allPRs)-1], g.baseURL, g)
}
return allPRs, len(prs) < perPage, nil
}
// GetReviews returns pull requests review
func (g *GitlabDownloader) GetReviews(reviewable base.Reviewable) ([]*base.Review, error) {
approvals, resp, err := g.client.MergeRequestApprovals.GetConfiguration(g.repoID, int(reviewable.GetForeignIndex()), gitlab.WithContext(g.ctx))
if err != nil {
if resp != nil && resp.StatusCode == http.StatusNotFound {
log.Error(fmt.Sprintf("GitlabDownloader: while migrating a error occurred: '%s'", err.Error()))
return []*base.Review{}, nil
}
return nil, err
}
var createdAt time.Time
if approvals.CreatedAt != nil {
createdAt = *approvals.CreatedAt
} else if approvals.UpdatedAt != nil {
createdAt = *approvals.UpdatedAt
} else {
createdAt = time.Now()
}
reviews := make([]*base.Review, 0, len(approvals.ApprovedBy))
for _, user := range approvals.ApprovedBy {
reviews = append(reviews, &base.Review{
IssueIndex: reviewable.GetLocalIndex(),
ReviewerID: int64(user.User.ID),
ReviewerName: user.User.Username,
CreatedAt: createdAt,
// All we get are approvals
State: base.ReviewStateApproved,
})
}
return reviews, nil
}
func (g *GitlabDownloader) awardsToReactions(awards []*gitlab.AwardEmoji) []*base.Reaction {
result := make([]*base.Reaction, 0, len(awards))
uniqCheck := make(container.Set[string])
for _, award := range awards {
uid := fmt.Sprintf("%s%d", award.Name, award.User.ID)
if uniqCheck.Add(uid) {
result = append(result, &base.Reaction{
UserID: int64(award.User.ID),
UserName: award.User.Username,
Content: award.Name,
})
}
}
return result
}