Make the github migration less rate limit waiting to get comment per page from repository but not per issue (#16070)

* Make the github migration less rate limit waiting to get comment per page from repository but not per issue

* Fix lint

* adjust Downloader interface

* Fix missed reviews

* Fix test

* Remove unused struct
This commit is contained in:
Lunny Xiao 2021-06-30 15:23:49 +08:00 committed by GitHub
parent e8c6cead0f
commit 0966349354
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 194 additions and 56 deletions

View file

@ -11,6 +11,13 @@ import (
"code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/structs"
) )
// GetCommentOptions represents an options for get comment
type GetCommentOptions struct {
IssueNumber int64
Page int
PageSize int
}
// Downloader downloads the site repo informations // Downloader downloads the site repo informations
type Downloader interface { type Downloader interface {
SetContext(context.Context) SetContext(context.Context)
@ -20,7 +27,8 @@ type Downloader interface {
GetReleases() ([]*Release, error) GetReleases() ([]*Release, error)
GetLabels() ([]*Label, error) GetLabels() ([]*Label, error)
GetIssues(page, perPage int) ([]*Issue, bool, error) GetIssues(page, perPage int) ([]*Issue, bool, error)
GetComments(issueNumber int64) ([]*Comment, error) GetComments(opts GetCommentOptions) ([]*Comment, bool, error)
SupportGetRepoComments() bool
GetPullRequests(page, perPage int) ([]*PullRequest, bool, error) GetPullRequests(page, perPage int) ([]*PullRequest, bool, error)
GetReviews(pullRequestNumber int64) ([]*Review, error) GetReviews(pullRequestNumber int64) ([]*Review, error)
FormatCloneURL(opts MigrateOptions, remoteAddr string) (string, error) FormatCloneURL(opts MigrateOptions, remoteAddr string) (string, error)

View file

@ -51,8 +51,8 @@ func (n NullDownloader) GetIssues(page, perPage int) ([]*Issue, bool, error) {
} }
// GetComments returns comments according issueNumber // GetComments returns comments according issueNumber
func (n NullDownloader) GetComments(issueNumber int64) ([]*Comment, error) { func (n NullDownloader) GetComments(GetCommentOptions) ([]*Comment, bool, error) {
return nil, &ErrNotSupported{Entity: "Comments"} return nil, false, &ErrNotSupported{Entity: "Comments"}
} }
// GetPullRequests returns pull requests according page and perPage // GetPullRequests returns pull requests according page and perPage
@ -80,3 +80,8 @@ func (n NullDownloader) FormatCloneURL(opts MigrateOptions, remoteAddr string) (
} }
return remoteAddr, nil return remoteAddr, nil
} }
// SupportGetRepoComments return true if it supports get repo comments
func (n NullDownloader) SupportGetRepoComments() bool {
return false
}

View file

@ -150,18 +150,19 @@ func (d *RetryDownloader) GetIssues(page, perPage int) ([]*Issue, bool, error) {
} }
// GetComments returns a repository's comments with retry // GetComments returns a repository's comments with retry
func (d *RetryDownloader) GetComments(issueNumber int64) ([]*Comment, error) { func (d *RetryDownloader) GetComments(opts GetCommentOptions) ([]*Comment, bool, error) {
var ( var (
comments []*Comment comments []*Comment
isEnd bool
err error err error
) )
err = d.retry(func() error { err = d.retry(func() error {
comments, err = d.Downloader.GetComments(issueNumber) comments, isEnd, err = d.Downloader.GetComments(opts)
return err return err
}) })
return comments, err return comments, isEnd, err
} }
// GetPullRequests returns a repository's pull requests with retry // GetPullRequests returns a repository's pull requests with retry

View file

@ -435,37 +435,37 @@ func (g *GiteaDownloader) GetIssues(page, perPage int) ([]*base.Issue, bool, err
} }
// GetComments returns comments according issueNumber // GetComments returns comments according issueNumber
func (g *GiteaDownloader) GetComments(index int64) ([]*base.Comment, error) { func (g *GiteaDownloader) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
var allComments = make([]*base.Comment, 0, g.maxPerPage) var allComments = make([]*base.Comment, 0, g.maxPerPage)
// for i := 1; ; i++ { // for i := 1; ; i++ {
// make sure gitea can shutdown gracefully // make sure gitea can shutdown gracefully
select { select {
case <-g.ctx.Done(): case <-g.ctx.Done():
return nil, nil return nil, false, nil
default: default:
} }
comments, _, err := g.client.ListIssueComments(g.repoOwner, g.repoName, index, gitea_sdk.ListIssueCommentOptions{ListOptions: gitea_sdk.ListOptions{ comments, _, err := g.client.ListIssueComments(g.repoOwner, g.repoName, opts.IssueNumber, gitea_sdk.ListIssueCommentOptions{ListOptions: gitea_sdk.ListOptions{
// PageSize: g.maxPerPage, // PageSize: g.maxPerPage,
// Page: i, // Page: i,
}}) }})
if err != nil { if err != nil {
return nil, fmt.Errorf("error while listing comments for issue #%d. Error: %v", index, err) return nil, false, fmt.Errorf("error while listing comments for issue #%d. Error: %v", opts.IssueNumber, err)
} }
for _, comment := range comments { for _, comment := range comments {
reactions, err := g.getCommentReactions(comment.ID) reactions, err := g.getCommentReactions(comment.ID)
if err != nil { if err != nil {
log.Warn("Unable to load comment reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", index, comment.ID, g.repoOwner, g.repoName, err) log.Warn("Unable to load comment reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", opts.IssueNumber, comment.ID, g.repoOwner, g.repoName, err)
if err2 := models.CreateRepositoryNotice( if err2 := models.CreateRepositoryNotice(
fmt.Sprintf("Unable to load reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", index, comment.ID, g.repoOwner, g.repoName, err)); err2 != nil { fmt.Sprintf("Unable to load reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", opts.IssueNumber, comment.ID, g.repoOwner, g.repoName, err)); err2 != nil {
log.Error("create repository notice failed: ", err2) log.Error("create repository notice failed: ", err2)
} }
} }
allComments = append(allComments, &base.Comment{ allComments = append(allComments, &base.Comment{
IssueIndex: index, IssueIndex: opts.IssueNumber,
PosterID: comment.Poster.ID, PosterID: comment.Poster.ID,
PosterName: comment.Poster.UserName, PosterName: comment.Poster.UserName,
PosterEmail: comment.Poster.Email, PosterEmail: comment.Poster.Email,
@ -481,7 +481,7 @@ func (g *GiteaDownloader) GetComments(index int64) ([]*base.Comment, error) {
// break // break
// } // }
//} //}
return allComments, nil return allComments, true, nil
} }
// GetPullRequests returns pull requests according page and perPage // GetPullRequests returns pull requests according page and perPage

View file

@ -224,7 +224,9 @@ func TestGiteaDownloadRepo(t *testing.T) {
Closed: &closed2, Closed: &closed2,
}, issues[1]) }, issues[1])
comments, err := downloader.GetComments(4) comments, _, err := downloader.GetComments(base.GetCommentOptions{
IssueNumber: 4,
})
assert.NoError(t, err) assert.NoError(t, err)
assert.Len(t, comments, 2) assert.Len(t, comments, 2)
assert.EqualValues(t, 1598975370, comments[0].Created.Unix()) assert.EqualValues(t, 1598975370, comments[0].Created.Unix())

View file

@ -11,6 +11,7 @@ import (
"io" "io"
"net/http" "net/http"
"net/url" "net/url"
"strconv"
"strings" "strings"
"time" "time"
@ -450,8 +451,22 @@ func (g *GithubDownloaderV3) GetIssues(page, perPage int) ([]*base.Issue, bool,
return allIssues, len(issues) < perPage, nil return allIssues, len(issues) < perPage, nil
} }
// SupportGetRepoComments return true if it supports get repo comments
func (g *GithubDownloaderV3) SupportGetRepoComments() bool {
return true
}
// GetComments returns comments according issueNumber // GetComments returns comments according issueNumber
func (g *GithubDownloaderV3) GetComments(issueNumber int64) ([]*base.Comment, error) { func (g *GithubDownloaderV3) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
if opts.IssueNumber > 0 {
comments, err := g.getComments(opts.IssueNumber)
return comments, false, err
}
return g.GetAllComments(opts.Page, opts.PageSize)
}
func (g *GithubDownloaderV3) getComments(issueNumber int64) ([]*base.Comment, error) {
var ( var (
allComments = make([]*base.Comment, 0, g.maxPerPage) allComments = make([]*base.Comment, 0, g.maxPerPage)
created = "created" created = "created"
@ -519,6 +534,75 @@ func (g *GithubDownloaderV3) GetComments(issueNumber int64) ([]*base.Comment, er
return allComments, nil return allComments, nil
} }
// GetAllComments returns repository comments according page and perPageSize
func (g *GithubDownloaderV3) GetAllComments(page, perPage int) ([]*base.Comment, bool, error) {
var (
allComments = make([]*base.Comment, 0, perPage)
created = "created"
asc = "asc"
)
opt := &github.IssueListCommentsOptions{
Sort: &created,
Direction: &asc,
ListOptions: github.ListOptions{
Page: page,
PerPage: perPage,
},
}
g.sleep()
comments, resp, err := g.client.Issues.ListComments(g.ctx, g.repoOwner, g.repoName, 0, opt)
if err != nil {
return nil, false, fmt.Errorf("error while listing repos: %v", err)
}
log.Trace("Request get comments %d/%d, but in fact get %d", perPage, page, len(comments))
g.rate = &resp.Rate
for _, comment := range comments {
var email string
if comment.User.Email != nil {
email = *comment.User.Email
}
// get reactions
var reactions []*base.Reaction
for i := 1; ; i++ {
g.sleep()
res, resp, err := g.client.Reactions.ListIssueCommentReactions(g.ctx, g.repoOwner, g.repoName, comment.GetID(), &github.ListOptions{
Page: i,
PerPage: g.maxPerPage,
})
if err != nil {
return nil, false, err
}
g.rate = &resp.Rate
if len(res) == 0 {
break
}
for _, reaction := range res {
reactions = append(reactions, &base.Reaction{
UserID: reaction.User.GetID(),
UserName: reaction.User.GetLogin(),
Content: reaction.GetContent(),
})
}
}
idx := strings.LastIndex(*comment.IssueURL, "/")
issueIndex, _ := strconv.ParseInt((*comment.IssueURL)[idx+1:], 10, 64)
allComments = append(allComments, &base.Comment{
IssueIndex: issueIndex,
PosterID: *comment.User.ID,
PosterName: *comment.User.Login,
PosterEmail: email,
Content: *comment.Body,
Created: *comment.CreatedAt,
Updated: *comment.UpdatedAt,
Reactions: reactions,
})
}
return allComments, len(allComments) < perPage, nil
}
// GetPullRequests returns pull requests according page and perPage // GetPullRequests returns pull requests according page and perPage
func (g *GithubDownloaderV3) GetPullRequests(page, perPage int) ([]*base.PullRequest, bool, error) { func (g *GithubDownloaderV3) GetPullRequests(page, perPage int) ([]*base.PullRequest, bool, error) {
if perPage > g.maxPerPage { if perPage > g.maxPerPage {
@ -539,6 +623,7 @@ func (g *GithubDownloaderV3) GetPullRequests(page, perPage int) ([]*base.PullReq
if err != nil { if err != nil {
return nil, false, fmt.Errorf("error while listing repos: %v", err) return nil, false, fmt.Errorf("error while listing repos: %v", err)
} }
log.Trace("Request get pull requests %d/%d, but in fact get %d", perPage, page, len(prs))
g.rate = &resp.Rate g.rate = &resp.Rate
for _, pr := range prs { for _, pr := range prs {
var body string var body string

View file

@ -240,7 +240,9 @@ func TestGitHubDownloadRepo(t *testing.T) {
}, issues) }, issues)
// downloader.GetComments() // downloader.GetComments()
comments, err := downloader.GetComments(2) comments, _, err := downloader.GetComments(base.GetCommentOptions{
IssueNumber: 2,
})
assert.NoError(t, err) assert.NoError(t, err)
assert.Len(t, comments, 2) assert.Len(t, comments, 2)
assert.EqualValues(t, []*base.Comment{ assert.EqualValues(t, []*base.Comment{

View file

@ -430,7 +430,8 @@ func (g *GitlabDownloader) GetIssues(page, perPage int) ([]*base.Issue, bool, er
// GetComments returns comments according issueNumber // GetComments returns comments according issueNumber
// TODO: figure out how to transfer comment reactions // TODO: figure out how to transfer comment reactions
func (g *GitlabDownloader) GetComments(issueNumber int64) ([]*base.Comment, error) { func (g *GitlabDownloader) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
var issueNumber = opts.IssueNumber
var allComments = make([]*base.Comment, 0, g.maxPerPage) var allComments = make([]*base.Comment, 0, g.maxPerPage)
var page = 1 var page = 1
@ -457,7 +458,7 @@ func (g *GitlabDownloader) GetComments(issueNumber int64) ([]*base.Comment, erro
} }
if err != nil { if err != nil {
return nil, fmt.Errorf("error while listing comments: %v %v", g.repoID, err) return nil, false, fmt.Errorf("error while listing comments: %v %v", g.repoID, err)
} }
for _, comment := range comments { for _, comment := range comments {
// Flatten comment threads // Flatten comment threads
@ -490,7 +491,7 @@ func (g *GitlabDownloader) GetComments(issueNumber int64) ([]*base.Comment, erro
} }
page = resp.NextPage page = resp.NextPage
} }
return allComments, nil return allComments, true, nil
} }
// GetPullRequests returns pull requests according page and perPage // GetPullRequests returns pull requests according page and perPage

View file

@ -204,7 +204,9 @@ func TestGitlabDownloadRepo(t *testing.T) {
}, },
}, issues) }, issues)
comments, err := downloader.GetComments(2) comments, _, err := downloader.GetComments(base.GetCommentOptions{
IssueNumber: 2,
})
assert.NoError(t, err) assert.NoError(t, err)
assert.Len(t, comments, 4) assert.Len(t, comments, 4)
assert.EqualValues(t, []*base.Comment{ assert.EqualValues(t, []*base.Comment{

View file

@ -227,12 +227,13 @@ func (g *GogsDownloader) getIssues(page int, state string) ([]*base.Issue, bool,
} }
// GetComments returns comments according issueNumber // GetComments returns comments according issueNumber
func (g *GogsDownloader) GetComments(issueNumber int64) ([]*base.Comment, error) { func (g *GogsDownloader) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
var issueNumber = opts.IssueNumber
var allComments = make([]*base.Comment, 0, 100) var allComments = make([]*base.Comment, 0, 100)
comments, err := g.client.ListIssueComments(g.repoOwner, g.repoName, issueNumber) comments, err := g.client.ListIssueComments(g.repoOwner, g.repoName, issueNumber)
if err != nil { if err != nil {
return nil, fmt.Errorf("error while listing repos: %v", err) return nil, false, fmt.Errorf("error while listing repos: %v", err)
} }
for _, comment := range comments { for _, comment := range comments {
if len(comment.Body) == 0 || comment.Poster == nil { if len(comment.Body) == 0 || comment.Poster == nil {
@ -249,7 +250,7 @@ func (g *GogsDownloader) GetComments(issueNumber int64) ([]*base.Comment, error)
}) })
} }
return allComments, nil return allComments, true, nil
} }
// GetTopics return repository topics // GetTopics return repository topics

View file

@ -103,7 +103,9 @@ func TestGogsDownloadRepo(t *testing.T) {
}, issues) }, issues)
// downloader.GetComments() // downloader.GetComments()
comments, err := downloader.GetComments(1) comments, _, err := downloader.GetComments(base.GetCommentOptions{
IssueNumber: 1,
})
assert.NoError(t, err) assert.NoError(t, err)
assert.Len(t, comments, 1) assert.Len(t, comments, 1)
assert.EqualValues(t, []*base.Comment{ assert.EqualValues(t, []*base.Comment{

View file

@ -292,6 +292,8 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
reviewBatchSize = uploader.MaxBatchInsertSize("review") reviewBatchSize = uploader.MaxBatchInsertSize("review")
) )
supportAllComments := downloader.SupportGetRepoComments()
if opts.Issues { if opts.Issues {
log.Trace("migrating issues and comments") log.Trace("migrating issues and comments")
messenger("repo.migrate.migrating_issues") messenger("repo.migrate.migrating_issues")
@ -311,11 +313,13 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
return err return err
} }
if opts.Comments { if opts.Comments && !supportAllComments {
var allComments = make([]*base.Comment, 0, commentBatchSize) var allComments = make([]*base.Comment, 0, commentBatchSize)
for _, issue := range issues { for _, issue := range issues {
log.Trace("migrating issue %d's comments", issue.Number) log.Trace("migrating issue %d's comments", issue.Number)
comments, err := downloader.GetComments(issue.Number) comments, _, err := downloader.GetComments(base.GetCommentOptions{
IssueNumber: issue.Number,
})
if err != nil { if err != nil {
if !base.IsErrNotSupported(err) { if !base.IsErrNotSupported(err) {
return err return err
@ -366,11 +370,14 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
} }
if opts.Comments { if opts.Comments {
if !supportAllComments {
// plain comments // plain comments
var allComments = make([]*base.Comment, 0, commentBatchSize) var allComments = make([]*base.Comment, 0, commentBatchSize)
for _, pr := range prs { for _, pr := range prs {
log.Trace("migrating pull request %d's comments", pr.Number) log.Trace("migrating pull request %d's comments", pr.Number)
comments, err := downloader.GetComments(pr.Number) comments, _, err := downloader.GetComments(base.GetCommentOptions{
IssueNumber: pr.Number,
})
if err != nil { if err != nil {
if !base.IsErrNotSupported(err) { if !base.IsErrNotSupported(err) {
return err return err
@ -392,6 +399,7 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
return err return err
} }
} }
}
// migrate reviews // migrate reviews
var allReviews = make([]*base.Review, 0, reviewBatchSize) var allReviews = make([]*base.Review, 0, reviewBatchSize)
@ -439,6 +447,27 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
} }
} }
if opts.Comments && supportAllComments {
log.Trace("migrating comments")
for i := 1; ; i++ {
comments, isEnd, err := downloader.GetComments(base.GetCommentOptions{
Page: i,
PageSize: commentBatchSize,
})
if err != nil {
return err
}
if err := uploader.CreateComments(comments...); err != nil {
return err
}
if isEnd {
break
}
}
}
return uploader.Finish() return uploader.Finish()
} }

View file

@ -212,27 +212,27 @@ func (r *RepositoryRestorer) GetIssues(page, perPage int) ([]*base.Issue, bool,
} }
// GetComments returns comments according issueNumber // GetComments returns comments according issueNumber
func (r *RepositoryRestorer) GetComments(issueNumber int64) ([]*base.Comment, error) { func (r *RepositoryRestorer) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
var comments = make([]*base.Comment, 0, 10) var comments = make([]*base.Comment, 0, 10)
p := filepath.Join(r.commentDir(), fmt.Sprintf("%d.yml", issueNumber)) p := filepath.Join(r.commentDir(), fmt.Sprintf("%d.yml", opts.IssueNumber))
_, err := os.Stat(p) _, err := os.Stat(p)
if err != nil { if err != nil {
if os.IsNotExist(err) { if os.IsNotExist(err) {
return nil, nil return nil, false, nil
} }
return nil, err return nil, false, err
} }
bs, err := ioutil.ReadFile(p) bs, err := ioutil.ReadFile(p)
if err != nil { if err != nil {
return nil, err return nil, false, err
} }
err = yaml.Unmarshal(bs, &comments) err = yaml.Unmarshal(bs, &comments)
if err != nil { if err != nil {
return nil, err return nil, false, err
} }
return comments, nil return comments, false, nil
} }
// GetPullRequests returns pull requests according page and perPage // GetPullRequests returns pull requests according page and perPage