bench-forgejo/services/pull/patch.go
zeripath d6b96627c1
Add setting to disable the git apply step in test patch (#22130)
For a long time Gitea has tested PR patches using a git apply --check
method, and in fact prior to the introduction of a read-tree assisted
three-way merge in #18004, this was the only way of checking patches.

Since #18004, the git apply --check method has been a fallback method,
only used when the read-tree three-way merge method has detected a
conflict. The read-tree assisted three-way merge method is much faster
and less resource intensive method of detecting conflicts. #18004 kept
the git apply method around because it was thought possible that this
fallback might be able to rectify conflicts that the read-tree three-way
merge detected. I am not certain if this could ever be the case.

Given the uncertainty here and the now relative stability of the
read-tree method - this PR makes using this fallback optional and
disables it by default. The hope is that users will not notice any
significant difference in conflict detection and we will be able to
remove the git apply fallback in future, and/or improve the read-tree
three-way merge method to catch any conflicts that git apply method
might have been able to fix.

An additional benefit is that patch checking should be significantly
less resource intensive and much quicker.

(See
https://github.com/go-gitea/gitea/issues/22083\#issuecomment-1347961737)

Ref #22083

Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
Co-authored-by: KN4CK3R <admin@oldschoolhack.me>
2022-12-19 19:37:15 +08:00

568 lines
19 KiB
Go

// Copyright 2019 The Gitea Authors.
// All rights reserved.
// SPDX-License-Identifier: MIT
package pull
import (
"bufio"
"context"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/models/db"
issues_model "code.gitea.io/gitea/models/issues"
"code.gitea.io/gitea/models/unit"
"code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
repo_module "code.gitea.io/gitea/modules/repository"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/gobwas/glob"
)
// DownloadDiffOrPatch will write the patch for the pr to the writer
func DownloadDiffOrPatch(ctx context.Context, pr *issues_model.PullRequest, w io.Writer, patch, binary bool) error {
if err := pr.LoadBaseRepo(ctx); err != nil {
log.Error("Unable to load base repository ID %d for pr #%d [%d]", pr.BaseRepoID, pr.Index, pr.ID)
return err
}
gitRepo, closer, err := git.RepositoryFromContextOrOpen(ctx, pr.BaseRepo.RepoPath())
if err != nil {
return fmt.Errorf("OpenRepository: %w", err)
}
defer closer.Close()
if err := gitRepo.GetDiffOrPatch(pr.MergeBase, pr.GetGitRefName(), w, patch, binary); err != nil {
log.Error("Unable to get patch file from %s to %s in %s Error: %v", pr.MergeBase, pr.HeadBranch, pr.BaseRepo.FullName(), err)
return fmt.Errorf("Unable to get patch file from %s to %s in %s Error: %w", pr.MergeBase, pr.HeadBranch, pr.BaseRepo.FullName(), err)
}
return nil
}
var patchErrorSuffices = []string{
": already exists in index",
": patch does not apply",
": already exists in working directory",
"unrecognized input",
": No such file or directory",
": does not exist in index",
}
// TestPatch will test whether a simple patch will apply
func TestPatch(pr *issues_model.PullRequest) error {
ctx, _, finished := process.GetManager().AddContext(graceful.GetManager().HammerContext(), fmt.Sprintf("TestPatch: Repo[%d]#%d", pr.BaseRepoID, pr.Index))
defer finished()
// Clone base repo.
tmpBasePath, err := createTemporaryRepo(ctx, pr)
if err != nil {
log.Error("CreateTemporaryPath: %v", err)
return err
}
defer func() {
if err := repo_module.RemoveTemporaryPath(tmpBasePath); err != nil {
log.Error("Merge: RemoveTemporaryPath: %s", err)
}
}()
gitRepo, err := git.OpenRepository(ctx, tmpBasePath)
if err != nil {
return fmt.Errorf("OpenRepository: %w", err)
}
defer gitRepo.Close()
// 1. update merge base
pr.MergeBase, _, err = git.NewCommand(ctx, "merge-base", "--", "base", "tracking").RunStdString(&git.RunOpts{Dir: tmpBasePath})
if err != nil {
var err2 error
pr.MergeBase, err2 = gitRepo.GetRefCommitID(git.BranchPrefix + "base")
if err2 != nil {
return fmt.Errorf("GetMergeBase: %v and can't find commit ID for base: %w", err, err2)
}
}
pr.MergeBase = strings.TrimSpace(pr.MergeBase)
if pr.HeadCommitID, err = gitRepo.GetRefCommitID(git.BranchPrefix + "tracking"); err != nil {
return fmt.Errorf("GetBranchCommitID: can't find commit ID for head: %w", err)
}
if pr.HeadCommitID == pr.MergeBase {
pr.Status = issues_model.PullRequestStatusAncestor
return nil
}
// 2. Check for conflicts
if conflicts, err := checkConflicts(ctx, pr, gitRepo, tmpBasePath); err != nil || conflicts || pr.Status == issues_model.PullRequestStatusEmpty {
return err
}
// 3. Check for protected files changes
if err = checkPullFilesProtection(pr, gitRepo); err != nil {
return fmt.Errorf("pr.CheckPullFilesProtection(): %w", err)
}
if len(pr.ChangedProtectedFiles) > 0 {
log.Trace("Found %d protected files changed", len(pr.ChangedProtectedFiles))
}
pr.Status = issues_model.PullRequestStatusMergeable
return nil
}
type errMergeConflict struct {
filename string
}
func (e *errMergeConflict) Error() string {
return fmt.Sprintf("conflict detected at: %s", e.filename)
}
func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, gitRepo *git.Repository) error {
log.Trace("Attempt to merge:\n%v", file)
switch {
case file.stage1 != nil && (file.stage2 == nil || file.stage3 == nil):
// 1. Deleted in one or both:
//
// Conflict <==> the stage1 !SameAs to the undeleted one
if (file.stage2 != nil && !file.stage1.SameAs(file.stage2)) || (file.stage3 != nil && !file.stage1.SameAs(file.stage3)) {
// Conflict!
return &errMergeConflict{file.stage1.path}
}
// Not a genuine conflict and we can simply remove the file from the index
return gitRepo.RemoveFilesFromIndex(file.stage1.path)
case file.stage1 == nil && file.stage2 != nil && (file.stage3 == nil || file.stage2.SameAs(file.stage3)):
// 2. Added in ours but not in theirs or identical in both
//
// Not a genuine conflict just add to the index
if err := gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(file.stage2.sha), file.stage2.path); err != nil {
return err
}
return nil
case file.stage1 == nil && file.stage2 != nil && file.stage3 != nil && file.stage2.sha == file.stage3.sha && file.stage2.mode != file.stage3.mode:
// 3. Added in both with the same sha but the modes are different
//
// Conflict! (Not sure that this can actually happen but we should handle)
return &errMergeConflict{file.stage2.path}
case file.stage1 == nil && file.stage2 == nil && file.stage3 != nil:
// 4. Added in theirs but not ours:
//
// Not a genuine conflict just add to the index
return gitRepo.AddObjectToIndex(file.stage3.mode, git.MustIDFromString(file.stage3.sha), file.stage3.path)
case file.stage1 == nil:
// 5. Created by new in both
//
// Conflict!
return &errMergeConflict{file.stage2.path}
case file.stage2 != nil && file.stage3 != nil:
// 5. Modified in both - we should try to merge in the changes but first:
//
if file.stage2.mode == "120000" || file.stage3.mode == "120000" {
// 5a. Conflicting symbolic link change
return &errMergeConflict{file.stage2.path}
}
if file.stage2.mode == "160000" || file.stage3.mode == "160000" {
// 5b. Conflicting submodule change
return &errMergeConflict{file.stage2.path}
}
if file.stage2.mode != file.stage3.mode {
// 5c. Conflicting mode change
return &errMergeConflict{file.stage2.path}
}
// Need to get the objects from the object db to attempt to merge
root, _, err := git.NewCommand(ctx, "unpack-file").AddDynamicArguments(file.stage1.sha).RunStdString(&git.RunOpts{Dir: tmpBasePath})
if err != nil {
return fmt.Errorf("unable to get root object: %s at path: %s for merging. Error: %w", file.stage1.sha, file.stage1.path, err)
}
root = strings.TrimSpace(root)
defer func() {
_ = util.Remove(filepath.Join(tmpBasePath, root))
}()
base, _, err := git.NewCommand(ctx, "unpack-file").AddDynamicArguments(file.stage2.sha).RunStdString(&git.RunOpts{Dir: tmpBasePath})
if err != nil {
return fmt.Errorf("unable to get base object: %s at path: %s for merging. Error: %w", file.stage2.sha, file.stage2.path, err)
}
base = strings.TrimSpace(filepath.Join(tmpBasePath, base))
defer func() {
_ = util.Remove(base)
}()
head, _, err := git.NewCommand(ctx, "unpack-file").AddDynamicArguments(file.stage3.sha).RunStdString(&git.RunOpts{Dir: tmpBasePath})
if err != nil {
return fmt.Errorf("unable to get head object:%s at path: %s for merging. Error: %w", file.stage3.sha, file.stage3.path, err)
}
head = strings.TrimSpace(head)
defer func() {
_ = util.Remove(filepath.Join(tmpBasePath, head))
}()
// now git merge-file annoyingly takes a different order to the merge-tree ...
_, _, conflictErr := git.NewCommand(ctx, "merge-file").AddDynamicArguments(base, root, head).RunStdString(&git.RunOpts{Dir: tmpBasePath})
if conflictErr != nil {
return &errMergeConflict{file.stage2.path}
}
// base now contains the merged data
hash, _, err := git.NewCommand(ctx, "hash-object", "-w", "--path").AddDynamicArguments(file.stage2.path, base).RunStdString(&git.RunOpts{Dir: tmpBasePath})
if err != nil {
return err
}
hash = strings.TrimSpace(hash)
return gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(hash), file.stage2.path)
default:
if file.stage1 != nil {
return &errMergeConflict{file.stage1.path}
} else if file.stage2 != nil {
return &errMergeConflict{file.stage2.path}
} else if file.stage3 != nil {
return &errMergeConflict{file.stage3.path}
}
}
return nil
}
// AttemptThreeWayMerge will attempt to three way merge using git read-tree and then follow the git merge-one-file algorithm to attempt to resolve basic conflicts
func AttemptThreeWayMerge(ctx context.Context, gitPath string, gitRepo *git.Repository, base, ours, theirs, description string) (bool, []string, error) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
// First we use read-tree to do a simple three-way merge
if _, _, err := git.NewCommand(ctx, "read-tree", "-m").AddDynamicArguments(base, ours, theirs).RunStdString(&git.RunOpts{Dir: gitPath}); err != nil {
log.Error("Unable to run read-tree -m! Error: %v", err)
return false, nil, fmt.Errorf("unable to run read-tree -m! Error: %w", err)
}
// Then we use git ls-files -u to list the unmerged files and collate the triples in unmergedfiles
unmerged := make(chan *unmergedFile)
go unmergedFiles(ctx, gitPath, unmerged)
defer func() {
cancel()
for range unmerged {
// empty the unmerged channel
}
}()
numberOfConflicts := 0
conflict := false
conflictedFiles := make([]string, 0, 5)
for file := range unmerged {
if file == nil {
break
}
if file.err != nil {
cancel()
return false, nil, file.err
}
// OK now we have the unmerged file triplet attempt to merge it
if err := attemptMerge(ctx, file, gitPath, gitRepo); err != nil {
if conflictErr, ok := err.(*errMergeConflict); ok {
log.Trace("Conflict: %s in %s", conflictErr.filename, description)
conflict = true
if numberOfConflicts < 10 {
conflictedFiles = append(conflictedFiles, conflictErr.filename)
}
numberOfConflicts++
continue
}
return false, nil, err
}
}
return conflict, conflictedFiles, nil
}
func checkConflicts(ctx context.Context, pr *issues_model.PullRequest, gitRepo *git.Repository, tmpBasePath string) (bool, error) {
// 1. checkConflicts resets the conflict status - therefore - reset the conflict status
pr.ConflictedFiles = nil
// 2. AttemptThreeWayMerge first - this is much quicker than plain patch to base
description := fmt.Sprintf("PR[%d] %s/%s#%d", pr.ID, pr.BaseRepo.OwnerName, pr.BaseRepo.Name, pr.Index)
conflict, conflictFiles, err := AttemptThreeWayMerge(ctx,
tmpBasePath, gitRepo, pr.MergeBase, "base", "tracking", description)
if err != nil {
return false, err
}
if !conflict {
// No conflicts detected so we need to check if the patch is empty...
// a. Write the newly merged tree and check the new tree-hash
var treeHash string
treeHash, _, err = git.NewCommand(ctx, "write-tree").RunStdString(&git.RunOpts{Dir: tmpBasePath})
if err != nil {
lsfiles, _, _ := git.NewCommand(ctx, "ls-files", "-u").RunStdString(&git.RunOpts{Dir: tmpBasePath})
return false, fmt.Errorf("unable to write unconflicted tree: %w\n`git ls-files -u`:\n%s", err, lsfiles)
}
treeHash = strings.TrimSpace(treeHash)
baseTree, err := gitRepo.GetTree("base")
if err != nil {
return false, err
}
// b. compare the new tree-hash with the base tree hash
if treeHash == baseTree.ID.String() {
log.Debug("PullRequest[%d]: Patch is empty - ignoring", pr.ID)
pr.Status = issues_model.PullRequestStatusEmpty
}
return false, nil
}
// 3. OK the three-way merge method has detected conflicts
// 3a. Are still testing with GitApply? If not set the conflict status and move on
if !setting.Repository.PullRequest.TestConflictingPatchesWithGitApply {
pr.Status = issues_model.PullRequestStatusConflict
pr.ConflictedFiles = conflictFiles
log.Trace("Found %d files conflicted: %v", len(pr.ConflictedFiles), pr.ConflictedFiles)
return true, nil
}
// 3b. Create a plain patch from head to base
tmpPatchFile, err := os.CreateTemp("", "patch")
if err != nil {
log.Error("Unable to create temporary patch file! Error: %v", err)
return false, fmt.Errorf("unable to create temporary patch file! Error: %w", err)
}
defer func() {
_ = util.Remove(tmpPatchFile.Name())
}()
if err := gitRepo.GetDiffBinary(pr.MergeBase, "tracking", tmpPatchFile); err != nil {
tmpPatchFile.Close()
log.Error("Unable to get patch file from %s to %s in %s Error: %v", pr.MergeBase, pr.HeadBranch, pr.BaseRepo.FullName(), err)
return false, fmt.Errorf("unable to get patch file from %s to %s in %s Error: %w", pr.MergeBase, pr.HeadBranch, pr.BaseRepo.FullName(), err)
}
stat, err := tmpPatchFile.Stat()
if err != nil {
tmpPatchFile.Close()
return false, fmt.Errorf("unable to stat patch file: %w", err)
}
patchPath := tmpPatchFile.Name()
tmpPatchFile.Close()
// 3c. if the size of that patch is 0 - there can be no conflicts!
if stat.Size() == 0 {
log.Debug("PullRequest[%d]: Patch is empty - ignoring", pr.ID)
pr.Status = issues_model.PullRequestStatusEmpty
return false, nil
}
log.Trace("PullRequest[%d].testPatch (patchPath): %s", pr.ID, patchPath)
// 4. Read the base branch in to the index of the temporary repository
_, _, err = git.NewCommand(gitRepo.Ctx, "read-tree", "base").RunStdString(&git.RunOpts{Dir: tmpBasePath})
if err != nil {
return false, fmt.Errorf("git read-tree %s: %w", pr.BaseBranch, err)
}
// 5. Now get the pull request configuration to check if we need to ignore whitespace
prUnit, err := pr.BaseRepo.GetUnit(ctx, unit.TypePullRequests)
if err != nil {
return false, err
}
prConfig := prUnit.PullRequestsConfig()
// 6. Prepare the arguments to apply the patch against the index
args := []git.CmdArg{"apply", "--check", "--cached"}
if prConfig.IgnoreWhitespaceConflicts {
args = append(args, "--ignore-whitespace")
}
is3way := false
if git.CheckGitVersionAtLeast("2.32.0") == nil {
args = append(args, "--3way")
is3way = true
}
args = append(args, git.CmdArgCheck(patchPath))
// 7. Prep the pipe:
// - Here we could do the equivalent of:
// `git apply --check --cached patch_file > conflicts`
// Then iterate through the conflicts. However, that means storing all the conflicts
// in memory - which is very wasteful.
// - alternatively we can do the equivalent of:
// `git apply --check ... | grep ...`
// meaning we don't store all of the conflicts unnecessarily.
stderrReader, stderrWriter, err := os.Pipe()
if err != nil {
log.Error("Unable to open stderr pipe: %v", err)
return false, fmt.Errorf("unable to open stderr pipe: %w", err)
}
defer func() {
_ = stderrReader.Close()
_ = stderrWriter.Close()
}()
// 8. Run the check command
conflict = false
err = git.NewCommand(gitRepo.Ctx, args...).
Run(&git.RunOpts{
Dir: tmpBasePath,
Stderr: stderrWriter,
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
// Close the writer end of the pipe to begin processing
_ = stderrWriter.Close()
defer func() {
// Close the reader on return to terminate the git command if necessary
_ = stderrReader.Close()
}()
const prefix = "error: patch failed:"
const errorPrefix = "error: "
const threewayFailed = "Failed to perform three-way merge..."
const appliedPatchPrefix = "Applied patch to '"
const withConflicts = "' with conflicts."
conflicts := make(container.Set[string])
// Now scan the output from the command
scanner := bufio.NewScanner(stderrReader)
for scanner.Scan() {
line := scanner.Text()
log.Trace("PullRequest[%d].testPatch: stderr: %s", pr.ID, line)
if strings.HasPrefix(line, prefix) {
conflict = true
filepath := strings.TrimSpace(strings.Split(line[len(prefix):], ":")[0])
conflicts.Add(filepath)
} else if is3way && line == threewayFailed {
conflict = true
} else if strings.HasPrefix(line, errorPrefix) {
conflict = true
for _, suffix := range patchErrorSuffices {
if strings.HasSuffix(line, suffix) {
filepath := strings.TrimSpace(strings.TrimSuffix(line[len(errorPrefix):], suffix))
if filepath != "" {
conflicts.Add(filepath)
}
break
}
}
} else if is3way && strings.HasPrefix(line, appliedPatchPrefix) && strings.HasSuffix(line, withConflicts) {
conflict = true
filepath := strings.TrimPrefix(strings.TrimSuffix(line, withConflicts), appliedPatchPrefix)
if filepath != "" {
conflicts.Add(filepath)
}
}
// only list 10 conflicted files
if len(conflicts) >= 10 {
break
}
}
if len(conflicts) > 0 {
pr.ConflictedFiles = make([]string, 0, len(conflicts))
for key := range conflicts {
pr.ConflictedFiles = append(pr.ConflictedFiles, key)
}
}
return nil
},
})
// 9. Check if the found conflictedfiles is non-zero, "err" could be non-nil, so we should ignore it if we found conflicts.
// Note: `"err" could be non-nil` is due that if enable 3-way merge, it doesn't return any error on found conflicts.
if len(pr.ConflictedFiles) > 0 {
if conflict {
pr.Status = issues_model.PullRequestStatusConflict
log.Trace("Found %d files conflicted: %v", len(pr.ConflictedFiles), pr.ConflictedFiles)
return true, nil
}
} else if err != nil {
return false, fmt.Errorf("git apply --check: %w", err)
}
return false, nil
}
// CheckFileProtection check file Protection
func CheckFileProtection(repo *git.Repository, oldCommitID, newCommitID string, patterns []glob.Glob, limit int, env []string) ([]string, error) {
if len(patterns) == 0 {
return nil, nil
}
affectedFiles, err := git.GetAffectedFiles(repo, oldCommitID, newCommitID, env)
if err != nil {
return nil, err
}
changedProtectedFiles := make([]string, 0, limit)
for _, affectedFile := range affectedFiles {
lpath := strings.ToLower(affectedFile)
for _, pat := range patterns {
if pat.Match(lpath) {
changedProtectedFiles = append(changedProtectedFiles, lpath)
break
}
}
if len(changedProtectedFiles) >= limit {
break
}
}
if len(changedProtectedFiles) > 0 {
err = models.ErrFilePathProtected{
Path: changedProtectedFiles[0],
}
}
return changedProtectedFiles, err
}
// CheckUnprotectedFiles check if the commit only touches unprotected files
func CheckUnprotectedFiles(repo *git.Repository, oldCommitID, newCommitID string, patterns []glob.Glob, env []string) (bool, error) {
if len(patterns) == 0 {
return false, nil
}
affectedFiles, err := git.GetAffectedFiles(repo, oldCommitID, newCommitID, env)
if err != nil {
return false, err
}
for _, affectedFile := range affectedFiles {
lpath := strings.ToLower(affectedFile)
unprotected := false
for _, pat := range patterns {
if pat.Match(lpath) {
unprotected = true
break
}
}
if !unprotected {
return false, nil
}
}
return true, nil
}
// checkPullFilesProtection check if pr changed protected files and save results
func checkPullFilesProtection(pr *issues_model.PullRequest, gitRepo *git.Repository) error {
if pr.Status == issues_model.PullRequestStatusEmpty {
pr.ChangedProtectedFiles = nil
return nil
}
if err := pr.LoadProtectedBranch(db.DefaultContext); err != nil {
return err
}
if pr.ProtectedBranch == nil {
pr.ChangedProtectedFiles = nil
return nil
}
var err error
pr.ChangedProtectedFiles, err = CheckFileProtection(gitRepo, pr.MergeBase, "tracking", pr.ProtectedBranch.GetProtectedFilePatterns(), 10, os.Environ())
if err != nil && !models.IsErrFilePathProtected(err) {
return err
}
return nil
}