2019-12-14 03:51:06 +05:30
// Copyright 2019 The Gitea Authors.
// All rights reserved.
2022-11-27 23:50:29 +05:30
// SPDX-License-Identifier: MIT
2019-12-14 03:51:06 +05:30
package pull
import (
"bufio"
"context"
"fmt"
"io"
"os"
2021-12-19 09:49:25 +05:30
"path/filepath"
2019-12-14 03:51:06 +05:30
"strings"
"code.gitea.io/gitea/models"
2023-01-16 13:30:22 +05:30
git_model "code.gitea.io/gitea/models/git"
2022-06-13 15:07:59 +05:30
issues_model "code.gitea.io/gitea/models/issues"
2021-11-10 01:27:58 +05:30
"code.gitea.io/gitea/models/unit"
2022-10-12 10:48:26 +05:30
"code.gitea.io/gitea/modules/container"
2019-12-14 03:51:06 +05:30
"code.gitea.io/gitea/modules/git"
2021-12-19 09:49:25 +05:30
"code.gitea.io/gitea/modules/graceful"
2019-12-14 03:51:06 +05:30
"code.gitea.io/gitea/modules/log"
2021-12-19 09:49:25 +05:30
"code.gitea.io/gitea/modules/process"
2022-05-08 22:16:32 +05:30
repo_module "code.gitea.io/gitea/modules/repository"
2022-12-19 17:07:15 +05:30
"code.gitea.io/gitea/modules/setting"
2020-08-12 01:35:34 +05:30
"code.gitea.io/gitea/modules/util"
2020-10-14 00:20:57 +05:30
"github.com/gobwas/glob"
2019-12-14 03:51:06 +05:30
)
// DownloadDiffOrPatch will write the patch for the pr to the writer
2022-06-13 15:07:59 +05:30
func DownloadDiffOrPatch ( ctx context . Context , pr * issues_model . PullRequest , w io . Writer , patch , binary bool ) error {
2022-11-19 13:42:33 +05:30
if err := pr . LoadBaseRepo ( ctx ) ; err != nil {
2020-04-03 18:51:41 +05:30
log . Error ( "Unable to load base repository ID %d for pr #%d [%d]" , pr . BaseRepoID , pr . Index , pr . ID )
2019-12-14 03:51:06 +05:30
return err
}
2022-01-20 04:56:57 +05:30
gitRepo , closer , err := git . RepositoryFromContextOrOpen ( ctx , pr . BaseRepo . RepoPath ( ) )
2019-12-14 03:51:06 +05:30
if err != nil {
2022-10-25 00:59:17 +05:30
return fmt . Errorf ( "OpenRepository: %w" , err )
2019-12-14 03:51:06 +05:30
}
2022-01-20 04:56:57 +05:30
defer closer . Close ( )
2021-09-28 02:39:49 +05:30
if err := gitRepo . GetDiffOrPatch ( pr . MergeBase , pr . GetGitRefName ( ) , w , patch , binary ) ; err != nil {
2020-01-12 15:06:21 +05:30
log . Error ( "Unable to get patch file from %s to %s in %s Error: %v" , pr . MergeBase , pr . HeadBranch , pr . BaseRepo . FullName ( ) , err )
2022-10-25 00:59:17 +05:30
return fmt . Errorf ( "Unable to get patch file from %s to %s in %s Error: %w" , pr . MergeBase , pr . HeadBranch , pr . BaseRepo . FullName ( ) , err )
2019-12-14 03:51:06 +05:30
}
return nil
}
var patchErrorSuffices = [ ] string {
": already exists in index" ,
": patch does not apply" ,
": already exists in working directory" ,
"unrecognized input" ,
2022-12-14 19:15:33 +05:30
": No such file or directory" ,
": does not exist in index" ,
2019-12-14 03:51:06 +05:30
}
// TestPatch will test whether a simple patch will apply
2022-06-13 15:07:59 +05:30
func TestPatch ( pr * issues_model . PullRequest ) error {
2023-02-04 04:41:48 +05:30
ctx , _ , finished := process . GetManager ( ) . AddContext ( graceful . GetManager ( ) . HammerContext ( ) , fmt . Sprintf ( "TestPatch: %s" , pr ) )
2022-01-20 04:56:57 +05:30
defer finished ( )
2019-12-14 03:51:06 +05:30
// Clone base repo.
2022-01-20 04:56:57 +05:30
tmpBasePath , err := createTemporaryRepo ( ctx , pr )
2019-12-14 03:51:06 +05:30
if err != nil {
log . Error ( "CreateTemporaryPath: %v" , err )
return err
}
defer func ( ) {
2022-05-08 22:16:32 +05:30
if err := repo_module . RemoveTemporaryPath ( tmpBasePath ) ; err != nil {
2019-12-14 03:51:06 +05:30
log . Error ( "Merge: RemoveTemporaryPath: %s" , err )
}
} ( )
2022-03-30 00:43:41 +05:30
gitRepo , err := git . OpenRepository ( ctx , tmpBasePath )
2019-12-14 03:51:06 +05:30
if err != nil {
2022-10-25 00:59:17 +05:30
return fmt . Errorf ( "OpenRepository: %w" , err )
2019-12-14 03:51:06 +05:30
}
defer gitRepo . Close ( )
2020-10-14 00:20:57 +05:30
// 1. update merge base
2022-04-01 08:25:30 +05:30
pr . MergeBase , _ , err = git . NewCommand ( ctx , "merge-base" , "--" , "base" , "tracking" ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2019-12-14 03:51:06 +05:30
if err != nil {
var err2 error
pr . MergeBase , err2 = gitRepo . GetRefCommitID ( git . BranchPrefix + "base" )
if err2 != nil {
2022-10-25 00:59:17 +05:30
return fmt . Errorf ( "GetMergeBase: %v and can't find commit ID for base: %w" , err , err2 )
2019-12-14 03:51:06 +05:30
}
}
pr . MergeBase = strings . TrimSpace ( pr . MergeBase )
2022-07-13 13:52:51 +05:30
if pr . HeadCommitID , err = gitRepo . GetRefCommitID ( git . BranchPrefix + "tracking" ) ; err != nil {
return fmt . Errorf ( "GetBranchCommitID: can't find commit ID for head: %w" , err )
}
if pr . HeadCommitID == pr . MergeBase {
pr . Status = issues_model . PullRequestStatusAncestor
return nil
}
2020-10-14 00:20:57 +05:30
// 2. Check for conflicts
2022-06-13 15:07:59 +05:30
if conflicts , err := checkConflicts ( ctx , pr , gitRepo , tmpBasePath ) ; err != nil || conflicts || pr . Status == issues_model . PullRequestStatusEmpty {
2020-10-14 00:20:57 +05:30
return err
}
// 3. Check for protected files changes
2023-01-16 13:30:22 +05:30
if err = checkPullFilesProtection ( ctx , pr , gitRepo ) ; err != nil {
return fmt . Errorf ( "pr.CheckPullFilesProtection(): %v" , err )
2020-10-14 00:20:57 +05:30
}
if len ( pr . ChangedProtectedFiles ) > 0 {
log . Trace ( "Found %d protected files changed" , len ( pr . ChangedProtectedFiles ) )
}
2022-06-13 15:07:59 +05:30
pr . Status = issues_model . PullRequestStatusMergeable
2020-10-14 00:20:57 +05:30
return nil
}
2021-12-19 09:49:25 +05:30
type errMergeConflict struct {
filename string
}
func ( e * errMergeConflict ) Error ( ) string {
return fmt . Sprintf ( "conflict detected at: %s" , e . filename )
}
func attemptMerge ( ctx context . Context , file * unmergedFile , tmpBasePath string , gitRepo * git . Repository ) error {
2022-07-29 04:49:55 +05:30
log . Trace ( "Attempt to merge:\n%v" , file )
2021-12-19 09:49:25 +05:30
switch {
case file . stage1 != nil && ( file . stage2 == nil || file . stage3 == nil ) :
// 1. Deleted in one or both:
//
// Conflict <==> the stage1 !SameAs to the undeleted one
if ( file . stage2 != nil && ! file . stage1 . SameAs ( file . stage2 ) ) || ( file . stage3 != nil && ! file . stage1 . SameAs ( file . stage3 ) ) {
// Conflict!
return & errMergeConflict { file . stage1 . path }
}
// Not a genuine conflict and we can simply remove the file from the index
return gitRepo . RemoveFilesFromIndex ( file . stage1 . path )
case file . stage1 == nil && file . stage2 != nil && ( file . stage3 == nil || file . stage2 . SameAs ( file . stage3 ) ) :
// 2. Added in ours but not in theirs or identical in both
//
// Not a genuine conflict just add to the index
if err := gitRepo . AddObjectToIndex ( file . stage2 . mode , git . MustIDFromString ( file . stage2 . sha ) , file . stage2 . path ) ; err != nil {
return err
}
return nil
case file . stage1 == nil && file . stage2 != nil && file . stage3 != nil && file . stage2 . sha == file . stage3 . sha && file . stage2 . mode != file . stage3 . mode :
// 3. Added in both with the same sha but the modes are different
//
// Conflict! (Not sure that this can actually happen but we should handle)
return & errMergeConflict { file . stage2 . path }
case file . stage1 == nil && file . stage2 == nil && file . stage3 != nil :
// 4. Added in theirs but not ours:
//
// Not a genuine conflict just add to the index
return gitRepo . AddObjectToIndex ( file . stage3 . mode , git . MustIDFromString ( file . stage3 . sha ) , file . stage3 . path )
case file . stage1 == nil :
// 5. Created by new in both
//
// Conflict!
return & errMergeConflict { file . stage2 . path }
case file . stage2 != nil && file . stage3 != nil :
// 5. Modified in both - we should try to merge in the changes but first:
//
if file . stage2 . mode == "120000" || file . stage3 . mode == "120000" {
// 5a. Conflicting symbolic link change
return & errMergeConflict { file . stage2 . path }
}
if file . stage2 . mode == "160000" || file . stage3 . mode == "160000" {
// 5b. Conflicting submodule change
return & errMergeConflict { file . stage2 . path }
}
if file . stage2 . mode != file . stage3 . mode {
// 5c. Conflicting mode change
return & errMergeConflict { file . stage2 . path }
}
// Need to get the objects from the object db to attempt to merge
2022-10-23 20:14:45 +05:30
root , _ , err := git . NewCommand ( ctx , "unpack-file" ) . AddDynamicArguments ( file . stage1 . sha ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-19 09:49:25 +05:30
if err != nil {
return fmt . Errorf ( "unable to get root object: %s at path: %s for merging. Error: %w" , file . stage1 . sha , file . stage1 . path , err )
}
root = strings . TrimSpace ( root )
defer func ( ) {
_ = util . Remove ( filepath . Join ( tmpBasePath , root ) )
} ( )
2022-10-23 20:14:45 +05:30
base , _ , err := git . NewCommand ( ctx , "unpack-file" ) . AddDynamicArguments ( file . stage2 . sha ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-19 09:49:25 +05:30
if err != nil {
return fmt . Errorf ( "unable to get base object: %s at path: %s for merging. Error: %w" , file . stage2 . sha , file . stage2 . path , err )
}
base = strings . TrimSpace ( filepath . Join ( tmpBasePath , base ) )
defer func ( ) {
_ = util . Remove ( base )
} ( )
2022-10-23 20:14:45 +05:30
head , _ , err := git . NewCommand ( ctx , "unpack-file" ) . AddDynamicArguments ( file . stage3 . sha ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-19 09:49:25 +05:30
if err != nil {
return fmt . Errorf ( "unable to get head object:%s at path: %s for merging. Error: %w" , file . stage3 . sha , file . stage3 . path , err )
}
head = strings . TrimSpace ( head )
defer func ( ) {
_ = util . Remove ( filepath . Join ( tmpBasePath , head ) )
} ( )
// now git merge-file annoyingly takes a different order to the merge-tree ...
2022-10-23 20:14:45 +05:30
_ , _ , conflictErr := git . NewCommand ( ctx , "merge-file" ) . AddDynamicArguments ( base , root , head ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-19 09:49:25 +05:30
if conflictErr != nil {
return & errMergeConflict { file . stage2 . path }
}
// base now contains the merged data
2022-10-23 20:14:45 +05:30
hash , _ , err := git . NewCommand ( ctx , "hash-object" , "-w" , "--path" ) . AddDynamicArguments ( file . stage2 . path , base ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-19 09:49:25 +05:30
if err != nil {
return err
}
hash = strings . TrimSpace ( hash )
return gitRepo . AddObjectToIndex ( file . stage2 . mode , git . MustIDFromString ( hash ) , file . stage2 . path )
default :
if file . stage1 != nil {
return & errMergeConflict { file . stage1 . path }
} else if file . stage2 != nil {
return & errMergeConflict { file . stage2 . path }
} else if file . stage3 != nil {
return & errMergeConflict { file . stage3 . path }
}
}
return nil
}
2022-02-10 01:58:55 +05:30
// AttemptThreeWayMerge will attempt to three way merge using git read-tree and then follow the git merge-one-file algorithm to attempt to resolve basic conflicts
func AttemptThreeWayMerge ( ctx context . Context , gitPath string , gitRepo * git . Repository , base , ours , theirs , description string ) ( bool , [ ] string , error ) {
ctx , cancel := context . WithCancel ( ctx )
defer cancel ( )
2021-12-19 09:49:25 +05:30
// First we use read-tree to do a simple three-way merge
2022-10-23 20:14:45 +05:30
if _ , _ , err := git . NewCommand ( ctx , "read-tree" , "-m" ) . AddDynamicArguments ( base , ours , theirs ) . RunStdString ( & git . RunOpts { Dir : gitPath } ) ; err != nil {
2021-12-19 09:49:25 +05:30
log . Error ( "Unable to run read-tree -m! Error: %v" , err )
2022-10-25 00:59:17 +05:30
return false , nil , fmt . Errorf ( "unable to run read-tree -m! Error: %w" , err )
2021-12-19 09:49:25 +05:30
}
// Then we use git ls-files -u to list the unmerged files and collate the triples in unmergedfiles
unmerged := make ( chan * unmergedFile )
2022-02-10 01:58:55 +05:30
go unmergedFiles ( ctx , gitPath , unmerged )
2021-12-19 09:49:25 +05:30
defer func ( ) {
cancel ( )
for range unmerged {
// empty the unmerged channel
}
} ( )
numberOfConflicts := 0
conflict := false
2022-02-10 01:58:55 +05:30
conflictedFiles := make ( [ ] string , 0 , 5 )
2021-12-19 09:49:25 +05:30
for file := range unmerged {
if file == nil {
break
}
if file . err != nil {
cancel ( )
2022-02-10 01:58:55 +05:30
return false , nil , file . err
2021-12-19 09:49:25 +05:30
}
// OK now we have the unmerged file triplet attempt to merge it
2022-02-10 01:58:55 +05:30
if err := attemptMerge ( ctx , file , gitPath , gitRepo ) ; err != nil {
2021-12-19 09:49:25 +05:30
if conflictErr , ok := err . ( * errMergeConflict ) ; ok {
2022-02-10 01:58:55 +05:30
log . Trace ( "Conflict: %s in %s" , conflictErr . filename , description )
2021-12-19 09:49:25 +05:30
conflict = true
if numberOfConflicts < 10 {
2022-02-10 01:58:55 +05:30
conflictedFiles = append ( conflictedFiles , conflictErr . filename )
2021-12-19 09:49:25 +05:30
}
numberOfConflicts ++
continue
}
2022-02-10 01:58:55 +05:30
return false , nil , err
2021-12-19 09:49:25 +05:30
}
}
2022-02-10 01:58:55 +05:30
return conflict , conflictedFiles , nil
}
2022-06-13 15:07:59 +05:30
func checkConflicts ( ctx context . Context , pr * issues_model . PullRequest , gitRepo * git . Repository , tmpBasePath string ) ( bool , error ) {
2022-03-29 22:12:34 +05:30
// 1. checkConflicts resets the conflict status - therefore - reset the conflict status
pr . ConflictedFiles = nil
// 2. AttemptThreeWayMerge first - this is much quicker than plain patch to base
2022-02-10 01:58:55 +05:30
description := fmt . Sprintf ( "PR[%d] %s/%s#%d" , pr . ID , pr . BaseRepo . OwnerName , pr . BaseRepo . Name , pr . Index )
2022-12-19 17:07:15 +05:30
conflict , conflictFiles , err := AttemptThreeWayMerge ( ctx ,
2022-02-10 01:58:55 +05:30
tmpBasePath , gitRepo , pr . MergeBase , "base" , "tracking" , description )
if err != nil {
return false , err
}
2021-12-19 09:49:25 +05:30
if ! conflict {
2022-12-19 17:07:15 +05:30
// No conflicts detected so we need to check if the patch is empty...
// a. Write the newly merged tree and check the new tree-hash
2022-04-01 08:25:30 +05:30
var treeHash string
treeHash , _ , err = git . NewCommand ( ctx , "write-tree" ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-19 09:49:25 +05:30
if err != nil {
2022-07-29 04:49:55 +05:30
lsfiles , _ , _ := git . NewCommand ( ctx , "ls-files" , "-u" ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
return false , fmt . Errorf ( "unable to write unconflicted tree: %w\n`git ls-files -u`:\n%s" , err , lsfiles )
2021-12-19 09:49:25 +05:30
}
treeHash = strings . TrimSpace ( treeHash )
baseTree , err := gitRepo . GetTree ( "base" )
if err != nil {
return false , err
}
2022-12-19 17:07:15 +05:30
// b. compare the new tree-hash with the base tree hash
2021-12-19 09:49:25 +05:30
if treeHash == baseTree . ID . String ( ) {
log . Debug ( "PullRequest[%d]: Patch is empty - ignoring" , pr . ID )
2022-06-13 15:07:59 +05:30
pr . Status = issues_model . PullRequestStatusEmpty
2021-12-19 09:49:25 +05:30
}
return false , nil
}
2022-12-19 17:07:15 +05:30
// 3. OK the three-way merge method has detected conflicts
// 3a. Are still testing with GitApply? If not set the conflict status and move on
if ! setting . Repository . PullRequest . TestConflictingPatchesWithGitApply {
pr . Status = issues_model . PullRequestStatusConflict
pr . ConflictedFiles = conflictFiles
log . Trace ( "Found %d files conflicted: %v" , len ( pr . ConflictedFiles ) , pr . ConflictedFiles )
return true , nil
}
2021-12-19 09:49:25 +05:30
2022-12-19 17:07:15 +05:30
// 3b. Create a plain patch from head to base
2021-09-22 11:08:34 +05:30
tmpPatchFile , err := os . CreateTemp ( "" , "patch" )
2019-12-14 03:51:06 +05:30
if err != nil {
log . Error ( "Unable to create temporary patch file! Error: %v" , err )
2022-10-25 00:59:17 +05:30
return false , fmt . Errorf ( "unable to create temporary patch file! Error: %w" , err )
2019-12-14 03:51:06 +05:30
}
defer func ( ) {
2020-08-12 01:35:34 +05:30
_ = util . Remove ( tmpPatchFile . Name ( ) )
2019-12-14 03:51:06 +05:30
} ( )
2021-09-28 02:39:49 +05:30
if err := gitRepo . GetDiffBinary ( pr . MergeBase , "tracking" , tmpPatchFile ) ; err != nil {
2019-12-14 03:51:06 +05:30
tmpPatchFile . Close ( )
2020-01-12 15:06:21 +05:30
log . Error ( "Unable to get patch file from %s to %s in %s Error: %v" , pr . MergeBase , pr . HeadBranch , pr . BaseRepo . FullName ( ) , err )
2022-10-25 00:59:17 +05:30
return false , fmt . Errorf ( "unable to get patch file from %s to %s in %s Error: %w" , pr . MergeBase , pr . HeadBranch , pr . BaseRepo . FullName ( ) , err )
2019-12-14 03:51:06 +05:30
}
stat , err := tmpPatchFile . Stat ( )
if err != nil {
tmpPatchFile . Close ( )
2022-10-25 00:59:17 +05:30
return false , fmt . Errorf ( "unable to stat patch file: %w" , err )
2019-12-14 03:51:06 +05:30
}
patchPath := tmpPatchFile . Name ( )
tmpPatchFile . Close ( )
2022-12-19 17:07:15 +05:30
// 3c. if the size of that patch is 0 - there can be no conflicts!
2019-12-14 03:51:06 +05:30
if stat . Size ( ) == 0 {
log . Debug ( "PullRequest[%d]: Patch is empty - ignoring" , pr . ID )
2022-06-13 15:07:59 +05:30
pr . Status = issues_model . PullRequestStatusEmpty
2020-10-14 00:20:57 +05:30
return false , nil
2019-12-14 03:51:06 +05:30
}
log . Trace ( "PullRequest[%d].testPatch (patchPath): %s" , pr . ID , patchPath )
2022-03-29 22:12:34 +05:30
// 4. Read the base branch in to the index of the temporary repository
2022-04-01 08:25:30 +05:30
_ , _ , err = git . NewCommand ( gitRepo . Ctx , "read-tree" , "base" ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2019-12-14 03:51:06 +05:30
if err != nil {
2022-10-25 00:59:17 +05:30
return false , fmt . Errorf ( "git read-tree %s: %w" , pr . BaseBranch , err )
2019-12-14 03:51:06 +05:30
}
2022-03-29 22:12:34 +05:30
// 5. Now get the pull request configuration to check if we need to ignore whitespace
2022-12-10 08:16:31 +05:30
prUnit , err := pr . BaseRepo . GetUnit ( ctx , unit . TypePullRequests )
2019-12-14 03:51:06 +05:30
if err != nil {
2020-10-14 00:20:57 +05:30
return false , err
2019-12-14 03:51:06 +05:30
}
prConfig := prUnit . PullRequestsConfig ( )
2022-03-29 22:12:34 +05:30
// 6. Prepare the arguments to apply the patch against the index
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 08:00:43 +05:30
cmdApply := git . NewCommand ( gitRepo . Ctx , "apply" , "--check" , "--cached" )
2019-12-14 03:51:06 +05:30
if prConfig . IgnoreWhitespaceConflicts {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 08:00:43 +05:30
cmdApply . AddArguments ( "--ignore-whitespace" )
2019-12-14 03:51:06 +05:30
}
2022-02-02 10:16:10 +05:30
is3way := false
2021-12-19 09:49:25 +05:30
if git . CheckGitVersionAtLeast ( "2.32.0" ) == nil {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 08:00:43 +05:30
cmdApply . AddArguments ( "--3way" )
2022-02-02 10:16:10 +05:30
is3way = true
2021-12-19 09:49:25 +05:30
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 08:00:43 +05:30
cmdApply . AddDynamicArguments ( patchPath )
2019-12-14 03:51:06 +05:30
2022-03-29 22:12:34 +05:30
// 7. Prep the pipe:
2020-10-14 00:20:57 +05:30
// - Here we could do the equivalent of:
// `git apply --check --cached patch_file > conflicts`
// Then iterate through the conflicts. However, that means storing all the conflicts
// in memory - which is very wasteful.
// - alternatively we can do the equivalent of:
// `git apply --check ... | grep ...`
// meaning we don't store all of the conflicts unnecessarily.
2019-12-14 03:51:06 +05:30
stderrReader , stderrWriter , err := os . Pipe ( )
if err != nil {
log . Error ( "Unable to open stderr pipe: %v" , err )
2022-10-25 00:59:17 +05:30
return false , fmt . Errorf ( "unable to open stderr pipe: %w" , err )
2019-12-14 03:51:06 +05:30
}
defer func ( ) {
_ = stderrReader . Close ( )
_ = stderrWriter . Close ( )
} ( )
2020-10-14 00:20:57 +05:30
2022-03-29 22:12:34 +05:30
// 8. Run the check command
2021-12-19 09:49:25 +05:30
conflict = false
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 08:00:43 +05:30
err = cmdApply . Run ( & git . RunOpts {
Dir : tmpBasePath ,
Stderr : stderrWriter ,
PipelineFunc : func ( ctx context . Context , cancel context . CancelFunc ) error {
// Close the writer end of the pipe to begin processing
_ = stderrWriter . Close ( )
defer func ( ) {
// Close the reader on return to terminate the git command if necessary
_ = stderrReader . Close ( )
} ( )
const prefix = "error: patch failed:"
const errorPrefix = "error: "
const threewayFailed = "Failed to perform three-way merge..."
const appliedPatchPrefix = "Applied patch to '"
const withConflicts = "' with conflicts."
conflicts := make ( container . Set [ string ] )
// Now scan the output from the command
scanner := bufio . NewScanner ( stderrReader )
for scanner . Scan ( ) {
line := scanner . Text ( )
log . Trace ( "PullRequest[%d].testPatch: stderr: %s" , pr . ID , line )
if strings . HasPrefix ( line , prefix ) {
conflict = true
filepath := strings . TrimSpace ( strings . Split ( line [ len ( prefix ) : ] , ":" ) [ 0 ] )
conflicts . Add ( filepath )
} else if is3way && line == threewayFailed {
conflict = true
} else if strings . HasPrefix ( line , errorPrefix ) {
conflict = true
for _ , suffix := range patchErrorSuffices {
if strings . HasSuffix ( line , suffix ) {
filepath := strings . TrimSpace ( strings . TrimSuffix ( line [ len ( errorPrefix ) : ] , suffix ) )
if filepath != "" {
conflicts . Add ( filepath )
2019-12-14 03:51:06 +05:30
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 08:00:43 +05:30
break
2022-02-02 10:16:10 +05:30
}
2019-12-14 03:51:06 +05:30
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 08:00:43 +05:30
} else if is3way && strings . HasPrefix ( line , appliedPatchPrefix ) && strings . HasSuffix ( line , withConflicts ) {
conflict = true
filepath := strings . TrimPrefix ( strings . TrimSuffix ( line , withConflicts ) , appliedPatchPrefix )
if filepath != "" {
conflicts . Add ( filepath )
2019-12-14 03:51:06 +05:30
}
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 08:00:43 +05:30
// only list 10 conflicted files
if len ( conflicts ) >= 10 {
break
}
}
2020-10-14 00:20:57 +05:30
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 08:00:43 +05:30
if len ( conflicts ) > 0 {
pr . ConflictedFiles = make ( [ ] string , 0 , len ( conflicts ) )
for key := range conflicts {
pr . ConflictedFiles = append ( pr . ConflictedFiles , key )
2019-12-14 03:51:06 +05:30
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 08:00:43 +05:30
}
2020-10-14 00:20:57 +05:30
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 08:00:43 +05:30
return nil
} ,
} )
2019-12-14 03:51:06 +05:30
2022-04-22 03:25:45 +05:30
// 9. Check if the found conflictedfiles is non-zero, "err" could be non-nil, so we should ignore it if we found conflicts.
// Note: `"err" could be non-nil` is due that if enable 3-way merge, it doesn't return any error on found conflicts.
if len ( pr . ConflictedFiles ) > 0 {
2019-12-14 03:51:06 +05:30
if conflict {
2022-06-13 15:07:59 +05:30
pr . Status = issues_model . PullRequestStatusConflict
2019-12-14 03:51:06 +05:30
log . Trace ( "Found %d files conflicted: %v" , len ( pr . ConflictedFiles ) , pr . ConflictedFiles )
2020-10-14 00:20:57 +05:30
return true , nil
2019-12-14 03:51:06 +05:30
}
2022-04-22 03:25:45 +05:30
} else if err != nil {
2022-10-25 00:59:17 +05:30
return false , fmt . Errorf ( "git apply --check: %w" , err )
2019-12-14 03:51:06 +05:30
}
2020-10-14 00:20:57 +05:30
return false , nil
}
// CheckFileProtection check file Protection
2022-01-20 04:56:57 +05:30
func CheckFileProtection ( repo * git . Repository , oldCommitID , newCommitID string , patterns [ ] glob . Glob , limit int , env [ ] string ) ( [ ] string , error ) {
2020-10-14 00:20:57 +05:30
if len ( patterns ) == 0 {
return nil , nil
}
2022-01-20 04:56:57 +05:30
affectedFiles , err := git . GetAffectedFiles ( repo , oldCommitID , newCommitID , env )
2020-10-14 00:20:57 +05:30
if err != nil {
return nil , err
}
changedProtectedFiles := make ( [ ] string , 0 , limit )
2021-09-11 19:51:17 +05:30
for _ , affectedFile := range affectedFiles {
lpath := strings . ToLower ( affectedFile )
for _ , pat := range patterns {
if pat . Match ( lpath ) {
changedProtectedFiles = append ( changedProtectedFiles , lpath )
break
}
}
if len ( changedProtectedFiles ) >= limit {
break
}
}
if len ( changedProtectedFiles ) > 0 {
err = models . ErrFilePathProtected {
Path : changedProtectedFiles [ 0 ] ,
}
2020-10-14 00:20:57 +05:30
}
return changedProtectedFiles , err
}
2021-09-11 19:51:17 +05:30
// CheckUnprotectedFiles check if the commit only touches unprotected files
2022-01-20 04:56:57 +05:30
func CheckUnprotectedFiles ( repo * git . Repository , oldCommitID , newCommitID string , patterns [ ] glob . Glob , env [ ] string ) ( bool , error ) {
2021-09-11 19:51:17 +05:30
if len ( patterns ) == 0 {
return false , nil
}
2022-01-20 04:56:57 +05:30
affectedFiles , err := git . GetAffectedFiles ( repo , oldCommitID , newCommitID , env )
2021-09-11 19:51:17 +05:30
if err != nil {
return false , err
}
for _ , affectedFile := range affectedFiles {
lpath := strings . ToLower ( affectedFile )
unprotected := false
for _ , pat := range patterns {
if pat . Match ( lpath ) {
unprotected = true
break
}
}
if ! unprotected {
return false , nil
}
}
return true , nil
}
2020-10-14 00:20:57 +05:30
// checkPullFilesProtection check if pr changed protected files and save results
2023-01-16 13:30:22 +05:30
func checkPullFilesProtection ( ctx context . Context , pr * issues_model . PullRequest , gitRepo * git . Repository ) error {
2022-06-13 15:07:59 +05:30
if pr . Status == issues_model . PullRequestStatusEmpty {
2022-03-29 22:12:34 +05:30
pr . ChangedProtectedFiles = nil
return nil
}
2023-01-16 13:30:22 +05:30
pb , err := git_model . GetFirstMatchProtectedBranchRule ( ctx , pr . BaseRepoID , pr . BaseBranch )
if err != nil {
2020-10-14 00:20:57 +05:30
return err
}
2023-01-16 13:30:22 +05:30
if pb == nil {
2020-10-14 00:20:57 +05:30
pr . ChangedProtectedFiles = nil
return nil
}
2023-01-16 13:30:22 +05:30
pr . ChangedProtectedFiles , err = CheckFileProtection ( gitRepo , pr . MergeBase , "tracking" , pb . GetProtectedFilePatterns ( ) , 10 , os . Environ ( ) )
2020-10-14 00:20:57 +05:30
if err != nil && ! models . IsErrFilePathProtected ( err ) {
return err
}
2019-12-14 03:51:06 +05:30
return nil
}