// Copyright 2014 The Gogs Authors. All rights reserved. // Copyright 2019 The Gitea Authors. All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. package gitdiff import ( "bufio" "bytes" "context" "fmt" "html/template" "io" "io/ioutil" "net/url" "os" "os/exec" "regexp" "sort" "strings" "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/setting" "github.com/sergi/go-diff/diffmatchpatch" stdcharset "golang.org/x/net/html/charset" "golang.org/x/text/transform" ) // DiffLineType represents the type of a DiffLine. type DiffLineType uint8 // DiffLineType possible values. const ( DiffLinePlain DiffLineType = iota + 1 DiffLineAdd DiffLineDel DiffLineSection ) // DiffFileType represents the type of a DiffFile. type DiffFileType uint8 // DiffFileType possible values. const ( DiffFileAdd DiffFileType = iota + 1 DiffFileChange DiffFileDel DiffFileRename ) // DiffLineExpandDirection represents the DiffLineSection expand direction type DiffLineExpandDirection uint8 // DiffLineExpandDirection possible values. const ( DiffLineExpandNone DiffLineExpandDirection = iota + 1 DiffLineExpandSingle DiffLineExpandUpDown DiffLineExpandUp DiffLineExpandDown ) // DiffLine represents a line difference in a DiffSection. type DiffLine struct { LeftIdx int RightIdx int Type DiffLineType Content string Comments []*models.Comment SectionInfo *DiffLineSectionInfo } // DiffLineSectionInfo represents diff line section meta data type DiffLineSectionInfo struct { Path string LastLeftIdx int LastRightIdx int LeftIdx int RightIdx int LeftHunkSize int RightHunkSize int } // BlobExcerptChunkSize represent max lines of excerpt const BlobExcerptChunkSize = 20 // GetType returns the type of a DiffLine. func (d *DiffLine) GetType() int { return int(d.Type) } // CanComment returns whether or not a line can get commented func (d *DiffLine) CanComment() bool { return len(d.Comments) == 0 && d.Type != DiffLineSection } // GetCommentSide returns the comment side of the first comment, if not set returns empty string func (d *DiffLine) GetCommentSide() string { if len(d.Comments) == 0 { return "" } return d.Comments[0].DiffSide() } // GetLineTypeMarker returns the line type marker func (d *DiffLine) GetLineTypeMarker() string { if strings.IndexByte(" +-", d.Content[0]) > -1 { return d.Content[0:1] } return "" } // GetBlobExcerptQuery builds query string to get blob excerpt func (d *DiffLine) GetBlobExcerptQuery() string { query := fmt.Sprintf( "last_left=%d&last_right=%d&"+ "left=%d&right=%d&"+ "left_hunk_size=%d&right_hunk_size=%d&"+ "path=%s", d.SectionInfo.LastLeftIdx, d.SectionInfo.LastRightIdx, d.SectionInfo.LeftIdx, d.SectionInfo.RightIdx, d.SectionInfo.LeftHunkSize, d.SectionInfo.RightHunkSize, url.QueryEscape(d.SectionInfo.Path)) return query } // GetExpandDirection gets DiffLineExpandDirection func (d *DiffLine) GetExpandDirection() DiffLineExpandDirection { if d.Type != DiffLineSection || d.SectionInfo == nil || d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx <= 1 { return DiffLineExpandNone } if d.SectionInfo.LastLeftIdx <= 0 && d.SectionInfo.LastRightIdx <= 0 { return DiffLineExpandUp } else if d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx > BlobExcerptChunkSize && d.SectionInfo.RightHunkSize > 0 { return DiffLineExpandUpDown } else if d.SectionInfo.LeftHunkSize <= 0 && d.SectionInfo.RightHunkSize <= 0 { return DiffLineExpandDown } return DiffLineExpandSingle } func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int) *DiffLineSectionInfo { leftLine, leftHunk, rightLine, righHunk := git.ParseDiffHunkString(line) return &DiffLineSectionInfo{ Path: treePath, LastLeftIdx: lastLeftIdx, LastRightIdx: lastRightIdx, LeftIdx: leftLine, RightIdx: rightLine, LeftHunkSize: leftHunk, RightHunkSize: righHunk, } } // escape a line's content or return <br> needed for copy/paste purposes func getLineContent(content string) string { if len(content) > 0 { return content } return "\n" } // DiffSection represents a section of a DiffFile. type DiffSection struct { FileName string Name string Lines []*DiffLine } var ( addedCodePrefix = []byte(`<span class="added-code">`) removedCodePrefix = []byte(`<span class="removed-code">`) codeTagSuffix = []byte(`</span>`) ) var addSpanRegex = regexp.MustCompile(`<span [class="[a-z]*]*$`) func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) template.HTML { buf := bytes.NewBuffer(nil) var addSpan string for i := range diffs { switch { case diffs[i].Type == diffmatchpatch.DiffEqual: // Looking for the case where our 3rd party diff library previously detected a string difference // in the middle of a span class because we highlight them first. This happens when added/deleted code // also changes the chroma class name, either partially or fully. If found, just move the openining span code forward into the next section // see TestDiffToHTML for examples if len(addSpan) > 0 { diffs[i].Text = addSpan + diffs[i].Text addSpan = "" } m := addSpanRegex.FindStringSubmatchIndex(diffs[i].Text) if m != nil { addSpan = diffs[i].Text[m[0]:m[1]] buf.WriteString(strings.TrimSuffix(diffs[i].Text, addSpan)) } else { addSpan = "" buf.WriteString(getLineContent(diffs[i].Text)) } case diffs[i].Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd: if len(addSpan) > 0 { diffs[i].Text = addSpan + diffs[i].Text addSpan = "" } // Print existing closing span first before opening added-code span so it doesn't unintentionally close it if strings.HasPrefix(diffs[i].Text, "</span>") { buf.WriteString("</span>") diffs[i].Text = strings.TrimPrefix(diffs[i].Text, "</span>") } m := addSpanRegex.FindStringSubmatchIndex(diffs[i].Text) if m != nil { addSpan = diffs[i].Text[m[0]:m[1]] diffs[i].Text = strings.TrimSuffix(diffs[i].Text, addSpan) } buf.Write(addedCodePrefix) buf.WriteString(getLineContent(diffs[i].Text)) buf.Write(codeTagSuffix) case diffs[i].Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel: if len(addSpan) > 0 { diffs[i].Text = addSpan + diffs[i].Text addSpan = "" } if strings.HasPrefix(diffs[i].Text, "</span>") { buf.WriteString("</span>") diffs[i].Text = strings.TrimPrefix(diffs[i].Text, "</span>") } m := addSpanRegex.FindStringSubmatchIndex(diffs[i].Text) if m != nil { addSpan = diffs[i].Text[m[0]:m[1]] diffs[i].Text = strings.TrimSuffix(diffs[i].Text, addSpan) } buf.Write(removedCodePrefix) buf.WriteString(getLineContent(diffs[i].Text)) buf.Write(codeTagSuffix) } } return template.HTML(buf.Bytes()) } // GetLine gets a specific line by type (add or del) and file line number func (diffSection *DiffSection) GetLine(lineType DiffLineType, idx int) *DiffLine { var ( difference = 0 addCount = 0 delCount = 0 matchDiffLine *DiffLine ) LOOP: for _, diffLine := range diffSection.Lines { switch diffLine.Type { case DiffLineAdd: addCount++ case DiffLineDel: delCount++ default: if matchDiffLine != nil { break LOOP } difference = diffLine.RightIdx - diffLine.LeftIdx addCount = 0 delCount = 0 } switch lineType { case DiffLineDel: if diffLine.RightIdx == 0 && diffLine.LeftIdx == idx-difference { matchDiffLine = diffLine } case DiffLineAdd: if diffLine.LeftIdx == 0 && diffLine.RightIdx == idx+difference { matchDiffLine = diffLine } } } if addCount == delCount { return matchDiffLine } return nil } var diffMatchPatch = diffmatchpatch.New() func init() { diffMatchPatch.DiffEditCost = 100 } // GetComputedInlineDiffFor computes inline diff for the given line. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) template.HTML { if setting.Git.DisableDiffHighlight { return template.HTML(getLineContent(diffLine.Content[1:])) } var ( compareDiffLine *DiffLine diff1 string diff2 string ) // try to find equivalent diff line. ignore, otherwise switch diffLine.Type { case DiffLineSection: return template.HTML(getLineContent(diffLine.Content[1:])) case DiffLineAdd: compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx) if compareDiffLine == nil { return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content[1:])) } diff1 = compareDiffLine.Content diff2 = diffLine.Content case DiffLineDel: compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx) if compareDiffLine == nil { return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content[1:])) } diff1 = diffLine.Content diff2 = compareDiffLine.Content default: if strings.IndexByte(" +-", diffLine.Content[0]) > -1 { return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content[1:])) } return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content)) } diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, diff1[1:]), highlight.Code(diffSection.FileName, diff2[1:]), true) diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type) } // DiffFile represents a file diff. type DiffFile struct { Name string OldName string Index int Addition, Deletion int Type DiffFileType IsCreated bool IsDeleted bool IsBin bool IsLFSFile bool IsRenamed bool IsSubmodule bool Sections []*DiffSection IsIncomplete bool } // GetType returns type of diff file. func (diffFile *DiffFile) GetType() int { return int(diffFile.Type) } // GetTailSection creates a fake DiffLineSection if the last section is not the end of the file func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommitID, rightCommitID string) *DiffSection { if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile { return nil } leftCommit, err := gitRepo.GetCommit(leftCommitID) if err != nil { return nil } rightCommit, err := gitRepo.GetCommit(rightCommitID) if err != nil { return nil } lastSection := diffFile.Sections[len(diffFile.Sections)-1] lastLine := lastSection.Lines[len(lastSection.Lines)-1] leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name) rightLineCount := getCommitFileLineCount(rightCommit, diffFile.Name) if leftLineCount <= lastLine.LeftIdx || rightLineCount <= lastLine.RightIdx { return nil } tailDiffLine := &DiffLine{ Type: DiffLineSection, Content: " ", SectionInfo: &DiffLineSectionInfo{ Path: diffFile.Name, LastLeftIdx: lastLine.LeftIdx, LastRightIdx: lastLine.RightIdx, LeftIdx: leftLineCount, RightIdx: rightLineCount, }} tailSection := &DiffSection{FileName: diffFile.Name, Lines: []*DiffLine{tailDiffLine}} return tailSection } func getCommitFileLineCount(commit *git.Commit, filePath string) int { blob, err := commit.GetBlobByPath(filePath) if err != nil { return 0 } lineCount, err := blob.GetBlobLineCount() if err != nil { return 0 } return lineCount } // Diff represents a difference between two git trees. type Diff struct { NumFiles, TotalAddition, TotalDeletion int Files []*DiffFile IsIncomplete bool } // LoadComments loads comments into each line func (diff *Diff) LoadComments(issue *models.Issue, currentUser *models.User) error { allComments, err := models.FetchCodeComments(issue, currentUser) if err != nil { return err } for _, file := range diff.Files { if lineCommits, ok := allComments[file.Name]; ok { for _, section := range file.Sections { for _, line := range section.Lines { if comments, ok := lineCommits[int64(line.LeftIdx*-1)]; ok { line.Comments = append(line.Comments, comments...) } if comments, ok := lineCommits[int64(line.RightIdx)]; ok { line.Comments = append(line.Comments, comments...) } sort.SliceStable(line.Comments, func(i, j int) bool { return line.Comments[i].CreatedUnix < line.Comments[j].CreatedUnix }) } } } } return nil } const cmdDiffHead = "diff --git " // ParsePatch builds a Diff object from a io.Reader and some // parameters. // TODO: move this function to gogits/git-module func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader) (*Diff, error) { var ( diff = &Diff{Files: make([]*DiffFile, 0)} curFile = &DiffFile{} curSection = &DiffSection{ Lines: make([]*DiffLine, 0, 10), } leftLine, rightLine int lineCount int curFileLinesCount int curFileLFSPrefix bool ) input := bufio.NewReader(reader) isEOF := false for !isEOF { var linebuf bytes.Buffer for { b, err := input.ReadByte() if err != nil { if err == io.EOF { isEOF = true break } else { return nil, fmt.Errorf("ReadByte: %v", err) } } if b == '\n' { break } if linebuf.Len() < maxLineCharacters { linebuf.WriteByte(b) } else if linebuf.Len() == maxLineCharacters { curFile.IsIncomplete = true } } line := linebuf.String() if strings.HasPrefix(line, "+++ ") || strings.HasPrefix(line, "--- ") || len(line) == 0 { continue } trimLine := strings.Trim(line, "+- ") if trimLine == models.LFSMetaFileIdentifier { curFileLFSPrefix = true } if curFileLFSPrefix && strings.HasPrefix(trimLine, models.LFSMetaFileOidPrefix) { oid := strings.TrimPrefix(trimLine, models.LFSMetaFileOidPrefix) if len(oid) == 64 { m := &models.LFSMetaObject{Oid: oid} count, err := models.Count(m) if err == nil && count > 0 { curFile.IsBin = true curFile.IsLFSFile = true curSection.Lines = nil } } } curFileLinesCount++ lineCount++ // Diff data too large, we only show the first about maxLines lines if curFileLinesCount >= maxLines { curFile.IsIncomplete = true } switch { case line[0] == ' ': diffLine := &DiffLine{Type: DiffLinePlain, Content: line, LeftIdx: leftLine, RightIdx: rightLine} leftLine++ rightLine++ curSection.Lines = append(curSection.Lines, diffLine) curSection.FileName = curFile.Name continue case line[0] == '@': curSection = &DiffSection{} curFile.Sections = append(curFile.Sections, curSection) lineSectionInfo := getDiffLineSectionInfo(curFile.Name, line, leftLine-1, rightLine-1) diffLine := &DiffLine{ Type: DiffLineSection, Content: line, SectionInfo: lineSectionInfo, } curSection.Lines = append(curSection.Lines, diffLine) curSection.FileName = curFile.Name // update line number. leftLine = lineSectionInfo.LeftIdx rightLine = lineSectionInfo.RightIdx continue case line[0] == '+': curFile.Addition++ diff.TotalAddition++ diffLine := &DiffLine{Type: DiffLineAdd, Content: line, RightIdx: rightLine} rightLine++ curSection.Lines = append(curSection.Lines, diffLine) curSection.FileName = curFile.Name continue case line[0] == '-': curFile.Deletion++ diff.TotalDeletion++ diffLine := &DiffLine{Type: DiffLineDel, Content: line, LeftIdx: leftLine} if leftLine > 0 { leftLine++ } curSection.Lines = append(curSection.Lines, diffLine) curSection.FileName = curFile.Name case strings.HasPrefix(line, "Binary"): curFile.IsBin = true continue } // Get new file. if strings.HasPrefix(line, cmdDiffHead) { if len(diff.Files) >= maxFiles { diff.IsIncomplete = true _, err := io.Copy(ioutil.Discard, reader) if err != nil { return nil, fmt.Errorf("Copy: %v", err) } break } // Note: In case file name is surrounded by double quotes (it happens only in git-shell). // e.g. diff --git "a/xxx" "b/xxx" var a string var b string rd := strings.NewReader(line[len(cmdDiffHead):]) char, _ := rd.ReadByte() _ = rd.UnreadByte() if char == '"' { fmt.Fscanf(rd, "%q ", &a) } else { fmt.Fscanf(rd, "%s ", &a) } char, _ = rd.ReadByte() _ = rd.UnreadByte() if char == '"' { fmt.Fscanf(rd, "%q", &b) } else { fmt.Fscanf(rd, "%s", &b) } a = a[2:] b = b[2:] curFile = &DiffFile{ Name: b, OldName: a, Index: len(diff.Files) + 1, Type: DiffFileChange, Sections: make([]*DiffSection, 0, 10), IsRenamed: a != b, } diff.Files = append(diff.Files, curFile) curFileLinesCount = 0 leftLine = 1 rightLine = 1 curFileLFSPrefix = false // Check file diff type and is submodule. for { line, err := input.ReadString('\n') if err != nil { if err == io.EOF { isEOF = true } else { return nil, fmt.Errorf("ReadString: %v", err) } } switch { case strings.HasPrefix(line, "new file"): curFile.Type = DiffFileAdd curFile.IsCreated = true case strings.HasPrefix(line, "deleted"): curFile.Type = DiffFileDel curFile.IsDeleted = true case strings.HasPrefix(line, "index"): curFile.Type = DiffFileChange case strings.HasPrefix(line, "similarity index 100%"): curFile.Type = DiffFileRename } if curFile.Type > 0 { if strings.HasSuffix(line, " 160000\n") { curFile.IsSubmodule = true } break } } } } // FIXME: detect encoding while parsing. var buf bytes.Buffer for _, f := range diff.Files { buf.Reset() for _, sec := range f.Sections { for _, l := range sec.Lines { buf.WriteString(l.Content) buf.WriteString("\n") } } charsetLabel, err := charset.DetectEncoding(buf.Bytes()) if charsetLabel != "UTF-8" && err == nil { encoding, _ := stdcharset.Lookup(charsetLabel) if encoding != nil { d := encoding.NewDecoder() for _, sec := range f.Sections { for _, l := range sec.Lines { if c, _, err := transform.String(d, l.Content); err == nil { l.Content = c } } } } } } diff.NumFiles = len(diff.Files) return diff, nil } // GetDiffRange builds a Diff between two commits of a repository. // passing the empty string as beforeCommitID returns a diff from the // parent commit. func GetDiffRange(repoPath, beforeCommitID, afterCommitID string, maxLines, maxLineCharacters, maxFiles int) (*Diff, error) { return GetDiffRangeWithWhitespaceBehavior(repoPath, beforeCommitID, afterCommitID, maxLines, maxLineCharacters, maxFiles, "") } // GetDiffRangeWithWhitespaceBehavior builds a Diff between two commits of a repository. // Passing the empty string as beforeCommitID returns a diff from the parent commit. // The whitespaceBehavior is either an empty string or a git flag func GetDiffRangeWithWhitespaceBehavior(repoPath, beforeCommitID, afterCommitID string, maxLines, maxLineCharacters, maxFiles int, whitespaceBehavior string) (*Diff, error) { gitRepo, err := git.OpenRepository(repoPath) if err != nil { return nil, err } defer gitRepo.Close() commit, err := gitRepo.GetCommit(afterCommitID) if err != nil { return nil, err } // FIXME: graceful: These commands should likely have a timeout ctx, cancel := context.WithCancel(git.DefaultContext) defer cancel() var cmd *exec.Cmd if (len(beforeCommitID) == 0 || beforeCommitID == git.EmptySHA) && commit.ParentCount() == 0 { cmd = exec.CommandContext(ctx, git.GitExecutable, "show", afterCommitID) } else { actualBeforeCommitID := beforeCommitID if len(actualBeforeCommitID) == 0 { parentCommit, _ := commit.Parent(0) actualBeforeCommitID = parentCommit.ID.String() } diffArgs := []string{"diff", "-M"} if len(whitespaceBehavior) != 0 { diffArgs = append(diffArgs, whitespaceBehavior) } diffArgs = append(diffArgs, actualBeforeCommitID) diffArgs = append(diffArgs, afterCommitID) cmd = exec.CommandContext(ctx, git.GitExecutable, diffArgs...) beforeCommitID = actualBeforeCommitID } cmd.Dir = repoPath cmd.Stderr = os.Stderr stdout, err := cmd.StdoutPipe() if err != nil { return nil, fmt.Errorf("StdoutPipe: %v", err) } if err = cmd.Start(); err != nil { return nil, fmt.Errorf("Start: %v", err) } pid := process.GetManager().Add(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath), cancel) defer process.GetManager().Remove(pid) diff, err := ParsePatch(maxLines, maxLineCharacters, maxFiles, stdout) if err != nil { return nil, fmt.Errorf("ParsePatch: %v", err) } for _, diffFile := range diff.Files { tailSection := diffFile.GetTailSection(gitRepo, beforeCommitID, afterCommitID) if tailSection != nil { diffFile.Sections = append(diffFile.Sections, tailSection) } } if err = cmd.Wait(); err != nil { return nil, fmt.Errorf("Wait: %v", err) } shortstatArgs := []string{beforeCommitID + "..." + afterCommitID} if len(beforeCommitID) == 0 || beforeCommitID == git.EmptySHA { shortstatArgs = []string{git.EmptyTreeSHA, afterCommitID} } diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(repoPath, shortstatArgs...) if err != nil && strings.Contains(err.Error(), "no merge base") { // git >= 2.28 now returns an error if base and head have become unrelated. // previously it would return the results of git diff --shortstat base head so let's try that... shortstatArgs = []string{beforeCommitID, afterCommitID} diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(repoPath, shortstatArgs...) } if err != nil { return nil, err } return diff, nil } // GetDiffCommit builds a Diff representing the given commitID. func GetDiffCommit(repoPath, commitID string, maxLines, maxLineCharacters, maxFiles int) (*Diff, error) { return GetDiffRange(repoPath, "", commitID, maxLines, maxLineCharacters, maxFiles) } // CommentAsDiff returns c.Patch as *Diff func CommentAsDiff(c *models.Comment) (*Diff, error) { diff, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.Patch)) if err != nil { return nil, err } if len(diff.Files) == 0 { return nil, fmt.Errorf("no file found for comment ID: %d", c.ID) } secs := diff.Files[0].Sections if len(secs) == 0 { return nil, fmt.Errorf("no sections found for comment ID: %d", c.ID) } return diff, nil } // CommentMustAsDiff executes AsDiff and logs the error instead of returning func CommentMustAsDiff(c *models.Comment) *Diff { diff, err := CommentAsDiff(c) if err != nil { log.Warn("CommentMustAsDiff: %v", err) } return diff }