[gitea] Refactor parseSignatureFromCommitLine (#29054)

Replace #28849. Thanks to @yp05327 for the looking into the problem.
Fix #28840

The old behavior of newSignatureFromCommitline is not right. The new
parseSignatureFromCommitLine:

1. never fails
2. only accept one format (if there is any other, it could be easily added)

And add some tests.

(cherry picked from commit a24e1da7e9e38fc5f5c84c083d122c0cc3da4b74)
This commit is contained in:
wxiaoguang 2024-02-09 11:02:53 +08:00 committed by Earl Warren
parent 045bd097c6
commit 83bb3cf86a
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: 0579CB2928A78A00
8 changed files with 104 additions and 149 deletions

View file

@ -271,7 +271,7 @@ func GetLatestCommitTime(ctx context.Context, repoPath string) (time.Time, error
return time.Time{}, err return time.Time{}, err
} }
commitTime := strings.TrimSpace(stdout) commitTime := strings.TrimSpace(stdout)
return time.Parse(GitTimeLayout, commitTime) return time.Parse("Mon Jan _2 15:04:05 2006 -0700", commitTime)
} }
// DivergeObject represents commit count diverging commits // DivergeObject represents commit count diverging commits

View file

@ -183,11 +183,7 @@ func parseTagRef(objectFormat ObjectFormat, ref map[string]string) (tag *Tag, er
} }
} }
tag.Tagger, err = newSignatureFromCommitline([]byte(ref["creator"])) tag.Tagger = parseSignatureFromCommitLine(ref["creator"])
if err != nil {
return nil, fmt.Errorf("parse tagger: %w", err)
}
tag.Message = ref["contents"] tag.Message = ref["contents"]
// strip PGP signature if present in contents field // strip PGP signature if present in contents field
pgpStart := strings.Index(tag.Message, beginpgp) pgpStart := strings.Index(tag.Message, beginpgp)

View file

@ -227,7 +227,7 @@ func TestRepository_parseTagRef(t *testing.T) {
ID: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"), ID: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"),
Object: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"), Object: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"),
Type: "commit", Type: "commit",
Tagger: parseAuthorLine(t, "Foo Bar <foo@bar.com> 1565789218 +0300"), Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n", Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n",
Signature: nil, Signature: nil,
}, },
@ -256,7 +256,7 @@ func TestRepository_parseTagRef(t *testing.T) {
ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"), ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"),
Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"), Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"),
Type: "tag", Type: "tag",
Tagger: parseAuthorLine(t, "Foo Bar <foo@bar.com> 1565789218 +0300"), Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n", Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n",
Signature: nil, Signature: nil,
}, },
@ -314,7 +314,7 @@ qbHDASXl
ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"), ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"),
Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"), Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"),
Type: "tag", Type: "tag",
Tagger: parseAuthorLine(t, "Foo Bar <foo@bar.com> 1565789218 +0300"), Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md", Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md",
Signature: &CommitGPGSignature{ Signature: &CommitGPGSignature{
Signature: `-----BEGIN PGP SIGNATURE----- Signature: `-----BEGIN PGP SIGNATURE-----
@ -363,14 +363,3 @@ Add changelog of v1.9.1 (#7859)
}) })
} }
} }
func parseAuthorLine(t *testing.T, committer string) *Signature {
t.Helper()
sig, err := newSignatureFromCommitline([]byte(committer))
if err != nil {
t.Fatalf("parse author line '%s': %v", committer, err)
}
return sig
}

View file

@ -4,7 +4,46 @@
package git package git
const ( import (
// GitTimeLayout is the (default) time layout used by git. "strconv"
GitTimeLayout = "Mon Jan _2 15:04:05 2006 -0700" "strings"
"time"
"code.gitea.io/gitea/modules/log"
) )
// Helper to get a signature from the commit line, which looks like:
//
// full name <user@example.com> 1378823654 +0200
//
// Haven't found the official reference for the standard format yet.
// This function never fails, if the "line" can't be parsed, it returns a default Signature with "zero" time.
func parseSignatureFromCommitLine(line string) *Signature {
sig := &Signature{}
s1, sx, ok1 := strings.Cut(line, " <")
s2, s3, ok2 := strings.Cut(sx, "> ")
if !ok1 || !ok2 {
sig.Name = line
return sig
}
sig.Name, sig.Email = s1, s2
if strings.Count(s3, " ") == 1 {
ts, tz, _ := strings.Cut(s3, " ")
seconds, _ := strconv.ParseInt(ts, 10, 64)
if tzTime, err := time.Parse("-0700", tz); err == nil {
sig.When = time.Unix(seconds, 0).In(tzTime.Location())
}
} else {
// the old gitea code tried to parse the date in a few different formats, but it's not clear why.
// according to public document, only the standard format "timestamp timezone" could be found, so drop other formats.
log.Error("suspicious commit line format: %q", line)
for _, fmt := range []string{ /*"Mon Jan _2 15:04:05 2006 -0700"*/ } {
if t, err := time.Parse(fmt, s3); err == nil {
sig.When = t
break
}
}
}
return sig
}

View file

@ -7,52 +7,8 @@
package git package git
import ( import (
"bytes"
"strconv"
"strings"
"time"
"github.com/go-git/go-git/v5/plumbing/object" "github.com/go-git/go-git/v5/plumbing/object"
) )
// Signature represents the Author or Committer information. // Signature represents the Author or Committer information.
type Signature = object.Signature type Signature = object.Signature
// Helper to get a signature from the commit line, which looks like these:
//
// author Patrick Gundlach <gundlach@speedata.de> 1378823654 +0200
// author Patrick Gundlach <gundlach@speedata.de> Thu, 07 Apr 2005 22:13:13 +0200
//
// but without the "author " at the beginning (this method should)
// be used for author and committer.
//
// FIXME: include timezone for timestamp!
func newSignatureFromCommitline(line []byte) (_ *Signature, err error) {
sig := new(Signature)
emailStart := bytes.IndexByte(line, '<')
if emailStart > 0 { // Empty name has already occurred, even if it shouldn't
sig.Name = strings.TrimSpace(string(line[:emailStart-1]))
}
emailEnd := bytes.IndexByte(line, '>')
sig.Email = string(line[emailStart+1 : emailEnd])
// Check date format.
if len(line) > emailEnd+2 {
firstChar := line[emailEnd+2]
if firstChar >= 48 && firstChar <= 57 {
timestop := bytes.IndexByte(line[emailEnd+2:], ' ')
timestring := string(line[emailEnd+2 : emailEnd+2+timestop])
seconds, _ := strconv.ParseInt(timestring, 10, 64)
sig.When = time.Unix(seconds, 0)
} else {
sig.When, err = time.Parse(GitTimeLayout, string(line[emailEnd+2:]))
if err != nil {
return nil, err
}
}
} else {
// Fall back to unix 0 time
sig.When = time.Unix(0, 0)
}
return sig, nil
}

View file

@ -7,21 +7,17 @@
package git package git
import ( import (
"bytes"
"fmt" "fmt"
"strconv"
"strings"
"time" "time"
"code.gitea.io/gitea/modules/util"
) )
// Signature represents the Author or Committer information. // Signature represents the Author, Committer or Tagger information.
type Signature struct { type Signature struct {
// Name represents a person name. It is an arbitrary string. Name string // the committer name, it can be anything
Name string Email string // the committer email, it can be anything
// Email is an email, but it cannot be assumed to be well-formed. When time.Time // the timestamp of the signature
Email string
// When is the timestamp of the signature.
When time.Time
} }
func (s *Signature) String() string { func (s *Signature) String() string {
@ -30,71 +26,5 @@ func (s *Signature) String() string {
// Decode decodes a byte array representing a signature to signature // Decode decodes a byte array representing a signature to signature
func (s *Signature) Decode(b []byte) { func (s *Signature) Decode(b []byte) {
sig, _ := newSignatureFromCommitline(b) *s = *parseSignatureFromCommitLine(util.UnsafeBytesToString(b))
s.Email = sig.Email
s.Name = sig.Name
s.When = sig.When
}
// Helper to get a signature from the commit line, which looks like these:
//
// author Patrick Gundlach <gundlach@speedata.de> 1378823654 +0200
// author Patrick Gundlach <gundlach@speedata.de> Thu, 07 Apr 2005 22:13:13 +0200
//
// but without the "author " at the beginning (this method should)
// be used for author and committer.
// FIXME: there are a lot of "return sig, err" (but the err is also nil), that's the old behavior, to avoid breaking
func newSignatureFromCommitline(line []byte) (sig *Signature, err error) {
sig = new(Signature)
emailStart := bytes.LastIndexByte(line, '<')
emailEnd := bytes.LastIndexByte(line, '>')
if emailStart == -1 || emailEnd == -1 || emailEnd < emailStart {
return sig, err
}
if emailStart > 0 { // Empty name has already occurred, even if it shouldn't
sig.Name = strings.TrimSpace(string(line[:emailStart-1]))
}
sig.Email = string(line[emailStart+1 : emailEnd])
hasTime := emailEnd+2 < len(line)
if !hasTime {
return sig, err
}
// Check date format.
firstChar := line[emailEnd+2]
if firstChar >= 48 && firstChar <= 57 {
idx := bytes.IndexByte(line[emailEnd+2:], ' ')
if idx < 0 {
return sig, err
}
timestring := string(line[emailEnd+2 : emailEnd+2+idx])
seconds, _ := strconv.ParseInt(timestring, 10, 64)
sig.When = time.Unix(seconds, 0)
idx += emailEnd + 3
if idx >= len(line) || idx+5 > len(line) {
return sig, err
}
timezone := string(line[idx : idx+5])
tzhours, err1 := strconv.ParseInt(timezone[0:3], 10, 64)
tzmins, err2 := strconv.ParseInt(timezone[3:], 10, 64)
if err1 != nil || err2 != nil {
return sig, err
}
if tzhours < 0 {
tzmins *= -1
}
tz := time.FixedZone("", int(tzhours*60*60+tzmins*60))
sig.When = sig.When.In(tz)
} else {
sig.When, err = time.Parse(GitTimeLayout, string(line[emailEnd+2:]))
if err != nil {
return sig, err
}
}
return sig, err
} }

View file

@ -0,0 +1,47 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func TestParseSignatureFromCommitLine(t *testing.T) {
tests := []struct {
line string
want *Signature
}{
{
line: "a b <c@d.com> 12345 +0100",
want: &Signature{
Name: "a b",
Email: "c@d.com",
When: time.Unix(12345, 0).In(time.FixedZone("", 3600)),
},
},
{
line: "bad line",
want: &Signature{Name: "bad line"},
},
{
line: "bad < line",
want: &Signature{Name: "bad < line"},
},
{
line: "bad > line",
want: &Signature{Name: "bad > line"},
},
{
line: "bad-line <name@example.com>",
want: &Signature{Name: "bad-line <name@example.com>"},
},
}
for _, test := range tests {
got := parseSignatureFromCommitLine(test.line)
assert.EqualValues(t, test.want, got)
}
}

View file

@ -7,6 +7,8 @@ import (
"bytes" "bytes"
"sort" "sort"
"strings" "strings"
"code.gitea.io/gitea/modules/util"
) )
const ( const (
@ -59,11 +61,7 @@ l:
// A commit can have one or more parents // A commit can have one or more parents
tag.Type = string(line[spacepos+1:]) tag.Type = string(line[spacepos+1:])
case "tagger": case "tagger":
sig, err := newSignatureFromCommitline(line[spacepos+1:]) tag.Tagger = parseSignatureFromCommitLine(util.UnsafeBytesToString(line[spacepos+1:]))
if err != nil {
return nil, err
}
tag.Tagger = sig
} }
nextline += eol + 1 nextline += eol + 1
case eol == 0: case eol == 0: