Refactor parseTreeEntries, speed up tree list (#21368)
Close #20315 (fix the panic when parsing invalid input), Speed up #20231 (use ls-tree without size field) Introduce ListEntriesRecursiveFast (ls-tree without size) and ListEntriesRecursiveWithSize (ls-tree with size)
This commit is contained in:
parent
69fc510d6d
commit
c08e42c47e
7 changed files with 109 additions and 53 deletions
|
@ -22,70 +22,72 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
|
||||||
return parseTreeEntries(data, nil)
|
return parseTreeEntries(data, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var sepSpace = []byte{' '}
|
||||||
|
|
||||||
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
|
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
|
||||||
entries := make([]*TreeEntry, 0, 10)
|
var err error
|
||||||
|
entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1)
|
||||||
for pos := 0; pos < len(data); {
|
for pos := 0; pos < len(data); {
|
||||||
// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
|
// expect line to be of the form:
|
||||||
|
// <mode> <type> <sha> <space-padded-size>\t<filename>
|
||||||
|
// <mode> <type> <sha>\t<filename>
|
||||||
|
posEnd := bytes.IndexByte(data[pos:], '\n')
|
||||||
|
if posEnd == -1 {
|
||||||
|
posEnd = len(data)
|
||||||
|
} else {
|
||||||
|
posEnd += pos
|
||||||
|
}
|
||||||
|
line := data[pos:posEnd]
|
||||||
|
posTab := bytes.IndexByte(line, '\t')
|
||||||
|
if posTab == -1 {
|
||||||
|
return nil, fmt.Errorf("invalid ls-tree output (no tab): %q", line)
|
||||||
|
}
|
||||||
|
|
||||||
entry := new(TreeEntry)
|
entry := new(TreeEntry)
|
||||||
entry.ptree = ptree
|
entry.ptree = ptree
|
||||||
if pos+6 > len(data) {
|
|
||||||
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
|
entryAttrs := line[:posTab]
|
||||||
|
entryName := line[posTab+1:]
|
||||||
|
|
||||||
|
entryMode, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
|
||||||
|
_ /* entryType */, entryAttrs, _ = bytes.Cut(entryAttrs, sepSpace) // the type is not used, the mode is enough to determine the type
|
||||||
|
entryObjectID, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
|
||||||
|
if len(entryAttrs) > 0 {
|
||||||
|
entrySize := entryAttrs // the last field is the space-padded-size
|
||||||
|
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(entrySize)), 10, 64)
|
||||||
|
entry.sized = true
|
||||||
}
|
}
|
||||||
switch string(data[pos : pos+6]) {
|
|
||||||
|
switch string(entryMode) {
|
||||||
case "100644":
|
case "100644":
|
||||||
entry.entryMode = EntryModeBlob
|
entry.entryMode = EntryModeBlob
|
||||||
pos += 12 // skip over "100644 blob "
|
|
||||||
case "100755":
|
case "100755":
|
||||||
entry.entryMode = EntryModeExec
|
entry.entryMode = EntryModeExec
|
||||||
pos += 12 // skip over "100755 blob "
|
|
||||||
case "120000":
|
case "120000":
|
||||||
entry.entryMode = EntryModeSymlink
|
entry.entryMode = EntryModeSymlink
|
||||||
pos += 12 // skip over "120000 blob "
|
|
||||||
case "160000":
|
case "160000":
|
||||||
entry.entryMode = EntryModeCommit
|
entry.entryMode = EntryModeCommit
|
||||||
pos += 14 // skip over "160000 object "
|
|
||||||
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
|
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
|
||||||
entry.entryMode = EntryModeTree
|
entry.entryMode = EntryModeTree
|
||||||
pos += 12 // skip over "040000 tree "
|
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unknown type: %v", string(data[pos:pos+6]))
|
return nil, fmt.Errorf("unknown type: %v", string(entryMode))
|
||||||
}
|
}
|
||||||
|
|
||||||
if pos+40 > len(data) {
|
entry.ID, err = NewIDFromString(string(entryObjectID))
|
||||||
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
|
|
||||||
}
|
|
||||||
id, err := NewIDFromString(string(data[pos : pos+40]))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
|
return nil, fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err)
|
||||||
}
|
|
||||||
entry.ID = id
|
|
||||||
pos += 41 // skip over sha and trailing space
|
|
||||||
|
|
||||||
end := pos + bytes.IndexByte(data[pos:], '\t')
|
|
||||||
if end < pos {
|
|
||||||
return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
|
|
||||||
}
|
|
||||||
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
|
|
||||||
entry.sized = true
|
|
||||||
|
|
||||||
pos = end + 1
|
|
||||||
|
|
||||||
end = pos + bytes.IndexByte(data[pos:], '\n')
|
|
||||||
if end < pos {
|
|
||||||
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// In case entry name is surrounded by double quotes(it happens only in git-shell).
|
if len(entryName) > 0 && entryName[0] == '"' {
|
||||||
if data[pos] == '"' {
|
entry.name, err = strconv.Unquote(string(entryName))
|
||||||
entry.name, err = strconv.Unquote(string(data[pos:end]))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
|
return nil, fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
entry.name = string(data[pos:end])
|
entry.name = string(entryName)
|
||||||
}
|
}
|
||||||
|
|
||||||
pos = end + 1
|
pos = posEnd + 1
|
||||||
entries = append(entries, entry)
|
entries = append(entries, entry)
|
||||||
}
|
}
|
||||||
return entries, nil
|
return entries, nil
|
||||||
|
|
|
@ -12,7 +12,7 @@ import (
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestParseTreeEntries(t *testing.T) {
|
func TestParseTreeEntriesLong(t *testing.T) {
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
Input string
|
Input string
|
||||||
Expected []*TreeEntry
|
Expected []*TreeEntry
|
||||||
|
@ -59,11 +59,47 @@ func TestParseTreeEntries(t *testing.T) {
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Len(t, entries, len(testCase.Expected))
|
assert.Len(t, entries, len(testCase.Expected))
|
||||||
for i, entry := range entries {
|
for i, entry := range entries {
|
||||||
assert.EqualValues(t, testCase.Expected[i].ID, entry.ID)
|
assert.EqualValues(t, testCase.Expected[i], entry)
|
||||||
assert.EqualValues(t, testCase.Expected[i].name, entry.name)
|
|
||||||
assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode)
|
|
||||||
assert.EqualValues(t, testCase.Expected[i].sized, entry.sized)
|
|
||||||
assert.EqualValues(t, testCase.Expected[i].size, entry.size)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseTreeEntriesShort(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
Input string
|
||||||
|
Expected []*TreeEntry
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af README.md
|
||||||
|
040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d assets
|
||||||
|
`,
|
||||||
|
Expected: []*TreeEntry{
|
||||||
|
{
|
||||||
|
ID: MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"),
|
||||||
|
name: "README.md",
|
||||||
|
entryMode: EntryModeBlob,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"),
|
||||||
|
name: "assets",
|
||||||
|
entryMode: EntryModeTree,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
entries, err := ParseTreeEntries([]byte(testCase.Input))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Len(t, entries, len(testCase.Expected))
|
||||||
|
for i, entry := range entries {
|
||||||
|
assert.EqualValues(t, testCase.Expected[i], entry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseTreeEntriesInvalid(t *testing.T) {
|
||||||
|
// there was a panic: "runtime error: slice bounds out of range" when the input was invalid: #20315
|
||||||
|
entries, err := ParseTreeEntries([]byte("100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af"))
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Len(t, entries, 0)
|
||||||
|
}
|
||||||
|
|
|
@ -57,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
||||||
|
|
||||||
tree := commit.Tree
|
tree := commit.Tree
|
||||||
|
|
||||||
entries, err := tree.ListEntriesRecursive()
|
entries, err := tree.ListEntriesRecursiveWithSize()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,8 +57,8 @@ func (t *Tree) ListEntries() (Entries, error) {
|
||||||
return entries, nil
|
return entries, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ListEntriesRecursive returns all entries of current tree recursively including all subtrees
|
// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees
|
||||||
func (t *Tree) ListEntriesRecursive() (Entries, error) {
|
func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
|
||||||
if t.gogitTree == nil {
|
if t.gogitTree == nil {
|
||||||
err := t.loadTreeObject()
|
err := t.loadTreeObject()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -92,3 +92,8 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) {
|
||||||
|
|
||||||
return entries, nil
|
return entries, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ListEntriesRecursiveFast is the alias of ListEntriesRecursiveWithSize for the gogit version
|
||||||
|
func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
|
||||||
|
return t.ListEntriesRecursiveWithSize()
|
||||||
|
}
|
||||||
|
|
|
@ -99,13 +99,16 @@ func (t *Tree) ListEntries() (Entries, error) {
|
||||||
return t.entries, err
|
return t.entries, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// ListEntriesRecursive returns all entries of current tree recursively including all subtrees
|
// listEntriesRecursive returns all entries of current tree recursively including all subtrees
|
||||||
func (t *Tree) ListEntriesRecursive() (Entries, error) {
|
// extraArgs could be "-l" to get the size, which is slower
|
||||||
|
func (t *Tree) listEntriesRecursive(extraArgs ...string) (Entries, error) {
|
||||||
if t.entriesRecursiveParsed {
|
if t.entriesRecursiveParsed {
|
||||||
return t.entriesRecursive, nil
|
return t.entriesRecursive, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
stdout, _, runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-l", "-r", t.ID.String()).RunStdBytes(&RunOpts{Dir: t.repo.Path})
|
args := append([]string{"ls-tree", "-t", "-r"}, extraArgs...)
|
||||||
|
args = append(args, t.ID.String())
|
||||||
|
stdout, _, runErr := NewCommand(t.repo.Ctx, args...).RunStdBytes(&RunOpts{Dir: t.repo.Path})
|
||||||
if runErr != nil {
|
if runErr != nil {
|
||||||
return nil, runErr
|
return nil, runErr
|
||||||
}
|
}
|
||||||
|
@ -118,3 +121,13 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) {
|
||||||
|
|
||||||
return t.entriesRecursive, err
|
return t.entriesRecursive, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ListEntriesRecursiveFast returns all entries of current tree recursively including all subtrees, no size
|
||||||
|
func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
|
||||||
|
return t.listEntriesRecursive()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees, with size
|
||||||
|
func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
|
||||||
|
return t.listEntriesRecursive("--long")
|
||||||
|
}
|
||||||
|
|
|
@ -22,9 +22,9 @@ func TreeList(ctx *context.Context) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
entries, err := tree.ListEntriesRecursive()
|
entries, err := tree.ListEntriesRecursiveFast()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ctx.ServerError("ListEntriesRecursive", err)
|
ctx.ServerError("ListEntriesRecursiveFast", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
entries.CustomSort(base.NaturalSortLess)
|
entries.CustomSort(base.NaturalSortLess)
|
||||||
|
|
|
@ -29,7 +29,7 @@ func GetTreeBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git
|
||||||
tree.URL = repo.APIURL() + "/git/trees/" + url.PathEscape(tree.SHA)
|
tree.URL = repo.APIURL() + "/git/trees/" + url.PathEscape(tree.SHA)
|
||||||
var entries git.Entries
|
var entries git.Entries
|
||||||
if recursive {
|
if recursive {
|
||||||
entries, err = gitTree.ListEntriesRecursive()
|
entries, err = gitTree.ListEntriesRecursiveWithSize()
|
||||||
} else {
|
} else {
|
||||||
entries, err = gitTree.ListEntries()
|
entries, err = gitTree.ListEntries()
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue