From 51fb6f3983f15aa3c2db1feadcc13db1692315ec Mon Sep 17 00:00:00 2001 From: Shiny Nematoda Date: Tue, 20 Feb 2024 11:05:42 +0000 Subject: [PATCH] [FEAT] add fallback repo search using git grep --- routers/web/repo/search.go | 58 +++++++++------ services/repository/files/search.go | 90 ++++++++++++++++++++++++ services/repository/files/search_test.go | 48 +++++++++++++ templates/repo/home.tmpl | 30 ++++---- templates/repo/search.tmpl | 20 +++--- tests/integration/repo_search_test.go | 63 ++++++++++++----- 6 files changed, 246 insertions(+), 63 deletions(-) create mode 100644 services/repository/files/search.go create mode 100644 services/repository/files/search_test.go diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go index 3c0fa4bc0..29b3b7b47 100644 --- a/routers/web/repo/search.go +++ b/routers/web/repo/search.go @@ -10,17 +10,13 @@ import ( "code.gitea.io/gitea/modules/context" code_indexer "code.gitea.io/gitea/modules/indexer/code" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/services/repository/files" ) const tplSearch base.TplName = "repo/search" // Search render repository search page func Search(ctx *context.Context) { - if !setting.Indexer.RepoIndexerEnabled { - ctx.Redirect(ctx.Repo.RepoLink) - return - } - language := ctx.FormTrim("l") keyword := ctx.FormTrim("q") @@ -37,31 +33,49 @@ func Search(ctx *context.Context) { return } + ctx.Data["SourcePath"] = ctx.Repo.Repository.Link() + page := ctx.FormInt("page") if page <= 0 { page = 1 } - total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID}, - language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch) - if err != nil { - if code_indexer.IsAvailable(ctx) { - ctx.ServerError("SearchResults", err) + if setting.Indexer.RepoIndexerEnabled { + ctx.Data["CodeIndexerEnabled"] = true + + total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID}, + language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch) + if err != nil { + if code_indexer.IsAvailable(ctx) { + ctx.ServerError("SearchResults", err) + return + } + ctx.Data["CodeIndexerUnavailable"] = true + } else { + ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx) + } + + ctx.Data["SearchResults"] = searchResults + ctx.Data["SearchResultLanguages"] = searchResultLanguages + + pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5) + pager.SetDefaultParams(ctx) + pager.AddParam(ctx, "l", "Language") + ctx.Data["Page"] = pager + } else { + data, err := files.NewRepoGrep(ctx, ctx.Repo.Repository, keyword) + if err != nil { + ctx.ServerError("NewRepoGrep", err) return } - ctx.Data["CodeIndexerUnavailable"] = true - } else { - ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx) + + ctx.Data["CodeIndexerEnabled"] = false + ctx.Data["SearchResults"] = data + + pager := context.NewPagination(len(data), setting.UI.RepoSearchPagingNum, page, 5) + pager.SetDefaultParams(ctx) + ctx.Data["Page"] = pager } - ctx.Data["SourcePath"] = ctx.Repo.Repository.Link() - ctx.Data["SearchResults"] = searchResults - ctx.Data["SearchResultLanguages"] = searchResultLanguages - - pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5) - pager.SetDefaultParams(ctx) - pager.AddParam(ctx, "l", "Language") - ctx.Data["Page"] = pager - ctx.HTML(http.StatusOK, tplSearch) } diff --git a/services/repository/files/search.go b/services/repository/files/search.go new file mode 100644 index 000000000..f8317c489 --- /dev/null +++ b/services/repository/files/search.go @@ -0,0 +1,90 @@ +package files + +import ( + "context" + "html/template" + "strconv" + "strings" + + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/highlight" + "code.gitea.io/gitea/modules/timeutil" + + "github.com/go-enry/go-enry/v2" +) + +type Result struct { + RepoID int64 // ignored + Filename string + CommitID string // branch + UpdatedUnix timeutil.TimeStamp // ignored + Language string + Color string + LineNumbers []int64 + FormattedLines template.HTML +} + +const pHEAD = "HEAD:" + +func NewRepoGrep(ctx context.Context, repo *repo_model.Repository, keyword string) ([]*Result, error) { + t, _, err := gitrepo.RepositoryFromContextOrOpen(ctx, repo) + if err != nil { + return nil, err + } + + data := []*Result{} + + stdout, _, err := git.NewCommand(ctx, + "grep", + "-1", // n before and after lines + "-z", + "--heading", + "--break", // easier parsing + "--fixed-strings", // disallow regex for now + "-n", // line nums + "-i", // ignore case + "--full-name", // full file path, rel to repo + //"--column", // for adding better highlighting support + ). + AddDynamicArguments(keyword). + AddArguments("HEAD"). + RunStdString(&git.RunOpts{Dir: t.Path}) + if err != nil { + return data, nil // non zero exit code when there are no results + } + + for _, block := range strings.Split(stdout, "\n\n") { + res := Result{CommitID: repo.DefaultBranch} + code := []string{} + + for _, line := range strings.Split(block, "\n") { + if strings.HasPrefix(line, pHEAD) { + res.Filename = strings.TrimPrefix(line, pHEAD) + continue + } + + if ln, after, ok := strings.Cut(line, "\x00"); ok { + i, err := strconv.ParseInt(ln, 10, 64) + if err != nil { + continue + } + + res.LineNumbers = append(res.LineNumbers, i) + code = append(code, after) + } + } + + if res.Filename == "" || len(code) == 0 || len(res.LineNumbers) == 0 { + continue + } + + res.FormattedLines, res.Language = highlight.Code(res.Filename, "", strings.Join(code, "\n")) + res.Color = enry.GetColor(res.Language) + + data = append(data, &res) + } + + return data, nil +} diff --git a/services/repository/files/search_test.go b/services/repository/files/search_test.go new file mode 100644 index 000000000..c24bb731a --- /dev/null +++ b/services/repository/files/search_test.go @@ -0,0 +1,48 @@ +package files + +import ( + "testing" + + "code.gitea.io/gitea/models/unittest" + "code.gitea.io/gitea/modules/contexttest" + + "github.com/stretchr/testify/assert" +) + +func TestNewRepoGrep(t *testing.T) { + unittest.PrepareTestEnv(t) + ctx, _ := contexttest.MockContext(t, "user2/repo1") + ctx.SetParams(":id", "1") + contexttest.LoadRepo(t, ctx, 1) + contexttest.LoadRepoCommit(t, ctx) + contexttest.LoadUser(t, ctx, 2) + contexttest.LoadGitRepo(t, ctx) + defer ctx.Repo.GitRepo.Close() + + t.Run("with result", func(t *testing.T) { + res, err := NewRepoGrep(ctx, ctx.Repo.Repository, "Description") + assert.NoError(t, err) + + expected := []*Result{ + { + RepoID: 0, + Filename: "README.md", + CommitID: "master", + UpdatedUnix: 0, + Language: "Markdown", + Color: "#083fa1", + LineNumbers: []int64{2, 3}, + FormattedLines: "\nDescription for repo1", + }, + } + + assert.EqualValues(t, res, expected) + }) + + t.Run("empty result", func(t *testing.T) { + res, err := NewRepoGrep(ctx, ctx.Repo.Repository, "keyword that does not match in the repo") + assert.NoError(t, err) + + assert.EqualValues(t, res, []*Result{}) + }) +} diff --git a/templates/repo/home.tmpl b/templates/repo/home.tmpl index 5e27d9160..9bac26ce1 100644 --- a/templates/repo/home.tmpl +++ b/templates/repo/home.tmpl @@ -11,23 +11,21 @@ {{if $description}}{{$description | RenderCodeBlock}}{{else if .IsRepositoryAdmin}}{{ctx.Locale.Tr "repo.no_desc"}}{{end}} {{.Repository.Website}} - {{if .RepoSearchEnabled}} -
{{range .Topics}}{{.Name}}{{end}} diff --git a/templates/repo/search.tmpl b/templates/repo/search.tmpl index b616b4de3..3b5c212af 100644 --- a/templates/repo/search.tmpl +++ b/templates/repo/search.tmpl @@ -6,14 +6,16 @@
- @@ -41,7 +43,7 @@

{{.Filename}} - {{ctx.Locale.Tr "repo.diff.view_file"}} + {{ctx.Locale.Tr "repo.diff.view_file"}}

@@ -50,7 +52,7 @@ {{range .LineNumbers}} - {{.}} + {{.}} {{end}} {{.FormattedLines}} diff --git a/tests/integration/repo_search_test.go b/tests/integration/repo_search_test.go index cf199e98c..e5ee334ce 100644 --- a/tests/integration/repo_search_test.go +++ b/tests/integration/repo_search_test.go @@ -11,14 +11,15 @@ import ( repo_model "code.gitea.io/gitea/models/repo" code_indexer "code.gitea.io/gitea/modules/indexer/code" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" "code.gitea.io/gitea/tests" "github.com/PuerkitoBio/goquery" "github.com/stretchr/testify/assert" ) -func resultFilenames(t testing.TB, doc *HTMLDoc) []string { - filenameSelections := doc.doc.Find(".repository.search").Find(".repo-search-result").Find(".header").Find("span.file") +func resultFilenames(t testing.TB, doc *goquery.Selection) []string { + filenameSelections := doc.Find(".header").Find("span.file") result := make([]string, filenameSelections.Length()) filenameSelections.Each(func(i int, selection *goquery.Selection) { result[i] = selection.Text() @@ -26,36 +27,66 @@ func resultFilenames(t testing.TB, doc *HTMLDoc) []string { return result } -func TestSearchRepo(t *testing.T) { +func checkResultLinks(t *testing.T, substr string, doc *goquery.Selection) { + t.Helper() + linkSelections := doc.Find("a[href]") + linkSelections.Each(func(i int, selection *goquery.Selection) { + assert.Contains(t, selection.AttrOr("href", ""), substr) + }) +} + +func testSearchRepo(t *testing.T, useExternalIndexer bool) { defer tests.PrepareTestEnv(t)() + defer test.MockVariableValue(&setting.Indexer.RepoIndexerEnabled, useExternalIndexer)() repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1") assert.NoError(t, err) - executeIndexer(t, repo, code_indexer.UpdateRepoIndexer) + gitReference := "/branch/" + repo.DefaultBranch - testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"}) + if useExternalIndexer { + gitReference = "/commit/" + executeIndexer(t, repo, code_indexer.UpdateRepoIndexer) + } - setting.Indexer.IncludePatterns = setting.IndexerGlobFromString("**.txt") - setting.Indexer.ExcludePatterns = setting.IndexerGlobFromString("**/y/**") + testSearch(t, "/user2/repo1/search?q=Description&page=1", gitReference, []string{"README.md"}) - repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob") - assert.NoError(t, err) + if useExternalIndexer { + setting.Indexer.IncludePatterns = setting.IndexerGlobFromString("**.txt") + setting.Indexer.ExcludePatterns = setting.IndexerGlobFromString("**/y/**") - executeIndexer(t, repo, code_indexer.UpdateRepoIndexer) + repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob") + assert.NoError(t, err) - testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"}) - testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt"}) - testSearch(t, "/user2/glob/search?q=file4&page=1", []string{}) - testSearch(t, "/user2/glob/search?q=file5&page=1", []string{}) + executeIndexer(t, repo, code_indexer.UpdateRepoIndexer) + + testSearch(t, "/user2/glob/search?q=loren&page=1", gitReference, []string{"a.txt"}) + testSearch(t, "/user2/glob/search?q=file3&page=1", gitReference, []string{"x/b.txt"}) + testSearch(t, "/user2/glob/search?q=file4&page=1", gitReference, []string{}) + testSearch(t, "/user2/glob/search?q=file5&page=1", gitReference, []string{}) + } } -func testSearch(t *testing.T, url string, expected []string) { +func TestIndexerSearchRepo(t *testing.T) { + testSearchRepo(t, true) +} + +func TestNoIndexerSearchRepo(t *testing.T) { + testSearchRepo(t, false) +} + +func testSearch(t *testing.T, url, gitRef string, expected []string) { req := NewRequest(t, "GET", url) resp := MakeRequest(t, req, http.StatusOK) - filenames := resultFilenames(t, NewHTMLParser(t, resp.Body)) + doc := NewHTMLParser(t, resp.Body).doc. + Find(".repository.search"). + Find(".repo-search-result") + + filenames := resultFilenames(t, doc) assert.EqualValues(t, expected, filenames) + + checkResultLinks(t, gitRef, doc) } func executeIndexer(t *testing.T, repo *repo_model.Repository, op func(*repo_model.Repository)) {