Skip to content

Commit 950abfe

Browse files
darrenwxiaoguangGiteaBot
authored
enable literal string for code search (#33590)
Close: #33588 --------- Co-authored-by: wxiaoguang <[email protected]> Co-authored-by: Giteabot <[email protected]>
1 parent fc1b383 commit 950abfe

File tree

8 files changed

+135
-56
lines changed

8 files changed

+135
-56
lines changed

modules/indexer/code/bleve/bleve.go

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -260,17 +260,28 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
260260
var (
261261
indexerQuery query.Query
262262
keywordQuery query.Query
263+
contentQuery query.Query
263264
)
264265

265266
pathQuery := bleve.NewPrefixQuery(strings.ToLower(opts.Keyword))
266267
pathQuery.FieldVal = "Filename"
267268
pathQuery.SetBoost(10)
268269

269-
contentQuery := bleve.NewMatchQuery(opts.Keyword)
270-
contentQuery.FieldVal = "Content"
271-
272-
if opts.IsKeywordFuzzy {
273-
contentQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
270+
keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
271+
if isPhrase {
272+
q := bleve.NewMatchPhraseQuery(keywordAsPhrase)
273+
q.FieldVal = "Content"
274+
if opts.IsKeywordFuzzy {
275+
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(keywordAsPhrase)
276+
}
277+
contentQuery = q
278+
} else {
279+
q := bleve.NewMatchQuery(opts.Keyword)
280+
q.FieldVal = "Content"
281+
if opts.IsKeywordFuzzy {
282+
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
283+
}
284+
contentQuery = q
274285
}
275286

276287
keywordQuery = bleve.NewDisjunctionQuery(contentQuery, pathQuery)

modules/indexer/code/elasticsearch/elasticsearch.go

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"code.gitea.io/gitea/modules/setting"
2525
"code.gitea.io/gitea/modules/timeutil"
2626
"code.gitea.io/gitea/modules/typesniffer"
27+
"code.gitea.io/gitea/modules/util"
2728

2829
"github.com/go-enry/go-enry/v2"
2930
"github.com/olivere/elastic/v7"
@@ -359,13 +360,19 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
359360

360361
// Search searches for codes and language stats by given conditions.
361362
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
362-
searchType := esMultiMatchTypePhrasePrefix
363-
if opts.IsKeywordFuzzy {
364-
searchType = esMultiMatchTypeBestFields
363+
var contentQuery elastic.Query
364+
keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
365+
if isPhrase {
366+
contentQuery = elastic.NewMatchPhraseQuery("content", keywordAsPhrase)
367+
} else {
368+
// TODO: this is the old logic, but not really using "fuzziness"
369+
// * IsKeywordFuzzy=true: "best_fields"
370+
// * IsKeywordFuzzy=false: "phrase_prefix"
371+
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).
372+
Type(util.Iif(opts.IsKeywordFuzzy, esMultiMatchTypeBestFields, esMultiMatchTypePhrasePrefix))
365373
}
366-
367374
kwQuery := elastic.NewBoolQuery().Should(
368-
elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType),
375+
contentQuery,
369376
elastic.NewMultiMatchQuery(opts.Keyword, "filename^10").Type(esMultiMatchTypePhrasePrefix),
370377
)
371378
query := elastic.NewBoolQuery()
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// Copyright 2025 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package gitgrep
5+
6+
import (
7+
"context"
8+
"fmt"
9+
"strings"
10+
11+
"code.gitea.io/gitea/modules/git"
12+
code_indexer "code.gitea.io/gitea/modules/indexer/code"
13+
"code.gitea.io/gitea/modules/setting"
14+
)
15+
16+
func indexSettingToGitGrepPathspecList() (list []string) {
17+
for _, expr := range setting.Indexer.IncludePatterns {
18+
list = append(list, ":(glob)"+expr.PatternString())
19+
}
20+
for _, expr := range setting.Indexer.ExcludePatterns {
21+
list = append(list, ":(glob,exclude)"+expr.PatternString())
22+
}
23+
return list
24+
}
25+
26+
func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, isFuzzy bool) (searchResults []*code_indexer.Result, total int, err error) {
27+
// TODO: it should also respect ParseKeywordAsPhrase and clarify the "fuzzy" behavior
28+
res, err := git.GrepSearch(ctx, gitRepo, keyword, git.GrepOptions{
29+
ContextLineNumber: 1,
30+
IsFuzzy: isFuzzy,
31+
RefName: ref.String(),
32+
PathspecList: indexSettingToGitGrepPathspecList(),
33+
})
34+
if err != nil {
35+
// TODO: if no branch exists, it reports: exit status 128, fatal: this operation must be run in a work tree.
36+
return nil, 0, fmt.Errorf("git.GrepSearch: %w", err)
37+
}
38+
commitID, err := gitRepo.GetRefCommitID(ref.String())
39+
if err != nil {
40+
return nil, 0, fmt.Errorf("gitRepo.GetRefCommitID: %w", err)
41+
}
42+
43+
total = len(res)
44+
pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res))
45+
pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res))
46+
res = res[pageStart:pageEnd]
47+
for _, r := range res {
48+
searchResults = append(searchResults, &code_indexer.Result{
49+
RepoID: repoID,
50+
Filename: r.Filename,
51+
CommitID: commitID,
52+
// UpdatedUnix: not supported yet
53+
// Language: not supported yet
54+
// Color: not supported yet
55+
Lines: code_indexer.HighlightSearchResultCode(r.Filename, "", r.LineNumbers, strings.Join(r.LineCodes, "\n")),
56+
})
57+
}
58+
return searchResults, total, nil
59+
}

routers/web/repo/search_test.go renamed to modules/indexer/code/gitgrep/gitgrep_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Copyright 2024 The Gitea Authors. All rights reserved.
22
// SPDX-License-Identifier: MIT
33

4-
package repo
4+
package gitgrep
55

66
import (
77
"testing"

modules/indexer/code/indexer.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,11 @@ var (
2929
// When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready.
3030
// So it's always safe use it as *globalIndexer.Load() and call its methods.
3131
globalIndexer atomic.Pointer[internal.Indexer]
32-
dummyIndexer *internal.Indexer
3332
)
3433

3534
func init() {
36-
i := internal.NewDummyIndexer()
37-
dummyIndexer = &i
38-
globalIndexer.Store(dummyIndexer)
35+
dummyIndexer := internal.NewDummyIndexer()
36+
globalIndexer.Store(&dummyIndexer)
3937
}
4038

4139
func index(ctx context.Context, indexer internal.Indexer, repoID int64) error {

modules/indexer/code/internal/util.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ func FilenameOfIndexerID(indexerID string) string {
3535
return indexerID[index+1:]
3636
}
3737

38-
// Given the contents of file, returns the boundaries of its first seven lines.
38+
// FilenameMatchIndexPos returns the boundaries of the first seven lines of the given file content.
3939
func FilenameMatchIndexPos(content string) (int, int) {
4040
count := 1
4141
for i, c := range content {
@@ -48,3 +48,11 @@ func FilenameMatchIndexPos(content string) (int, int) {
4848
}
4949
return 0, len(content)
5050
}
51+
52+
// ParseKeywordAsPhrase checks whether keyword is wrapped in a pair of double quotes.
// If it is, the text between the outer quotes and true are returned;
// otherwise it returns an empty string and false.
func ParseKeywordAsPhrase(keyword string) (string, bool) {
	const quote = `"`
	// a lone quote character is both prefix and suffix, so at least two bytes are required
	if len(keyword) < 2 || !strings.HasPrefix(keyword, quote) || !strings.HasSuffix(keyword, quote) {
		return "", false
	}
	// only the surrounding quotes are stripped, the inner content is not decoded at the moment
	inner := keyword[1 : len(keyword)-1]
	return inner, true
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Copyright 2025 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package internal
5+
6+
import (
7+
"testing"
8+
9+
"github.com/stretchr/testify/assert"
10+
)
11+
12+
func TestParseKeywordAsPhrase(t *testing.T) {
13+
cases := []struct {
14+
keyword string
15+
phrase string
16+
isPhrase bool
17+
}{
18+
{``, "", false},
19+
{`a`, "", false},
20+
{`"`, "", false},
21+
{`"a`, "", false},
22+
{`"a"`, "a", true},
23+
{`""\"""`, `"\""`, true},
24+
}
25+
for _, c := range cases {
26+
phrase, isPhrase := ParseKeywordAsPhrase(c.keyword)
27+
assert.Equal(t, c.phrase, phrase, "keyword=%q", c.keyword)
28+
assert.Equal(t, c.isPhrase, isPhrase, "keyword=%q", c.keyword)
29+
}
30+
}

routers/web/repo/search.go

Lines changed: 6 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ package repo
55

66
import (
77
"net/http"
8-
"strings"
98

109
"code.gitea.io/gitea/models/db"
1110
"code.gitea.io/gitea/modules/git"
1211
code_indexer "code.gitea.io/gitea/modules/indexer/code"
12+
"code.gitea.io/gitea/modules/indexer/code/gitgrep"
1313
"code.gitea.io/gitea/modules/setting"
1414
"code.gitea.io/gitea/modules/templates"
1515
"code.gitea.io/gitea/routers/common"
@@ -18,16 +18,6 @@ import (
1818

1919
const tplSearch templates.TplName = "repo/search"
2020

21-
func indexSettingToGitGrepPathspecList() (list []string) {
22-
for _, expr := range setting.Indexer.IncludePatterns {
23-
list = append(list, ":(glob)"+expr.PatternString())
24-
}
25-
for _, expr := range setting.Indexer.ExcludePatterns {
26-
list = append(list, ":(glob,exclude)"+expr.PatternString())
27-
}
28-
return list
29-
}
30-
3121
// Search render repository search page
3222
func Search(ctx *context.Context) {
3323
ctx.Data["PageIsViewCode"] = true
@@ -67,38 +57,14 @@ func Search(ctx *context.Context) {
6757
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
6858
}
6959
} else {
70-
searchRefName := git.RefNameFromBranch(ctx.Repo.Repository.DefaultBranch) // BranchName should be default branch or the first existing branch
71-
res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, prepareSearch.Keyword, git.GrepOptions{
72-
ContextLineNumber: 1,
73-
IsFuzzy: prepareSearch.IsFuzzy,
74-
RefName: searchRefName.String(),
75-
PathspecList: indexSettingToGitGrepPathspecList(),
76-
})
77-
if err != nil {
78-
// TODO: if no branch exists, it reports: exit status 128, fatal: this operation must be run in a work tree.
79-
ctx.ServerError("GrepSearch", err)
80-
return
81-
}
82-
commitID, err := ctx.Repo.GitRepo.GetRefCommitID(searchRefName.String())
60+
var err error
61+
// ref should be default branch or the first existing branch
62+
searchRef := git.RefNameFromBranch(ctx.Repo.Repository.DefaultBranch)
63+
searchResults, total, err = gitgrep.PerformSearch(ctx, page, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, searchRef, prepareSearch.Keyword, prepareSearch.IsFuzzy)
8364
if err != nil {
84-
ctx.ServerError("GetRefCommitID", err)
65+
ctx.ServerError("gitgrep.PerformSearch", err)
8566
return
8667
}
87-
total = len(res)
88-
pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res))
89-
pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res))
90-
res = res[pageStart:pageEnd]
91-
for _, r := range res {
92-
searchResults = append(searchResults, &code_indexer.Result{
93-
RepoID: ctx.Repo.Repository.ID,
94-
Filename: r.Filename,
95-
CommitID: commitID,
96-
// UpdatedUnix: not supported yet
97-
// Language: not supported yet
98-
// Color: not supported yet
99-
Lines: code_indexer.HighlightSearchResultCode(r.Filename, "", r.LineNumbers, strings.Join(r.LineCodes, "\n")),
100-
})
101-
}
10268
}
10369

10470
ctx.Data["Repo"] = ctx.Repo.Repository

0 commit comments

Comments
 (0)