Skip to content

Make token deduplication optional for the repo indexer #7826

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions custom/conf/app.ini.sample
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ REPO_INDEXER_ENABLED = false
REPO_INDEXER_PATH = indexers/repos.bleve
UPDATE_BUFFER_LEN = 20
MAX_FILE_SIZE = 1048576
REPO_INDEXER_DEDUPLICATE = true

[admin]
; Disallow regular (non-admin) users from creating organizations.
Expand Down
1 change: 1 addition & 0 deletions docs/content/doc/advanced/config-cheat-sheet.en-us.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`.
- `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search.
- `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request.
- `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed.
- `REPO_INDEXER_DEDUPLICATE`: **true** Deduplicate terms from the index, reducing the index size while limiting the search capabilities.

## Security (`security`)

Expand Down
6 changes: 5 additions & 1 deletion modules/indexer/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,18 @@ func createRepoIndexer(path string, latestVersion int) error {
textFieldMapping.IncludeInAll = false
docMapping.AddFieldMappingsAt("Content", textFieldMapping)

tokenFilters := []string{unicodeNormalizeName, lowercase.Name}
if setting.Indexer.RepoIndexerDeduplicate {
tokenFilters = append(tokenFilters, unique.Name)
}
mapping := bleve.NewIndexMapping()
if err = addUnicodeNormalizeTokenFilter(mapping); err != nil {
return err
} else if err = mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{
"type": custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
"token_filters": []string{unicodeNormalizeName, lowercase.Name, unique.Name},
"token_filters": tokenFilters,
}); err != nil {
return err
}
Expand Down
22 changes: 12 additions & 10 deletions modules/setting/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,17 @@ const (
var (
// Indexer settings
Indexer = struct {
IssueType string
IssuePath string
RepoIndexerEnabled bool
RepoPath string
UpdateQueueLength int
MaxIndexerFileSize int64
IssueQueueType string
IssueQueueDir string
IssueQueueConnStr string
IssueQueueBatchNumber int
IssueType string
IssuePath string
RepoIndexerEnabled bool
RepoPath string
UpdateQueueLength int
MaxIndexerFileSize int64
RepoIndexerDeduplicate bool
IssueQueueType string
IssueQueueDir string
IssueQueueConnStr string
IssueQueueBatchNumber int
}{
IssueType: "bleve",
IssuePath: "indexers/issues.bleve",
Expand All @@ -51,6 +52,7 @@ func newIndexerService() {
if !filepath.IsAbs(Indexer.RepoPath) {
Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath)
}
Indexer.RepoIndexerDeduplicate = sec.Key("REPO_INDEXER_DEDUPLICATE").MustBool(true)
Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024)
Indexer.IssueQueueType = sec.Key("ISSUE_INDEXER_QUEUE_TYPE").MustString(LevelQueueType)
Expand Down