Skip to content

Speed up enry.IsVendor #15213

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions modules/analyze/vendor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package analyze

import (
"regexp"
"sort"
"strings"

"github.com/go-enry/go-enry/v2/data"
)

var isVendorRegExp *regexp.Regexp

func init() {
matchers := data.VendorMatchers

caretStrings := make([]string, 0, 10)
caretShareStrings := make([]string, 0, 10)

matcherStrings := make([]string, 0, len(matchers))
for _, matcher := range matchers {
str := matcher.String()
if str[0] == '^' {
caretStrings = append(caretStrings, str[1:])
} else if str[0:5] == "(^|/)" {
caretShareStrings = append(caretShareStrings, str[5:])
} else {
matcherStrings = append(matcherStrings, str)
}
}

sort.Strings(caretShareStrings)
sort.Strings(caretStrings)
sort.Strings(matcherStrings)

sb := &strings.Builder{}
sb.WriteString("(?:^(?:")
sb.WriteString(caretStrings[0])
for _, matcher := range caretStrings[1:] {
sb.WriteString(")|(?:")
sb.WriteString(matcher)
}
sb.WriteString("))")
sb.WriteString("|")
sb.WriteString("(?:(?:^|/)(?:")
sb.WriteString(caretShareStrings[0])
for _, matcher := range caretShareStrings[1:] {
sb.WriteString(")|(?:")
sb.WriteString(matcher)
}
sb.WriteString("))")
sb.WriteString("|")
sb.WriteString("(?:")
sb.WriteString(matcherStrings[0])
for _, matcher := range matcherStrings[1:] {
sb.WriteString(")|(?:")
sb.WriteString(matcher)
}
sb.WriteString(")")
combined := sb.String()
isVendorRegExp = regexp.MustCompile(combined)
}

// IsVendor returns whether or not path is a vendor path.
func IsVendor(path string) bool {
return isVendorRegExp.MatchString(path)
}
42 changes: 42 additions & 0 deletions modules/analyze/vendor_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package analyze

import "testing"

func TestIsVendor(t *testing.T) {
tests := []struct {
path string
want bool
}{
{"cache/", true},
{"random/cache/", true},
{"cache", false},
{"dependencies/", true},
{"Dependencies/", true},
{"dependency/", false},
{"dist/", true},
{"dist", false},
{"random/dist/", true},
{"random/dist", false},
{"deps/", true},
{"configure", true},
{"a/configure", true},
{"config.guess", true},
{"config.guess/", false},
{".vscode/", true},
{"doc/_build/", true},
{"a/docs/_build/", true},
{"a/dasdocs/_build-vsdoc.js", true},
{"a/dasdocs/_build-vsdoc.j", false},
}
for _, tt := range tests {
t.Run(tt.path, func(t *testing.T) {
if got := IsVendor(tt.path); got != tt.want {
t.Errorf("IsVendor() = %v, want %v", got, tt.want)
}
})
}
}
2 changes: 1 addition & 1 deletion modules/git/repo_language_stats_gogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err

sizes := make(map[string]int64)
err = tree.Files().ForEach(func(f *object.File) error {
if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion modules/git/repo_language_stats_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
for _, f := range entries {
contentBuf.Reset()
content = contentBuf.Bytes()
if f.Size() == 0 || enry.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) ||
if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) ||
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) {
continue
}
Expand Down
2 changes: 1 addition & 1 deletion modules/indexer/code/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) {

func (b *BleveIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
// Ignore vendored files in code search
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion modules/indexer/code/elastic_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ func (b *ElasticSearchIndexer) init() (bool, error) {

func (b *ElasticSearchIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) {
// Ignore vendored files in code search
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
return nil, nil
}

Expand Down