From c84e4aadc2c4f84180e2e8dfa1a14efcd3b4e949 Mon Sep 17 00:00:00 2001 From: Antoine GIRARD Date: Wed, 24 May 2017 12:07:26 +0200 Subject: [PATCH 1/8] Add bench task --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 683b8e522..afb4ef87f 100644 --- a/Makefile +++ b/Makefile @@ -35,6 +35,10 @@ lint: test: for PKG in $(PACKAGES); do go test -cover -coverprofile $$GOPATH/src/$$PKG/coverage.out $$PKG || exit 1; done; +.PHONY: bench +bench: + go test -run=XXX -bench=. || exit 1 + .PHONY: build build: go build . From 96af092d0061ef75fad6305004125b9c127c8e6c Mon Sep 17 00:00:00 2001 From: Antoine GIRARD Date: Wed, 24 May 2017 12:33:12 +0200 Subject: [PATCH 2/8] Create tree_entry_test.go --- tree_entry_test.go | 66 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 tree_entry_test.go diff --git a/tree_entry_test.go b/tree_entry_test.go new file mode 100644 index 000000000..7d074d665 --- /dev/null +++ b/tree_entry_test.go @@ -0,0 +1,66 @@ +// Copyright 2017 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package git + +import ( + "io/ioutil" + "os" + "testing" +) + +func setupGitRepo(url string) string { + + dir, err := ioutil.TempDir("", "gitea-bench") + if err != nil { + panic(err) + } + /* Manual method + _, err = NewCommand("clone", url, dir).Run() + if err != nil { + log.Fatal(err) + } + */ + err = Clone(url, dir, CloneRepoOptions{Mirror: false, Bare: false, Quiet: true}) + if err != nil { + panic(err) + } + return dir +} + +func benchmarkGetCommitsInfo(url string, b *testing.B) { + + // setup env + repoPath := setupGitRepo(url) + defer os.RemoveAll(repoPath) + + repo, err := OpenRepository(repoPath) + if err != nil { + panic(err) + } + + commit, err := repo.GetBranchCommit("master") + if err != nil { + panic(err) + } + + entries, err := commit.Tree.ListEntries() + if err != nil { + panic(err) + } + entries.Sort() + + // run the GetCommitsInfo function b.N times + for n := 0; n < b.N; n++ { + _, err = entries.GetCommitsInfo(commit, "") + if err != nil { + panic(err) + } + } +} + + +func BenchmarkGetCommitsInfoGitea(b *testing.B) { benchmarkGetCommitsInfo("https://github.com/go-gitea/gitea.git", b) } //5k+ commits +func BenchmarkGetCommitsInfoMoby(b *testing.B) { benchmarkGetCommitsInfo("https://github.com/moby/moby.git", b) } //32k+ commits +func BenchmarkGetCommitsInfoGo(b *testing.B) { benchmarkGetCommitsInfo("https://github.com/golang/go.git", b) } //+32k commits From eef76cd15f9b53939dcd192e1fd1950ae1cbd46f Mon Sep 17 00:00:00 2001 From: Antoine GIRARD Date: Wed, 24 May 2017 14:54:07 +0200 Subject: [PATCH 3/8] Remove init time --- Makefile | 2 +- tree_entry_test.go | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index afb4ef87f..7d92d719f 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ test: .PHONY: bench bench: - go test -run=XXX -bench=. || exit 1 + go test -run=XXXXXX -benchtime=10s -bench=. || exit 1 .PHONY: build build: diff --git a/tree_entry_test.go b/tree_entry_test.go index 7d074d665..cdc695397 100644 --- a/tree_entry_test.go +++ b/tree_entry_test.go @@ -11,7 +11,6 @@ import ( ) func setupGitRepo(url string) string { - dir, err := ioutil.TempDir("", "gitea-bench") if err != nil { panic(err) @@ -30,7 +29,8 @@ func setupGitRepo(url string) string { } func benchmarkGetCommitsInfo(url string, b *testing.B) { - + b.StopTimer() + // setup env repoPath := setupGitRepo(url) defer os.RemoveAll(repoPath) @@ -51,6 +51,7 @@ func benchmarkGetCommitsInfo(url string, b *testing.B) { } entries.Sort() + b.StartTimer() // run the GetCommitsInfo function b.N times for n := 0; n < b.N; n++ { _, err = entries.GetCommitsInfo(commit, "") From ae8ee3619290061ddc67e44c78dd3b25431c0050 Mon Sep 17 00:00:00 2001 From: Antoine GIRARD Date: Wed, 24 May 2017 15:05:05 +0200 Subject: [PATCH 4/8] Add TODO information --- tree_entry_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tree_entry_test.go b/tree_entry_test.go index cdc695397..8bec711a8 100644 --- a/tree_entry_test.go +++ b/tree_entry_test.go @@ -28,6 +28,7 @@ func setupGitRepo(url string) string { return dir } +//TODO use https://blog.golang.org/subtests when removing support for Go1.6 func benchmarkGetCommitsInfo(url string, b *testing.B) { b.StopTimer() From dcfda73c5de1701141f148fa920fc84eb9026578 Mon Sep 17 00:00:00 2001 From: Antoine GIRARD Date: Wed, 24 May 2017 17:03:57 +0200 Subject: [PATCH 5/8] Add linux repo --- tree_entry_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tree_entry_test.go b/tree_entry_test.go index 8bec711a8..879f4ae59 100644 --- a/tree_entry_test.go +++ b/tree_entry_test.go @@ -65,4 +65,5 @@ func benchmarkGetCommitsInfo(url string, b *testing.B) { func BenchmarkGetCommitsInfoGitea(b *testing.B) { benchmarkGetCommitsInfo("https://github.com/go-gitea/gitea.git", b) } //5k+ commits func BenchmarkGetCommitsInfoMoby(b *testing.B) { benchmarkGetCommitsInfo("https://github.com/moby/moby.git", b) } //32k+ commits -func BenchmarkGetCommitsInfoGo(b *testing.B) { benchmarkGetCommitsInfo("https://github.com/golang/go.git", b) } //+32k commits +func BenchmarkGetCommitsInfoGo(b *testing.B) { benchmarkGetCommitsInfo("https://github.com/golang/go.git", b) } //32k+ commits +func BenchmarkGetCommitsInfoGo(b *testing.B) { benchmarkGetCommitsInfo("https://github.com/torvalds/linux.git", b) } //677k+ commits From 76cec74c94b04c16ce0edb24cc946721cb4506d3 Mon Sep 17 00:00:00 2001 From: Ethan Koenig Date: Thu, 25 May 2017 11:19:20 -0400 Subject: [PATCH 6/8] Faster implementation of GetCommitsInfo --- .gitignore | 2 + Makefile | 4 +- tree_entry.go | 218 ++++++++++++++++++++++++++------------------- tree_entry_test.go | 93 +++++++++---------- 4 files changed, 172 insertions(+), 145 deletions(-) diff --git a/.gitignore b/.gitignore index 13e345788..37f260ffe 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,5 @@ _testmain.go *.prof coverage.out + +benchmark_repos/ diff --git a/Makefile b/Makefile index 7d92d719f..22906704a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ IMPORT := code.gitea.io/git -PACKAGES ?= $(shell go list ./... | grep -v /vendor/) +PACKAGES ?= $(shell go list -e ./... | grep -v /vendor/ | grep -v /benchmark_repos/) GENERATE ?= code.gitea.io/git .PHONY: all @@ -18,7 +18,7 @@ generate: .PHONY: fmt fmt: - find . -name "*.go" -type f -not -path "./vendor/*" | xargs gofmt -s -w + find . -name "*.go" -type f -not -path "./vendor/*" -not -path "./benchmark_repos/*" | xargs gofmt -s -w .PHONY: vet vet: diff --git a/tree_entry.go b/tree_entry.go index 1e4934e81..58c513246 100644 --- a/tree_entry.go +++ b/tree_entry.go @@ -5,10 +5,7 @@ package git import ( - "fmt" - "path" "path/filepath" - "runtime" "sort" "strconv" "strings" @@ -147,112 +144,147 @@ func (tes Entries) Sort() { sort.Sort(tes) } -type commitInfo struct { - entryName string - infos []interface{} - err error +// getCommitInfoState transient state for getting commit info for entries +type getCommitInfoState struct { + entries map[string]*TreeEntry // map from filepath to entry + commits map[string]*Commit // map from entry name to commit + lastCommitHash string + lastCommit *Commit + treePath string + headCommit *Commit + nextSearchSize int // next number of commits to search for } -// GetCommitsInfo takes advantages of concurrency to speed up getting information -// of all commits that are corresponding to these entries. This method will automatically -// choose the right number of goroutine (concurrency) to use related of the host CPU. +func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitInfoState { + entriesByPath := make(map[string]*TreeEntry, len(entries)) + for _, entry := range entries { + entriesByPath[filepath.Join(treePath, entry.Name())] = entry + } + return &getCommitInfoState{ + entries: entriesByPath, + commits: make(map[string]*Commit, len(entriesByPath)), + treePath: treePath, + headCommit: headCommit, + nextSearchSize: 16, + } +} + +// GetCommitsInfo gets information of all commits that are corresponding to these entries func (tes Entries) GetCommitsInfo(commit *Commit, treePath string) ([][]interface{}, error) { - return tes.GetCommitsInfoWithCustomConcurrency(commit, treePath, 0) + state := initGetCommitInfoState(tes, commit, treePath) + if err := getCommitsInfo(state); err != nil { + return nil, err + } + + commitsInfo := make([][]interface{}, len(tes)) + for i, entry := range tes { + commit = state.commits[filepath.Join(treePath, entry.Name())] + switch entry.Type { + case ObjectCommit: + subModuleURL := "" + if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil { + return nil, err + } else if subModule != nil { + subModuleURL = subModule.URL + } + subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String()) + commitsInfo[i] = []interface{}{entry, subModuleFile} + default: + commitsInfo[i] = []interface{}{entry, commit} + } + } + return commitsInfo, nil } -// GetCommitsInfoWithCustomConcurrency takes advantages of concurrency to speed up getting information -// of all commits that are corresponding to these entries. If the given maxConcurrency is negative or -// equal to zero: the right number of goroutine (concurrency) to use will be chosen related of the -// host CPU. -func (tes Entries) GetCommitsInfoWithCustomConcurrency(commit *Commit, treePath string, maxConcurrency int) ([][]interface{}, error) { - if len(tes) == 0 { - return nil, nil +func (state *getCommitInfoState) nextCommit(hash string) { + state.lastCommitHash = hash + state.lastCommit = nil +} + +func (state *getCommitInfoState) commit() (*Commit, error) { + var err error + if state.lastCommit == nil { + state.lastCommit, err = state.headCommit.repo.GetCommit(state.lastCommitHash) } + return state.lastCommit, err +} - if maxConcurrency <= 0 { - maxConcurrency = runtime.NumCPU() +func (state *getCommitInfoState) update(path string) error { + relPath, err := filepath.Rel(state.treePath, path) + if err != nil { + return nil + } + var entryPath string + if index := strings.IndexRune(relPath, '/'); index >= 0 { + entryPath = filepath.Join(state.treePath, relPath[:index]) + } else { + entryPath = path } + if _, ok := state.entries[entryPath]; !ok { + return nil + } else if _, ok := state.commits[entryPath]; ok { + return nil + } + state.commits[entryPath], err = state.commit() + return err +} - // Length of taskChan determines how many goroutines (subprocesses) can run at the same time. - // The length of revChan should be same as taskChan so goroutines whoever finished job can - // exit as early as possible, only store data inside channel. - taskChan := make(chan bool, maxConcurrency) - revChan := make(chan commitInfo, maxConcurrency) - doneChan := make(chan error) - - // Receive loop will exit when it collects same number of data pieces as tree entries. - // It notifies doneChan before exits or notify early with possible error. - infoMap := make(map[string][]interface{}, len(tes)) - go func() { - i := 0 - for info := range revChan { - if info.err != nil { - doneChan <- info.err - return - } +func getCommitsInfo(state *getCommitInfoState) error { + for len(state.entries) > len(state.commits) { + if err := getNextCommitInfos(state); err != nil { + return err + } + } + return nil +} - infoMap[info.entryName] = info.infos - i++ - if i == len(tes) { +func getNextCommitInfos(state *getCommitInfoState) error { + logOutput, err := logCommand(state.lastCommitHash, state).RunInDir(state.headCommit.repo.Path) + if err != nil { + return err + } + lines := strings.Split(logOutput, "\n") + i := 0 + for i < len(lines) { + state.nextCommit(lines[i]) + i++ + for ; i < len(lines); i++ { + path := lines[i] + if path == "" { break } + state.update(path) } - doneChan <- nil - }() - - for i := range tes { - // When taskChan is idle (or has empty slots), put operation will not block. - // However when taskChan is full, code will block and wait any running goroutines to finish. - taskChan <- true - - if tes[i].Type != ObjectCommit { - go func(i int) { - cinfo := commitInfo{entryName: tes[i].Name()} - c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name())) - if err != nil { - cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err) - } else { - cinfo.infos = []interface{}{tes[i], c} - } - revChan <- cinfo - <-taskChan // Clear one slot from taskChan to allow new goroutines to start. - }(i) - continue + i++ // skip blank line + if len(state.entries) == len(state.commits) { + break } - - // Handle submodule - go func(i int) { - cinfo := commitInfo{entryName: tes[i].Name()} - sm, err := commit.GetSubModule(path.Join(treePath, tes[i].Name())) - if err != nil && !IsErrNotExist(err) { - cinfo.err = fmt.Errorf("GetSubModule (%s/%s): %v", treePath, tes[i].Name(), err) - revChan <- cinfo - return - } - - smURL := "" - if sm != nil { - smURL = sm.URL - } - - c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name())) - if err != nil { - cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err) - } else { - cinfo.infos = []interface{}{tes[i], NewSubModuleFile(c, smURL, tes[i].ID.String())} - } - revChan <- cinfo - <-taskChan - }(i) } + return nil +} - if err := <-doneChan; err != nil { - return nil, err +func logCommand(exclusiveStartHash string, state *getCommitInfoState) *Command { + var commitHash string + if len(exclusiveStartHash) == 0 { + commitHash = "HEAD" + } else { + commitHash = exclusiveStartHash + "^" } - - commitsInfo := make([][]interface{}, len(tes)) - for i := 0; i < len(tes); i++ { - commitsInfo[i] = infoMap[tes[i].Name()] + var command *Command + numRemainingEntries := len(state.entries) - len(state.commits) + if numRemainingEntries < 32 { + searchSize := (numRemainingEntries + 1) / 2 + command = NewCommand("log", prettyLogFormat, "--name-only", + "-"+strconv.Itoa(searchSize), commitHash, "--") + for path, entry := range state.entries { + if _, ok := state.commits[entry.Name()]; !ok { + command.AddArguments(path) + } + } + } else { + command = NewCommand("log", prettyLogFormat, "--name-only", + "-"+strconv.Itoa(state.nextSearchSize), commitHash, "--", state.treePath) } - return commitsInfo, nil + state.nextSearchSize += state.nextSearchSize + return command } diff --git a/tree_entry_test.go b/tree_entry_test.go index 879f4ae59..92a8e5b89 100644 --- a/tree_entry_test.go +++ b/tree_entry_test.go @@ -5,65 +5,58 @@ package git import ( - "io/ioutil" "os" + "path/filepath" "testing" + "time" ) -func setupGitRepo(url string) string { - dir, err := ioutil.TempDir("", "gitea-bench") - if err != nil { - panic(err) - } - /* Manual method - _, err = NewCommand("clone", url, dir).Run() - if err != nil { - log.Fatal(err) - } - */ - err = Clone(url, dir, CloneRepoOptions{Mirror: false, Bare: false, Quiet: true}) - if err != nil { - panic(err) - } - return dir -} - -//TODO use https://blog.golang.org/subtests when removing support for Go1.6 -func benchmarkGetCommitsInfo(url string, b *testing.B) { - b.StopTimer() - - // setup env - repoPath := setupGitRepo(url) - defer os.RemoveAll(repoPath) - - repo, err := OpenRepository(repoPath) - if err != nil { - panic(err) - } +const benchmarkReposDir = "benchmark_repos/" - commit, err := repo.GetBranchCommit("master") - if err != nil { - panic(err) +func setupGitRepo(url string, name string) (string, error) { + repoDir := filepath.Join(benchmarkReposDir, name) + if _, err := os.Stat(repoDir); err == nil { + return repoDir, nil } + return repoDir, Clone(url, repoDir, CloneRepoOptions{ + Mirror: false, + Bare: false, + Quiet: true, + Timeout: 5 * time.Minute, + }) +} - entries, err := commit.Tree.ListEntries() - if err != nil { - panic(err) +func BenchmarkEntries_GetCommitsInfo(b *testing.B) { + benchmarks := []struct { + url string + name string + }{ + {url: "https://github.com/go-gitea/gitea.git", name: "gitea"}, + {url: "https://github.com/ethantkoenig/manyfiles.git", name: "manyfiles"}, + {url: "https://github.com/moby/moby.git", name: "moby"}, + {url: "https://github.com/golang/go.git", name: "go"}, + {url: "https://github.com/torvalds/linux.git", name: "linux"}, } - entries.Sort() - - b.StartTimer() - // run the GetCommitsInfo function b.N times - for n := 0; n < b.N; n++ { - _, err = entries.GetCommitsInfo(commit, "") - if err != nil { + for _, benchmark := range benchmarks { + var commit *Commit + var entries Entries + if repoPath, err := setupGitRepo(benchmark.url, benchmark.name); err != nil { + panic(err) + } else if repo, err := OpenRepository(repoPath); err != nil { + panic(err) + } else if commit, err = repo.GetBranchCommit("master"); err != nil { + panic(err) + } else if entries, err = commit.Tree.ListEntries(); err != nil { panic(err) } + entries.Sort() + b.Run(benchmark.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, err := entries.GetCommitsInfo(commit, "") + if err != nil { + panic(err) + } + } + }) } } - - -func BenchmarkGetCommitsInfoGitea(b *testing.B) { benchmarkGetCommitsInfo("https://github.com/go-gitea/gitea.git", b) } //5k+ commits -func BenchmarkGetCommitsInfoMoby(b *testing.B) { benchmarkGetCommitsInfo("https://github.com/moby/moby.git", b) } //32k+ commits -func BenchmarkGetCommitsInfoGo(b *testing.B) { benchmarkGetCommitsInfo("https://github.com/golang/go.git", b) } //32k+ commits -func BenchmarkGetCommitsInfoGo(b *testing.B) { benchmarkGetCommitsInfo("https://github.com/torvalds/linux.git", b) } //677k+ commits From e8bc37c40f4293f9e5120bb0d465e610d73949a3 Mon Sep 17 00:00:00 2001 From: Ethan Koenig Date: Fri, 26 May 2017 00:50:50 -0400 Subject: [PATCH 7/8] Start/stop timer --- tree_entry_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tree_entry_test.go b/tree_entry_test.go index 92a8e5b89..63b6af615 100644 --- a/tree_entry_test.go +++ b/tree_entry_test.go @@ -38,6 +38,7 @@ func BenchmarkEntries_GetCommitsInfo(b *testing.B) { {url: "https://github.com/torvalds/linux.git", name: "linux"}, } for _, benchmark := range benchmarks { + b.StopTimer() var commit *Commit var entries Entries if repoPath, err := setupGitRepo(benchmark.url, benchmark.name); err != nil { @@ -50,6 +51,7 @@ func BenchmarkEntries_GetCommitsInfo(b *testing.B) { panic(err) } entries.Sort() + b.StartTimer() b.Run(benchmark.name, func(b *testing.B) { for i := 0; i < b.N; i++ { _, err := entries.GetCommitsInfo(commit, "") From f22ce901b067462cfb039144a7486687a1278121 Mon Sep 17 00:00:00 2001 From: Ethan Koenig Date: Fri, 26 May 2017 09:40:26 -0400 Subject: [PATCH 8/8] Use benchmark/ directory for benchmark repos --- .gitignore | 2 +- Makefile | 4 ++-- tree_entry_test.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 37f260ffe..05df2812e 100644 --- a/.gitignore +++ b/.gitignore @@ -25,4 +25,4 @@ _testmain.go coverage.out -benchmark_repos/ +benchmark/ diff --git a/Makefile b/Makefile index 22906704a..e659a5968 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ IMPORT := code.gitea.io/git -PACKAGES ?= $(shell go list -e ./... | grep -v /vendor/ | grep -v /benchmark_repos/) +PACKAGES ?= $(shell go list -e ./... | grep -v /vendor/ | grep -v /benchmark/) GENERATE ?= code.gitea.io/git .PHONY: all @@ -18,7 +18,7 @@ generate: .PHONY: fmt fmt: - find . -name "*.go" -type f -not -path "./vendor/*" -not -path "./benchmark_repos/*" | xargs gofmt -s -w + find . -name "*.go" -type f -not -path "./vendor/*" -not -path "./benchmark/*" | xargs gofmt -s -w .PHONY: vet vet: diff --git a/tree_entry_test.go b/tree_entry_test.go index 63b6af615..a52692e67 100644 --- a/tree_entry_test.go +++ b/tree_entry_test.go @@ -11,7 +11,7 @@ import ( "time" ) -const benchmarkReposDir = "benchmark_repos/" +const benchmarkReposDir = "benchmark/repos/" func setupGitRepo(url string, name string) (string, error) { repoDir := filepath.Join(benchmarkReposDir, name)