Skip to content
This repository was archived by the owner on Apr 12, 2019. It is now read-only.

Commit 141bc21

Browse files
committed
Faster commit lookup
1 parent d47b98c commit 141bc21

File tree

4 files changed

+362
-168
lines changed

4 files changed

+362
-168
lines changed

commit_info.go

+303
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
// Copyright 2017 The Gitea Authors. All rights reserved.
2+
// Use of this source code is governed by a MIT-style
3+
// license that can be found in the LICENSE file.
4+
5+
package git
6+
7+
import (
8+
"bufio"
9+
"context"
10+
"fmt"
11+
"os/exec"
12+
"path"
13+
"runtime"
14+
"strconv"
15+
"strings"
16+
"sync"
17+
"time"
18+
)
19+
20+
// Thresholds that decide when the untargeted `git log` scan gives up and
// leaves the remaining entries to the targeted-search goroutines, so that it
// does not interfere with them.
const (
	// deferToTargetedSearchColdStreak is the number of consecutive commits
	// the untargeted scan must have read without finding a new entry before
	// it is allowed to stop.
	deferToTargetedSearchColdStreak = 5

	// deferToTargetedSearchNumRemainingEntries is the largest number of
	// still-unresolved entries at which the untargeted scan is allowed to
	// stop.
	deferToTargetedSearchNumRemainingEntries = 12
)
28+
29+
// getCommitsInfoState shared state while getting commit info for entries
30+
type getCommitsInfoState struct {
31+
lock sync.Mutex
32+
/* read-only fields, can be read without the mutex */
33+
// entries and entryPaths are read-only after initialization, so they can
34+
// safely be read without the mutex
35+
entries []*TreeEntry
36+
// set of filepaths to get info for
37+
entryPaths map[string]struct{}
38+
treePath string
39+
headCommit *Commit
40+
41+
/* mutable fields, must hold mutex to read or write */
42+
// map from filepath to commit
43+
commits map[string]*Commit
44+
// set of filepaths that have been or are being searched for in a target search
45+
targetedPaths map[string]struct{}
46+
}
47+
48+
func (state *getCommitsInfoState) numRemainingEntries() int {
49+
state.lock.Lock()
50+
numRemaining := len(state.entries) - len(state.commits)
51+
state.lock.Unlock()
52+
return numRemaining
53+
}
54+
55+
func (state *getCommitsInfoState) getTargetedEntryPath() string {
56+
var targetedEntryPath string
57+
state.lock.Lock()
58+
for _, entry := range state.entries {
59+
entryPath := path.Join(state.treePath, entry.Name())
60+
if _, ok := state.commits[entryPath]; ok {
61+
continue
62+
} else if _, ok = state.targetedPaths[entryPath]; ok {
63+
continue
64+
}
65+
targetedEntryPath = entryPath
66+
state.targetedPaths[entryPath] = struct{}{}
67+
break
68+
}
69+
state.lock.Unlock()
70+
return targetedEntryPath
71+
}
72+
73+
// repeatedly perform targeted searches for unpopulated entries
74+
func targetedSearch(state *getCommitsInfoState, done chan error) {
75+
for {
76+
entryPath := state.getTargetedEntryPath()
77+
if len(entryPath) == 0 {
78+
done <- nil
79+
return
80+
}
81+
command := NewCommand("rev-list", "-1", "HEAD", "--", entryPath)
82+
output, err := command.RunInDir(state.headCommit.repo.Path)
83+
if err != nil {
84+
done <- err
85+
return
86+
}
87+
id, err := NewIDFromString(strings.TrimSpace(output))
88+
if err != nil {
89+
done <- err
90+
return
91+
}
92+
commit, err := state.headCommit.repo.getCommit(id)
93+
if err != nil {
94+
done <- err
95+
return
96+
}
97+
state.update(entryPath, commit)
98+
}
99+
}
100+
101+
func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitsInfoState {
102+
entryPaths := make(map[string]struct{}, len(entries))
103+
for _, entry := range entries {
104+
entryPaths[path.Join(treePath, entry.Name())] = struct{}{}
105+
}
106+
if treePath = path.Clean(treePath); treePath == "." {
107+
treePath = ""
108+
}
109+
return &getCommitsInfoState{
110+
entries: entries,
111+
entryPaths: entryPaths,
112+
commits: make(map[string]*Commit, len(entries)),
113+
targetedPaths: make(map[string]struct{}, len(entries)),
114+
treePath: treePath,
115+
headCommit: headCommit,
116+
}
117+
}
118+
119+
// GetCommitsInfo gets information of all commits that are corresponding to these entries
120+
func (tes Entries) GetCommitsInfo(commit *Commit, treePath string) ([][]interface{}, error) {
121+
state := initGetCommitInfoState(tes, commit, treePath)
122+
if err := getCommitsInfo(state); err != nil {
123+
return nil, err
124+
}
125+
if len(state.commits) < len(state.entryPaths) {
126+
return nil, fmt.Errorf("could not find commits for all entries")
127+
}
128+
129+
commitsInfo := make([][]interface{}, len(tes))
130+
for i, entry := range tes {
131+
commit, ok := state.commits[path.Join(treePath, entry.Name())]
132+
if !ok {
133+
return nil, fmt.Errorf("could not find commit for %s", entry.Name())
134+
}
135+
switch entry.Type {
136+
case ObjectCommit:
137+
subModuleURL := ""
138+
if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil {
139+
return nil, err
140+
} else if subModule != nil {
141+
subModuleURL = subModule.URL
142+
}
143+
subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String())
144+
commitsInfo[i] = []interface{}{entry, subModuleFile}
145+
default:
146+
commitsInfo[i] = []interface{}{entry, commit}
147+
}
148+
}
149+
return commitsInfo, nil
150+
}
151+
152+
func (state *getCommitsInfoState) cleanEntryPath(rawEntryPath string) (string, error) {
153+
if rawEntryPath[0] == '"' {
154+
var err error
155+
rawEntryPath, err = strconv.Unquote(rawEntryPath)
156+
if err != nil {
157+
return rawEntryPath, err
158+
}
159+
}
160+
var entryNameStartIndex int
161+
if len(state.treePath) > 0 {
162+
entryNameStartIndex = len(state.treePath) + 1
163+
}
164+
165+
if index := strings.IndexByte(rawEntryPath[entryNameStartIndex:], '/'); index >= 0 {
166+
return rawEntryPath[:entryNameStartIndex+index], nil
167+
}
168+
return rawEntryPath, nil
169+
}
170+
171+
// update report that the given path was last modified by the given commit.
172+
// Returns whether state.commits was updated
173+
func (state *getCommitsInfoState) update(entryPath string, commit *Commit) bool {
174+
if _, ok := state.entryPaths[entryPath]; !ok {
175+
return false
176+
}
177+
178+
var updated bool
179+
state.lock.Lock()
180+
if _, ok := state.commits[entryPath]; !ok {
181+
state.commits[entryPath] = commit
182+
updated = true
183+
}
184+
state.lock.Unlock()
185+
return updated
186+
}
187+
188+
// getCommitsInfoPretty is the `git log` format argument used by
// getCommitsInfo. Each commit is printed as one line holding the commit hash,
// the committer timestamp in Unix seconds and the subject, separated by single
// spaces; parseCommitInfo expects exactly this layout.
const getCommitsInfoPretty = "--pretty=format:%H %ct %s"
189+
190+
func getCommitsInfo(state *getCommitsInfoState) error {
191+
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
192+
defer cancel()
193+
194+
args := []string{"log", getCommitsInfoPretty, "--name-status"}
195+
if len(state.treePath) > 0 {
196+
args = append(args, "--", state.treePath)
197+
}
198+
cmd := exec.CommandContext(ctx, "git", args...)
199+
cmd.Dir = state.headCommit.repo.Path
200+
201+
readCloser, err := cmd.StdoutPipe()
202+
if err != nil {
203+
return err
204+
}
205+
206+
if err := cmd.Start(); err != nil {
207+
return err
208+
}
209+
210+
numThreads := runtime.NumCPU()
211+
done := make(chan error, numThreads)
212+
for i := 0; i < numThreads; i++ {
213+
go targetedSearch(state, done)
214+
}
215+
216+
scanner := bufio.NewScanner(readCloser)
217+
err = state.processGitLogOutput(scanner)
218+
for i := 0; i < numThreads; i++ {
219+
doneErr := <-done
220+
if doneErr != nil && err == nil {
221+
err = doneErr
222+
}
223+
}
224+
return err
225+
}
226+
227+
func (state *getCommitsInfoState) processGitLogOutput(scanner *bufio.Scanner) error {
228+
// keep a local cache of seen paths to avoid acquiring a lock for paths
229+
// we've already seen
230+
seenPaths := make(map[string]struct{}, len(state.entryPaths))
231+
// number of consecutive commits without any finds
232+
coldStreak := 0
233+
var commit *Commit
234+
var err error
235+
for scanner.Scan() {
236+
line := scanner.Text()
237+
if len(line) == 0 { // in-between commits
238+
numRemainingEntries := state.numRemainingEntries()
239+
if numRemainingEntries == 0 {
240+
break
241+
}
242+
if coldStreak >= deferToTargetedSearchColdStreak &&
243+
numRemainingEntries <= deferToTargetedSearchNumRemainingEntries {
244+
// stop this untargeted search, and let the targeted-search threads
245+
// finish the work
246+
break
247+
}
248+
continue
249+
}
250+
if line[0] >= 'A' && line[0] <= 'X' { // a file was changed by the current commit
251+
if len(line) < 3 || line[1] != '\t' {
252+
return fmt.Errorf("misformatted line: %s", line)
253+
}
254+
entryPath, err := state.cleanEntryPath(line[2:])
255+
if err != nil {
256+
return err
257+
}
258+
if _, ok := seenPaths[entryPath]; !ok {
259+
if state.update(entryPath, commit) {
260+
coldStreak = 0
261+
}
262+
seenPaths[entryPath] = struct{}{}
263+
}
264+
continue
265+
}
266+
267+
// a new commit
268+
commit, err = parseCommitInfo(line)
269+
if err != nil {
270+
return err
271+
}
272+
coldStreak++
273+
}
274+
return scanner.Err()
275+
}
276+
277+
// parseCommitInfo parse a commit from a line of `git log` output. Expects the
278+
// line to be formatted according to getCommitsInfoPretty.
279+
func parseCommitInfo(line string) (*Commit, error) {
280+
if len(line) < 43 {
281+
return nil, fmt.Errorf("invalid git output: %s", line)
282+
}
283+
ref, err := NewIDFromString(line[:40])
284+
if err != nil {
285+
return nil, err
286+
}
287+
spaceIndex := strings.IndexByte(line[41:], ' ')
288+
if spaceIndex < 0 {
289+
return nil, fmt.Errorf("invalid git output: %s", line)
290+
}
291+
unixSeconds, err := strconv.Atoi(line[41 : 41+spaceIndex])
292+
if err != nil {
293+
return nil, err
294+
}
295+
message := line[spaceIndex+42:]
296+
return &Commit{
297+
ID: ref,
298+
CommitMessage: message,
299+
Committer: &Signature{
300+
When: time.Unix(int64(unixSeconds), 0),
301+
},
302+
}, nil
303+
}

commit_info_test.go

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package git
2+
3+
import (
4+
"os"
5+
"path/filepath"
6+
"testing"
7+
"time"
8+
)
9+
10+
// benchmarkReposDir is the directory under which setupGitRepo looks for, and
// clones, the repositories used by the benchmarks.
const benchmarkReposDir = "benchmark/repos/"
11+
12+
func setupGitRepo(url string, name string) (string, error) {
13+
repoDir := filepath.Join(benchmarkReposDir, name)
14+
if _, err := os.Stat(repoDir); err == nil {
15+
return repoDir, nil
16+
}
17+
return repoDir, Clone(url, repoDir, CloneRepoOptions{
18+
Mirror: false,
19+
Bare: false,
20+
Quiet: true,
21+
Timeout: 5 * time.Minute,
22+
})
23+
}
24+
25+
func BenchmarkEntries_GetCommitsInfo(b *testing.B) {
26+
benchmarks := []struct {
27+
url string
28+
name string
29+
}{
30+
{url: "https://github.com/go-gitea/gitea.git", name: "gitea"},
31+
{url: "https://github.com/ethantkoenig/manyfiles.git", name: "manyfiles"},
32+
{url: "https://github.com/moby/moby.git", name: "moby"},
33+
{url: "https://github.com/golang/go.git", name: "go"},
34+
{url: "https://github.com/torvalds/linux.git", name: "linux"},
35+
}
36+
for _, benchmark := range benchmarks {
37+
var commit *Commit
38+
var entries Entries
39+
if repoPath, err := setupGitRepo(benchmark.url, benchmark.name); err != nil {
40+
b.Fatal(err)
41+
} else if repo, err := OpenRepository(repoPath); err != nil {
42+
b.Fatal(err)
43+
} else if commit, err = repo.GetBranchCommit("master"); err != nil {
44+
b.Fatal(err)
45+
} else if entries, err = commit.Tree.ListEntries(); err != nil {
46+
b.Fatal(err)
47+
}
48+
entries.Sort()
49+
b.ResetTimer()
50+
b.Run(benchmark.name, func(b *testing.B) {
51+
for i := 0; i < b.N; i++ {
52+
_, err := entries.GetCommitsInfo(commit, "")
53+
if err != nil {
54+
b.Fatal(err)
55+
}
56+
}
57+
})
58+
}
59+
}

0 commit comments

Comments
 (0)