Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit 52c1f98

Browse files
committed
config: support a configurable, and turn-off-able, pack.window
One use of go-git is to transfer git data from a non-standard git repo (not stored in a file system, for example) to a "remote" backed by a standard, local .git repo. In this scenario, delta compression is not needed to reduce transfer time over the "network", because there is no network. The underlying storage layer has already taken care of the data transfer, and sending the objects to local .git storage doesn't require compression. So this PR gives the user the option to turn off compression when it isn't needed. Of course, this results in a larger, uncompressed local .git repo, but the user can then run git gc or git repack on that repo if they care about the storage costs. Setting the pack window to 0 reduces the total push time of a 36K repo by 50 seconds (out of a pre-PR total of 3m26s).
1 parent bb3217c commit 52c1f98

File tree

10 files changed

+146
-42
lines changed

10 files changed

+146
-42
lines changed

config/config.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"errors"
77
"fmt"
88
"sort"
9+
"strconv"
910

1011
format "gopkg.in/src-d/go-git.v4/plumbing/format/config"
1112
)
@@ -40,6 +41,14 @@ type Config struct {
4041
// Worktree is the path to the root of the working tree.
4142
Worktree string
4243
}
44+
45+
Pack struct {
46+
// Window controls the size of the sliding window for delta
47+
// compression. The default is 10. A value of 0 turns off
48+
// delta compression entirely.
49+
Window uint
50+
}
51+
4352
// Remotes list of repository remotes, the key of the map is the name
4453
// of the remote, should equal to RemoteConfig.Name.
4554
Remotes map[string]*RemoteConfig
@@ -81,10 +90,14 @@ const (
8190
remoteSection = "remote"
8291
submoduleSection = "submodule"
8392
coreSection = "core"
93+
packSection = "pack"
8494
fetchKey = "fetch"
8595
urlKey = "url"
8696
bareKey = "bare"
8797
worktreeKey = "worktree"
98+
windowKey = "window"
99+
100+
defaultPackWindow = uint(10)
88101
)
89102

90103
// Unmarshal parses a git-config file and stores it.
@@ -98,6 +111,9 @@ func (c *Config) Unmarshal(b []byte) error {
98111
}
99112

100113
c.unmarshalCore()
114+
if err := c.unmarshalPack(); err != nil {
115+
return err
116+
}
101117
c.unmarshalSubmodules()
102118
return c.unmarshalRemotes()
103119
}
@@ -111,6 +127,21 @@ func (c *Config) unmarshalCore() {
111127
c.Core.Worktree = s.Options.Get(worktreeKey)
112128
}
113129

130+
func (c *Config) unmarshalPack() error {
131+
s := c.Raw.Section(packSection)
132+
window := s.Options.Get(windowKey)
133+
if window == "" {
134+
c.Pack.Window = defaultPackWindow
135+
} else {
136+
winUint, err := strconv.ParseUint(window, 10, 32)
137+
if err != nil {
138+
return err
139+
}
140+
c.Pack.Window = uint(winUint)
141+
}
142+
return nil
143+
}
144+
114145
func (c *Config) unmarshalRemotes() error {
115146
s := c.Raw.Section(remoteSection)
116147
for _, sub := range s.Subsections {
@@ -138,6 +169,7 @@ func (c *Config) unmarshalSubmodules() {
138169
// Marshal returns Config encoded as a git-config file.
139170
func (c *Config) Marshal() ([]byte, error) {
140171
c.marshalCore()
172+
c.marshalPack()
141173
c.marshalRemotes()
142174
c.marshalSubmodules()
143175

@@ -158,6 +190,13 @@ func (c *Config) marshalCore() {
158190
}
159191
}
160192

193+
func (c *Config) marshalPack() {
194+
s := c.Raw.Section(packSection)
195+
if c.Pack.Window != defaultPackWindow {
196+
s.SetOption(windowKey, fmt.Sprintf("%d", c.Pack.Window))
197+
}
198+
}
199+
161200
func (c *Config) marshalRemotes() {
162201
s := c.Raw.Section(remoteSection)
163202
newSubsections := make(format.Subsections, 0, len(c.Remotes))

config/config_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ func (s *ConfigSuite) TestUnmarshall(c *C) {
1010
input := []byte(`[core]
1111
bare = true
1212
worktree = foo
13+
[pack]
14+
window = 20
1315
[remote "origin"]
1416
url = [email protected]:mcuadros/go-git.git
1517
fetch = +refs/heads/*:refs/remotes/origin/*
@@ -33,6 +35,7 @@ func (s *ConfigSuite) TestUnmarshall(c *C) {
3335

3436
c.Assert(cfg.Core.IsBare, Equals, true)
3537
c.Assert(cfg.Core.Worktree, Equals, "foo")
38+
c.Assert(cfg.Pack.Window, Equals, uint(20))
3639
c.Assert(cfg.Remotes, HasLen, 2)
3740
c.Assert(cfg.Remotes["origin"].Name, Equals, "origin")
3841
c.Assert(cfg.Remotes["origin"].URLs, DeepEquals, []string{"[email protected]:mcuadros/go-git.git"})
@@ -51,6 +54,8 @@ func (s *ConfigSuite) TestMarshall(c *C) {
5154
output := []byte(`[core]
5255
bare = true
5356
worktree = bar
57+
[pack]
58+
window = 20
5459
[remote "alt"]
5560
url = [email protected]:mcuadros/go-git.git
5661
url = [email protected]:src-d/go-git.git
@@ -65,6 +70,7 @@ func (s *ConfigSuite) TestMarshall(c *C) {
6570
cfg := NewConfig()
6671
cfg.Core.IsBare = true
6772
cfg.Core.Worktree = "bar"
73+
cfg.Pack.Window = 20
6874
cfg.Remotes["origin"] = &RemoteConfig{
6975
Name: "origin",
7076
URLs: []string{"[email protected]:mcuadros/go-git.git"},
@@ -92,6 +98,8 @@ func (s *ConfigSuite) TestUnmarshallMarshall(c *C) {
9298
bare = true
9399
worktree = foo
94100
custom = ignored
101+
[pack]
102+
window = 20
95103
[remote "origin"]
96104
url = [email protected]:mcuadros/go-git.git
97105
fetch = +refs/heads/*:refs/remotes/origin/*

plumbing/format/packfile/delta_selector.go

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@ import (
99
)
1010

1111
const (
12-
// How far back in the sorted list to search for deltas. 10 is
13-
// the default in command line git.
14-
deltaWindowSize = 10
1512
// deltas based on deltas, how many steps we can do.
1613
// 50 is the default value used in JGit
1714
maxDepth = int64(50)
@@ -31,14 +28,24 @@ func newDeltaSelector(s storer.EncodedObjectStorer) *deltaSelector {
3128
return &deltaSelector{s}
3229
}
3330

34-
// ObjectsToPack creates a list of ObjectToPack from the hashes provided,
35-
// creating deltas if it's suitable, using an specific internal logic
36-
func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) {
37-
otp, err := dw.objectsToPack(hashes)
31+
// ObjectsToPack creates a list of ObjectToPack from the hashes
32+
// provided, creating deltas if it's suitable, using a specific
33+
// internal logic. `packWindow` specifies the size of the sliding
34+
// window used to compare objects for delta compression; 0 turns off
35+
// delta compression entirely.
36+
func (dw *deltaSelector) ObjectsToPack(
37+
hashes []plumbing.Hash,
38+
packWindow uint,
39+
) ([]*ObjectToPack, error) {
40+
otp, err := dw.objectsToPack(hashes, packWindow)
3841
if err != nil {
3942
return nil, err
4043
}
4144

45+
if packWindow == 0 {
46+
return otp, nil
47+
}
48+
4249
dw.sort(otp)
4350

4451
var objectGroups [][]*ObjectToPack
@@ -60,7 +67,7 @@ func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack,
6067
objs := objs
6168
wg.Add(1)
6269
go func() {
63-
if walkErr := dw.walk(objs); walkErr != nil {
70+
if walkErr := dw.walk(objs, packWindow); walkErr != nil {
6471
once.Do(func() {
6572
err = walkErr
6673
})
@@ -77,10 +84,19 @@ func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack,
7784
return otp, nil
7885
}
7986

80-
func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) {
87+
func (dw *deltaSelector) objectsToPack(
88+
hashes []plumbing.Hash,
89+
packWindow uint,
90+
) ([]*ObjectToPack, error) {
8191
var objectsToPack []*ObjectToPack
8292
for _, h := range hashes {
83-
o, err := dw.encodedDeltaObject(h)
93+
var o plumbing.EncodedObject
94+
var err error
95+
if packWindow == 0 {
96+
o, err = dw.encodedObject(h)
97+
} else {
98+
o, err = dw.encodedDeltaObject(h)
99+
}
84100
if err != nil {
85101
return nil, err
86102
}
@@ -93,6 +109,10 @@ func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack,
93109
objectsToPack = append(objectsToPack, otp)
94110
}
95111

112+
if packWindow == 0 {
113+
return objectsToPack, nil
114+
}
115+
96116
if err := dw.fixAndBreakChains(objectsToPack); err != nil {
97117
return nil, err
98118
}
@@ -201,7 +221,10 @@ func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) {
201221
sort.Sort(byTypeAndSize(objectsToPack))
202222
}
203223

204-
func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
224+
func (dw *deltaSelector) walk(
225+
objectsToPack []*ObjectToPack,
226+
packWindow uint,
227+
) error {
205228
indexMap := make(map[plumbing.Hash]*deltaIndex)
206229
for i := 0; i < len(objectsToPack); i++ {
207230
target := objectsToPack[i]
@@ -218,7 +241,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
218241
continue
219242
}
220243

221-
for j := i - 1; j >= 0 && i-j < deltaWindowSize; j-- {
244+
for j := i - 1; j >= 0 && i-j < int(packWindow); j-- {
222245
base := objectsToPack[j]
223246
// Objects must use only the same type as their delta base.
224247
// Since objectsToPack is sorted by type and size, once we find

plumbing/format/packfile/delta_selector_test.go

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -146,31 +146,32 @@ func (s *DeltaSelectorSuite) createTestObjects() {
146146
func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) {
147147
// Different type
148148
hashes := []plumbing.Hash{s.hashes["base"], s.hashes["treeType"]}
149-
otp, err := s.ds.ObjectsToPack(hashes)
149+
deltaWindowSize := uint(10)
150+
otp, err := s.ds.ObjectsToPack(hashes, deltaWindowSize)
150151
c.Assert(err, IsNil)
151152
c.Assert(len(otp), Equals, 2)
152153
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["base"]])
153154
c.Assert(otp[1].Object, Equals, s.store.Objects[s.hashes["treeType"]])
154155

155156
// Size radically different
156157
hashes = []plumbing.Hash{s.hashes["bigBase"], s.hashes["target"]}
157-
otp, err = s.ds.ObjectsToPack(hashes)
158+
otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize)
158159
c.Assert(err, IsNil)
159160
c.Assert(len(otp), Equals, 2)
160161
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["bigBase"]])
161162
c.Assert(otp[1].Object, Equals, s.store.Objects[s.hashes["target"]])
162163

163164
// Delta Size Limit with no best delta yet
164165
hashes = []plumbing.Hash{s.hashes["smallBase"], s.hashes["smallTarget"]}
165-
otp, err = s.ds.ObjectsToPack(hashes)
166+
otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize)
166167
c.Assert(err, IsNil)
167168
c.Assert(len(otp), Equals, 2)
168169
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["smallBase"]])
169170
c.Assert(otp[1].Object, Equals, s.store.Objects[s.hashes["smallTarget"]])
170171

171172
// It will create the delta
172173
hashes = []plumbing.Hash{s.hashes["base"], s.hashes["target"]}
173-
otp, err = s.ds.ObjectsToPack(hashes)
174+
otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize)
174175
c.Assert(err, IsNil)
175176
c.Assert(len(otp), Equals, 2)
176177
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["target"]])
@@ -185,7 +186,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) {
185186
s.hashes["o2"],
186187
s.hashes["o3"],
187188
}
188-
otp, err = s.ds.ObjectsToPack(hashes)
189+
otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize)
189190
c.Assert(err, IsNil)
190191
c.Assert(len(otp), Equals, 3)
191192
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["o1"]])
@@ -201,20 +202,32 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) {
201202
// a delta.
202203
hashes = make([]plumbing.Hash, 0, deltaWindowSize+2)
203204
hashes = append(hashes, s.hashes["base"])
204-
for i := 0; i < deltaWindowSize; i++ {
205+
for i := uint(0); i < deltaWindowSize; i++ {
205206
hashes = append(hashes, s.hashes["smallTarget"])
206207
}
207208
hashes = append(hashes, s.hashes["target"])
208209

209210
// Don't sort so we can easily check the sliding window without
210211
// creating a bunch of new objects.
211-
otp, err = s.ds.objectsToPack(hashes)
212+
otp, err = s.ds.objectsToPack(hashes, deltaWindowSize)
212213
c.Assert(err, IsNil)
213-
err = s.ds.walk(otp)
214+
err = s.ds.walk(otp, deltaWindowSize)
214215
c.Assert(err, IsNil)
215-
c.Assert(len(otp), Equals, deltaWindowSize+2)
216+
c.Assert(len(otp), Equals, int(deltaWindowSize)+2)
216217
targetIdx := len(otp) - 1
217218
c.Assert(otp[targetIdx].IsDelta(), Equals, false)
219+
220+
// Check that no deltas are created, and the objects are unsorted,
221+
// if compression is off.
222+
hashes = []plumbing.Hash{s.hashes["base"], s.hashes["target"]}
223+
otp, err = s.ds.ObjectsToPack(hashes, 0)
224+
c.Assert(err, IsNil)
225+
c.Assert(len(otp), Equals, 2)
226+
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["base"]])
227+
c.Assert(otp[0].IsDelta(), Equals, false)
228+
c.Assert(otp[1].Original, Equals, s.store.Objects[s.hashes["target"]])
229+
c.Assert(otp[1].IsDelta(), Equals, false)
230+
c.Assert(otp[1].Depth, Equals, 0)
218231
}
219232

220233
func (s *DeltaSelectorSuite) TestMaxDepth(c *C) {

plumbing/format/packfile/encoder.go

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ import (
1414
// Encoder gets the data from the storage and write it into the writer in PACK
1515
// format
1616
type Encoder struct {
17-
selector *deltaSelector
18-
w *offsetWriter
19-
zw *zlib.Writer
20-
hasher plumbing.Hasher
17+
selector *deltaSelector
18+
w *offsetWriter
19+
zw *zlib.Writer
20+
hasher plumbing.Hasher
2121
// offsets is a map of object hashes to corresponding offsets in the packfile.
2222
// It is used to determine offset of the base of a delta when a OFS_DELTA is
2323
// used.
@@ -45,10 +45,15 @@ func NewEncoder(w io.Writer, s storer.EncodedObjectStorer, useRefDeltas bool) *E
4545
}
4646
}
4747

48-
// Encode creates a packfile containing all the objects referenced in hashes
49-
// and writes it to the writer in the Encoder.
50-
func (e *Encoder) Encode(hashes []plumbing.Hash) (plumbing.Hash, error) {
51-
objects, err := e.selector.ObjectsToPack(hashes)
48+
// Encode creates a packfile containing all the objects referenced in
49+
// hashes and writes it to the writer in the Encoder. `packWindow`
50+
// specifies the size of the sliding window used to compare objects
51+
// for delta compression; 0 turns off delta compression entirely.
52+
func (e *Encoder) Encode(
53+
hashes []plumbing.Hash,
54+
packWindow uint,
55+
) (plumbing.Hash, error) {
56+
objects, err := e.selector.ObjectsToPack(hashes, packWindow)
5257
if err != nil {
5358
return plumbing.ZeroHash, err
5459
}
@@ -137,7 +142,7 @@ func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) err
137142

138143
// for OFS_DELTA, offset of the base is interpreted as negative offset
139144
// relative to the type-byte of the header of the ofs-delta entry.
140-
relativeOffset := deltaOffset-baseOffset
145+
relativeOffset := deltaOffset - baseOffset
141146
if relativeOffset <= 0 {
142147
return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset)
143148
}

plumbing/format/packfile/encoder_advanced_test.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,23 @@ func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) {
2727
fixs.Test(c, func(f *fixtures.Fixture) {
2828
storage, err := filesystem.NewStorage(f.DotGit())
2929
c.Assert(err, IsNil)
30-
s.testEncodeDecode(c, storage)
30+
s.testEncodeDecode(c, storage, 10)
3131
})
3232

3333
}
3434

35-
func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) {
35+
func (s *EncoderAdvancedSuite) TestEncodeDecodeNoDeltaCompression(c *C) {
36+
fixs := fixtures.Basic().ByTag("packfile").ByTag(".git")
37+
fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git").
38+
ByTag("packfile").ByTag(".git").One())
39+
fixs.Test(c, func(f *fixtures.Fixture) {
40+
storage, err := filesystem.NewStorage(f.DotGit())
41+
c.Assert(err, IsNil)
42+
s.testEncodeDecode(c, storage, 0)
43+
})
44+
}
45+
46+
func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, packWindow uint) {
3647

3748
objIter, err := storage.IterEncodedObjects(plumbing.AnyObject)
3849
c.Assert(err, IsNil)
@@ -57,7 +68,7 @@ func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) {
5768

5869
buf := bytes.NewBuffer(nil)
5970
enc := NewEncoder(buf, storage, false)
60-
_, err = enc.Encode(hashes)
71+
_, err = enc.Encode(hashes, packWindow)
6172
c.Assert(err, IsNil)
6273

6374
scanner := NewScanner(buf)

0 commit comments

Comments
 (0)