Skip to content

Commit 08fd81c

Browse files
derrickstoleegitster
authored andcommitted
commit-graph: implement write_commit_graph()
Teach Git to write a commit graph file by checking all packed objects to see if they are commits, then store the file in the given object directory. Helped-by: Jeff King <[email protected]> Signed-off-by: Derrick Stolee <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 4ce58ee commit 08fd81c

File tree

3 files changed

+366
-0
lines changed

3 files changed

+366
-0
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,7 @@ LIB_OBJS += color.o
771771
LIB_OBJS += column.o
772772
LIB_OBJS += combine-diff.o
773773
LIB_OBJS += commit.o
774+
LIB_OBJS += commit-graph.o
774775
LIB_OBJS += compat/obstack.o
775776
LIB_OBJS += compat/terminal.o
776777
LIB_OBJS += config.o

commit-graph.c

Lines changed: 359 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,359 @@
1+
#include "cache.h"
2+
#include "config.h"
3+
#include "git-compat-util.h"
4+
#include "lockfile.h"
5+
#include "pack.h"
6+
#include "packfile.h"
7+
#include "commit.h"
8+
#include "object.h"
9+
#include "revision.h"
10+
#include "sha1-lookup.h"
11+
#include "commit-graph.h"
12+
13+
#define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
14+
#define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
15+
#define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
16+
#define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
17+
#define GRAPH_CHUNKID_LARGEEDGES 0x45444745 /* "EDGE" */
18+
19+
#define GRAPH_DATA_WIDTH 36
20+
21+
#define GRAPH_VERSION_1 0x1
22+
#define GRAPH_VERSION GRAPH_VERSION_1
23+
24+
#define GRAPH_OID_VERSION_SHA1 1
25+
#define GRAPH_OID_LEN_SHA1 GIT_SHA1_RAWSZ
26+
#define GRAPH_OID_VERSION GRAPH_OID_VERSION_SHA1
27+
#define GRAPH_OID_LEN GRAPH_OID_LEN_SHA1
28+
29+
#define GRAPH_OCTOPUS_EDGES_NEEDED 0x80000000
30+
#define GRAPH_PARENT_MISSING 0x7fffffff
31+
#define GRAPH_EDGE_LAST_MASK 0x7fffffff
32+
#define GRAPH_PARENT_NONE 0x70000000
33+
34+
#define GRAPH_LAST_EDGE 0x80000000
35+
36+
#define GRAPH_FANOUT_SIZE (4 * 256)
37+
#define GRAPH_CHUNKLOOKUP_WIDTH 12
38+
#define GRAPH_MIN_SIZE (5 * GRAPH_CHUNKLOOKUP_WIDTH + GRAPH_FANOUT_SIZE + \
39+
GRAPH_OID_LEN + 8)
40+
41+
42+
static char *get_commit_graph_filename(const char *obj_dir)
43+
{
44+
return xstrfmt("%s/info/commit-graph", obj_dir);
45+
}
46+
47+
static void write_graph_chunk_fanout(struct hashfile *f,
48+
struct commit **commits,
49+
int nr_commits)
50+
{
51+
int i, count = 0;
52+
struct commit **list = commits;
53+
54+
/*
55+
* Write the first-level table (the list is sorted,
56+
* but we use a 256-entry lookup to be able to avoid
57+
* having to do eight extra binary search iterations).
58+
*/
59+
for (i = 0; i < 256; i++) {
60+
while (count < nr_commits) {
61+
if ((*list)->object.oid.hash[0] != i)
62+
break;
63+
count++;
64+
list++;
65+
}
66+
67+
hashwrite_be32(f, count);
68+
}
69+
}
70+
71+
static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
72+
struct commit **commits, int nr_commits)
73+
{
74+
struct commit **list = commits;
75+
int count;
76+
for (count = 0; count < nr_commits; count++, list++)
77+
hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
78+
}
79+
80+
static const unsigned char *commit_to_sha1(size_t index, void *table)
81+
{
82+
struct commit **commits = table;
83+
return commits[index]->object.oid.hash;
84+
}
85+
86+
static void write_graph_chunk_data(struct hashfile *f, int hash_len,
87+
struct commit **commits, int nr_commits)
88+
{
89+
struct commit **list = commits;
90+
struct commit **last = commits + nr_commits;
91+
uint32_t num_extra_edges = 0;
92+
93+
while (list < last) {
94+
struct commit_list *parent;
95+
int edge_value;
96+
uint32_t packedDate[2];
97+
98+
parse_commit(*list);
99+
hashwrite(f, (*list)->tree->object.oid.hash, hash_len);
100+
101+
parent = (*list)->parents;
102+
103+
if (!parent)
104+
edge_value = GRAPH_PARENT_NONE;
105+
else {
106+
edge_value = sha1_pos(parent->item->object.oid.hash,
107+
commits,
108+
nr_commits,
109+
commit_to_sha1);
110+
111+
if (edge_value < 0)
112+
edge_value = GRAPH_PARENT_MISSING;
113+
}
114+
115+
hashwrite_be32(f, edge_value);
116+
117+
if (parent)
118+
parent = parent->next;
119+
120+
if (!parent)
121+
edge_value = GRAPH_PARENT_NONE;
122+
else if (parent->next)
123+
edge_value = GRAPH_OCTOPUS_EDGES_NEEDED | num_extra_edges;
124+
else {
125+
edge_value = sha1_pos(parent->item->object.oid.hash,
126+
commits,
127+
nr_commits,
128+
commit_to_sha1);
129+
if (edge_value < 0)
130+
edge_value = GRAPH_PARENT_MISSING;
131+
}
132+
133+
hashwrite_be32(f, edge_value);
134+
135+
if (edge_value & GRAPH_OCTOPUS_EDGES_NEEDED) {
136+
do {
137+
num_extra_edges++;
138+
parent = parent->next;
139+
} while (parent);
140+
}
141+
142+
if (sizeof((*list)->date) > 4)
143+
packedDate[0] = htonl(((*list)->date >> 32) & 0x3);
144+
else
145+
packedDate[0] = 0;
146+
147+
packedDate[1] = htonl((*list)->date);
148+
hashwrite(f, packedDate, 8);
149+
150+
list++;
151+
}
152+
}
153+
154+
static void write_graph_chunk_large_edges(struct hashfile *f,
155+
struct commit **commits,
156+
int nr_commits)
157+
{
158+
struct commit **list = commits;
159+
struct commit **last = commits + nr_commits;
160+
struct commit_list *parent;
161+
162+
while (list < last) {
163+
int num_parents = 0;
164+
for (parent = (*list)->parents; num_parents < 3 && parent;
165+
parent = parent->next)
166+
num_parents++;
167+
168+
if (num_parents <= 2) {
169+
list++;
170+
continue;
171+
}
172+
173+
/* Since num_parents > 2, this initializer is safe. */
174+
for (parent = (*list)->parents->next; parent; parent = parent->next) {
175+
int edge_value = sha1_pos(parent->item->object.oid.hash,
176+
commits,
177+
nr_commits,
178+
commit_to_sha1);
179+
180+
if (edge_value < 0)
181+
edge_value = GRAPH_PARENT_MISSING;
182+
else if (!parent->next)
183+
edge_value |= GRAPH_LAST_EDGE;
184+
185+
hashwrite_be32(f, edge_value);
186+
}
187+
188+
list++;
189+
}
190+
}
191+
192+
static int commit_compare(const void *_a, const void *_b)
193+
{
194+
const struct object_id *a = (const struct object_id *)_a;
195+
const struct object_id *b = (const struct object_id *)_b;
196+
return oidcmp(a, b);
197+
}
198+
199+
struct packed_commit_list {
200+
struct commit **list;
201+
int nr;
202+
int alloc;
203+
};
204+
205+
struct packed_oid_list {
206+
struct object_id *list;
207+
int nr;
208+
int alloc;
209+
};
210+
211+
static int add_packed_commits(const struct object_id *oid,
212+
struct packed_git *pack,
213+
uint32_t pos,
214+
void *data)
215+
{
216+
struct packed_oid_list *list = (struct packed_oid_list*)data;
217+
enum object_type type;
218+
off_t offset = nth_packed_object_offset(pack, pos);
219+
struct object_info oi = OBJECT_INFO_INIT;
220+
221+
oi.typep = &type;
222+
if (packed_object_info(pack, offset, &oi) < 0)
223+
die("unable to get type of object %s", oid_to_hex(oid));
224+
225+
if (type != OBJ_COMMIT)
226+
return 0;
227+
228+
ALLOC_GROW(list->list, list->nr + 1, list->alloc);
229+
oidcpy(&(list->list[list->nr]), oid);
230+
list->nr++;
231+
232+
return 0;
233+
}
234+
235+
void write_commit_graph(const char *obj_dir)
236+
{
237+
struct packed_oid_list oids;
238+
struct packed_commit_list commits;
239+
struct hashfile *f;
240+
uint32_t i, count_distinct = 0;
241+
char *graph_name;
242+
int fd;
243+
struct lock_file lk = LOCK_INIT;
244+
uint32_t chunk_ids[5];
245+
uint64_t chunk_offsets[5];
246+
int num_chunks;
247+
int num_extra_edges;
248+
struct commit_list *parent;
249+
250+
oids.nr = 0;
251+
oids.alloc = approximate_object_count() / 4;
252+
253+
if (oids.alloc < 1024)
254+
oids.alloc = 1024;
255+
ALLOC_ARRAY(oids.list, oids.alloc);
256+
257+
for_each_packed_object(add_packed_commits, &oids, 0);
258+
259+
QSORT(oids.list, oids.nr, commit_compare);
260+
261+
count_distinct = 1;
262+
for (i = 1; i < oids.nr; i++) {
263+
if (oidcmp(&oids.list[i-1], &oids.list[i]))
264+
count_distinct++;
265+
}
266+
267+
if (count_distinct >= GRAPH_PARENT_MISSING)
268+
die(_("the commit graph format cannot write %d commits"), count_distinct);
269+
270+
commits.nr = 0;
271+
commits.alloc = count_distinct;
272+
ALLOC_ARRAY(commits.list, commits.alloc);
273+
274+
num_extra_edges = 0;
275+
for (i = 0; i < oids.nr; i++) {
276+
int num_parents = 0;
277+
if (i > 0 && !oidcmp(&oids.list[i-1], &oids.list[i]))
278+
continue;
279+
280+
commits.list[commits.nr] = lookup_commit(&oids.list[i]);
281+
parse_commit(commits.list[commits.nr]);
282+
283+
for (parent = commits.list[commits.nr]->parents;
284+
parent; parent = parent->next)
285+
num_parents++;
286+
287+
if (num_parents > 2)
288+
num_extra_edges += num_parents - 1;
289+
290+
commits.nr++;
291+
}
292+
num_chunks = num_extra_edges ? 4 : 3;
293+
294+
if (commits.nr >= GRAPH_PARENT_MISSING)
295+
die(_("too many commits to write graph"));
296+
297+
graph_name = get_commit_graph_filename(obj_dir);
298+
fd = hold_lock_file_for_update(&lk, graph_name, 0);
299+
300+
if (fd < 0) {
301+
struct strbuf folder = STRBUF_INIT;
302+
strbuf_addstr(&folder, graph_name);
303+
strbuf_setlen(&folder, strrchr(folder.buf, '/') - folder.buf);
304+
305+
if (mkdir(folder.buf, 0777) < 0)
306+
die_errno(_("cannot mkdir %s"), folder.buf);
307+
strbuf_release(&folder);
308+
309+
fd = hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR);
310+
311+
if (fd < 0)
312+
die_errno("unable to create '%s'", graph_name);
313+
}
314+
315+
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
316+
317+
hashwrite_be32(f, GRAPH_SIGNATURE);
318+
319+
hashwrite_u8(f, GRAPH_VERSION);
320+
hashwrite_u8(f, GRAPH_OID_VERSION);
321+
hashwrite_u8(f, num_chunks);
322+
hashwrite_u8(f, 0); /* unused padding byte */
323+
324+
chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
325+
chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
326+
chunk_ids[2] = GRAPH_CHUNKID_DATA;
327+
if (num_extra_edges)
328+
chunk_ids[3] = GRAPH_CHUNKID_LARGEEDGES;
329+
else
330+
chunk_ids[3] = 0;
331+
chunk_ids[4] = 0;
332+
333+
chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
334+
chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
335+
chunk_offsets[2] = chunk_offsets[1] + GRAPH_OID_LEN * commits.nr;
336+
chunk_offsets[3] = chunk_offsets[2] + (GRAPH_OID_LEN + 16) * commits.nr;
337+
chunk_offsets[4] = chunk_offsets[3] + 4 * num_extra_edges;
338+
339+
for (i = 0; i <= num_chunks; i++) {
340+
uint32_t chunk_write[3];
341+
342+
chunk_write[0] = htonl(chunk_ids[i]);
343+
chunk_write[1] = htonl(chunk_offsets[i] >> 32);
344+
chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff);
345+
hashwrite(f, chunk_write, 12);
346+
}
347+
348+
write_graph_chunk_fanout(f, commits.list, commits.nr);
349+
write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr);
350+
write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr);
351+
write_graph_chunk_large_edges(f, commits.list, commits.nr);
352+
353+
finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
354+
commit_lock_file(&lk);
355+
356+
free(oids.list);
357+
oids.alloc = 0;
358+
oids.nr = 0;
359+
}

commit-graph.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#ifndef COMMIT_GRAPH_H
2+
#define COMMIT_GRAPH_H
3+
4+
void write_commit_graph(const char *obj_dir);
5+
6+
#endif

0 commit comments

Comments
 (0)