Skip to content

Commit d0feaa3

Browse files
author
Son Luong Ngoc
committed
Added Bloom filter verification
A commit-graph written with Bloom filters helps accelerate many git operations that require directory tree traversal to detect changed paths. However, when a commit-graph chain was written, some parts of the chain may not contain the Bloom filter data for a set of included commits. To overcome this, validate whether the Bloom filter was added into the chain and rebuild the entire chain if needed. Bloom filters are stored as optional chunks in commit-graph files: BIDX for the Bloom filter index and BDAT for the Bloom filter data. [1] To check for Bloom filter existence in each graph file: - Check whether the graph file contains optional chunks - Among the optional chunks, check for the existence of both BIDX and BDAT. Unless every graph file in the commit-graph chain contains a Bloom filter, rebuild the entire chain. [1]: https://github.com/git/git/blob/ae46588be0cd730430dded4491246dfb4eac5557/Documentation/technical/commit-graph-format.txt#L99
1 parent 4943c9d commit d0feaa3

File tree

1 file changed

+62
-0
lines changed

1 file changed

+62
-0
lines changed

git-care.sh

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,62 @@ prefetch_loop() {
5151
done;
5252
}
5353

54+
#######################################
# verify_bloom_filter checks that every graph file of the split
# commit-graph chain carries Bloom filter data. If any file is corrupted
# or lacks the Bloom filter chunks, the commit-graph-chain file is removed
# so that the whole chain gets rebuilt by the next commit-graph write.
#
# File layout relied upon (see git's commit-graph-format documentation):
#   - 8-byte header; the byte at offset 6 holds the number of chunks
#   - a chunk lookup table of 12-byte entries (4-byte id + 8-byte offset)
#   - the Bloom filter lives in two optional chunks: BIDX and BDAT
#
# Globals:   PROJECT_DIR (read)
# Arguments: none
# Outputs:   a diagnostic on stdout for the first offending graph file
# Returns:   0
#######################################
verify_bloom_filter() {
  local graphs_dir="${PROJECT_DIR}/.git/objects/info/commit-graphs"
  local chain_file="${graphs_dir}/commit-graph-chain"
  local f chunk_count chunk_id bytes_offset i
  local has_bloom_index has_bloom_data

  for f in "${graphs_dir}"/*.graph; do
    # An unmatched glob stays as the literal pattern: nothing to verify.
    [[ -f "${f}" ]] || continue

    # Read the single chunk-count byte as an unsigned 8-bit value.
    # (-i would request a full int, which is byte-order dependent when
    # only one byte is available.)
    chunk_count=$(od -j6 -N1 -An -tu1 "${f}" | tr -d '[:space:]') \
      || chunk_count=''

    # A commit graph always has 3 default chunks.
    # Fewer than 3 chunks (or an unreadable header) means corruption.
    if [[ ! "${chunk_count}" =~ ^[0-9]+$ ]] || (( chunk_count < 3 )); then
      echo "Corrupted commit-graph in ${f}"
      echo 'Rebuilding commit-graph chain'
      rm -f -- "${chain_file}"
      # One bad file invalidates the whole chain; no need to look further.
      return 0
    fi

    has_bloom_index=0
    has_bloom_data=0

    # Find the Bloom filter chunks among the non-default chunks,
    # always skipping the first 3 default chunks. With exactly 3 chunks
    # the loop body never runs and both flags stay 0.
    for (( i = 3; i < chunk_count; i++ )); do
      # 8 bytes for the commit-graph header,
      # 12 bytes for each lookup-table entry skipped.
      bytes_offset=$(( 8 + i * 12 ))

      # The first 4 bytes of the entry are the 4-character chunk id.
      chunk_id=$(od -j"${bytes_offset}" -N4 -An -c "${f}" \
        | tr -d '[:space:]') || chunk_id=''
      case "${chunk_id}" in
        'BIDX')
          has_bloom_index=1
          ;;
        'BDAT')
          has_bloom_data=1
          ;;
        *)
          ;;
      esac
    done

    # Both the index and the data chunk are required.
    if (( has_bloom_index == 0 || has_bloom_data == 0 )); then
      echo "Missing Bloom filter in ${f}"
      echo 'Rebuilding commit-graph chain with bloom filter (slow)'
      rm -f -- "${chain_file}"
      return 0
    fi
  done

  return 0
}
109+
54110
# commit_graph refreshes the commit-graph in a non-disruptive manner
55111
# thus speeding up git operations over the commit history, e.g. git-log
56112
commit_graph() {
@@ -62,6 +118,12 @@ commit_graph() {
62118
rm -f ${PROJECT_DIR}/.git/objects/info/commit-graph
63119
fi
64120

121+
# If a commit-graph chain already exists,
122+
# check that every graph file in the chain was built with Bloom filters.
123+
if [ -f ${PROJECT_DIR}/.git/objects/info/commit-graphs/commit-graph-chain ]; then
124+
verify_bloom_filter
125+
fi
126+
65127
git commit-graph write --reachable --split --changed-paths --size-multiple=4 --no-progress;
66128

67129
if git commit-graph verify --shallow --no-progress; then

0 commit comments

Comments
 (0)