Skip to content

Commit dc19321

Browse files
committed
scripts: add cherry-pick verification tool with fuzzy matching
This script compares a release branch against a source branch (e.g. master) to verify that all cherry-picked commits are unmodified. It first attempts fast matching using normalized patch hashes. If no exact match is found, it falls back to a fuzzy matching mechanism that (1) filters source commits by matching author, author date, and commit subject; (2) compares the normalized diffs using `diff -u`; and (3) selects the closest match based on the count of differing lines. Useful for verifying cherry-picks or rebased commits during release processes. Supports scan and compare limits for performance.
1 parent b3eb9a3 commit dc19321

File tree

1 file changed

+213
-0
lines changed

1 file changed

+213
-0
lines changed

scripts/fuzzy-match-release-branch.sh

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
#!/usr/bin/env bash
#
# Compare a release branch against a source branch and report, for every
# non-merge commit that exists only on the release branch, whether a commit
# with the same normalized patch exists on the source branch.
#
# Usage:
#   <script> --source <branch> --release <branch> [--scan-limit N] [--limit N]

# Abort on unhandled errors, unset variables, and failures inside pipelines.
set -euo pipefail

# Defaults; each one is overwritten by the matching command-line flag.
SRC_BRANCH=""        # --source:     branch where cherry-picks originated
RELEASE_BRANCH=""    # --release:    branch where cherry-picks landed
SRC_SCAN_LIMIT=1000  # --scan-limit: max commits to scan in the source branch
RELEASE_LIMIT=0      # --limit:      release commits to compare; 0 means all
9+
10+
#######################################
# Print the usage text and terminate the script.
# Arguments:
#   $1 - exit status (optional, default 1). Anything that is not a
#        non-negative integer is treated as 1.
# Outputs:
#   Usage text on stdout when the status is 0 (help was requested),
#   otherwise on stderr (usage is part of an error report).
# Returns:
#   Never returns; exits with the chosen status.
#######################################
show_help() {
  local status="${1:-1}"
  [[ "$status" =~ ^[0-9]+$ ]] || status=1

  local -a usage_lines=(
    "Usage: $0 --source <branch> --release <branch> [--scan-limit N] [--limit N]"
    ""
    " --source Branch where cherry-picks originated (e.g. master)"
    " --release Branch where cherry-picks landed (e.g. release-rc1)"
    " --scan-limit Max commits to scan in source branch (default: 1000)"
    " --limit Number of release commits to compare (default: all)"
  )

  if [[ "$status" -eq 0 ]]; then
    printf '%s\n' "${usage_lines[@]}"
  else
    printf '%s\n' "${usage_lines[@]}" >&2
  fi
  exit "$status"
}
19+
20+
#######################################
# Parse command-line flags into the global configuration variables.
# Globals:
#   SRC_BRANCH, RELEASE_BRANCH, SRC_SCAN_LIMIT, RELEASE_LIMIT (written)
# Arguments:
#   The script's command-line arguments.
# Outputs:
#   Error messages on stderr; usage text via show_help.
# Returns:
#   0 on success. On invalid input it calls show_help, which exits.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --source|--release|--scan-limit|--limit)
        # A value is mandatory and must not look like another flag.
        if [[ -z "${2:-}" || "$2" == -* ]]; then
          echo "Error: Missing value for argument $1" >&2
          show_help
        fi
        case "$1" in
          --source) SRC_BRANCH="$2" ;;
          --release) RELEASE_BRANCH="$2" ;;
          --scan-limit|--limit)
            # Both limits are later used in arithmetic tests and passed to
            # git, so reject anything that is not a plain decimal number.
            if [[ ! "$2" =~ ^[0-9]+$ ]]; then
              echo "Error: $1 expects a non-negative integer, got '$2'" >&2
              show_help
            fi
            # 10# forces base 10 so "010" or "09" are not read as octal.
            if [[ "$1" == "--scan-limit" ]]; then
              SRC_SCAN_LIMIT=$((10#$2))
            else
              RELEASE_LIMIT=$((10#$2))
            fi
            ;;
        esac
        shift 2
        ;;
      -h|--help)
        # Help was asked for explicitly, so request a success exit status.
        show_help 0
        ;;
      *)
        echo "Unknown argument: $1" >&2
        show_help
        ;;
    esac
  done
}

parse_args "$@"

if [[ -z "$SRC_BRANCH" || -z "$RELEASE_BRANCH" ]]; then
  echo "❌ Missing required arguments." >&2
  show_help
fi
44+
45+
# Cross-platform hashing
46+
hash_patch() {
47+
if command -v md5sum >/dev/null 2>&1; then
48+
md5sum | awk '{print $1}'
49+
else
50+
md5 | awk '{print $NF}'
51+
fi
52+
}
53+
54+
# Human-readable form of the release limit: the number itself when one was
# given, "ALL" when the limit is 0 (compare every release commit).
if [[ "$RELEASE_LIMIT" -gt 0 ]]; then
  release_limit_label="$RELEASE_LIMIT"
else
  release_limit_label="ALL"
fi

echo "🔍 Preparing comparison:"
echo " Source branch : $SRC_BRANCH"
echo " Release branch : $RELEASE_BRANCH"
echo " Max source scan: $SRC_SCAN_LIMIT"
echo " Max release compare: $release_limit_label"
echo ""

echo "🔄 Fetching latest refs..."
# Fetching is best-effort (the comparison can still run on local refs), but
# say so instead of silently continuing with possibly stale branches.
if ! git fetch --all --quiet; then
  echo "⚠️ git fetch failed; continuing with the refs already available locally." >&2
fi
63+
64+
echo "📥 Collecting release commits..."
# Non-merge commits reachable from the release branch but not from the source
# branch. git applies --max-count before --reverse, so this yields the newest
# RELEASE_LIMIT commits in oldest-first order. Letting git do the limiting
# avoids piping a long list into `head`, which can fail under pipefail when
# head closes the pipe early.
release_rev_args=(--no-merges --reverse)
if [[ "$RELEASE_LIMIT" -gt 0 ]]; then
  release_rev_args+=("--max-count=$RELEASE_LIMIT")
fi
RELEASE_COMMITS=$(git rev-list "${release_rev_args[@]}" "$RELEASE_BRANCH" "^$SRC_BRANCH")

RELEASE_COMMITS_ARRAY=()
while IFS= read -r line; do
  # The here-string yields one empty line when nothing was found; skip it.
  if [[ -n "$line" ]]; then
    RELEASE_COMMITS_ARRAY+=("$line")
  fi
done <<< "$RELEASE_COMMITS"
echo " → Found ${#RELEASE_COMMITS_ARRAY[@]} release commits."

if [[ "${#RELEASE_COMMITS_ARRAY[@]}" -eq 0 ]]; then
  echo "❌ No release commits found. Exiting." >&2
  exit 1
fi

echo "📥 Collecting source commits..."
# Newest-first list of at most SRC_SCAN_LIMIT non-merge source commits.
SRC_COMMITS=$(git rev-list --no-merges --max-count="$SRC_SCAN_LIMIT" "$SRC_BRANCH")
SRC_COMMITS_ARRAY=()
while IFS= read -r line; do
  if [[ -n "$line" ]]; then
    SRC_COMMITS_ARRAY+=("$line")
  fi
done <<< "$SRC_COMMITS"
echo " → Found ${#SRC_COMMITS_ARRAY[@]} source commits to scan."
echo ""
89+
90+
echo "⚙️ Indexing source commit metadata..."
echo " → Processing ${#SRC_COMMITS_ARRAY[@]} commits from $SRC_BRANCH..."

# Parallel arrays: entry k of each array describes SRC_COMMITS_ARRAY[k].
# Later lookups translate an index found in one array into the others, so
# exactly one entry must be appended per source commit, even on failure.
SRC_COMMIT_META=()   # "<subject>__<author name> <email>__<author date>"
SRC_PATCH_HASHES=()  # hash of the normalized patch
SRC_PATCHES=()       # normalized patch text

progress=0
for commit in "${SRC_COMMITS_ARRAY[@]}"; do
  progress=$((progress + 1))
  printf '\r [%d/%d] Indexing %s' "$progress" "${#SRC_COMMITS_ARRAY[@]}" "$commit"

  # One git call builds the whole metadata key instead of three.
  if meta_key=$(git log -1 --pretty=format:'%s__%an <%ae>__%ai' "$commit" 2>/dev/null); then
    # Normalize by stripping leading whitespace from every patch line.
    patch=$(git show --format= --unified=3 "$commit" | sed 's/^[[:space:]]*//')
    patch_hash=$(printf '%s\n' "$patch" | hash_patch)
  else
    # Unreadable commit: store empty placeholders so the arrays stay aligned.
    # An empty key or hash never equals the key or hash of a real commit.
    meta_key=""
    patch=""
    patch_hash=""
  fi

  SRC_COMMIT_META+=("$meta_key")
  SRC_PATCH_HASHES+=("$patch_hash")
  SRC_PATCHES+=("$patch")
done

printf '\n → Completed source indexing.\n'
113+
114+
#######################################
# Count added/removed lines in a unified diff read from stdin.
# The first two lines of a non-empty `diff -u` output are the ---/+++ file
# headers; they are skipped by position so that real changed lines whose
# content starts with "-- " or "++ " are still counted.
# Outputs: the count on stdout (0 for an empty diff). Always succeeds.
#######################################
count_changed_lines() {
  awk 'NR > 2 && /^[-+]/ { n++ } END { print n + 0 }'
}

#######################################
# Split a "<subject>__<author>__<date>" key into its three fields.
# Parsing runs from the right so a subject that itself contains "__" is kept
# intact.
# Arguments: $1 - metadata key
# Globals:   src_subject, src_author, src_authordate (written)
#######################################
split_meta_key() {
  local key="$1"
  local head
  src_authordate="${key##*__}"
  head="${key%__*}"
  src_author="${head##*__}"
  src_subject="${head%__*}"
}

TOTAL=${#RELEASE_COMMITS_ARRAY[@]}
MATCHED=0
UNMATCHED=0

for i in "${!RELEASE_COMMITS_ARRAY[@]}"; do
  rc_commit="${RELEASE_COMMITS_ARRAY[$i]}"

  # A commit whose metadata cannot be read is reported and counted as
  # unmatched, so that Matched + Unmatched always equals Total.
  if ! rc_author=$(git log -1 --pretty=format:"%an <%ae>" "$rc_commit" 2>/dev/null) \
    || ! rc_subject=$(git log -1 --pretty=format:"%s" "$rc_commit" 2>/dev/null) \
    || ! rc_authordate=$(git log -1 --pretty=format:"%ai" "$rc_commit" 2>/dev/null); then
    echo "⚠️ Could not read metadata for $rc_commit; counting it as unmatched." >&2
    UNMATCHED=$((UNMATCHED + 1))
    continue
  fi
  meta_key="${rc_subject}__${rc_author}__${rc_authordate}"

  printf '[%d/%d] Checking %s... ' "$((i + 1))" "$TOTAL" "${rc_commit:0:7}"

  # Same normalization as the source index: strip leading whitespace.
  rc_patch=$(git show --format= --unified=3 "$rc_commit" | sed 's/^[[:space:]]*//')
  rc_patch_hash=$(printf '%s\n' "$rc_patch" | hash_patch)

  # Pass 1: exact match on the normalized patch hash.
  found_exact_index=-1
  for j in "${!SRC_PATCH_HASHES[@]}"; do
    if [[ "${SRC_PATCH_HASHES[$j]}" == "$rc_patch_hash" ]]; then
      found_exact_index=$j
      break
    fi
  done

  if [[ $found_exact_index -ne -1 ]]; then
    found_exact="${SRC_COMMITS_ARRAY[$found_exact_index]}"
    split_meta_key "${SRC_COMMIT_META[$found_exact_index]}"
    echo "✅ MATCHES ${found_exact:0:7}"
    echo " ↪ RELEASE: $rc_commit"
    echo " Author : $rc_author"
    echo " Date : $rc_authordate"
    echo " Subject: \"$rc_subject\""
    echo " ↪ SOURCE : $found_exact"
    echo " Author : $src_author"
    echo " Date : $src_authordate"
    echo " Subject: \"$src_subject\""
    echo ""
    MATCHED=$((MATCHED + 1))
    continue
  fi

  echo "❌ NO MATCH"
  UNMATCHED=$((UNMATCHED + 1))

  echo "🔍 Unmatched Commit:"
  echo " ↪ Commit : $rc_commit"
  echo " ↪ Author : $rc_author"
  echo " ↪ Subject: \"$rc_subject\""
  echo ""

  # Pass 2: among source commits with identical subject, author and author
  # date, pick the one whose normalized patch differs in the fewest lines.
  best_score=-1
  best_index=-1
  fuzzy_candidates=0

  for j in "${!SRC_COMMIT_META[@]}"; do
    [[ "${SRC_COMMIT_META[$j]}" == "$meta_key" ]] || continue

    # Plain assignment: ((var++)) returns status 1 when var is 0, which
    # would terminate the script under `set -e`.
    fuzzy_candidates=$((fuzzy_candidates + 1))

    # diff exits 1 when the inputs differ; that is expected here.
    patch_delta=$(diff -u <(printf '%s\n' "$rc_patch") <(printf '%s\n' "${SRC_PATCHES[$j]}") || true)
    score=$(printf '%s\n' "$patch_delta" | count_changed_lines)

    # The first candidate always wins, so no sentinel score is needed.
    if ((best_index < 0 || score < best_score)); then
      best_score=$score
      best_index=$j
    fi
  done

  if [[ "$fuzzy_candidates" -eq 0 ]]; then
    echo "⚠️ No commits with matching author + subject + date in source branch."
  else
    match_commit="${SRC_COMMITS_ARRAY[$best_index]}"
    match_author=$(git log -1 --pretty=format:"%an <%ae>" "$match_commit")
    match_subject=$(git log -1 --pretty=format:"%s" "$match_commit")

    echo "🤔 Closest fuzzy match: $match_commit ($best_score changed lines from $fuzzy_candidates candidates)"
    echo " ↪ Author : $match_author"
    echo " ↪ Subject: \"$match_subject\""
    echo ""
    echo "🔧 Check it manually (patch diff):"
    echo " diff -u <(git show --format= --unified=3 $rc_commit) <(git show --format= --unified=3 $match_commit)"
    echo ""
    echo "🔍 Diff between release and closest match:"
    echo "---------------------------------------------"
    # Shown un-normalized; diff's non-zero status for differing input is
    # expected and must not abort the script.
    diff -u <(git show --format= --unified=3 "$rc_commit") <(git show --format= --unified=3 "$match_commit") | sed 's/^/ /' || true
    echo "---------------------------------------------"
    echo ""
  fi

done

# Summary
echo ""
echo "🔎 Summary:"
echo " ✅ Matched : $MATCHED"
echo " ❌ Unmatched : $UNMATCHED"
echo " 📦 Total : $TOTAL"
213+

0 commit comments

Comments
 (0)