Skip to content

Commit b22658c

Browse files
Git scripts: checks commit sizes for entire repo and specific hash (#95)
1 parent 139809a commit b22658c

File tree

3 files changed

+334
-0
lines changed

3 files changed

+334
-0
lines changed

git/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ Amends the most recent commit by adding newly staged changes
5656

5757
Amends the most recent commit's message
5858

59+
## git-commit-analyzer.sh
60+
61+
This script analyzes the size of a specific Git commit
62+
5963
## git-commit-empty-commit.sh
6064

6165
Commits empty commit (useful for triggering CI builds)
@@ -72,6 +76,10 @@ Finds the best common ancestor(s) between two commits (you can use it to compare
7276

7377
Reorders the last 2 commits
7478

79+
## git-repo-commit-analyzer.sh
80+
81+
This script analyzes the size of all commits in a Git repository
82+
7583
## git-show-branch.sh
7684

7785
Shows commits that are common for branches being compared

git/git-commit-analyzer.sh

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#!/bin/bash
2+
3+
# -----------------------------------------------------------------------------
4+
# Script Name: git-commit-analyzer.sh
5+
# Description: This script analyzes the size of a specific Git commit.
6+
# It calculates the total size of all files in the commit, the
7+
# number of files modified, and provides a detailed breakdown of
8+
# file sizes. The results are displayed in a human-readable format.
9+
#
10+
# Usage: ./git-commit-analyzer.sh <commit-hash>
11+
#
12+
# Features:
13+
# - Verifies if the script is run inside a valid Git repository.
14+
# - Checks if the specified commit hash exists in the repository.
15+
# - Calculates the total size of all files in the commit.
16+
# - Displays the size of each file in the commit in a sorted, descending order.
17+
# - Formats file sizes for readability (bytes, KB, MB).
18+
# - Handles the initial commit by comparing it to an empty tree.
19+
#
20+
# Requirements:
21+
# - Must be run from within a valid Git repository.
22+
# - Requires a valid commit hash to be passed as an argument.
23+
#
24+
# Output:
25+
# - A detailed breakdown of file sizes in the specified commit.
26+
# - Total number of files and the total size of the commit.
27+
#
28+
# Example:
29+
# ./git-commit-analyzer.sh abc1234
30+
# This will analyze the commit with hash `abc1234` and display the size of
31+
# each file in the commit, along with the total size and file count.
32+
#
33+
# Author: Mickey Gousset (@mickeygousset)
34+
# Date: 2025-04-05
35+
# -----------------------------------------------------------------------------
36+
37+
set -e

# Print a byte count in the report's human-readable form.
# Arguments: $1 - size in bytes (non-negative integer)
# Outputs:   "<n> bytes", "<n.nn> KB" or "<n.nn> MB" on stdout (no newline)
# Returns:   non-zero if bc fails
format_size() {
  local bytes=$1
  local scaled

  if [ "$bytes" -gt 1048576 ]; then
    scaled=$(echo "scale=2; $bytes/1048576" | bc) || return 1
    printf '%s MB' "$scaled"
  elif [ "$bytes" -gt 1024 ]; then
    scaled=$(echo "scale=2; $bytes/1024" | bc) || return 1
    printf '%s KB' "$scaled"
  else
    printf '%s bytes' "$bytes"
  fi
}

if [ $# -ne 1 ]; then
  echo "Usage: $0 <commit-hash>" >&2
  exit 1
fi

COMMIT=$1

# Verify this is a valid git repository
if ! git rev-parse --is-inside-work-tree &>/dev/null; then
  echo "Error: Not in a git repository" >&2
  exit 1
fi

# Verify the commit exists
if ! git cat-file -e "$COMMIT^{commit}" 2>/dev/null; then
  echo "Error: Commit $COMMIT does not exist" >&2
  exit 1
fi

echo "Analyzing commit: $COMMIT"
echo "------------------------"

# Detect a root commit. --verify is required here: without it, rev-parse
# echoes the unresolved "<commit>^" to stdout before failing, so a captured
# value is never empty and the root commit goes undetected.
if ! git rev-parse --verify --quiet "$COMMIT^" >/dev/null; then
  echo "This is the initial commit. Comparing with empty tree."
fi

# Get the list of files changed in this commit.
#   --root               shows a root commit as a diff against the empty tree
#                        (without it diff-tree prints nothing for that commit)
#   core.quotepath=off   keeps non-ASCII paths verbatim so the ls-tree lookup
#                        below can find them
FILES=$(git -c core.quotepath=off diff-tree --root --no-commit-id --name-only -r "$COMMIT")

# Temporary file holding "<size> <path>" lines for sorting; the EXIT trap
# removes it on every exit path, including aborts caused by set -e.
TEMP_FILE=$(mktemp)
trap 'rm -f -- "$TEMP_FILE"' EXIT

# Process each file
while IFS= read -r file; do
  # An empty diff still yields one empty line through the here-string.
  [ -n "$file" ] || continue

  # Get the file blob from the commit (empty when the file was deleted)
  BLOB=$(git ls-tree -r "$COMMIT" -- "$file" 2>/dev/null | awk '{print $3}')
  [ -n "$BLOB" ] || continue

  # Entries whose object is not in this repository (e.g. submodule gitlinks)
  # have no measurable size; skip them instead of aborting the whole run.
  SIZE=$(git cat-file -s "$BLOB" 2>/dev/null) || continue

  printf '%s %s\n' "$SIZE" "$file" >> "$TEMP_FILE"
done <<< "$FILES"

# Calculate total size and count
TOTAL_SIZE=0
FILE_COUNT=0

# Print header
printf "%-60s %15s\n" "FILE" "SIZE"
printf "%-60s %15s\n" "----" "----"

# Sort by size (numerically, descending) and display
while read -r SIZE file; do
  TOTAL_SIZE=$((TOTAL_SIZE + SIZE))
  FILE_COUNT=$((FILE_COUNT + 1))

  FORMATTED_SIZE=$(format_size "$SIZE")
  printf "%-60s %15s\n" "$file" "$FORMATTED_SIZE"
done < <(sort -nr "$TEMP_FILE")

# Print total
TOTAL_FORMATTED=$(format_size "$TOTAL_SIZE")

echo "------------------------"
echo "Total files: $FILE_COUNT"
echo "Total size: $TOTAL_FORMATTED"

git/git-repo-commit-analyzer.sh

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
#!/bin/bash
2+
3+
# -----------------------------------------------------------------------------
4+
# Script Name: git-repo-commit-analyzer.sh
5+
# Description: This script analyzes the size of all commits in a Git repository.
6+
# It generates detailed logs, CSV reports, and exception logs for
7+
# commits that meet or exceed a specified size threshold.
8+
#
9+
# Usage: ./git-repo-commit-analyzer.sh <size-threshold-in-bytes>
10+
#
11+
# Features:
12+
# - Analyzes all commits in the current Git repository.
13+
# - Calculates the total size of each commit and the number of files it modifies.
14+
# - Generates the following output files:
15+
# 1. A log file with detailed analysis of all commits.
16+
# 2. A CSV file summarizing commit hash, size, and file count.
17+
# 3. An exceptions log file for commits exceeding the size threshold,
18+
# including detailed file sizes for each commit.
19+
# - Identifies and logs the largest commit in the repository.
20+
#
21+
# Requirements:
22+
# - Must be run from within a valid Git repository.
23+
# - Requires a size threshold (in bytes) to be passed as an argument.
24+
#
25+
# Output Files:
26+
# - <repo-name>-analyzer-<timestamp>.log
27+
# - <repo-name>-commits-size-<timestamp>.csv
28+
# - <repo-name>-commit-size-exceptions-<timestamp>.log
29+
#
30+
# Example:
31+
# ./git-repo-commit-analyzer.sh 100000
32+
# This will analyze all commits in the repository and log details for commits
33+
# with a total size of 100,000 bytes or more.
34+
#
35+
# Author: Mickey Gousset (@mickeygousset)
36+
# Date: 2025-04-05
37+
# -----------------------------------------------------------------------------
38+
39+
set -e

# Verify this is a valid git repository
if ! git rev-parse --is-inside-work-tree &>/dev/null; then
  echo "Error: Not in a git repository" >&2
  exit 1
fi

# Get the repository name (basename of the work-tree root)
REPO_NAME=$(basename "$(git rev-parse --show-toplevel)")

# Check if a size threshold is provided
if [ $# -ne 1 ]; then
  echo "Usage: $0 <size-threshold-in-bytes>" >&2
  exit 1
fi

SIZE_THRESHOLD=$1

# The threshold is later compared with `[ ... -ge ... ]`; a non-numeric value
# would make that test error out for every commit and silently match nothing,
# so reject it here before any output file is created.
case "$SIZE_THRESHOLD" in
  '' | *[!0-9]*)
    echo "Error: size threshold must be a non-negative integer (bytes): '$SIZE_THRESHOLD'" >&2
    echo "Usage: $0 <size-threshold-in-bytes>" >&2
    exit 1
    ;;
esac

# Generate timestamp for log and CSV filenames
TIMESTAMP=$(date +"%Y%m%d-%H%M%S")
LOG_FILE="${REPO_NAME}-analyzer-$TIMESTAMP.log"
CSV_FILE="${REPO_NAME}-commits-size-$TIMESTAMP.csv"
EXCEPTIONS_FILE="${REPO_NAME}-commit-size-exceptions-$TIMESTAMP.log"

# Initialize variables to track the largest commit
LARGEST_COMMIT=""
LARGEST_SIZE=0

# Create the CSV file and add the header
echo "Commit Hash,Commit Size (bytes),Number of Files" > "$CSV_FILE"

# Create the exceptions file
echo "Commits meeting or exceeding the size threshold ($SIZE_THRESHOLD bytes) in repository '$REPO_NAME':" > "$EXCEPTIONS_FILE"
73+
74+
# Write a message to the screen and append it to the run log.
# Globals:   LOG_FILE (read) - path of the log file to append to
# Arguments: $1 - message text
# Outputs:   the message followed by a newline, on stdout and in LOG_FILE
log() {
  # printf instead of echo: echo would treat a message such as "-n" or "-e"
  # as an option and print nothing.
  printf '%s\n' "$1" | tee -a "$LOG_FILE"
}
78+
79+
# Write a message to the screen and append it to the exceptions report.
# Globals:   EXCEPTIONS_FILE (read) - path of the exceptions file to append to
# Arguments: $1 - message text
# Outputs:   the message followed by a newline, on stdout and in EXCEPTIONS_FILE
log_exception() {
  # printf instead of echo: echo would treat a message such as "-n" or "-e"
  # as an option and print nothing.
  printf '%s\n' "$1" | tee -a "$EXCEPTIONS_FILE"
}
83+
84+
# Commits whose total size meets the threshold, stored as "<hash> (<size> bytes)"
declare -a LARGE_COMMITS=()

# Loop through all commits reachable from any ref. The list is read on fd 3 so
# that nothing inside the loop body can accidentally consume it from stdin.
while IFS= read -r COMMIT <&3; do
  log "Analyzing commit: $COMMIT in repository '$REPO_NAME'"
  log "------------------------"

  # Detect a root commit. --verify is required: without it, rev-parse echoes
  # the unresolved "<commit>^" to stdout before failing, so a captured value
  # is never empty and the root commit goes undetected.
  if ! git rev-parse --verify --quiet "$COMMIT^" >/dev/null; then
    log "This is the initial commit. Comparing with empty tree."
  fi

  # Get the list of files changed in this commit.
  #   --root               shows a root commit as a diff against the empty tree
  #   core.quotepath=off   keeps non-ASCII paths verbatim so the ls-tree
  #                        lookup below can find them
  FILES=$(git -c core.quotepath=off diff-tree --root --no-commit-id --name-only -r "$COMMIT" 2>/dev/null || echo "")

  if [ -z "$FILES" ]; then
    log "No files changed in this commit."
    continue
  fi

  # Per-commit totals, plus the per-file report lines that are only emitted
  # if the commit turns out to meet the threshold. Collecting them here avoids
  # a temp file and a second blob lookup for every file.
  TOTAL_SIZE=0
  FILE_COUNT=0
  FILE_DETAILS=()

  # Process each file
  while IFS= read -r file; do
    # Get the file blob from the commit (empty when the file was deleted)
    BLOB=$(git ls-tree -r "$COMMIT" -- "$file" 2>/dev/null | awk '{print $3}')
    [ -n "$BLOB" ] || continue

    # Objects that cannot be sized (e.g. submodule gitlinks) count as 0 bytes
    SIZE=$(git cat-file -s "$BLOB" 2>/dev/null || echo "0")
    FILE_DETAILS+=(" $file: $SIZE bytes")

    # Only non-empty files contribute to the size and file count
    if [ "$SIZE" -gt 0 ]; then
      TOTAL_SIZE=$((TOTAL_SIZE + SIZE))
      FILE_COUNT=$((FILE_COUNT + 1))
    fi
  done <<< "$FILES"

  # Log total for this commit
  log "Total files: $FILE_COUNT"
  log "Total size: $TOTAL_SIZE bytes"
  log "------------------------"

  # Append commit details to the CSV file
  echo "$COMMIT,$TOTAL_SIZE,$FILE_COUNT" >> "$CSV_FILE"

  # Check if this is the largest commit
  if [ "$TOTAL_SIZE" -gt "$LARGEST_SIZE" ]; then
    LARGEST_SIZE=$TOTAL_SIZE
    LARGEST_COMMIT=$COMMIT
  fi

  # Check if the commit meets the size threshold
  if [ "$TOTAL_SIZE" -ge "$SIZE_THRESHOLD" ]; then
    LARGE_COMMITS+=("$COMMIT ($TOTAL_SIZE bytes)")

    # Log details to the exceptions file
    log_exception "Commit: $COMMIT"
    log_exception "Total Size: $TOTAL_SIZE bytes"
    log_exception "Files:"

    # Log each file and its size
    for DETAIL in "${FILE_DETAILS[@]}"; do
      log_exception "$DETAIL"
    done

    log_exception "------------------------"
  fi
done 3< <(git rev-list --all)

# Output the largest commit
log "Largest commit: $LARGEST_COMMIT"
log "Largest size: $LARGEST_SIZE bytes"

# Output commits that meet the size threshold
if [ ${#LARGE_COMMITS[@]} -gt 0 ]; then
  log "Commits meeting or exceeding the size threshold ($SIZE_THRESHOLD bytes):"
  for COMMIT_INFO in "${LARGE_COMMITS[@]}"; do
    log "$COMMIT_INFO"
  done
else
  log "No commits meet or exceed the size threshold ($SIZE_THRESHOLD bytes)."
fi

log "Log file created: $LOG_FILE"
log "CSV file created: $CSV_FILE"
log "Exceptions file created: $EXCEPTIONS_FILE"

0 commit comments

Comments
 (0)