#!/bin/bash

# -----------------------------------------------------------------------------
# Script Name: git-repo-commit-analyzer.sh
# Description: This script analyzes the size of all commits in a Git repository.
#              It generates detailed logs, CSV reports, and exception logs for
#              commits that meet or exceed a specified size threshold.
#
# Usage: ./git-repo-commit-analyzer.sh <size-threshold-in-bytes>
#
# Features:
# - Analyzes all commits in the current Git repository.
# - Calculates the total size of each commit and the number of files it modifies.
# - Generates the following output files:
#     1. A log file with detailed analysis of all commits.
#     2. A CSV file summarizing commit hash, size, and file count.
#     3. An exceptions log file for commits meeting or exceeding the size
#        threshold, including detailed file sizes for each commit.
# - Identifies and logs the largest commit in the repository.
#
# Requirements:
# - Must be run from within a valid Git repository.
# - Requires a size threshold (in bytes) to be passed as an argument.
#
# Output Files:
# - <repo-name>-analyzer-<timestamp>.log
# - <repo-name>-commits-size-<timestamp>.csv
# - <repo-name>-commit-size-exceptions-<timestamp>.log
#
# Example:
#   ./git-repo-commit-analyzer.sh 100000
#   This will analyze all commits in the repository and log details for commits
#   with a total size of 100,000 bytes or more.
#
# Author: Mickey Gousset (@mickeygousset)
# Date: 2025-04-05
# -----------------------------------------------------------------------------
# Abort on the first unhandled command failure.
set -e

# Verify this is a valid git repository. Only the exit status matters, so
# both stdout and stderr are discarded. (A stray "&" before the redirection
# would background git and make this test always take the error branch.)
if ! git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
  echo "Error: Not in a git repository" >&2
  exit 1
fi

# Get the repository name: the last path component of the work-tree root.
REPO_NAME=$(basename "$(git rev-parse --show-toplevel)")

# Check that exactly one argument (the size threshold) is provided.
if [ $# -ne 1 ]; then
  echo "Usage: $0 <size-threshold-in-bytes>" >&2
  exit 1
fi

SIZE_THRESHOLD=$1

# The threshold is used in numeric [ -ge ] tests further down, so reject
# anything that is not a plain non-negative integer up front.
case "$SIZE_THRESHOLD" in
  '' | *[!0-9]*)
    echo "Error: size threshold must be a non-negative integer (bytes)" >&2
    echo "Usage: $0 <size-threshold-in-bytes>" >&2
    exit 1
    ;;
esac

# Generate timestamp for log and CSV filenames.
TIMESTAMP=$(date +"%Y%m%d-%H%M%S")
LOG_FILE="${REPO_NAME}-analyzer-${TIMESTAMP}.log"
CSV_FILE="${REPO_NAME}-commits-size-${TIMESTAMP}.csv"
EXCEPTIONS_FILE="${REPO_NAME}-commit-size-exceptions-${TIMESTAMP}.log"

# Initialize variables to track the largest commit.
LARGEST_COMMIT=""
LARGEST_SIZE=0

# Create the CSV file and add the header (truncates any existing file).
echo "Commit Hash,Commit Size (bytes),Number of Files" > "$CSV_FILE"

# Create the exceptions file with its heading line.
echo "Commits meeting or exceeding the size threshold ($SIZE_THRESHOLD bytes) in repository '$REPO_NAME':" > "$EXCEPTIONS_FILE"
#######################################
# Log a message to both the screen and the log file.
# Globals:   LOG_FILE (read) - path of the file the message is appended to
# Arguments: $1 - message text
# Outputs:   writes the message, followed by a newline, to stdout and
#            appends the same line to LOG_FILE
#######################################
log() {
  # printf instead of echo so messages such as "-n" are printed literally.
  printf '%s\n' "$1" | tee -a "$LOG_FILE"
}
#######################################
# Log a message to the exceptions file, echoing it to the screen as well.
# Globals:   EXCEPTIONS_FILE (read) - path of the file the message is
#            appended to
# Arguments: $1 - message text
# Outputs:   writes the message, followed by a newline, to stdout and
#            appends the same line to EXCEPTIONS_FILE
#######################################
log_exception() {
  # printf instead of echo so messages such as "-n" are printed literally.
  printf '%s\n' "$1" | tee -a "$EXCEPTIONS_FILE"
}
# Array to store commits that meet the size threshold
declare -a LARGE_COMMITS=()

# Loop through all commits reachable from any ref. Word-splitting the command
# substitution is safe here: rev-list prints one whitespace-free hash per line.
for COMMIT in $(git rev-list --all); do
  log "Analyzing commit: $COMMIT in repository '$REPO_NAME'"
  log "------------------------"

  # Detect a root commit. --verify --quiet makes rev-parse fail silently when
  # "<commit>^" does not exist; without --verify it echoes the unresolved
  # argument on stdout, which would make a "parent" appear to exist.
  if ! git rev-parse --verify --quiet "${COMMIT}^" > /dev/null 2>&1; then
    log "This is the initial commit. Comparing with empty tree."
  fi

  # Raw diff of the commit against its parent, one line per changed path:
  #   :<old-mode> <new-mode> <old-sha> <new-sha> <status><TAB><path>
  # --root makes a parentless commit diff against the empty tree so its files
  # are listed too.
  # NOTE(review): merge commits appear to produce no diff-tree output without
  # -m/-c, so they are reported as having no changed files - confirm intended.
  CHANGES=$(git diff-tree --no-commit-id --root -r "$COMMIT" 2> /dev/null || echo "")

  if [ -z "$CHANGES" ]; then
    log "No files changed in this commit."
    continue
  fi

  # Calculate total size and count in a single pass, remembering the per-file
  # lines so the exceptions log does not have to query git a second time.
  TOTAL_SIZE=0
  FILE_COUNT=0
  FILE_DETAILS=()

  while IFS=$'\t' read -r META FILE_PATH; do
    # The fourth field of the metadata is the blob id of the file as it
    # exists in this commit.
    read -r _ _ _ BLOB _ <<< "$META"

    # An empty or all-zero id means the path does not exist in this commit
    # (it was deleted), so there is nothing to measure.
    if [ -z "${BLOB//0/}" ]; then
      continue
    fi

    # Size of the blob in bytes; objects that cannot be read count as 0.
    SIZE=$(git cat-file -s "$BLOB" 2> /dev/null || echo "0")
    FILE_DETAILS+=("  ${FILE_PATH}: ${SIZE} bytes")

    # Only non-empty files contribute to the total and to the file count.
    if [ "$SIZE" -gt 0 ]; then
      TOTAL_SIZE=$((TOTAL_SIZE + SIZE))
      FILE_COUNT=$((FILE_COUNT + 1))
    fi
  done <<< "$CHANGES"

  # Log total for this commit
  log "Total files: $FILE_COUNT"
  log "Total size: $TOTAL_SIZE bytes"
  log "------------------------"

  # Append commit details to the CSV file
  printf '%s,%s,%s\n' "$COMMIT" "$TOTAL_SIZE" "$FILE_COUNT" >> "$CSV_FILE"

  # Check if this is the largest commit seen so far
  if [ "$TOTAL_SIZE" -gt "$LARGEST_SIZE" ]; then
    LARGEST_SIZE=$TOTAL_SIZE
    LARGEST_COMMIT=$COMMIT
  fi

  # Check if the commit meets the size threshold
  if [ "$TOTAL_SIZE" -ge "$SIZE_THRESHOLD" ]; then
    LARGE_COMMITS+=("$COMMIT ($TOTAL_SIZE bytes)")

    # Log details to the exceptions file
    log_exception "Commit: $COMMIT"
    log_exception "Total Size: $TOTAL_SIZE bytes"
    log_exception "Files:"

    # Log each file and its size (includes zero-byte files)
    for DETAIL in "${FILE_DETAILS[@]}"; do
      log_exception "$DETAIL"
    done

    log_exception "------------------------"
  fi
done
# Output the largest commit (the hash is empty when no commit had a
# non-zero total size).
log "Largest commit: $LARGEST_COMMIT"
log "Largest size: $LARGEST_SIZE bytes"

# Output commits that meet the size threshold
if [ "${#LARGE_COMMITS[@]}" -gt 0 ]; then
  log "Commits meeting or exceeding the size threshold ($SIZE_THRESHOLD bytes):"
  for COMMIT_INFO in "${LARGE_COMMITS[@]}"; do
    log "$COMMIT_INFO"
  done
else
  log "No commits meet or exceed the size threshold ($SIZE_THRESHOLD bytes)."
fi

# Tell the user where the generated reports were written.
log "Log file created: $LOG_FILE"
log "CSV file created: $CSV_FILE"
log "Exceptions file created: $EXCEPTIONS_FILE"