Skip to content

Commit c407630

Browse files
authored
2 parents 53da665 + f490107 commit c407630

File tree

6 files changed

+88
-20
lines changed

6 files changed

+88
-20
lines changed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,10 @@ trustable spdx.xml \
5454
--grimoirelab-user user --grimoirelab-password password \
5555
--opensearch-url https://admin:admin@localhost:9200 \
5656
--opensearch-index events \
57-
--output metrics.json \
5857
--repository-timeout 3600 \
58+
--code-file-pattern "\.py$|\.js$" \
59+
--binary-file-pattern "\.exe$|\.tar$" \
60+
--output metrics.json
5961
```
6062

6163
The parameters needed to run the tool are:

tests/data/events.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,20 @@
675675
"added": "85",
676676
"removed": "0"
677677
},
678+
{
679+
"modes": [
680+
"000000",
681+
"100644"
682+
],
683+
"indexes": [
684+
"0000000",
685+
"06ee9fa"
686+
],
687+
"action": "A",
688+
"file": "sample.tar.gz",
689+
"added": "0",
690+
"removed": "0"
691+
},
678692
{
679693
"modes": [
680694
"000000",

tests/end_to_end/test_cli.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ def test_metrics(self):
7272
self.assertEqual(quickstart_metrics["pony_factor"], 2)
7373
self.assertEqual(quickstart_metrics["elephant_factor"], 2)
7474
self.assertEqual(quickstart_metrics["file_types_other"], 683)
75+
self.assertEqual(quickstart_metrics["file_types_binary"], 0)
7576
self.assertEqual(quickstart_metrics["file_types_code"], 479)
7677
self.assertEqual(quickstart_metrics["commit_size_added_lines"], 53121)
7778
self.assertEqual(quickstart_metrics["commit_size_removed_lines"], 51852)
@@ -92,7 +93,8 @@ def test_metrics(self):
9293
self.assertEqual(angular_metrics["total_contributors"], 58)
9394
self.assertEqual(angular_metrics["pony_factor"], 5)
9495
self.assertEqual(angular_metrics["elephant_factor"], 2)
95-
self.assertEqual(angular_metrics["file_types_other"], 538)
96+
self.assertEqual(angular_metrics["file_types_other"], 534)
97+
self.assertEqual(angular_metrics["file_types_binary"], 4)
9698
self.assertEqual(angular_metrics["file_types_code"], 2129)
9799
self.assertEqual(angular_metrics["commit_size_added_lines"], 218483)
98100
self.assertEqual(angular_metrics["commit_size_removed_lines"], 245784)
@@ -147,6 +149,7 @@ def test_from_date(self):
147149
self.assertEqual(quickstart_metrics["pony_factor"], 2)
148150
self.assertEqual(quickstart_metrics["elephant_factor"], 1)
149151
self.assertEqual(quickstart_metrics["file_types_other"], 37)
152+
self.assertEqual(quickstart_metrics["file_types_binary"], 0)
150153
self.assertEqual(quickstart_metrics["file_types_code"], 17)
151154
self.assertEqual(quickstart_metrics["commit_size_added_lines"], 269)
152155
self.assertEqual(quickstart_metrics["commit_size_removed_lines"], 103)
@@ -168,6 +171,7 @@ def test_from_date(self):
168171
self.assertEqual(angular_metrics["pony_factor"], 1)
169172
self.assertEqual(angular_metrics["elephant_factor"], 1)
170173
self.assertEqual(angular_metrics["file_types_other"], 24)
174+
self.assertEqual(angular_metrics["file_types_binary"], 0)
171175
self.assertEqual(angular_metrics["file_types_code"], 13)
172176
self.assertEqual(angular_metrics["commit_size_added_lines"], 4849)
173177
self.assertEqual(angular_metrics["commit_size_removed_lines"], 149)
@@ -223,6 +227,7 @@ def test_to_date(self):
223227
self.assertEqual(quickstart_metrics["pony_factor"], 2)
224228
self.assertEqual(quickstart_metrics["elephant_factor"], 2)
225229
self.assertEqual(quickstart_metrics["file_types_other"], 646)
230+
self.assertEqual(quickstart_metrics["file_types_binary"], 0)
226231
self.assertEqual(quickstart_metrics["file_types_code"], 462)
227232
self.assertEqual(quickstart_metrics["commit_size_added_lines"], 52852)
228233
self.assertEqual(quickstart_metrics["commit_size_removed_lines"], 51749)
@@ -243,7 +248,8 @@ def test_to_date(self):
243248
self.assertEqual(angular_metrics["total_contributors"], 56)
244249
self.assertEqual(angular_metrics["pony_factor"], 5)
245250
self.assertEqual(angular_metrics["elephant_factor"], 2)
246-
self.assertEqual(angular_metrics["file_types_other"], 514)
251+
self.assertEqual(angular_metrics["file_types_other"], 510)
252+
self.assertEqual(angular_metrics["file_types_binary"], 4)
247253
self.assertEqual(angular_metrics["file_types_code"], 2116)
248254
self.assertEqual(angular_metrics["commit_size_added_lines"], 213634)
249255
self.assertEqual(angular_metrics["commit_size_removed_lines"], 245635)
@@ -299,6 +305,7 @@ def test_duplicate_repo(self):
299305
self.assertEqual(quickstart_metrics["pony_factor"], 2)
300306
self.assertEqual(quickstart_metrics["elephant_factor"], 2)
301307
self.assertEqual(quickstart_metrics["file_types_other"], 683)
308+
self.assertEqual(quickstart_metrics["file_types_binary"], 0)
302309
self.assertEqual(quickstart_metrics["file_types_code"], 479)
303310
self.assertEqual(quickstart_metrics["commit_size_added_lines"], 53121)
304311
self.assertEqual(quickstart_metrics["commit_size_removed_lines"], 51852)
@@ -354,6 +361,7 @@ def test_non_git_repo(self):
354361
self.assertEqual(quickstart_metrics["pony_factor"], 2)
355362
self.assertEqual(quickstart_metrics["elephant_factor"], 2)
356363
self.assertEqual(quickstart_metrics["file_types_other"], 683)
364+
self.assertEqual(quickstart_metrics["file_types_binary"], 0)
357365
self.assertEqual(quickstart_metrics["file_types_code"], 479)
358366
self.assertEqual(quickstart_metrics["commit_size_added_lines"], 53121)
359367
self.assertEqual(quickstart_metrics["commit_size_removed_lines"], 51852)

tests/unit/test_metrics.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,32 @@ def test_get_elephant_factor(self):
116116
def test_file_type_metrics(self):
117117
"""Test that file type metrics are calculated correctly"""
118118

119-
self.assertEqual(self.analyzer.get_file_type_metrics(), {})
120-
121119
self.analyzer.process_events(self.events)
122120

123121
file_metrics = self.analyzer.get_file_type_metrics()
124-
self.assertEqual(file_metrics.get("code", 0), 54)
125-
self.assertEqual(file_metrics.get("other", 0), 24)
122+
self.assertEqual(file_metrics["code"], 54)
123+
self.assertEqual(file_metrics["binary"], 1)
124+
self.assertEqual(file_metrics["other"], 24)
125+
126+
def test_file_type_metrics_empty(self):
127+
"""Test that file type metrics are calculated correctly without events"""
128+
129+
file_metrics = self.analyzer.get_file_type_metrics()
130+
self.assertEqual(file_metrics["code"], 0)
131+
self.assertEqual(file_metrics["binary"], 0)
132+
self.assertEqual(file_metrics["other"], 0)
133+
134+
def test_file_type_metrics_new_regex(self):
135+
"""Test that file type metrics are calculated correctly with new regex"""
136+
137+
analyzer = GitEventsAnalyzer(code_file_pattern=r"\.py$", binary_file_pattern=r"\.md$")
138+
139+
analyzer.process_events(self.events)
140+
141+
file_metrics = analyzer.get_file_type_metrics()
142+
self.assertEqual(file_metrics["code"], 53)
143+
self.assertEqual(file_metrics["binary"], 4)
144+
self.assertEqual(file_metrics["other"], 22)
126145

127146
def test_commit_size_metrics(self):
128147
"""Test that commit size metrics are calculated correctly"""

trustable_cli/cli.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@
7777
)
7878
@click.option("--verify-certs", is_flag=True, default=False, help="Verify SSL/TLS certificates")
7979
@click.option("--verbose", is_flag=True, default=False, help="Increase output verbosity")
80+
@click.option("--code-file-pattern", help="Regular expression to match code file types")
81+
@click.option("--binary-file-pattern", help="Regular expression to match binary file types")
8082
def trustable_grimoirelab_score(
8183
filename: str,
8284
grimoirelab_url: str,
@@ -90,6 +92,8 @@ def trustable_grimoirelab_score(
9092
to_date: datetime.datetime | None = None,
9193
verify_certs: bool = False,
9294
verbose: bool = False,
95+
code_file_pattern: str | None = None,
96+
binary_file_pattern: str | None = None,
9397
) -> None:
9498
"""Calculate metrics for Trustable using GrimoireLab.
9599
@@ -131,6 +135,8 @@ def trustable_grimoirelab_score(
131135
to_date=to_date,
132136
verify_certs=verify_certs,
133137
timeout=repository_timeout,
138+
code_file_pattern=code_file_pattern,
139+
binary_file_pattern=binary_file_pattern,
134140
)
135141

136142
package_metrics = {"packages": {}}
@@ -203,6 +209,8 @@ def generate_metrics_when_ready(
203209
to_date: datetime.datetime | None = None,
204210
verify_certs: bool = False,
205211
timeout: int = 3600,
212+
code_file_pattern: str | None = None,
213+
binary_file_pattern: str | None = None,
206214
) -> dict[str, Any]:
207215
"""Generate metrics once the repositories have finished the collection.
208216
@@ -214,6 +222,8 @@ def generate_metrics_when_ready(
214222
:param to_date: End date for metrics.
215223
:param verify_certs: Verify SSL/TLS certificates.
216224
:param timeout: Seconds to wait for the metrics before failing.
225+
:param code_file_pattern: Regular expression to match code file types.
226+
:param binary_file_pattern: Regular expression to match binary file types.
217227
"""
218228
logging.info("Generating metrics")
219229

@@ -228,12 +238,14 @@ def generate_metrics_when_ready(
228238
for repository in pending_repositories:
229239
if repository_ready(grimoirelab_client, repository, after_date):
230240
metrics["repositories"][repository] = get_repository_metrics(
231-
repository,
232-
opensearch_url,
233-
opensearch_index,
234-
from_date,
235-
to_date,
236-
verify_certs,
241+
repository=repository,
242+
opensearch_url=opensearch_url,
243+
opensearch_index=opensearch_index,
244+
from_date=from_date,
245+
to_date=to_date,
246+
verify_certs=verify_certs,
247+
code_file_pattern=code_file_pattern,
248+
binary_file_pattern=binary_file_pattern,
237249
)
238250
processed.add(repository)
239251

trustable_cli/metrics.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,20 +40,27 @@
4040
COMMIT_EVENT_TYPE = "org.grimoirelab.events.git.commit"
4141
AUTHOR_FIELD = "Author"
4242
FILE_TYPE_CODE = (
43-
r"\.bazel$|\.bazelrc$|\.bzl$|\.c$|\.cc$|\.cp$|\.cpp$|\.cxx$|\.c\+\+$|"
44-
r"\.go$|\.h$|\.js$|\.mjs$|\.java$|\.py$|\.rs$|\.sh$|\.tf$|\.ts$"
43+
r"\.bazel$|\.bazelrc$|\.bzl$|\.c$|\.cc$|\.cp$|\.cpp$|\.cs$|\.cxx$|\.c\+\+$|"
44+
r"\.go$|\.h$|\.hpp$|\.js$|\.mjs$|\.java$|\.pl$|\.py$|\.rs$|\.sh$|\.tf$|\.ts$"
45+
)
46+
FILE_TYPE_BINARY = (
47+
r"\.7z$|\.a$|\.abb$|\.apk$|\.app$|\.appx$|\.arc$|\.bin$|\.bz2$|\.class$|\.deb$|"
48+
r"\.dll$|\.dmg$|\.exe$|\.gz$|\.ipa$|\.iso$|\.jar$|\.lib$|\.msi$|\.o$|\.obj$|\.rar$|"
49+
r"\.rpm$|\.so$|\.tar$|\.xar$|\.xz$|\.zip$|\.zst$|\.Z$"
4550
)
4651

4752

4853
class GitEventsAnalyzer:
49-
def __init__(self):
54+
def __init__(self, code_file_pattern: str | None = None, binary_file_pattern: str | None = None):
5055
self.total_commits: int = 0
5156
self.contributors: Counter = Counter()
5257
self.companies: Counter = Counter()
53-
self.file_types: dict = Counter()
58+
self.file_types: dict = {"code": 0, "binary": 0, "other": 0}
5459
self.added_lines: int = 0
5560
self.removed_lines: int = 0
5661
self.messages_sizes: list = []
62+
self.re_code_pattern = re.compile(code_file_pattern or FILE_TYPE_CODE)
63+
self.re_binary_pattern = re.compile(binary_file_pattern or FILE_TYPE_BINARY)
5764

5865
def process_events(self, events: iter(dict[str, Any])):
5966
for event in events:
@@ -111,7 +118,7 @@ def get_elephant_factor(self):
111118
def get_file_type_metrics(self):
112119
"""Get the file type metrics"""
113120

114-
return dict(self.file_types)
121+
return dict(self.file_types)
115122

116123
def get_commit_size_metrics(self):
117124
"""Get the commit size metrics"""
@@ -190,8 +197,10 @@ def _update_file_metrics(self, event):
190197
if not file["file"]:
191198
continue
192199
# File type metrics
193-
if re.search(FILE_TYPE_CODE, file["file"]):
200+
if self.re_code_pattern.search(file["file"]):
194201
self.file_types["code"] += 1
202+
elif self.re_binary_pattern.search(file["file"]):
203+
self.file_types["binary"] += 1
195204
else:
196205
self.file_types["other"] += 1
197206

@@ -219,6 +228,8 @@ def get_repository_metrics(
219228
from_date: datetime.datetime = None,
220229
to_date: datetime.datetime = None,
221230
verify_certs: bool = True,
231+
code_file_pattern: str | None = None,
232+
binary_file_pattern: str | None = None,
222233
):
223234
"""
224235
Get the metrics from a repository.
@@ -229,14 +240,16 @@ def get_repository_metrics(
229240
:param verify_certs: Boolean, verify SSL/TLS certificates, default True
230241
:param from_date: Start date, by default None
231242
:param to_date: End date, by default None
243+
:param code_file_pattern: Regular expression to match code file types.
244+
:param binary_file_pattern: Regular expression to match binary file types.
232245
"""
233246
os_conn = connect_to_opensearch(opensearch_url, verify_certs=verify_certs)
234247

235248
metrics = {"metrics": {}}
236249

237250
events = get_repository_events(os_conn, opensearch_index, repository, from_date, to_date)
238251

239-
analyzer = GitEventsAnalyzer()
252+
analyzer = GitEventsAnalyzer(code_file_pattern=code_file_pattern, binary_file_pattern=binary_file_pattern)
240253
analyzer.process_events(events)
241254

242255
metrics["metrics"]["total_commits"] = analyzer.get_commit_count()

0 commit comments

Comments
 (0)