Skip to content

Commit 64dce60

Browse files
authored
Merges #22 Closes #22
2 parents 2f32a64 + ad08120 commit 64dce60

File tree

4 files changed

+89
-7
lines changed

4 files changed

+89
-7
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,12 @@ grimoirelab-metrics spdx.xml \
5757
--opensearch-url https://admin:admin@localhost:9200 \
5858
--opensearch-index events \
5959
--from-date 2024-01-01 --to-date 2025-01-01 \
60-
--repository-timeout 3600
60+
--repository-timeout 3600 \
6161
--code-file-pattern "\.py$|\.js$" \
6262
--binary-file-pattern "\.exe$|\.tar$" \
63+
--pony-threshold 0.5 \
64+
--elephant-threshold 0.5 \
65+
--dev-categories-thresholds 0.8 0.95 \
6366
--output metrics.json
6467
```
6568

grimoirelab_metrics/cli.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,17 @@
8080
@click.option("--verbose", is_flag=True, default=False, help="Increase output verbosity")
8181
@click.option("--code-file-pattern", help="Regular expression to match code file types")
8282
@click.option("--binary-file-pattern", help="Regular expression to match binary file types")
83+
@click.option("--pony-threshold", type=click.FloatRange(0, 1), show_default=True, help="Pony factor threshold", default=0.5)
84+
@click.option(
85+
"--elephant-threshold", type=click.FloatRange(0, 1), show_default=True, help="Elephant factor threshold", default=0.5
86+
)
87+
@click.option(
88+
"--dev-categories-thresholds",
89+
type=(click.FloatRange(0, 1), click.FloatRange(0, 1)),
90+
show_default=True,
91+
help="Developer categories thresholds",
92+
default=(0.8, 0.95),
93+
)
8394
def grimoirelab_metrics(
8495
filename: str,
8596
grimoirelab_url: str,
@@ -95,6 +106,9 @@ def grimoirelab_metrics(
95106
verbose: bool = False,
96107
code_file_pattern: str | None = None,
97108
binary_file_pattern: str | None = None,
109+
pony_threshold: float = 0.5,
110+
elephant_threshold: float = 0.5,
111+
dev_categories_thresholds: tuple[float, float] = (0.8, 0.95),
98112
) -> None:
99113
"""Calculate metrics using GrimoireLab.
100114
@@ -136,6 +150,9 @@ def grimoirelab_metrics(
136150
timeout=repository_timeout,
137151
code_file_pattern=code_file_pattern,
138152
binary_file_pattern=binary_file_pattern,
153+
pony_threshold=pony_threshold,
154+
elephant_threshold=elephant_threshold,
155+
dev_categories_thresholds=dev_categories_thresholds,
139156
)
140157

141158
package_metrics = {"packages": {}}
@@ -212,6 +229,9 @@ def generate_metrics_when_ready(
212229
timeout: int = 3600,
213230
code_file_pattern: str | None = None,
214231
binary_file_pattern: str | None = None,
232+
pony_threshold: float = 0.5,
233+
elephant_threshold: float = 0.5,
234+
dev_categories_thresholds: tuple[float, float] = (0.8, 0.95),
215235
) -> dict[str:Any]:
216236
"""Generate metrics once the repositories have finished the collection.
217237
@@ -225,6 +245,9 @@ def generate_metrics_when_ready(
225245
:param timeout: Seconds to wait before failing getting metrics
226246
:param code_file_pattern: Regular expression to match code file types.
227247
:param binary_file_pattern: Regular expression to match binary file types.
248+
:param pony_threshold: Pony Factor threshold.
249+
:param elephant_threshold: Elephant Factor threshold.
250+
:param dev_categories_thresholds: Developer Categories thresholds.
228251
"""
229252
logging.info("Generating metrics")
230253

@@ -247,6 +270,9 @@ def generate_metrics_when_ready(
247270
verify_certs=verify_certs,
248271
code_file_pattern=code_file_pattern,
249272
binary_file_pattern=binary_file_pattern,
273+
pony_threshold=pony_threshold,
274+
elephant_threshold=elephant_threshold,
275+
dev_categories_thresholds=dev_categories_thresholds,
250276
)
251277
processed.add(repository)
252278

grimoirelab_metrics/metrics.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,14 @@
5151

5252

5353
class GitEventsAnalyzer:
54-
def __init__(self, code_file_pattern: str | None = None, binary_file_pattern: str | None = None):
54+
def __init__(
55+
self,
56+
code_file_pattern: str | None = None,
57+
binary_file_pattern: str | None = None,
58+
pony_threshold: float = 0.5,
59+
elephant_threshold: float = 0.5,
60+
dev_categories_thresholds: tuple[float, float] = (0.8, 0.95),
61+
):
5562
self.total_commits: int = 0
5663
self.contributors: Counter = Counter()
5764
self.companies: Counter = Counter()
@@ -61,6 +68,9 @@ def __init__(self, code_file_pattern: str | None = None, binary_file_pattern: st
6168
self.messages_sizes: list = []
6269
self.re_code_pattern = re.compile(code_file_pattern or FILE_TYPE_CODE)
6370
self.re_binary_pattern = re.compile(binary_file_pattern or FILE_TYPE_BINARY)
71+
self.pony_threshold = pony_threshold
72+
self.elephant_threshold = elephant_threshold
73+
self.dev_categories_thresholds = dev_categories_thresholds
6474

6575
def process_events(self, events: iter(dict[str, Any])):
6676
for event in events:
@@ -93,7 +103,7 @@ def get_pony_factor(self):
93103
for _, contributions in self.contributors.most_common():
94104
partial_contributions += contributions
95105
pony_factor += 1
96-
if partial_contributions / self.total_commits > 0.5:
106+
if partial_contributions / self.total_commits > self.pony_threshold:
97107
break
98108

99109
return pony_factor
@@ -110,7 +120,7 @@ def get_elephant_factor(self):
110120
for _, contributions in self.companies.most_common():
111121
partial_contributions += contributions
112122
elephant_factor += 1
113-
if partial_contributions / self.total_commits > 0.5:
123+
if partial_contributions / self.total_commits > self.elephant_threshold:
114124
break
115125

116126
return elephant_factor
@@ -173,8 +183,8 @@ def get_developer_categories(self):
173183
core = 0
174184
regular = 0
175185
casual = 0
176-
regular_threshold = int(0.8 * self.total_commits)
177-
casual_threshold = int(0.95 * self.total_commits)
186+
regular_threshold = int(self.dev_categories_thresholds[0] * self.total_commits)
187+
casual_threshold = int(self.dev_categories_thresholds[1] * self.total_commits)
178188
acc_commits = 0
179189
last_core_contribution = 0
180190

@@ -244,6 +254,9 @@ def get_repository_metrics(
244254
verify_certs: bool = True,
245255
code_file_pattern: str | None = None,
246256
binary_file_pattern: str | None = None,
257+
pony_threshold: float | None = None,
258+
elephant_threshold: float | None = None,
259+
dev_categories_thresholds: tuple[float, float] = (0.8, 0.95),
247260
):
248261
"""
249262
Get the metrics from a repository.
@@ -256,14 +269,23 @@ def get_repository_metrics(
256269
:param to_date: End date, by default None
257270
:param code_file_pattern: Regular expression to match code file types.
258271
:param binary_file_pattern: Regular expression to match binary file types.
272+
:param pony_threshold: Threshold for the pony factor
273+
:param elephant_threshold: Threshold for the elephant factor
274+
:param dev_categories_thresholds: Threshold for the developer categories
259275
"""
260276
os_conn = connect_to_opensearch(opensearch_url, verify_certs=verify_certs)
261277

262278
metrics = {"metrics": {}}
263279

264280
events = get_repository_events(os_conn, opensearch_index, repository, from_date, to_date)
265281

266-
analyzer = GitEventsAnalyzer(code_file_pattern=code_file_pattern, binary_file_pattern=binary_file_pattern)
282+
analyzer = GitEventsAnalyzer(
283+
code_file_pattern=code_file_pattern,
284+
binary_file_pattern=binary_file_pattern,
285+
pony_threshold=pony_threshold,
286+
elephant_threshold=elephant_threshold,
287+
dev_categories_thresholds=dev_categories_thresholds,
288+
)
267289
analyzer.process_events(events)
268290

269291
metrics["metrics"]["total_commits"] = analyzer.get_commit_count()

tests/unit/test_metrics.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,14 @@ def test_get_pony_factor(self):
7979
self.analyzer.process_events(extra_events)
8080
self.assertEqual(self.analyzer.get_pony_factor(), 2)
8181

82+
def test_get_pony_factor_custom_threshold(self):
83+
"""Test the computation of the pony factor is correct with a custom threshold"""
84+
85+
analyzer = GitEventsAnalyzer(pony_threshold=0.8)
86+
analyzer.process_events(self.events)
87+
88+
self.assertEqual(analyzer.get_pony_factor(), 2)
89+
8290
def test_get_elephant_factor(self):
8391
"""Test the computation of the elephant factor is correct"""
8492

@@ -113,6 +121,14 @@ def test_get_elephant_factor(self):
113121
self.analyzer.process_events(extra_events)
114122
self.assertEqual(self.analyzer.get_elephant_factor(), 2)
115123

124+
def test_get_elephant_factor_custom_threshold(self):
125+
"""Test the computation of the elephant factor is correct with a custom threshold"""
126+
127+
analyzer = GitEventsAnalyzer(elephant_threshold=0.8)
128+
analyzer.process_events(self.events)
129+
130+
self.assertEqual(analyzer.get_elephant_factor(), 2)
131+
116132
def test_file_type_metrics(self):
117133
"""Test that file type metrics are calculated correctly"""
118134

@@ -280,6 +296,21 @@ def test_get_developer_categories_tied(self):
280296
categories = self.analyzer.get_developer_categories()
281297
self.assertDictEqual(categories, {"core": 4, "regular": 1, "casual": 0})
282298

299+
def test_get_developer_categories_custom_threshold(self):
300+
"""Test if the categories are calculated correctly with a custom threshold"""
301+
302+
analyzer = GitEventsAnalyzer(dev_categories_thresholds=(0.5, 0.9))
303+
analyzer.process_events(self.events)
304+
305+
categories = analyzer.get_developer_categories()
306+
self.assertDictEqual(categories, {"core": 1, "regular": 1, "casual": 1})
307+
308+
analyzer_2 = GitEventsAnalyzer(dev_categories_thresholds=(0.95, 0.99))
309+
analyzer_2.process_events(self.events)
310+
311+
categories = analyzer_2.get_developer_categories()
312+
self.assertDictEqual(categories, {"core": 2, "regular": 0, "casual": 1})
313+
283314

284315
if __name__ == "__main__":
285316
unittest.main()

0 commit comments

Comments
 (0)