Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,12 @@ grimoirelab-metrics spdx.xml \
--opensearch-url https://admin:admin@localhost:9200 \
--opensearch-index events \
--from-date 2024-01-01 --to-date 2025-01-01 \
--repository-timeout 3600
--repository-timeout 3600 \
--code-file-pattern "\.py$|\.js$" \
--binary-file-pattern "\.exe$|\.tar$" \
--pony-threshold 0.5 \
--elephant-threshold 0.5 \
--dev-categories-thresholds 0.8 0.95 \
--output metrics.json
```

Expand Down
26 changes: 26 additions & 0 deletions grimoirelab_metrics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,17 @@
@click.option("--verbose", is_flag=True, default=False, help="Increase output verbosity")
@click.option("--code-file-pattern", help="Regular expression to match code file types")
@click.option("--binary-file-pattern", help="Regular expression to match binary file types")
@click.option("--pony-threshold", type=click.FloatRange(0, 1), show_default=True, help="Pony factor threshold", default=0.5)
@click.option(
"--elephant-threshold", type=click.FloatRange(0, 1), show_default=True, help="Elephant factor threshold", default=0.5
)
@click.option(
"--dev-categories-thresholds",
type=(click.FloatRange(0, 1), click.FloatRange(0, 1)),
show_default=True,
help="Developer categories thresholds",
default=(0.8, 0.95),
)
def grimoirelab_metrics(
filename: str,
grimoirelab_url: str,
Expand All @@ -95,6 +106,9 @@ def grimoirelab_metrics(
verbose: bool = False,
code_file_pattern: str | None = None,
binary_file_pattern: str | None = None,
pony_threshold: float = 0.5,
elephant_threshold: float = 0.5,
dev_categories_thresholds: tuple[float, float] = (0.8, 0.95),
) -> None:
"""Calculate metrics using GrimoireLab.

Expand Down Expand Up @@ -136,6 +150,9 @@ def grimoirelab_metrics(
timeout=repository_timeout,
code_file_pattern=code_file_pattern,
binary_file_pattern=binary_file_pattern,
pony_threshold=pony_threshold,
elephant_threshold=elephant_threshold,
dev_categories_thresholds=dev_categories_thresholds,
)

package_metrics = {"packages": {}}
Expand Down Expand Up @@ -212,6 +229,9 @@ def generate_metrics_when_ready(
timeout: int = 3600,
code_file_pattern: str | None = None,
binary_file_pattern: str | None = None,
pony_threshold: float = 0.5,
elephant_threshold: float = 0.5,
dev_categories_thresholds: tuple[float, float] = (0.8, 0.95),
) -> dict[str:Any]:
"""Generate metrics once the repositories have finished the collection.

Expand All @@ -225,6 +245,9 @@ def generate_metrics_when_ready(
:param timeout: Seconds to wait before failing getting metrics
:param code_file_pattern: Regular expression to match code file types.
:param binary_file_pattern: Regular expression to match binary file types.
:param pony_threshold: Pony Factor threshold.
:param elephant_threshold: Elephant Factor threshold.
:param dev_categories_thresholds: Developer Categories thresholds.
"""
logging.info("Generating metrics")

Expand All @@ -247,6 +270,9 @@ def generate_metrics_when_ready(
verify_certs=verify_certs,
code_file_pattern=code_file_pattern,
binary_file_pattern=binary_file_pattern,
pony_threshold=pony_threshold,
elephant_threshold=elephant_threshold,
dev_categories_thresholds=dev_categories_thresholds,
)
processed.add(repository)

Expand Down
34 changes: 28 additions & 6 deletions grimoirelab_metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,14 @@


class GitEventsAnalyzer:
def __init__(self, code_file_pattern: str | None = None, binary_file_pattern: str | None = None):
def __init__(
self,
code_file_pattern: str | None = None,
binary_file_pattern: str | None = None,
pony_threshold: float = 0.5,
elephant_threshold: float = 0.5,
dev_categories_thresholds: tuple[float, float] = (0.8, 0.95),
):
self.total_commits: int = 0
self.contributors: Counter = Counter()
self.companies: Counter = Counter()
Expand All @@ -61,6 +68,9 @@ def __init__(self, code_file_pattern: str | None = None, binary_file_pattern: st
self.messages_sizes: list = []
self.re_code_pattern = re.compile(code_file_pattern or FILE_TYPE_CODE)
self.re_binary_pattern = re.compile(binary_file_pattern or FILE_TYPE_BINARY)
self.pony_threshold = pony_threshold
self.elephant_threshold = elephant_threshold
self.dev_categories_thresholds = dev_categories_thresholds

def process_events(self, events: iter(dict[str, Any])):
for event in events:
Expand Down Expand Up @@ -93,7 +103,7 @@ def get_pony_factor(self):
for _, contributions in self.contributors.most_common():
partial_contributions += contributions
pony_factor += 1
if partial_contributions / self.total_commits > 0.5:
if partial_contributions / self.total_commits > self.pony_threshold:
break

return pony_factor
Expand All @@ -110,7 +120,7 @@ def get_elephant_factor(self):
for _, contributions in self.companies.most_common():
partial_contributions += contributions
elephant_factor += 1
if partial_contributions / self.total_commits > 0.5:
if partial_contributions / self.total_commits > self.elephant_threshold:
break

return elephant_factor
Expand Down Expand Up @@ -173,8 +183,8 @@ def get_developer_categories(self):
core = 0
regular = 0
casual = 0
regular_threshold = int(0.8 * self.total_commits)
casual_threshold = int(0.95 * self.total_commits)
regular_threshold = int(self.dev_categories_thresholds[0] * self.total_commits)
casual_threshold = int(self.dev_categories_thresholds[1] * self.total_commits)
acc_commits = 0
last_core_contribution = 0

Expand Down Expand Up @@ -244,6 +254,9 @@ def get_repository_metrics(
verify_certs: bool = True,
code_file_pattern: str | None = None,
binary_file_pattern: str | None = None,
pony_threshold: float | None = None,
elephant_threshold: float | None = None,
dev_categories_thresholds: tuple[float, float] = (0.8, 0.95),
):
"""
Get the metrics from a repository.
Expand All @@ -256,14 +269,23 @@ def get_repository_metrics(
:param to_date: End date, by default None
:param code_file_pattern: Regular expression to match code file types.
:param binary_file_pattern: Regular expression to match binary file types.
:param pony_threshold: Threshold for the pony factor
:param elephant_threshold: Threshold for the elephant factor
:param dev_categories_thresholds: Threshold for the developer categories
"""
os_conn = connect_to_opensearch(opensearch_url, verify_certs=verify_certs)

metrics = {"metrics": {}}

events = get_repository_events(os_conn, opensearch_index, repository, from_date, to_date)

analyzer = GitEventsAnalyzer(code_file_pattern=code_file_pattern, binary_file_pattern=binary_file_pattern)
analyzer = GitEventsAnalyzer(
code_file_pattern=code_file_pattern,
binary_file_pattern=binary_file_pattern,
pony_threshold=pony_threshold,
elephant_threshold=elephant_threshold,
dev_categories_thresholds=dev_categories_thresholds,
)
analyzer.process_events(events)

metrics["metrics"]["total_commits"] = analyzer.get_commit_count()
Expand Down
31 changes: 31 additions & 0 deletions tests/unit/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ def test_get_pony_factor(self):
self.analyzer.process_events(extra_events)
self.assertEqual(self.analyzer.get_pony_factor(), 2)

def test_get_pony_factor_custom_threshold(self):
    """Check the pony factor obtained when a non-default threshold is set."""

    # Build a dedicated analyzer so the 0.8 threshold replaces the default.
    custom_analyzer = GitEventsAnalyzer(pony_threshold=0.8)
    custom_analyzer.process_events(self.events)
    pony_factor = custom_analyzer.get_pony_factor()

    self.assertEqual(pony_factor, 2)

def test_get_elephant_factor(self):
"""Test the computation of the elephant factor is correct"""

Expand Down Expand Up @@ -113,6 +121,14 @@ def test_get_elephant_factor(self):
self.analyzer.process_events(extra_events)
self.assertEqual(self.analyzer.get_elephant_factor(), 2)

def test_get_elephant_factor_custom_threshold(self):
    """Check the elephant factor obtained when a non-default threshold is set."""

    # Build a dedicated analyzer so the 0.8 threshold replaces the default.
    custom_analyzer = GitEventsAnalyzer(elephant_threshold=0.8)
    custom_analyzer.process_events(self.events)
    elephant_factor = custom_analyzer.get_elephant_factor()

    self.assertEqual(elephant_factor, 2)

def test_file_type_metrics(self):
"""Test that file type metrics are calculated correctly"""

Expand Down Expand Up @@ -280,6 +296,21 @@ def test_get_developer_categories_tied(self):
categories = self.analyzer.get_developer_categories()
self.assertDictEqual(categories, {"core": 4, "regular": 1, "casual": 0})

def test_get_developer_categories_custom_threshold(self):
    """Check developer categories for two non-default threshold pairs."""

    # First pair of thresholds: (0.5, 0.9).
    first_analyzer = GitEventsAnalyzer(dev_categories_thresholds=(0.5, 0.9))
    first_analyzer.process_events(self.events)
    self.assertDictEqual(
        first_analyzer.get_developer_categories(),
        {"core": 1, "regular": 1, "casual": 1},
    )

    # Second pair of thresholds: (0.95, 0.99), run on a fresh analyzer
    # fed with the same events.
    second_analyzer = GitEventsAnalyzer(dev_categories_thresholds=(0.95, 0.99))
    second_analyzer.process_events(self.events)
    self.assertDictEqual(
        second_analyzer.get_developer_categories(),
        {"core": 2, "regular": 0, "casual": 1},
    )


if __name__ == "__main__":
unittest.main()