Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ trustable spdx.xml \
--grimoirelab-user user --grimoirelab-password password \
--opensearch-url https://admin:[email protected]:9200 \
--opensearch-index events \
--output metrics.json \
--repository-timeout 3600 \
--code-file-pattern "\.py$|\.js$" \
--binary-file-pattern "\.exe$|\.tar$" \
--output metrics.json
```

The parameters needed to run the tool are:
Expand Down
14 changes: 14 additions & 0 deletions tests/data/events.json
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,20 @@
"added": "85",
"removed": "0"
},
{
"modes": [
"000000",
"100644"
],
"indexes": [
"0000000",
"06ee9fa"
],
"action": "A",
"file": "sample.tar.gz",
"added": "0",
"removed": "0"
},
{
"modes": [
"000000",
Expand Down
12 changes: 10 additions & 2 deletions tests/end_to_end/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def test_metrics(self):
self.assertEqual(quickstart_metrics["pony_factor"], 2)
self.assertEqual(quickstart_metrics["elephant_factor"], 2)
self.assertEqual(quickstart_metrics["file_types_other"], 683)
self.assertEqual(quickstart_metrics["file_types_binary"], 0)
self.assertEqual(quickstart_metrics["file_types_code"], 479)
self.assertEqual(quickstart_metrics["commit_size_added_lines"], 53121)
self.assertEqual(quickstart_metrics["commit_size_removed_lines"], 51852)
Expand All @@ -92,7 +93,8 @@ def test_metrics(self):
self.assertEqual(angular_metrics["total_contributors"], 58)
self.assertEqual(angular_metrics["pony_factor"], 5)
self.assertEqual(angular_metrics["elephant_factor"], 2)
self.assertEqual(angular_metrics["file_types_other"], 538)
self.assertEqual(angular_metrics["file_types_other"], 534)
self.assertEqual(angular_metrics["file_types_binary"], 4)
self.assertEqual(angular_metrics["file_types_code"], 2129)
self.assertEqual(angular_metrics["commit_size_added_lines"], 218483)
self.assertEqual(angular_metrics["commit_size_removed_lines"], 245784)
Expand Down Expand Up @@ -147,6 +149,7 @@ def test_from_date(self):
self.assertEqual(quickstart_metrics["pony_factor"], 2)
self.assertEqual(quickstart_metrics["elephant_factor"], 1)
self.assertEqual(quickstart_metrics["file_types_other"], 37)
self.assertEqual(quickstart_metrics["file_types_binary"], 0)
self.assertEqual(quickstart_metrics["file_types_code"], 17)
self.assertEqual(quickstart_metrics["commit_size_added_lines"], 269)
self.assertEqual(quickstart_metrics["commit_size_removed_lines"], 103)
Expand All @@ -168,6 +171,7 @@ def test_from_date(self):
self.assertEqual(angular_metrics["pony_factor"], 1)
self.assertEqual(angular_metrics["elephant_factor"], 1)
self.assertEqual(angular_metrics["file_types_other"], 24)
self.assertEqual(angular_metrics["file_types_binary"], 0)
self.assertEqual(angular_metrics["file_types_code"], 13)
self.assertEqual(angular_metrics["commit_size_added_lines"], 4849)
self.assertEqual(angular_metrics["commit_size_removed_lines"], 149)
Expand Down Expand Up @@ -223,6 +227,7 @@ def test_to_date(self):
self.assertEqual(quickstart_metrics["pony_factor"], 2)
self.assertEqual(quickstart_metrics["elephant_factor"], 2)
self.assertEqual(quickstart_metrics["file_types_other"], 646)
self.assertEqual(quickstart_metrics["file_types_binary"], 0)
self.assertEqual(quickstart_metrics["file_types_code"], 462)
self.assertEqual(quickstart_metrics["commit_size_added_lines"], 52852)
self.assertEqual(quickstart_metrics["commit_size_removed_lines"], 51749)
Expand All @@ -243,7 +248,8 @@ def test_to_date(self):
self.assertEqual(angular_metrics["total_contributors"], 56)
self.assertEqual(angular_metrics["pony_factor"], 5)
self.assertEqual(angular_metrics["elephant_factor"], 2)
self.assertEqual(angular_metrics["file_types_other"], 514)
self.assertEqual(angular_metrics["file_types_other"], 510)
self.assertEqual(angular_metrics["file_types_binary"], 4)
self.assertEqual(angular_metrics["file_types_code"], 2116)
self.assertEqual(angular_metrics["commit_size_added_lines"], 213634)
self.assertEqual(angular_metrics["commit_size_removed_lines"], 245635)
Expand Down Expand Up @@ -299,6 +305,7 @@ def test_duplicate_repo(self):
self.assertEqual(quickstart_metrics["pony_factor"], 2)
self.assertEqual(quickstart_metrics["elephant_factor"], 2)
self.assertEqual(quickstart_metrics["file_types_other"], 683)
self.assertEqual(quickstart_metrics["file_types_binary"], 0)
self.assertEqual(quickstart_metrics["file_types_code"], 479)
self.assertEqual(quickstart_metrics["commit_size_added_lines"], 53121)
self.assertEqual(quickstart_metrics["commit_size_removed_lines"], 51852)
Expand Down Expand Up @@ -354,6 +361,7 @@ def test_non_git_repo(self):
self.assertEqual(quickstart_metrics["pony_factor"], 2)
self.assertEqual(quickstart_metrics["elephant_factor"], 2)
self.assertEqual(quickstart_metrics["file_types_other"], 683)
self.assertEqual(quickstart_metrics["file_types_binary"], 0)
self.assertEqual(quickstart_metrics["file_types_code"], 479)
self.assertEqual(quickstart_metrics["commit_size_added_lines"], 53121)
self.assertEqual(quickstart_metrics["commit_size_removed_lines"], 51852)
Expand Down
27 changes: 23 additions & 4 deletions tests/unit/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,32 @@ def test_get_elephant_factor(self):
def test_file_type_metrics(self):
    """Test that file type metrics are calculated correctly"""

    self.analyzer.process_events(self.events)

    # The three counters are always present in the result, so they are
    # indexed directly rather than read with a fallback default.
    file_metrics = self.analyzer.get_file_type_metrics()
    self.assertEqual(file_metrics["code"], 54)
    self.assertEqual(file_metrics["binary"], 1)
    self.assertEqual(file_metrics["other"], 24)

def test_file_type_metrics_empty(self):
    """Check that every file type counter is zero when this test processes no events"""

    counters = self.analyzer.get_file_type_metrics()
    for file_type in ("code", "binary", "other"):
        self.assertEqual(counters[file_type], 0)

def test_file_type_metrics_new_regex(self):
    """Check the file type counters when both classification patterns are overridden"""

    custom_analyzer = GitEventsAnalyzer(
        code_file_pattern=r"\.py$",
        binary_file_pattern=r"\.md$",
    )
    custom_analyzer.process_events(self.events)

    counters = custom_analyzer.get_file_type_metrics()
    expected_counts = (("code", 53), ("binary", 4), ("other", 22))
    for file_type, expected in expected_counts:
        self.assertEqual(counters[file_type], expected)

def test_commit_size_metrics(self):
"""Test that commit size metrics are calculated correctly"""
Expand Down
24 changes: 18 additions & 6 deletions trustable_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@
)
@click.option("--verify-certs", is_flag=True, default=False, help="Verify SSL/TLS certificates")
@click.option("--verbose", is_flag=True, default=False, help="Increase output verbosity")
@click.option("--code-file-pattern", help="Regular expression to match code file types")
@click.option("--binary-file-pattern", help="Regular expression to match binary file types")
def trustable_grimoirelab_score(
filename: str,
grimoirelab_url: str,
Expand All @@ -90,6 +92,8 @@ def trustable_grimoirelab_score(
to_date: datetime.datetime | None = None,
verify_certs: bool = False,
verbose: bool = False,
code_file_pattern: str | None = None,
binary_file_pattern: str | None = None,
) -> None:
"""Calculate metrics for Trustable using GrimoireLab.

Expand Down Expand Up @@ -131,6 +135,8 @@ def trustable_grimoirelab_score(
to_date=to_date,
verify_certs=verify_certs,
timeout=repository_timeout,
code_file_pattern=code_file_pattern,
binary_file_pattern=binary_file_pattern,
)

package_metrics = {"packages": {}}
Expand Down Expand Up @@ -203,6 +209,8 @@ def generate_metrics_when_ready(
to_date: datetime.datetime | None = None,
verify_certs: bool = False,
timeout: int = 3600,
code_file_pattern: str | None = None,
binary_file_pattern: str | None = None,
) -> dict[str, Any]:
"""Generate metrics once the repositories have finished the collection.

Expand All @@ -214,6 +222,8 @@ def generate_metrics_when_ready(
:param to_date: End date for metrics.
:param verify_certs: Verify SSL/TLS certificates.
:param timeout: Seconds to wait before failing getting metrics
:param code_file_pattern: Regular expression to match code file types.
:param binary_file_pattern: Regular expression to match binary file types.
"""
logging.info("Generating metrics")

Expand All @@ -228,12 +238,14 @@ def generate_metrics_when_ready(
for repository in pending_repositories:
if repository_ready(grimoirelab_client, repository, after_date):
metrics["repositories"][repository] = get_repository_metrics(
repository,
opensearch_url,
opensearch_index,
from_date,
to_date,
verify_certs,
repository=repository,
opensearch_url=opensearch_url,
opensearch_index=opensearch_index,
from_date=from_date,
to_date=to_date,
verify_certs=verify_certs,
code_file_pattern=code_file_pattern,
binary_file_pattern=binary_file_pattern,
)
processed.add(repository)

Expand Down
27 changes: 20 additions & 7 deletions trustable_cli/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,20 +40,27 @@
COMMIT_EVENT_TYPE = "org.grimoirelab.events.git.commit"
AUTHOR_FIELD = "Author"
# Default regular expression for file paths counted as code. Each alternative
# is an escaped extension anchored at the end of the path; alternatives must be
# separated by a bare "|" (an escaped "\|" would match a literal pipe instead).
FILE_TYPE_CODE = (
    r"\.bazel$|\.bazelrc$|\.bzl$|\.c$|\.cc$|\.cp$|\.cpp$|\.cs$|\.cxx$|\.c\+\+$|"
    r"\.go$|\.h$|\.hpp$|\.js$|\.mjs$|\.java$|\.pl$|\.py$|\.rs$|\.sh$|\.tf$|\.ts$"
)
# Default regular expression for file paths counted as binary: one escaped,
# end-anchored alternative per extension listed below, joined with "|".
FILE_TYPE_BINARY = "|".join(
    rf"\.{extension}$"
    for extension in (
        "7z", "a", "abb", "apk", "app", "appx", "arc", "bin", "bz2", "class", "deb",
        "dll", "dmg", "exe", "gz", "ipa", "iso", "jar", "lib", "msi", "o", "obj", "rar",
        "rpm", "so", "tar", "xar", "xz", "zip", "zst", "Z",
    )
)


class GitEventsAnalyzer:
def __init__(self, code_file_pattern: str | None = None, binary_file_pattern: str | None = None):
    """Initialise empty counters and compile the file classification patterns.

    :param code_file_pattern: Regular expression to match code file types;
        FILE_TYPE_CODE is used when it is None or empty.
    :param binary_file_pattern: Regular expression to match binary file types;
        FILE_TYPE_BINARY is used when it is None or empty.
    """
    self.total_commits: int = 0
    self.contributors: Counter = Counter()
    self.companies: Counter = Counter()
    # All three categories start at zero so every key is always present.
    self.file_types: dict = {"code": 0, "binary": 0, "other": 0}
    self.added_lines: int = 0
    self.removed_lines: int = 0
    self.messages_sizes: list = []
    # Compile once here; the compiled patterns are reused for every file.
    self.re_code_pattern = re.compile(code_file_pattern or FILE_TYPE_CODE)
    self.re_binary_pattern = re.compile(binary_file_pattern or FILE_TYPE_BINARY)

def process_events(self, events: iter(dict[str, Any])):
for event in events:
Expand Down Expand Up @@ -111,7 +118,7 @@ def get_elephant_factor(self):
def get_file_type_metrics(self):
    """Get the file type metrics

    :returns: a new dict copied from ``self.file_types``, so callers can
        modify the result without altering the analyzer's own counters.
    """

    return dict(self.file_types)

def get_commit_size_metrics(self):
"""Get the commit size metrics"""
Expand Down Expand Up @@ -190,8 +197,10 @@ def _update_file_metrics(self, event):
if not file["file"]:
continue
# File type metrics
if re.search(FILE_TYPE_CODE, file["file"]):
if self.re_code_pattern.search(file["file"]):
self.file_types["code"] += 1
elif self.re_binary_pattern.search(file["file"]):
self.file_types["binary"] += 1
else:
self.file_types["other"] += 1

Expand Down Expand Up @@ -219,6 +228,8 @@ def get_repository_metrics(
from_date: datetime.datetime = None,
to_date: datetime.datetime = None,
verify_certs: bool = True,
code_file_pattern: str | None = None,
binary_file_pattern: str | None = None,
):
"""
Get the metrics from a repository.
Expand All @@ -229,14 +240,16 @@ def get_repository_metrics(
:param verify_certs: Boolean, verify SSL/TLS certificates, default True
:param from_date: Start date, by default None
:param to_date: End date, by default None
:param code_file_pattern: Regular expression to match code file types.
:param binary_file_pattern: Regular expression to match binary file types.
"""
os_conn = connect_to_opensearch(opensearch_url, verify_certs=verify_certs)

metrics = {"metrics": {}}

events = get_repository_events(os_conn, opensearch_index, repository, from_date, to_date)

analyzer = GitEventsAnalyzer()
analyzer = GitEventsAnalyzer(code_file_pattern=code_file_pattern, binary_file_pattern=binary_file_pattern)
analyzer.process_events(events)

metrics["metrics"]["total_commits"] = analyzer.get_commit_count()
Expand Down