Skip to content

Commit 03675fc

Browse files
committed
Merge branch 'main' of https://github.com/chaoss/augur into improve-dockerization
2 parents 604983c + 571aa33 commit 03675fc

File tree

16 files changed

+70
-1649
lines changed

16 files changed

+70
-1649
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Augur NEW Release v0.76.2
1+
# Augur NEW Release v0.76.4
22

33
Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data - less data carpentry for everyone else!
44
The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot). A public instance of 8Knot is available [here](https://metrix.chaoss.io); it is tied to a public instance of [Augur](https://ai.chaoss.io).

add.md

Lines changed: 0 additions & 1 deletion
This file was deleted.

augur/api/view/api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def av_add_user_repo():
106106
# matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo}
107107
elif Repo.parse_gitlab_repo_url(url)[0]:
108108

109-
org_name, repo_name = Repo.parse_github_repo_url(url)
109+
org_name, repo_name = Repo.parse_gitlab_repo_url(url)
110110
repo_git = f"https://gitlab.com/{org_name}/{repo_name}"
111111

112112
# TODO: gitlab ensure the whole repo git is inserted so it can be found here

augur/application/db/data_parse.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1151,7 +1151,7 @@ def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, r
11511151
return message_ref_dict
11521152

11531153

1154-
def extract_needed_gitlab_message_data(comment: dict, platform_id: int, tool_source: str, tool_version: str, data_source: str):
1154+
def extract_needed_gitlab_message_data(comment: dict, platform_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str):
11551155
"""
11561156
Extract specific metadata for a comment from an api response
11571157
and connect it to the relevant platform id.
@@ -1169,6 +1169,7 @@ def extract_needed_gitlab_message_data(comment: dict, platform_id: int, tool_sou
11691169
"""
11701170

11711171
comment_dict = {
1172+
"repo_id": repo_id,
11721173
"pltfrm_id": platform_id,
11731174
"msg_text": comment['body'],
11741175
"msg_timestamp": comment['created_at'],

augur/application/db/models/augur_data.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import logging
2727
import re
2828
import json
29+
import urllib.parse
2930

3031

3132
from augur.application.db.models.base import Base
@@ -971,7 +972,7 @@ def is_valid_gitlab_repo(gl_session, url: str) -> bool:
971972
return False, {"status": "Invalid repo URL"}
972973

973974
# Encode namespace and project name for the API request
974-
project_identifier = f"{owner}%2F{repo}"
975+
project_identifier = urllib.parse.quote(f"{owner}/{repo}", safe='')
975976
url = REPO_ENDPOINT.format(project_identifier)
976977

977978
attempts = 0
@@ -1030,7 +1031,7 @@ def parse_gitlab_repo_url(url: str) -> tuple:
10301031
Tuple of owner and repo. Or a tuple of None and None if the url is invalid.
10311032
"""
10321033

1033-
result = re.search(r"https?:\/\/gitlab\.com\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$", url)
1034+
result = re.search(r"https?:\/\/gitlab\.com\/([A-Za-z0-9\-_\/]+)\/([A-Za-z0-9\-_]+)(\.git)?\/?$", url)
10341035

10351036
if not result:
10361037
return None, None

augur/tasks/git/dependency_tasks/core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def generate_scorecard(logger, repo_git):
7676
path = repo_git[8:]
7777
if path[-4:] == '.git':
7878
path = path.replace(".git", "")
79-
command = '--repo=' + path
79+
command = '--local=' + path
8080

8181
# this is the path where our scorecard project is located
8282
path_to_scorecard = os.environ['HOME'] + '/scorecard'
@@ -99,7 +99,7 @@ def generate_scorecard(logger, repo_git):
9999
logger.info('adding to database...')
100100
logger.debug(f"output: {required_output}")
101101

102-
if not required_output['checks']:
102+
if not required_output.get('checks'):
103103
logger.info('No scorecard checks found!')
104104
return
105105

augur/tasks/git/dependency_tasks/tasks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def process_ossf_dependency_metrics(self, repo_git):
2727
logger.warning(f'Exception generating scorecard: {e}')
2828
tracer = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
2929
logger.warning(f'Full stack trace of OpenSSF scorecard error: {tracer}')
30-
raise MetadataException(f"An error occurred while generating the scorecard: {str(e)}")
30+
raise MetadataException(e,f"An error occurred while generating the scorecard: {str(e)}")
3131

3232
"""
3333
This try/except block is an attempt to get more information about this occasional error:

augur/tasks/github/events.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ def _collect_and_process_pr_events(self, owner, repo, repo_id, key_auth):
316316
with engine.connect() as connection:
317317

318318
query = text(f"""
319-
select pull_request_id, pr_src_number as gh_pr_number, pr_src_id from pull_requests order by pr_created_at desc; from pull_requests WHERE repo_id={repo_id} order by pr_created_at desc;
319+
select pull_request_id, pr_src_number as gh_pr_number, pr_src_id from pull_requests WHERE repo_id={repo_id} order by pr_created_at desc;
320320
""")
321321

322322
pr_result = connection.execute(query).fetchall()

augur/tasks/github/util/util.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Utility functions that are useful for several Github tasks"""
22
from typing import Any, List, Tuple
33
import logging
4+
import urllib.parse
45
import json
56
import httpx
67
from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth
@@ -46,6 +47,10 @@ def get_owner_repo(git_url: str) -> Tuple[str, str]:
4647

4748
return owner, repo
4849

50+
def get_gitlab_repo_identifier(owner, repo):
51+
52+
return urllib.parse.quote(f"{owner}/{repo}", safe='')
53+
4954

5055
def parse_json_response(logger: logging.Logger, response: httpx.Response) -> dict:
5156
# try to get json from response

augur/tasks/gitlab/events_task.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
88
from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler
99
from augur.application.db.data_parse import extract_gitlab_mr_event_data, extract_gitlab_issue_event_data
10-
from augur.tasks.github.util.util import get_owner_repo
11-
from augur.application.db.models import Issue, IssueEvent, PullRequest, PullRequestEvent
10+
from augur.tasks.github.util.util import get_gitlab_repo_identifier
11+
from augur.application.db.models import Issue, IssueEvent, PullRequest, PullRequestEvent, Repo
1212
from augur.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_session
1313
from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth
1414

@@ -24,7 +24,7 @@ def collect_gitlab_issue_events(repo_git) -> int:
2424
repo_git: the repo url string
2525
"""
2626

27-
owner, repo = get_owner_repo(repo_git)
27+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
2828

2929
logger = logging.getLogger(collect_gitlab_issue_events.__name__)
3030

@@ -52,7 +52,7 @@ def collect_gitlab_merge_request_events(repo_git) -> int:
5252
repo_git: the repo url string
5353
"""
5454

55-
owner, repo = get_owner_repo(repo_git)
55+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
5656

5757
logger = logging.getLogger(collect_gitlab_issue_events.__name__)
5858

@@ -82,11 +82,13 @@ def retrieve_all_gitlab_event_data(gtype, repo_git, logger, key_auth) -> None:
8282
key_auth: key auth cache and rotator object
8383
"""
8484

85-
owner, repo = get_owner_repo(repo_git)
85+
owner, repo = Repo.parse_gitlab_repo_url(repo_git)
86+
87+
repo_identifier = get_gitlab_repo_identifier(owner, repo)
8688

8789
logger.info(f"Collecting gitlab issue events for {owner}/{repo}")
8890

89-
url = f"https://gitlab.com/api/v4/projects/{owner}%2f{repo}/events?target_type={gtype}"
91+
url = f"https://gitlab.com/api/v4/projects/{repo_identifier}/events?target_type={gtype}"
9092
events = GitlabApiHandler(key_auth, logger)
9193

9294
all_data = []

0 commit comments

Comments
 (0)