Skip to content

Commit b778fcd

Browse files
authored
Merge pull request #2891 from chaoss/docker-dev-fix
Docker dev fix: Fixes to commit comments and events
2 parents 17038d4 + eb4fa84 commit b778fcd

File tree

20 files changed

+13586
-1658
lines changed

20 files changed

+13586
-1658
lines changed

augur/api/routes/pull_request_reports.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@
2121
from bokeh.models.glyphs import Rect
2222
from bokeh.transform import dodge, factor_cmap, transform
2323

24+
# from selenium.webdriver import Firefox, FirefoxOptions
25+
# options = FirefoxOptions()
26+
# options.headless = True
27+
# webdriver = Firefox(options=options)
28+
#export_png(item, path, webdriver=webdriver)
29+
2430
warnings.filterwarnings('ignore')
2531

2632
from augur.api.routes import AUGUR_API_VERSION
@@ -604,6 +610,7 @@ def average_commits_per_PR():
604610
# opts = FirefoxOptions()
605611
# opts.add_argument("--headless")
606612
# driver = webdriver.Firefox(firefox_options=opts)
613+
# filename = export_png(grid, timeout=180, webdriver=webdriver)
607614
filename = export_png(grid, timeout=180)
608615

609616
return send_file(filename)

augur/application/cli/backend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ def determine_worker_processes(ratio,maximum):
185185
sleep_time += 6
186186

187187
#40% of estimate, Maximum value of 90 : Raised because capacity allows it
188-
core_num_processes = determine_worker_processes(.40, 50)
188+
core_num_processes = determine_worker_processes(.40, 90)
189189
logger.info(f"Starting core worker processes with concurrency={core_num_processes}")
190190
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h"
191191
process_list.append(subprocess.Popen(core_worker.split(" ")))

augur/application/cli/collection.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def determine_worker_processes(ratio,maximum):
125125
sleep_time += 6
126126

127127
#40% of estimate, Maximum value of 90: Raised because capacity allows it
128-
core_num_processes = determine_worker_processes(.40, 50)
128+
core_num_processes = determine_worker_processes(.40, 90)
129129
logger.info(f"Starting core worker processes with concurrency={core_num_processes}")
130130
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h"
131131
process_list.append(subprocess.Popen(core_worker.split(" ")))

augur/application/cli/tasks.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ def start():
3636
secondary_worker_process = None
3737

3838
scheduling_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=1 -n scheduling:{uuid.uuid4().hex}@%h -Q scheduling"
39-
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=50 -n core:{uuid.uuid4().hex}@%h"
40-
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=50 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
39+
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=90 -n core:{uuid.uuid4().hex}@%h"
40+
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=20 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
4141

4242
scheduling_worker_process = subprocess.Popen(scheduling_worker.split(" "))
4343
core_worker_process = subprocess.Popen(core_worker.split(" "))

augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from datetime import datetime
55

66
from augur.tasks.init.celery_app import celery_app as celery
7-
from augur.tasks.github.util.github_data_access import GithubDataAccess
7+
from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException
88
from augur.application.db.models import ContributorRepo
99
from augur.application.db.lib import bulk_insert_dicts
1010
from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth
@@ -100,17 +100,22 @@ def contributor_breadth_model(self) -> None:
100100

101101

102102
cntrb_events = []
103-
for event in github_data_access.paginate_resource(repo_cntrb_url):
103+
try:
104+
for event in github_data_access.paginate_resource(repo_cntrb_url):
104105

105-
cntrb_events.append(event)
106+
cntrb_events.append(event)
106107

107-
event_age = datetime.strptime(event["created_at"], "%Y-%m-%dT%H:%M:%SZ")
108-
if event_age < newest_event_in_db:
109-
logger.info("Found cntrb events we already have...skipping the rest")
110-
break
108+
event_age = datetime.strptime(event["created_at"], "%Y-%m-%dT%H:%M:%SZ")
109+
if event_age < newest_event_in_db:
110+
logger.info("Found cntrb events we already have...skipping the rest")
111+
break
111112

112-
if len(cntrb_events) == 0:
113-
logger.info("There are no cntrb events, or new events for this user.\n")
113+
if len(cntrb_events) == 0:
114+
logger.info("There are no cntrb events, or new events for this user.\n")
115+
continue
116+
117+
except UrlNotFoundException as e:
118+
logger.warning(e)
114119
continue
115120

116121
events = process_contributor_events(cntrb, cntrb_events, logger, tool_source, tool_version, data_source)
Lines changed: 30 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import requests
2-
import logging
3-
import traceback
2+
import logging
3+
import traceback
44

55
logger = logging.getLogger(__name__)
66

@@ -9,87 +9,81 @@ def get_NPM_data(package):
99
r = requests.get(url)
1010
if r.status_code < 400:
1111
return r.json()
12+
logger.warning(f"Failed to fetch data for package {package}. HTTP Status: {r.status_code}")
1213
return {}
1314

14-
1515
def clean_version(version):
1616
version = [v for v in version if v.isdigit() or v == '.']
1717
return ''.join(version)
1818

1919
def split_version(version):
20-
#Split version string into list seperated by .
21-
#assign elements of list to respective variables.
2220
version_list = list(version.split('.'))
2321
patch = version_list.pop(-1)
2422
minor = version_list.pop(-1)
2523
major = version_list[0]
26-
27-
return major,minor,patch
28-
29-
24+
return major, minor, patch
3025

3126
def get_latest_patch(version, data):
27+
if 'versions' not in data:
28+
logger.error(f"'versions' key not found in the NPM data for version {version}. Data: {data}")
29+
raise KeyError("'versions' key not found")
30+
3231
versions = data['versions']
3332
try:
3433
index = list(versions.keys()).index(version)
3534
except ValueError as e:
35+
logger.error(f"Version {version} not found in the 'versions' list. Error: {e}")
3636
raise e
3737

38-
major,minor,patch = split_version(version)
38+
major, minor, patch = split_version(version)
3939
consider_version = version
4040
for v in list(versions.keys())[index:]:
41-
if v.split('.')[0]==major:
42-
if v.split('.')[1]== minor:
43-
if v.split('.')[2]>patch:
41+
if v.split('.')[0] == major:
42+
if v.split('.')[1] == minor:
43+
if v.split('.')[2] > patch:
4444
consider_version = v
4545
return consider_version
4646

47-
4847
def get_lastest_minor(version, data):
49-
try:
50-
versions = data['versions']
51-
except Exception as e:
52-
logger.info(
53-
''.join(traceback.format_exception(None, e, e.__traceback__)))
54-
# raise e
55-
48+
if 'versions' not in data:
49+
logger.error(f"'versions' key not found in the NPM data. Data: {data}")
50+
raise KeyError("'versions' key not found")
51+
52+
versions = data['versions']
5653
try:
5754
index = list(versions.keys()).index(version)
5855
except ValueError as e:
59-
logger.info(f'error is {e} on the NPM. Some kind of value error. Probably a VALUES error for Node, #AmIRight?')
56+
logger.info(f"Version {version} not found in the 'versions' list. Error: {e}")
6057
raise e
6158

62-
major,minor,patch = split_version(version)
63-
59+
major, minor, patch = split_version(version)
6460
consider_version = get_latest_patch(version, data)
6561
for v in list(versions.keys())[index:]:
66-
if v.split('.')[0]==major:
67-
if v.split('.')[1]>minor:
68-
consider_version = v
69-
return consider_version
70-
62+
if v.split('.')[0] == major:
63+
if v.split('.')[1] > minor:
64+
consider_version = v
65+
return consider_version
7166

7267
def get_npm_release_date(data, version):
73-
release_time = data['time'][version]
68+
release_time = data['time'].get(version)
7469
if release_time:
7570
return release_time
71+
logger.warning(f"Release time not found for version {version}")
7672
return None
7773

78-
7974
def get_npm_latest_version(data):
80-
return data['dist-tags']['latest']
75+
return data['dist-tags'].get('latest', 'unknown')
8176

82-
#add code here
8377
def get_npm_current_version(data, requirement):
84-
if requirement[0]=='~':
78+
if requirement[0] == '~':
8579
try:
8680
return get_latest_patch(clean_version(requirement), data)
8781
except ValueError:
8882
return None
89-
elif requirement[0]=='^':
83+
elif requirement[0] == '^':
9084
try:
9185
return get_lastest_minor(clean_version(requirement), data)
9286
except ValueError:
9387
return None
9488
else:
95-
return requirement
89+
return requirement

augur/tasks/git/dependency_tasks/tasks.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from augur.tasks.git.dependency_tasks.core import *
44
from augur.tasks.init.celery_app import celery_app as celery
55
from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask, AugurSecondaryRepoCollectionTask
6+
from augur.tasks.util.metadata_exception import MetadataException
67

78

89
@celery.task(base=AugurFacadeRepoCollectionTask)
@@ -20,4 +21,26 @@ def process_ossf_dependency_metrics(self, repo_git):
2021

2122
logger = logging.getLogger(process_ossf_dependency_metrics.__name__)
2223

23-
generate_scorecard(logger, repo_git)
24+
try:
25+
generate_scorecard(logger, repo_git)
26+
except Exception as e:
27+
logger.warning(f'Exception generating scorecard: {e}')
28+
tracer = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
29+
logger.warning(f'Full stack trace of OpenSSF scorecard error: {tracer}')
30+
raise MetadataException(f"An error occurred while generating the scorecard: {str(e)}")
31+
32+
"""
33+
This try/except block is an attempt to get more information about this occasional error:
34+
35+
```bash
36+
Traceback (most recent call last):
37+
File "/home/ubuntu/github/virtualenvs/hosted/lib/python3.11/site-packages/billiard/pool.py", line 366, in workloop
38+
put((READY, (job, i, result, inqW_fd)))
39+
File "/home/ubuntu/github/virtualenvs/hosted/lib/python3.11/site-packages/billiard/queues.py", line 366, in put
40+
self.send_payload(ForkingPickler.dumps(obj))
41+
^^^^^^^^^^^^^^^^^^^^^^^^^
42+
File "/home/ubuntu/github/virtualenvs/hosted/lib/python3.11/site-packages/billiard/reduction.py", line 56, in dumps
43+
cls(buf, protocol).dump(obj)
44+
billiard.pool.MaybeEncodingError: Error sending result: ''(1, <ExceptionInfo: MetadataException("\'checks\' | Additional metadata: required_output: {}")>, None)''. Reason: ''PicklingError("Can\'t pickle <class \'augur.tasks.util.metadata_exception.MetadataException\'>: it\'s not the same object as augur.tasks.util.metadata_exception.MetadataException")''.
45+
```
46+
"""

augur/tasks/git/util/facade_worker/facade_worker/analyzecommit.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,10 +179,13 @@ def generate_commit_record(repos_id,commit,filename,
179179
#db_local.commit()
180180
execute_sql(store_working_commit)
181181

182+
# commit_message = check_output(
183+
# f"git --git-dir {repo_loc} log --format=%B -n 1 {commit}".split()
184+
# ).strip()
185+
182186
commit_message = check_output(
183187
f"git --git-dir {repo_loc} log --format=%B -n 1 {commit}".split()
184-
).strip()
185-
188+
).decode('utf-8').strip()
186189

187190
msg_record = {
188191
'repo_id' : repo_id,

augur/tasks/github/contributors.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import time
22
import logging
3+
import traceback
34

45
from augur.tasks.init.celery_app import celery_app as celery
56
from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask

augur/tasks/github/detect_move/core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c
115115

116116

117117
session.commit()
118-
raise Exception("ERROR: Repo has moved! Resetting Collection!")
118+
raise Exception("ERROR: Repo has moved, and there is no redirection! 404 returned, not 301. Resetting Collection!")
119119

120120

121121
if attempts >= 10:

0 commit comments

Comments
 (0)