|
13 | 13 | from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias,UnresolvedCommitEmail, Contributor, CollectionStatus, UserGroup, RepoGroup
|
14 | 14 | from augur.tasks.util.collection_state import CollectionState
|
15 | 15 | from augur.application.db import get_session, get_engine
|
16 |
| -from augur.application.db.util import execute_session_query |
| 16 | +from augur.application.db.util import execute_session_query, convert_type_of_value |
17 | 17 | from augur.application.db.session import remove_duplicates_by_uniques, remove_null_characters_from_list_of_dicts
|
18 | 18 |
|
19 | 19 | logger = logging.getLogger("db_lib")
|
20 | 20 |
|
21 |
def convert_type_of_value(config_dict, logger=None):
    """Coerce ``config_dict["value"]`` to the type named by ``config_dict["type"]``.

    Recognised type names are ``"str"``, ``"int"``, ``"bool"`` and ``"float"``.
    A type of ``"str"``, ``None``, or a missing ``"type"`` key leaves the
    dictionary untouched. Any other type name is reported (through ``logger``
    when one is given, otherwise via ``print``) and the value is left as-is.

    Args:
        config_dict: Dictionary holding ``"type"`` and ``"value"`` entries.
            It is modified in place.
        logger: Optional logger used to report unsupported type names.

    Returns:
        The same ``config_dict`` object that was passed in.

    Raises:
        ValueError, TypeError: If the value cannot be converted by ``int()``
            or ``float()`` for the corresponding type names.
    """
    # .get() so that a missing "type" key is treated like an explicit None
    # instead of raising KeyError before the None check can run.
    data_type = config_dict.get("type")

    if data_type is None or data_type == "str":
        return config_dict

    if data_type == "int":
        config_dict["value"] = int(config_dict["value"])
    elif data_type == "bool":
        # Only the text "false" (case-insensitive) maps to False; everything
        # else is True. Going through str() keeps that rule while also
        # accepting values that are already real booleans.
        config_dict["value"] = str(config_dict["value"]).lower() != "false"
    elif data_type == "float":
        config_dict["value"] = float(config_dict["value"])
    else:
        message = f"Need to add support for {data_type} types to config"
        if logger:
            logger.error(message)
        else:
            print(message)

    return config_dict
51 | 21 |
|
52 | 22 | def get_section(section_name) -> dict:
|
53 | 23 | """Get a section of data from the config.
|
@@ -255,24 +225,46 @@ def facade_bulk_insert_commits(logger, records):
|
255 | 225 |
|
256 | 226 | facade_bulk_insert_commits(logger, firsthalfRecords)
|
257 | 227 | facade_bulk_insert_commits(logger, secondhalfRecords)
|
258 |
| - elif len(records) == 1 and isinstance(e,DataError) and "time zone displacement" in f"{e}": |
| 228 | + elif len(records) == 1: |
259 | 229 | commit_record = records[0]
|
260 | 230 | #replace incomprehensible dates with epoch.
|
261 | 231 | #2021-10-11 11:57:46 -0500
|
262 | 232 |
|
263 | 233 | # placeholder_date = "1970-01-01 00:00:15 -0500"
|
264 |
| - placeholder_date = commit_record['author_timestamp'] |
| 234 | + placeholder_date = commit_record['cmt_author_timestamp'] |
| 235 | + |
| 236 | + postgres_valid_timezones = { |
| 237 | + -1200, -1100, -1000, -930, -900, -800, -700, |
| 238 | + -600, -500, -400, -300, -230, -200, -100, 000, |
| 239 | + 100, 200, 300, 330, 400, 430, 500, 530, 545, 600, |
| 240 | + 630, 700, 800, 845, 900, 930, 1000, 1030, 1100, 1200, |
| 241 | + 1245, 1300, 1400 |
| 242 | + } |
265 | 243 |
|
266 | 244 | # Reconstruct timezone portion of the date string to UTC
|
267 |
| - placeholder_date = re.split("[-+]", placeholder_date) |
268 |
| - placeholder_date.pop() |
269 |
| - placeholder_date = "-".join(placeholder_date) + "+0000" |
| 245 | + placeholder_date_segments = re.split(" ", placeholder_date) |
| 246 | + tzdata = placeholder_date_segments.pop() |
| 247 | + |
| 248 | + if ":" in tzdata: |
| 249 | + tzdata = tzdata.replace(":", "") |
| 250 | + |
| 251 | + if int(tzdata) not in postgres_valid_timezones: |
| 252 | + tzdata = "+0000" |
| 253 | + else: |
| 254 | + raise e |
| 255 | + |
| 256 | + placeholder_date_segments.append(tzdata) |
| 257 | + |
| 258 | + placeholder_date = " ".join(placeholder_date_segments) |
270 | 259 |
|
271 | 260 | #Check for improper utc timezone offset
|
272 | 261 | #UTC timezone offset should be between -14:00 and +14:00
|
273 | 262 |
|
274 |
| - commit_record['author_timestamp'] = placeholder_date |
275 |
| - commit_record['committer_timestamp'] = placeholder_date |
| 263 | + # analyzecommit.generate_commit_record() defines the keys on the commit_record dictionary |
| 264 | + commit_record['cmt_author_timestamp'] = placeholder_date |
| 265 | + commit_record['cmt_committer_timestamp'] = placeholder_date |
| 266 | + |
| 267 | + logger.warning(f"commit with invalid timezone set to UTC: {commit_record['cmt_commit_hash']}") |
276 | 268 |
|
277 | 269 | session.execute(
|
278 | 270 | s.insert(Commit),
|
|
0 commit comments