Skip to content

Commit b07f96d

Browse files
authored
Merge pull request #1354 from Codium-ai/tr/3-way-prs
Tr/3 way prs
2 parents e0c1540 + 0657770 commit b07f96d

File tree

3 files changed

+61
-33
lines changed

3 files changed

+61
-33
lines changed

pr_agent/algo/git_patch_processing.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0,
3131

3232

3333
def decode_if_bytes(original_file_str):
34-
if isinstance(original_file_str, bytes):
34+
if isinstance(original_file_str, (bytes, bytearray)):
3535
try:
3636
return original_file_str.decode('utf-8')
3737
except UnicodeDecodeError:
@@ -61,23 +61,26 @@ def process_patch_lines(patch_str, original_file_str, patch_extra_lines_before,
6161
patch_lines = patch_str.splitlines()
6262
extended_patch_lines = []
6363

64+
is_valid_hunk = True
6465
start1, size1, start2, size2 = -1, -1, -1, -1
6566
RE_HUNK_HEADER = re.compile(
6667
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
6768
try:
68-
for line in patch_lines:
69+
for i,line in enumerate(patch_lines):
6970
if line.startswith('@@'):
7071
match = RE_HUNK_HEADER.match(line)
7172
# identify hunk header
7273
if match:
7374
# finish processing previous hunk
74-
if start1 != -1 and patch_extra_lines_after > 0:
75+
if is_valid_hunk and (start1 != -1 and patch_extra_lines_after > 0):
7576
delta_lines = [f' {line}' for line in original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]]
7677
extended_patch_lines.extend(delta_lines)
7778

7879
section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
7980

80-
if patch_extra_lines_before > 0 or patch_extra_lines_after > 0:
81+
is_valid_hunk = check_if_hunk_lines_matches_to_file(i, original_lines, patch_lines, start1)
82+
83+
if is_valid_hunk and (patch_extra_lines_before > 0 or patch_extra_lines_after > 0):
8184
def _calc_context_limits(patch_lines_before):
8285
extended_start1 = max(1, start1 - patch_lines_before)
8386
extended_size1 = size1 + (start1 - extended_start1) + patch_extra_lines_after
@@ -138,7 +141,7 @@ def _calc_context_limits(patch_lines_before):
138141
return patch_str
139142

140143
# finish processing last hunk
141-
if start1 != -1 and patch_extra_lines_after > 0:
144+
if start1 != -1 and patch_extra_lines_after > 0 and is_valid_hunk:
142145
delta_lines = original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]
143146
# add space at the beginning of each extra line
144147
delta_lines = [f' {line}' for line in delta_lines]
@@ -148,6 +151,23 @@ def _calc_context_limits(patch_lines_before):
148151
return extended_patch_str
149152

150153

154+
def check_if_hunk_lines_matches_to_file(i, original_lines, patch_lines, start1):
155+
"""
156+
Check if the hunk lines match the original file content. We saw cases where the hunk header line doesn't match the original file content, and then
157+
extending the hunk with extra lines before the hunk header can cause the hunk to be invalid.
158+
"""
159+
is_valid_hunk = True
160+
try:
161+
if i + 1 < len(patch_lines) and patch_lines[i + 1][0] == ' ': # an existing line in the file
162+
if patch_lines[i + 1].strip() != original_lines[start1 - 1].strip():
163+
is_valid_hunk = False
164+
get_logger().error(
165+
f"Invalid hunk in PR, line {start1} in hunk header doesn't match the original file content")
166+
except:
167+
pass
168+
return is_valid_hunk
169+
170+
151171
def extract_hunk_headers(match):
152172
res = list(match.groups())
153173
for i in range(len(res)):

pr_agent/git_providers/github_provider.py

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,24 @@ def get_diff_files(self) -> list[FilePatchInfo]:
199199
if avoid_load:
200200
original_file_content_str = ""
201201
else:
202-
original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha)
202+
# The base.sha will point to the current state of the base branch (including parallel merges), not the original base commit when the PR was created
203+
# We can fix this by finding the merge base commit between the PR head and base branches
204+
# Note that The pr.head.sha is actually correct as is - it points to the latest commit in your PR branch.
205+
# This SHA isn't affected by parallel merges to the base branch since it's specific to your PR's branch.
206+
repo = self.repo_obj
207+
pr = self.pr
208+
try:
209+
compare = repo.compare(pr.base.sha, pr.head.sha)
210+
merge_base_commit = compare.merge_base_commit
211+
except Exception as e:
212+
get_logger().error(f"Failed to get merge base commit: {e}")
213+
merge_base_commit = pr.base
214+
if merge_base_commit.sha != pr.base.sha:
215+
get_logger().info(
216+
f"Using merge base commit {merge_base_commit.sha} instead of base commit "
217+
f"{pr.base.sha} for {file.filename}")
218+
original_file_content_str = self._get_pr_file_content(file, merge_base_commit.sha)
219+
203220
if not patch:
204221
patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)
205222

@@ -283,8 +300,7 @@ def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_
283300
relevant_line_in_file,
284301
absolute_position)
285302
if position == -1:
286-
if get_settings().config.verbosity_level >= 2:
287-
get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
303+
get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
288304
subject_type = "FILE"
289305
else:
290306
subject_type = "LINE"
@@ -296,20 +312,17 @@ def publish_inline_comments(self, comments: list[dict], disable_fallback: bool =
296312
# publish all comments in a single message
297313
self.pr.create_review(commit=self.last_commit_id, comments=comments)
298314
except Exception as e:
299-
if get_settings().config.verbosity_level >= 2:
300-
get_logger().error(f"Failed to publish inline comments")
315+
get_logger().info(f"Initially failed to publish inline comments as committable")
301316

302-
if (getattr(e, "status", None) == 422
303-
and get_settings().github.publish_inline_comments_fallback_with_verification and not disable_fallback):
317+
if (getattr(e, "status", None) == 422 and not disable_fallback):
304318
pass # continue to try _publish_inline_comments_fallback_with_verification
305319
else:
306320
raise e # will end up with publishing the comments one by one
307321

308322
try:
309323
self._publish_inline_comments_fallback_with_verification(comments)
310324
except Exception as e:
311-
if get_settings().config.verbosity_level >= 2:
312-
get_logger().error(f"Failed to publish inline code comments fallback, error: {e}")
325+
get_logger().error(f"Failed to publish inline code comments fallback, error: {e}")
313326
raise e
314327

315328
def _publish_inline_comments_fallback_with_verification(self, comments: list[dict]):
@@ -334,11 +347,9 @@ def _publish_inline_comments_fallback_with_verification(self, comments: list[dic
334347
for comment in fixed_comments_as_one_liner:
335348
try:
336349
self.publish_inline_comments([comment], disable_fallback=True)
337-
if get_settings().config.verbosity_level >= 2:
338-
get_logger().info(f"Published invalid comment as a single line comment: {comment}")
350+
get_logger().info(f"Published invalid comment as a single line comment: {comment}")
339351
except:
340-
if get_settings().config.verbosity_level >= 2:
341-
get_logger().error(f"Failed to publish invalid comment as a single line comment: {comment}")
352+
get_logger().error(f"Failed to publish invalid comment as a single line comment: {comment}")
342353

343354
def _verify_code_comment(self, comment: dict):
344355
is_verified = False
@@ -396,8 +407,7 @@ def _try_fix_invalid_inline_comments(self, invalid_comments: list[dict]) -> list
396407
if fixed_comment != comment:
397408
fixed_comments.append(fixed_comment)
398409
except Exception as e:
399-
if get_settings().config.verbosity_level >= 2:
400-
get_logger().error(f"Failed to fix inline comment, error: {e}")
410+
get_logger().error(f"Failed to fix inline comment, error: {e}")
401411
return fixed_comments
402412

403413
def publish_code_suggestions(self, code_suggestions: list) -> bool:
@@ -412,16 +422,14 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
412422
relevant_lines_end = suggestion['relevant_lines_end']
413423

414424
if not relevant_lines_start or relevant_lines_start == -1:
415-
if get_settings().config.verbosity_level >= 2:
416-
get_logger().exception(
417-
f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}")
425+
get_logger().exception(
426+
f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}")
418427
continue
419428

420429
if relevant_lines_end < relevant_lines_start:
421-
if get_settings().config.verbosity_level >= 2:
422-
get_logger().exception(f"Failed to publish code suggestion, "
423-
f"relevant_lines_end is {relevant_lines_end} and "
424-
f"relevant_lines_start is {relevant_lines_start}")
430+
get_logger().exception(f"Failed to publish code suggestion, "
431+
f"relevant_lines_end is {relevant_lines_end} and "
432+
f"relevant_lines_start is {relevant_lines_start}")
425433
continue
426434

427435
if relevant_lines_end > relevant_lines_start:
@@ -445,8 +453,7 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
445453
self.publish_inline_comments(post_parameters_list)
446454
return True
447455
except Exception as e:
448-
if get_settings().config.verbosity_level >= 2:
449-
get_logger().error(f"Failed to publish code suggestion, error: {e}")
456+
get_logger().error(f"Failed to publish code suggestion, error: {e}")
450457
return False
451458

452459
def edit_comment(self, comment, body: str):
@@ -505,6 +512,7 @@ def publish_file_comments(self, file_comments: list) -> bool:
505512
elif self.deployment_type == 'user':
506513
same_comment_creator = self.github_user_id == existing_comment['user']['login']
507514
if existing_comment['subject_type'] == 'file' and comment['path'] == existing_comment['path'] and same_comment_creator:
515+
508516
headers, data_patch = self.pr._requester.requestJsonAndCheck(
509517
"PATCH", f"{self.base_url}/repos/{self.repo}/pulls/comments/{existing_comment['id']}", input={"body":comment['body']}
510518
)
@@ -516,8 +524,7 @@ def publish_file_comments(self, file_comments: list) -> bool:
516524
)
517525
return True
518526
except Exception as e:
519-
if get_settings().config.verbosity_level >= 2:
520-
get_logger().error(f"Failed to publish diffview file summary, error: {e}")
527+
get_logger().error(f"Failed to publish diffview file summary, error: {e}")
521528
return False
522529

523530
def remove_initial_comment(self):
@@ -805,8 +812,7 @@ def generate_link_to_relevant_line_number(self, suggestion) -> str:
805812
link = f"{self.base_url_html}/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{absolute_position}"
806813
return link
807814
except Exception as e:
808-
if get_settings().config.verbosity_level >= 2:
809-
get_logger().info(f"Failed adding line link, error: {e}")
815+
get_logger().info(f"Failed adding line link, error: {e}")
810816

811817
return ""
812818

pr_agent/tools/pr_code_suggestions.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,8 @@ async def _prepare_prediction(self, model: str) -> dict:
337337
model,
338338
add_line_numbers_to_hunks=True,
339339
disable_extra_lines=False)
340+
self.patches_diff_list = [self.patches_diff]
341+
self.patches_diff_no_line_number = self.remove_line_numbers([self.patches_diff])[0]
340342

341343
if self.patches_diff:
342344
get_logger().debug(f"PR diff", artifact=self.patches_diff)

0 commit comments

Comments
 (0)