[Utils] add update-verify-tests.py #97369
Conversation
@llvm/pr-subscribers-testing-tools @llvm/pr-subscribers-clang
Author: Henrik G. Olsson (hnrklssn)
Changes: Adds a Python script to automatically take output from a failed clang -verify test and update the test case(s) to expect the new behaviour.
Full diff: https://github.com/llvm/llvm-project/pull/97369.diff
1 File Affected:
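To illustrate the kind of rewrite this performs (a minimal sketch: the test content and the diagnostic text below are made up for illustration and are not taken from the patch), a -verify test whose existing check has gone stale would be updated roughly as follows, following the placement rules described in the script's docstring:

Before (the existing check no longer matches what clang emits):
// RUN: %clang_cc1 -verify %s
int foo(); // expected-warning{{old diagnostic text}}

After piping the failing output through the script, the stale check is removed and a new check is inserted on the preceding line with @+1:
// RUN: %clang_cc1 -verify %s
// expected-warning@+1{{new diagnostic text}}
int foo();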
diff --git a/clang/utils/update-verify-tests.py b/clang/utils/update-verify-tests.py
new file mode 100644
index 0000000000000..95eca3af61a72
--- /dev/null
+++ b/clang/utils/update-verify-tests.py
@@ -0,0 +1,321 @@
+import sys
+import re
+
+"""
+ Pipe output from clang's -verify into this script to have the test case updated to expect the actual diagnostic output.
+ When inserting new expected-* checks it will place them on the line before the location of the diagnostic, with an @+1,
+ or @+N for some N if there are multiple diagnostics emitted on the same line. If the current checks are using @-N for
+ this line, the new check will follow that convention also.
+ Existing checks will be left untouched as much as possible, including their location and whitespace content, to minimize
+ diffs. If inaccurate their count will be updated, or the check removed entirely.
+
+ Missing features:
+ - custom prefix support (-verify=my-prefix)
+ - multiple prefixes on the same line (-verify=my-prefix,my-other-prefix)
+ - multiple prefixes on separate RUN lines (RUN: -verify=my-prefix\nRUN: -verify my-other-prefix)
+ - regexes with expected-*-re: existing ones will be left untouched if accurate, but the script will abort if there are any
+ diagnostic mismatches on the same line.
+ - multiple checks targeting the same line are supported, but a line may only contain one check
+ - if multiple checks targeting the same line are failing the script is not guaranteed to produce a minimal diff
+
+Example usage:
+ build/bin/llvm-lit clang/test/Sema/ --no-progress-bar -v | python3 update-verify-tests.py
+"""
+
+class KnownException(Exception):
+ pass
+
+def parse_error_category(s):
+ parts = s.split("diagnostics")
+ diag_category = parts[0]
+ category_parts = parts[0].strip().strip("'").split("-")
+ expected = category_parts[0]
+ if expected != "expected":
+ raise Exception(f"expected 'expected', but found '{expected}'. Custom verify prefixes are not supported.")
+ diag_category = category_parts[1]
+ if "seen but not expected" in parts[1]:
+ seen = True
+ elif "expected but not seen" in parts[1]:
+ seen = False
+ else:
+ raise KnownException(f"unexpected category '{parts[1]}'")
+ return (diag_category, seen)
+
+diag_error_re = re.compile(r"File (\S+) Line (\d+): (.+)")
+diag_error_re2 = re.compile(r"File \S+ Line \d+ \(directive at (\S+):(\d+)\): (.+)")
+def parse_diag_error(s):
+ m = diag_error_re2.match(s)
+ if not m:
+ m = diag_error_re.match(s)
+ if not m:
+ return None
+ return (m.group(1), int(m.group(2)), m.group(3))
+
+class Line:
+ def __init__(self, content, line_n):
+ self.content = content
+ self.diag = None
+ self.line_n = line_n
+ self.related_diags = []
+ self.targeting_diags = []
+ def update_line_n(self, n):
+ if self.diag and not self.diag.line_is_absolute:
+ self.diag.orig_target_line_n += n - self.line_n
+ self.line_n = n
+ for diag in self.targeting_diags:
+ if diag.line_is_absolute:
+ diag.orig_target_line_n = n
+ else:
+ diag.orig_target_line_n = n - diag.line.line_n
+ for diag in self.related_diags:
+ if not diag.line_is_absolute:
+ pass
+ def render(self):
+ if not self.diag:
+ return self.content
+ assert("{{DIAG}}" in self.content)
+ res = self.content.replace("{{DIAG}}", self.diag.render())
+ if not res.strip():
+ return ""
+ return res
+
+class Diag:
+ def __init__(self, diag_content, category, targeted_line_n, line_is_absolute, count, line, is_re, whitespace_strings):
+ self.diag_content = diag_content
+ self.category = category
+ self.orig_target_line_n = targeted_line_n
+ self.line_is_absolute = line_is_absolute
+ self.count = count
+ self.line = line
+ self.target = None
+ self.is_re = is_re
+ self.absolute_target()
+ self.whitespace_strings = whitespace_strings
+
+    def add(self):
+        if self.orig_target_line_n > 0:
+            self.orig_target_line_n += 1
+        elif self.orig_target_line_n < 0:
+            self.orig_target_line_n -= 1
+
+ def absolute_target(self):
+ if self.line_is_absolute:
+ res = self.orig_target_line_n
+ else:
+ res = self.line.line_n + self.orig_target_line_n
+ if self.target:
+ assert(self.line.line_n == res)
+ return res
+
+ def relative_target(self):
+ return self.absolute_target() - self.line.line_n
+
+ def render(self):
+ assert(self.count >= 0)
+ if self.count == 0:
+ return ""
+ line_location_s = ""
+ if self.relative_target() != 0:
+ if self.line_is_absolute:
+ line_location_s = f"@{self.absolute_target()}"
+ elif self.relative_target() > 0:
+ line_location_s = f"@+{self.relative_target()}"
+ else:
+ line_location_s = f"@{self.relative_target()}" # the minus sign is implicit
+ count_s = "" if self.count == 1 else f"{self.count}"
+ re_s = "-re" if self.is_re else ""
+ if self.whitespace_strings:
+ whitespace1_s = self.whitespace_strings[0]
+ whitespace2_s = self.whitespace_strings[1]
+ whitespace3_s = self.whitespace_strings[2]
+ whitespace4_s = self.whitespace_strings[3]
+ else:
+ whitespace1_s = " "
+ whitespace2_s = ""
+ whitespace3_s = ""
+ whitespace4_s = ""
+ if count_s and not whitespace3_s:
+ whitespace3_s = " "
+ return f"//{whitespace1_s}expected-{self.category}{re_s}{whitespace2_s}{line_location_s}{whitespace3_s}{count_s}{whitespace4_s}{{{{{self.diag_content}}}}}"
+
+expected_diag_re = re.compile(r"//(\s*)expected-(note|warning|error)(-re)?(\s*)(@[+-]?\d+)?(\s*)(\d+)?(\s*)\{\{(.*)\}\}")
+def parse_diag(line, filename, lines):
+ s = line.content
+ ms = expected_diag_re.findall(s)
+ if not ms:
+ return None
+ if len(ms) > 1:
+ print(f"multiple diags on line {filename}:{line.line_n}. Aborting due to missing implementation.")
+ sys.exit(1)
+ [whitespace1_s, category_s, re_s, whitespace2_s, target_line_s, whitespace3_s, count_s, whitespace4_s, diag_s] = ms[0]
+ if not target_line_s:
+ target_line_n = 0
+ is_absolute = False
+ elif target_line_s.startswith("@+"):
+ target_line_n = int(target_line_s[2:])
+ is_absolute = False
+ elif target_line_s.startswith("@-"):
+ target_line_n = int(target_line_s[1:])
+ is_absolute = False
+ else:
+ target_line_n = int(target_line_s[1:])
+ is_absolute = True
+ count = int(count_s) if count_s else 1
+ line.content = expected_diag_re.sub("{{DIAG}}", s)
+
+ return Diag(diag_s, category_s, target_line_n, is_absolute, count, line, bool(re_s), [whitespace1_s, whitespace2_s, whitespace3_s, whitespace4_s])
+
+def link_line_diags(lines, diag):
+ line_n = diag.line.line_n
+ target_line_n = diag.absolute_target()
+ step = 1 if target_line_n < line_n else -1
+ for i in range(target_line_n, line_n, step):
+ lines[i-1].related_diags.append(diag)
+
+def add_line(new_line, lines):
+ lines.insert(new_line.line_n-1, new_line)
+ for i in range(new_line.line_n, len(lines)):
+ line = lines[i]
+ assert(line.line_n == i)
+ line.update_line_n(i+1)
+ assert(all(line.line_n == i+1 for i, line in enumerate(lines)))
+
+indent_re = re.compile(r"\s*")
+def get_indent(s):
+ return indent_re.match(s).group(0)
+
+def add_diag(line_n, diag_s, diag_category, lines):
+ target = lines[line_n - 1]
+ for other in target.targeting_diags:
+ if other.is_re:
+ raise KnownException("mismatching diag on line with regex matcher. Skipping due to missing implementation")
+ reverse = True if [other for other in target.targeting_diags if other.relative_target() < 0] else False
+
+ targeting = [other for other in target.targeting_diags if not other.line_is_absolute]
+ targeting.sort(reverse=reverse, key=lambda d: d.relative_target())
+ prev_offset = 0
+ prev_line = target
+ direction = -1 if reverse else 1
+ for d in targeting:
+ if d.relative_target() != prev_offset + direction:
+ break
+ prev_offset = d.relative_target()
+ prev_line = d.line
+ total_offset = prev_offset - 1 if reverse else prev_offset + 1
+ if reverse:
+ new_line_n = prev_line.line_n + 1
+ else:
+ new_line_n = prev_line.line_n
+ assert(new_line_n == line_n + (not reverse) - total_offset)
+
+ new_line = Line(get_indent(prev_line.content) + "{{DIAG}}\n", new_line_n)
+ new_line.related_diags = list(prev_line.related_diags)
+ add_line(new_line, lines)
+
+ new_diag = Diag(diag_s, diag_category, total_offset, False, 1, new_line, False, None)
+ new_line.diag = new_diag
+ new_diag.target_line = target
+ assert(type(new_diag) != str)
+ target.targeting_diags.append(new_diag)
+ link_line_diags(lines, new_diag)
+
+updated_test_files = set()
+def update_test_file(filename, diag_errors):
+ print(f"updating test file {filename}")
+ if filename in updated_test_files:
+ print(f"{filename} already updated, but got new output - expect incorrect results")
+ else:
+ updated_test_files.add(filename)
+ with open(filename, 'r') as f:
+ lines = [Line(line, i+1) for i, line in enumerate(f.readlines())]
+ for line in lines:
+ diag = parse_diag(line, filename, lines)
+ if diag:
+ line.diag = diag
+ diag.target_line = lines[diag.absolute_target() - 1]
+ link_line_diags(lines, diag)
+ lines[diag.absolute_target() - 1].targeting_diags.append(diag)
+
+ for (line_n, diag_s, diag_category, seen) in diag_errors:
+ if seen:
+ continue
+ # this is a diagnostic expected but not seen
+ assert(lines[line_n - 1].diag)
+ if diag_s != lines[line_n - 1].diag.diag_content:
+ raise KnownException(f"{filename}:{line_n} - found diag {lines[line_n - 1].diag.diag_content} but expected {diag_s}")
+ if diag_category != lines[line_n - 1].diag.category:
+ raise KnownException(f"{filename}:{line_n} - found {lines[line_n - 1].diag.category} diag but expected {diag_category}")
+ lines[line_n - 1].diag.count -= 1
+ diag_errors_left = []
+ diag_errors.sort(reverse=True, key=lambda t: t[0])
+ for (line_n, diag_s, diag_category, seen) in diag_errors:
+ if not seen:
+ continue
+ target = lines[line_n - 1]
+ other_diags = [d for d in target.targeting_diags if d.diag_content == diag_s and d.category == diag_category]
+ other_diag = other_diags[0] if other_diags else None
+ if other_diag:
+ other_diag.count += 1
+ else:
+ diag_errors_left.append((line_n, diag_s, diag_category))
+ for (line_n, diag_s, diag_category) in diag_errors_left:
+ add_diag(line_n, diag_s, diag_category, lines)
+ with open(filename, 'w') as f:
+ for line in lines:
+ f.write(line.render())
+
+def update_test_files(errors):
+ errors_by_file = {}
+ for ((filename, line, diag_s), (diag_category, seen)) in errors:
+ if filename not in errors_by_file:
+ errors_by_file[filename] = []
+ errors_by_file[filename].append((line, diag_s, diag_category, seen))
+ for filename, diag_errors in errors_by_file.items():
+ try:
+ update_test_file(filename, diag_errors)
+ except KnownException as e:
+ print(f"{filename} - ERROR: {e}")
+ print("continuing...")
+curr = []
+curr_category = None
+curr_run_line = None
+lines_since_run = []
+for line in sys.stdin.readlines():
+ lines_since_run.append(line)
+ try:
+ if line.startswith("RUN:"):
+ if curr:
+ update_test_files(curr)
+ curr = []
+ lines_since_run = [line]
+ curr_run_line = line
+ else:
+ for line in lines_since_run:
+ print(line, end="")
+ print("====================")
+ print("no mismatching diagnostics found since last RUN line")
+ continue
+ if line.startswith("error: "):
+ if "no expected directives found" in line:
+ print(f"no expected directives found for RUN line '{curr_run_line.strip()}'. Add 'expected-no-diagnostics' manually if this is intended.")
+ continue
+ curr_category = parse_error_category(line[len("error: "):])
+ continue
+
+ diag_error = parse_diag_error(line.strip())
+ if diag_error:
+ curr.append((diag_error, curr_category))
+ except Exception as e:
+ for line in lines_since_run:
+ print(line, end="")
+ print("====================")
+ print(e)
+ sys.exit(1)
+if curr:
+ update_test_files(curr)
+ print("done!")
+else:
+ for line in lines_since_run:
+ print(line, end="")
+ print("====================")
+ print("no mismatching diagnostics found")
✅ With the latest revision this PR passed the Python code formatter.
This is a nice addition, but I think it should follow the conventions established by the existing update_*_test_checks.py scripts as much as possible, at least:
- Ability to parse RUN lines, re-execute them autonomously, and modify test files in place based on their output
- Proper argument parsing via the argparse library
- Name of the script itself
You should integrate with the UpdateTestChecks/common.py infrastructure as much as possible. At a minimum, the collection of test cases to update, the RUN line parsing, and stuff like common.invoke_tool should almost certainly be shared. This may require some minor updates to that common infrastructure, but that's a good thing.
The actual generating and updating of CHECK lines is probably sufficiently different that sharing might not make sense. That said, I wonder if this script can be generalized to automatically generate CHECK lines for other tools that produce diagnostic output.
I did consider this at first - here's my reasoning why I didn't, please let me know what you think: I think the added complexity of RUN line parsing and re-executing the tool isn't justified here.
This script relies on the format of clang's -verify output rather than on the RUN lines themselves. For the checks themselves I think they are sufficiently different from FileCheck checks that it'd be hard to generalise the output: multiple checks can target the same line, for example.
Added tests and updated the script. Instead of only being a free-standing script it's now (along with the UTC scripts) integrated into llvm-lit to automatically detect which script can update a failing test, using the new --update-tests flag.
Force-pushed from 831d678 to 1c3108f.
I think the integration into lit is okay. The diff updater seems neat, but I think it could be taken into a separate change, see also the inline comment. In general, I think this PR really does three separate things, that ought to be in separate changes:
- Add update-verify-tests.py
- Add a way to update tests from within llvm-lit
- Add the diff-test-updater to llvm-lit
@@ -0,0 +1,10 @@
// RUN: %clang_cc1 -verify %s
// RUN: diff %s %s.expected
The way I understand it, the update-verify-tests plugin in lit will overwrite this source file, which may end up overwriting this RUN line if there is a bug. Basically, there is a weird circularity here.
Therefore, it seems to me that the RUN lines for the diff against expected ought to be in the lit-plugin.test file instead.
The files in the Inputs directory are not discovered by lit, so this doesn't actually run the RUN lines. Instead a copy is made, and a new lit instance is launched to test the RUN lines.
This is in duplicate-diag.test; the RUN lines of duplicate-diag.c are not executed at all for this case because it tests the update-verify-tests.py script:
# RUN: cp %S/Inputs/duplicate-diag.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests
# RUN: diff -u %S/Inputs/duplicate-diag.c.expected %t.c
# RUN: %clang_cc1 -verify %t.c
This is in lit-plugin.test, which copies the files to a separate directory and launches llvm-lit there to test the lit integration:
# RUN: cp %S/Inputs/*.c %S/LitTests
# RUN: cp %S/Inputs/*.c.expected %S/LitTests
# RUN: not %{lit} %S/LitTests
# RUN: not %{lit} %S/LitTests --update-tests
# RUN: %{lit} %S/LitTests
Right, it's all a little mind-bending, but my point is that since you're trying to test whether the invocation of not %{lit} %S/LitTests --update-tests in lit-plugin.test updated the tests to look like the .expected file, that invocation of diff should also be in lit-plugin.test.
You can't have the correctness check for a test that modifies a file be in the same file that is being modified. It has to live outside.
Makes sense, I'll separate the changes into 3 PRs.
Adds a python script to automatically take output from a failed clang -verify test and update the test case(s) to expect the new behaviour.
Fixes various line offset bugs. Fixes a bug where whitespace could be emitted between 'expected-[category]' and the line location specifier, which clang does not parse. Now handles the cases where the file does not emit any diagnostics, or previously contained 'expected-no-diagnostics'. Also does further work to minimise diffs by replacing a diagnostic check with a new one, even if the previous check was in a location where the script would not normally emit one. This is useful for the case where a check is on the same line as the emitted diagnostic, and the diagnostic message changes. Instead of removing the previous diagnostic and inserting a new one with @+1, the old diagnostic text is replaced with the new one.
Custom prefixes can now be provided with --prefix. The default remains 'expected'.
The script no longer attempts to be RUN line aware. Feeding raw llvm-lit output into it _may_ still work, but given a large enough number of test cases to update, the risk that some test case uses unsupported features grows pretty large. This will instead be handled by adding a lit integration to automatically invoke the core for each failing test output. The lit integration will be landed as a separate change.
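As a usage sketch for the custom-prefix support mentioned above (the file name and the exact flag spelling are illustrative assumptions, not taken from the patch):

# pipe the failing -verify output into the script, passing the same prefix the test's RUN line passes to -verify=
not clang -cc1 -verify=my-prefix my-test.c 2>&1 | python3 clang/utils/update-verify-tests.py --prefix my-prefix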
Force-pushed from fc243fd to 7395ef2.
I didn't go over the core.py implementation with a comb, but this looks reasonable to me. Thanks!
Looks like these tests are failing in CI: see https://buildkite.com/llvm-project/github-pull-requests/builds/100644#0191ec9f-441f-4299-ad47-a9318fbae7a8
This breaks tests on Windows: http://45.33.8.238/win/94083/step_6.txt
Please take a look and revert for now if it takes a while to fix.
Note that this was also detected by CI prior to merging the PR.
Reverted in #108630
Very sorry about the CI oversight on my part! Thanks for reverting, I was in a meeting.
This adds a flag to lit for detecting and updating failing tests when it is possible to do so automatically. The flag uses a plugin architecture where config files can add additional auto-updaters for the types of tests in the test suite. When a test fails with `--update-tests` enabled, lit passes the test RUN invocation and output to each registered test updater until one of them signals that it updated the test (or all test updaters have been run). As such it is the responsibility of the test updater to only update tests where it is reasonably certain that it will actually fix the test, or come close to doing so. Initially adds support for UpdateVerifyTests and UpdateTestChecks. The flag is currently only implemented for lit's internal shell, so `--update-tests` implies `LIT_USE_INTERNAL_SHELL=1`. Builds on work in #97369. Fixes #81320.
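A usage sketch of the flag described above (the lit binary path follows the example in the script's docstring and is an assumption about the local build layout):

# let lit invoke the registered updaters (UpdateVerifyTests, UpdateTestChecks) on each failing test
build/bin/llvm-lit --update-tests clang/test/Sema/
# re-run without the flag to confirm the updated tests now pass
build/bin/llvm-lit clang/test/Sema/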