-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[lldb] Support alternatives for scope format entries #137751
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-lldb Author: Jonas Devlieghere (JDevlieghere) ChangesThis PR implements support for specifying multiple alternatives for scope format entries. Scopes are used to enclose things that should only be printed when everything in the scope resolves. For example, the following scope only resolves if both
However, the current implementation doesn't let you specify what to print when they don't resolve. This PR adds support for specifying multiple alternative scopes, which are evaluated left-to-right. For example:
This will resolve to:
This PR makes the I ended up with this approach because it fit quite nicely in the existing architecture of the format entries and by limiting the functionality to scopes, it sidesteps some complexity, like dealing with recursion. Full diff: https://github.com/llvm/llvm-project/pull/137751.diff 3 Files Affected:
diff --git a/lldb/include/lldb/Core/FormatEntity.h b/lldb/include/lldb/Core/FormatEntity.h
index e62c82d080d5f..73bf19252b3a8 100644
--- a/lldb/include/lldb/Core/FormatEntity.h
+++ b/lldb/include/lldb/Core/FormatEntity.h
@@ -11,10 +11,10 @@
#include "lldb/lldb-enumerations.h"
#include "lldb/lldb-types.h"
+#include "llvm/ADT/SmallVector.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
-
#include <string>
#include <vector>
@@ -157,9 +157,7 @@ struct Entry {
}
Entry(Type t = Type::Invalid, const char *s = nullptr,
- const char *f = nullptr)
- : string(s ? s : ""), printf_format(f ? f : ""), type(t) {}
-
+ const char *f = nullptr);
Entry(llvm::StringRef s);
Entry(char ch);
@@ -169,15 +167,19 @@ struct Entry {
void AppendText(const char *cstr);
- void AppendEntry(const Entry &&entry) { children.push_back(entry); }
+ void AppendEntry(const Entry &&entry);
+
+ void StartAlternative();
void Clear() {
string.clear();
printf_format.clear();
- children.clear();
+ children_stack.clear();
+ children_stack.emplace_back();
type = Type::Invalid;
fmt = lldb::eFormatDefault;
number = 0;
+ level = 0;
deref = false;
}
@@ -190,7 +192,7 @@ struct Entry {
return false;
if (printf_format != rhs.printf_format)
return false;
- if (children != rhs.children)
+ if (children_stack != rhs.children_stack)
return false;
if (type != rhs.type)
return false;
@@ -201,9 +203,18 @@ struct Entry {
return true;
}
+ std::vector<Entry> &GetChildren();
+
std::string string;
std::string printf_format;
- std::vector<Entry> children;
+
+ /// A stack of children entries, used by Scope entries to provide alterantive
+ /// children. All other entries have a stack of size 1.
+ /// @{
+ llvm::SmallVector<std::vector<Entry>, 1> children_stack;
+ size_t level = 0;
+ /// @}
+
Type type;
lldb::Format fmt = lldb::eFormatDefault;
lldb::addr_t number = 0;
diff --git a/lldb/source/Core/FormatEntity.cpp b/lldb/source/Core/FormatEntity.cpp
index a2410048e5a89..b7f44fe6e2c74 100644
--- a/lldb/source/Core/FormatEntity.cpp
+++ b/lldb/source/Core/FormatEntity.cpp
@@ -60,6 +60,7 @@
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"
+#include <cassert>
#include <cctype>
#include <cinttypes>
#include <cstdio>
@@ -280,31 +281,53 @@ constexpr Definition g_top_level_entries[] = {
constexpr Definition g_root = Entry::DefinitionWithChildren(
"<root>", EntryType::Root, g_top_level_entries);
+FormatEntity::Entry::Entry(Type t, const char *s, const char *f)
+ : string(s ? s : ""), printf_format(f ? f : ""), children_stack({{}}),
+ type(t) {}
+
FormatEntity::Entry::Entry(llvm::StringRef s)
- : string(s.data(), s.size()), printf_format(), children(),
- type(Type::String) {}
+ : string(s.data(), s.size()), children_stack({{}}), type(Type::String) {}
FormatEntity::Entry::Entry(char ch)
- : string(1, ch), printf_format(), children(), type(Type::String) {}
+ : string(1, ch), printf_format(), children_stack({{}}), type(Type::String) {
+}
+
+std::vector<Entry> &FormatEntity::Entry::GetChildren() {
+ assert(level < children_stack.size());
+ return children_stack[level];
+}
void FormatEntity::Entry::AppendChar(char ch) {
- if (children.empty() || children.back().type != Entry::Type::String)
- children.push_back(Entry(ch));
+ auto &entries = GetChildren();
+ if (entries.empty() || entries.back().type != Entry::Type::String)
+ entries.push_back(Entry(ch));
else
- children.back().string.append(1, ch);
+ entries.back().string.append(1, ch);
}
void FormatEntity::Entry::AppendText(const llvm::StringRef &s) {
- if (children.empty() || children.back().type != Entry::Type::String)
- children.push_back(Entry(s));
+ auto &entries = GetChildren();
+ if (entries.empty() || entries.back().type != Entry::Type::String)
+ entries.push_back(Entry(s));
else
- children.back().string.append(s.data(), s.size());
+ entries.back().string.append(s.data(), s.size());
}
void FormatEntity::Entry::AppendText(const char *cstr) {
return AppendText(llvm::StringRef(cstr));
}
+void FormatEntity::Entry::AppendEntry(const Entry &&entry) {
+ auto &entries = GetChildren();
+ entries.push_back(entry);
+}
+
+void FormatEntity::Entry::StartAlternative() {
+ assert(type == Entry::Type::Scope);
+ children_stack.emplace_back();
+ level++;
+}
+
#define ENUM_TO_CSTR(eee) \
case FormatEntity::Entry::Type::eee: \
return #eee
@@ -403,8 +426,9 @@ void FormatEntity::Entry::Dump(Stream &s, int depth) const {
if (deref)
s.Printf("deref = true, ");
s.EOL();
- for (const auto &child : children) {
- child.Dump(s, depth + 1);
+ for (const auto &children : children_stack) {
+ for (const auto &child : children)
+ child.Dump(s, depth + 1);
}
}
@@ -1306,7 +1330,7 @@ bool FormatEntity::Format(const Entry &entry, Stream &s,
return true;
case Entry::Type::Root:
- for (const auto &child : entry.children) {
+ for (const auto &child : entry.children_stack[0]) {
if (!Format(child, s, sc, exe_ctx, addr, valobj, function_changed,
initial_function)) {
return false; // If any item of root fails, then the formatting fails
@@ -1320,19 +1344,26 @@ bool FormatEntity::Format(const Entry &entry, Stream &s,
case Entry::Type::Scope: {
StreamString scope_stream;
- bool success = false;
- for (const auto &child : entry.children) {
- success = Format(child, scope_stream, sc, exe_ctx, addr, valobj,
- function_changed, initial_function);
- if (!success)
- break;
+ auto format_children = [&](const std::vector<Entry> &children) {
+ scope_stream.Clear();
+ for (const auto &child : children) {
+ if (!Format(child, scope_stream, sc, exe_ctx, addr, valobj,
+ function_changed, initial_function))
+ return false;
+ }
+ return true;
+ };
+
+ for (auto &children : entry.children_stack) {
+ if (format_children(children)) {
+ s.Write(scope_stream.GetString().data(),
+ scope_stream.GetString().size());
+ return true;
+ }
}
- // Only if all items in a scope succeed, then do we print the output into
- // the main stream
- if (success)
- s.Write(scope_stream.GetString().data(), scope_stream.GetString().size());
- }
+
return true; // Scopes always successfully print themselves
+ }
case Entry::Type::Variable:
case Entry::Type::VariableSynthetic:
@@ -2121,7 +2152,7 @@ static Status ParseInternal(llvm::StringRef &format, Entry &parent_entry,
uint32_t depth) {
Status error;
while (!format.empty() && error.Success()) {
- const size_t non_special_chars = format.find_first_of("${}\\");
+ const size_t non_special_chars = format.find_first_of("${}\\|");
if (non_special_chars == llvm::StringRef::npos) {
// No special characters, just string bytes so add them and we are done
@@ -2158,6 +2189,14 @@ static Status ParseInternal(llvm::StringRef &format, Entry &parent_entry,
.drop_front(); // Skip the '}' as we are at the end of the scope
return error;
+ case '|':
+ format = format.drop_front(); // Skip the '|'
+ if (parent_entry.type == Entry::Type::Scope)
+ parent_entry.StartAlternative();
+ else
+ parent_entry.AppendChar('|');
+ break;
+
case '\\': {
format = format.drop_front(); // Skip the '\' character
if (format.empty()) {
diff --git a/lldb/unittests/Core/FormatEntityTest.cpp b/lldb/unittests/Core/FormatEntityTest.cpp
index 5983c9de99ef7..ecf956925afb1 100644
--- a/lldb/unittests/Core/FormatEntityTest.cpp
+++ b/lldb/unittests/Core/FormatEntityTest.cpp
@@ -8,6 +8,7 @@
#include "lldb/Core/FormatEntity.h"
#include "lldb/Utility/Status.h"
+#include "lldb/Utility/StreamString.h"
#include "llvm/ADT/StringRef.h"
#include "gtest/gtest.h"
@@ -160,3 +161,48 @@ TEST(FormatEntity, LookupAllEntriesInTree) {
<< "Formatting " << testString << " did not succeed";
}
}
+
+TEST(FormatEntity, ScopeAlt) {
+ StreamString stream;
+ FormatEntity::Entry format;
+ Status status = FormatEntity::Parse("{${frame.pc}|foo}", format);
+ ASSERT_TRUE(status.Success()) << status.AsCString();
+
+ FormatEntity::Format(format, stream, nullptr, nullptr, nullptr, nullptr,
+ false, false);
+ EXPECT_EQ(stream.GetString(), "foo");
+}
+
+TEST(FormatEntity, EscapedPipeInScope) {
+ StreamString stream;
+ FormatEntity::Entry format;
+ Status status = FormatEntity::Parse("{foo\\|bar}", format);
+ ASSERT_TRUE(status.Success()) << status.AsCString();
+
+ FormatEntity::Format(format, stream, nullptr, nullptr, nullptr, nullptr,
+ false, false);
+ EXPECT_EQ(stream.GetString(), "foo|bar");
+}
+
+TEST(FormatEntity, PipeOutsideScope) {
+ StreamString stream;
+ FormatEntity::Entry format;
+ Status status = FormatEntity::Parse("foo|bar", format);
+ ASSERT_TRUE(status.Success()) << status.AsCString();
+
+ FormatEntity::Format(format, stream, nullptr, nullptr, nullptr, nullptr,
+ false, false);
+ EXPECT_EQ(stream.GetString(), "foo|bar");
+}
+
+TEST(FormatEntity, ScopeAltMultiple) {
+ StreamString stream;
+ FormatEntity::Entry format;
+ Status status =
+ FormatEntity::Parse("{${frame.pc}|${function.name}|foo}", format);
+ ASSERT_TRUE(status.Success()) << status.AsCString();
+
+ FormatEntity::Format(format, stream, nullptr, nullptr, nullptr, nullptr,
+ false, false);
+ EXPECT_EQ(stream.GetString(), "foo");
+}
|
@labath this ended up quite different from the bash-like syntax that we discussed at EuroLLVM [1]. Using a single character keeps the parsing code simple and limiting this to scopes eliminates some design complexity like recursion. The current implementation covers all of my uses cases and I think you can almost (?) always use a scope when you want to have an alternative value. Please let me know what you think. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll give others time to chime in, but syntax and implementation LGTM
It would address the use-cases I had in mind for the function-name-format
variables I'm working on
This PR implements support for specifying multiple alternatives for scope format entries. Scopes are used to enclose things that should only be printed when everything in the scope resolves. For example, the following scope only resolves if both `${line.file.basename}` and `${line.number}` resolve. ` ``` { at ${line.file.basename}:${line.number}} ``` However, the current implementation doesn't let you specify what to print when they don't resolve. This PR adds support for specifying multiple alternative scopes, which are evaluated left-to-right. For example: ``` { at ${line.file.basename}:${line.number}| in ${function.name}| <unknown location>} ``` This will resolve to: - ` at ${line.file.basename}:${line.number}` if the corresponding variables resolve. - Otherwise, this resolves to ` in ${function.name}` if `${function.name}` resolves. - Otherwise, this resolves to ` <unknown location>` which always resolves. This PR makes the `|` character a special character within a scope, but allows it to be escaped. I ended up with this approach because it fit quite nicely in the existing architecture of the format entries and by limiting the functionality to scopes, it sidesteps some complexity, like dealing with recursion.
8773b25
to
b6618a2
Compare
|
||
// Nested scopes. Note that scopes always resolve. | ||
EXPECT_THAT_EXPECTED(Format("{{${frame.pc}|foo}|{bar}}"), HasValue("foo")); | ||
EXPECT_THAT_EXPECTED(Format("{{${frame.pc}}|{bar}}"), HasValue("")); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't this resolve to bar
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, because a scope always resolves successfully, no matter what's in it, this behaves as expected.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I figured this was a weird case which is why I included it :-)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh so within {${frame.pc}}
, ${frame.pc}
would fail, but the entire scope is seen as "successfully resolved"? Hence it doesn't fall back to the other scope?
Does that mean the fallback can't ever be used between scopes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be difficult to make a scope fail if all its children failed? That would be more intuitive. But I suspect it might be a breaking change we're not willing to make?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That would break literally every existing use of scopes because the only reason to use them was to make expansion failures non-fatal. I'm not saying we can't do that, but I think that's what it is. We could try to split hairs and say this only applies to scopes with more than one alternative, but I don't know if it's worth it. I suppose it might be used to write something like {common prefix {${foo}|${bar}}| alternative prefix}
, but I don't know if there's a realistic use case for that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we maybe disallow alternatives to be used between scopes (or rather where a scope is on the left-hand side)? I guess technically it does "work", but one would have to know the intricacies of this language to understand why. Would there ever be a case where writing { { foo } | { bar } }
would not be a bug?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm on the fence. It's probably not too hard to implement (although you need to be careful to only trigger this for an alternative scope, a regular nested scope is okay). On the other hand, if you know how a scope works, I think this behaves pretty much as expected, it's just a rather unhelpful thing to do. I don't think we have precedence for diagnosing something like this. I can see how "clean" or "ugly" it is to implement and that might sway me one way or another.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair enough. If it's a pain to implement I won't insist. There is precedent for diagnosing invalid syntax (like mismatching curly braces). But maybe that's not the level at which this diagnostic would need to be implemented
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems legit, I'll let you and Michael figure out the failure mode for alternatives.
Yeah, the existence of scopes is probably a sufficient departure from the bash syntax that it doesn't make sense to mimic the rest. The alternatives of a scope should be strictly more powerful than on a single variable and |
@Michael137 I looked into adding the diagnostic you asked about and while the implementation is relatively straightforward, I'm even more convinced that I don't think it belongs there because it's a semantic rather than a syntax error. We can detect a scope followed by an alternative scope, i.e. something like |
Ok thanks for giving it a shot. I agree it doesn't quite fit the existing syntax diagnostics. We can always choose to revisit if this becomes a big point of confusion for people |
This PR implements support for specifying multiple alternatives for scope format entries. Scopes are used to enclose things that should only be printed when everything in the scope resolves.
For example, the following scope only resolves if both
${line.file.basename}
and${line.number}
resolve. `However, the current implementation doesn't let you specify what to print when they don't resolve. This PR adds support for specifying multiple alternative scopes, which are evaluated left-to-right.
For example:
This will resolve to:
at ${line.file.basename}:${line.number}
if the corresponding variables resolve.in ${function.name}
if${function.name}
resolves.<unknown location>
which always resolves.This PR makes the
|
character a special character within a scope, but allows it to be escaped.I ended up with this approach because it fit quite nicely in the existing architecture of the format entries and by limiting the functionality to scopes, it sidesteps some complexity, like dealing with recursion.