Skip to content

Commit 464d9d9

Browse files
authored
[RemoveDIs][DebugInfo][IR] Add parsing for non-intrinsic debug values (#79818)
This patch adds support for parsing the proposed non-instruction debug info ("RemoveDIs") from textual IR, and adds a test for the parser as well as a set of verifier tests that depend on parsing in order to fire. An important detail of this patch is that, although we can now parse both the RemoveDIs (new) and intrinsic (old) debug info formats, we always convert back to the old format at the end of parsing. This is done for two reasons. Firstly, it ensures that every tool is able to process IR printed in the new format, regardless of whether that tool has had RemoveDIs support added. Secondly, it maintains the effect of the existing flags: for the tools where support for the new format has been added, we will run LLVM passes in the new format iff `--try-experimental-debuginfo-iterators=true`, and we will print in the new format iff `--write-experimental-debuginfo-iterators=true`; the format of the textual IR input should have no effect on either of these features.
1 parent 59e405b commit 464d9d9

32 files changed

+1141
-4
lines changed

llvm/include/llvm/AsmParser/LLParser.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,9 @@ namespace llvm {
178178
/// UpgradeDebuginfo so it can generate broken bitcode.
179179
bool UpgradeDebugInfo;
180180

181+
bool SeenNewDbgInfoFormat = false;
182+
bool SeenOldDbgInfoFormat = false;
183+
181184
std::string SourceFileName;
182185

183186
public:
@@ -573,6 +576,7 @@ namespace llvm {
573576
bool parseMDNodeTail(MDNode *&N);
574577
bool parseMDNodeVector(SmallVectorImpl<Metadata *> &Elts);
575578
bool parseMetadataAttachment(unsigned &Kind, MDNode *&MD);
579+
bool parseDebugRecord(DbgRecord *&DR, PerFunctionState &PFS);
576580
bool parseInstructionMetadata(Instruction &Inst);
577581
bool parseGlobalObjectMetadataAttachment(GlobalObject &GO);
578582
bool parseOptionalFunctionMetadata(Function &F);

llvm/include/llvm/AsmParser/LLToken.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ enum Kind {
3636
exclaim, // !
3737
bar, // |
3838
colon, // :
39+
hash, // #
3940

4041
kw_vscale,
4142
kw_x,
@@ -479,6 +480,7 @@ enum Kind {
479480
DISPFlag, // DISPFlagFoo
480481
DwarfMacinfo, // DW_MACINFO_foo
481482
ChecksumKind, // CSK_foo
483+
DbgRecordType, // dbg_foo
482484

483485
// Type valued tokens (TyVal).
484486
Type,

llvm/include/llvm/IR/DebugProgramInstruction.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,9 +223,19 @@ inline raw_ostream &operator<<(raw_ostream &OS, const DbgRecord &R) {
223223
class DPLabel : public DbgRecord {
224224
DbgRecordParamRef<DILabel> Label;
225225

226+
/// This constructor intentionally left private, so that it is only called via
227+
/// "createUnresolvedDPLabel", which clearly expresses that it is for parsing
228+
/// only.
229+
DPLabel(MDNode *Label, MDNode *DL);
230+
226231
public:
227232
DPLabel(DILabel *Label, DebugLoc DL);
228233

234+
/// For use during parsing; creates a DPLabel from as-of-yet unresolved
235+
/// MDNodes. Trying to access the resulting DPLabel's fields before they are
236+
/// resolved, or if they resolve to the wrong type, will result in a crash.
237+
static DPLabel *createUnresolvedDPLabel(MDNode *Label, MDNode *DL);
238+
229239
DPLabel *clone() const;
230240
void print(raw_ostream &O, bool IsForDebug = false) const;
231241
void print(raw_ostream &ROS, ModuleSlotTracker &MST, bool IsForDebug) const;
@@ -286,6 +296,29 @@ class DPValue : public DbgRecord, protected DebugValueUser {
286296
DIAssignID *AssignID, Metadata *Address,
287297
DIExpression *AddressExpression, const DILocation *DI);
288298

299+
private:
300+
/// Private constructor for creating new instances during parsing only. Only
301+
/// called through `createUnresolvedDPValue` below, which makes clear that
302+
/// this is used for parsing only, and will later return a subclass depending
303+
/// on which Type is passed.
304+
DPValue(LocationType Type, Metadata *Val, MDNode *Variable,
305+
MDNode *Expression, MDNode *AssignID, Metadata *Address,
306+
MDNode *AddressExpression, MDNode *DI);
307+
308+
public:
309+
/// Used to create DPValues during parsing, where some metadata references may
310+
/// still be unresolved. Although for some fields a generic `Metadata*`
311+
/// argument is accepted for forward type-references, the verifier and
312+
/// accessors will reject incorrect types later on. The function is used for
313+
/// all types of DPValues for simplicity while parsing, but asserts if any
314+
/// necessary fields are empty or unused fields are not empty, i.e. if the
315+
/// #dbg_assign fields are used for a non-dbg-assign type.
316+
static DPValue *createUnresolvedDPValue(LocationType Type, Metadata *Val,
317+
MDNode *Variable, MDNode *Expression,
318+
MDNode *AssignID, Metadata *Address,
319+
MDNode *AddressExpression,
320+
MDNode *DI);
321+
289322
static DPValue *createDPVAssign(Value *Val, DILocalVariable *Variable,
290323
DIExpression *Expression,
291324
DIAssignID *AssignID, Value *Address,

llvm/lib/AsmParser/LLLexer.cpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,9 +438,12 @@ lltok::Kind LLLexer::LexCaret() {
438438

439439
/// Lex all tokens that start with a # character.
440440
/// AttrGrpID ::= #[0-9]+
441+
/// Hash ::= #
441442
lltok::Kind LLLexer::LexHash() {
442443
// Handle AttrGrpID: #[0-9]+
443-
return LexUIntID(lltok::AttrGrpID);
444+
if (isdigit(static_cast<unsigned char>(CurPtr[0])))
445+
return LexUIntID(lltok::AttrGrpID);
446+
return lltok::hash;
444447
}
445448

446449
/// Lex a label, integer type, keyword, or hexadecimal integer constant.
@@ -923,6 +926,21 @@ lltok::Kind LLLexer::LexIdentifier() {
923926

924927
#undef DWKEYWORD
925928

929+
// Keywords for debug record types.
930+
#define DBGRECORDTYPEKEYWORD(STR) \
931+
do { \
932+
if (Keyword == "dbg_" #STR) { \
933+
StrVal = #STR; \
934+
return lltok::DbgRecordType; \
935+
} \
936+
} while (false)
937+
938+
DBGRECORDTYPEKEYWORD(value);
939+
DBGRECORDTYPEKEYWORD(declare);
940+
DBGRECORDTYPEKEYWORD(assign);
941+
DBGRECORDTYPEKEYWORD(label);
942+
#undef DBGRECORDTYPEKEYWORD
943+
926944
if (Keyword.starts_with("DIFlag")) {
927945
StrVal.assign(Keyword.begin(), Keyword.end());
928946
return lltok::DIFlag;

llvm/lib/AsmParser/LLParser.cpp

Lines changed: 166 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,24 @@ static cl::opt<bool> AllowIncompleteIR(
6262
"Allow incomplete IR on a best effort basis (references to unknown "
6363
"metadata will be dropped)"));
6464

65+
extern llvm::cl::opt<bool> UseNewDbgInfoFormat;
66+
6567
/// Streams \p T into a string through its `operator<<` and returns the text.
static std::string getTypeString(Type *T) {
  std::string Text;
  raw_string_ostream OS(Text);
  OS << *T;
  return OS.str();
}
7173

74+
// Currently, we should always process modules in the old debug info format by
75+
// default regardless of the module's format in IR; convert it to the old format
76+
// here.
77+
bool finalizeDebugInfoFormat(Module *M) {
78+
if (M)
79+
M->setIsNewDbgInfoFormat(false);
80+
return false;
81+
}
82+
7283
/// Run: module ::= toplevelentity*
7384
bool LLParser::Run(bool UpgradeDebugInfo,
7485
DataLayoutCallbackTy DataLayoutCallback) {
@@ -86,7 +97,7 @@ bool LLParser::Run(bool UpgradeDebugInfo,
8697
}
8798

8899
return parseTopLevelEntities() || validateEndOfModule(UpgradeDebugInfo) ||
89-
validateEndOfIndex();
100+
validateEndOfIndex() || finalizeDebugInfoFormat(M);
90101
}
91102

92103
bool LLParser::parseStandaloneConstantValue(Constant *&C,
@@ -6041,6 +6052,17 @@ bool LLParser::parseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
60416052
return false;
60426053
}
60436054

6055+
/// Returns true if \p Name looks up to one of the intrinsic IDs dbg_declare,
/// dbg_value or dbg_assign. Any other name, including other "llvm.dbg."
/// names, yields false. File-local helper, hence `static`.
static bool isOldDbgFormatIntrinsic(StringRef Name) {
  // Exit early for the common (non-debug-intrinsic) case.
  // We can make this the only check when we begin supporting all "llvm.dbg"
  // intrinsics in the new debug info format.
  if (!Name.starts_with("llvm.dbg."))
    return false;
  Intrinsic::ID FnID = Function::lookupIntrinsicID(Name);
  return FnID == Intrinsic::dbg_declare || FnID == Intrinsic::dbg_value ||
         FnID == Intrinsic::dbg_assign;
}
6065+
60446066
/// FunctionHeader
60456067
/// ::= OptionalLinkage OptionalPreemptionSpecifier OptionalVisibility
60466068
/// OptionalCallingConv OptRetAttrs OptUnnamedAddr Type GlobalName
@@ -6390,9 +6412,31 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) {
63906412

63916413
std::string NameStr;
63926414

6393-
// parse the instructions in this block until we get a terminator.
6415+
// Parse the instructions and debug values in this block until we get a
6416+
// terminator.
63946417
Instruction *Inst;
6418+
auto DeleteDbgRecord = [](DbgRecord *DR) { DR->deleteRecord(); };
6419+
using DbgRecordPtr = std::unique_ptr<DbgRecord, decltype(DeleteDbgRecord)>;
6420+
SmallVector<DbgRecordPtr> TrailingDbgRecord;
63956421
do {
6422+
// Handle debug records first - there should always be an instruction
6423+
// following the debug records, i.e. they cannot appear after the block
6424+
// terminator.
6425+
while (Lex.getKind() == lltok::hash) {
6426+
if (SeenOldDbgInfoFormat)
6427+
return error(Lex.getLoc(), "debug record should not appear in a module "
6428+
"containing debug info intrinsics");
6429+
SeenNewDbgInfoFormat = true;
6430+
Lex.Lex();
6431+
if (!M->IsNewDbgInfoFormat)
6432+
M->convertToNewDbgValues();
6433+
6434+
DbgRecord *DR;
6435+
if (parseDebugRecord(DR, PFS))
6436+
return true;
6437+
TrailingDbgRecord.emplace_back(DR, DeleteDbgRecord);
6438+
}
6439+
63966440
// This instruction may have three possibilities for a name: a) none
63976441
// specified, b) name specified "%foo =", c) number specified: "%4 =".
63986442
LocTy NameLoc = Lex.getLoc();
@@ -6437,11 +6481,121 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) {
64376481
// Set the name on the instruction.
64386482
if (PFS.setInstName(NameID, NameStr, NameLoc, Inst))
64396483
return true;
6484+
6485+
// Attach any preceding debug values to this instruction.
6486+
for (DbgRecordPtr &DR : TrailingDbgRecord)
6487+
BB->insertDPValueBefore(DR.release(), Inst->getIterator());
6488+
TrailingDbgRecord.clear();
64406489
} while (!Inst->isTerminator());
64416490

6491+
assert(TrailingDbgRecord.empty() &&
6492+
"All debug values should have been attached to an instruction.");
6493+
64426494
return false;
64436495
}
64446496

6497+
/// parseDebugRecord
6498+
/// ::= #dbg_label '(' MDNode ')'
6499+
/// ::= #dbg_type '(' Metadata ',' MDNode ',' Metadata ','
6500+
/// (MDNode ',' Metadata ',' Metadata ',')? MDNode ')'
6501+
bool LLParser::parseDebugRecord(DbgRecord *&DR, PerFunctionState &PFS) {
6502+
using RecordKind = DbgRecord::Kind;
6503+
using LocType = DPValue::LocationType;
6504+
LocTy DPVLoc = Lex.getLoc();
6505+
if (Lex.getKind() != lltok::DbgRecordType)
6506+
return error(DPVLoc, "expected debug record type here");
6507+
RecordKind RecordType = StringSwitch<RecordKind>(Lex.getStrVal())
6508+
.Case("declare", RecordKind::ValueKind)
6509+
.Case("value", RecordKind::ValueKind)
6510+
.Case("assign", RecordKind::ValueKind)
6511+
.Case("label", RecordKind::LabelKind);
6512+
6513+
// Parsing labels is trivial; parse here and early exit, otherwise go into the
6514+
// full DPValue processing stage.
6515+
if (RecordType == RecordKind::LabelKind) {
6516+
Lex.Lex();
6517+
if (parseToken(lltok::lparen, "Expected '(' here"))
6518+
return true;
6519+
MDNode *Label;
6520+
if (parseMDNode(Label))
6521+
return true;
6522+
if (parseToken(lltok::comma, "Expected ',' here"))
6523+
return true;
6524+
MDNode *DbgLoc;
6525+
if (parseMDNode(DbgLoc))
6526+
return true;
6527+
if (parseToken(lltok::rparen, "Expected ')' here"))
6528+
return true;
6529+
DR = DPLabel::createUnresolvedDPLabel(Label, DbgLoc);
6530+
return false;
6531+
}
6532+
6533+
LocType ValueType = StringSwitch<LocType>(Lex.getStrVal())
6534+
.Case("declare", LocType::Declare)
6535+
.Case("value", LocType::Value)
6536+
.Case("assign", LocType::Assign);
6537+
6538+
Lex.Lex();
6539+
if (parseToken(lltok::lparen, "Expected '(' here"))
6540+
return true;
6541+
6542+
// Parse Value field.
6543+
Metadata *ValLocMD;
6544+
if (parseMetadata(ValLocMD, &PFS))
6545+
return true;
6546+
if (parseToken(lltok::comma, "Expected ',' here"))
6547+
return true;
6548+
6549+
// Parse Variable field.
6550+
MDNode *Variable;
6551+
if (parseMDNode(Variable))
6552+
return true;
6553+
if (parseToken(lltok::comma, "Expected ',' here"))
6554+
return true;
6555+
6556+
// Parse Expression field.
6557+
MDNode *Expression;
6558+
if (parseMDNode(Expression))
6559+
return true;
6560+
if (parseToken(lltok::comma, "Expected ',' here"))
6561+
return true;
6562+
6563+
// Parse additional fields for #dbg_assign.
6564+
MDNode *AssignID = nullptr;
6565+
Metadata *AddressLocation = nullptr;
6566+
MDNode *AddressExpression = nullptr;
6567+
if (ValueType == LocType::Assign) {
6568+
// Parse DIAssignID.
6569+
if (parseMDNode(AssignID))
6570+
return true;
6571+
if (parseToken(lltok::comma, "Expected ',' here"))
6572+
return true;
6573+
6574+
// Parse address ValueAsMetadata.
6575+
if (parseMetadata(AddressLocation, &PFS))
6576+
return true;
6577+
if (parseToken(lltok::comma, "Expected ',' here"))
6578+
return true;
6579+
6580+
// Parse address DIExpression.
6581+
if (parseMDNode(AddressExpression))
6582+
return true;
6583+
if (parseToken(lltok::comma, "Expected ',' here"))
6584+
return true;
6585+
}
6586+
6587+
/// Parse DILocation.
6588+
MDNode *DebugLoc;
6589+
if (parseMDNode(DebugLoc))
6590+
return true;
6591+
6592+
if (parseToken(lltok::rparen, "Expected ')' here"))
6593+
return true;
6594+
DR = DPValue::createUnresolvedDPValue(ValueType, ValLocMD, Variable,
6595+
Expression, AssignID, AddressLocation,
6596+
AddressExpression, DebugLoc);
6597+
return false;
6598+
}
64456599
//===----------------------------------------------------------------------===//
64466600
// Instruction Parsing.
64476601
//===----------------------------------------------------------------------===//
@@ -7669,6 +7823,16 @@ bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS,
76697823
}
76707824
CI->setFastMathFlags(FMF);
76717825
}
7826+
7827+
if (CalleeID.Kind == ValID::t_GlobalName &&
7828+
isOldDbgFormatIntrinsic(CalleeID.StrVal)) {
7829+
if (SeenNewDbgInfoFormat) {
7830+
CI->deleteValue();
7831+
return error(CallLoc, "llvm.dbg intrinsic should not appear in a module "
7832+
"using non-intrinsic debug info");
7833+
}
7834+
SeenOldDbgInfoFormat = true;
7835+
}
76727836
CI->setAttributes(PAL);
76737837
ForwardRefAttrGroups[CI] = FwdRefAttrGrps;
76747838
Inst = CI;

llvm/lib/IR/DebugProgramInstruction.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,38 @@ DbgRecord::createDebugIntrinsic(Module *M, Instruction *InsertBefore) const {
138138
llvm_unreachable("unsupported DbgRecord kind");
139139
}
140140

141+
// Parsing-only overload: takes raw MDNodes. In asserts builds it checks that
// the label is non-null and is either a DILabel already or a temporary node.
DPLabel::DPLabel(MDNode *Label, MDNode *DL)
    : DbgRecord(LabelKind, DebugLoc(DL)),
      Label(Label) {
  assert(Label && "Unexpected nullptr");
  assert((isa<DILabel>(Label) || Label->isTemporary()) &&
         "Label type must be or resolve to a DILabel");
}

// Typed overload: the label must be non-null.
DPLabel::DPLabel(DILabel *Label, DebugLoc DL)
    : DbgRecord(LabelKind, DL),
      Label(Label) {
  assert(Label && "Unexpected nullptr");
}
145151

152+
// Heap-allocates a DPLabel through the MDNode-taking constructor and hands the
// raw pointer back to the caller.
DPLabel *DPLabel::createUnresolvedDPLabel(MDNode *Label, MDNode *DL) {
  DPLabel *Unresolved = new DPLabel(Label, DL);
  return Unresolved;
}
155+
156+
// Constructor taking untyped metadata operands. Every argument is forwarded
// straight into the matching base or member; the body is empty, so nothing is
// validated here.
DPValue::DPValue(DPValue::LocationType Type, Metadata *Val, MDNode *Variable,
                 MDNode *Expression, MDNode *AssignID, Metadata *Address,
                 MDNode *AddressExpression, MDNode *DI)
    : DbgRecord(ValueKind, DebugLoc(DI)),
      DebugValueUser({Val, Address, AssignID}),
      Type(Type),
      Variable(Variable),
      Expression(Expression),
      AddressExpression(AddressExpression) {
}
162+
163+
// Heap-allocates a DPValue through the metadata-taking constructor, passing
// every argument through unchanged, and returns the raw pointer.
DPValue *DPValue::createUnresolvedDPValue(DPValue::LocationType Type,
                                          Metadata *Val, MDNode *Variable,
                                          MDNode *Expression, MDNode *AssignID,
                                          Metadata *Address,
                                          MDNode *AddressExpression,
                                          MDNode *DI) {
  DPValue *Unresolved = new DPValue(Type, Val, Variable, Expression, AssignID,
                                    Address, AddressExpression, DI);
  return Unresolved;
}
172+
146173
DPValue *DPValue::createDPValue(Value *Location, DILocalVariable *DV,
147174
DIExpression *Expr, const DILocation *DI) {
148175
return new DPValue(ValueAsMetadata::get(Location), DV, Expr, DI,

llvm/lib/IR/Verifier.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6291,7 +6291,7 @@ void Verifier::visit(DPValue &DPV) {
62916291
Var->getRawType());
62926292

62936293
auto *DLNode = DPV.getDebugLoc().getAsMDNode();
6294-
CheckDI(isa_and_nonnull<DILocation>(DLNode), "invalid #dbg record location",
6294+
CheckDI(isa_and_nonnull<DILocation>(DLNode), "invalid #dbg record DILocation",
62956295
&DPV, DLNode);
62966296
DILocation *Loc = DPV.getDebugLoc();
62976297

0 commit comments

Comments
 (0)