-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[AsmPrinter][DebugNames] Implement DW_IDX_parent entries #77457
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
38f64dd
e30a913
aa17884
2a5a510
b4ce5d5
990d15b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
#include "llvm/CodeGen/AccelTable.h" | ||
#include "DwarfCompileUnit.h" | ||
#include "DwarfUnit.h" | ||
#include "llvm/ADT/DenseSet.h" | ||
#include "llvm/ADT/STLExtras.h" | ||
#include "llvm/ADT/Twine.h" | ||
#include "llvm/BinaryFormat/Dwarf.h" | ||
|
@@ -207,7 +208,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter { | |
}; | ||
|
||
Header Header; | ||
DenseMap<uint32_t, SmallVector<DWARF5AccelTableData::AttributeEncoding, 2>> | ||
DenseMap<uint32_t, SmallVector<DWARF5AccelTableData::AttributeEncoding, 3>> | ||
Abbreviations; | ||
ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits; | ||
ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits; | ||
|
@@ -220,6 +221,8 @@ class Dwarf5AccelTableWriter : public AccelTableWriter { | |
MCSymbol *EntryPool = Asm->createTempSymbol("names_entries"); | ||
// Indicates if this module is built with Split Dwarf enabled. | ||
bool IsSplitDwarf = false; | ||
/// Stores the DIE offsets which are indexed by this table. | ||
DenseSet<OffsetAndUnitID> IndexedOffsets; | ||
|
||
void populateAbbrevsMap(); | ||
|
||
|
@@ -228,8 +231,11 @@ class Dwarf5AccelTableWriter : public AccelTableWriter { | |
void emitBuckets() const; | ||
void emitStringOffsets() const; | ||
void emitAbbrevs() const; | ||
void emitEntry(const DWARF5AccelTableData &Entry) const; | ||
void emitData() const; | ||
void emitEntry( | ||
const DWARF5AccelTableData &Entry, | ||
const DenseMap<OffsetAndUnitID, MCSymbol *> &DIEOffsetToAccelEntryLabel, | ||
DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) const; | ||
void emitData(); | ||
|
||
public: | ||
Dwarf5AccelTableWriter( | ||
|
@@ -395,36 +401,90 @@ void Dwarf5AccelTableWriter::Header::emit(Dwarf5AccelTableWriter &Ctx) { | |
Asm->OutStreamer->emitBytes({AugmentationString, AugmentationStringSize}); | ||
} | ||
|
||
static uint32_t constexpr LowerBitSize = dwarf::DW_IDX_type_hash; | ||
std::optional<uint64_t> | ||
DWARF5AccelTableData::getDefiningParentDieOffset(const DIE &Die) { | ||
if (auto *Parent = Die.getParent(); | ||
Parent && !Parent->findAttribute(dwarf::Attribute::DW_AT_declaration)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's going to be the distinction between a parent that's a declaration, and a parent that doesn't have a name? Should there be a distinction between these cases? |
||
return Parent->getOffset(); | ||
return {}; | ||
} | ||
|
||
enum IdxParentEncoding : uint8_t { | ||
NoIndexedParent = 0, /// Parent information present but parent isn't indexed. | ||
Ref4 = 1, /// Parent information present and parent is indexed. | ||
NoParent = 2, /// Parent information missing. | ||
}; | ||
|
||
static uint32_t constexpr NumBitsIdxParent = 2; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I /think/ we usually put |
||
|
||
uint8_t encodeIdxParent(const std::optional<dwarf::Form> MaybeParentForm) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. LLVM doesn't usually use top-level const on values - I'd suggest removing it here (it can be confusing - makes people think maybe the |
||
if (!MaybeParentForm) | ||
return NoParent; | ||
switch (*MaybeParentForm) { | ||
case dwarf::Form::DW_FORM_flag_present: | ||
return NoIndexedParent; | ||
case dwarf::Form::DW_FORM_ref4: | ||
return Ref4; | ||
default: | ||
// This is not crashing on bad input: we should only reach this if the | ||
// internal compiler logic is faulty; see getFormForIdxParent. | ||
llvm_unreachable("Bad form for IDX_parent"); | ||
} | ||
} | ||
|
||
static uint32_t constexpr ParentBitOffset = dwarf::DW_IDX_type_hash; | ||
static uint32_t constexpr TagBitOffset = ParentBitOffset + NumBitsIdxParent; | ||
static uint32_t getTagFromAbbreviationTag(const uint32_t AbbrvTag) { | ||
return AbbrvTag >> LowerBitSize; | ||
return AbbrvTag >> TagBitOffset; | ||
} | ||
Comment on lines
+435
to
439
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Hmm weird. Just got notification for this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suspect David forgot to click the "submit review" button? But yeah, more importantly, I think we need to make these abbreviation codes count from 1...num_abbreviations so that the ULEB encoding takes a single byte. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll incorporate it into my bolt implementation from start. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yep, drowning in reviews/todos lately, so it was some late feedback I figured I'd send anyway,s ince I'd written it and some of it was still applicable.
Not sure I understand quite what you're saying here - but mostly it seemed like packing these things into the "AbbrvTag" is more a shortcut to being able to use a single int in the map/lookup/hash table, rather than the most readable piece of code & it'd be good to make the code more legible/self-descrpitive rather than having this bit fiddling. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Combining the two. Something like:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Something like that would suffice - though I wouldn't mind if the abbrev system for debug_info DIEs/abbrevs was generalized to be usable for debug_names entries too, I think. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
We need two bits for the parent information, as it can have 3 states: absent, form_ref4, form_flag_present
I remember looking a bit into how that works, but it would require a bit more effort, since debug_info uses attributes whereas debug_names uses IDXs There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
ah thanks. It was a place holder as initial implementation in bolt won't support it. It will be follow up PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yep, I'd expect it to be generalized into a template parameterized on the type of the attributes ( |
||
|
||
/// Constructs a unique AbbrevTag that captures what a DIE accesses. | ||
/// Using this tag we can emit a unique abbreviation for each DIE. | ||
static uint32_t constructAbbreviationTag( | ||
const unsigned Tag, | ||
const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet) { | ||
const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet, | ||
std::optional<dwarf::Form> MaybeParentForm) { | ||
uint32_t AbbrvTag = 0; | ||
if (EntryRet) | ||
AbbrvTag |= 1 << EntryRet->Encoding.Index; | ||
AbbrvTag |= 1 << dwarf::DW_IDX_die_offset; | ||
AbbrvTag |= Tag << LowerBitSize; | ||
AbbrvTag |= 1 << dwarf::DW_IDX_parent; | ||
AbbrvTag |= encodeIdxParent(MaybeParentForm) << ParentBitOffset; | ||
AbbrvTag |= Tag << TagBitOffset; | ||
return AbbrvTag; | ||
} | ||
|
||
static std::optional<dwarf::Form> | ||
getFormForIdxParent(const DenseSet<OffsetAndUnitID> &IndexedOffsets, | ||
std::optional<OffsetAndUnitID> ParentOffset) { | ||
// No parent information | ||
if (!ParentOffset) | ||
return std::nullopt; | ||
// Parent is indexed by this table. | ||
if (IndexedOffsets.contains(*ParentOffset)) | ||
return dwarf::Form::DW_FORM_ref4; | ||
// Parent is not indexed by this table. | ||
return dwarf::Form::DW_FORM_flag_present; | ||
} | ||
|
||
void Dwarf5AccelTableWriter::populateAbbrevsMap() { | ||
for (auto &Bucket : Contents.getBuckets()) { | ||
for (auto *Hash : Bucket) { | ||
for (auto *Value : Hash->getValues<DWARF5AccelTableData *>()) { | ||
std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet = | ||
getIndexForEntry(*Value); | ||
unsigned Tag = Value->getDieTag(); | ||
uint32_t AbbrvTag = constructAbbreviationTag(Tag, EntryRet); | ||
std::optional<dwarf::Form> MaybeParentForm = getFormForIdxParent( | ||
IndexedOffsets, Value->getParentDieOffsetAndUnitID()); | ||
uint32_t AbbrvTag = | ||
constructAbbreviationTag(Tag, EntryRet, MaybeParentForm); | ||
if (Abbreviations.count(AbbrvTag) == 0) { | ||
SmallVector<DWARF5AccelTableData::AttributeEncoding, 2> UA; | ||
SmallVector<DWARF5AccelTableData::AttributeEncoding, 3> UA; | ||
if (EntryRet) | ||
UA.push_back(EntryRet->Encoding); | ||
UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4}); | ||
if (MaybeParentForm) | ||
UA.push_back({dwarf::DW_IDX_parent, *MaybeParentForm}); | ||
Abbreviations.try_emplace(AbbrvTag, UA); | ||
} | ||
} | ||
|
@@ -496,15 +556,34 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const { | |
} | ||
|
||
void Dwarf5AccelTableWriter::emitEntry( | ||
const DWARF5AccelTableData &Entry) const { | ||
const DWARF5AccelTableData &Entry, | ||
const DenseMap<OffsetAndUnitID, MCSymbol *> &DIEOffsetToAccelEntryLabel, | ||
DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) const { | ||
std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet = | ||
getIndexForEntry(Entry); | ||
uint32_t AbbrvTag = constructAbbreviationTag(Entry.getDieTag(), EntryRet); | ||
std::optional<OffsetAndUnitID> MaybeParentOffset = | ||
Entry.getParentDieOffsetAndUnitID(); | ||
std::optional<dwarf::Form> MaybeParentForm = | ||
getFormForIdxParent(IndexedOffsets, MaybeParentOffset); | ||
uint32_t AbbrvTag = | ||
constructAbbreviationTag(Entry.getDieTag(), EntryRet, MaybeParentForm); | ||
auto AbbrevIt = Abbreviations.find(AbbrvTag); | ||
assert(AbbrevIt != Abbreviations.end() && | ||
"Why wasn't this abbrev generated?"); | ||
assert(getTagFromAbbreviationTag(AbbrevIt->first) == Entry.getDieTag() && | ||
"Invalid Tag"); | ||
|
||
auto EntrySymbolIt = | ||
DIEOffsetToAccelEntryLabel.find(Entry.getDieOffsetAndUnitID()); | ||
assert(EntrySymbolIt != DIEOffsetToAccelEntryLabel.end()); | ||
MCSymbol *EntrySymbol = EntrySymbolIt->getSecond(); | ||
|
||
// Emit the label for this Entry, so that IDX_parents may refer to it. | ||
// Note: a DIE may have multiple accelerator Entries; this check avoids | ||
// creating/emitting multiple labels for the same DIE. | ||
if (EmittedAccelEntrySymbols.insert(EntrySymbol).second) | ||
Asm->OutStreamer->emitLabel(EntrySymbol); | ||
|
||
Asm->emitULEB128(AbbrevIt->first, "Abbreviation code"); | ||
|
||
for (const auto &AttrEnc : AbbrevIt->second) { | ||
|
@@ -520,20 +599,34 @@ void Dwarf5AccelTableWriter::emitEntry( | |
assert(AttrEnc.Form == dwarf::DW_FORM_ref4); | ||
Asm->emitInt32(Entry.getDieOffset()); | ||
break; | ||
case dwarf::DW_IDX_parent: { | ||
if (AttrEnc.Form == dwarf::Form::DW_FORM_flag_present) | ||
break; | ||
auto ParentSymbolIt = DIEOffsetToAccelEntryLabel.find(*MaybeParentOffset); | ||
assert(ParentSymbolIt != DIEOffsetToAccelEntryLabel.end()); | ||
Asm->emitLabelDifference(ParentSymbolIt->getSecond(), EntryPool, 4); | ||
break; | ||
} | ||
default: | ||
llvm_unreachable("Unexpected index attribute!"); | ||
} | ||
} | ||
} | ||
|
||
void Dwarf5AccelTableWriter::emitData() const { | ||
void Dwarf5AccelTableWriter::emitData() { | ||
DenseMap<OffsetAndUnitID, MCSymbol *> DIEOffsetToAccelEntryLabel; | ||
|
||
for (OffsetAndUnitID Offset : IndexedOffsets) | ||
DIEOffsetToAccelEntryLabel.insert({Offset, Asm->createTempSymbol("")}); | ||
|
||
Asm->OutStreamer->emitLabel(EntryPool); | ||
DenseSet<MCSymbol *> EmittedAccelEntrySymbols; | ||
for (auto &Bucket : Contents.getBuckets()) { | ||
for (auto *Hash : Bucket) { | ||
// Remember to emit the label for our offset. | ||
Asm->OutStreamer->emitLabel(Hash->Sym); | ||
for (const auto *Value : Hash->Values) | ||
emitEntry(*static_cast<const DWARF5AccelTableData *>(Value)); | ||
for (const auto *Value : Hash->getValues<DWARF5AccelTableData *>()) | ||
emitEntry(*Value, DIEOffsetToAccelEntryLabel, EmittedAccelEntrySymbols); | ||
Asm->OutStreamer->AddComment("End of list: " + Hash->Name.getString()); | ||
Asm->emitInt8(0); | ||
} | ||
|
@@ -555,6 +648,12 @@ Dwarf5AccelTableWriter::Dwarf5AccelTableWriter( | |
CompUnits(CompUnits), TypeUnits(TypeUnits), | ||
getIndexForEntry(std::move(getIndexForEntry)), | ||
IsSplitDwarf(IsSplitDwarf) { | ||
|
||
for (auto &Bucket : Contents.getBuckets()) | ||
for (auto *Hash : Bucket) | ||
for (auto *Value : Hash->getValues<DWARF5AccelTableData *>()) | ||
IndexedOffsets.insert(Value->getDieOffsetAndUnitID()); | ||
|
||
populateAbbrevsMap(); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1374,7 +1374,8 @@ void DWARFLinkerImpl::emitDWARFv5DebugNamesSection(const Triple &TargetTriple) { | |
case DwarfUnit::AccelType::Namespace: | ||
case DwarfUnit::AccelType::Type: { | ||
DebugNames->addName(*DebugStrStrings.getExistingEntry(Info.String), | ||
Info.OutOffset, Info.Tag, CU->getUniqueID()); | ||
Info.OutOffset, std::nullopt /*ParentDIEOffset*/, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this is what I was alluding to in the last paragraph of the commit message, it's not clear to me how/if the parallel linker has access to the parent DIE offset of the final, merged, debug information section. I was hoping to tackle this later |
||
Info.Tag, CU->getUniqueID()); | ||
} break; | ||
|
||
default: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we want a named structure ,perhaps we could avoid using a pair and accessors and have a plain struct with a public offset and unitID member?