Skip to content

Commit 9d66239

Browse files
author
Peter Goodman
authored
Clone metadata when moving stuff (#530)
1 parent 7d6daf3 commit 9d66239

File tree

2 files changed

+120
-19
lines changed

2 files changed

+120
-19
lines changed

include/remill/BC/Util.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class FunctionType;
5050
class GlobalObject;
5151
class GlobalVariable;
5252
class IntegerType;
53+
class Metadata;
5354
class Module;
5455
class PointerType;
5556
class Type;
@@ -222,6 +223,7 @@ llvm::IntegerType *AddressType(llvm::Module *module)
222223
__attribute__((deprecated));
223224

224225
using ValueMap = std::unordered_map<llvm::Value *, llvm::Value *>;
226+
using MDMap = std::unordered_map<llvm::Metadata *, llvm::Metadata *>;
225227

226228
// Clone function `source_func` into `dest_func`, using `value_map` to map over
227229
// values. This will strip out debug info during the clone. This will strip out
@@ -230,7 +232,7 @@ using ValueMap = std::unordered_map<llvm::Value *, llvm::Value *>;
230232
// Note: this will try to clone globals referenced from the module of
231233
// `source_func` into the module of `dest_func`.
232234
void CloneFunctionInto(llvm::Function *source_func, llvm::Function *dest_func,
233-
ValueMap &value_map);
235+
ValueMap &value_map, MDMap &md_map);
234236

235237
// Clone function `source_func` into `dest_func`. This will strip out debug
236238
// info during the clone.

lib/BC/Util.cpp

Lines changed: 117 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,7 @@ llvm::IntegerType *AddressType(llvm::Module *module) {
676676
void CloneFunctionInto(llvm::Function *source_func, llvm::Function *dest_func) {
677677
auto new_args = dest_func->arg_begin();
678678
ValueMap value_map;
679+
MDMap md_map;
679680
for (llvm::Argument &old_arg : source_func->args()) {
680681
new_args->setName(old_arg.getName());
681682
value_map[&old_arg] = &*new_args;
@@ -686,7 +687,7 @@ void CloneFunctionInto(llvm::Function *source_func, llvm::Function *dest_func) {
686687
dest_func->getContext()),
687688
dest_func->getFunctionType());
688689

689-
CloneFunctionInto(source_func, dest_func, value_map);
690+
CloneFunctionInto(source_func, dest_func, value_map, md_map);
690691
}
691692

692693
// Make `func` a clone of the `__remill_basic_block` function.
@@ -1389,6 +1390,65 @@ static void MoveInstructionIntoModule(llvm::Instruction *inst,
13891390
}
13901391
}
13911392

1393+
// Clone the metadata `md`, which belongs to `source_mod`, into a form usable
// from `dest_mod`, and return the clone. Returns `nullptr` if `md` is null or
// could not be cloned.
//
// `value_map` maps source values to their destination counterparts; it is
// consulted for value-wrapping metadata and is passed on to
// `MoveConstantIntoModule` for constants that are not yet mapped.
//
// `md_map` memoizes results. An entry is created (mapped to `nullptr`) before
// any recursion happens, so a node that is reached again while it is still
// being cloned (i.e. a cycle), or one that previously failed to clone, yields
// `nullptr` on lookup instead of recursing again.
//
// Supported kinds: `llvm::ValueAsMetadata`, `llvm::MDString` and
// `llvm::MDTuple`. Everything else is reported as not clonable.
llvm::Metadata *CloneMetadataInto(
    llvm::Module *source_mod, llvm::Module *dest_mod,
    llvm::Metadata *md, ValueMap &value_map, MDMap &md_map) {

  // `llvm::dyn_cast` must not be handed a null pointer, and operands of a
  // tuple may be null. Treat null as "not clonable" without touching `md_map`.
  if (!md) {
    return nullptr;
  }

  // Reserve the slot for `md` up front. If it already exists then we either
  // finished with this node earlier, or we are in the middle of cloning it.
  {
    auto [existing, added] = md_map.emplace(md, nullptr);
    if (!added) {
      return existing->second;
    }

    // NOTE: `existing` is deliberately scoped to this block. The recursive
    // calls below insert into `md_map`, and an insertion that triggers a
    // rehash invalidates every iterator into a `std::unordered_map`.
  }

  llvm::LLVMContext &source_context = source_mod->getContext();
  llvm::LLVMContext &dest_context = dest_mod->getContext();

  llvm::Metadata *mapped_md = nullptr;

  if (llvm::ValueAsMetadata *val_md =
          llvm::dyn_cast<llvm::ValueAsMetadata>(md)) {
    llvm::Value *val = val_md->getValue();

    // The wrapped value has already been mapped into the destination.
    if (auto val_it = value_map.find(val); val_it != value_map.end()) {
      llvm::Value *mapped_val = val_it->second;
      mapped_md = llvm::ValueAsMetadata::get(mapped_val);

    // Unmapped constant; try to bring it over to the destination module.
    } else if (auto cv = llvm::dyn_cast<llvm::Constant>(val)) {
      llvm::Value *mapped_cv = MoveConstantIntoModule(cv, dest_mod, value_map);
      if (!mapped_cv) {
        return nullptr;  // Couldn't move it.
      }
      mapped_md = llvm::ValueAsMetadata::get(mapped_cv);

    // Unmapped non-constant value; nothing sensible to point it at.
    } else {
      return nullptr;
    }

  } else if (llvm::MDString *str = llvm::dyn_cast<llvm::MDString>(md)) {

    // Within a single context the string can be shared as-is; otherwise an
    // equal string is obtained from the destination context.
    if (&source_context == &dest_context) {
      mapped_md = str;
    } else {
      mapped_md = llvm::MDString::get(dest_context, str->getString());
    }

  } else if (llvm::MDTuple *tuple = llvm::dyn_cast<llvm::MDTuple>(md)) {
    std::vector<llvm::Metadata *> mapped_ops;
    mapped_ops.reserve(tuple->getNumOperands());
    for (llvm::Metadata *op : tuple->operands()) {
      llvm::Metadata *mapped_op =
          CloneMetadataInto(source_mod, dest_mod, op, value_map, md_map);
      if (!mapped_op) {
        return nullptr;  // Possibly cyclic or just not clonable.
      }
      mapped_ops.push_back(mapped_op);
    }
    mapped_md = llvm::MDTuple::get(dest_context, mapped_ops);

  // Not supported.
  } else {
    return nullptr;
  }

  // Look the slot up again rather than reusing the iterator obtained before
  // recursing: the recursive calls may have rehashed `md_map`.
  md_map[md] = mapped_md;
  return mapped_md;
}
1451+
13921452
} // namespace
13931453

13941454
// Clone function `source_func` into `dest_func`, using `value_map` to map over
@@ -1398,7 +1458,7 @@ static void MoveInstructionIntoModule(llvm::Instruction *inst,
13981458
// Note: this will try to clone globals referenced from the module of
13991459
// `source_func` into the module of `dest_func`.
14001460
void CloneFunctionInto(llvm::Function *source_func, llvm::Function *dest_func,
1401-
ValueMap &value_map) {
1461+
ValueMap &value_map, MDMap &md_map) {
14021462

14031463
auto func_name = source_func->getName().str();
14041464
auto source_mod = source_func->getParent();
@@ -1447,26 +1507,69 @@ void CloneFunctionInto(llvm::Function *source_func, llvm::Function *dest_func,
14471507
// Fixup the references in the cloned instructions so that they point into
14481508
// the cloned function, or point to declared globals in the module containing
14491509
// `dest_func`.
1450-
for (auto &old_block : *source_func) {
1451-
for (auto &old_inst : old_block) {
1510+
for (llvm::BasicBlock &old_block : *source_func) {
1511+
for (llvm::Instruction &old_inst : old_block) {
14521512
if (llvm::isa<llvm::DbgInfoIntrinsic>(old_inst)) {
14531513
continue;
14541514
}
14551515

14561516
auto new_inst = llvm::dyn_cast<llvm::Instruction>(value_map[&old_inst]);
1517+
new_inst->setDebugLoc(llvm::DebugLoc());
1518+
new_inst->setName(old_inst.getName());
14571519

1458-
// Clear out all metadata from the new instruction.
1520+
MoveInstructionIntoModule(new_inst, dest_mod, value_map);
1521+
}
1522+
}
1523+
1524+
// NOTE(pag): All fixed MD kinds are part of the custom map, and are
1525+
// initialized into the context upon construction. There are about
1526+
// 40 of them.
1527+
llvm::SmallVector<llvm::StringRef, 64> source_md_names;
1528+
1529+
source_context.getMDKindNames(source_md_names);
1530+
1531+
std::unordered_map<unsigned, unsigned> md_id_map;
1532+
md_id_map.reserve(source_md_names.size());
1533+
for (auto i = 0u; i < source_md_names.size(); ++i) {
1534+
if (&source_context != &dest_context) {
1535+
md_id_map[i] = dest_context.getMDKindID(source_md_names[i]);
1536+
} else {
1537+
md_id_map[i] = i;
1538+
}
1539+
}
1540+
1541+
// Now port the metadata.
1542+
for (llvm::BasicBlock &old_block : *source_func) {
1543+
for (llvm::Instruction &old_inst : old_block) {
1544+
if (llvm::isa<llvm::DbgInfoIntrinsic>(old_inst)) {
1545+
continue;
1546+
}
1547+
1548+
llvm::Instruction *new_inst =
1549+
llvm::dyn_cast<llvm::Instruction>(value_map[&old_inst]);
1550+
if (!new_inst) {
1551+
continue;
1552+
}
1553+
1554+
mds.clear();
14591555
old_inst.getAllMetadata(mds);
1556+
1557+
// First, clear out the old metadata; the metadata IDs might not all
1558+
// align.
14601559
for (auto md_info : mds) {
1461-
if (md_info.first != reg_md_id || &source_context != &dest_context) {
1462-
new_inst->setMetadata(md_info.first, nullptr);
1463-
}
1560+
new_inst->setMetadata(md_info.first, nullptr);
14641561
}
14651562

1466-
new_inst->setDebugLoc(llvm::DebugLoc());
1467-
new_inst->setName(old_inst.getName());
1468-
1469-
MoveInstructionIntoModule(new_inst, dest_mod, value_map);
1563+
// Next, try to convert the metadata over, mapping metadata IDs along
1564+
// the way.
1565+
for (auto md_info : mds) {
1566+
llvm::MDNode *new_md = llvm::dyn_cast_or_null<llvm::MDNode>(
1567+
CloneMetadataInto(source_mod, dest_mod, md_info.second,
1568+
value_map, md_map));
1569+
if (new_md) {
1570+
new_inst->setMetadata(md_id_map[md_info.first], new_md);
1571+
}
1572+
}
14701573
}
14711574
}
14721575
}
@@ -1586,9 +1689,7 @@ void MoveFunctionIntoModule(llvm::Function *func, llvm::Module *dest_module) {
15861689
existing_decl_in_dest_module = nullptr;
15871690
}
15881691

1589-
if (!in_same_context) {
1590-
IF_LLVM_GTE_370(ClearMetaData(func);)
1591-
}
1692+
IF_LLVM_GTE_370(ClearMetaData(func);)
15921693

15931694
// Fill up the locals so that they map to themselves.
15941695
for (auto &arg : func->args()) {
@@ -1597,9 +1698,7 @@ void MoveFunctionIntoModule(llvm::Function *func, llvm::Module *dest_module) {
15971698
for (auto &block : *func) {
15981699
value_map.emplace(&block, &block);
15991700
for (auto &inst : block) {
1600-
if (!in_same_context) {
1601-
ClearMetaData(&inst);
1602-
}
1701+
ClearMetaData(&inst);
16031702
value_map.emplace(&inst, &inst);
16041703
}
16051704
}

0 commit comments

Comments
 (0)