Skip to content

Commit a929c5c

Browse files
committed
interp: use object layout information for LLVM types
This commit uses the memory layout information for heap allocations (added in the previous commit) to determine LLVM types, instead of guessing the types from the contents of the allocation. This fixes a bug in which recursive data structures (such as doubly linked lists) would result in a compiler stack overflow due to infinite recursion. Not all heap allocations have a memory layout yet, but this can be fixed incrementally in the future. So far, this commit should fix (almost?) all cases of this stack overflow issue.
1 parent d00036c commit a929c5c

File tree

6 files changed

+270
-12
lines changed

6 files changed

+270
-12
lines changed

interp/interp_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ func TestInterp(t *testing.T) {
1818
"consteval",
1919
"interface",
2020
"revert",
21+
"alloc",
2122
} {
2223
name := name // make name local to this closure
2324
t.Run(name, func(t *testing.T) {

interp/interpreter.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,11 +234,15 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
234234
// Get the requested memory size to be allocated.
235235
size := operands[1].Uint()
236236

237+
// Get the object layout, if it is available.
238+
llvmLayoutType := r.getLLVMTypeFromLayout(operands[2])
239+
237240
// Create the object.
238241
alloc := object{
239-
globalName: r.pkgName + "$alloc",
240-
buffer: newRawValue(uint32(size)),
241-
size: uint32(size),
242+
globalName: r.pkgName + "$alloc",
243+
llvmLayoutType: llvmLayoutType,
244+
buffer: newRawValue(uint32(size)),
245+
size: uint32(size),
242246
}
243247
index := len(r.objects)
244248
r.objects = append(r.objects, alloc)

interp/memory.go

Lines changed: 139 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"encoding/binary"
1919
"errors"
2020
"math"
21+
"math/big"
2122
"strconv"
2223
"strings"
2324

@@ -27,17 +28,20 @@ import (
2728
// An object is a memory buffer that may be an already existing global or a
2829
// global created with runtime.alloc or the alloca instruction. If llvmGlobal is
2930
// set, that's the global for this object, otherwise it needs to be created (if
30-
// it is still reachable when the package initializer returns).
31+
// it is still reachable when the package initializer returns). The
32+
// llvmLayoutType is not necessarily a complete type: it may need to be
33+
// repeated (for example, for a slice value).
3134
//
3235
// Objects are copied in a memory view when they are stored to, to provide the
3336
// ability to roll back interpreting a function.
3437
type object struct {
35-
llvmGlobal llvm.Value
36-
llvmType llvm.Type // must match llvmGlobal.Type() if both are set, may be unset if llvmGlobal is set
37-
globalName string // name, if not yet created (not guaranteed to be the final name)
38-
buffer value // buffer with value as given by interp, nil if external
39-
size uint32 // must match buffer.len(), if available
40-
marked uint8 // 0 means unmarked, 1 means external read, 2 means external write
38+
llvmGlobal llvm.Value
39+
llvmType llvm.Type // must match llvmGlobal.Type() if both are set, may be unset if llvmGlobal is set
40+
llvmLayoutType llvm.Type // LLVM type based on runtime.alloc layout parameter, if available
41+
globalName string // name, if not yet created (not guaranteed to be the final name)
42+
buffer value // buffer with value as given by interp, nil if external
43+
size uint32 // must match buffer.len(), if available
44+
marked uint8 // 0 means unmarked, 1 means external read, 2 means external write
4145
}
4246

4347
// clone() returns a cloned version of this object, for when an object needs to
@@ -529,7 +533,7 @@ func (v pointerValue) toLLVMValue(llvmType llvm.Type, mem *memoryView) (llvm.Val
529533
// runtime.alloc.
530534
// First allocate a new global for this object.
531535
obj := mem.get(v.index())
532-
if obj.llvmType.IsNil() {
536+
if obj.llvmType.IsNil() && obj.llvmLayoutType.IsNil() {
533537
// Create an initializer without knowing the global type.
534538
// This is probably the result of a runtime.alloc call.
535539
initializer, err := obj.buffer.asRawValue(mem.r).rawLLVMValue(mem)
@@ -543,7 +547,23 @@ func (v pointerValue) toLLVMValue(llvmType llvm.Type, mem *memoryView) (llvm.Val
543547
obj.llvmGlobal = llvmValue
544548
mem.put(v.index(), obj)
545549
} else {
546-
globalType := obj.llvmType.ElementType()
550+
// The global type is known, or at least its structure.
551+
var globalType llvm.Type
552+
if !obj.llvmType.IsNil() {
553+
// The exact type is known.
554+
globalType = obj.llvmType.ElementType()
555+
} else { // !obj.llvmLayoutType.IsNil()
556+
// The exact type isn't known, but the object layout is known.
557+
globalType = obj.llvmLayoutType
558+
// The layout may not span the full size of the global because
559+
// of repetition. One example would be make([]string, 5) which
560+
// would be 10 words in size but the layout would only be two
561+
// words (for the string type).
562+
typeSize := mem.r.targetData.TypeAllocSize(globalType)
563+
if typeSize != uint64(obj.size) {
564+
globalType = llvm.ArrayType(globalType, int(uint64(obj.size)/typeSize))
565+
}
566+
}
547567
if checks && mem.r.targetData.TypeAllocSize(globalType) != uint64(obj.size) {
548568
panic("size of the globalType isn't the same as the object size")
549569
}
@@ -562,6 +582,11 @@ func (v pointerValue) toLLVMValue(llvmType llvm.Type, mem *memoryView) (llvm.Val
562582
return llvm.Value{}, errors.New("interp: allocated value does not match allocated type")
563583
}
564584
llvmValue.SetInitializer(initializer)
585+
if obj.llvmType.IsNil() {
586+
// The exact type isn't known (only the layout), so use the
587+
// alignment that would normally be expected from runtime.alloc.
588+
llvmValue.SetAlignment(mem.r.maxAlign)
589+
}
565590
}
566591

567592
// It should be included in r.globals because otherwise markExternal
@@ -1209,3 +1234,108 @@ func (r *runner) getValue(llvmValue llvm.Value) value {
12091234
panic("unknown value")
12101235
}
12111236
}
1237+
1238+
// readObjectLayout reads the object layout as it is stored by the compiler. It
1239+
// returns the size in the number of words and the bitmap.
//
// Three encodings of layoutValue are handled:
//   - the integer 0 (a nil pointer): the layout is unknown and (0, nil) is
//     returned;
//   - any other integer: bit 0 must be set, the next 4, 5 or 6 bits (for 16-,
//     32- or 64-bit pointers respectively) hold the object size in words, and
//     all remaining upper bits form the bitmap. For example, with 32-bit
//     pointers the value 67 decodes to a size of 1 word and a bitmap of 1;
//   - a pointer to an object: the first r.pointerSize elements of its buffer
//     hold the object size in words and the remaining elements are the bytes
//     of the bitmap, interpreted by big.Int.SetBytes (big-endian).
//
// The function panics when one of its sanity checks fails.
//
// NOTE(review): an error from asPointer other than errIntegerAsPointer is not
// inspected; presumably it cannot occur for this operand -- confirm.
1240+
func (r *runner) readObjectLayout(layoutValue value) (uint64, *big.Int) {
1241+
pointerSize := layoutValue.len(r)
1242+
if checks && uint64(pointerSize) != r.targetData.TypeAllocSize(r.i8ptrType) {
1243+
panic("inconsistent pointer size")
1244+
}
1245+
1246+
// The object layout can be stored in a global variable, directly as an
1247+
// integer value, or can be nil.
1248+
ptr, err := layoutValue.asPointer(r)
1249+
if err == errIntegerAsPointer {
1250+
// It's an integer, which means it's a small object or unknown.
1251+
layout := layoutValue.Uint()
1252+
if layout == 0 {
1253+
// Nil pointer, which means the layout is unknown.
1254+
return 0, nil
1255+
}
1256+
if layout%2 != 1 {
1257+
// Sanity check: the least significant bit must be set. This is how
1258+
// the runtime can separate pointers from integers.
1259+
panic("unexpected layout")
1260+
}
1261+
1262+
// Determine format of bitfields in the integer.
// The width of the size field depends on the pointer width in bits.
1263+
pointerBits := uint64(pointerSize * 8)
1264+
var sizeFieldBits uint64
1265+
switch pointerBits {
1266+
case 16:
1267+
sizeFieldBits = 4
1268+
case 32:
1269+
sizeFieldBits = 5
1270+
case 64:
1271+
sizeFieldBits = 6
1272+
default:
1273+
panic("unknown pointer size")
1274+
}
1275+
1276+
// Extract fields.
// Bit 0 is the integer marker and is skipped. Because << binds tighter
// than -, (1<<sizeFieldBits - 1) is a mask of sizeFieldBits one-bits.
// Everything above the size field is the bitmap.
1277+
objectSizeWords := (layout >> 1) & (1<<sizeFieldBits - 1)
1278+
bitmap := new(big.Int).SetUint64(layout >> (1 + sizeFieldBits))
1279+
return objectSizeWords, bitmap
1280+
}
1281+
1282+
// Read the object size in words and the bitmap from the global.
// The single-value type assertion panics if the buffer is not a rawValue.
1283+
buf := r.objects[ptr.index()].buffer.(rawValue)
1284+
objectSizeWords := rawValue{buf: buf.buf[:r.pointerSize]}.Uint()
1285+
rawByteValues := buf.buf[r.pointerSize:]
1286+
rawBytes := make([]byte, len(rawByteValues))
1287+
for i, v := range rawByteValues {
// An element that does not survive a round trip through byte is not plain
// byte data (presumably it encodes a pointer, as the panic message
// suggests).
1288+
if uint64(byte(v)) != v {
1289+
panic("found pointer in data array?") // sanity check
1290+
}
1291+
rawBytes[i] = byte(v)
1292+
}
1293+
bitmap := new(big.Int).SetBytes(rawBytes)
1294+
return objectSizeWords, bitmap
1295+
}
1296+
1297+
// getLLVMTypeFromLayout returns the 'layout type', which is an approximation of
1298+
// the real type. Pointers are in the correct location but the actual object may
1299+
// have some additional repetition, for example in the buffer of a slice.
//
// The result is:
//   - the zero llvm.Type when readObjectLayout reports no layout (nil bitmap);
//   - the i8 type when the bitmap has no bits set;
//   - otherwise a type with one field per layout entry: r.i8ptrType where the
//     bitmap bit is set and an integer of pointerAlignment*8 bits where it is
//     not. A single field is returned as-is, several fields are wrapped in a
//     struct type.
//
// The size of one layout word is taken to be the preferred alignment of
// r.i8ptrType, as the final size check multiplies the word count by it.
1300+
func (r *runner) getLLVMTypeFromLayout(layoutValue value) llvm.Type {
1301+
objectSizeWords, bitmap := r.readObjectLayout(layoutValue)
1302+
if bitmap == nil {
1303+
// No information available.
1304+
return llvm.Type{}
1305+
}
1306+
1307+
if bitmap.BitLen() == 0 {
1308+
// There are no pointers in this object, so treat this as a raw byte
1309+
// buffer. This is important because objects without pointers may have
1310+
// lower alignment.
1311+
return r.mod.Context().Int8Type()
1312+
}
1313+
1314+
// Create the LLVM type.
1315+
pointerSize := layoutValue.len(r)
1316+
pointerAlignment := r.targetData.PrefTypeAlignment(r.i8ptrType)
1317+
var fields []llvm.Type
// The loop has no post statement: i is advanced inside the body because a
// pointer field may cover more than one bitmap position.
1318+
for i := 0; i < int(objectSizeWords); {
1319+
if bitmap.Bit(i) != 0 {
1320+
// Pointer field.
// Skip as many positions as the pointer occupies.
// NOTE(review): this integer division is 0 if pointerSize were smaller
// than pointerAlignment, which would stop the loop from advancing;
// presumably that never happens -- confirm.
1321+
fields = append(fields, r.i8ptrType)
1322+
i += int(pointerSize / uint32(pointerAlignment))
1323+
} else {
1324+
// Byte/word field.
1325+
fields = append(fields, r.mod.Context().IntType(pointerAlignment*8))
1326+
i += 1
1327+
}
1328+
}
1329+
var llvmLayoutType llvm.Type
1330+
if len(fields) == 1 {
1331+
llvmLayoutType = fields[0]
1332+
} else {
// The false argument is presumably the 'packed' flag of StructType.
1333+
llvmLayoutType = r.mod.Context().StructType(fields, false)
1334+
}
1335+
1336+
// The constructed type must be exactly as large as the layout says.
1337+
objectSizeBytes := objectSizeWords * uint64(pointerAlignment)
1338+
if checks && r.targetData.TypeAllocSize(llvmLayoutType) != objectSizeBytes {
1339+
panic("unexpected size") // sanity check
1340+
}
1341+
return llvmLayoutType
1342+
}

interp/testdata/alloc.ll

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
2+
target triple = "wasm32--wasi"
3+
4+
@"runtime/gc.layout:62-2000000000000001" = linkonce_odr unnamed_addr constant { i32, [8 x i8] } { i32 62, [8 x i8] c" \00\00\00\00\00\00\01" }
5+
@pointerFree12 = global i8* null
6+
@pointerFree7 = global i8* null
7+
@pointerFree3 = global i8* null
8+
@pointerFree0 = global i8* null
9+
@layout1 = global i8* null
10+
@layout2 = global i8* null
11+
@layout3 = global i8* null
12+
@layout4 = global i8* null
13+
@bigobj1 = global i8* null
14+
15+
declare i8* @runtime.alloc(i32, i8*) unnamed_addr
16+
17+
define void @runtime.initAll() unnamed_addr {
18+
call void @main.init()
19+
ret void
20+
}
21+
22+
define internal void @main.init() unnamed_addr {
23+
; Object that's word-aligned.
24+
%pointerFree12 = call i8* @runtime.alloc(i32 12, i8* inttoptr (i32 3 to i8*))
25+
store i8* %pointerFree12, i8** @pointerFree12
26+
; Object larger than a word but not word-aligned.
27+
%pointerFree7 = call i8* @runtime.alloc(i32 7, i8* inttoptr (i32 3 to i8*))
28+
store i8* %pointerFree7, i8** @pointerFree7
29+
; Object smaller than a word (and of course not word-aligned).
30+
%pointerFree3 = call i8* @runtime.alloc(i32 3, i8* inttoptr (i32 3 to i8*))
31+
store i8* %pointerFree3, i8** @pointerFree3
32+
; Zero-sized object.
33+
%pointerFree0 = call i8* @runtime.alloc(i32 0, i8* inttoptr (i32 3 to i8*))
34+
store i8* %pointerFree0, i8** @pointerFree0
35+
36+
; Object made out of 3 pointers.
37+
%layout1 = call i8* @runtime.alloc(i32 12, i8* inttoptr (i32 67 to i8*))
38+
store i8* %layout1, i8** @layout1
39+
; Array (or slice) of 5 slices.
40+
%layout2 = call i8* @runtime.alloc(i32 60, i8* inttoptr (i32 71 to i8*))
41+
store i8* %layout2, i8** @layout2
42+
; Oddly shaped object, using all bits in the layout integer.
43+
%layout3 = call i8* @runtime.alloc(i32 104, i8* inttoptr (i32 2467830261 to i8*))
44+
store i8* %layout3, i8** @layout3
45+
; ...repeated.
46+
%layout4 = call i8* @runtime.alloc(i32 312, i8* inttoptr (i32 2467830261 to i8*))
47+
store i8* %layout4, i8** @layout4
48+
49+
; Large object that needs to be stored in a separate global.
50+
%bigobj1 = call i8* @runtime.alloc(i32 248, i8* bitcast ({ i32, [8 x i8] }* @"runtime/gc.layout:62-2000000000000001" to i8*))
51+
store i8* %bigobj1, i8** @bigobj1
52+
ret void
53+
}

interp/testdata/alloc.out.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
2+
target triple = "wasm32--wasi"
3+
4+
@pointerFree12 = local_unnamed_addr global i8* getelementptr inbounds ([12 x i8], [12 x i8]* @"main$alloc", i32 0, i32 0)
5+
@pointerFree7 = local_unnamed_addr global i8* getelementptr inbounds ([7 x i8], [7 x i8]* @"main$alloc.1", i32 0, i32 0)
6+
@pointerFree3 = local_unnamed_addr global i8* getelementptr inbounds ([3 x i8], [3 x i8]* @"main$alloc.2", i32 0, i32 0)
7+
@pointerFree0 = local_unnamed_addr global i8* getelementptr inbounds ([0 x i8], [0 x i8]* @"main$alloc.3", i32 0, i32 0)
8+
@layout1 = local_unnamed_addr global i8* bitcast ([3 x i8*]* @"main$alloc.4" to i8*)
9+
@layout2 = local_unnamed_addr global i8* bitcast ([5 x { i8*, i32, i32 }]* @"main$alloc.5" to i8*)
10+
@layout3 = local_unnamed_addr global i8* bitcast ({ i8*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i8*, i8*, i32, i32, i8*, i32, i32, i8* }* @"main$alloc.6" to i8*)
11+
@layout4 = local_unnamed_addr global i8* bitcast ([3 x { i8*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i8*, i8*, i32, i32, i8*, i32, i32, i8* }]* @"main$alloc.7" to i8*)
12+
@bigobj1 = local_unnamed_addr global i8* bitcast ({ i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* }* @"main$alloc.8" to i8*)
13+
@"main$alloc" = internal global [12 x i8] zeroinitializer, align 4
14+
@"main$alloc.1" = internal global [7 x i8] zeroinitializer, align 4
15+
@"main$alloc.2" = internal global [3 x i8] zeroinitializer, align 4
16+
@"main$alloc.3" = internal global [0 x i8] zeroinitializer, align 4
17+
@"main$alloc.4" = internal global [3 x i8*] zeroinitializer, align 4
18+
@"main$alloc.5" = internal global [5 x { i8*, i32, i32 }] zeroinitializer, align 4
19+
@"main$alloc.6" = internal global { i8*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i8*, i8*, i32, i32, i8*, i32, i32, i8* } zeroinitializer, align 4
20+
@"main$alloc.7" = internal global [3 x { i8*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i8*, i8*, i32, i32, i8*, i32, i32, i8* }] zeroinitializer, align 4
21+
@"main$alloc.8" = internal global { i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* } zeroinitializer, align 4
22+
23+
define void @runtime.initAll() unnamed_addr {
24+
ret void
25+
}

testdata/init.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,53 @@ var (
4444
uint8SliceDst []uint8
4545
intSliceSrc = []int16{5, 123, 1024}
4646
intSliceDst []int16
47+
48+
someList *linkedList
49+
someBigList *bigLinkedList
4750
)
4851

52+
// linkedList is a node that points to other nodes of its own type in both
// directions (prev and next), making it a recursive data structure.
type linkedList struct {
53+
prev *linkedList
54+
next *linkedList
55+
v int // arbitrary value (don't care)
56+
}
57+
58+
// init builds a chain of four linkedList nodes (v = -1, 0, 1, 2) in which
// neighbouring nodes reference each other through prev and next. someList ends
// up pointing at the node created last.
func init() {
59+
someList = &linkedList{
60+
v: -1,
61+
}
62+
for i := 0; i < 3; i++ {
63+
prev := someList
64+
someList = &linkedList{
65+
v: i,
66+
prev: prev,
67+
}
// Link back from the previous node so the two nodes point at each other.
68+
prev.next = someList
69+
}
70+
}
71+
72+
// bigLinkedList has the same prev/next/v fields as linkedList plus an array of
// 100 pointers.
// NOTE(review): presumably the extra array makes the object too large for an
// inline integer layout, so that the layout stored in a separate global is
// exercised -- confirm.
type bigLinkedList struct {
73+
prev *bigLinkedList
74+
next *bigLinkedList
75+
v int
76+
buf [100]*int
77+
}
78+
79+
// init builds a chain of four bigLinkedList nodes (v = -1, 0, 1, 2) in which
// neighbouring nodes reference each other through prev and next. someBigList
// ends up pointing at the node created last.
func init() {
80+
// Create a circular reference.
81+
someBigList = &bigLinkedList{
82+
v: -1,
83+
}
84+
for i := 0; i < 3; i++ {
85+
prev := someBigList
86+
someBigList = &bigLinkedList{
87+
v: i,
88+
prev: prev,
89+
}
// Link back from the previous node so the two nodes point at each other.
90+
prev.next = someBigList
91+
}
92+
}
93+
4994
func init() {
5095
uint8SliceDst = make([]uint8, len(uint8SliceSrc))
5196
copy(uint8SliceDst, uint8SliceSrc)

0 commit comments

Comments
 (0)