62 changes: 51 additions & 11 deletions packages/orchestrator/internal/sandbox/block/cache.go
@@ -358,29 +358,47 @@ func NewCacheFromProcessMemory(
func (c *Cache) copyProcessMemory(
ctx context.Context,
pid int,
ranges []Range,
rs []Range,
) error {
var start int64
// We need to split the ranges because the kernel does not support reading/writing more than MAX_RW_COUNT bytes in a single operation.
ranges := splitOversizedRanges(rs, MAX_RW_COUNT)

for i := 0; i < len(ranges); i += IOV_MAX {
segmentRanges := ranges[i:min(i+IOV_MAX, len(ranges))]
var offset int64
var rangeIdx int64

remote := make([]unix.RemoteIovec, len(segmentRanges))
for {
var remote []unix.RemoteIovec

var segmentSize int64

for j, r := range segmentRanges {
remote[j] = unix.RemoteIovec{
// We iterate over the remaining ranges until we reach the IOV_MAX limit,
// or until the next range would overflow MAX_RW_COUNT.
for ; rangeIdx < int64(len(ranges)); rangeIdx++ {
r := ranges[rangeIdx]

if len(remote) == IOV_MAX {
break
}

if segmentSize+r.Size > MAX_RW_COUNT {
break
}

remote = append(remote, unix.RemoteIovec{
Base: uintptr(r.Start),
Len: int(r.Size),
}
})

segmentSize += r.Size
}

if segmentSize == 0 {
break
}

local := []unix.Iovec{
{
Base: c.address(start),
Base: c.address(offset),
// We could keep this as the full cache length, but we might as well be exact here.
Len: uint64(segmentSize),
},
@@ -420,14 +438,36 @@ func (c *Cache) copyProcessMemory(
}

for _, blockOff := range header.BlocksOffsets(segmentSize, c.blockSize) {
c.dirty.Store(start+blockOff, struct{}{})
c.dirty.Store(offset+blockOff, struct{}{})
}

start += segmentSize
offset += segmentSize

break
}
}

return nil
}

// splitOversizedRanges splits the ranges so that no range is larger than maxSize.
// This is not an optimal split; ideally we would pack the ranges so that each unix.ProcessVMReadv call is filled up to the max size.
// It is, however, a very simple split that works, and the extra syscall overhead here is small compared to the rest of the copy.
func splitOversizedRanges(ranges []Range, maxSize int64) (result []Range) {
for _, r := range ranges {
if r.Size <= maxSize {
result = append(result, r)

continue
}

for offset := int64(0); offset < r.Size; offset += maxSize {
result = append(result, Range{
Start: r.Start + offset,
Size: min(r.Size-offset, maxSize),
})
}
}

return result
}
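The collapsed region of copyProcessMemory (between the two hunks above) presumably issues the actual copy once a segment of iovecs has been collected. As an illustrative sketch of what one segment copy could look like with golang.org/x/sys/unix, not the PR's implementation (the helper name readSegment and its error handling are assumptions):

```go
package block

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// readSegment is an illustrative sketch, not the PR's code: it copies one
// segment of remote ranges into the local buffer with a single
// process_vm_readv(2) call. Each segment holds at most IOV_MAX remote iovecs
// and at most MAX_RW_COUNT bytes, so one syscall suffices.
func readSegment(pid int, local []unix.Iovec, remote []unix.RemoteIovec, segmentSize int64) error {
	n, err := unix.ProcessVMReadv(pid, local, remote, 0)
	if err != nil {
		return fmt.Errorf("process_vm_readv: %w", err)
	}
	if int64(n) != segmentSize {
		// Treat a short read as an error for simplicity; the real code may
		// handle partial reads differently.
		return fmt.Errorf("short read: copied %d of %d bytes", n, segmentSize)
	}
	return nil
}
```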
171 changes: 171 additions & 0 deletions packages/orchestrator/internal/sandbox/block/cache_test.go
@@ -229,6 +229,177 @@ func compareData(readBytes []byte, expectedBytes []byte) error {
return nil
}

func TestSplitOversizedRanges(t *testing.T) {
t.Parallel()

tests := []struct {
name string
ranges []Range
maxSize int64
expected []Range
}{
{
name: "empty input",
ranges: nil,
maxSize: 100,
expected: nil,
},
{
name: "all ranges within limit",
ranges: []Range{
{Start: 0, Size: 50},
{Start: 100, Size: 50},
},
maxSize: 100,
expected: []Range{
{Start: 0, Size: 50},
{Start: 100, Size: 50},
},
},
{
name: "range exactly at limit",
ranges: []Range{
{Start: 0, Size: 100},
},
maxSize: 100,
expected: []Range{
{Start: 0, Size: 100},
},
},
{
name: "single oversized range splits evenly",
ranges: []Range{
{Start: 0, Size: 300},
},
maxSize: 100,
expected: []Range{
{Start: 0, Size: 100},
{Start: 100, Size: 100},
{Start: 200, Size: 100},
},
},
{
name: "single oversized range with remainder",
ranges: []Range{
{Start: 0, Size: 250},
},
maxSize: 100,
expected: []Range{
{Start: 0, Size: 100},
{Start: 100, Size: 100},
{Start: 200, Size: 50},
},
},
{
name: "mixed ranges - some need splitting",
ranges: []Range{
{Start: 0, Size: 50},
{Start: 100, Size: 250},
{Start: 400, Size: 80},
},
maxSize: 100,
expected: []Range{
{Start: 0, Size: 50},
{Start: 100, Size: 100},
{Start: 200, Size: 100},
{Start: 300, Size: 50},
{Start: 400, Size: 80},
},
},
{
name: "range just over limit",
ranges: []Range{
{Start: 0, Size: 101},
},
maxSize: 100,
expected: []Range{
{Start: 0, Size: 100},
{Start: 100, Size: 1},
},
},
{
name: "preserves start addresses correctly",
ranges: []Range{
{Start: 1000, Size: 250},
},
maxSize: 100,
expected: []Range{
{Start: 1000, Size: 100},
{Start: 1100, Size: 100},
{Start: 1200, Size: 50},
},
},
{
name: "demonstrate unoptimal split",
ranges: []Range{
{Start: 1000, Size: 250},
{Start: 1250, Size: 250},
},
maxSize: 240,
expected: []Range{
{Start: 1000, Size: 240},
{Start: 1240, Size: 10},
{Start: 1250, Size: 240},
{Start: 1490, Size: 10},
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()

result := splitOversizedRanges(tt.ranges, tt.maxSize)
require.Equal(t, tt.expected, result)
})
}
}

// This test verifies that the copy correctly splits the ranges when the total size exceeds MAX_RW_COUNT.
func TestCopyFromProcess_Exceed_MAX_RW_COUNT(t *testing.T) {
fmt.Println("MAX_RW_COUNT", MAX_RW_COUNT)

t.Parallel()

pageSize := int64(header.PageSize)
// We allocate more than MAX_RW_COUNT so that the copy would hit the MAX_RW_COUNT limit if the ranges were not split correctly.
size := ((MAX_RW_COUNT + 4*pageSize + pageSize - 1) / pageSize) * pageSize

// Initialize the memory we will copy from.
mem, addr, err := testutils.NewPageMmap(t, uint64(size), uint64(pageSize))
require.NoError(t, err)

n, err := rand.Read(mem)
require.NoError(t, err)
require.Equal(t, len(mem), n)

ranges := []Range{
// We make it so that at least one of the ranges is larger than MAX_RW_COUNT.
{Start: int64(addr), Size: ((MAX_RW_COUNT + 2*pageSize + pageSize - 1) / pageSize) * pageSize},
{Start: int64(addr) + ((MAX_RW_COUNT+2*pageSize+pageSize-1)/pageSize)*pageSize, Size: ((2*pageSize + pageSize - 1) / pageSize) * pageSize},
}

cache, err := NewCacheFromProcessMemory(
t.Context(),
// Regular 4KiB pages for the cache/mmap we will copy to.
header.PageSize,
t.TempDir()+"/cache",
os.Getpid(),
ranges,
)
require.NoError(t, err)

t.Cleanup(func() {
cache.Close()
})

data := make([]byte, size)
n, err = cache.ReadAt(data, 0)
require.NoError(t, err)
require.Equal(t, int(size), n)
require.NoError(t, compareData(data[:n], mem[0:size]))
}

func BenchmarkCopyFromHugepagesFile(b *testing.B) {
pageSize := int64(header.HugepageSize)
size := pageSize * 500
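The size expressions in the test above repeatedly use a round-up-to-a-page-multiple idiom. A minimal sketch of that idiom (the helper name roundUpToPage is hypothetical, only for illustration):

```go
// roundUpToPage rounds n up to the next multiple of pageSize.
// In the test above, MAX_RW_COUNT is already page-aligned (INT_MAX masked down
// to a page boundary), so when pageSize matches the system page size the
// rounding leaves MAX_RW_COUNT + 4*pageSize unchanged.
func roundUpToPage(n, pageSize int64) int64 {
	return ((n + pageSize - 1) / pageSize) * pageSize
}
```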
18 changes: 16 additions & 2 deletions packages/orchestrator/internal/sandbox/block/iov.go
@@ -2,14 +2,28 @@ package block

import (
"fmt"
"math"
"os"

"github.com/tklauser/go-sysconf"

"github.com/e2b-dev/infra/packages/shared/pkg/utils"
)

// IOV_MAX is the limit of the vectors that can be passed in a single ioctl call.
var IOV_MAX = utils.Must(getIOVMax())
var (
// IOV_MAX is the maximum number of iovec entries that can be passed to a single readv/writev-style syscall (such as process_vm_readv).
IOV_MAX = utils.Must(getIOVMax())

PAGE_SIZE = os.Getpagesize()
// PAGE_MASK clears the low bits of a value, rounding it down to a page boundary.
PAGE_MASK = ^(int64(PAGE_SIZE) - 1)
INT_MAX = int64(math.MaxInt32)

// This is the maximum number of bytes that can be read/written in a single operation.
//
// https://unix.stackexchange.com/questions/794316/why-linux-read-avoids-using-full-2-gib-in-one-call
// https://stackoverflow.com/questions/70368651/why-cant-linux-write-more-than-2147479552-bytes
MAX_RW_COUNT = INT_MAX & PAGE_MASK
)

func getIOVMax() (int, error) {
iovMax, err := sysconf.Sysconf(sysconf.SC_IOV_MAX)
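For reference, a small worked example of how MAX_RW_COUNT comes out on a typical system (the 4096-byte page size is an assumption; the real value comes from os.Getpagesize()):

```go
package main

import "fmt"

func main() {
	const pageSize = int64(4096)    // assumed system page size
	const intMax = int64(1)<<31 - 1 // math.MaxInt32 = 0x7fffffff
	pageMask := ^(pageSize - 1)     // clears the low 12 bits (the page offset)
	maxRWCount := intMax & pageMask // 0x7ffff000 = 2147479552 bytes
	// That is 2 GiB minus one page, matching the limit discussed in the linked questions.
	fmt.Printf("MAX_RW_COUNT = %d (%#x)\n", maxRWCount, maxRWCount)
}
```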