Merged
Changes from 2 commits
170 changes: 120 additions & 50 deletions packages/orchestrator/internal/sandbox/block/cache.go
@@ -358,76 +358,146 @@ func NewCacheFromProcessMemory(
 func (c *Cache) copyProcessMemory(
     ctx context.Context,
     pid int,
-    ranges []Range,
+    rs []Range,
 ) error {
+    splittedRanges, err := splitRanges(rs, MAX_RW_COUNT)
+    if err != nil {
+        return fmt.Errorf("failed to split ranges: %w", err)
+    }
+
     var start int64
 
-    for i := 0; i < len(ranges); i += IOV_MAX {
-        segmentRanges := ranges[i:min(i+IOV_MAX, len(ranges))]
+    // We need to split the ranges because the kernel does not support reading/writing more than MAX_RW_COUNT bytes in a single operation.
+    for _, ranges := range splittedRanges {
+        for i := 0; i < len(ranges); i += IOV_MAX {
+            segmentRanges := ranges[i:min(i+IOV_MAX, len(ranges))]
 
             remote := make([]unix.RemoteIovec, len(segmentRanges))
 
             var segmentSize int64
 
             for j, r := range segmentRanges {
                 remote[j] = unix.RemoteIovec{
                     Base: uintptr(r.Start),
                     Len:  int(r.Size),
                 }
 
                 segmentSize += r.Size
             }
 
             local := []unix.Iovec{
                 {
                     Base: c.address(start),
                     // We could keep this as full cache length, but we might as well be exact here.
                     Len: uint64(segmentSize),
                 },
             }
 
             for {
                 select {
                 case <-ctx.Done():
                     return ctx.Err()
                 default:
                 }
 
                 // We could retry only on the remaining segment size, but for simplicity we retry the whole segment.
                 n, err := unix.ProcessVMReadv(pid,
                     local,
                     remote,
                     0,
                 )
                 if errors.Is(err, unix.EAGAIN) {
                     continue
                 }
                 if errors.Is(err, unix.EINTR) {
                     continue
                 }
                 if errors.Is(err, unix.ENOMEM) {
                     time.Sleep(oomMinBackoff + time.Duration(rand.Intn(int(oomMaxJitter.Milliseconds())))*time.Millisecond)
 
                     continue
                 }
 
                 if err != nil {
                     return fmt.Errorf("failed to read memory: %w", err)
                 }
 
                 if int64(n) != segmentSize {
                     return fmt.Errorf("failed to read memory: expected %d bytes, got %d", segmentSize, n)
                 }
 
                 for _, blockOff := range header.BlocksOffsets(segmentSize, c.blockSize) {
                     c.dirty.Store(start+blockOff, struct{}{})
                 }
 
                 start += segmentSize
 
                 break
             }
         }
+    }
 
     return nil
 }
+
+func splitRanges(ranges []Range, maxSplitSize int64) (groups [][]Range, err error) {
+    var group []Range
+    var groupSize int64
+
+    for _, r := range ranges {
+        if r.Size > maxSplitSize {
+            // If a single range is bigger than maxSplitSize, split it into chunks.
+            start := r.Start
+            remaining := r.Size
+
+            for remaining > 0 {
+                chunkSize := min(remaining, maxSplitSize)
+
+                chunk := Range{Start: start, Size: chunkSize}
+
+                if groupSize+chunk.Size > maxSplitSize {
+                    if len(group) > 0 {
+                        groups = append(groups, group)
+                        group = nil
+                        groupSize = 0
+                    }
+                }
+
+                group = append(group, chunk)
+                groupSize += chunk.Size
+
+                if groupSize == maxSplitSize {
+                    groups = append(groups, group)
+                    group = nil
+                    groupSize = 0
+                }
+
+                start += chunkSize
+                remaining -= chunkSize
+            }
+
+            continue
+        }
+
+        if groupSize+r.Size > maxSplitSize && len(group) > 0 {
+            groups = append(groups, group)
+            group = nil
+            groupSize = 0
+        }
+
+        group = append(group, r)
+        groupSize += r.Size
+
+        if groupSize == maxSplitSize {
+            groups = append(groups, group)
+            group = nil
+            groupSize = 0
+        }
+    }
+
+    if len(group) > 0 {
+        groups = append(groups, group)
+    }
+
+    return groups, nil
+}
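A reviewer-oriented sketch of the grouping invariant: assuming it sits next to cache_test.go in the block package (so it can reach the unexported splitRanges), it spells out how one small input is expected to be grouped when the limit is an artificially small 10 bytes instead of MAX_RW_COUNT. The test name and values are illustrative and not part of this PR.

func TestSplitRanges_GroupingSketch(t *testing.T) {
    t.Parallel()

    // A limit of 10 bytes stands in for MAX_RW_COUNT so the expected
    // grouping is easy to verify by hand.
    const maxSplitSize = int64(10)

    ranges := []Range{
        {Start: 0, Size: 4},  // fits into an open group
        {Start: 4, Size: 25}, // bigger than the limit, split into 10+10+5
        {Start: 29, Size: 8}, // would overflow the previous group, starts a new one
    }

    groups, err := splitRanges(ranges, maxSplitSize)
    require.NoError(t, err)

    // Every group stays within maxSplitSize, so each group can be handed to
    // process_vm_readv (in IOV_MAX-sized batches) without hitting MAX_RW_COUNT.
    expected := [][]Range{
        {{Start: 0, Size: 4}},
        {{Start: 4, Size: 10}},
        {{Start: 14, Size: 10}},
        {{Start: 24, Size: 5}},
        {{Start: 29, Size: 8}},
    }
    require.Equal(t, expected, groups)
}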
39 changes: 39 additions & 0 deletions packages/orchestrator/internal/sandbox/block/cache_test.go
@@ -224,3 +224,42 @@ func compareData(readBytes []byte, expectedBytes []byte) error {
 
     return nil
 }
+
+func TestCopyFromProcess_Exceed_MAX_RW_COUNT(t *testing.T) {
+    t.Parallel()
+
+    pageSize := int64(header.HugepageSize)
+    size := ((MAX_RW_COUNT + 4*pageSize + pageSize - 1) / pageSize) * pageSize
+
+    mem, addr, err := testutils.NewPageMmap(t, uint64(size), uint64(pageSize))
+    require.NoError(t, err)
+
+    n, err := rand.Read(mem)
+    require.NoError(t, err)
+    require.Equal(t, len(mem), n)
+
+    ranges := []Range{
+        {Start: int64(addr), Size: ((MAX_RW_COUNT + 2*pageSize + pageSize - 1) / pageSize) * pageSize},
+        {Start: int64(addr) + ((MAX_RW_COUNT+2*pageSize+pageSize-1)/pageSize)*pageSize, Size: ((2*pageSize + pageSize - 1) / pageSize) * pageSize},
+    }
+
+    cache, err := NewCacheFromProcessMemory(
+        t.Context(),
+        // Regular 4KiB pages.
+        header.PageSize,
+        t.TempDir()+"/cache",
+        os.Getpid(),
+        ranges,
+    )
+    require.NoError(t, err)
+
+    t.Cleanup(func() {
+        cache.Close()
+    })
+
+    data := make([]byte, size)
+    n, err = cache.ReadAt(data, 0)
+    require.NoError(t, err)
+    require.Equal(t, int(size), n)
+    require.NoError(t, compareData(data[:n], mem[0:size]))
+}
18 changes: 16 additions & 2 deletions packages/orchestrator/internal/sandbox/block/iov.go
@@ -2,14 +2,28 @@ package block
 
 import (
     "fmt"
+    "math"
+    "os"
 
     "github.com/tklauser/go-sysconf"
 
     "github.com/e2b-dev/infra/packages/shared/pkg/utils"
 )
 
-// IOV_MAX is the limit of the vectors that can be passed in a single ioctl call.
-var IOV_MAX = utils.Must(getIOVMax())
+var (
+    // IOV_MAX is the limit of the vectors that can be passed in a single ioctl call.
+    IOV_MAX = utils.Must(getIOVMax())
+
+    PAGE_SIZE = os.Getpagesize()
+    PAGE_MASK = ^(PAGE_SIZE - 1)
+    INT_MAX   = math.MaxInt32
+
+    // This is the maximum number of bytes that can be read/written in a single operation.
+    //
+    // https://unix.stackexchange.com/questions/794316/why-linux-read-avoids-using-full-2-gib-in-one-call
+    // https://stackoverflow.com/questions/70368651/why-cant-linux-write-more-than-2147479552-bytes
+    MAX_RW_COUNT = int64(INT_MAX & PAGE_MASK)
+)
 
 func getIOVMax() (int, error) {
     iovMax, err := sysconf.Sysconf(sysconf.SC_IOV_MAX)
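For context on the new constants: MAX_RW_COUNT is INT_MAX rounded down to a page boundary, which is 2147479552 bytes (2 GiB minus 4 KiB) on a 4 KiB-page system, matching the limit described in the linked references. A standalone sketch of the same arithmetic follows; the variable names are illustrative, only the final expression mirrors the code above.

package main

import (
    "fmt"
    "math"
    "os"
)

func main() {
    pageSize := os.Getpagesize() // typically 4096
    pageMask := ^(pageSize - 1)  // clears the low page-offset bits
    intMax := math.MaxInt32      // 2147483647

    // INT_MAX rounded down to a page boundary; with 4 KiB pages this is
    // 2147479552 bytes, the most a single read/write-style syscall such as
    // process_vm_readv will transfer in one call.
    maxRWCount := int64(intMax & pageMask)

    fmt.Println(maxRWCount) // 2147479552 on a 4 KiB-page system
}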