Skip to content

Commit b856cee

Browse files
committed
fix: speed up provisioning shutdown
1 parent a2acf44 commit b856cee

File tree

5 files changed

+67
-14
lines changed

5 files changed

+67
-14
lines changed

packages/orchestrator/internal/sandbox/block/cache.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,12 +112,16 @@ func (m *Cache) ExportToDiff(out io.Writer) (*header.DiffMetadata, error) {
112112
}
113113

114114
dirty.Set(uint(blockIdx))
115-
_, err = out.Write(block)
115+
n, err := out.Write(block)
116116
if err != nil {
117117
zap.L().Error("error writing to out", zap.Error(err))
118118

119119
return nil, err
120120
}
121+
122+
if int64(n) != m.blockSize {
123+
return nil, fmt.Errorf("short write: %d != %d", int64(n), m.blockSize)
124+
}
121125
}
122126

123127
return &header.DiffMetadata{

packages/orchestrator/internal/sandbox/sandbox.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,61 @@ func (s *Sandbox) FirecrackerVersions() fc.FirecrackerVersions {
655655
return s.process.Versions
656656
}
657657

658+
func (s *Sandbox) Shutdown(ctx context.Context) error {
659+
ctx, span := tracer.Start(ctx, "shutdown sandbox")
660+
defer span.End()
661+
662+
// Stop the health check before pausing the VM
663+
s.Checks.Stop()
664+
665+
if err := s.process.Pause(ctx); err != nil {
666+
return fmt.Errorf("failed to pause VM: %w", err)
667+
}
668+
669+
if err := s.memory.Disable(); err != nil {
670+
return fmt.Errorf("failed to disable uffd: %w", err)
671+
}
672+
673+
// This is required because the FC API doesn't support passing /dev/null
674+
tf, err := storage.TemplateFiles{
675+
BuildID: uuid.New().String(),
676+
KernelVersion: s.Template.Files().KernelVersion,
677+
FirecrackerVersion: s.Template.Files().FirecrackerVersion,
678+
}.CacheFiles(s.config)
679+
if err != nil {
680+
return fmt.Errorf("failed to create template files: %w", err)
681+
}
682+
defer tf.Close(s.config)
683+
684+
// The snapfile is required only because the FC API doesn't support passing /dev/null
685+
snapfile := template.NewLocalFileLink(tf.CacheSnapfilePath(s.config))
686+
defer snapfile.Close()
687+
688+
// The memfile is required only because the FC API doesn't support passing /dev/null
689+
memfile, err := storage.AcquireTmpMemfile(ctx, s.config, tf.BuildID)
690+
if err != nil {
691+
return fmt.Errorf("failed to acquire memfile snapshot: %w", err)
692+
}
693+
defer memfile.Close()
694+
695+
err = s.process.CreateSnapshot(
696+
ctx,
697+
snapfile.Path(),
698+
memfile.Path(),
699+
)
700+
if err != nil {
701+
return fmt.Errorf("error creating snapshot: %w", err)
702+
}
703+
704+
// This should properly flush rootfs to the underlying device.
705+
err = s.Stop(ctx)
706+
if err != nil {
707+
return fmt.Errorf("error stopping sandbox: %w", err)
708+
}
709+
710+
return nil
711+
}
712+
658713
func (s *Sandbox) Pause(
659714
ctx context.Context,
660715
m metadata.Template,

packages/orchestrator/internal/template/build/phases/base/provision.go

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ import (
1414
tt "text/template"
1515
"time"
1616

17-
"github.com/google/uuid"
1817
"go.uber.org/zap"
1918
"go.uber.org/zap/zapio"
2019

@@ -27,8 +26,6 @@ import (
2726
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/core/rootfs"
2827
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/writer"
2928
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/constants"
30-
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/metadata"
31-
"github.com/e2b-dev/infra/packages/shared/pkg/storage"
3229
"github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
3330
"github.com/e2b-dev/infra/packages/shared/pkg/utils"
3431
)
@@ -150,17 +147,10 @@ func (bb *BaseBuilder) provisionSandbox(
150147

151148
userLogger.Info("Provisioning was successful, cleaning up")
152149

153-
snapshot, err := sbx.Pause(ctx, metadata.Template{
154-
Template: storage.TemplateFiles{
155-
BuildID: uuid.NewString(),
156-
KernelVersion: fcVersions.KernelVersion,
157-
FirecrackerVersion: fcVersions.FirecrackerVersion,
158-
},
159-
})
150+
err = sbx.Shutdown(ctx)
160151
if err != nil {
161-
return fmt.Errorf("error pausing provisioned sandbox: %w", err)
152+
return fmt.Errorf("error shutting down provisioned sandbox: %w", err)
162153
}
163-
defer snapshot.Close(context.WithoutCancel(ctx))
164154

165155
err = filesystem.RemoveFile(ctx, rootfsPath, provisionScriptResultPath)
166156
if err != nil {

packages/orchestrator/internal/template/build/phases/steps/builder.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ func (sb *StepBuilder) Layer(
112112
if !forceBuild {
113113
m, err := sb.index.LayerMetaFromHash(ctx, hash)
114114
if err != nil {
115-
sb.logger.Info("layer not found in cache, building new base layer", zap.Error(err), zap.String("hash", hash))
115+
sb.logger.Info("layer not found in cache, building new step layer", zap.Error(err), zap.String("hash", hash))
116116
} else {
117117
// Check if the layer is cached
118118
meta, err := sb.index.Cached(ctx, m.Template.BuildID)

packages/shared/pkg/storage/template_cache.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,7 @@ func (c TemplateCacheFiles) CacheMetadataPath(config BuilderConfig) string {
4747
// cacheDir returns the cache directory for these template files, laid out as
// <template cache dir>/<BuildID>/cache/<CacheIdentifier>.
func (c TemplateCacheFiles) cacheDir(config BuilderConfig) string {
	root := config.GetTemplateCacheDir()

	return filepath.Join(root, c.BuildID, "cache", c.CacheIdentifier)
}
50+
51+
func (c TemplateCacheFiles) Close(config BuilderConfig) error {
52+
return os.RemoveAll(c.cacheDir(config))
53+
}

0 commit comments

Comments
 (0)