Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions packages/orchestrator/internal/template/build/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox"
sbxtemplate "github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/template"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/buildcontext"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/builderrors"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/commands"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/config"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/core/envd"
Expand Down Expand Up @@ -130,14 +131,25 @@
b.metrics.RecordBuildDuration(ctx, duration, success)

if success {
b.metrics.RecordBuildResult(ctx, cfg.TeamID, true)
b.metrics.RecordBuildResult(ctx, cfg.TeamID, metrics.BuildResultSuccess)
b.metrics.RecordRootfsSize(ctx, r.RootfsSizeMB<<constants.ToMBShift)
} else if !errors.Is(e, context.Canceled) {
// Skip reporting failure metrics only on explicit cancellation
b.metrics.RecordBuildResult(ctx, cfg.TeamID, false)
} else {
// Determine if the error is a user error or internal error
var resultType metrics.BuildResultType
if builderrors.IsUserError(e) {
resultType = metrics.BuildResultUserError
} else {
resultType = metrics.BuildResultInternalError
}
b.metrics.RecordBuildResult(ctx, cfg.TeamID, resultType)
}
}()

// Wrap context.Canceled as a user error if no user error already exists
defer func() {
e = builderrors.WrapCanceledAsUserError(e)
}()

cacheScope := cfg.CacheScope

// Validate template, update force layers if needed
Expand All @@ -151,7 +163,7 @@
case errors.Is(ctx.Err(), context.Canceled):
l.Error(ctx, fmt.Sprintf("Build failed: %s", buildcache.CanceledBuildReason))
case e != nil:
l.Error(ctx, fmt.Sprintf("Build failed: %v", e))
l.Error(ctx, fmt.Sprintf("Build failed: %v", builderrors.UnwrapUserError(e).Message))

Check failure on line 166 in packages/orchestrator/internal/template/build/builder.go

View workflow job for this annotation

GitHub Actions / lint / golangci-lint (/home/runner/work/infra/infra/packages/orchestrator)

avoid direct access to proto field builderrors.UnwrapUserError(e).Message, use builderrors.UnwrapUserError(e).GetMessage() instead (protogetter)
default:
l.Info(ctx, fmt.Sprintf("Build finished, took %s",
time.Since(startTime).Truncate(time.Second).String()))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,54 @@
package builderrors

import (
"context"
"errors"

"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/phases"
template_manager "github.com/e2b-dev/infra/packages/shared/pkg/grpc/template-manager"
)

const (
InternalErrorMessage = "An internal error occurred. Please try again or contact support with the build ID."
)

// User errors
var (
CanceledError = errors.New("build was cancelled")

Check failure on line 17 in packages/orchestrator/internal/template/build/builderrors/errors.go

View workflow job for this annotation

GitHub Actions / lint / golangci-lint (/home/runner/work/infra/infra/packages/orchestrator)

error-naming: error var CanceledError should have name of the form ErrFoo (revive)
TimeoutError = errors.New("build timed out")

Check failure on line 18 in packages/orchestrator/internal/template/build/builderrors/errors.go

View workflow job for this annotation

GitHub Actions / lint / golangci-lint (/home/runner/work/infra/infra/packages/orchestrator)

error-naming: error var TimeoutError should have name of the form ErrFoo (revive)
)

// IsUserError returns true if the error is a user error (i.e., a PhaseBuildError).
// User errors are caused by the user's configuration and should be shown to them.
func IsUserError(err error) bool {
return phases.UnwrapPhaseBuildError(err) != nil
}

// WrapCanceledAsUserError wraps context.Canceled as a user error if no user error already exists.
// This ensures that user-initiated cancellations are tracked as user errors in metrics.
func WrapCanceledAsUserError(err error) error {
if err == nil {
return nil
}

// If it's already a user error, return as-is
if IsUserError(err) {
return err
}

// If it's a canceled context, wrap it as a user error
if errors.Is(err, context.Canceled) {
return phases.NewPhaseBuildError(phases.PhaseMeta{}, CanceledError)
}

// If it's a timeout context, wrap it as a user error
if errors.Is(err, context.DeadlineExceeded) {
return phases.NewPhaseBuildError(phases.PhaseMeta{}, TimeoutError)
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Internal errors masked when context is also canceled

Medium Severity

When an internal error occurs and the context is also canceled, the defer in builder.go joins them with errors.Join(e, ctx.Err()). Then WrapContextAsUserError checks errors.Is(err, context.Canceled) which finds the context error in the joined error and returns a new PhaseBuildError wrapping ErrCanceled, discarding the original internal error. This causes internal errors to be miscategorized as BuildResultUserError in metrics instead of BuildResultInternalError, undermining the PR's goal of improving error tracking and regression monitoring.

Additional Locations (1)

Fix in Cursor Fix in Web


return err
}

func UnwrapUserError(err error) *template_manager.TemplateBuildStatusReason {
phaseBuildError := phases.UnwrapPhaseBuildError(err)
if phaseBuildError != nil {
Expand All @@ -15,6 +59,6 @@
}

return &template_manager.TemplateBuildStatusReason{
Message: err.Error(),
Message: InternalErrorMessage,
}
}
34 changes: 31 additions & 3 deletions packages/orchestrator/internal/template/build/core/oci/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package oci

import (
"context"
"errors"
"fmt"
"os"
"os/exec"
Expand All @@ -13,6 +14,7 @@ import (
"github.com/google/go-containerregistry/pkg/name"
containerregistry "github.com/google/go-containerregistry/pkg/v1"
"github.com/google/go-containerregistry/pkg/v1/remote"
"github.com/google/go-containerregistry/pkg/v1/remote/transport"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.uber.org/zap"
Expand All @@ -39,13 +41,39 @@ var DefaultPlatform = containerregistry.Platform{
Architecture: "amd64",
}

// wrapImagePullError converts technical Docker registry errors into user-friendly messages.
func wrapImagePullError(err error, imageRef string) error {
if err == nil {
return nil
}

// Check for transport errors with specific error codes from the registry API
var transportErr *transport.Error
if errors.As(err, &transportErr) {
for _, e := range transportErr.Errors {
switch e.Code {
case transport.ManifestUnknownErrorCode:
return fmt.Errorf("image '%s' not found: the image or tag does not exist in the registry", imageRef)
case transport.NameUnknownErrorCode:
return fmt.Errorf("repository '%s' not found: verify the image name is correct", imageRef)
case transport.UnauthorizedErrorCode:
return fmt.Errorf("access denied to '%s': authentication required or insufficient permissions", imageRef)
case transport.DeniedErrorCode:
return fmt.Errorf("access denied to '%s': you don't have permission to pull this image", imageRef)
}
}
}

return fmt.Errorf("failed to pull image '%s': %w", imageRef, err)
}

func GetPublicImage(ctx context.Context, dockerhubRepository dockerhub.RemoteRepository, tag string, authProvider auth.RegistryAuthProvider) (containerregistry.Image, error) {
ctx, span := tracer.Start(ctx, "pull-public-docker-image")
defer span.End()

ref, err := name.ParseReference(tag)
if err != nil {
return nil, fmt.Errorf("invalid image reference: %w", err)
return nil, fmt.Errorf("invalid image reference '%s': %w", tag, err)
}

platform := DefaultPlatform
Expand All @@ -55,7 +83,7 @@ func GetPublicImage(ctx context.Context, dockerhubRepository dockerhub.RemoteRep
if authProvider == nil && ref.Context().RegistryStr() == name.DefaultRegistry {
img, err := dockerhubRepository.GetImage(ctx, tag, platform)
if err != nil {
return nil, fmt.Errorf("error getting image: %w", err)
return nil, wrapImagePullError(err, tag)
}

telemetry.ReportEvent(ctx, "pulled public image through docker remote repository proxy")
Expand Down Expand Up @@ -84,7 +112,7 @@ func GetPublicImage(ctx context.Context, dockerhubRepository dockerhub.RemoteRep

img, err := remote.Image(ref, opts...)
if err != nil {
return nil, fmt.Errorf("error pulling image: %w", err)
return nil, wrapImagePullError(err, tag)
}

telemetry.ReportEvent(ctx, "pulled public image")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/core/filesystem"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/core/oci"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/core/systeminit"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/phases"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/constants"
artifactsregistry "github.com/e2b-dev/infra/packages/shared/pkg/artifacts-registry"
"github.com/e2b-dev/infra/packages/shared/pkg/dockerhub"
Expand Down Expand Up @@ -78,6 +79,7 @@ func New(
func (r *Rootfs) CreateExt4Filesystem(
ctx context.Context,
l logger.Logger,
phaseMetadata phases.PhaseMeta,
rootfsPath string,
provisionScript string,
provisionLogPrefix string,
Expand All @@ -104,7 +106,7 @@ func (r *Rootfs) CreateExt4Filesystem(
img, err = oci.GetImage(childCtx, r.artifactRegistry, template.TemplateID, r.buildContext.Template.BuildID)
}
if err != nil {
return containerregistry.Config{}, fmt.Errorf("error requesting docker image: %w", err)
return containerregistry.Config{}, phases.NewPhaseBuildError(phaseMetadata, err)
}

imageSize, err := oci.GetImageSize(img)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ const (
PhaseFinalize Phase = "finalize"
)

// BuildResultType represents the type of build result
type BuildResultType string

const (
BuildResultSuccess BuildResultType = "success"
BuildResultUserError BuildResultType = "user_error"
BuildResultInternalError BuildResultType = "internal_error"
)

// BuildMetrics contains all metrics related to template building
type BuildMetrics struct {
// Duration histograms
Expand Down Expand Up @@ -97,11 +106,11 @@ func (m *BuildMetrics) RecordPhaseDuration(ctx context.Context, duration time.Du
m.BuildPhaseDurationHistogram.Record(ctx, duration.Milliseconds(), metric.WithAttributes(attrs...))
}

// RecordBuildResult records the result of a build (success or failure)
func (m *BuildMetrics) RecordBuildResult(ctx context.Context, teamID string, success bool) {
// RecordBuildResult records the result of a build (success, user_error, or internal_error)
func (m *BuildMetrics) RecordBuildResult(ctx context.Context, teamID string, resultType BuildResultType) {
attrs := []attribute.KeyValue{
telemetry.WithTeamID(teamID),
attribute.Bool("success", success),
attribute.String("result", string(resultType)),
}
m.BuildResultCounter.Add(ctx, 1, metric.WithAttributes(attrs...))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ func (bb *BaseBuilder) Build(
currentLayer.Hash,
)
if err != nil {
return phases.LayerResult{}, phases.NewPhaseBuildError(bb, err)
return phases.LayerResult{}, err
}

return phases.LayerResult{
Expand Down Expand Up @@ -179,7 +179,7 @@ func (bb *BaseBuilder) buildLayerFromOCI(
// Created here to be able to pass it to CreateSandbox for populating COW cache
rootfsPath := filepath.Join(templateBuildDir, rootfsBuildFileName)

rootfs, memfile, envsImg, err := constructLayerFilesFromOCI(ctx, userLogger, bb.BuildContext, baseMetadata.Template.BuildID, bb.artifactRegistry, bb.dockerhubRepository, rootfsPath)
rootfs, memfile, envsImg, err := constructLayerFilesFromOCI(ctx, userLogger, bb.BuildContext, bb.Metadata(), baseMetadata.Template.BuildID, bb.artifactRegistry, bb.dockerhubRepository, rootfsPath)
if err != nil {
return metadata.Template{}, fmt.Errorf("error building environment: %w", err)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/buildcontext"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/config"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/core/rootfs"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/phases"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/constants"
artifactsregistry "github.com/e2b-dev/infra/packages/shared/pkg/artifacts-registry"
"github.com/e2b-dev/infra/packages/shared/pkg/dockerhub"
Expand All @@ -22,6 +23,7 @@ func constructLayerFilesFromOCI(
ctx context.Context,
userLogger logger.Logger,
buildContext buildcontext.BuildContext,
phaseMetadata phases.PhaseMeta,
// The base build ID can be different from the final requested template build ID.
baseBuildID string,
artifactRegistry artifactsregistry.ArtifactsRegistry,
Expand All @@ -44,7 +46,7 @@ func constructLayerFilesFromOCI(
if err != nil {
return nil, nil, containerregistry.Config{}, fmt.Errorf("error getting provision script: %w", err)
}
imgConfig, err := rtfs.CreateExt4Filesystem(ctx, userLogger, rootfsPath, provisionScript, provisionLogPrefix, provisionScriptResultPath)
imgConfig, err := rtfs.CreateExt4Filesystem(ctx, userLogger, phaseMetadata, rootfsPath, provisionScript, provisionLogPrefix, provisionScriptResultPath)
if err != nil {
return nil, nil, containerregistry.Config{}, fmt.Errorf("error creating ext4 filesystem: %w", err)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/core/filesystem"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/core/rootfs"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/layer"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/phases"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/writer"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/constants"
"github.com/e2b-dev/infra/packages/shared/pkg/logger"
Expand Down Expand Up @@ -164,7 +165,7 @@ func (bb *BaseBuilder) provisionSandbox(
}()

if err := done.WaitWithContext(ctx); err != nil {
return fmt.Errorf("error waiting for provisioning sandbox: %w", err)
return phases.NewPhaseBuildError(bb.Metadata(), fmt.Errorf("error waiting for provisioning sandbox: %w", err))
}

userLogger.Info(ctx, "Provisioning was successful, cleaning up")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,10 @@ func (e *PhaseBuildError) Unwrap() error {
return e.Err
}

func NewPhaseBuildError(phase BuilderPhase, err error) *PhaseBuildError {
m := phase.Metadata()

func NewPhaseBuildError(phaseMetadata PhaseMeta, err error) *PhaseBuildError {
return &PhaseBuildError{
Phase: string(m.Phase),
Step: phaseToStepString(phase),
Phase: string(phaseMetadata.Phase),
Step: stepString(phaseMetadata),
Err: err,
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ func (ppb *PostProcessingBuilder) postProcessingFn(userLogger logger.Logger) lay
sbx.Runtime.SandboxID,
)
if err != nil {
return metadata.Template{}, phases.NewPhaseBuildError(ppb, fmt.Errorf("configuration script failed: %w", err))
return metadata.Template{}, phases.NewPhaseBuildError(ppb.Metadata(), fmt.Errorf("configuration script failed: %w", err))
}

if meta.Start == nil {
Expand Down Expand Up @@ -302,21 +302,21 @@ func (ppb *PostProcessingBuilder) postProcessingFn(userLogger logger.Logger) lay
meta.Start.Context,
)
if err != nil {
return metadata.Template{}, phases.NewPhaseBuildError(ppb, fmt.Errorf("ready command failed: %w", err))
return metadata.Template{}, phases.NewPhaseBuildError(ppb.Metadata(), fmt.Errorf("ready command failed: %w", err))
}

// Wait for the start command to start executing.
select {
case <-ctx.Done():
return metadata.Template{}, phases.NewPhaseBuildError(ppb, fmt.Errorf("waiting for start command failed: %w", commandsCtx.Err()))
return metadata.Template{}, phases.NewPhaseBuildError(ppb.Metadata(), fmt.Errorf("waiting for start command failed: %w", commandsCtx.Err()))
case <-startCmdConfirm:
}
// Cancel the start command context (it's running in the background anyway).
// If it has already finished, check the error.
commandsCancel()
err = startCmdRun.Wait()
if err != nil {
return metadata.Template{}, phases.NewPhaseBuildError(ppb, fmt.Errorf("start command failed: %w", err))
return metadata.Template{}, phases.NewPhaseBuildError(ppb.Metadata(), fmt.Errorf("start command failed: %w", err))
}

return meta, nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func Run(
zap.String("phase", string(meta.Phase)),
zap.String("step_type", meta.StepType),
zap.Intp("step_number", meta.StepNumber),
zap.String("step", phaseToStepString(builder)),
zap.String("step", stepString(meta)),
}

logger.Debug(ctx, "running builder phase", loggerFields...)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ func (sb *StepBuilder) Build(
meta.Context,
)
if err != nil {
return metadata.Template{}, phases.NewPhaseBuildError(sb, err)
return metadata.Template{}, phases.NewPhaseBuildError(sb.Metadata(), err)
}

err = sandboxtools.SyncChangesToDisk(
Expand Down
10 changes: 4 additions & 6 deletions packages/orchestrator/internal/template/build/phases/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@ package phases

import "strconv"

func phaseToStepString(phase BuilderPhase) string {
m := phase.Metadata()

step := m.StepType
if m.StepNumber != nil {
step = strconv.Itoa(*m.StepNumber)
func stepString(phaseMetadata PhaseMeta) string {
step := phaseMetadata.StepType
if phaseMetadata.StepNumber != nil {
step = strconv.Itoa(*phaseMetadata.StepNumber)
}

return step
Expand Down
Loading
Loading