From 8288a768f1982bdfef5c8ab25ed17d56ce85fcad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20F=20Bj=C3=B6rklund?= Date: Mon, 2 Oct 2023 17:47:04 +0200 Subject: [PATCH 1/4] refactor: extract methods for cache handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Anders F Björklund --- pkg/downloader/downloader.go | 37 ++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/pkg/downloader/downloader.go b/pkg/downloader/downloader.go index 02a7312a3b5..651ba26340f 100644 --- a/pkg/downloader/downloader.go +++ b/pkg/downloader/downloader.go @@ -97,8 +97,8 @@ func WithDecompress(decompress bool) Opt { // - The digest was not specified. // - The file already exists in the local target path. // -// When the `data` file exists in the cache dir with `digest.` file, -// the digest is verified by comparing the content of `digest.` with the expected +// When the `data` file exists in the cache dir with `.digest` file, +// the digest is verified by comparing the content of `.digest` with the expected // digest string. So, the actual digest of the `data` file is not computed. func WithExpectedDigest(expectedDigest digest.Digest) Opt { return func(o *options) error { @@ -183,15 +183,11 @@ func Download(local, remote string, opts ...Opt) (*Result, error) { return res, nil } - shad := filepath.Join(o.cacheDir, "download", "by-url-sha256", fmt.Sprintf("%x", sha256.Sum256([]byte(remote)))) + shad := cacheDirectoryPath(o.cacheDir, remote) shadData := filepath.Join(shad, "data") - shadDigest := "" - if o.expectedDigest != "" { - algo := o.expectedDigest.Algorithm().String() - if strings.Contains(algo, "/") || strings.Contains(algo, "\\") { - return nil, fmt.Errorf("invalid digest algorithm %q", algo) - } - shadDigest = filepath.Join(shad, algo+".digest") + shadDigest, err := cacheDigestPath(shad, o.expectedDigest) + if err != nil { + return nil, err } if _, err := os.Stat(shadData); err == nil { logrus.Debugf("file %q is cached as %q", localPath, shadData) @@ -247,6 +243,27 @@ func Download(local, remote string, opts ...Opt) (*Result, error) { return res, nil } +// cacheDirectoryPath returns the cache subdirectory path. +// - "url" file contains the url +// - "data" file contains the data +func cacheDirectoryPath(cacheDir string, remote string) string { + return filepath.Join(cacheDir, "download", "by-url-sha256", fmt.Sprintf("%x", sha256.Sum256([]byte(remote)))) +} + +// cacheDigestPath returns the cache digest file path. +// - ".digest" contains the digest +func cacheDigestPath(shad string, expectedDigest digest.Digest) (string, error) { + shadDigest := "" + if expectedDigest != "" { + algo := expectedDigest.Algorithm().String() + if strings.Contains(algo, "/") || strings.Contains(algo, "\\") { + return "", fmt.Errorf("invalid digest algorithm %q", algo) + } + shadDigest = filepath.Join(shad, algo+".digest") + } + return shadDigest, nil +} + func IsLocal(s string) bool { return !strings.Contains(s, "://") || strings.HasPrefix(s, "file://") } From 50db1b7b438b60ba629fa2c772767f926a2adf17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20F=20Bj=C3=B6rklund?= Date: Mon, 2 Oct 2023 17:52:20 +0200 Subject: [PATCH 2/4] refactor: extract method for cache validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Anders F Björklund --- pkg/downloader/downloader.go | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/pkg/downloader/downloader.go b/pkg/downloader/downloader.go index 651ba26340f..6d69d9c304a 100644 --- a/pkg/downloader/downloader.go +++ b/pkg/downloader/downloader.go @@ -191,12 +191,11 @@ func Download(local, remote string, opts ...Opt) (*Result, error) { } if _, err := os.Stat(shadData); err == nil { logrus.Debugf("file %q is cached as %q", localPath, shadData) - if shadDigestB, err := os.ReadFile(shadDigest); err == nil { + if _, err := os.Stat(shadDigest); err == nil { logrus.Debugf("Comparing digest %q with the cached digest file %q, not computing the actual digest of %q", o.expectedDigest, shadDigest, shadData) - shadDigestS := strings.TrimSpace(string(shadDigestB)) - if o.expectedDigest.String() != shadDigestS { - return nil, fmt.Errorf("expected digest %q does not match the cached digest %q", o.expectedDigest.String(), shadDigestS) + if err := validateCachedDigest(shadDigest, o.expectedDigest); err != nil { + return nil, err } if err := copyLocal(localPath, shadData, ext, o.decompress, "", ""); err != nil { return nil, err @@ -295,6 +294,9 @@ func copyLocal(dst, src, ext string, decompress bool, description string, expect return err } + if expectedDigest != "" { + logrus.Debugf("verifying digest of local file %q (%s)", srcPath, expectedDigest) + } if err := validateLocalFileDigest(srcPath, expectedDigest); err != nil { return err } @@ -383,6 +385,21 @@ func decompressLocal(dst, src, ext string, description string) error { return err } +func validateCachedDigest(shadDigest string, expectedDigest digest.Digest) error { + if expectedDigest == "" { + return nil + } + shadDigestB, err := os.ReadFile(shadDigest) + if err != nil { + return err + } + shadDigestS := strings.TrimSpace(string(shadDigestB)) + if shadDigestS != expectedDigest.String() { + return fmt.Errorf("expected digest %q, got %q", expectedDigest, shadDigestS) + } + return nil +} + func validateLocalFileDigest(localPath string, expectedDigest digest.Digest) error { if localPath == "" { return fmt.Errorf("validateLocalFileDigest: got empty localPath") @@ -390,7 +407,6 @@ func validateLocalFileDigest(localPath string, expectedDigest digest.Digest) err if expectedDigest == "" { return nil } - logrus.Debugf("verifying digest of local file %q (%s)", localPath, expectedDigest) algo := expectedDigest.Algorithm() if !algo.Available() { return fmt.Errorf("expected digest algorithm %q is not available", algo) From beaa734d576eb9734883927f7cb83cd342f31cbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20F=20Bj=C3=B6rklund?= Date: Mon, 2 Oct 2023 17:53:44 +0200 Subject: [PATCH 3/4] Add Cached function for files already Download MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will return any files already in the cache, with valid digest, but not download any new ones. Signed-off-by: Anders F Björklund --- pkg/downloader/downloader.go | 46 +++++++++++++++++++++++++++++++ pkg/downloader/downloader_test.go | 28 +++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/pkg/downloader/downloader.go b/pkg/downloader/downloader.go index 6d69d9c304a..6af420fdb98 100644 --- a/pkg/downloader/downloader.go +++ b/pkg/downloader/downloader.go @@ -242,6 +242,52 @@ func Download(local, remote string, opts ...Opt) (*Result, error) { return res, nil } +// Cached checks if the remote resource is in the cache. +// +// Download caches the remote resource if WithCache or WithCacheDir option is specified. +// Local files are not cached. +// +// When the cache path already exists, Cached returns Result with StatusUsedCache. +func Cached(remote string, opts ...Opt) (*Result, error) { + var o options + for _, f := range opts { + if err := f(&o); err != nil { + return nil, err + } + } + if o.cacheDir == "" { + return nil, fmt.Errorf("caching-only mode requires the cache directory to be specified") + } + if IsLocal(remote) { + return nil, fmt.Errorf("local files are not cached") + } + + shad := cacheDirectoryPath(o.cacheDir, remote) + shadData := filepath.Join(shad, "data") + shadDigest, err := cacheDigestPath(shad, o.expectedDigest) + if err != nil { + return nil, err + } + if _, err := os.Stat(shadData); err != nil { + return nil, err + } + if _, err := os.Stat(shadDigest); err != nil { + if err := validateCachedDigest(shadDigest, o.expectedDigest); err != nil { + return nil, err + } + } else { + if err := validateLocalFileDigest(shadData, o.expectedDigest); err != nil { + return nil, err + } + } + res := &Result{ + Status: StatusUsedCache, + CachePath: shadData, + ValidatedDigest: o.expectedDigest != "", + } + return res, nil +} + // cacheDirectoryPath returns the cache subdirectory path. // - "url" file contains the url // - "data" file contains the data diff --git a/pkg/downloader/downloader_test.go b/pkg/downloader/downloader_test.go index 90b5a1cabdc..7353cb39d05 100644 --- a/pkg/downloader/downloader_test.go +++ b/pkg/downloader/downloader_test.go @@ -5,6 +5,7 @@ import ( "os/exec" "path/filepath" "runtime" + "strings" "testing" "github.com/opencontainers/go-digest" @@ -87,6 +88,24 @@ func TestDownloadRemote(t *testing.T) { assert.NilError(t, err) assert.Equal(t, StatusUsedCache, r.Status) }) + t.Run("cached", func(t *testing.T) { + _, err := Cached(dummyRemoteFileURL, WithExpectedDigest(dummyRemoteFileDigest)) + assert.ErrorContains(t, err, "cache directory to be specified") + + cacheDir := filepath.Join(t.TempDir(), "cache") + r, err := Download("", dummyRemoteFileURL, WithExpectedDigest(dummyRemoteFileDigest), WithCacheDir(cacheDir)) + assert.NilError(t, err) + assert.Equal(t, StatusDownloaded, r.Status) + + r, err = Cached(dummyRemoteFileURL, WithExpectedDigest(dummyRemoteFileDigest), WithCacheDir(cacheDir)) + assert.NilError(t, err) + assert.Equal(t, StatusUsedCache, r.Status) + assert.Assert(t, strings.HasPrefix(r.CachePath, cacheDir), "expected %s to be in %s", r.CachePath, cacheDir) + + wrongDigest := digest.Digest("sha256:8313944efb4f38570c689813f288058b674ea6c487017a5a4738dc674b65f9d9") + _, err = Cached(dummyRemoteFileURL, WithExpectedDigest(wrongDigest), WithCacheDir(cacheDir)) + assert.ErrorContains(t, err, "expected digest") + }) } func TestDownloadLocal(t *testing.T) { @@ -129,6 +148,15 @@ func TestDownloadLocal(t *testing.T) { os.Remove(localTestFile) }) + t.Run("cached", func(t *testing.T) { + localFile := filepath.Join(t.TempDir(), "test-file") + os.Create(localFile) + testLocalFileURL := "file://" + localFile + + cacheDir := filepath.Join(t.TempDir(), "cache") + _, err := Cached(testLocalFileURL, WithCacheDir(cacheDir)) + assert.ErrorContains(t, err, "not cached") + }) } func TestDownloadCompressed(t *testing.T) { From 4efb676b52e68349bb934c0d31d8a00110218399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20F=20Bj=C3=B6rklund?= Date: Mon, 2 Oct 2023 17:55:36 +0200 Subject: [PATCH 4/4] Don't download files that are already cached MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nerdctl archive has already been downloaded to the cache, so don't log downloading it again. Signed-off-by: Anders F Björklund --- pkg/fileutils/download.go | 11 +++++++++++ pkg/start/start.go | 17 +++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/pkg/fileutils/download.go b/pkg/fileutils/download.go index 7d8d2aded72..d116556b8e7 100644 --- a/pkg/fileutils/download.go +++ b/pkg/fileutils/download.go @@ -41,6 +41,17 @@ func DownloadFile(dest string, f limayaml.File, decompress bool, description str return res.CachePath, nil } +// CachedFile checks if a file is in the cache, validating the digest if it is available. Returns path in cache. +func CachedFile(f limayaml.File) (string, error) { + res, err := downloader.Cached(f.Location, + downloader.WithCache(), + downloader.WithExpectedDigest(f.Digest)) + if err != nil { + return "", fmt.Errorf("cache did not contain %q: %w", f.Location, err) + } + return res.CachePath, nil +} + // Errors compose multiple into a single error. // Errors filters out ErrSkipped. func Errors(errs []error) error { diff --git a/pkg/start/start.go b/pkg/start/start.go index 2df500907a6..07b0cce70fb 100644 --- a/pkg/start/start.go +++ b/pkg/start/start.go @@ -34,7 +34,7 @@ const DefaultWatchHostAgentEventsTimeout = 10 * time.Minute // ensureNerdctlArchiveCache prefetches the nerdctl-full-VERSION-GOOS-GOARCH.tar.gz archive // into the cache before launching the hostagent process, so that we can show the progress in tty. // https://github.com/lima-vm/lima/issues/326 -func ensureNerdctlArchiveCache(y *limayaml.LimaYAML) (string, error) { +func ensureNerdctlArchiveCache(y *limayaml.LimaYAML, created bool) (string, error) { if !*y.Containerd.System && !*y.Containerd.User { // nerdctl archive is not needed return "", nil @@ -42,6 +42,13 @@ func ensureNerdctlArchiveCache(y *limayaml.LimaYAML) (string, error) { errs := make([]error, len(y.Containerd.Archives)) for i, f := range y.Containerd.Archives { + // Skip downloading again if the file is already in the cache + if created && f.Arch == *y.Arch && !downloader.IsLocal(f.Location) { + path, err := fileutils.CachedFile(f) + if err == nil { + return path, nil + } + } path, err := fileutils.DownloadFile("", f, false, "the nerdctl archive", *y.Arch) if err != nil { errs[i] = err @@ -80,10 +87,16 @@ func Prepare(_ context.Context, inst *store.Instance) (*Prepared, error) { return nil, err } + // Check if the instance has been created (the base disk already exists) + created := false + baseDisk := filepath.Join(inst.Dir, filenames.BaseDisk) + if _, err := os.Stat(baseDisk); err == nil { + created = true + } if err := limaDriver.CreateDisk(); err != nil { return nil, err } - nerdctlArchiveCache, err := ensureNerdctlArchiveCache(y) + nerdctlArchiveCache, err := ensureNerdctlArchiveCache(y, created) if err != nil { return nil, err }