Skip to content

Don't download files that are already cached #1877

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 94 additions & 15 deletions pkg/downloader/downloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ func WithDecompress(decompress bool) Opt {
// - The digest was not specified.
// - The file already exists in the local target path.
//
// When the `data` file exists in the cache dir with `digest.<ALGO>` file,
// the digest is verified by comparing the content of `digest.<ALGO>` with the expected
// When the `data` file exists in the cache dir with `<ALGO>.digest` file,
// the digest is verified by comparing the content of `<ALGO>.digest` with the expected
// digest string. So, the actual digest of the `data` file is not computed.
func WithExpectedDigest(expectedDigest digest.Digest) Opt {
return func(o *options) error {
Expand Down Expand Up @@ -183,24 +183,19 @@ func Download(local, remote string, opts ...Opt) (*Result, error) {
return res, nil
}

shad := filepath.Join(o.cacheDir, "download", "by-url-sha256", fmt.Sprintf("%x", sha256.Sum256([]byte(remote))))
shad := cacheDirectoryPath(o.cacheDir, remote)
shadData := filepath.Join(shad, "data")
shadDigest := ""
if o.expectedDigest != "" {
algo := o.expectedDigest.Algorithm().String()
if strings.Contains(algo, "/") || strings.Contains(algo, "\\") {
return nil, fmt.Errorf("invalid digest algorithm %q", algo)
}
shadDigest = filepath.Join(shad, algo+".digest")
shadDigest, err := cacheDigestPath(shad, o.expectedDigest)
if err != nil {
return nil, err
}
if _, err := os.Stat(shadData); err == nil {
logrus.Debugf("file %q is cached as %q", localPath, shadData)
if shadDigestB, err := os.ReadFile(shadDigest); err == nil {
if _, err := os.Stat(shadDigest); err == nil {
logrus.Debugf("Comparing digest %q with the cached digest file %q, not computing the actual digest of %q",
o.expectedDigest, shadDigest, shadData)
shadDigestS := strings.TrimSpace(string(shadDigestB))
if o.expectedDigest.String() != shadDigestS {
return nil, fmt.Errorf("expected digest %q does not match the cached digest %q", o.expectedDigest.String(), shadDigestS)
if err := validateCachedDigest(shadDigest, o.expectedDigest); err != nil {
return nil, err
}
if err := copyLocal(localPath, shadData, ext, o.decompress, "", ""); err != nil {
return nil, err
Expand Down Expand Up @@ -247,6 +242,73 @@ func Download(local, remote string, opts ...Opt) (*Result, error) {
return res, nil
}

// Cached checks if the remote resource is in the cache.
//
// Download caches the remote resource if WithCache or WithCacheDir option is specified.
// Local files are not cached.
//
// When the cache path already exists, Cached returns Result with StatusUsedCache.
//
// An error is returned when:
//   - an option fails to apply,
//   - no cache directory was specified,
//   - remote refers to a local file (see IsLocal),
//   - the cached `data` file cannot be stat'ed,
//   - an expected digest was given and it does not match.
func Cached(remote string, opts ...Opt) (*Result, error) {
	var o options
	for _, f := range opts {
		if err := f(&o); err != nil {
			return nil, err
		}
	}
	if o.cacheDir == "" {
		return nil, fmt.Errorf("caching-only mode requires the cache directory to be specified")
	}
	if IsLocal(remote) {
		return nil, fmt.Errorf("local files are not cached")
	}

	shad := cacheDirectoryPath(o.cacheDir, remote)
	shadData := filepath.Join(shad, "data")
	shadDigest, err := cacheDigestPath(shad, o.expectedDigest)
	if err != nil {
		return nil, err
	}
	if _, err := os.Stat(shadData); err != nil {
		return nil, err
	}
	if _, err := os.Stat(shadDigest); err == nil {
		// The `<ALGO>.digest` file exists: compare its recorded content with
		// the expected digest instead of hashing the `data` file again.
		if err := validateCachedDigest(shadDigest, o.expectedDigest); err != nil {
			return nil, err
		}
	} else {
		// No usable digest file (or no expected digest, in which case
		// shadDigest is empty): validate the `data` file itself.
		// validateLocalFileDigest is a no-op when the expected digest is empty.
		if err := validateLocalFileDigest(shadData, o.expectedDigest); err != nil {
			return nil, err
		}
	}
	res := &Result{
		Status:          StatusUsedCache,
		CachePath:       shadData,
		ValidatedDigest: o.expectedDigest != "",
	}
	return res, nil
}

// cacheDirectoryPath builds the per-URL cache subdirectory path:
// <cacheDir>/download/by-url-sha256/<hex SHA-256 of remote>.
//   - "url" file contains the url
//   - "data" file contains the data
func cacheDirectoryPath(cacheDir string, remote string) string {
	urlSum := sha256.Sum256([]byte(remote))
	subdir := fmt.Sprintf("%x", urlSum)
	return filepath.Join(cacheDir, "download", "by-url-sha256", subdir)
}

// cacheDigestPath returns the path of the digest file inside the cache
// subdirectory shad.
//   - "<ALGO>.digest" contains the digest
//
// An empty expectedDigest yields an empty path and no error. An algorithm
// name containing a path separator ("/" or "\") is rejected so that it cannot
// escape the cache subdirectory.
func cacheDigestPath(shad string, expectedDigest digest.Digest) (string, error) {
	if expectedDigest == "" {
		return "", nil
	}
	algo := expectedDigest.Algorithm().String()
	if strings.ContainsAny(algo, `/\`) {
		return "", fmt.Errorf("invalid digest algorithm %q", algo)
	}
	return filepath.Join(shad, algo+".digest"), nil
}

// IsLocal reports whether s refers to a local file: either it starts with the
// "file://" scheme or it contains no "://" scheme separator at all.
func IsLocal(s string) bool {
	if strings.HasPrefix(s, "file://") {
		return true
	}
	return !strings.Contains(s, "://")
}
Expand Down Expand Up @@ -278,6 +340,9 @@ func copyLocal(dst, src, ext string, decompress bool, description string, expect
return err
}

if expectedDigest != "" {
logrus.Debugf("verifying digest of local file %q (%s)", srcPath, expectedDigest)
}
if err := validateLocalFileDigest(srcPath, expectedDigest); err != nil {
return err
}
Expand Down Expand Up @@ -366,14 +431,28 @@ func decompressLocal(dst, src, ext string, description string) error {
return err
}

// validateCachedDigest compares the whitespace-trimmed content of the digest
// file shadDigest with expectedDigest, without hashing any data.
//
// It returns nil when expectedDigest is empty or when the two strings are
// equal, the read error when the digest file cannot be read, and a mismatch
// error otherwise.
func validateCachedDigest(shadDigest string, expectedDigest digest.Digest) error {
	if expectedDigest == "" {
		return nil
	}
	raw, err := os.ReadFile(shadDigest)
	if err != nil {
		return err
	}
	if cached := strings.TrimSpace(string(raw)); cached != expectedDigest.String() {
		return fmt.Errorf("expected digest %q, got %q", expectedDigest, cached)
	}
	return nil
}

func validateLocalFileDigest(localPath string, expectedDigest digest.Digest) error {
if localPath == "" {
return fmt.Errorf("validateLocalFileDigest: got empty localPath")
}
if expectedDigest == "" {
return nil
}
logrus.Debugf("verifying digest of local file %q (%s)", localPath, expectedDigest)
algo := expectedDigest.Algorithm()
if !algo.Available() {
return fmt.Errorf("expected digest algorithm %q is not available", algo)
Expand Down
28 changes: 28 additions & 0 deletions pkg/downloader/downloader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"os/exec"
"path/filepath"
"runtime"
"strings"
"testing"

"github.com/opencontainers/go-digest"
Expand Down Expand Up @@ -87,6 +88,24 @@ func TestDownloadRemote(t *testing.T) {
assert.NilError(t, err)
assert.Equal(t, StatusUsedCache, r.Status)
})
// "cached" exercises Cached against a remote URL: it must fail without a
// cache directory, succeed after Download has populated the cache, and fail
// when the expected digest does not match.
t.Run("cached", func(t *testing.T) {
// Without WithCache/WithCacheDir, Cached refuses to run.
_, err := Cached(dummyRemoteFileURL, WithExpectedDigest(dummyRemoteFileDigest))
assert.ErrorContains(t, err, "cache directory to be specified")

// Populate a fresh cache directory by downloading the file once.
cacheDir := filepath.Join(t.TempDir(), "cache")
r, err := Download("", dummyRemoteFileURL, WithExpectedDigest(dummyRemoteFileDigest), WithCacheDir(cacheDir))
assert.NilError(t, err)
assert.Equal(t, StatusDownloaded, r.Status)

// The same URL and digest must now be reported as cached, inside cacheDir.
r, err = Cached(dummyRemoteFileURL, WithExpectedDigest(dummyRemoteFileDigest), WithCacheDir(cacheDir))
assert.NilError(t, err)
assert.Equal(t, StatusUsedCache, r.Status)
assert.Assert(t, strings.HasPrefix(r.CachePath, cacheDir), "expected %s to be in %s", r.CachePath, cacheDir)

// A different expected digest must be rejected even though the data is cached.
wrongDigest := digest.Digest("sha256:8313944efb4f38570c689813f288058b674ea6c487017a5a4738dc674b65f9d9")
_, err = Cached(dummyRemoteFileURL, WithExpectedDigest(wrongDigest), WithCacheDir(cacheDir))
assert.ErrorContains(t, err, "expected digest")
})
}

func TestDownloadLocal(t *testing.T) {
Expand Down Expand Up @@ -129,6 +148,15 @@ func TestDownloadLocal(t *testing.T) {
os.Remove(localTestFile)
})

t.Run("cached", func(t *testing.T) {
localFile := filepath.Join(t.TempDir(), "test-file")
os.Create(localFile)
testLocalFileURL := "file://" + localFile

cacheDir := filepath.Join(t.TempDir(), "cache")
_, err := Cached(testLocalFileURL, WithCacheDir(cacheDir))
assert.ErrorContains(t, err, "not cached")
})
}

func TestDownloadCompressed(t *testing.T) {
Expand Down
11 changes: 11 additions & 0 deletions pkg/fileutils/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ func DownloadFile(dest string, f limayaml.File, decompress bool, description str
return res.CachePath, nil
}

// CachedFile reports where f.Location is stored in the download cache,
// validating f.Digest when one is set. It never downloads anything: a cache
// miss or digest failure is returned as a wrapped error.
func CachedFile(f limayaml.File) (string, error) {
	opts := []downloader.Opt{
		downloader.WithCache(),
		downloader.WithExpectedDigest(f.Digest),
	}
	res, err := downloader.Cached(f.Location, opts...)
	if err != nil {
		return "", fmt.Errorf("cache did not contain %q: %w", f.Location, err)
	}
	return res.CachePath, nil
}

// Errors compose multiple into a single error.
// Errors filters out ErrSkipped.
func Errors(errs []error) error {
Expand Down
17 changes: 15 additions & 2 deletions pkg/start/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,21 @@ const DefaultWatchHostAgentEventsTimeout = 10 * time.Minute
// ensureNerdctlArchiveCache prefetches the nerdctl-full-VERSION-GOOS-GOARCH.tar.gz archive
// into the cache before launching the hostagent process, so that we can show the progress in tty.
// https://github.com/lima-vm/lima/issues/326
func ensureNerdctlArchiveCache(y *limayaml.LimaYAML) (string, error) {
func ensureNerdctlArchiveCache(y *limayaml.LimaYAML, created bool) (string, error) {
if !*y.Containerd.System && !*y.Containerd.User {
// nerdctl archive is not needed
return "", nil
}

errs := make([]error, len(y.Containerd.Archives))
for i, f := range y.Containerd.Archives {
// Skip downloading again if the file is already in the cache
if created && f.Arch == *y.Arch && !downloader.IsLocal(f.Location) {
path, err := fileutils.CachedFile(f)
if err == nil {
return path, nil
}
}
path, err := fileutils.DownloadFile("", f, false, "the nerdctl archive", *y.Arch)
if err != nil {
errs[i] = err
Expand Down Expand Up @@ -80,10 +87,16 @@ func Prepare(_ context.Context, inst *store.Instance) (*Prepared, error) {
return nil, err
}

// Check if the instance has been created (the base disk already exists)
created := false
baseDisk := filepath.Join(inst.Dir, filenames.BaseDisk)
if _, err := os.Stat(baseDisk); err == nil {
created = true
}
if err := limaDriver.CreateDisk(); err != nil {
return nil, err
}
nerdctlArchiveCache, err := ensureNerdctlArchiveCache(y)
nerdctlArchiveCache, err := ensureNerdctlArchiveCache(y, created)
if err != nil {
return nil, err
}
Expand Down