diff --git a/pkg/limatmpl/embed.go b/pkg/limatmpl/embed.go
index e1437b18fe0..044334a92b9 100644
--- a/pkg/limatmpl/embed.go
+++ b/pkg/limatmpl/embed.go
@@ -6,11 +6,14 @@ package limatmpl
 import (
 	"bytes"
 	"context"
+	"encoding/base64"
 	"fmt"
 	"os"
 	"path/filepath"
 	"slices"
+	"strings"
 	"sync"
+	"unicode"
 
 	"github.com/coreos/go-semver/semver"
 	"github.com/lima-vm/lima/pkg/limayaml"
@@ -254,7 +257,7 @@ const mergeDocuments = `
 | $a | (select(.mountTypesUnsupported) | .mountTypesUnsupported) |= unique
 
 # Remove the custom tags again so they do not clutter up the YAML output.
-| $a | .. tag = ""
+| $a | .. | select(tag == "!!tag") tag = ""
 `
 
 // listFields returns dst and src fields like "list[idx].field".
@@ -552,11 +555,72 @@ func (tmpl *Template) combineNetworks() {
 	}
 }
 
+// yamlfmt will fail with a buffer overflow while trying to retain line breaks if the line
+// is longer than 64K. We will encode all text files that have a line that comes close.
+// maxLineLength is a constant; it is only a variable for the benefit of the unit tests.
+var maxLineLength = 65000
+
+// encodeScriptReason returns the reason why a script needs to be base64 encoded or the empty string if it doesn't.
+// Invalid UTF-8 must be checked explicitly: range decodes a bad byte as U+FFFD, which unicode.IsPrint accepts.
+func encodeScriptReason(script string) string {
+	start, line := 0, 1
+	for i, r := range script {
+		switch {
+		case r == unicode.ReplacementChar && !strings.HasPrefix(script[i:], "\uFFFD"):
+			return fmt.Sprintf("invalid UTF-8 byte 0x%02x at offset %d", script[i], i)
+		case !(unicode.IsPrint(r) || r == '\n' || r == '\r' || r == '\t'):
+			return fmt.Sprintf("unprintable character %q at offset %d", r, i)
+		case i-start >= maxLineLength: // maxLineLength includes final newline
+			return fmt.Sprintf("line %d (offset %d) is longer than %d characters", line, start, maxLineLength)
+		case r == '\n':
+			line++
+			start = i + 1
+		}
+	}
+	return ""
+}
+
+// Break base64 strings into shorter chunks. Technically we could use maxLineLength here,
+// but shorter lines look better.
+const base64ChunkLength = 76
+
+// binaryString returns a base64 encoded version of the binary string, broken into chunks
+// of at most base64ChunkLength characters per line.
+func binaryString(s string) string {
+	encoded := base64.StdEncoding.EncodeToString([]byte(s))
+	if len(encoded) <= base64ChunkLength {
+		// Short values stay on a single line without a trailing newline.
+		return encoded
+	}
+
+	// Exact capacity: the encoded length plus one newline per chunk.
+	lineCount := (len(encoded) + base64ChunkLength - 1) / base64ChunkLength
+	var builder strings.Builder
+	builder.Grow(len(encoded) + lineCount)
+
+	for i := 0; i < len(encoded); i += base64ChunkLength {
+		// The last chunk may be shorter than base64ChunkLength.
+		end := min(i+base64ChunkLength, len(encoded))
+		builder.WriteString(encoded[i:end])
+		builder.WriteByte('\n')
+	}
+
+	return builder.String()
+}
+
 // updateScript replaces a "file" property with the actual script and then renames the field to newName ("script" or "content").
-func (tmpl *Template) updateScript(field string, idx int, newName, script string) {
+// Scripts that cannot be embedded as plain text are base64 encoded and tagged "!!binary"; file is only used for logging.
+func (tmpl *Template) updateScript(field string, idx int, newName, script, file string) {
+	tag := ""
+	if reason := encodeScriptReason(script); reason != "" {
+		logrus.Infof("File %q is being base64 encoded: %s", file, reason)
+		script = binaryString(script)
+		tag = "!!binary"
+	}
 	entry := fmt.Sprintf("$a.%s[%d].file", field, idx)
-	// Assign script to the "file" field and then rename it to "script".
-	tmpl.expr.WriteString(fmt.Sprintf("| (%s) = %q | (%s | key) = %q\n", entry, script, entry, newName))
+	// Assign script to the "file" field and then rename it to "script" or "content".
+	tmpl.expr.WriteString(fmt.Sprintf("| (%s) = %q | (%s) tag = %q | (%s | key) = %q\n",
+		entry, script, entry, tag, entry, newName))
 }
 
 // embedAllScripts replaces all "provision" and "probes" file references with the actual script.
@@ -579,7 +643,7 @@ func (tmpl *Template) embedAllScripts(ctx context.Context, embedAll bool) error if err != nil { return err } - tmpl.updateScript("probes", i, "script", string(scriptTmpl.Bytes)) + tmpl.updateScript("probes", i, "script", string(scriptTmpl.Bytes), p.File.URL) } } for i, p := range tmpl.Config.Provision { @@ -605,7 +669,7 @@ func (tmpl *Template) embedAllScripts(ctx context.Context, embedAll bool) error if err != nil { return err } - tmpl.updateScript("provision", i, newName, string(scriptTmpl.Bytes)) + tmpl.updateScript("provision", i, newName, string(scriptTmpl.Bytes), p.File.URL) } } return tmpl.evalExpr() diff --git a/pkg/limatmpl/embed_test.go b/pkg/limatmpl/embed_test.go index 59a2b661b6f..a8e19291152 100644 --- a/pkg/limatmpl/embed_test.go +++ b/pkg/limatmpl/embed_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright The Lima Authors // SPDX-License-Identifier: Apache-2.0 -package limatmpl_test +package limatmpl import ( "context" @@ -11,7 +11,6 @@ import ( "strings" "testing" - "github.com/lima-vm/lima/pkg/limatmpl" "github.com/lima-vm/lima/pkg/limayaml" "github.com/sirupsen/logrus" "gotest.tools/v3/assert" @@ -392,6 +391,27 @@ provision: "base: https://example.com/lima-linux-riscv64.img", "{arch: riscv64, images: [{location: https://example.com/lima-linux-riscv64.img, arch: riscv64}]}", }, + { + "Binary files are base64 encoded", + `# +provision: +- mode: data + file: base1.sh # This comment will move to the "content" key + path: /tmp/data +`, + // base1.sh is binary because it contains an audible bell character '\a' + "# base0.yaml is ignored\n---\n#!\a123456789012345678901234567890123456789012345678901234567890", + ` +provision: +- mode: data + content: !!binary | # This comment will move to the "content" key + IyEHMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0 + NTY3ODkw + path: /tmp/data + +# base0.yaml is ignored +`, + }, } func TestEmbed(t *testing.T) { @@ -436,7 +456,7 @@ func RunEmbedTest(t 
*testing.T, tc embedTestCase) {
 		err := os.WriteFile(baseFilename, []byte(base), 0o600)
 		assert.NilError(t, err, tc.description)
 	}
-	tmpl := &limatmpl.Template{
+	tmpl := &Template{
 		Bytes:   fmt.Appendf(nil, "base: base0.yaml\n%s", tc.template),
 		Locator: "tmpl.yaml",
 	}
@@ -475,3 +495,24 @@ func RunEmbedTest(t *testing.T, tc embedTestCase) {
 		assert.Assert(t, cmp.DeepEqual(tmpl.Config, &expected), tc.description)
 	}
 }
+
+// TestEncodeScriptReason checks each reason reported by encodeScriptReason using a tiny line limit.
+func TestEncodeScriptReason(t *testing.T) {
+	// maxLineLength is package-level state; restore it so the lowered limit cannot leak into other tests.
+	orig := maxLineLength
+	t.Cleanup(func() { maxLineLength = orig })
+	maxLineLength = 8
+	t.Run("regular script", func(t *testing.T) {
+		reason := encodeScriptReason("0123456\n")
+		assert.Equal(t, reason, "")
+	})
+	t.Run("binary script", func(t *testing.T) {
+		reason := encodeScriptReason("abc\a123")
+		assert.Equal(t, reason, "unprintable character '\\a' at offset 3")
+	})
+	t.Run("long line", func(t *testing.T) {
+		// newline character is included in character count
+		reason := encodeScriptReason("line 1\nline 2\n01234567\n")
+		assert.Equal(t, reason, "line 3 (offset 14) is longer than 8 characters")
+	})
+}