Skip to content

Embed binary files as base64 encoded strings #3421

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 70 additions & 6 deletions pkg/limatmpl/embed.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@ package limatmpl
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"os"
"path/filepath"
"slices"
"strings"
"sync"
"unicode"

"github.com/coreos/go-semver/semver"
"github.com/lima-vm/lima/pkg/limayaml"
Expand Down Expand Up @@ -254,7 +257,7 @@ const mergeDocuments = `
| $a | (select(.mountTypesUnsupported) | .mountTypesUnsupported) |= unique

# Remove the custom tags again so they do not clutter up the YAML output.
| $a | .. tag = ""
| $a | .. | select(tag == "!!tag") tag = ""
`

// listFields returns dst and src fields like "list[idx].field".
Expand Down Expand Up @@ -552,11 +555,72 @@ func (tmpl *Template) combineNetworks() {
}
}

// maxLineLength is the longest line, measured in bytes and including the final newline,
// that a script may contain before it is base64 encoded instead of being embedded as text.
//
// yamlfmt will fail with a buffer overflow while trying to retain line breaks if the line
// is longer than 64K. We will encode all text files that have a line that comes close.
// maxLineLength is a constant; it is only a variable for the benefit of the unit tests.
var maxLineLength = 65000
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if it's a good idea to declare maxLineLength as var instead of const.
For unit tests, we can use strings.Repeat to construct a very long string.

Suggested change
var maxLineLength = 65000
const maxLineLength = 65000

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was using strings.Repeat before, but changed it in response to a request from @AkihiroSuda to add a test for `maxLineLength`.

It is not an exported variable, and the comment right above says it is only mutable so unit tests can mock it. Unit tests should use constant data for "expected" results. Otherwise a thinko in the implementation of the function that is being tested can carry over in the test function constructing the test data and cancel out the error in the implementation. Not that it matters in a trivial case like this.

I think it is fine as is.


// encodeScriptReason returns the reason why a script needs to be base64 encoded or the empty string if it doesn't.
//
// A script must be encoded when it contains
//   - a byte sequence that is not valid UTF-8,
//   - a character that is neither printable nor one of '\n', '\r', '\t', or
//   - a line longer than maxLineLength bytes (the final newline is included in the count).
//
// All offsets and lengths in the returned message are byte offsets into script.
func encodeScriptReason(script string) string {
	// UTF-8 encoding of U+FFFD; used to tell a literal replacement character in the
	// script apart from the one that range synthesizes for every invalid byte.
	const encodedReplacementChar = "\uFFFD"

	start := 0 // byte offset of the first character of the current line
	line := 1  // 1-based number of the current line
	for i, r := range script {
		// range yields U+FFFD for invalid UTF-8, and unicode.IsPrint considers U+FFFD
		// printable, so invalid bytes must be detected explicitly.
		if r == unicode.ReplacementChar && !strings.HasPrefix(script[i:], encodedReplacementChar) {
			return fmt.Sprintf("invalid UTF-8 byte 0x%02x at offset %d", script[i], i)
		}
		if !(unicode.IsPrint(r) || r == '\n' || r == '\r' || r == '\t') {
			return fmt.Sprintf("unprintable character %q at offset %d", r, i)
		}
		// maxLineLength includes final newline
		if i-start >= maxLineLength {
			return fmt.Sprintf("line %d (offset %d) is longer than %d characters", line, start, maxLineLength)
		}
		if r == '\n' {
			line++
			start = i + 1
		}
	}
	return ""
}

// base64ChunkLength is the maximum number of base64 characters emitted per line.
// maxLineLength would technically work as well, but shorter lines are easier on the eyes.
const base64ChunkLength = 76

// binaryString base64 encodes s. An encoding that fits into a single chunk is returned
// as-is; a longer one is split into lines of at most base64ChunkLength characters, each
// of which (including the last) is terminated by a newline.
func binaryString(s string) string {
	remaining := base64.StdEncoding.EncodeToString([]byte(s))
	if len(remaining) <= base64ChunkLength {
		return remaining
	}

	// Reserve room for the payload plus one newline per emitted line.
	newlines := (len(remaining) + base64ChunkLength - 1) / base64ChunkLength
	var out strings.Builder
	out.Grow(len(remaining) + newlines)

	// Peel off full chunks from the front; whatever is left is the final (non-empty) line.
	for len(remaining) > base64ChunkLength {
		out.WriteString(remaining[:base64ChunkLength])
		out.WriteByte('\n')
		remaining = remaining[base64ChunkLength:]
	}
	out.WriteString(remaining)
	out.WriteByte('\n')

	return out.String()
}

// updateScript replaces a "file" property with the actual script and then renames the field to newName ("script" or "content").
// If encodeScriptReason reports a reason, the script is base64 encoded with binaryString, the value is
// tagged "!!binary", and an info message naming file and the reason is logged.
// The resulting expression is appended to tmpl.expr; it is not evaluated here.
func (tmpl *Template) updateScript(field string, idx int, newName, script string) {
func (tmpl *Template) updateScript(field string, idx int, newName, script, file string) {
// An empty tag is used for scripts that are embedded as plain text.
tag := ""
if reason := encodeScriptReason(script); reason != "" {
logrus.Infof("File %q is being base64 encoded: %s", file, reason)
script = binaryString(script)
tag = "!!binary"
}
// entry is the path of the "file" property of the idx-th element of the field list.
entry := fmt.Sprintf("$a.%s[%d].file", field, idx)
// Assign script to the "file" field and then rename it to "script".
tmpl.expr.WriteString(fmt.Sprintf("| (%s) = %q | (%s | key) = %q\n", entry, script, entry, newName))
// Assign script to the "file" field and then rename it to "script" or "content".
tmpl.expr.WriteString(fmt.Sprintf("| (%s) = %q | (%s) tag = %q | (%s | key) = %q\n",
entry, script, entry, tag, entry, newName))
}

// embedAllScripts replaces all "provision" and "probes" file references with the actual script.
Expand All @@ -579,7 +643,7 @@ func (tmpl *Template) embedAllScripts(ctx context.Context, embedAll bool) error
if err != nil {
return err
}
tmpl.updateScript("probes", i, "script", string(scriptTmpl.Bytes))
tmpl.updateScript("probes", i, "script", string(scriptTmpl.Bytes), p.File.URL)
}
}
for i, p := range tmpl.Config.Provision {
Expand All @@ -605,7 +669,7 @@ func (tmpl *Template) embedAllScripts(ctx context.Context, embedAll bool) error
if err != nil {
return err
}
tmpl.updateScript("provision", i, newName, string(scriptTmpl.Bytes))
tmpl.updateScript("provision", i, newName, string(scriptTmpl.Bytes), p.File.URL)
}
}
return tmpl.evalExpr()
Expand Down
43 changes: 40 additions & 3 deletions pkg/limatmpl/embed_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: Copyright The Lima Authors
// SPDX-License-Identifier: Apache-2.0

package limatmpl_test
package limatmpl

import (
"context"
Expand All @@ -11,7 +11,6 @@ import (
"strings"
"testing"

"github.com/lima-vm/lima/pkg/limatmpl"
"github.com/lima-vm/lima/pkg/limayaml"
"github.com/sirupsen/logrus"
"gotest.tools/v3/assert"
Expand Down Expand Up @@ -392,6 +391,27 @@ provision:
"base: https://example.com/lima-linux-riscv64.img",
"{arch: riscv64, images: [{location: https://example.com/lima-linux-riscv64.img, arch: riscv64}]}",
},
{
"Binary files are base64 encoded",
`#
provision:
- mode: data
file: base1.sh # This comment will move to the "content" key
path: /tmp/data
`,
// base1.sh is binary because it contains an audible bell character '\a'
"# base0.yaml is ignored\n---\n#!\a123456789012345678901234567890123456789012345678901234567890",
`
provision:
- mode: data
content: !!binary | # This comment will move to the "content" key
IyEHMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0
NTY3ODkw
path: /tmp/data

# base0.yaml is ignored
`,
},
}

func TestEmbed(t *testing.T) {
Expand Down Expand Up @@ -436,7 +456,7 @@ func RunEmbedTest(t *testing.T, tc embedTestCase) {
err := os.WriteFile(baseFilename, []byte(base), 0o600)
assert.NilError(t, err, tc.description)
}
tmpl := &limatmpl.Template{
tmpl := &Template{
Bytes: fmt.Appendf(nil, "base: base0.yaml\n%s", tc.template),
Locator: "tmpl.yaml",
}
Expand Down Expand Up @@ -475,3 +495,20 @@ func RunEmbedTest(t *testing.T, tc embedTestCase) {
assert.Assert(t, cmp.DeepEqual(tmpl.Config, &expected), tc.description)
}
}

// TestEncodeScriptReason verifies the reasons reported by encodeScriptReason for a regular
// script, a script with an unprintable character, and a script with an overlong line.
func TestEncodeScriptReason(t *testing.T) {
	// Shrink the limit so that short literals can exercise the long-line check, and restore
	// the previous value afterwards so that other tests in the package are not affected.
	savedMaxLineLength := maxLineLength
	t.Cleanup(func() { maxLineLength = savedMaxLineLength })
	maxLineLength = 8

	t.Run("regular script", func(t *testing.T) {
		reason := encodeScriptReason("0123456\n")
		assert.Equal(t, reason, "")
	})
	t.Run("binary script", func(t *testing.T) {
		reason := encodeScriptReason("abc\a123")
		assert.Equal(t, reason, "unprintable character '\\a' at offset 3")
	})
	t.Run("long line", func(t *testing.T) {
		// newline character is included in character count
		reason := encodeScriptReason("line 1\nline 2\n01234567\n")
		assert.Equal(t, reason, "line 3 (offset 14) is longer than 8 characters")
	})
}
Loading