mutablelogic · djthorpe · Feb 11, 2025 · Feb 11, 2025 · Feb 11, 2025 · Feb 11, 2025
diff --git a/7283d94c6a7e5bc91e7875ccf51a96d3.m2a b/7283d94c6a7e5bc91e7875ccf51a96d3.m2a
diff --git a/README.md b/README.md
@@ -405,15 +405,74 @@ Commands:
   agents       Return a list of agents
   models       Return a list of models
   tools        Return a list of tools
-  download     Download a model
+  download     Download a model (for Ollama)
   chat         Start a chat session
-  complete     Complete a prompt
+  complete     Complete a prompt, generate image or speech from text
   embedding    Generate an embedding
   version      Print the version of this tool
 
 Run "llm <command> --help" for more information on a command.
 ```
 
+### Prompt Completion
+
+To have the model respond to a prompt, you can use the `complete` command. For example, to
+have the model respond to the prompt "What is the capital of France?" using the `claude-3-5-haiku-20241022`
+model, you can use the following command:
+
+```bash
+llm complete "What is the capital of France?"
+```
+
+The first time you use the command, use the `--model` flag (for example, `--model claude-3-5-haiku-20241022`) to specify the
+model you want to use. Your choice of model will be remembered for subsequent completions.
+
+### Explain computer code
+
+To have the model explain a piece of computer code, you can pipe the code into the `complete` command.
+For example, to have the model explain the code in the file `example.go`, you can use the following command:
+
+```bash
+cat example.go | llm complete
+```
+
+### Caption an image
+
+To have the model generate a caption for an image, you can use the `complete` command with the `--file`
+flag. For example, to have the model generate a caption for the image in the file `picture.png`, you can use
+the following command:
+
+```bash
+llm complete --file picture.png "Explain this image"
+```
+
+### Generate an image
+
+To have the model generate an image from a prompt, you can use the `complete` command with the `--format image`
+option. For example, to have the model generate an image from the prompt "A picture of a cat", you can use
+the following command:
+
+```bash
+llm complete --model dall-e-3 --format image "A picture of a cat"
+```
+
+Flags `--size`, `--quality` and `--style` can be used to specify the image parameters. It will write the image
+file in the current working directory.
+
+### Convert text to speech
+
+To have a model generate speech from text:
+
+```bash
+cat book.txt | llm complete --model tts-1 --format mp3 --voice coral
+```
+
+The audio file is written to the current working directory. The following audio formats
+and voices are currently supported:
+
+* Formats: `--format mp3`, `--format opus`, `--format aac`, `--format flac`, `--format wav`, `--format pcm`
+* Voices: `--voice alloy`, `--voice ash`, `--voice coral`, `--voice echo`, `--voice fable`, `--voice onyx`, `--voice nova`, `--voice sage`, `--voice shimmer`
+
 ## Contributing & Distribution
 
 _This module is currently in development and subject to change_. Please do file

diff --git a/attachment.go b/attachment.go
@@ -1,29 +1,42 @@
 package llm
 
 import (
+	"crypto/md5"
 	"encoding/base64"
 	"encoding/json"
+	"fmt"
 	"io"
 	"mime"
 	"net/http"
 	"os"
 	"path/filepath"
+	"strings"
 )
 
 ///////////////////////////////////////////////////////////////////////////////
 // TYPES
 
+// General attachment metadata
 type AttachmentMeta struct {
 	Id        string `json:"id,omitempty"`
 	Filename  string `json:"filename,omitempty"`
 	ExpiresAt uint64 `json:"expires_at,omitempty"`
 	Caption   string `json:"transcript,omitempty"`
 	Data      []byte `json:"data"`
+	Type      string `json:"type"`
+}
+
+// OpenAI image metadata
+type ImageMeta struct {
+	Url    string `json:"url,omitempty"`
+	Data   []byte `json:"b64_json,omitempty"`
+	Prompt string `json:"revised_prompt,omitempty"`
 }
 
 // Attachment for messages
 type Attachment struct {
-	meta AttachmentMeta
+	meta  *AttachmentMeta
+	image *ImageMeta
 }
 
 const (
@@ -38,21 +51,30 @@ func NewAttachment() *Attachment {
 	return new(Attachment)
 }
 
+// NewAttachmentWithImage returns an attachment backed by OpenAI image metadata
+func NewAttachmentWithImage(image *ImageMeta) *Attachment {
+	return &Attachment{image: image}
+}
+
 // ReadAttachment returns an attachment from a reader object.
 // It is the responsibility of the caller to close the reader.
-func ReadAttachment(r io.Reader) (*Attachment, error) {
-	var filename string
+func ReadAttachment(r io.Reader, mimetype ...string) (*Attachment, error) {
+	var filename, typ string
 	data, err := io.ReadAll(r)
 	if err != nil {
 		return nil, err
 	}
 	if f, ok := r.(*os.File); ok {
 		filename = f.Name()
 	}
+	if len(mimetype) > 0 {
+		typ = mimetype[0]
+	}
 	return &Attachment{
-		meta: AttachmentMeta{
+		meta: &AttachmentMeta{
 			Filename: filename,
 			Data:     data,
+			Type:     typ,
 		},
 	}, nil
 }
@@ -73,19 +95,27 @@ func (a *Attachment) MarshalJSON() ([]byte, error) {
 		Filename string `json:"filename,omitempty"`
 		Type     string `json:"type"`
 		Bytes    uint64 `json:"bytes"`
-		Caption  string `json:"transcript,omitempty"`
+		Hash     string `json:"hash,omitempty"`
+		Caption  string `json:"caption,omitempty"`
 	}
-	j.Id = a.meta.Id
-	j.Filename = a.meta.Filename
+
 	j.Type = a.Type()
-	j.Bytes = uint64(len(a.meta.Data))
-	j.Caption = a.meta.Caption
+	j.Caption = a.Caption()
+	j.Hash = a.Hash()
+	j.Filename = a.Filename()
+	if a.meta != nil {
+		j.Id = a.meta.Id
+		j.Bytes = uint64(len(a.meta.Data))
+	} else if a.image != nil {
+		j.Bytes = uint64(len(a.image.Data))
+	}
+
 	return json.Marshal(j)
 }
 
 // Stringify an attachment
 func (a *Attachment) String() string {
-	data, err := json.MarshalIndent(a.meta, "", "  ")
+	data, err := json.MarshalIndent(a, "", "  ")
 	if err != nil {
 		return err.Error()
 	}
@@ -95,41 +125,83 @@ func (a *Attachment) String() string {
 ////////////////////////////////////////////////////////////////////////////////
 // PUBLIC METHODS
 
+// Hash returns the MD5 digest of the attachment data as a hex string
+func (a *Attachment) Hash() string {
+	hash := md5.New()
+	hash.Write(a.Data())
+	return fmt.Sprintf("%x", hash.Sum(nil))
+}
+
+// Write writes the attachment data to w, or returns io.EOF if there is neither metadata nor an image
+func (a *Attachment) Write(w io.Writer) (int, error) {
+	if a.meta != nil {
+		return w.Write(a.meta.Data)
+	}
+	if a.image != nil {
+		return w.Write(a.image.Data)
+	}
+	return 0, io.EOF
+}
+
 // Return the filename of an attachment
 func (a *Attachment) Filename() string {
-	return a.meta.Filename
+	if a.meta != nil && a.meta.Filename != "" {
+		return a.meta.Filename
+	}
+	// Obtain filename from MD5
+	if ext, err := mime.ExtensionsByType(a.Type()); err == nil && len(ext) > 0 {
+		return a.Hash() + ext[0]
+	}
+	return ""
 }
 
 // Return the raw attachment data
 func (a *Attachment) Data() []byte {
-	return a.meta.Data
+	if a.meta != nil {
+		return a.meta.Data
+	}
+	if a.image != nil {
+		return a.image.Data
+	}
+	return nil
 }
 
 // Return the caption for the attachment
 func (a *Attachment) Caption() string {
-	return a.meta.Caption
+	if a.meta != nil {
+		return strings.TrimSpace(a.meta.Caption)
+	}
+	if a.image != nil {
+		return strings.TrimSpace(a.image.Prompt)
+	}
+	return ""
 }
 
 // Return the mime media type for the attachment, based
 // on the data and/or filename extension. Returns an empty string if
 // there is no data or filename
 func (a *Attachment) Type() string {
+	// If there's a mimetype set, use this
+	if a.meta != nil && a.meta.Type != "" {
+		return a.meta.Type
+	}
+
 	// If there's no data or filename, return empty
-	if len(a.meta.Data) == 0 && a.meta.Filename == "" {
+	if len(a.Data()) == 0 && a.Filename() == "" {
 		return ""
 	}
 
 	// Mimetype based on content
 	mimetype := defaultMimetype
-	if len(a.meta.Data) > 0 {
-		mimetype = http.DetectContentType(a.meta.Data)
+	if len(a.Data()) > 0 {
+		mimetype = http.DetectContentType(a.Data())
 		if mimetype != defaultMimetype {
 			return mimetype
 		}
 	}
 
 	// Mimetype based on filename
-	if a.meta.Filename != "" {
+	if a.meta != nil && a.meta.Filename != "" {
 		// Detect mimetype from extension
 		mimetype = mime.TypeByExtension(filepath.Ext(a.meta.Filename))
 	}
@@ -139,24 +211,27 @@ func (a *Attachment) Type() string {
 }
 
 func (a *Attachment) Url() string {
-	return "data:" + a.Type() + ";base64," + base64.StdEncoding.EncodeToString(a.meta.Data)
+	return "data:" + a.Type() + ";base64," + base64.StdEncoding.EncodeToString(a.Data())
 }
 
 // Streaming includes the ability to append data
 func (a *Attachment) Append(other *Attachment) {
-	if other.meta.Id != "" {
-		a.meta.Id = other.meta.Id
-	}
-	if other.meta.Filename != "" {
-		a.meta.Filename = other.meta.Filename
-	}
-	if other.meta.ExpiresAt != 0 {
-		a.meta.ExpiresAt = other.meta.ExpiresAt
-	}
-	if other.meta.Caption != "" {
-		a.meta.Caption += other.meta.Caption
-	}
-	if len(other.meta.Data) > 0 {
-		a.meta.Data = append(a.meta.Data, other.meta.Data...)
+	if a.meta != nil {
+		if other.meta.Id != "" {
+			a.meta.Id = other.meta.Id
+		}
+		if other.meta.Filename != "" {
+			a.meta.Filename = other.meta.Filename
+		}
+		if other.meta.ExpiresAt != 0 {
+			a.meta.ExpiresAt = other.meta.ExpiresAt
+		}
+		if other.meta.Caption != "" {
+			a.meta.Caption += other.meta.Caption
+		}
+		if len(other.meta.Data) > 0 {
+			a.meta.Data = append(a.meta.Data, other.meta.Data...)
+		}
 	}
+	// TODO: Append for image
 }
diff --git a/cmd/llm/chat.go b/cmd/llm/chat.go
@@ -26,7 +26,7 @@ type ChatCmd struct {
 // PUBLIC METHODS
 
 func (cmd *ChatCmd) Run(globals *Globals) error {
-	return run(globals, cmd.Model, func(ctx context.Context, model llm.Model) error {
+	return run(globals, AudioType, cmd.Model, func(ctx context.Context, model llm.Model) error {
 		// Current buffer
 		var buf string
 

diff --git a/cmd/llm/chat2.go b/cmd/llm/chat2.go
@@ -63,7 +63,7 @@ func NewTelegramServer(token string, model llm.Model, system string, toolkit llm
 // PUBLIC METHODS
 
 func (cmd *Chat2Cmd) Run(globals *Globals) error {
-	return run(globals, cmd.Model, func(ctx context.Context, model llm.Model) error {
+	return run(globals, ChatType, cmd.Model, func(ctx context.Context, model llm.Model) error {
 		server, err := NewTelegramServer(cmd.TelegramToken, model, cmd.System, globals.toolkit, telegram.WithDebug(globals.Debug))
 		if err != nil {
 			return err