avatarneil
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 1 addition & 1 deletion
diff --git a/README.md b/README.md
Lines changed: 144 additions & 33 deletions
diff --git a/SKILL.md b/SKILL.md
Lines changed: 6 additions & 6 deletions
diff --git a/clawdbot.plugin.json b/clawdbot.plugin.json
Lines changed: 87 additions & 68 deletions
@@ -19,7 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Streaming TTS for real-time audio playback
 - Barge-in support to interrupt bot responses
 - Auto-reconnect with heartbeat monitoring
-- Discord slash commands: `/voice join`, `/voice leave`, `/voice status`
+- Discord slash commands: `/discord_voice join`, `/discord_voice leave`, `/discord_voice status`
 - CLI commands for voice management
 - Agent tool `discord_voice` for programmatic control
 - Configurable VAD sensitivity (low/medium/high)
 
@@ -184,21 +184,21 @@ Add these to your bot's OAuth2 URL or configure in Discord Developer Portal.
 
 Once registered with Discord, use these commands:
 
-- `/voice join <channel>` - Join a voice channel
-- `/voice leave` - Leave the current voice channel
-- `/voice status` - Show voice connection status
+- `/discord_voice join <channel>` - Join a voice channel
+- `/discord_voice leave` - Leave the current voice channel
+- `/discord_voice status` - Show voice connection status
 
 ### CLI Commands
 
 ```bash
 # Join a voice channel
-clawdbot voice join <channelId>
+clawdbot discord_voice join <channelId>
 
 # Leave voice
-clawdbot voice leave --guild <guildId>
+clawdbot discord_voice leave --guild <guildId>
 
 # Check status
-clawdbot voice status
+clawdbot discord_voice status
 ```
 
 ### Agent Tool
 
@@ -27,32 +27,50 @@
   },
   "configSchema": {
     "type": "object",
-    "additionalProperties": false,
+    "additionalProperties": true,
     "properties": {
-      "enabled": {
-        "type": "boolean",
-        "default": true
-      },
+      "enabled": { "type": "boolean", "default": true },
       "sttProvider": {
         "type": "string",
-        "enum": ["whisper", "gpt4o-mini", "gpt4o-transcribe", "gpt4o-transcribe-diarize", "deepgram", "local-whisper"],
+        "enum": [
+          "whisper",
+          "gpt4o-mini",
+          "gpt4o-transcribe",
+          "gpt4o-transcribe-diarize",
+          "deepgram",
+          "local-whisper",
+          "wyoming-whisper"
+        ],
         "default": "whisper"
       },
+      "sttFallbackProviders": {
+        "type": "array",
+        "items": {
+          "type": "string",
+          "enum": [
+            "whisper",
+            "gpt4o-mini",
+            "gpt4o-transcribe",
+            "gpt4o-transcribe-diarize",
+            "deepgram",
+            "local-whisper",
+            "wyoming-whisper"
+          ]
+        },
+        "description": "Fallback STT providers used when the primary provider fails (quota, rate limit, Wyoming unreachable)"
+      },
       "streamingSTT": {
         "type": "boolean",
         "default": true,
         "description": "Use streaming STT for lower latency (Deepgram only)"
       },
       "ttsProvider": {
         "type": "string",
-        "enum": ["openai", "elevenlabs", "kokoro"],
+        "enum": ["openai", "elevenlabs", "deepgram", "polly", "edge", "kokoro"],
         "default": "openai",
-        "description": "openai, elevenlabs, or kokoro (free local)"
-      },
-      "ttsVoice": {
-        "type": "string",
-        "default": "nova"
+        "description": "openai, elevenlabs, deepgram, polly, edge (free), or kokoro (free local)"
       },
+      "ttsVoice": { "type": "string", "default": "nova" },
       "vadSensitivity": {
         "type": "string",
         "enum": ["low", "medium", "high"],
@@ -73,42 +91,24 @@
         "default": "off",
         "description": "Thinking level for voice responses (lower = faster)"
       },
-      "allowedUsers": {
-        "type": "array",
-        "items": { "type": "string" },
-        "default": []
-      },
-      "silenceThresholdMs": {
-        "type": "number",
-        "default": 1500
-      },
-      "minAudioMs": {
-        "type": "number",
-        "default": 500
-      },
-      "maxRecordingMs": {
-        "type": "number",
-        "default": 30000
-      },
+      "allowedUsers": { "type": "array", "items": { "type": "string" }, "default": [] },
+      "silenceThresholdMs": { "type": "number", "default": 1500 },
+      "minAudioMs": { "type": "number", "default": 500 },
+      "maxRecordingMs": { "type": "number", "default": 30000 },
       "heartbeatIntervalMs": {
         "type": "number",
         "default": 30000,
         "description": "Connection health check interval in ms"
       },
-      "autoJoinChannel": {
-        "type": "string",
-        "description": "Voice channel ID to auto-join on startup"
-      },
-      "openclawRoot": {
-        "type": "string",
-        "description": "OpenClaw package root if auto-detection fails"
-      },
+      "autoJoinChannel": { "type": "string", "description": "Voice channel ID to auto-join on startup" },
+      "openclawRoot": { "type": "string", "description": "OpenClaw package root if auto-detection fails" },
       "thinkingSound": {
         "type": "object",
         "properties": {
           "enabled": { "type": "boolean", "default": true },
           "path": { "type": "string", "default": "assets/thinking.mp3" },
-          "volume": { "type": "number", "default": 0.7, "minimum": 0, "maximum": 1 }
+          "volume": { "type": "number", "default": 0.7, "minimum": 0, "maximum": 1 },
+          "stopDelayMs": { "type": "number", "default": 50 }
         }
       },
       "openai": {
@@ -124,6 +124,29 @@
           }
         }
       },
+      "polly": {
+        "type": "object",
+        "properties": {
+          "region": { "type": "string", "default": "us-east-1" },
+          "voiceId": { "type": "string", "default": "Joanna" },
+          "engine": { "type": "string", "enum": ["standard", "neural", "long-form", "generative"] },
+          "accessKeyId": { "type": "string" },
+          "secretAccessKey": { "type": "string" }
+        }
+      },
+      "edge": {
+        "type": "object",
+        "properties": {
+          "voice": { "type": "string", "default": "de-DE-KatjaNeural" },
+          "lang": { "type": "string", "default": "de-DE" },
+          "outputFormat": { "type": "string", "default": "webm-24khz-16bit-mono-opus" },
+          "rate": { "type": "string" },
+          "pitch": { "type": "string" },
+          "volume": { "type": "string" },
+          "proxy": { "type": "string" },
+          "timeoutMs": { "type": "number" }
+        }
+      },
       "kokoro": {
         "type": "object",
         "properties": {
@@ -148,8 +171,25 @@
         "type": "object",
         "properties": {
           "apiKey": { "type": "string" },
-          "model": { "type": "string", "default": "nova-2" }
+          "model": { "type": "string", "default": "nova-2" },
+          "ttsModel": { "type": "string", "default": "aura-asteria-en" }
         }
+      },
+      "wyomingWhisper": {
+        "type": "object",
+        "properties": {
+          "host": { "type": "string", "default": "127.0.0.1" },
+          "port": { "type": "number", "default": 10300 },
+          "uri": { "type": "string" },
+          "language": { "type": "string" },
+          "connectTimeoutMs": { "type": "number", "default": 10000 }
+        },
+        "description": "Wyoming Faster Whisper (remote STT over TCP)"
+      },
+      "localWhisper": {
+        "type": "object",
+        "properties": { "model": { "type": "string" }, "quantized": { "type": "boolean" } },
+        "description": "Local Whisper STT (Xenova)"
       }
     }
   },
@@ -164,7 +204,7 @@
     },
     "ttsProvider": {
       "label": "Text-to-Speech Provider",
-      "help": "Use 'openai' or 'elevenlabs'"
+      "help": "openai, elevenlabs, deepgram, polly, edge, or kokoro"
     },
     "ttsVoice": {
       "label": "TTS Voice (deprecated)",
@@ -174,36 +214,15 @@
       "label": "OpenAI TTS Voice",
       "help": "nova, shimmer, echo, onyx, fable, alloy, ash, sage, coral"
     },
-    "kokoro.voice": {
-      "label": "Kokoro TTS Voice",
-      "help": "af_heart, af_bella, af_nicole, etc."
-    },
-    "vadSensitivity": {
-      "label": "VAD Sensitivity",
-      "help": "Voice activity detection sensitivity (low/medium/high)"
-    },
-    "allowedUsers": {
-      "label": "Allowed Users",
-      "help": "Discord user IDs allowed to use voice (empty = all allowed)"
-    },
-    "openai.apiKey": {
-      "label": "OpenAI API Key",
-      "sensitive": true
-    },
-    "elevenlabs.apiKey": {
-      "label": "ElevenLabs API Key",
-      "sensitive": true
-    },
-    "elevenlabs.modelId": {
-      "label": "ElevenLabs Model",
-      "help": "turbo, flash, v2, v3 (or full model ID)"
-    },
+    "kokoro.voice": { "label": "Kokoro TTS Voice", "help": "af_heart, af_bella, af_nicole, etc." },
+    "vadSensitivity": { "label": "VAD Sensitivity", "help": "Voice activity detection sensitivity (low/medium/high)" },
+    "allowedUsers": { "label": "Allowed Users", "help": "Discord user IDs allowed to use voice (empty = all allowed)" },
+    "openai.apiKey": { "label": "OpenAI API Key", "sensitive": true },
+    "elevenlabs.apiKey": { "label": "ElevenLabs API Key", "sensitive": true },
+    "elevenlabs.modelId": { "label": "ElevenLabs Model", "help": "turbo, flash, v2, v3 (or full model ID)" },
     "thinkingSound.enabled": { "label": "Thinking Sound", "help": "Play sound while processing" },
     "thinkingSound.path": { "label": "Thinking Sound File", "help": "Path to MP3" },
     "thinkingSound.volume": { "label": "Thinking Sound Volume", "help": "Volume 0-1" },
-    "deepgram.apiKey": {
-      "label": "Deepgram API Key",
-      "sensitive": true
-    }
+    "deepgram.apiKey": { "label": "Deepgram API Key", "sensitive": true }
   }
 }