NousResearch · OutThisLife · May 6, 2026 · Apr 26, 2026 · May 6, 2026 · May 6, 2026
diff --git a/hermes_cli/voice.py b/hermes_cli/voice.py
@@ -281,6 +281,7 @@ def _play_beep(frequency: int, count: int = 1) -> None:
 # ── Continuous (VAD) state ───────────────────────────────────────────
 _continuous_lock = threading.Lock()
 _continuous_active = False
+_continuous_stopping = False
 _continuous_recorder: Any = None
 
 # ── TTS-vs-STT feedback guard ────────────────────────────────────────
@@ -370,28 +371,31 @@ def start_continuous(
     on_silent_limit: Optional[Callable[[], None]] = None,
     silence_threshold: int = 200,
     silence_duration: float = 3.0,
+    auto_restart: bool = True,
 ) -> None:
     """Start a VAD-driven continuous recording loop.
 
     The loop calls ``on_transcript(text)`` each time speech is detected and
-    transcribed successfully, then auto-restarts. After
-    ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
-    picked up at all) the loop stops itself and calls ``on_silent_limit``
-    so the UI can reflect "voice off". Idempotent — calling while already
-    active is a no-op.
+    transcribed successfully. If ``auto_restart`` is True, it auto-restarts
+    for the next turn; if False, the first silence-triggered transcription
+    ends the loop and reports ``"idle"``. After ``_CONTINUOUS_NO_SPEECH_LIMIT``
+    consecutive silent cycles (no speech picked up at all) the loop stops
+    itself and calls ``on_silent_limit`` so the UI can reflect "voice off".
+    Idempotent — a no-op while already active or while a stop is finishing.
 
     ``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
     ``"idle"`` so the UI can show a live indicator.
     """
-    global _continuous_active, _continuous_recorder
+    global _continuous_active, _continuous_recorder, _continuous_auto_restart
     global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
     global _continuous_no_speech_count
 
     with _continuous_lock:
-        if _continuous_active:
-            _debug("start_continuous: already active — no-op")
+        if _continuous_active or _continuous_stopping:
+            _debug("start_continuous: already active or stopping — no-op")
             return
         _continuous_active = True
+        _continuous_auto_restart = auto_restart
         _continuous_on_transcript = on_transcript
         _continuous_on_status = on_status
         _continuous_on_silent_limit = on_silent_limit
@@ -429,14 +433,14 @@ def start_continuous(
             pass
 
 
-def stop_continuous() -> None:
+def stop_continuous(force_transcribe: bool = False) -> None:
     """Stop the active continuous loop and release the microphone.
 
-    Idempotent — calling while not active is a no-op. Any in-flight
-    transcription completes but its result is discarded (the callback
-    checks ``_continuous_active`` before firing).
+    Idempotent — calling while not active is a no-op. If force_transcribe
+    is True, the captured buffer is transcribed on a background thread and
+    delivered via ``on_transcript``; otherwise the buffer is discarded.
     """
-    global _continuous_active, _continuous_on_transcript
+    global _continuous_active, _continuous_on_transcript, _continuous_stopping
     global _continuous_on_status, _continuous_on_silent_limit
     global _continuous_recorder, _continuous_no_speech_count
 
@@ -446,18 +450,63 @@ def stop_continuous() -> None:
         _continuous_active = False
         rec = _continuous_recorder
         on_status = _continuous_on_status
+        on_transcript = _continuous_on_transcript
+        _continuous_stopping = rec is not None
         _continuous_on_transcript = None
         _continuous_on_status = None
         _continuous_on_silent_limit = None
         _continuous_no_speech_count = 0
 
     if rec is not None:
-        try:
-            # cancel() (not stop()) discards buffered frames — the loop
-            # is over, we don't want to transcribe a half-captured turn.
-            rec.cancel()
-        except Exception as e:
-            logger.warning("failed to cancel recorder: %s", e)
+        if force_transcribe and on_transcript:
+            if on_status:
+                try:
+                    on_status("transcribing")
+                except Exception:
+                    pass
+            try:
+                wav_path = rec.stop()
+            except Exception as e:
+                logger.warning("failed to stop recorder: %s", e)
+                wav_path = None
+
+            def _transcribe_and_cleanup():
+                global _continuous_stopping  # guard flag is rebound in the finally below
+                try:
+                    if wav_path:  # falsy when rec.stop() failed or returned nothing
+                        try:
+                            outcome = transcribe_recording(wav_path)
+                            if outcome.get("success"):
+                                spoken = (outcome.get("transcript") or "").strip()
+                                if spoken and not is_whisper_hallucination(spoken):
+                                    on_transcript(spoken)
+                        finally:
+                            if os.path.isfile(wav_path):  # remove the WAV if it exists
+                                os.unlink(wav_path)
+                except Exception as exc:
+                    logger.warning("failed to stop/transcribe recorder: %s", exc)
+                finally:
+                    _play_beep(frequency=660, count=2)
+                    with _continuous_lock:  # lets start_continuous() proceed again
+                        _continuous_stopping = False
+                    if on_status:
+                        try:
+                            on_status("idle")
+                        except Exception:  # a failing UI callback must not break cleanup
+                            pass
+
+            threading.Thread(target=_transcribe_and_cleanup, daemon=True).start()
+            return
+        else:
+            try:
+                # cancel() (not stop()) discards buffered frames — the loop
+                # is over, we don't want to transcribe a half-captured turn.
+                rec.cancel()
+            except Exception as e:
+                logger.warning("failed to cancel recorder: %s", e)
+
+    with _continuous_lock:
+        _continuous_stopping = False
 
     # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
     # silence-auto-stop path plays).
@@ -603,23 +652,39 @@ def _continuous_on_silence() -> None:
                 _debug("_continuous_on_silence: stopped while waiting for TTS")
                 return
 
-    # Restart for the next turn.
-    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
-    _play_beep(frequency=880, count=1)
-    try:
-        rec.start(on_silence_stop=_continuous_on_silence)
-    except Exception as e:
-        logger.error("failed to restart continuous recording: %s", e)
-        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
+    if _continuous_auto_restart:
+        # Restart for the next turn.
+        _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
+        _play_beep(frequency=880, count=1)
+        try:
+            rec.start(on_silence_stop=_continuous_on_silence)
+        except Exception as e:
+            logger.error("failed to restart continuous recording: %s", e)
+            _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
+            with _continuous_lock:
+                _continuous_active = False
+            if on_status:
+                try:
+                    on_status("idle")
+                except Exception:
+                    pass
+            return
+
+        if on_status:
+            try:
+                on_status("listening")
+            except Exception:
+                pass
+    else:
+        # Do not auto-restart. Clean up state and notify idle.
+        _debug("_continuous_on_silence: auto_restart=False, stopping loop")
         with _continuous_lock:
             _continuous_active = False
-        return
-
-    if on_status:
-        try:
-            on_status("listening")
-        except Exception:
-            pass
+        if on_status:
+            try:
+                on_status("idle")
+            except Exception:
+                pass
 
 
 # ── TTS API ──────────────────────────────────────────────────────────

diff --git a/tests/hermes_cli/test_voice_wrapper.py b/tests/hermes_cli/test_voice_wrapper.py
@@ -309,6 +309,7 @@ def test_not_active_by_default(self, monkeypatch):
 
         # Isolate from any state left behind by other tests in the session.
         monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_stopping", False, raising=False)
         monkeypatch.setattr(voice, "_continuous_recorder", None)
 
         assert voice.is_continuous_active() is False
@@ -368,6 +369,8 @@ def fake_recorder(self, monkeypatch):
         monkeypatch.setattr(voice, "_continuous_on_transcript", None)
         monkeypatch.setattr(voice, "_continuous_on_status", None)
         monkeypatch.setattr(voice, "_continuous_on_silent_limit", None)
+        monkeypatch.setattr(voice, "_continuous_auto_restart", True, raising=False)
+        monkeypatch.setattr(voice, "_play_beep", lambda *_, **__: None)
 
         class FakeRecorder:
             _silence_threshold = 200
@@ -381,8 +384,12 @@ def __init__(self):
                 self.cancelled = 0
                 # Preset WAV path returned by stop()
                 self.next_stop_wav = "/tmp/fake.wav"
+                self.fail_next_start = False
 
             def start(self, on_silence_stop=None):
+                if self.fail_next_start:
+                    self.fail_next_start = False
+                    raise RuntimeError("boom")
                 self.start_calls += 1
                 self.last_callback = on_silence_stop
                 self.is_recording = True
@@ -433,6 +440,82 @@ def test_loop_auto_restarts_after_transcript(self, fake_recorder, monkeypatch):
 
         voice.stop_continuous()
 
+    def test_auto_restart_false_stops_after_first_transcript(self, fake_recorder, monkeypatch):
+        """auto_restart=False: one silence callback delivers text, then the loop ends."""
+        import hermes_cli.voice as voice
+
+        def fake_transcribe(_path):
+            return {"success": True, "transcript": "single shot"}
+
+        monkeypatch.setattr(voice, "transcribe_recording", fake_transcribe)
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        heard = []
+        seen_statuses = []
+        voice.start_continuous(
+            on_transcript=heard.append,
+            on_status=seen_statuses.append,
+            auto_restart=False,
+        )
+        # Fire the silence callback the recorder was started with.
+        fake_recorder.last_callback()
+
+        assert voice.is_continuous_active() is False
+        assert fake_recorder.start_calls == 1  # recorder was never restarted
+        assert heard == ["single shot"]
+        assert seen_statuses == ["listening", "transcribing", "idle"]
+
+    def test_force_transcribe_stop_delivers_current_buffer(self, fake_recorder, monkeypatch):
+        """stop_continuous(force_transcribe=True) transcribes what was captured."""
+        import hermes_cli.voice as voice
+
+        class InlineThread:
+            """Thread stand-in whose start() runs the target synchronously."""
+
+            def __init__(self, target, daemon=False):
+                self._run = target
+
+            def start(self):
+                self._run()
+
+        def fake_transcribe(_path):
+            return {"success": True, "transcript": "manual stop"}
+
+        monkeypatch.setattr(voice.threading, "Thread", InlineThread)
+        monkeypatch.setattr(voice, "transcribe_recording", fake_transcribe)
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        heard, seen_statuses = [], []
+        voice.start_continuous(
+            on_transcript=heard.append,
+            on_status=seen_statuses.append,
+        )
+        voice.stop_continuous(force_transcribe=True)
+
+        assert voice.is_continuous_active() is False
+        assert fake_recorder.stopped == 1
+        assert heard == ["manual stop"]
+        assert seen_statuses == ["listening", "transcribing", "idle"]
+
+    def test_restart_failure_reports_idle(self, fake_recorder, monkeypatch):
+        """If the recorder cannot restart, the loop deactivates and reports idle."""
+        import hermes_cli.voice as voice
+
+        def fake_transcribe(_path):
+            return {"success": True, "transcript": "hello world"}
+
+        monkeypatch.setattr(voice, "transcribe_recording", fake_transcribe)
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        seen_statuses = []
+        voice.start_continuous(on_transcript=lambda _t: None, on_status=seen_statuses.append)
+
+        fake_recorder.fail_next_start = True  # next rec.start() raises RuntimeError
+        fake_recorder.last_callback()
+
+        assert voice.is_continuous_active() is False
+        assert seen_statuses == ["listening", "transcribing", "idle"]
+
     def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch):
         import hermes_cli.voice as voice
 

diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
@@ -204,6 +204,7 @@ def fake_start_continuous(**kwargs):
         assert resp["result"]["status"] == "recording"
         assert captured["silence_threshold"] == 200
         assert captured["silence_duration"] == 3.0
+        assert captured["auto_restart"] is False
 
     # Round-12 Copilot review regression on #19835: ``bool`` is a subclass
     # of ``int``, so the naive ``isinstance(threshold, (int, float))``
@@ -232,6 +233,34 @@ def fake_start_continuous(**kwargs):
         assert (
             captured["silence_duration"] == 3.0
         ), f"bool silence_duration leaked through for {bad_bool_cfg!r}"
+        assert captured["auto_restart"] is False
+
+
+def test_voice_record_stop_forces_transcription(monkeypatch):
+    """The ``voice.record`` stop action passes force_transcribe=True through."""
+    stop_kwargs: dict = {}
+
+    def fake_stop_continuous(**kwargs):
+        stop_kwargs.update(kwargs)
+
+    # Swap in a stand-in voice module so the handler's
+    # ``from hermes_cli.voice import ...`` resolves to it.
+    fake_voice = types.SimpleNamespace(
+        start_continuous=lambda **_kwargs: None,
+        stop_continuous=fake_stop_continuous,
+    )
+    monkeypatch.setitem(sys.modules, "hermes_cli.voice", fake_voice)
+
+    # Only the stop branch is exercised; no recording is ever started.
+    request = {
+        "id": "voice-record-stop",
+        "method": "voice.record",
+        "params": {"action": "stop"},
+    }
+    resp = server.dispatch(request)
+
+    assert resp["result"]["status"] == "stopped"
+    assert stop_kwargs["force_transcribe"] is True
 
 
 def test_voice_toggle_tts_branch_also_carries_record_key(monkeypatch):

diff --git a/tui_gateway/server.py b/tui_gateway/server.py
@@ -5621,12 +5621,11 @@ def _(rid, params: dict) -> dict:
 def _(rid, params: dict) -> dict:
     """VAD-driven continuous record loop, CLI-parity.
 
-    ``start`` turns on a VAD loop that emits ``voice.transcript`` events
-    for each detected utterance and auto-restarts for the next turn.
-    ``stop`` halts the loop (manual stop; matches cli.py's Ctrl+B-while-
-    recording branch clearing ``_voice_continuous``). Three consecutive
-    silent cycles stop the loop automatically and emit a
-    ``voice.transcript`` with ``no_speech_limit=True``.
+    ``start`` begins one VAD-bounded capture and emits ``voice.transcript``
+    after silence stops the recorder. ``stop`` forces transcription of the
+    active buffer, matching classic CLI push-to-talk. Three consecutive
+    silent captures stop the loop automatically and emit ``voice.transcript``
+    with ``no_speech_limit=True``.
     """
     action = params.get("action", "start")
 
@@ -5673,13 +5672,14 @@ def _(rid, params: dict) -> dict:
                 ),
                 silence_threshold=safe_threshold,
                 silence_duration=safe_duration,
+                auto_restart=False,
             )
             return _ok(rid, {"status": "recording"})
 
         # action == "stop"
         from hermes_cli.voice import stop_continuous
 
-        stop_continuous()
+        stop_continuous(force_transcribe=True)
         return _ok(rid, {"status": "stopped"})
     except ImportError:
         return _err(