Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 98 additions & 33 deletions hermes_cli/voice.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ def _play_beep(frequency: int, count: int = 1) -> None:
# ── Continuous (VAD) state ───────────────────────────────────────────
_continuous_lock = threading.Lock()
_continuous_active = False
_continuous_stopping = False
_continuous_recorder: Any = None

# ── TTS-vs-STT feedback guard ────────────────────────────────────────
Expand Down Expand Up @@ -370,28 +371,31 @@ def start_continuous(
on_silent_limit: Optional[Callable[[], None]] = None,
silence_threshold: int = 200,
silence_duration: float = 3.0,
auto_restart: bool = True,
) -> None:
"""Start a VAD-driven continuous recording loop.

The loop calls ``on_transcript(text)`` each time speech is detected and
transcribed successfully, then auto-restarts. After
``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
picked up at all) the loop stops itself and calls ``on_silent_limit``
so the UI can reflect "voice off". Idempotent — calling while already
active is a no-op.
transcribed successfully. If ``auto_restart`` is True, it auto-restarts
for the next turn. If ``auto_restart`` is False, the first silence-triggered
transcription ends the loop and reports ``"idle"``. After
``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech picked
up at all) the loop stops itself and calls ``on_silent_limit`` so the UI can
reflect "voice off". Idempotent — calling while already active is a no-op.

``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
``"idle"`` so the UI can show a live indicator.
"""
global _continuous_active, _continuous_recorder
global _continuous_active, _continuous_recorder, _continuous_auto_restart
global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
global _continuous_no_speech_count

with _continuous_lock:
if _continuous_active:
if _continuous_active or _continuous_stopping:
_debug("start_continuous: already active — no-op")
return
Comment thread
OutThisLife marked this conversation as resolved.
Outdated
_continuous_active = True
_continuous_auto_restart = auto_restart
_continuous_on_transcript = on_transcript
_continuous_on_status = on_status
_continuous_on_silent_limit = on_silent_limit
Expand Down Expand Up @@ -429,14 +433,14 @@ def start_continuous(
pass


def stop_continuous() -> None:
def stop_continuous(force_transcribe: bool = False) -> None:
"""Stop the active continuous loop and release the microphone.

Idempotent — calling while not active is a no-op. Any in-flight
transcription completes but its result is discarded (the callback
checks ``_continuous_active`` before firing).
Idempotent — calling while not active is a no-op. If force_transcribe
is True, the current buffer is transcribed before stopping. Otherwise
the buffer is discarded.
Comment thread
OutThisLife marked this conversation as resolved.
Outdated
"""
global _continuous_active, _continuous_on_transcript
global _continuous_active, _continuous_on_transcript, _continuous_stopping
global _continuous_on_status, _continuous_on_silent_limit
global _continuous_recorder, _continuous_no_speech_count

Expand All @@ -446,18 +450,63 @@ def stop_continuous() -> None:
_continuous_active = False
rec = _continuous_recorder
on_status = _continuous_on_status
on_transcript = _continuous_on_transcript
_continuous_stopping = rec is not None
_continuous_on_transcript = None
_continuous_on_status = None
_continuous_on_silent_limit = None
_continuous_no_speech_count = 0
Comment thread
OutThisLife marked this conversation as resolved.
Outdated

if rec is not None:
try:
# cancel() (not stop()) discards buffered frames — the loop
# is over, we don't want to transcribe a half-captured turn.
rec.cancel()
except Exception as e:
logger.warning("failed to cancel recorder: %s", e)
if force_transcribe and on_transcript:
if on_status:
try:
on_status("transcribing")
except Exception:
pass
try:
wav_path = rec.stop()
except Exception as e:
logger.warning("failed to stop recorder: %s", e)
wav_path = None
Comment thread
OutThisLife marked this conversation as resolved.

def _transcribe_and_cleanup():
    """Transcribe the captured WAV, deliver the text, then finish the stop.

    Reads ``wav_path``, ``on_transcript`` and ``on_status`` from the
    enclosing scope. The transcript is forwarded only when transcription
    succeeded, the text is non-empty after stripping, and it is not
    flagged by ``is_whisper_hallucination``. The WAV file is removed
    whether or not transcription worked.

    Whatever happens above, the ``finally`` clause plays the stop cue,
    clears ``_continuous_stopping`` and reports ``"idle"``.
    """
    global _continuous_stopping
    try:
        if wav_path:
            try:
                result = transcribe_recording(wav_path)
                if result.get("success"):
                    text = (result.get("transcript") or "").strip()
                    if text and not is_whisper_hallucination(text):
                        on_transcript(text)
            finally:
                # Never leave the temporary recording behind.
                if os.path.isfile(wav_path):
                    os.unlink(wav_path)
    except Exception as e:
        logger.warning("failed to stop/transcribe recorder: %s", e)
    finally:
        # The cue is best-effort: a failure here must not skip the state
        # reset below, otherwise ``_continuous_stopping`` would stay True
        # and no new loop could ever be started.
        try:
            _play_beep(frequency=660, count=2)
        except Exception as e:
            logger.warning("failed to play stop cue: %s", e)
        with _continuous_lock:
            _continuous_stopping = False
        if on_status:
            try:
                on_status("idle")
            except Exception:
                # Status callbacks are best-effort; a broken UI hook must
                # not surface as a crash on this worker.
                pass

threading.Thread(target=_transcribe_and_cleanup, daemon=True).start()
return
else:
try:
# cancel() (not stop()) discards buffered frames — the loop
# is over, we don't want to transcribe a half-captured turn.
rec.cancel()
except Exception as e:
logger.warning("failed to cancel recorder: %s", e)

with _continuous_lock:
_continuous_stopping = False

# Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
# silence-auto-stop path plays).
Expand Down Expand Up @@ -603,23 +652,39 @@ def _continuous_on_silence() -> None:
_debug("_continuous_on_silence: stopped while waiting for TTS")
return

# Restart for the next turn.
_debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
_play_beep(frequency=880, count=1)
try:
rec.start(on_silence_stop=_continuous_on_silence)
except Exception as e:
logger.error("failed to restart continuous recording: %s", e)
_debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
if _continuous_auto_restart:
# Restart for the next turn.
_debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
_play_beep(frequency=880, count=1)
try:
Comment thread
OutThisLife marked this conversation as resolved.
rec.start(on_silence_stop=_continuous_on_silence)
except Exception as e:
logger.error("failed to restart continuous recording: %s", e)
_debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
with _continuous_lock:
_continuous_active = False
if on_status:
try:
on_status("idle")
except Exception:
pass
return

if on_status:
try:
on_status("listening")
except Exception:
pass
else:
# Do not auto-restart. Clean up state and notify idle.
_debug("_continuous_on_silence: auto_restart=False, stopping loop")
with _continuous_lock:
_continuous_active = False
return

if on_status:
try:
on_status("listening")
except Exception:
pass
if on_status:
try:
on_status("idle")
except Exception:
pass


# ── TTS API ──────────────────────────────────────────────────────────
Expand Down
83 changes: 83 additions & 0 deletions tests/hermes_cli/test_voice_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ def test_not_active_by_default(self, monkeypatch):

# Isolate from any state left behind by other tests in the session.
monkeypatch.setattr(voice, "_continuous_active", False)
monkeypatch.setattr(voice, "_continuous_stopping", False, raising=False)
monkeypatch.setattr(voice, "_continuous_recorder", None)

assert voice.is_continuous_active() is False
Expand Down Expand Up @@ -368,6 +369,8 @@ def fake_recorder(self, monkeypatch):
monkeypatch.setattr(voice, "_continuous_on_transcript", None)
monkeypatch.setattr(voice, "_continuous_on_status", None)
monkeypatch.setattr(voice, "_continuous_on_silent_limit", None)
monkeypatch.setattr(voice, "_continuous_auto_restart", True, raising=False)
monkeypatch.setattr(voice, "_play_beep", lambda *_, **__: None)

class FakeRecorder:
_silence_threshold = 200
Expand All @@ -381,8 +384,12 @@ def __init__(self):
self.cancelled = 0
# Preset WAV path returned by stop()
self.next_stop_wav = "/tmp/fake.wav"
self.fail_next_start = False

def start(self, on_silence_stop=None):
if self.fail_next_start:
self.fail_next_start = False
raise RuntimeError("boom")
self.start_calls += 1
self.last_callback = on_silence_stop
self.is_recording = True
Expand Down Expand Up @@ -433,6 +440,82 @@ def test_loop_auto_restarts_after_transcript(self, fake_recorder, monkeypatch):

voice.stop_continuous()

def test_auto_restart_false_stops_after_first_transcript(self, fake_recorder, monkeypatch):
    """With ``auto_restart=False`` a single silence callback ends the loop."""
    import hermes_cli.voice as voice

    # Stub the speech-to-text layer so the silence callback yields a
    # fixed, non-hallucinated transcript.
    monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
    monkeypatch.setattr(
        voice,
        "transcribe_recording",
        lambda _p: {"success": True, "transcript": "single shot"},
    )

    heard = []
    status_log = []
    voice.start_continuous(
        on_transcript=heard.append,
        on_status=status_log.append,
        auto_restart=False,
    )

    # Fire the silence callback the recorder was started with.
    fake_recorder.last_callback()

    assert heard == ["single shot"]
    # The recorder was started exactly once, i.e. never restarted.
    assert fake_recorder.start_calls == 1
    assert status_log == ["listening", "transcribing", "idle"]
    assert voice.is_continuous_active() is False

def test_force_transcribe_stop_delivers_current_buffer(self, fake_recorder, monkeypatch):
    """``stop_continuous(force_transcribe=True)`` delivers the current buffer."""
    import hermes_cli.voice as voice

    class InlineThread:
        """Thread stand-in whose ``start()`` runs the target synchronously."""

        def __init__(self, target, daemon=False):
            self.target = target

        def start(self):
            self.target()

    # Run the background worker inline so the assertions below observe
    # its effects without any waiting.
    monkeypatch.setattr(voice.threading, "Thread", InlineThread)
    monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
    monkeypatch.setattr(
        voice,
        "transcribe_recording",
        lambda _p: {"success": True, "transcript": "manual stop"},
    )

    heard = []
    status_log = []
    voice.start_continuous(
        on_transcript=heard.append,
        on_status=status_log.append,
    )
    voice.stop_continuous(force_transcribe=True)

    assert fake_recorder.stopped == 1
    assert heard == ["manual stop"]
    assert status_log == ["listening", "transcribing", "idle"]
    assert voice.is_continuous_active() is False

def test_restart_failure_reports_idle(self, fake_recorder, monkeypatch):
    """A failing recorder restart must leave the loop inactive and report idle."""
    import hermes_cli.voice as voice

    monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
    monkeypatch.setattr(
        voice,
        "transcribe_recording",
        lambda _p: {"success": True, "transcript": "hello world"},
    )

    def ignore_transcript(_t):
        return None

    status_log = []
    voice.start_continuous(on_transcript=ignore_transcript, on_status=status_log.append)

    # Arm the fake so its next ``start()`` raises, then fire the silence
    # callback it was given.
    fake_recorder.fail_next_start = True
    fake_recorder.last_callback()

    assert status_log == ["listening", "transcribing", "idle"]
    assert voice.is_continuous_active() is False

def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch):
import hermes_cli.voice as voice

Expand Down
29 changes: 29 additions & 0 deletions tests/test_tui_gateway_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ def fake_start_continuous(**kwargs):
assert resp["result"]["status"] == "recording"
assert captured["silence_threshold"] == 200
assert captured["silence_duration"] == 3.0
assert captured["auto_restart"] is False

# Round-12 Copilot review regression on #19835: ``bool`` is a subclass
# of ``int``, so the naive ``isinstance(threshold, (int, float))``
Expand Down Expand Up @@ -232,6 +233,34 @@ def fake_start_continuous(**kwargs):
assert (
captured["silence_duration"] == 3.0
), f"bool silence_duration leaked through for {bad_bool_cfg!r}"
assert captured["auto_restart"] is False


def test_voice_record_stop_forces_transcription(monkeypatch):
    """``voice.record`` with ``action="stop"`` passes ``force_transcribe=True``."""
    stop_kwargs: dict = {}

    def record_stop_call(**kwargs):
        stop_kwargs.update(kwargs)

    # Replace the voice module with a stub so the handler's import picks
    # up the recording fake instead of the real implementation.
    stub_voice = types.SimpleNamespace(
        start_continuous=lambda **_kwargs: None,
        stop_continuous=record_stop_call,
    )
    monkeypatch.setitem(sys.modules, "hermes_cli.voice", stub_voice)

    request = {
        "id": "voice-record-stop",
        "method": "voice.record",
        "params": {"action": "stop"},
    }
    resp = server.dispatch(request)

    assert resp["result"]["status"] == "stopped"
    assert stop_kwargs["force_transcribe"] is True


def test_voice_toggle_tts_branch_also_carries_record_key(monkeypatch):
Expand Down
14 changes: 7 additions & 7 deletions tui_gateway/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -5621,12 +5621,11 @@ def _(rid, params: dict) -> dict:
def _(rid, params: dict) -> dict:
"""VAD-driven continuous record loop, CLI-parity.

``start`` turns on a VAD loop that emits ``voice.transcript`` events
for each detected utterance and auto-restarts for the next turn.
``stop`` halts the loop (manual stop; matches cli.py's Ctrl+B-while-
recording branch clearing ``_voice_continuous``). Three consecutive
silent cycles stop the loop automatically and emit a
``voice.transcript`` with ``no_speech_limit=True``.
``start`` begins one VAD-bounded capture and emits ``voice.transcript``
after silence stops the recorder. ``stop`` forces transcription of the
active buffer, matching classic CLI push-to-talk. Three consecutive
silent captures stop the loop automatically and emit ``voice.transcript``
with ``no_speech_limit=True``.
"""
action = params.get("action", "start")

Expand Down Expand Up @@ -5673,13 +5672,14 @@ def _(rid, params: dict) -> dict:
),
silence_threshold=safe_threshold,
silence_duration=safe_duration,
auto_restart=False,
Comment thread
OutThisLife marked this conversation as resolved.
)
return _ok(rid, {"status": "recording"})
Comment thread
OutThisLife marked this conversation as resolved.

# action == "stop"
from hermes_cli.voice import stop_continuous

stop_continuous()
stop_continuous(force_transcribe=True)
return _ok(rid, {"status": "stopped"})
Comment thread
OutThisLife marked this conversation as resolved.
except ImportError:
return _err(
Expand Down
Loading
Loading