#!/usr/bin/env python3
"""Wyoming Satellite wrapper — echo suppression, writer resilience, streaming timeout.

Monkey-patches WakeStreamingSatellite to fix four compounding bugs that cause
the satellite to freeze after the first voice command:

1. TTS Echo: Mic picks up speaker audio → false wake word trigger → Whisper
   hallucinates on silence.
   Fix: mute mic→wake forwarding while speaker is active.

2. Server Writer Race: HA disconnects after first command, _writer becomes
   None. If wake word fires before HA reconnects, _send_run_pipeline()
   silently drops the event → satellite stuck in is_streaming=True forever.
   Fix: check _writer before entering streaming mode; re-arm wake if no server.

3. No Streaming Timeout: Once stuck in streaming mode, there's no recovery.
   Fix: auto-reset after 30s if no Transcript arrives.

4. Error events don't reset streaming state in upstream code.
   Fix: reset is_streaming on Error events from server.

Mute bookkeeping lives in three per-satellite attributes:

* ``_speaker_active``     — True while mic audio must not reach the wake service.
* ``_speaker_mute_start`` — monotonic time the current mute window began
  (drives the ``_MAX_MUTE_SECONDS`` safety valve).
* ``_speaker_unmute_at``  — monotonic deadline at which the mute ends, or
  None while no end is scheduled yet.

Usage:
    python3 satellite_wrapper.py
"""

import asyncio
import logging
import time

from wyoming.audio import AudioChunk, AudioStart, AudioStop
from wyoming.error import Error
from wyoming.wake import Detection
from wyoming_satellite.satellite import WakeStreamingSatellite

_LOGGER = logging.getLogger()

# ─── Tuning constants ────────────────────────────────────────────────────────

# How long to keep wake muted after the last AudioStop from the server.
# Must be long enough for sox→aplay buffer to drain (~1-2s) plus audio decay.
_GRACE_SECONDS = 5.0

# Safety valve — unmute even if no AudioStop arrives (e.g. long TTS response).
_MAX_MUTE_SECONDS = 45.0

# Max time in streaming mode without receiving a Transcript or Error.
# Prevents permanent freeze if server never responds.
_STREAMING_TIMEOUT = 30.0

# ─── Save original methods ───────────────────────────────────────────────────

_orig_event_from_server = WakeStreamingSatellite.event_from_server
_orig_event_from_mic = WakeStreamingSatellite.event_from_mic
# NOTE(review): saved for completeness but never called below — the patched
# event_from_wake replaces the upstream handler instead of wrapping it, so any
# other logic the upstream handler performs is bypassed. Confirm that is
# intended for the installed wyoming-satellite version.
_orig_event_from_wake = WakeStreamingSatellite.event_from_wake
_orig_trigger_detection = WakeStreamingSatellite.trigger_detection
_orig_trigger_transcript = WakeStreamingSatellite.trigger_transcript


# ─── Mute-state helpers ──────────────────────────────────────────────────────


def _mute_wake(satellite, source):
    """Open (or restart) a mute window because *source* is about to play."""
    satellite._speaker_active = True
    satellite._speaker_mute_start = time.monotonic()
    # Drop any deadline left over from an earlier AudioStop; otherwise it
    # would end this new window while the speaker is still playing.
    satellite._speaker_unmute_at = None
    _LOGGER.debug("Speaker active (%s) — wake detection muted", source)


def _schedule_unmute(satellite):
    """Arrange for the mute window to close after the grace period."""
    satellite._speaker_unmute_at = time.monotonic() + _GRACE_SECONDS


def _unmute_wake(satellite):
    """Close the mute window immediately."""
    satellite._speaker_active = False
    satellite._speaker_unmute_at = None


# ─── Patch A: Mute wake on awake.wav ─────────────────────────────────────────


async def _patched_trigger_detection(self, detection) -> None:
    """Mute wake word detection when awake.wav starts playing."""
    _mute_wake(self, "awake.wav")
    await _orig_trigger_detection(self, detection)


# ─── Patch B: Mute wake on done.wav ──────────────────────────────────────────


async def _patched_trigger_transcript(self, transcript) -> None:
    """Keep muted through done.wav playback, then release after the grace period.

    If a TTS response follows, its AudioStart cancels the deadline set here
    and its AudioStop sets a new one. If no TTS follows, this deadline is the
    only thing that unmutes the wake word before the max-mute safety valve.
    """
    _mute_wake(self, "done.wav")
    try:
        await _orig_trigger_transcript(self, transcript)
    finally:
        _schedule_unmute(self)


# ─── Patch C: Echo tracking + error recovery ─────────────────────────────────


async def _patched_event_from_server(self, event) -> None:
    """Track TTS audio for echo suppression; reset streaming on errors."""
    is_error = Error.is_type(event.type)

    # Echo suppression: track when speaker is active
    if AudioStart.is_type(event.type):
        _mute_wake(self, "TTS")
    elif AudioStop.is_type(event.type):
        _schedule_unmute(self)
        _LOGGER.debug(
            "TTS finished — will unmute wake in %.1fs", _GRACE_SECONDS
        )

    # Error recovery: reset streaming state if server reports an error
    if is_error and self.is_streaming:
        _LOGGER.warning("Error from server while streaming — resetting")
        self.is_streaming = False
        # The awake.wav mute from this session would otherwise keep the
        # re-armed wake word deaf until the max-mute safety valve fires.
        _schedule_unmute(self)

    # Call original handler (plays done.wav, forwards TTS audio, etc.)
    await _orig_event_from_server(self, event)

    # After original handler: if Error arrived, re-arm wake detection.
    # is_streaming is re-checked because it may have changed during the await.
    if is_error and not self.is_streaming:
        await self.trigger_streaming_stop()
        await self._send_wake_detect()
        _LOGGER.info("Waiting for wake word (after error)")


# ─── Patch D: Echo suppression + streaming timeout ───────────────────────────


async def _patched_event_from_mic(self, event, audio_bytes=None) -> None:
    """Drop mic audio during speaker playback; timeout stuck streaming."""
    now = time.monotonic()

    # --- Streaming timeout ---
    if self.is_streaming:
        elapsed = now - self._streaming_start_time
        if elapsed > _STREAMING_TIMEOUT:
            _LOGGER.warning(
                "Streaming timeout (%.0fs) — no Transcript received, resetting",
                elapsed,
            )
            self.is_streaming = False
            # awake.wav finished long ago; clear its mute before any await so
            # the re-armed wake word actually receives mic audio and a mute
            # set concurrently during the awaits below is not clobbered.
            _unmute_wake(self)
            # Tell server we're done sending audio
            await self.event_to_server(AudioStop().event())
            await self.trigger_streaming_stop()
            await self._send_wake_detect()
            _LOGGER.info("Waiting for wake word (after timeout)")
            return

    # --- Echo suppression ---
    if self._speaker_active and not self.is_streaming:
        deadline = self._speaker_unmute_at
        if deadline is not None and now >= deadline:
            # Grace period after the last speaker activity has elapsed
            _unmute_wake(self)
            _LOGGER.debug("Wake detection unmuted (grace period elapsed)")
        elif now - self._speaker_mute_start > _MAX_MUTE_SECONDS:
            # Safety valve — don't stay muted forever
            _unmute_wake(self)
            _LOGGER.warning("Wake detection force-unmuted (max mute timeout)")
        elif AudioChunk.is_type(event.type):
            # Drop this mic chunk — don't feed speaker audio to wake word
            return

    await _orig_event_from_mic(self, event, audio_bytes)


# ─── Patch E: Writer check before streaming (THE CRITICAL FIX) ───────────────


async def _patched_event_from_wake(self, event) -> None:
    """Check server connection before entering streaming mode."""
    if self.is_streaming:
        return

    if not Detection.is_type(event.type):
        return

    # THE FIX: If no server connection, don't enter streaming mode.
    # Without this, _send_run_pipeline() silently drops the RunPipeline
    # event, and the satellite is stuck in is_streaming=True forever.
    if self._writer is None:
        _LOGGER.warning(
            "Wake word detected but no server connection — re-arming"
        )
        await self._send_wake_detect()
        return

    self.is_streaming = True
    # Stamp the start before the first await so the timeout check in
    # event_from_mic never sees a stale value from a previous session.
    self._streaming_start_time = time.monotonic()
    _LOGGER.debug("Streaming audio")

    await self._send_run_pipeline()
    await self.forward_event(event)
    await self.trigger_detection(Detection.from_event(event))
    await self.trigger_streaming_start()


# ─── Apply patches ───────────────────────────────────────────────────────────

WakeStreamingSatellite.event_from_server = _patched_event_from_server
WakeStreamingSatellite.event_from_mic = _patched_event_from_mic
WakeStreamingSatellite.event_from_wake = _patched_event_from_wake
WakeStreamingSatellite.trigger_detection = _patched_trigger_detection
WakeStreamingSatellite.trigger_transcript = _patched_trigger_transcript

# Instance attributes (set as class defaults so they exist before __init__)
WakeStreamingSatellite._speaker_active = False
WakeStreamingSatellite._speaker_unmute_at = None
WakeStreamingSatellite._speaker_mute_start = 0.0
WakeStreamingSatellite._streaming_start_time = 0.0

# ─── Run the original main ───────────────────────────────────────────────────

if __name__ == "__main__":
    from wyoming_satellite.__main__ import main

    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        pass