Add full Pi 5 satellite setup with ReSpeaker 2-Mics pHAT for kitchen voice control via Wyoming protocol.

Includes satellite_wrapper.py that monkey-patches WakeStreamingSatellite to fix four compounding bugs:
- TTS echo suppression: mutes wake word detection while speaker plays
- Server writer race fix: checks _writer before streaming, re-arms on None
- Streaming timeout: auto-recovers after 30s if pipeline hangs
- Error recovery: resets streaming state on server Error events

Also includes Pi 5 hardware workarounds (wm8960 overlay, stereo-only audio wrappers, ALSA mixer calibration) and deploy.sh with fast iteration commands (--push-wrapper, --test-logs).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
204 lines
8.7 KiB
Python
204 lines
8.7 KiB
Python
#!/usr/bin/env python3
|
|
"""Wyoming Satellite wrapper — echo suppression, writer resilience, streaming timeout.
|
|
|
|
Monkey-patches WakeStreamingSatellite to fix four compounding bugs that cause
|
|
the satellite to freeze after the first voice command:
|
|
|
|
1. TTS Echo: Mic picks up speaker audio → false wake word trigger → Whisper
|
|
hallucinates on silence. Fix: mute mic→wake forwarding while speaker is active.
|
|
|
|
2. Server Writer Race: HA disconnects after first command, _writer becomes None.
|
|
If wake word fires before HA reconnects, _send_run_pipeline() silently drops
|
|
the event → satellite stuck in is_streaming=True forever.
|
|
Fix: check _writer before entering streaming mode; re-arm wake if no server.
|
|
|
|
3. No Streaming Timeout: Once stuck in streaming mode, there's no recovery.
|
|
Fix: auto-reset after 30s if no Transcript arrives.
|
|
|
|
4. Error events don't reset streaming state in upstream code.
|
|
Fix: reset is_streaming on Error events from server.
|
|
|
|
Usage: python3 satellite_wrapper.py <same args as wyoming_satellite>
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import time
|
|
|
|
from wyoming.audio import AudioChunk, AudioStart, AudioStop
|
|
from wyoming.error import Error
|
|
from wyoming.wake import Detection
|
|
|
|
from wyoming_satellite.satellite import WakeStreamingSatellite
|
|
|
|
# Module logger. getLogger() is called without a name, so this is the root
# logger rather than a per-module one.
_LOGGER = logging.getLogger()
|
|
|
|
# ─── Tuning constants ────────────────────────────────────────────────────────
|
|
|
|
# Grace period (seconds) during which wake detection stays muted after the
# server's last AudioStop. It has to cover the sox→aplay buffer draining
# (~1-2s) plus the audio decaying.
_GRACE_SECONDS: float = 5.0

# Upper bound (seconds) on a single mute. Acts as a safety valve: the mute is
# lifted even when no AudioStop ever arrives (e.g. long TTS response).
_MAX_MUTE_SECONDS: float = 45.0

# Longest time (seconds) streaming mode may last without a Transcript or Error
# being received. Guards against a permanent freeze if the server never
# responds.
_STREAMING_TIMEOUT: float = 30.0
|
|
|
|
# ─── Save original methods ───────────────────────────────────────────────────
|
|
|
|
# Capture the upstream implementations before they are replaced, so the
# patched versions can delegate to them. (_orig_event_from_wake is captured
# for completeness; the patched wake handler visible in this file does not
# call it.)
(
    _orig_event_from_server,
    _orig_event_from_mic,
    _orig_event_from_wake,
    _orig_trigger_detection,
    _orig_trigger_transcript,
) = (
    WakeStreamingSatellite.event_from_server,
    WakeStreamingSatellite.event_from_mic,
    WakeStreamingSatellite.event_from_wake,
    WakeStreamingSatellite.trigger_detection,
    WakeStreamingSatellite.trigger_transcript,
)
|
|
|
|
# ─── Patch A: Mute wake on awake.wav ─────────────────────────────────────────
|
|
|
|
|
|
async def _patched_trigger_detection(self, detection):
    """Start muting wake detection, then run the original detection trigger.

    Records the moment the mute began (used by the max-mute safety valve in
    the mic handler) and flags the speaker as active before delegating to the
    saved upstream ``trigger_detection``.

    Args:
        detection: The detection object, passed through unchanged.
    """
    muted_since = time.monotonic()
    self._speaker_mute_start = muted_since
    self._speaker_active = True
    _LOGGER.debug("Speaker active (awake.wav) — wake detection muted")

    await _orig_trigger_detection(self, detection)
|
|
|
|
|
|
# ─── Patch B: Mute wake on done.wav ──────────────────────────────────────────
|
|
|
|
|
|
async def _patched_trigger_transcript(self, transcript):
    """Flag the speaker as active, then run the original transcript trigger.

    Only ``_speaker_active`` is touched here; the mute start time and the
    scheduled unmute time are left as they were.

    Args:
        transcript: The transcript object, passed through unchanged.
    """
    # Keep wake detection muted through whatever the original trigger plays.
    self._speaker_active = True
    _LOGGER.debug("Speaker active (done.wav) — wake detection muted")

    await _orig_trigger_transcript(self, transcript)
|
|
|
|
|
|
# ─── Patch C: Echo tracking + error recovery ─────────────────────────────────
|
|
|
|
|
|
async def _patched_event_from_server(self, event):
    """Track TTS playback for echo suppression and recover from server errors.

    * ``AudioStart``: marks the speaker active, restarts the mute timer and
      cancels any unmute that is still pending from an earlier ``AudioStop``.
    * ``AudioStop``: schedules the unmute ``_GRACE_SECONDS`` from now (the
      mic handler performs the actual unmute once that time has passed).
    * ``Error``: clears ``is_streaming`` before delegating, and afterwards
      re-arms wake word detection.

    Every event is forwarded to the saved upstream handler.

    Args:
        event: The event received from the server.
    """
    event_type = event.type

    # Echo suppression: track when the speaker is active.
    if AudioStart.is_type(event_type):
        self._speaker_active = True
        self._speaker_mute_start = time.monotonic()
        # A new audio stream is starting. Drop any unmute deadline left over
        # from a previous AudioStop, otherwise the mic handler would unmute
        # wake detection in the middle of this playback.
        self._speaker_unmute_at = None
        _LOGGER.debug("Speaker active (TTS) — wake detection muted")
    elif AudioStop.is_type(event_type):
        self._speaker_unmute_at = time.monotonic() + _GRACE_SECONDS
        _LOGGER.debug(
            "TTS finished — will unmute wake in %.1fs", _GRACE_SECONDS
        )

    is_error = Error.is_type(event_type)

    # Error recovery: reset streaming state if the server reports an error.
    if is_error and self.is_streaming:
        _LOGGER.warning("Error from server while streaming — resetting")
        self.is_streaming = False

    # Delegate to the original handler for the regular processing.
    await _orig_event_from_server(self, event)

    # After the original handler: if an Error arrived, re-arm wake detection.
    if is_error and not self.is_streaming:
        await self.trigger_streaming_stop()
        await self._send_wake_detect()
        _LOGGER.info("Waiting for wake word (after error)")
|
|
|
|
|
|
# ─── Patch D: Echo suppression + streaming timeout ───────────────────────────
|
|
|
|
|
|
async def _patched_event_from_mic(self, event, audio_bytes=None):
    """Drop mic audio during speaker playback; time out stuck streaming.

    Two checks run before the event reaches the saved upstream handler:

    1. Streaming timeout: if ``is_streaming`` has been set for longer than
       ``_STREAMING_TIMEOUT`` seconds, streaming is reset, an ``AudioStop`` is
       sent to the server, wake detection is re-armed, and the event is
       discarded.
    2. Echo suppression: while the speaker is flagged active (and the
       satellite is not streaming), ``AudioChunk`` events are discarded. The
       mute is lifted once the scheduled unmute time has passed, or once
       ``_MAX_MUTE_SECONDS`` have elapsed since the mute started.

    Args:
        event: The event coming from the microphone.
        audio_bytes: Passed through unchanged to the original handler.
    """
    # --- Streaming timeout ---
    if self.is_streaming:
        elapsed = time.monotonic() - getattr(self, "_streaming_start_time", 0)
        if elapsed > _STREAMING_TIMEOUT:
            _LOGGER.warning(
                "Streaming timeout (%.0fs) — no Transcript received, resetting",
                elapsed,
            )
            self.is_streaming = False
            # Tell server we're done sending audio
            await self.event_to_server(AudioStop().event())
            await self.trigger_streaming_stop()
            await self._send_wake_detect()
            _LOGGER.info("Waiting for wake word (after timeout)")
            # NOTE(review): _speaker_active is not cleared here, so if it was
            # set when the wake word fired, mic chunks keep being dropped
            # until the grace period or the max-mute valve below lifts the
            # mute — confirm this is intended.
            return

    # --- Echo suppression ---
    if getattr(self, "_speaker_active", False) and not self.is_streaming:
        now = time.monotonic()

        # Check if grace period has elapsed after AudioStop. The deadline is
        # optional, so test for None explicitly instead of relying on the
        # truthiness of a float.
        unmute_at = getattr(self, "_speaker_unmute_at", None)
        if unmute_at is not None and now >= unmute_at:
            self._speaker_active = False
            self._speaker_unmute_at = None
            _LOGGER.debug("Wake detection unmuted (grace period elapsed)")
        # Safety valve — don't stay muted forever
        elif now - getattr(self, "_speaker_mute_start", now) > _MAX_MUTE_SECONDS:
            self._speaker_active = False
            self._speaker_unmute_at = None
            _LOGGER.warning("Wake detection force-unmuted (max mute timeout)")
        elif AudioChunk.is_type(event.type):
            # Drop this mic chunk — don't feed speaker audio to wake word
            return

    await _orig_event_from_mic(self, event, audio_bytes)
|
|
|
|
|
|
# ─── Patch E: Writer check before streaming (THE CRITICAL FIX) ───────────────
|
|
|
|
|
|
async def _patched_event_from_wake(self, event):
    """Enter streaming mode on a wake detection, but only with a server attached.

    Events are ignored while already streaming, and so is anything that is
    not a ``Detection``. If ``self._writer`` is ``None`` the detection is
    discarded and wake detection is re-armed, so the satellite never enters
    streaming mode without a server connection. Otherwise streaming is
    flagged, its start time recorded for the timeout check, and the
    pipeline/detection/streaming-start steps run in order.

    Args:
        event: The event received from the wake word service.
    """
    if self.is_streaming or not Detection.is_type(event.type):
        return

    # No server connection: stay out of streaming mode. The module notes say
    # the run-pipeline request would otherwise be lost, leaving is_streaming
    # set with nothing to clear it.
    if self._writer is None:
        _LOGGER.warning(
            "Wake word detected but no server connection — re-arming"
        )
        await self._send_wake_detect()
        return

    self.is_streaming = True
    self._streaming_start_time = time.monotonic()
    _LOGGER.debug("Streaming audio")

    await self._send_run_pipeline()
    await self.forward_event(event)
    detection = Detection.from_event(event)
    await self.trigger_detection(detection)
    await self.trigger_streaming_start()
|
|
|
|
|
|
# ─── Apply patches ───────────────────────────────────────────────────────────
|
|
|
|
# Swap each upstream method for its patched counterpart.
for _method_name, _replacement in (
    ("event_from_server", _patched_event_from_server),
    ("event_from_mic", _patched_event_from_mic),
    ("event_from_wake", _patched_event_from_wake),
    ("trigger_detection", _patched_trigger_detection),
    ("trigger_transcript", _patched_trigger_transcript),
):
    setattr(WakeStreamingSatellite, _method_name, _replacement)
del _method_name, _replacement

# State read by the patches. Installed as class-level defaults so every
# instance has these attributes without __init__ having to set them.
for _attr_name, _default in (
    ("_speaker_active", False),
    ("_speaker_unmute_at", None),
    ("_speaker_mute_start", 0.0),
    ("_streaming_start_time", 0.0),
):
    setattr(WakeStreamingSatellite, _attr_name, _default)
del _attr_name, _default
|
|
|
|
# ─── Run the original main ───────────────────────────────────────────────────
|
|
|
|
if __name__ == "__main__":
    # Only needed when executed as a script, hence the local import.
    from wyoming_satellite.__main__ import main

    try:
        # main() is handed to asyncio.run, which drives it to completion.
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C: exit without a traceback.
        pass
|