Files
homeai/homeai-rpi/satellite_wrapper.py
Aodhan Collins 1e52c002c2 feat: Raspberry Pi 5 kitchen satellite — Wyoming voice satellite with ReSpeaker pHAT
Add full Pi 5 satellite setup with ReSpeaker 2-Mics pHAT for kitchen
voice control via Wyoming protocol. Includes satellite_wrapper.py that
monkey-patches WakeStreamingSatellite to fix four compounding bugs:

- TTS echo suppression: mutes wake word detection while speaker plays
- Server writer race fix: checks _writer before streaming, re-arms on None
- Streaming timeout: auto-recovers after 30s if pipeline hangs
- Error recovery: resets streaming state on server Error events

Also includes Pi 5 hardware workarounds (wm8960 overlay, stereo-only
audio wrappers, ALSA mixer calibration) and deploy.sh with fast
iteration commands (--push-wrapper, --test-logs).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 20:09:47 +00:00

204 lines
8.7 KiB
Python

#!/usr/bin/env python3
"""Wyoming Satellite wrapper — echo suppression, writer resilience, streaming timeout.
Monkey-patches WakeStreamingSatellite to fix four compounding bugs that cause
the satellite to freeze after the first voice command:
1. TTS Echo: Mic picks up speaker audio → false wake word trigger → Whisper
hallucinates on silence. Fix: mute mic→wake forwarding while speaker is active.
2. Server Writer Race: HA disconnects after first command, _writer becomes None.
If wake word fires before HA reconnects, _send_run_pipeline() silently drops
the event → satellite stuck in is_streaming=True forever.
Fix: check _writer before entering streaming mode; re-arm wake if no server.
3. No Streaming Timeout: Once stuck in streaming mode, there's no recovery.
Fix: auto-reset after 30s if no Transcript arrives.
4. Error events don't reset streaming state in upstream code.
Fix: reset is_streaming on Error events from server.
Usage: python3 satellite_wrapper.py <same args as wyoming_satellite>
"""
import asyncio
import logging
import time
from wyoming.audio import AudioChunk, AudioStart, AudioStop
from wyoming.error import Error
from wyoming.wake import Detection
from wyoming_satellite.satellite import WakeStreamingSatellite
# getLogger() without a name returns the root logger.
_LOGGER = logging.getLogger()

# ─── Tuning constants ────────────────────────────────────────────────────────
# Seconds to keep wake detection muted once the server's last AudioStop has
# been seen. Has to cover the sox→aplay buffer draining (~1-2s) as well as the
# audio decaying afterwards.
_GRACE_SECONDS: float = 5.0

# Upper bound on a single mute window. Works as a safety valve so the wake
# word is re-enabled even when no AudioStop shows up (e.g. long TTS response).
_MAX_MUTE_SECONDS: float = 45.0

# Longest the satellite may remain in streaming mode without a Transcript or
# Error coming back. Guards against a permanent freeze if the server never
# responds.
_STREAMING_TIMEOUT: float = 30.0
# ─── Save original methods ───────────────────────────────────────────────────
# Capture the unpatched implementations up front so the wrappers defined in
# this module can delegate to them after the class attributes are replaced.
_orig_trigger_detection = WakeStreamingSatellite.trigger_detection
_orig_trigger_transcript = WakeStreamingSatellite.trigger_transcript
_orig_event_from_server = WakeStreamingSatellite.event_from_server
_orig_event_from_mic = WakeStreamingSatellite.event_from_mic
# NOTE: the wake handler replacement visible in this section does not call
# this one; it is kept as a reference to the upstream implementation.
_orig_event_from_wake = WakeStreamingSatellite.event_from_wake
# ─── Patch A: Mute wake on awake.wav ─────────────────────────────────────────
async def _patched_trigger_detection(self, detection):
    """Mute wake word detection when awake.wav starts playing.

    Opens a fresh mute window and then delegates to the original
    ``trigger_detection``.

    Args:
        detection: The wake word detection; passed through unchanged to the
            original handler.
    """
    self._speaker_mute_start = time.monotonic()
    # Drop any unmute deadline left behind by an earlier AudioStop. A stale
    # (already expired) deadline would otherwise end this new mute on the very
    # first mic event handled outside streaming mode.
    self._speaker_unmute_at = None
    self._speaker_active = True
    _LOGGER.debug("Speaker active (awake.wav) — wake detection muted")
    await _orig_trigger_detection(self, detection)
# ─── Patch B: Mute wake on done.wav ──────────────────────────────────────────
async def _patched_trigger_transcript(self, transcript):
    """Hold the wake mute across done.wav, then run the original handler.

    Args:
        transcript: Forwarded untouched to the original ``trigger_transcript``.
    """
    _LOGGER.debug("Speaker active (done.wav) — wake detection muted")
    # Only the active flag is raised here; the mute start time and any pending
    # unmute deadline are left as they are.
    self._speaker_active = True
    await _orig_trigger_transcript(self, transcript)
# ─── Patch C: Echo tracking + error recovery ─────────────────────────────────
async def _patched_event_from_server(self, event):
    """Track TTS audio for echo suppression; reset streaming on errors.

    AudioStart opens a mute window, AudioStop schedules its end after
    ``_GRACE_SECONDS``. An Error event clears ``is_streaming`` before the
    original handler runs and re-arms wake detection afterwards.

    Args:
        event: Event received from the server; always forwarded to the
            original ``event_from_server`` handler.
    """
    is_error = Error.is_type(event.type)

    # Echo suppression: track when speaker is active
    if AudioStart.is_type(event.type):
        self._speaker_active = True
        self._speaker_mute_start = time.monotonic()
        # A deadline scheduled by a previous AudioStop must not end the mute
        # while this new audio is still playing.
        self._speaker_unmute_at = None
        _LOGGER.debug("Speaker active (TTS) — wake detection muted")
    elif AudioStop.is_type(event.type):
        self._speaker_unmute_at = time.monotonic() + _GRACE_SECONDS
        _LOGGER.debug(
            "TTS finished — will unmute wake in %.1fs", _GRACE_SECONDS
        )

    # Error recovery: reset streaming state if server reports an error
    if is_error and self.is_streaming:
        _LOGGER.warning("Error from server while streaming — resetting")
        self.is_streaming = False

    # Call original handler (plays done.wav, forwards TTS audio, etc.)
    await _orig_event_from_server(self, event)

    # After original handler: if Error arrived, re-arm wake detection
    if is_error and not self.is_streaming:
        # The mute raised at wake-up normally ends via a later AudioStop.
        # After an error none may ever arrive, so give the mute a deadline
        # now; otherwise the wake word stays ignored until the
        # _MAX_MUTE_SECONDS safety valve fires.
        if (
            getattr(self, "_speaker_active", False)
            and getattr(self, "_speaker_unmute_at", None) is None
        ):
            self._speaker_unmute_at = time.monotonic() + _GRACE_SECONDS
        await self.trigger_streaming_stop()
        await self._send_wake_detect()
        _LOGGER.info("Waiting for wake word (after error)")
# ─── Patch D: Echo suppression + streaming timeout ───────────────────────────
async def _patched_event_from_mic(self, event, audio_bytes=None):
    """Drop mic audio during speaker playback; timeout stuck streaming.

    Two checks run before the event reaches the original ``event_from_mic``:

    * Streaming timeout: if the satellite has been streaming for longer than
      ``_STREAMING_TIMEOUT`` seconds, streaming is reset, wake detection is
      re-armed and the current event is discarded.
    * Echo suppression: while the speaker is flagged active and the satellite
      is not streaming, ``AudioChunk`` events are dropped until the grace
      deadline or the ``_MAX_MUTE_SECONDS`` safety valve releases the mute.

    Args:
        event: Event coming from the microphone.
        audio_bytes: Optional raw audio payload, forwarded unchanged to the
            original handler.
    """
    now = time.monotonic()

    # --- Streaming timeout ---
    if self.is_streaming:
        elapsed = now - getattr(self, "_streaming_start_time", 0)
        if elapsed > _STREAMING_TIMEOUT:
            _LOGGER.warning(
                "Streaming timeout (%.0fs) — no Transcript received, resetting",
                elapsed,
            )
            self.is_streaming = False
            # The mute raised at wake-up has no deadline unless an AudioStop
            # was seen. Without one the satellite would keep dropping mic
            # audio until the _MAX_MUTE_SECONDS safety valve, even though it
            # reports that it is waiting for the wake word again.
            if (
                getattr(self, "_speaker_active", False)
                and getattr(self, "_speaker_unmute_at", None) is None
            ):
                self._speaker_unmute_at = now + _GRACE_SECONDS
            # Tell server we're done sending audio
            await self.event_to_server(AudioStop().event())
            await self.trigger_streaming_stop()
            await self._send_wake_detect()
            _LOGGER.info("Waiting for wake word (after timeout)")
            return

    # --- Echo suppression ---
    if getattr(self, "_speaker_active", False) and not self.is_streaming:
        unmute_at = getattr(self, "_speaker_unmute_at", None)
        mute_start = getattr(self, "_speaker_mute_start", now)
        if unmute_at is not None and now >= unmute_at:
            # Grace period after AudioStop has elapsed
            self._speaker_active = False
            self._speaker_unmute_at = None
            _LOGGER.debug("Wake detection unmuted (grace period elapsed)")
        elif now - mute_start > _MAX_MUTE_SECONDS:
            # Safety valve — don't stay muted forever
            self._speaker_active = False
            self._speaker_unmute_at = None
            _LOGGER.warning("Wake detection force-unmuted (max mute timeout)")
        elif AudioChunk.is_type(event.type):
            # Drop this mic chunk — don't feed speaker audio to wake word
            return

    await _orig_event_from_mic(self, event, audio_bytes)
# ─── Patch E: Writer check before streaming (THE CRITICAL FIX) ───────────────
async def _patched_event_from_wake(self, event):
    """Enter streaming mode on a wake detection only if a server is connected.

    Events are ignored while already streaming, and anything that is not a
    ``Detection`` is ignored as well. When ``self._writer`` is ``None`` the
    wake detector is re-armed instead of switching to streaming.

    Args:
        event: Event coming from the wake word service.
    """
    # Guard: nothing to do mid-stream, and only Detection events matter here.
    if self.is_streaming or not Detection.is_type(event.type):
        return

    # THE FIX: with no server connection, entering streaming mode would leave
    # the satellite stuck in is_streaming=True, because the RunPipeline event
    # sent by _send_run_pipeline() would be silently dropped.
    if self._writer is None:
        _LOGGER.warning(
            "Wake word detected but no server connection — re-arming"
        )
        await self._send_wake_detect()
        return

    self.is_streaming = True
    # Timestamp read by the mic handler's streaming-timeout check.
    self._streaming_start_time = time.monotonic()
    _LOGGER.debug("Streaming audio")

    await self._send_run_pipeline()
    await self.forward_event(event)
    detection = Detection.from_event(event)
    await self.trigger_detection(detection)
    await self.trigger_streaming_start()
# ─── Apply patches ───────────────────────────────────────────────────────────
# Per-instance state used by the wrappers, installed as class-level defaults
# so every instance has the attributes without touching __init__.
WakeStreamingSatellite._speaker_active = False
WakeStreamingSatellite._speaker_mute_start = 0.0
WakeStreamingSatellite._speaker_unmute_at = None
WakeStreamingSatellite._streaming_start_time = 0.0

# Swap in the wrappers: trigger hooks first, then the event handlers.
WakeStreamingSatellite.trigger_detection = _patched_trigger_detection
WakeStreamingSatellite.trigger_transcript = _patched_trigger_transcript
WakeStreamingSatellite.event_from_server = _patched_event_from_server
WakeStreamingSatellite.event_from_mic = _patched_event_from_mic
WakeStreamingSatellite.event_from_wake = _patched_event_from_wake
# ─── Run the original main ───────────────────────────────────────────────────
if __name__ == "__main__":
    # Deferred import: the upstream entry point is only needed when this file
    # is executed as a script.
    from wyoming_satellite.__main__ import main

    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Exit quietly on Ctrl-C instead of printing a traceback.
        pass