feat: complete voice pipeline — fix wake word crash, bridge timeout, HA conversation agent

- Fix Wyoming satellite crash on wake word: convert macOS .aiff chimes to .wav (Python wave module only reads RIFF format, not AIFF)
- Fix OpenClaw HTTP bridge: increase subprocess timeout 30s → 120s, add SO_REUSEADDR
- Fix HA conversation component: use HTTP agent (not CLI) since HA runs in Docker on a different machine; update default host to Mac Mini IP, timeout to 120s
- Rewrite character manager as Vite+React app with schema validation
- Add Wyoming satellite wake word command, ElevenLabs TTS server, wakeword monitor
- Add Phase 5 development plan
- Update TODO.md: mark voice pipeline and agent tasks complete

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 00:15:55 +00:00
parent 664bb6d275
commit 6db8ae4492
34 changed files with 4649 additions and 1083 deletions
--- a/homeai-voice/wyoming/wakeword_command.py
+++ b/homeai-voice/wyoming/wakeword_command.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+"""Wake word detection command for Wyoming Satellite.
+
+The satellite feeds raw 16kHz 16-bit mono audio via stdin.
+This script reads that audio, runs openWakeWord, and prints
+the wake word name to stdout when detected.
+
+Usage (called by wyoming-satellite --wake-command):
+    python wakeword_command.py [--wake-word hey_jarvis] [--threshold 0.5]
+                               [--cooldown 3.0] [--debug]
+"""
+
+import argparse
+import sys
+import numpy as np
+import logging
+
+# Module-level logger; main() configures logging to write to stderr so that
+# stdout carries only detected wake word names.
+_LOGGER = logging.getLogger(__name__)
+
+# Input sample rate in Hz (the 16kHz audio described in the module docstring).
+# NOTE(review): not referenced elsewhere in the visible code.
+SAMPLE_RATE = 16000
+# Number of samples read from stdin and passed to the model per iteration.
+CHUNK_SIZE = 1280  # ~80ms at 16kHz — recommended by openWakeWord
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--wake-word", default="hey_jarvis")
+    parser.add_argument("--threshold", type=float, default=0.5)
+    parser.add_argument("--cooldown", type=float, default=3.0)
+    parser.add_argument("--debug", action="store_true")
+    args = parser.parse_args()
+
+    logging.basicConfig(
+        level=logging.DEBUG if args.debug else logging.WARNING,
+        format="%(asctime)s %(levelname)s %(message)s",
+        stream=sys.stderr,
+    )
+
+    import openwakeword
+    from openwakeword.model import Model
+
+    oww = Model(
+        wakeword_models=[args.wake_word],
+        inference_framework="onnx",
+    )
+
+    import time
+    last_trigger = 0.0
+    bytes_per_chunk = CHUNK_SIZE * 2  # 16-bit = 2 bytes per sample
+
+    _LOGGER.debug("Wake word command ready, reading audio from stdin")
+
+    try:
+        while True:
+            raw = sys.stdin.buffer.read(bytes_per_chunk)
+            if not raw:
+                break
+            if len(raw) < bytes_per_chunk:
+                # Pad with zeros if short read
+                raw = raw + b'\x00' * (bytes_per_chunk - len(raw))
+
+            chunk = np.frombuffer(raw, dtype=np.int16)
+            oww.predict(chunk)
+
+            for ww, scores in oww.prediction_buffer.items():
+                score = scores[-1] if scores else 0.0
+                if score >= args.threshold:
+                    now = time.time()
+                    if now - last_trigger >= args.cooldown:
+                        last_trigger = now
+                        # Print wake word name to stdout — satellite reads this
+                        print(ww, flush=True)
+                        _LOGGER.debug("Wake word detected: %s (score=%.3f)", ww, score)
+    except (KeyboardInterrupt, BrokenPipeError):
+        pass
+
+
+# Run the detector only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()