feat: complete voice pipeline — fix wake word crash, bridge timeout, HA conversation agent
- Fix Wyoming satellite crash on wake word: convert macOS .aiff chimes to .wav
  (Python wave module only reads RIFF format, not AIFF)
- Fix OpenClaw HTTP bridge: increase subprocess timeout 30s → 120s, add SO_REUSEADDR
- Fix HA conversation component: use HTTP agent (not CLI) since HA runs in Docker
  on a different machine; update default host to Mac Mini IP, timeout to 120s
- Rewrite character manager as Vite+React app with schema validation
- Add Wyoming satellite wake word command, ElevenLabs TTS server, wakeword monitor
- Add Phase 5 development plan
- Update TODO.md: mark voice pipeline and agent tasks complete

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
77
homeai-voice/wyoming/wakeword_command.py
Normal file
77
homeai-voice/wyoming/wakeword_command.py
Normal file
@@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Wake word detection command for Wyoming Satellite.
|
||||
|
||||
The satellite feeds raw 16kHz 16-bit mono audio via stdin.
|
||||
This script reads that audio, runs openWakeWord, and prints
|
||||
the wake word name to stdout when detected.
|
||||
|
||||
Usage (called by wyoming-satellite --wake-command):
|
||||
    python wakeword_command.py [--wake-word hey_jarvis] [--threshold 0.5] [--cooldown 3.0] [--debug]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import numpy as np
|
||||
import logging
|
||||
|
||||
# Module-level logger; handlers and level are configured in main().
_LOGGER = logging.getLogger(__name__)

# Sample rate, in Hz, of the audio the satellite feeds to this script.
SAMPLE_RATE = 16000
# Samples handed to the model per prediction call.
CHUNK_SIZE = 1280  # ~80ms at 16kHz — recommended by openWakeWord
|
||||
|
||||
|
||||
def main():
    """Detect a wake word in raw audio from stdin and report it on stdout.

    Reads 16-bit PCM from stdin in ``CHUNK_SIZE``-sample chunks, feeds each
    chunk to an openWakeWord model, and prints the model's name on its own
    line whenever its newest score reaches ``--threshold`` and at least
    ``--cooldown`` seconds have passed since the previous detection.

    Command-line options:
        --wake-word   Name of the openWakeWord model to load.
        --threshold   Minimum score that counts as a detection.
        --cooldown    Minimum number of seconds between reported detections.
        --debug       Log at DEBUG level (to stderr) instead of WARNING.

    Returns when stdin reaches EOF, on Ctrl-C, or when the reader of stdout
    goes away.
    """
    import os
    import time

    parser = argparse.ArgumentParser()
    parser.add_argument("--wake-word", default="hey_jarvis")
    parser.add_argument("--threshold", type=float, default=0.5)
    parser.add_argument("--cooldown", type=float, default=3.0)
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    # Logs go to stderr so stdout carries nothing but wake word names.
    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.WARNING,
        format="%(asctime)s %(levelname)s %(message)s",
        stream=sys.stderr,
    )

    # Imported here, after logging has been configured.
    from openwakeword.model import Model

    oww = Model(
        wakeword_models=[args.wake_word],
        inference_framework="onnx",
    )

    # Monotonic timestamp of the last reported detection; None until the
    # first one, so the first detection is never suppressed by the cooldown.
    last_trigger = None
    bytes_per_chunk = CHUNK_SIZE * 2  # 16-bit = 2 bytes per sample

    _LOGGER.debug("Wake word command ready, reading audio from stdin")

    try:
        while True:
            raw = sys.stdin.buffer.read(bytes_per_chunk)
            if not raw:
                break  # EOF
            if len(raw) < bytes_per_chunk:
                # Short final read: pad with zero bytes up to a full chunk.
                raw = raw.ljust(bytes_per_chunk, b"\x00")

            chunk = np.frombuffer(raw, dtype=np.int16)
            oww.predict(chunk)

            for ww, scores in oww.prediction_buffer.items():
                score = scores[-1] if scores else 0.0
                if score >= args.threshold:
                    # A monotonic clock keeps the cooldown correct even if
                    # the wall clock is adjusted while the script runs.
                    now = time.monotonic()
                    if last_trigger is None or now - last_trigger >= args.cooldown:
                        last_trigger = now
                        # Print wake word name to stdout — satellite reads this
                        print(ww, flush=True)
                        _LOGGER.debug("Wake word detected: %s (score=%.3f)", ww, score)
    except KeyboardInterrupt:
        pass
    except BrokenPipeError:
        # The reader of stdout has gone away. Point stdout at devnull so the
        # interpreter's flush at shutdown does not raise a second time.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
|
||||
|
||||
|
||||
# Run the detector only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user