Files
homeai/homeai-voice/wyoming/wakeword_command.py
Aodhan Collins 6db8ae4492 feat: complete voice pipeline — fix wake word crash, bridge timeout, HA conversation agent
- Fix Wyoming satellite crash on wake word: convert macOS .aiff chimes to .wav
  (Python wave module only reads RIFF format, not AIFF)
- Fix OpenClaw HTTP bridge: increase subprocess timeout 30s → 120s, add SO_REUSEADDR
- Fix HA conversation component: use HTTP agent (not CLI) since HA runs in Docker
  on a different machine; update default host to Mac Mini IP, timeout to 120s
- Rewrite character manager as Vite+React app with schema validation
- Add Wyoming satellite wake word command, ElevenLabs TTS server, wakeword monitor
- Add Phase 5 development plan
- Update TODO.md: mark voice pipeline and agent tasks complete

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 00:15:55 +00:00

78 lines
2.4 KiB
Python

#!/usr/bin/env python3
"""Wake word detection command for Wyoming Satellite.
The satellite feeds raw 16kHz 16-bit mono audio via stdin.
This script reads that audio, runs openWakeWord, and prints
the wake word name to stdout when detected.
Usage (called by wyoming-satellite --wake-command):
    python wakeword_command.py [--wake-word hey_jarvis] [--threshold 0.5] [--cooldown 3.0] [--debug]
"""
import argparse
import sys
import numpy as np
import logging
# Module-level logger; handlers are installed by logging.basicConfig() in main().
_LOGGER = logging.getLogger(__name__)

# Sample rate, in Hz, of the raw audio the satellite feeds on stdin.
SAMPLE_RATE = 16000
# Samples per inference chunk: 80 ms of audio at SAMPLE_RATE (1280 samples),
# the frame size recommended by openWakeWord.
CHUNK_SIZE = SAMPLE_RATE * 80 // 1000
def main():
    """Detect a wake word in raw audio read from stdin.

    Parses the command-line options, loads the requested openWakeWord model
    with the ONNX inference framework, then reads fixed-size chunks of 16-bit
    PCM samples from stdin until EOF.  Each chunk is passed to the model; when
    the latest score of a model reaches ``--threshold`` and at least
    ``--cooldown`` seconds have elapsed since the previous trigger, the model
    name is printed on its own line to stdout (flushed immediately).

    Logging goes to stderr so that stdout carries only wake word names.

    The function returns normally on EOF, on Ctrl-C, and when the reader of
    stdout closes its end of the pipe.
    """
    # Function-scope imports, consistent with the other imports done inside
    # this entry point.
    import os
    import time

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--wake-word",
        default="hey_jarvis",
        help="openWakeWord model to load (default: hey_jarvis)",
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=0.5,
        help="minimum score that counts as a detection (default: 0.5)",
    )
    parser.add_argument(
        "--cooldown",
        type=float,
        default=3.0,
        help="minimum seconds between two reported detections (default: 3.0)",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="log debug messages to stderr",
    )
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.WARNING,
        format="%(asctime)s %(levelname)s %(message)s",
        stream=sys.stderr,
    )

    # Imported only after argument parsing and logging setup, so --help and
    # argument errors do not pay for (or depend on) loading openWakeWord.
    from openwakeword.model import Model

    oww = Model(
        wakeword_models=[args.wake_word],
        inference_framework="onnx",
    )

    bytes_per_chunk = CHUNK_SIZE * 2  # 16-bit = 2 bytes per sample
    # -inf guarantees the first detection is never suppressed, whatever the
    # monotonic clock's (arbitrary) starting value is.
    last_trigger = float("-inf")
    stdin = sys.stdin.buffer

    _LOGGER.debug("Wake word command ready, reading audio from stdin")

    try:
        while True:
            raw = stdin.read(bytes_per_chunk)
            if not raw:
                break  # EOF: the satellite closed our stdin

            # A short read (only possible right before EOF) is zero-padded so
            # the model always receives a full-size chunk.
            padded = raw.ljust(bytes_per_chunk, b"\x00")
            chunk = np.frombuffer(padded, dtype=np.int16)
            oww.predict(chunk)

            for ww, scores in oww.prediction_buffer.items():
                score = scores[-1] if scores else 0.0
                if score >= args.threshold:
                    # Monotonic clock: the cooldown must not be affected by
                    # wall-clock adjustments (NTP steps, manual changes).
                    now = time.monotonic()
                    if now - last_trigger >= args.cooldown:
                        last_trigger = now
                        # Print wake word name to stdout — satellite reads this
                        print(ww, flush=True)
                        _LOGGER.debug(
                            "Wake word detected: %s (score=%.3f)", ww, score
                        )
    except KeyboardInterrupt:
        pass
    except BrokenPipeError:
        # The reader of stdout went away.  Python flushes the standard streams
        # again at exit; point stdout at devnull so that flush does not raise
        # a second BrokenPipeError during interpreter shutdown.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
# Script entry point: start the detector only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()