- Fix Wyoming satellite crash on wake word: convert macOS .aiff chimes to .wav (Python wave module only reads RIFF format, not AIFF) - Fix OpenClaw HTTP bridge: increase subprocess timeout 30s → 120s, add SO_REUSEADDR - Fix HA conversation component: use HTTP agent (not CLI) since HA runs in Docker on a different machine; update default host to Mac Mini IP, timeout to 120s - Rewrite character manager as Vite+React app with schema validation - Add Wyoming satellite wake word command, ElevenLabs TTS server, wakeword monitor - Add Phase 5 development plan - Update TODO.md: mark voice pipeline and agent tasks complete Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
78 lines
2.4 KiB
Python
78 lines
2.4 KiB
Python
#!/usr/bin/env python3
|
|
"""Wake word detection command for Wyoming Satellite.
|
|
|
|
The satellite feeds raw 16kHz 16-bit mono audio via stdin.
|
|
This script reads that audio, runs openWakeWord, and prints
|
|
the wake word name to stdout when detected.
|
|
|
|
Usage (called by wyoming-satellite --wake-command):
|
|
python wakeword_command.py [--wake-word hey_jarvis] [--threshold 0.5] [--cooldown 3.0] [--debug]
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
import numpy as np
|
|
import logging
|
|
|
|
# Module-level logger; its level and output stream are configured in main().
_LOGGER = logging.getLogger(__name__)

# Sample rate of the incoming audio stream, in Hz (16 kHz per the module docstring).
SAMPLE_RATE = 16_000

# Number of samples handed to the model per prediction step:
# ~80ms at 16kHz — the chunk length recommended by openWakeWord.
CHUNK_SIZE = 1_280
|
|
|
|
|
|
def main(argv=None):
    """Stream audio from stdin through openWakeWord and report detections.

    Reads fixed-size chunks of raw 16-bit PCM from stdin, feeds each chunk
    to an openWakeWord model, and prints the wake word name on stdout
    whenever the model's most recent score reaches ``--threshold``.
    Detections are rate-limited to one per ``--cooldown`` seconds.

    The loop ends when stdin reaches EOF, when the process reading stdout
    goes away (broken pipe), or on Ctrl-C.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``.
    """
    # Function-scope imports, in keeping with how this script defers imports.
    import os
    import time

    parser = argparse.ArgumentParser()
    parser.add_argument("--wake-word", default="hey_jarvis")
    parser.add_argument("--threshold", type=float, default=0.5)
    parser.add_argument("--cooldown", type=float, default=3.0)
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args(argv)

    # Diagnostics go to stderr only; stdout is reserved for wake word names.
    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.WARNING,
        format="%(asctime)s %(levelname)s %(message)s",
        stream=sys.stderr,
    )

    # Deferred until after argument parsing, so --help and usage errors
    # exit before the model library is loaded.
    from openwakeword.model import Model

    oww = Model(
        wakeword_models=[args.wake_word],
        inference_framework="onnx",
    )

    bytes_per_chunk = CHUNK_SIZE * 2  # 16-bit = 2 bytes per sample

    # Cooldown is measured on the monotonic clock so wall-clock adjustments
    # cannot suppress or duplicate triggers. Starting at -inf guarantees the
    # first detection is never swallowed by the cooldown check.
    last_trigger = float("-inf")

    _LOGGER.debug("Wake word command ready, reading audio from stdin")

    try:
        while True:
            raw = sys.stdin.buffer.read(bytes_per_chunk)
            if not raw:
                # EOF: the audio source closed its end of the pipe.
                break
            if len(raw) < bytes_per_chunk:
                # Short final read: zero-pad so the model always sees a
                # full chunk (and an even number of bytes for int16).
                raw = raw.ljust(bytes_per_chunk, b"\x00")

            chunk = np.frombuffer(raw, dtype=np.int16)
            oww.predict(chunk)

            for ww, scores in oww.prediction_buffer.items():
                # Only the newest score of each model is considered.
                score = scores[-1] if scores else 0.0
                if score < args.threshold:
                    continue

                now = time.monotonic()
                if now - last_trigger < args.cooldown:
                    continue

                last_trigger = now
                # Print wake word name to stdout — satellite reads this
                print(ww, flush=True)
                _LOGGER.debug("Wake word detected: %s (score=%.3f)", ww, score)
    except KeyboardInterrupt:
        pass
    except BrokenPipeError:
        # The reader of stdout is gone. Python flushes the standard streams
        # again at exit; point stdout at devnull so that final flush does not
        # raise a second BrokenPipeError during interpreter shutdown.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
|
|
|
|
|
|
# Run the detector only when this file is executed as a script
# (e.g. via wyoming-satellite --wake-command), not when it is imported.
if __name__ == "__main__":
    main()
|