Files
homeai/homeai-voice/wyoming/wakeword_daemon.py
Aodhan Collins c31724c92b Complete P2 (LLM) and P3 (voice pipeline) implementation
P2 — homeai-llm:
- Fix ollama launchd plist path for Apple Silicon (/opt/homebrew/bin/ollama)
- Add Modelfiles for local GGUF models: llama3.3:70b, qwen3:32b, codestral:22b
  (registered via `ollama create` — no re-download needed)

P3 — homeai-voice:
- Wyoming STT: wyoming-faster-whisper, large-v3 model, port 10300
- Wyoming TTS: custom Kokoro ONNX server (wyoming_kokoro_server.py), port 10301
  Voice af_heart; models at ~/models/kokoro/
- Wake word: openWakeWord daemon (hey_jarvis), notifies OpenClaw at /wake
- launchd plists for all three services + load-all-launchd.sh helper
- Smoke test: wyoming/test-pipeline.sh — 3/3 passing

HA Wyoming integration pending manual UI config (STT 10.0.0.200:10300,
TTS 10.0.0.200:10301).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-04 23:28:22 +00:00

111 lines
3.3 KiB
Python

#!/usr/bin/env python3
"""Always-on wake word detection daemon using openWakeWord.
Listens on the default microphone, fires an HTTP POST to --notify-url
when the wake word is detected.
Usage:
python wakeword_daemon.py --wake-word hey_jarvis --notify-url http://localhost:8080/wake
"""
import argparse
import logging
import time
import urllib.request
import json
import numpy as np
# Module-level logger; configured by logging.basicConfig() in main().
_LOGGER = logging.getLogger(__name__)
# Capture format expected by openWakeWord: 16 kHz mono 16-bit PCM.
SAMPLE_RATE = 16000
CHUNK_SIZE = 1280  # ~80ms at 16kHz — recommended by openWakeWord
def notify(url: str, wake_word: str, score: float):
    """POST a JSON wake event to *url*.

    The payload is ``{"wake_word": <name>, "score": <float>}``. Delivery is
    best-effort: any failure (connection refused, timeout, bad URL, non-2xx
    response) is logged as a warning and never raised to the caller, so the
    audio loop keeps running.
    """
    body = json.dumps({"wake_word": wake_word, "score": float(score)}).encode()
    try:
        request = urllib.request.Request(
            url,
            data=body,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        # Short timeout: this runs inline in the capture loop, so a hung
        # endpoint must not stall audio reads for long.
        with urllib.request.urlopen(request, timeout=2):
            pass
        _LOGGER.info("Wake word '%s' detected (score=%.3f) — notified %s", wake_word, score, url)
    except Exception as exc:
        _LOGGER.warning("Failed to notify %s: %s", url, exc)
def main():
    """Run the wake word daemon: parse CLI args, load the model, loop forever.

    Reads ~80ms chunks from the default microphone and, whenever any loaded
    wake word model's latest score crosses --threshold (rate-limited by
    --cooldown seconds), fires an HTTP POST to --notify-url.

    Raises SystemExit(1) if pyaudio is not installed. Exits cleanly on
    Ctrl-C, releasing the audio stream and PortAudio handle.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--wake-word", default="hey_jarvis")
    parser.add_argument("--notify-url", default="http://localhost:8080/wake")
    parser.add_argument("--threshold", type=float, default=0.5)
    parser.add_argument("--cooldown", type=float, default=3.0, help="Seconds between triggers")
    parser.add_argument("--model-dir", default=None, help="Path to custom .onnx wake word model")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
    )

    # Heavy dependencies are imported lazily so --help works without them.
    try:
        import pyaudio
    except ImportError:
        _LOGGER.error("pyaudio not installed. Run: pip install pyaudio")
        raise SystemExit(1)
    from openwakeword.model import Model  # (plain `import openwakeword` was unused)

    _LOGGER.info("Loading wake word model: %s", args.wake_word)
    model_paths = []
    if args.model_dir:
        import glob
        import os
        model_paths = glob.glob(os.path.join(args.model_dir, "*.onnx"))
        if not model_paths:
            # Previously this fell back silently; make the fallback visible.
            _LOGGER.warning(
                "No .onnx models found in %s — falling back to built-in '%s'",
                args.model_dir, args.wake_word,
            )
    oww = Model(
        wakeword_models=model_paths if model_paths else [args.wake_word],
        inference_framework="onnx",
    )

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            rate=SAMPLE_RATE,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=CHUNK_SIZE,
        )
    except Exception:
        # Fix: don't leak the PortAudio handle when the mic can't be opened
        # (e.g. no default input device, device busy).
        audio.terminate()
        raise

    _LOGGER.info("Listening for wake word '%s'...", args.wake_word)
    last_trigger = 0.0
    try:
        while True:
            # exception_on_overflow=False: drop overflowed frames rather than
            # crash if a slow notify() delays the read.
            raw = stream.read(CHUNK_SIZE, exception_on_overflow=False)
            chunk = np.frombuffer(raw, dtype=np.int16)
            oww.predict(chunk)
            # prediction_buffer maps each loaded model name to its recent
            # scores; only the latest score matters for triggering.
            for ww, scores in oww.prediction_buffer.items():
                score = scores[-1] if scores else 0.0
                if score >= args.threshold:
                    now = time.time()
                    # Cooldown is global across models: one trigger silences
                    # all wake words for --cooldown seconds.
                    if now - last_trigger >= args.cooldown:
                        last_trigger = now
                        notify(args.notify_url, ww, score)
    except KeyboardInterrupt:
        pass
    finally:
        stream.stop_stream()
        stream.close()
        audio.terminate()
# Script entry point: run the daemon when executed directly (e.g. via launchd).
if __name__ == "__main__":
    main()