Complete P2 (LLM) and P3 (voice pipeline) implementation
P2 — homeai-llm: - Fix ollama launchd plist path for Apple Silicon (/opt/homebrew/bin/ollama) - Add Modelfiles for local GGUF models: llama3.3:70b, qwen3:32b, codestral:22b (registered via `ollama create` — no re-download needed) P3 — homeai-voice: - Wyoming STT: wyoming-faster-whisper, large-v3 model, port 10300 - Wyoming TTS: custom Kokoro ONNX server (wyoming_kokoro_server.py), port 10301 Voice af_heart; models at ~/models/kokoro/ - Wake word: openWakeWord daemon (hey_jarvis), notifies OpenClaw at /wake - launchd plists for all three services + load-all-launchd.sh helper - Smoke test: wyoming/test-pipeline.sh — 3/3 passing HA Wyoming integration pending manual UI config (STT 10.0.0.200:10300, TTS 10.0.0.200:10301). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
88
homeai-voice/wyoming/test-pipeline.sh
Executable file
88
homeai-voice/wyoming/test-pipeline.sh
Executable file
@@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env bash
# Smoke test for the Wyoming voice pipeline.
# Tests: STT server alive | TTS server alive | TTS generates audio
#
# Usage: ./test-pipeline.sh
#
# All endpoints are overridable via environment variables (STT_HOST, STT_PORT,
# TTS_HOST, TTS_PORT, VENV), so the script can also target a remote host.

set -euo pipefail

# Wyoming STT endpoint (wyoming-faster-whisper); defaults match the launchd plists.
STT_HOST="${STT_HOST:-localhost}"
STT_PORT="${STT_PORT:-10300}"
# Wyoming TTS endpoint (Kokoro ONNX server).
TTS_HOST="${TTS_HOST:-localhost}"
TTS_PORT="${TTS_PORT:-10301}"
# Python virtualenv expected to have kokoro_onnx + numpy installed.
VENV="${VENV:-$HOME/homeai-voice-env}"

# Counters updated by check(); the script's exit status derives from FAIL.
PASS=0
FAIL=0
||||
# Record one test result and print a [PASS]/[FAIL] line.
#   $1 = test name
#   $2 = "ok[ detail]" on success (detail is appended to the PASS line),
#        anything else is treated as a failure message.
# Increments the global PASS/FAIL counters.
check() {
    local label="$1"
    local outcome="$2"
    case "$outcome" in
        ok*)
            echo " [PASS] $label${outcome#ok}"
            PASS=$((PASS + 1))
            ;;
        *)
            echo " [FAIL] $label — $outcome"
            FAIL=$((FAIL + 1))
            ;;
    esac
}
|
||||
|
||||
echo "=== HomeAI Voice Pipeline Smoke Test ==="
echo ""

# Test 1: the Wyoming STT server accepts TCP connections.
# nc -z only probes the port (sends no data); -w2 caps the wait at 2 seconds.
echo "1. STT Wyoming server (port $STT_PORT)"
if nc -z -w2 "$STT_HOST" "$STT_PORT" 2>/dev/null; then
    check "STT port open" "ok"
else
    check "STT port open" "port $STT_PORT not reachable — is wyoming-stt running?"
fi

echo ""
# Test 2: same TCP probe against the Wyoming TTS server.
echo "2. TTS Wyoming server (port $TTS_PORT)"
if nc -z -w2 "$TTS_HOST" "$TTS_PORT" 2>/dev/null; then
    check "TTS port open" "ok"
else
    check "TTS port open" "port $TTS_PORT not reachable — is wyoming-tts running?"
fi
|
||||
|
||||
echo ""
# Test 3: end-to-end Kokoro synthesis — load the ONNX model from the venv's
# python and write a short WAV to /tmp.
echo "3. Kokoro TTS synthesis test"
# NOTE: this path is also hard-coded inside the quoted heredoc below (the
# quoted delimiter suppresses expansion) — keep the two in sync.
TTS_OUTPUT="/tmp/homeai-tts-test.wav"
# Run the heredoc as the `if` condition: under `set -euo pipefail` a bare
# python invocation exiting non-zero would abort the whole script, so the
# original `[[ $? -eq 0 ]]` failure branch was unreachable. Commands tested
# by `if` are exempt from `set -e`.
if "$VENV/bin/python3" - <<'PYEOF'
import sys, os
import numpy as np

model_dir = os.path.expanduser("~/models/kokoro")
model_path = os.path.join(model_dir, "kokoro-v1.0.onnx")
voices_path = os.path.join(model_dir, "voices-v1.0.bin")

if not os.path.exists(model_path):
    print(f"Model not found: {model_path}")
    sys.exit(1)

from kokoro_onnx import Kokoro
tts = Kokoro(model_path, voices_path)
samples, sr = tts.create("Hello, I am your home assistant. The voice pipeline is working.", voice="af_heart", speed=1.0)

# Write 16-bit mono PCM WAV (clip to [-1, 1] before int16 scaling).
import wave
samples_int16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
with wave.open("/tmp/homeai-tts-test.wav", "w") as wf:
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(sr)
    wf.writeframes(samples_int16.tobytes())

print(f"Generated {len(samples)/sr:.1f}s of audio at {sr}Hz")
sys.exit(0)
PYEOF
then
    if [[ -f "$TTS_OUTPUT" ]]; then
        size=$(wc -c < "$TTS_OUTPUT")
        check "Kokoro synthesis" "ok — ${size} bytes written to $TTS_OUTPUT"
        echo ""
        echo " To play: afplay $TTS_OUTPUT"
    else
        check "Kokoro synthesis" "python exited 0 but $TTS_OUTPUT was not created"
    fi
else
    check "Kokoro synthesis" "synthesis failed"
fi
|
||||
|
||||
echo ""
echo "─────────────────────────────────"
echo "Results: $PASS passed, $FAIL failed"
# Last command in the script: its status becomes the script's exit code,
# so the script exits 0 only when every check passed.
[[ $FAIL -eq 0 ]]
|
||||
110
homeai-voice/wyoming/wakeword_daemon.py
Normal file
110
homeai-voice/wyoming/wakeword_daemon.py
Normal file
@@ -0,0 +1,110 @@
|
||||
#!/usr/bin/env python3
"""Always-on wake word detection daemon using openWakeWord.

Listens on the default microphone, fires an HTTP POST to --notify-url
when the wake word is detected.

Usage:
    python wakeword_daemon.py --wake-word hey_jarvis --notify-url http://localhost:8080/wake
"""

import argparse
import logging
import time
import urllib.request
import json
import numpy as np

_LOGGER = logging.getLogger(__name__)

# Audio capture parameters: 16 kHz mono int16 frames, as openWakeWord expects.
SAMPLE_RATE = 16000
CHUNK_SIZE = 1280  # ~80ms at 16kHz — recommended by openWakeWord
||||
def notify(url: str, wake_word: str, score: float):
    """POST a JSON wake event ({"wake_word", "score"}) to *url*.

    Best-effort: any failure (bad URL, refused connection, timeout) is
    logged as a warning and swallowed so the detection loop keeps running.
    """
    body = json.dumps({"wake_word": wake_word, "score": float(score)}).encode()
    try:
        request = urllib.request.Request(
            url,
            data=body,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(request, timeout=2):
            pass
    except Exception as err:
        _LOGGER.warning("Failed to notify %s: %s", url, err)
    else:
        _LOGGER.info(
            "Wake word '%s' detected (score=%.3f) — notified %s",
            wake_word,
            score,
            url,
        )
|
||||
|
||||
|
||||
def main():
    """Run the daemon: parse args, load models, open the mic, detect forever.

    Exits with status 1 if pyaudio or openwakeword is not installed.
    Blocks until interrupted (Ctrl-C / SIGINT); always releases the audio
    device on the way out.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--wake-word", default="hey_jarvis")
    parser.add_argument("--notify-url", default="http://localhost:8080/wake")
    parser.add_argument("--threshold", type=float, default=0.5)
    parser.add_argument("--cooldown", type=float, default=3.0, help="Seconds between triggers")
    parser.add_argument("--model-dir", default=None, help="Path to custom .onnx wake word model")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
    )

    # Import optional third-party deps lazily so a missing package yields an
    # actionable message instead of a traceback at module import time.
    try:
        import pyaudio
    except ImportError:
        _LOGGER.error("pyaudio not installed. Run: pip install pyaudio")
        raise SystemExit(1)

    try:
        from openwakeword.model import Model
    except ImportError:
        # Guarded the same way as pyaudio for consistent failure reporting.
        _LOGGER.error("openwakeword not installed. Run: pip install openwakeword")
        raise SystemExit(1)

    _LOGGER.info("Loading wake word model: %s", args.wake_word)

    # With --model-dir, load every .onnx model found there; otherwise fall
    # back to the built-in model named by --wake-word.
    model_paths = []
    if args.model_dir:
        import glob
        import os
        model_paths = glob.glob(os.path.join(args.model_dir, "*.onnx"))

    oww = Model(
        wakeword_models=model_paths if model_paths else [args.wake_word],
        inference_framework="onnx",
    )

    audio = pyaudio.PyAudio()
    stream = audio.open(
        rate=SAMPLE_RATE,
        channels=1,
        format=pyaudio.paInt16,
        input=True,
        frames_per_buffer=CHUNK_SIZE,
    )

    _LOGGER.info("Listening for wake word '%s'...", args.wake_word)
    # Cooldown uses a monotonic clock: time.time() can jump backwards under
    # NTP adjustments, which would break the rate limit.
    last_trigger = 0.0

    try:
        while True:
            raw = stream.read(CHUNK_SIZE, exception_on_overflow=False)
            chunk = np.frombuffer(raw, dtype=np.int16)
            oww.predict(chunk)

            # prediction_buffer maps model name -> rolling score history;
            # the newest score is the last element.
            for ww, scores in oww.prediction_buffer.items():
                score = scores[-1] if scores else 0.0
                if score >= args.threshold:
                    now = time.monotonic()
                    if now - last_trigger >= args.cooldown:
                        last_trigger = now
                        notify(args.notify_url, ww, score)
    except KeyboardInterrupt:
        pass
    finally:
        # Always release the audio device, even on Ctrl-C or errors.
        stream.stop_stream()
        stream.close()
        audio.terminate()
|
||||
|
||||
|
||||
# Script entry point: run directly as `python wakeword_daemon.py [options]`.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user