Complete P2 (LLM) and P3 (voice pipeline) implementation

P2 — homeai-llm:
- Fix ollama launchd plist path for Apple Silicon (/opt/homebrew/bin/ollama)
- Add Modelfiles for local GGUF models: llama3.3:70b, qwen3:32b, codestral:22b
  (registered via `ollama create` — no re-download needed)

P3 — homeai-voice:
- Wyoming STT: wyoming-faster-whisper, large-v3 model, port 10300
- Wyoming TTS: custom Kokoro ONNX server (wyoming_kokoro_server.py), port 10301
  Voice af_heart; models at ~/models/kokoro/
- Wake word: openWakeWord daemon (hey_jarvis), notifies OpenClaw at /wake
- launchd plists for all three services + load-all-launchd.sh helper
- Smoke test: wyoming/test-pipeline.sh — 3/3 passing

HA Wyoming integration pending manual UI config (STT 10.0.0.200:10300,
TTS 10.0.0.200:10301).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Aodhan Collins
2026-03-04 23:28:22 +00:00
parent 858d7be33c
commit c31724c92b
12 changed files with 534 additions and 27 deletions

View File

@@ -0,0 +1,34 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
"http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<!-- launchd agent: always-on wake word listener.
     Runs wakeword_daemon.py from the homeai-voice venv; on detecting
     "hey_jarvis" the daemon POSTs to http://localhost:8080/wake.
     Install to ~/Library/LaunchAgents (see load-all-launchd.sh). -->
<plist version="1.0">
<dict>
<key>Label</key>
<string>com.homeai.wakeword</string>
<!-- Full interpreter + script paths: launchd provides no shell PATH. -->
<key>ProgramArguments</key>
<array>
<string>/Users/aodhan/homeai-voice-env/bin/python3</string>
<string>/Users/aodhan/gitea/homeai/homeai-voice/wyoming/wakeword_daemon.py</string>
<string>--wake-word</string>
<string>hey_jarvis</string>
<string>--notify-url</string>
<string>http://localhost:8080/wake</string>
</array>
<!-- Start at login and restart whenever the daemon exits. -->
<key>RunAtLoad</key>
<true/>
<key>KeepAlive</key>
<true/>
<key>StandardOutPath</key>
<string>/tmp/homeai-wakeword.log</string>
<key>StandardErrorPath</key>
<string>/tmp/homeai-wakeword-error.log</string>
<!-- Wait 10s between respawns to avoid a tight crash loop. -->
<key>ThrottleInterval</key>
<integer>10</integer>
</dict>
</plist>

View File

@@ -0,0 +1,43 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
"http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<!-- launchd agent: Wyoming speech-to-text server.
     wyoming-faster-whisper with the large-v3 model, serving on TCP 10300
     (the port Home Assistant's Wyoming integration is pointed at). -->
<plist version="1.0">
<dict>
<key>Label</key>
<string>com.homeai.wyoming-stt</string>
<key>ProgramArguments</key>
<array>
<string>/Users/aodhan/homeai-voice-env/bin/wyoming-faster-whisper</string>
<string>--uri</string>
<string>tcp://0.0.0.0:10300</string>
<string>--model</string>
<string>large-v3</string>
<string>--language</string>
<string>en</string>
<!-- CPU + int8 quantization: keeps memory/latency reasonable without GPU. -->
<string>--device</string>
<string>cpu</string>
<string>--compute-type</string>
<string>int8</string>
<!-- Model weights cached under ~/models/whisper (same dir for data/download). -->
<string>--data-dir</string>
<string>/Users/aodhan/models/whisper</string>
<string>--download-dir</string>
<string>/Users/aodhan/models/whisper</string>
</array>
<!-- Start at login and restart whenever the server exits. -->
<key>RunAtLoad</key>
<true/>
<key>KeepAlive</key>
<true/>
<key>StandardOutPath</key>
<string>/tmp/homeai-wyoming-stt.log</string>
<key>StandardErrorPath</key>
<string>/tmp/homeai-wyoming-stt-error.log</string>
<!-- Wait 10s between respawns to avoid a tight crash loop. -->
<key>ThrottleInterval</key>
<integer>10</integer>
</dict>
</plist>

View File

@@ -0,0 +1,36 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
"http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<!-- launchd agent: Wyoming text-to-speech server.
     Custom Kokoro ONNX server (wyoming_kokoro_server.py) on TCP 10301,
     default voice af_heart at 1.0x speed. -->
<plist version="1.0">
<dict>
<key>Label</key>
<string>com.homeai.wyoming-tts</string>
<!-- Full interpreter + script paths: launchd provides no shell PATH. -->
<key>ProgramArguments</key>
<array>
<string>/Users/aodhan/homeai-voice-env/bin/python3</string>
<string>/Users/aodhan/gitea/homeai/homeai-voice/tts/wyoming_kokoro_server.py</string>
<string>--uri</string>
<string>tcp://0.0.0.0:10301</string>
<string>--voice</string>
<string>af_heart</string>
<string>--speed</string>
<string>1.0</string>
</array>
<!-- Start at login and restart whenever the server exits. -->
<key>RunAtLoad</key>
<true/>
<key>KeepAlive</key>
<true/>
<key>StandardOutPath</key>
<string>/tmp/homeai-wyoming-tts.log</string>
<key>StandardErrorPath</key>
<string>/tmp/homeai-wyoming-tts-error.log</string>
<!-- Wait 10s between respawns to avoid a tight crash loop. -->
<key>ThrottleInterval</key>
<integer>10</integer>
</dict>
</plist>

View File

@@ -0,0 +1,35 @@
#!/usr/bin/env bash
# Load (or reload) all homeai-voice launchd services.
#
# Copies each plist from ./launchd into ~/Library/LaunchAgents, unloads any
# already-loaded copy (so launchd picks up plist edits), loads the fresh one,
# then prints a PID summary for each service.
set -euo pipefail

LAUNCHD_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/launchd" && pwd)"
LAUNCH_AGENTS=~/Library/LaunchAgents

PLISTS=(
  com.homeai.wyoming-stt.plist
  com.homeai.wyoming-tts.plist
  com.homeai.wakeword.plist
)

for plist in "${PLISTS[@]}"; do
  src="${LAUNCHD_DIR}/${plist}"
  dst="${LAUNCH_AGENTS}/${plist}"
  label="${plist%.plist}"
  cp "$src" "$dst"
  # `load` is a no-op for an already-loaded job, so unload first when present.
  if launchctl list "$label" &>/dev/null; then
    launchctl unload "$dst" 2>/dev/null || true
  fi
  launchctl load "$dst"
  echo "Loaded: $label"
done

echo ""
echo "Status:"
for plist in "${PLISTS[@]}"; do
  label="${plist%.plist}"
  # `launchctl list <label>` prints a plist-style dict ("PID" = 123;), not the
  # 3-column table of the argument-less form — parse the PID line explicitly.
  # The trailing `|| true` keeps `set -e -o pipefail` from aborting the whole
  # script when a job failed to load and launchctl exits non-zero.
  pid=$(launchctl list "$label" 2>/dev/null \
    | awk -F' = ' '/"PID"/ { gsub(/;/, "", $2); print $2 }' || true)
  echo "  $label — PID: ${pid:--}"
done

View File

@@ -0,0 +1,145 @@
#!/usr/bin/env python3
"""Wyoming TTS server backed by Kokoro ONNX.
Usage:
python wyoming_kokoro_server.py --uri tcp://0.0.0.0:10301 --voice af_heart
"""
import argparse
import asyncio
import logging
import os
import numpy as np
from wyoming.audio import AudioChunk, AudioStart, AudioStop
from wyoming.event import Event
from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
from wyoming.server import AsyncEventHandler, AsyncServer
from wyoming.tts import Synthesize
_LOGGER = logging.getLogger(__name__)

# PCM format advertised in Wyoming AudioStart/AudioChunk events.
SAMPLE_RATE = 24000
SAMPLE_WIDTH = 2  # int16
CHANNELS = 1
CHUNK_SECONDS = 1  # stream in 1-second chunks
def _load_kokoro():
    """Build a Kokoro ONNX engine from the model files under ~/models/kokoro."""
    # Imported lazily so argument parsing / --help work without kokoro_onnx.
    from kokoro_onnx import Kokoro

    base = os.path.expanduser("~/models/kokoro")
    model_path = os.path.join(base, "kokoro-v1.0.onnx")
    voices_path = os.path.join(base, "voices-v1.0.bin")
    return Kokoro(model_path, voices_path)
class KokoroEventHandler(AsyncEventHandler):
    """Wyoming event handler that synthesizes text with a shared Kokoro engine.

    One handler is created per client connection; the Kokoro model itself is
    loaded once in main() and shared by every handler.
    """

    def __init__(self, tts, default_voice: str, speed: float, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._tts = tts
        self._default_voice = default_voice
        self._speed = speed
        # Send info immediately on connect so clients (e.g. Home Assistant)
        # can discover the program/voice without sending a Describe first.
        asyncio.ensure_future(self._send_info())

    async def _send_info(self):
        """Advertise the TTS program and its single configured voice."""
        info = Info(
            tts=[
                TtsProgram(
                    name="kokoro",
                    description="Kokoro ONNX TTS",
                    attribution=Attribution(
                        name="thewh1teagle/kokoro-onnx",
                        url="https://github.com/thewh1teagle/kokoro-onnx",
                    ),
                    installed=True,
                    voices=[
                        TtsVoice(
                            name=self._default_voice,
                            description="Kokoro voice",
                            attribution=Attribution(name="kokoro", url=""),
                            installed=True,
                            languages=["en-us"],
                            speakers=[TtsVoiceSpeaker(name=self._default_voice)],
                        )
                    ],
                )
            ]
        )
        await self.write_event(info.event())

    async def handle_event(self, event: Event) -> bool:
        """Handle one Wyoming event; only Synthesize is acted on.

        Streams AudioStart / AudioChunk* / AudioStop for the synthesized text.
        Always returns True so the connection stays open for further requests.
        """
        if not Synthesize.is_type(event.type):
            return True

        synthesize = Synthesize.from_event(event)
        text = synthesize.text
        voice = self._default_voice
        if synthesize.voice and synthesize.voice.name:
            voice = synthesize.voice.name
        _LOGGER.debug("Synthesizing %r with voice=%s speed=%.1f", text, voice, self._speed)

        started = False  # whether AudioStart was already emitted
        try:
            # Kokoro inference is blocking — run it off the event loop.
            # get_running_loop() replaces the deprecated get_event_loop()
            # pattern inside coroutines.
            loop = asyncio.get_running_loop()
            samples, sample_rate = await loop.run_in_executor(
                None, lambda: self._tts.create(text, voice=voice, speed=self._speed)
            )
            # float in [-1, 1] -> int16 PCM.
            samples_int16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
            audio_bytes = samples_int16.tobytes()

            # Advertise the rate Kokoro actually produced (24 kHz for v1.0)
            # instead of blindly assuming SAMPLE_RATE, so a future model
            # change cannot mislabel the stream.
            rate = int(sample_rate) if sample_rate else SAMPLE_RATE
            await self.write_event(
                AudioStart(rate=rate, width=SAMPLE_WIDTH, channels=CHANNELS).event()
            )
            started = True

            chunk_size = rate * SAMPLE_WIDTH * CHANNELS * CHUNK_SECONDS
            for i in range(0, len(audio_bytes), chunk_size):
                await self.write_event(
                    AudioChunk(
                        rate=rate,
                        width=SAMPLE_WIDTH,
                        channels=CHANNELS,
                        audio=audio_bytes[i : i + chunk_size],
                    ).event()
                )
            await self.write_event(AudioStop().event())
            _LOGGER.info("Synthesized %.1fs of audio", len(samples) / sample_rate)
        except Exception:
            _LOGGER.exception("Synthesis error")
            # Only close the stream if one was actually opened: an AudioStop
            # without a matching AudioStart is a malformed Wyoming stream.
            if started:
                await self.write_event(AudioStop().event())
        return True  # keep connection open
async def main():
    """CLI entry point: parse args, load Kokoro once, then serve forever."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--uri", default="tcp://0.0.0.0:10301")
    parser.add_argument("--voice", default="af_heart")
    parser.add_argument("--speed", type=float, default=1.0)
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
    )

    _LOGGER.info("Loading Kokoro ONNX model...")
    engine = _load_kokoro()
    _LOGGER.info("Kokoro loaded. Starting Wyoming TTS on %s (voice=%s)", args.uri, args.voice)

    server = AsyncServer.from_uri(args.uri)

    def handler_factory(reader, writer):
        # Every connection gets its own handler sharing the one loaded engine.
        return KokoroEventHandler(engine, args.voice, args.speed, reader, writer)

    await server.run(handler_factory)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,88 @@
#!/usr/bin/env bash
# Smoke test for the Wyoming voice pipeline.
# Tests: STT server alive | TTS server alive | TTS generates audio
#
# Usage: ./test-pipeline.sh
# Exit status is non-zero if any check fails (CI-friendly).
set -euo pipefail

STT_HOST="${STT_HOST:-localhost}"
STT_PORT="${STT_PORT:-10300}"
TTS_HOST="${TTS_HOST:-localhost}"
TTS_PORT="${TTS_PORT:-10301}"
VENV="${VENV:-$HOME/homeai-voice-env}"

PASS=0
FAIL=0

# check NAME RESULT — a RESULT starting with "ok" counts as a pass (the rest
# of the string is appended as detail); anything else is the failure reason.
check() {
  local name="$1"; local result="$2"
  if [[ "$result" == ok* ]]; then
    echo " [PASS] $name${result#ok}"; PASS=$((PASS + 1))
  else
    echo " [FAIL] $name$result"; FAIL=$((FAIL + 1))
  fi
}

echo "=== HomeAI Voice Pipeline Smoke Test ==="
echo ""

echo "1. STT Wyoming server (port $STT_PORT)"
if nc -z -w2 "$STT_HOST" "$STT_PORT" 2>/dev/null; then
  check "STT port open" "ok"
else
  # Leading " — " keeps the name and the reason from running together.
  check "STT port open" " — port $STT_PORT not reachable — is wyoming-stt running?"
fi
echo ""

echo "2. TTS Wyoming server (port $TTS_PORT)"
if nc -z -w2 "$TTS_HOST" "$TTS_PORT" 2>/dev/null; then
  check "TTS port open" "ok"
else
  check "TTS port open" " — port $TTS_PORT not reachable — is wyoming-tts running?"
fi
echo ""

echo "3. Kokoro TTS synthesis test"
TTS_OUTPUT="/tmp/homeai-tts-test.wav"
# Remove any stale output so a leftover file from an earlier run can't make a
# failed synthesis look like a pass.
rm -f "$TTS_OUTPUT"

# Run the python heredoc inside `if` — a bare command followed by `$?` would
# abort the whole script under `set -e` before the result check ever ran.
synth_ok=0
if "$VENV/bin/python3" - <<'PYEOF'
import sys, os
import numpy as np
model_dir = os.path.expanduser("~/models/kokoro")
model_path = os.path.join(model_dir, "kokoro-v1.0.onnx")
voices_path = os.path.join(model_dir, "voices-v1.0.bin")
if not os.path.exists(model_path):
    print(f"Model not found: {model_path}")
    sys.exit(1)
from kokoro_onnx import Kokoro
tts = Kokoro(model_path, voices_path)
samples, sr = tts.create("Hello, I am your home assistant. The voice pipeline is working.", voice="af_heart", speed=1.0)
# Write WAV: mono, 16-bit PCM at Kokoro's native rate.
import wave
samples_int16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
with wave.open("/tmp/homeai-tts-test.wav", "w") as wf:
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(sr)
    wf.writeframes(samples_int16.tobytes())
print(f"Generated {len(samples)/sr:.1f}s of audio at {sr}Hz")
sys.exit(0)
PYEOF
then
  synth_ok=1
fi

if [[ $synth_ok -eq 1 && -f "$TTS_OUTPUT" ]]; then
  size=$(wc -c < "$TTS_OUTPUT")
  check "Kokoro synthesis" "ok — ${size} bytes written to $TTS_OUTPUT"
  echo ""
  echo " To play: afplay $TTS_OUTPUT"
else
  check "Kokoro synthesis" " — synthesis failed"
fi

echo ""
echo "─────────────────────────────────"
echo "Results: $PASS passed, $FAIL failed"
# Final exit status mirrors the failure count.
[[ $FAIL -eq 0 ]]

View File

@@ -0,0 +1,110 @@
#!/usr/bin/env python3
"""Always-on wake word detection daemon using openWakeWord.
Listens on the default microphone, fires an HTTP POST to --notify-url
when the wake word is detected.
Usage:
python wakeword_daemon.py --wake-word hey_jarvis --notify-url http://localhost:8080/wake
"""
import argparse
import logging
import time
import urllib.request
import json
import numpy as np
_LOGGER = logging.getLogger(__name__)

# Capture format fed to openWakeWord: 16 kHz mono int16.
SAMPLE_RATE = 16000
CHUNK_SIZE = 1280  # ~80ms at 16kHz — recommended by openWakeWord
def notify(url: str, wake_word: str, score: float):
payload = json.dumps({"wake_word": wake_word, "score": float(score)}).encode()
try:
req = urllib.request.Request(
url,
data=payload,
headers={"Content-Type": "application/json"},
method="POST",
)
with urllib.request.urlopen(req, timeout=2):
pass
_LOGGER.info("Wake word '%s' detected (score=%.3f) — notified %s", wake_word, score, url)
except Exception as e:
_LOGGER.warning("Failed to notify %s: %s", url, e)
def main():
    """Run the wake-word loop: microphone -> openWakeWord -> HTTP notify.

    Blocks until KeyboardInterrupt; the PyAudio stream and device handle are
    released in the finally block.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--wake-word", default="hey_jarvis")
    parser.add_argument("--notify-url", default="http://localhost:8080/wake")
    parser.add_argument("--threshold", type=float, default=0.5)
    parser.add_argument("--cooldown", type=float, default=3.0, help="Seconds between triggers")
    parser.add_argument("--model-dir", default=None, help="Path to custom .onnx wake word model")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
    )
    # pyaudio is imported lazily with a clear error, so argument parsing and
    # --help still work on a machine without PortAudio installed.
    try:
        import pyaudio
    except ImportError:
        _LOGGER.error("pyaudio not installed. Run: pip install pyaudio")
        raise SystemExit(1)
    import openwakeword
    from openwakeword.model import Model
    _LOGGER.info("Loading wake word model: %s", args.wake_word)
    # With --model-dir, load every .onnx model found there; otherwise fall
    # back to the built-in openWakeWord model named by --wake-word.
    model_paths = []
    if args.model_dir:
        import os, glob
        model_paths = glob.glob(os.path.join(args.model_dir, "*.onnx"))
    oww = Model(
        wakeword_models=model_paths if model_paths else [args.wake_word],
        inference_framework="onnx",
    )
    audio = pyaudio.PyAudio()
    # Default input device: 16 kHz mono int16 frames of CHUNK_SIZE samples.
    stream = audio.open(
        rate=SAMPLE_RATE,
        channels=1,
        format=pyaudio.paInt16,
        input=True,
        frames_per_buffer=CHUNK_SIZE,
    )
    _LOGGER.info("Listening for wake word '%s'...", args.wake_word)
    last_trigger = 0.0  # wall-clock time of the most recent notification
    try:
        while True:
            raw = stream.read(CHUNK_SIZE, exception_on_overflow=False)
            chunk = np.frombuffer(raw, dtype=np.int16)
            oww.predict(chunk)
            # prediction_buffer maps model name -> rolling score history; the
            # last entry is the score for the chunk just fed in.
            for ww, scores in oww.prediction_buffer.items():
                score = scores[-1] if scores else 0.0
                if score >= args.threshold:
                    now = time.time()
                    # Cooldown suppresses repeat triggers from one utterance.
                    if now - last_trigger >= args.cooldown:
                        last_trigger = now
                        notify(args.notify_url, ww, score)
    except KeyboardInterrupt:
        pass
    finally:
        stream.stop_stream()
        stream.close()
        audio.terminate()


if __name__ == "__main__":
    main()