Complete P2 (LLM) and P3 (voice pipeline) implementation #1
TODO.md (modified)
@@ -9,30 +9,26 @@
 ### P1 · homeai-infra
 
-- [ ] Install Docker Desktop for Mac, enable launch at login
+- [x] Install Docker Desktop for Mac, enable launch at login
-- [ ] Create shared `homeai` Docker network
+- [x] Create shared `homeai` Docker network
-- [ ] Create `~/server/docker/` directory structure
+- [x] Create `~/server/docker/` directory structure
-- [ ] Write compose files: Home Assistant, Portainer, Uptime Kuma, Gitea, code-server, n8n
+- [x] Write compose files: Uptime Kuma, code-server, n8n (HA, Portainer, Gitea are pre-existing on 10.0.0.199)
-- [ ] Write `.env.secrets.example` and `Makefile`
+- [x] `docker compose up -d` — bring all services up
-- [ ] `make up-all` — bring all services up
+- [x] Home Assistant onboarding — long-lived access token generated, stored in `.env`
-- [ ] Home Assistant onboarding — generate long-lived access token
-- [ ] Write `~/server/.env.services` with all service URLs
 - [ ] Install Tailscale, verify all services reachable on Tailnet
-- [ ] Gitea: create admin account, initialise all 8 sub-project repos, configure SSH
+- [ ] Gitea: initialise all 8 sub-project repos, configure SSH
 - [ ] Uptime Kuma: add monitors for all services, configure mobile alerts
 - [ ] Verify all containers survive a cold reboot
 
 ### P2 · homeai-llm
 
-- [ ] Install Ollama natively via brew
+- [x] Install Ollama natively via brew
-- [ ] Write and load launchd plist (`com.ollama.ollama.plist`)
+- [x] Write and load launchd plist (`com.homeai.ollama.plist`) — `/opt/homebrew/bin/ollama`
-- [ ] Write `ollama-models.txt` with model manifest
+- [x] Register local GGUF models via Modelfiles (no download): llama3.3:70b, qwen3:32b, codestral:22b
-- [ ] Run `scripts/pull-models.sh` — pull all models
+- [x] Deploy Open WebUI via Docker compose (port 3030)
+- [x] Verify Open WebUI connected to Ollama, all models available
 - [ ] Run `scripts/benchmark.sh` — record results in `benchmark-results.md`
-- [ ] Deploy Open WebUI via Docker compose (port 3030)
-- [ ] Verify Open WebUI connected to Ollama, all models available
 - [ ] Add Ollama + Open WebUI to Uptime Kuma monitors
-- [ ] Add `OLLAMA_URL` and `OPEN_WEBUI_URL` to `.env.services`
 
 ---
@@ -40,20 +36,19 @@
 ### P3 · homeai-voice
 
-- [ ] Compile Whisper.cpp with Metal support
+- [x] Install `wyoming-faster-whisper` — model: faster-whisper-large-v3 (auto-downloaded)
-- [ ] Download Whisper models (`large-v3`, `medium.en`) to `~/models/whisper/`
+- [x] Install Kokoro ONNX TTS — models at `~/models/kokoro/`
-- [ ] Install `wyoming-faster-whisper`, test STT from audio file
+- [x] Write Wyoming-Kokoro adapter server (`homeai-voice/tts/wyoming_kokoro_server.py`)
-- [ ] Install Kokoro TTS, test output to audio file
+- [x] Write + load launchd plists for Wyoming STT (10300) and TTS (10301)
-- [ ] Install Wyoming-Kokoro adapter, verify Wyoming protocol
+- [x] Install openWakeWord + pyaudio — model: hey_jarvis
-- [ ] Write + load launchd plists for Wyoming STT (10300) and TTS (10301)
+- [x] Write + load openWakeWord launchd plist (`com.homeai.wakeword`)
-- [ ] Connect Home Assistant Wyoming integration (STT + TTS)
+- [x] Write `wyoming/test-pipeline.sh` — smoke test (3/3 passing)
+- [~] Connect Home Assistant Wyoming integration (STT + TTS) — awaiting HA UI config
 - [ ] Create HA Voice Assistant pipeline
 - [ ] Test HA Assist via browser: type query → hear spoken response
-- [ ] Install openWakeWord, test wake detection with USB mic
-- [ ] Write + load openWakeWord launchd plist
 - [ ] Install Chatterbox TTS (MPS build), test with sample `.wav`
 - [ ] Install Qwen3-TTS via MLX (fallback)
-- [ ] Write `wyoming/test-pipeline.sh` — end-to-end smoke test
+- [ ] Train custom wake word using character name
 - [ ] Add Wyoming STT/TTS to Uptime Kuma monitors
 
 ---
(modified launchd plist — filename not shown in the extract)

@@ -8,7 +8,7 @@
 
     <key>ProgramArguments</key>
    <array>
-        <string>/usr/local/bin/ollama</string>
+        <string>/opt/homebrew/bin/ollama</string>
         <string>serve</string>
     </array>
 
homeai-llm/modelfiles/Codestral-22B (new file)

FROM /Users/aodhan/Models/LLM/Codestral-22B-v0.1-GGUF/Codestral-22B-v0.1-Q4_K_M.gguf

PARAMETER num_ctx 16384
PARAMETER temperature 0.2
PARAMETER top_p 0.95

SYSTEM "You are an expert coding assistant."
homeai-llm/modelfiles/Llama-3.3-70B (new file)

FROM /Users/aodhan/Models/LLM/Llama-3.3-70B-Instruct-GGUF/Llama-3.3-70B-Instruct-Q4_K_M.gguf

PARAMETER num_ctx 8192
PARAMETER temperature 0.7
PARAMETER top_p 0.9

SYSTEM "You are a helpful AI assistant."
homeai-llm/modelfiles/Qwen3-32B (new file)

FROM /Users/aodhan/Models/LLM/Qwen3-32B-GGUF/Qwen3-32B-Q4_K_M.gguf

PARAMETER num_ctx 8192
PARAMETER temperature 0.7
PARAMETER top_p 0.9

SYSTEM "You are a helpful AI assistant."
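The three Modelfiles above use the same three directives (`FROM`, `PARAMETER`, `SYSTEM`); each gets registered with `ollama create <name> -f <Modelfile>`, which wraps a local GGUF without re-downloading weights. As a sketch only (the helper and the example path are hypothetical, not part of this commit), a minimal parser for that directive subset:

```python
# Hypothetical helper, not in the repo: parse the simple subset of
# Ollama Modelfile syntax used above (FROM / PARAMETER / SYSTEM lines).
def parse_modelfile(text: str) -> dict:
    spec = {"from": None, "parameters": {}, "system": None}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue  # skip blanks and comments
        keyword, _, rest = line.partition(" ")
        keyword = keyword.upper()
        if keyword == "FROM":
            spec["from"] = rest.strip()
        elif keyword == "PARAMETER":
            name, _, value = rest.strip().partition(" ")
            spec["parameters"][name] = value.strip()
        elif keyword == "SYSTEM":
            spec["system"] = rest.strip().strip('"')
    return spec


# Illustrative input modeled on the Codestral Modelfile (path is fake).
example = """FROM /tmp/example.gguf

PARAMETER num_ctx 16384
PARAMETER temperature 0.2

SYSTEM "You are an expert coding assistant."
"""
spec = parse_modelfile(example)
print(spec["parameters"]["num_ctx"])  # -> 16384
```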
homeai-voice/scripts/launchd/com.homeai.wakeword.plist (new file)

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>Label</key>
    <string>com.homeai.wakeword</string>

    <key>ProgramArguments</key>
    <array>
        <string>/Users/aodhan/homeai-voice-env/bin/python3</string>
        <string>/Users/aodhan/gitea/homeai/homeai-voice/wyoming/wakeword_daemon.py</string>
        <string>--wake-word</string>
        <string>hey_jarvis</string>
        <string>--notify-url</string>
        <string>http://localhost:8080/wake</string>
    </array>

    <key>RunAtLoad</key>
    <true/>

    <key>KeepAlive</key>
    <true/>

    <key>StandardOutPath</key>
    <string>/tmp/homeai-wakeword.log</string>

    <key>StandardErrorPath</key>
    <string>/tmp/homeai-wakeword-error.log</string>

    <key>ThrottleInterval</key>
    <integer>10</integer>
</dict>
</plist>
homeai-voice/scripts/launchd/com.homeai.wyoming-stt.plist (new file)

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>Label</key>
    <string>com.homeai.wyoming-stt</string>

    <key>ProgramArguments</key>
    <array>
        <string>/Users/aodhan/homeai-voice-env/bin/wyoming-faster-whisper</string>
        <string>--uri</string>
        <string>tcp://0.0.0.0:10300</string>
        <string>--model</string>
        <string>large-v3</string>
        <string>--language</string>
        <string>en</string>
        <string>--device</string>
        <string>cpu</string>
        <string>--compute-type</string>
        <string>int8</string>
        <string>--data-dir</string>
        <string>/Users/aodhan/models/whisper</string>
        <string>--download-dir</string>
        <string>/Users/aodhan/models/whisper</string>
    </array>

    <key>RunAtLoad</key>
    <true/>

    <key>KeepAlive</key>
    <true/>

    <key>StandardOutPath</key>
    <string>/tmp/homeai-wyoming-stt.log</string>

    <key>StandardErrorPath</key>
    <string>/tmp/homeai-wyoming-stt-error.log</string>

    <key>ThrottleInterval</key>
    <integer>10</integer>
</dict>
</plist>
homeai-voice/scripts/launchd/com.homeai.wyoming-tts.plist (new file)

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>Label</key>
    <string>com.homeai.wyoming-tts</string>

    <key>ProgramArguments</key>
    <array>
        <string>/Users/aodhan/homeai-voice-env/bin/python3</string>
        <string>/Users/aodhan/gitea/homeai/homeai-voice/tts/wyoming_kokoro_server.py</string>
        <string>--uri</string>
        <string>tcp://0.0.0.0:10301</string>
        <string>--voice</string>
        <string>af_heart</string>
        <string>--speed</string>
        <string>1.0</string>
    </array>

    <key>RunAtLoad</key>
    <true/>

    <key>KeepAlive</key>
    <true/>

    <key>StandardOutPath</key>
    <string>/tmp/homeai-wyoming-tts.log</string>

    <key>StandardErrorPath</key>
    <string>/tmp/homeai-wyoming-tts-error.log</string>

    <key>ThrottleInterval</key>
    <integer>10</integer>
</dict>
</plist>
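All three plists instantiate one template: `ProgramArguments`, `RunAtLoad`, `KeepAlive`, `/tmp` log paths, and a 10 s `ThrottleInterval`. A sketch (not part of this commit; the example arguments are illustrative) of generating that template with the stdlib `plistlib` instead of writing the XML by hand:

```python
import plistlib

def make_agent(label: str, program_args: list[str], log_prefix: str) -> bytes:
    # Mirror the template shared by the three hand-written plists above.
    return plistlib.dumps({
        "Label": label,
        "ProgramArguments": program_args,
        "RunAtLoad": True,
        "KeepAlive": True,
        "StandardOutPath": f"/tmp/{log_prefix}.log",
        "StandardErrorPath": f"/tmp/{log_prefix}-error.log",
        "ThrottleInterval": 10,
    })

# Illustrative arguments only; the real plist uses the venv python path.
data = make_agent(
    "com.homeai.wyoming-tts",
    ["/usr/bin/python3", "wyoming_kokoro_server.py", "--uri", "tcp://0.0.0.0:10301"],
    "homeai-wyoming-tts",
)
print(plistlib.loads(data)["Label"])  # -> com.homeai.wyoming-tts
```

`plistlib.loads` round-trips the output, so the generated file is guaranteed well-formed before it ever reaches `launchctl`.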
homeai-voice/scripts/load-all-launchd.sh (new executable file)

#!/usr/bin/env bash
# Load (or reload) all homeai-voice launchd services.

set -euo pipefail

LAUNCHD_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/launchd" && pwd)"
LAUNCH_AGENTS=~/Library/LaunchAgents

PLISTS=(
    com.homeai.wyoming-stt.plist
    com.homeai.wyoming-tts.plist
    com.homeai.wakeword.plist
)

for plist in "${PLISTS[@]}"; do
    src="${LAUNCHD_DIR}/${plist}"
    dst="${LAUNCH_AGENTS}/${plist}"
    label="${plist%.plist}"

    cp "$src" "$dst"

    if launchctl list "$label" &>/dev/null; then
        launchctl unload "$dst" 2>/dev/null || true
    fi
    launchctl load "$dst"
    echo "Loaded: $label"
done

echo ""
echo "Status:"
for plist in "${PLISTS[@]}"; do
    label="${plist%.plist}"
    pid=$(launchctl list "$label" 2>/dev/null | awk 'NR==2{print $1}')
    echo "  $label — PID: ${pid:--}"
done
homeai-voice/tts/wyoming_kokoro_server.py (new file)

#!/usr/bin/env python3
"""Wyoming TTS server backed by Kokoro ONNX.

Usage:
    python wyoming_kokoro_server.py --uri tcp://0.0.0.0:10301 --voice af_heart
"""

import argparse
import asyncio
import logging
import os

import numpy as np

from wyoming.audio import AudioChunk, AudioStart, AudioStop
from wyoming.event import Event
from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
from wyoming.server import AsyncEventHandler, AsyncServer
from wyoming.tts import Synthesize

_LOGGER = logging.getLogger(__name__)

SAMPLE_RATE = 24000
SAMPLE_WIDTH = 2  # int16
CHANNELS = 1
CHUNK_SECONDS = 1  # stream in 1-second chunks


def _load_kokoro():
    from kokoro_onnx import Kokoro
    model_dir = os.path.expanduser("~/models/kokoro")
    return Kokoro(
        os.path.join(model_dir, "kokoro-v1.0.onnx"),
        os.path.join(model_dir, "voices-v1.0.bin"),
    )


class KokoroEventHandler(AsyncEventHandler):
    def __init__(self, tts, default_voice: str, speed: float, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._tts = tts
        self._default_voice = default_voice
        self._speed = speed

        # Send info immediately on connect
        asyncio.ensure_future(self._send_info())

    async def _send_info(self):
        info = Info(
            tts=[
                TtsProgram(
                    name="kokoro",
                    description="Kokoro ONNX TTS",
                    attribution=Attribution(
                        name="thewh1teagle/kokoro-onnx",
                        url="https://github.com/thewh1teagle/kokoro-onnx",
                    ),
                    installed=True,
                    voices=[
                        TtsVoice(
                            name=self._default_voice,
                            description="Kokoro voice",
                            attribution=Attribution(name="kokoro", url=""),
                            installed=True,
                            languages=["en-us"],
                            speakers=[TtsVoiceSpeaker(name=self._default_voice)],
                        )
                    ],
                )
            ]
        )
        await self.write_event(info.event())

    async def handle_event(self, event: Event) -> bool:
        if Synthesize.is_type(event.type):
            synthesize = Synthesize.from_event(event)
            text = synthesize.text
            voice = self._default_voice

            if synthesize.voice and synthesize.voice.name:
                voice = synthesize.voice.name

            _LOGGER.debug("Synthesizing %r with voice=%s speed=%.1f", text, voice, self._speed)

            try:
                loop = asyncio.get_event_loop()
                samples, sample_rate = await loop.run_in_executor(
                    None, lambda: self._tts.create(text, voice=voice, speed=self._speed)
                )

                samples_int16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
                audio_bytes = samples_int16.tobytes()

                await self.write_event(
                    AudioStart(rate=SAMPLE_RATE, width=SAMPLE_WIDTH, channels=CHANNELS).event()
                )

                chunk_size = SAMPLE_RATE * SAMPLE_WIDTH * CHANNELS * CHUNK_SECONDS
                for i in range(0, len(audio_bytes), chunk_size):
                    await self.write_event(
                        AudioChunk(
                            rate=SAMPLE_RATE,
                            width=SAMPLE_WIDTH,
                            channels=CHANNELS,
                            audio=audio_bytes[i : i + chunk_size],
                        ).event()
                    )

                await self.write_event(AudioStop().event())
                _LOGGER.info("Synthesized %.1fs of audio", len(samples) / sample_rate)

            except Exception:
                _LOGGER.exception("Synthesis error")
                await self.write_event(AudioStop().event())

        return True  # keep connection open


async def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--uri", default="tcp://0.0.0.0:10301")
    parser.add_argument("--voice", default="af_heart")
    parser.add_argument("--speed", type=float, default=1.0)
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
    )

    _LOGGER.info("Loading Kokoro ONNX model...")
    tts = _load_kokoro()
    _LOGGER.info("Kokoro loaded. Starting Wyoming TTS on %s (voice=%s)", args.uri, args.voice)

    server = AsyncServer.from_uri(args.uri)

    def handler_factory(reader, writer):
        return KokoroEventHandler(tts, args.voice, args.speed, reader, writer)

    await server.run(handler_factory)


if __name__ == "__main__":
    asyncio.run(main())
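The server's audio path reduces to two steps: clip and scale float samples to int16 PCM, then slice the byte buffer into fixed-size pieces for the `AudioChunk` events. Isolated as a sketch with a synthetic sine tone standing in for Kokoro output (helper not part of this commit):

```python
import numpy as np

SAMPLE_RATE = 24000
SAMPLE_WIDTH = 2  # int16
CHANNELS = 1
CHUNK_SECONDS = 1

def to_pcm_chunks(samples: np.ndarray) -> list[bytes]:
    # Same conversion the server performs: clip to [-1, 1], scale to int16,
    # then slice the byte buffer into 1-second chunks.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16).tobytes()
    chunk_size = SAMPLE_RATE * SAMPLE_WIDTH * CHANNELS * CHUNK_SECONDS
    return [pcm[i : i + chunk_size] for i in range(0, len(pcm), chunk_size)]

# 2.5 s of a 440 Hz tone stands in for Kokoro output.
t = np.arange(int(2.5 * SAMPLE_RATE)) / SAMPLE_RATE
chunks = to_pcm_chunks(0.5 * np.sin(2 * np.pi * 440 * t))
print(len(chunks))  # -> 3 (two full 1 s chunks plus one partial)
```

The final chunk is allowed to be short; Wyoming clients reassemble on `AudioStop`, so no padding is needed.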
homeai-voice/wyoming/test-pipeline.sh (new executable file)

#!/usr/bin/env bash
# Smoke test for the Wyoming voice pipeline.
# Tests: STT server alive | TTS server alive | TTS generates audio
#
# Usage: ./test-pipeline.sh

set -euo pipefail

STT_HOST="${STT_HOST:-localhost}"
STT_PORT="${STT_PORT:-10300}"
TTS_HOST="${TTS_HOST:-localhost}"
TTS_PORT="${TTS_PORT:-10301}"
VENV="${VENV:-$HOME/homeai-voice-env}"

PASS=0
FAIL=0

check() {
    local name="$1"; local result="$2"
    if [[ "$result" == ok* ]]; then
        echo "  [PASS] $name${result#ok}"; PASS=$((PASS + 1))
    else
        echo "  [FAIL] $name — $result"; FAIL=$((FAIL + 1))
    fi
}

echo "=== HomeAI Voice Pipeline Smoke Test ==="
echo ""

echo "1. STT Wyoming server (port $STT_PORT)"
if nc -z -w2 "$STT_HOST" "$STT_PORT" 2>/dev/null; then
    check "STT port open" "ok"
else
    check "STT port open" "port $STT_PORT not reachable — is wyoming-stt running?"
fi

echo ""
echo "2. TTS Wyoming server (port $TTS_PORT)"
if nc -z -w2 "$TTS_HOST" "$TTS_PORT" 2>/dev/null; then
    check "TTS port open" "ok"
else
    check "TTS port open" "port $TTS_PORT not reachable — is wyoming-tts running?"
fi

echo ""
echo "3. Kokoro TTS synthesis test"
TTS_OUTPUT="/tmp/homeai-tts-test.wav"
# Capture the exit status with `|| status=$?` so a synthesis failure is
# reported as a FAIL instead of aborting the whole script via `set -e`.
status=0
"$VENV/bin/python3" - <<'PYEOF' || status=$?
import sys, os
import numpy as np

model_dir = os.path.expanduser("~/models/kokoro")
model_path = os.path.join(model_dir, "kokoro-v1.0.onnx")
voices_path = os.path.join(model_dir, "voices-v1.0.bin")

if not os.path.exists(model_path):
    print(f"Model not found: {model_path}")
    sys.exit(1)

from kokoro_onnx import Kokoro
tts = Kokoro(model_path, voices_path)
samples, sr = tts.create("Hello, I am your home assistant. The voice pipeline is working.", voice="af_heart", speed=1.0)

# Write WAV
import wave
samples_int16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
with wave.open("/tmp/homeai-tts-test.wav", "w") as wf:
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(sr)
    wf.writeframes(samples_int16.tobytes())

print(f"Generated {len(samples)/sr:.1f}s of audio at {sr}Hz")
sys.exit(0)
PYEOF
if [[ $status -eq 0 && -f "$TTS_OUTPUT" ]]; then
    size=$(wc -c < "$TTS_OUTPUT")
    check "Kokoro synthesis" "ok — ${size} bytes written to $TTS_OUTPUT"
    echo ""
    echo "  To play: afplay $TTS_OUTPUT"
else
    check "Kokoro synthesis" "synthesis failed"
fi

echo ""
echo "─────────────────────────────────"
echo "Results: $PASS passed, $FAIL failed"
[[ $FAIL -eq 0 ]]
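The `nc -z -w2` probes in steps 1 and 2 amount to a TCP connect with a timeout. A stdlib sketch of the same check (hypothetical helper, not part of this commit), demonstrated against a throwaway local listener rather than the real Wyoming ports:

```python
import socket

def port_open(host: str, port: int, timeout: float = 2.0) -> bool:
    # Equivalent of `nc -z -w<timeout> host port`: try to connect,
    # report whether the connection succeeded.
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False

# Demo: bind an ephemeral local listener so the check has something to hit.
listener = socket.socket()
listener.bind(("127.0.0.1", 0))
listener.listen(1)
port = listener.getsockname()[1]
print(port_open("127.0.0.1", port))  # -> True
listener.close()
```

Against the real pipeline this would be called with `("localhost", 10300)` and `("localhost", 10301)`.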
homeai-voice/wyoming/wakeword_daemon.py (new file)

#!/usr/bin/env python3
"""Always-on wake word detection daemon using openWakeWord.

Listens on the default microphone, fires an HTTP POST to --notify-url
when the wake word is detected.

Usage:
    python wakeword_daemon.py --wake-word hey_jarvis --notify-url http://localhost:8080/wake
"""

import argparse
import logging
import time
import urllib.request
import json
import numpy as np

_LOGGER = logging.getLogger(__name__)

SAMPLE_RATE = 16000
CHUNK_SIZE = 1280  # ~80ms at 16kHz — recommended by openWakeWord


def notify(url: str, wake_word: str, score: float):
    payload = json.dumps({"wake_word": wake_word, "score": float(score)}).encode()
    try:
        req = urllib.request.Request(
            url,
            data=payload,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=2):
            pass
        _LOGGER.info("Wake word '%s' detected (score=%.3f) — notified %s", wake_word, score, url)
    except Exception as e:
        _LOGGER.warning("Failed to notify %s: %s", url, e)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--wake-word", default="hey_jarvis")
    parser.add_argument("--notify-url", default="http://localhost:8080/wake")
    parser.add_argument("--threshold", type=float, default=0.5)
    parser.add_argument("--cooldown", type=float, default=3.0, help="Seconds between triggers")
    parser.add_argument("--model-dir", default=None, help="Path to custom .onnx wake word model")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
    )

    try:
        import pyaudio
    except ImportError:
        _LOGGER.error("pyaudio not installed. Run: pip install pyaudio")
        raise SystemExit(1)

    import openwakeword
    from openwakeword.model import Model

    _LOGGER.info("Loading wake word model: %s", args.wake_word)

    model_paths = []
    if args.model_dir:
        import os, glob
        model_paths = glob.glob(os.path.join(args.model_dir, "*.onnx"))

    oww = Model(
        wakeword_models=model_paths if model_paths else [args.wake_word],
        inference_framework="onnx",
    )

    audio = pyaudio.PyAudio()
    stream = audio.open(
        rate=SAMPLE_RATE,
        channels=1,
        format=pyaudio.paInt16,
        input=True,
        frames_per_buffer=CHUNK_SIZE,
    )

    _LOGGER.info("Listening for wake word '%s'...", args.wake_word)
    last_trigger = 0.0

    try:
        while True:
            raw = stream.read(CHUNK_SIZE, exception_on_overflow=False)
            chunk = np.frombuffer(raw, dtype=np.int16)
            oww.predict(chunk)

            for ww, scores in oww.prediction_buffer.items():
                score = scores[-1] if scores else 0.0
                if score >= args.threshold:
                    now = time.time()
                    if now - last_trigger >= args.cooldown:
                        last_trigger = now
                        notify(args.notify_url, ww, score)
    except KeyboardInterrupt:
        pass
    finally:
        stream.stop_stream()
        stream.close()
        audio.terminate()


if __name__ == "__main__":
    main()
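The daemon's trigger logic is a threshold gate plus a cooldown debounce, so a burst of consecutive high scores fires a single notification. Isolated as a sketch over a synthetic score stream, one score per ~80 ms chunk (helper and example scores not part of this commit):

```python
def count_triggers(scores, threshold=0.5, cooldown=3.0, frame_s=0.08):
    # Mirrors the daemon's loop: fire when score >= threshold, then
    # suppress further triggers for `cooldown` seconds.
    triggers = 0
    last_trigger = float("-inf")
    for i, score in enumerate(scores):
        now = i * frame_s  # synthetic clock, one frame per ~80 ms chunk
        if score >= threshold and now - last_trigger >= cooldown:
            last_trigger = now
            triggers += 1
    return triggers

# A burst of high scores over ~0.4 s counts as one wake event.
burst = [0.1, 0.2, 0.9, 0.95, 0.9, 0.6, 0.2, 0.1]
print(count_triggers(burst))  # -> 1
```

Without the cooldown, that burst would have POSTed to `--notify-url` four times; with it, Home Assistant sees exactly one wake event per utterance.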