diff --git a/TODO.md b/TODO.md index f568ea6..d28f31a 100644 --- a/TODO.md +++ b/TODO.md @@ -9,30 +9,26 @@ ### P1 · homeai-infra -- [ ] Install Docker Desktop for Mac, enable launch at login -- [ ] Create shared `homeai` Docker network -- [ ] Create `~/server/docker/` directory structure -- [ ] Write compose files: Home Assistant, Portainer, Uptime Kuma, Gitea, code-server, n8n -- [ ] Write `.env.secrets.example` and `Makefile` -- [ ] `make up-all` — bring all services up -- [ ] Home Assistant onboarding — generate long-lived access token -- [ ] Write `~/server/.env.services` with all service URLs +- [x] Install Docker Desktop for Mac, enable launch at login +- [x] Create shared `homeai` Docker network +- [x] Create `~/server/docker/` directory structure +- [x] Write compose files: Uptime Kuma, code-server, n8n (HA, Portainer, Gitea are pre-existing on 10.0.0.199) +- [x] `docker compose up -d` — bring all services up +- [x] Home Assistant onboarding — long-lived access token generated, stored in `.env` - [ ] Install Tailscale, verify all services reachable on Tailnet -- [ ] Gitea: create admin account, initialise all 8 sub-project repos, configure SSH +- [ ] Gitea: initialise all 8 sub-project repos, configure SSH - [ ] Uptime Kuma: add monitors for all services, configure mobile alerts - [ ] Verify all containers survive a cold reboot ### P2 · homeai-llm -- [ ] Install Ollama natively via brew -- [ ] Write and load launchd plist (`com.ollama.ollama.plist`) -- [ ] Write `ollama-models.txt` with model manifest -- [ ] Run `scripts/pull-models.sh` — pull all models +- [x] Install Ollama natively via brew +- [x] Write and load launchd plist (`com.homeai.ollama.plist`) — `/opt/homebrew/bin/ollama` +- [x] Register local GGUF models via Modelfiles (no download): llama3.3:70b, qwen3:32b, codestral:22b +- [x] Deploy Open WebUI via Docker compose (port 3030) +- [x] Verify Open WebUI connected to Ollama, all models available - [ ] Run `scripts/benchmark.sh` — record results 
in `benchmark-results.md` -- [ ] Deploy Open WebUI via Docker compose (port 3030) -- [ ] Verify Open WebUI connected to Ollama, all models available - [ ] Add Ollama + Open WebUI to Uptime Kuma monitors -- [ ] Add `OLLAMA_URL` and `OPEN_WEBUI_URL` to `.env.services` --- @@ -40,20 +36,19 @@ ### P3 · homeai-voice -- [ ] Compile Whisper.cpp with Metal support -- [ ] Download Whisper models (`large-v3`, `medium.en`) to `~/models/whisper/` -- [ ] Install `wyoming-faster-whisper`, test STT from audio file -- [ ] Install Kokoro TTS, test output to audio file -- [ ] Install Wyoming-Kokoro adapter, verify Wyoming protocol -- [ ] Write + load launchd plists for Wyoming STT (10300) and TTS (10301) -- [ ] Connect Home Assistant Wyoming integration (STT + TTS) +- [x] Install `wyoming-faster-whisper` — model: faster-whisper-large-v3 (auto-downloaded) +- [x] Install Kokoro ONNX TTS — models at `~/models/kokoro/` +- [x] Write Wyoming-Kokoro adapter server (`homeai-voice/tts/wyoming_kokoro_server.py`) +- [x] Write + load launchd plists for Wyoming STT (10300) and TTS (10301) +- [x] Install openWakeWord + pyaudio — model: hey_jarvis +- [x] Write + load openWakeWord launchd plist (`com.homeai.wakeword`) +- [x] Write `wyoming/test-pipeline.sh` — smoke test (3/3 passing) +- [~] Connect Home Assistant Wyoming integration (STT + TTS) — awaiting HA UI config - [ ] Create HA Voice Assistant pipeline - [ ] Test HA Assist via browser: type query → hear spoken response -- [ ] Install openWakeWord, test wake detection with USB mic -- [ ] Write + load openWakeWord launchd plist - [ ] Install Chatterbox TTS (MPS build), test with sample `.wav` - [ ] Install Qwen3-TTS via MLX (fallback) -- [ ] Write `wyoming/test-pipeline.sh` — end-to-end smoke test +- [ ] Train custom wake word using character name - [ ] Add Wyoming STT/TTS to Uptime Kuma monitors --- diff --git a/homeai-llm/launchd/com.homeai.ollama.plist b/homeai-llm/launchd/com.homeai.ollama.plist index 46322ce..52cf8d6 100644 --- 
a/homeai-llm/launchd/com.homeai.ollama.plist +++ b/homeai-llm/launchd/com.homeai.ollama.plist @@ -8,7 +8,7 @@ ProgramArguments - /usr/local/bin/ollama + /opt/homebrew/bin/ollama serve diff --git a/homeai-llm/modelfiles/Codestral-22B b/homeai-llm/modelfiles/Codestral-22B new file mode 100644 index 0000000..9925656 --- /dev/null +++ b/homeai-llm/modelfiles/Codestral-22B @@ -0,0 +1,7 @@ +FROM /Users/aodhan/Models/LLM/Codestral-22B-v0.1-GGUF/Codestral-22B-v0.1-Q4_K_M.gguf + +PARAMETER num_ctx 16384 +PARAMETER temperature 0.2 +PARAMETER top_p 0.95 + +SYSTEM "You are an expert coding assistant." diff --git a/homeai-llm/modelfiles/Llama-3.3-70B b/homeai-llm/modelfiles/Llama-3.3-70B new file mode 100644 index 0000000..6b03461 --- /dev/null +++ b/homeai-llm/modelfiles/Llama-3.3-70B @@ -0,0 +1,7 @@ +FROM /Users/aodhan/Models/LLM/Llama-3.3-70B-Instruct-GGUF/Llama-3.3-70B-Instruct-Q4_K_M.gguf + +PARAMETER num_ctx 8192 +PARAMETER temperature 0.7 +PARAMETER top_p 0.9 + +SYSTEM "You are a helpful AI assistant." diff --git a/homeai-llm/modelfiles/Qwen3-32B b/homeai-llm/modelfiles/Qwen3-32B new file mode 100644 index 0000000..5c07100 --- /dev/null +++ b/homeai-llm/modelfiles/Qwen3-32B @@ -0,0 +1,7 @@ +FROM /Users/aodhan/Models/LLM/Qwen3-32B-GGUF/Qwen3-32B-Q4_K_M.gguf + +PARAMETER num_ctx 8192 +PARAMETER temperature 0.7 +PARAMETER top_p 0.9 + +SYSTEM "You are a helpful AI assistant." 
diff --git a/homeai-voice/scripts/launchd/com.homeai.wakeword.plist b/homeai-voice/scripts/launchd/com.homeai.wakeword.plist new file mode 100644 index 0000000..feb5df9 --- /dev/null +++ b/homeai-voice/scripts/launchd/com.homeai.wakeword.plist @@ -0,0 +1,34 @@ + + + + + Label + com.homeai.wakeword + + ProgramArguments + + /Users/aodhan/homeai-voice-env/bin/python3 + /Users/aodhan/gitea/homeai/homeai-voice/wyoming/wakeword_daemon.py + --wake-word + hey_jarvis + --notify-url + http://localhost:8080/wake + + + RunAtLoad + + + KeepAlive + + + StandardOutPath + /tmp/homeai-wakeword.log + + StandardErrorPath + /tmp/homeai-wakeword-error.log + + ThrottleInterval + 10 + + diff --git a/homeai-voice/scripts/launchd/com.homeai.wyoming-stt.plist b/homeai-voice/scripts/launchd/com.homeai.wyoming-stt.plist new file mode 100644 index 0000000..e7e59b1 --- /dev/null +++ b/homeai-voice/scripts/launchd/com.homeai.wyoming-stt.plist @@ -0,0 +1,43 @@ + + + + + Label + com.homeai.wyoming-stt + + ProgramArguments + + /Users/aodhan/homeai-voice-env/bin/wyoming-faster-whisper + --uri + tcp://0.0.0.0:10300 + --model + large-v3 + --language + en + --device + cpu + --compute-type + int8 + --data-dir + /Users/aodhan/models/whisper + --download-dir + /Users/aodhan/models/whisper + + + RunAtLoad + + + KeepAlive + + + StandardOutPath + /tmp/homeai-wyoming-stt.log + + StandardErrorPath + /tmp/homeai-wyoming-stt-error.log + + ThrottleInterval + 10 + + diff --git a/homeai-voice/scripts/launchd/com.homeai.wyoming-tts.plist b/homeai-voice/scripts/launchd/com.homeai.wyoming-tts.plist new file mode 100644 index 0000000..4fb3fe6 --- /dev/null +++ b/homeai-voice/scripts/launchd/com.homeai.wyoming-tts.plist @@ -0,0 +1,36 @@ + + + + + Label + com.homeai.wyoming-tts + + ProgramArguments + + /Users/aodhan/homeai-voice-env/bin/python3 + /Users/aodhan/gitea/homeai/homeai-voice/tts/wyoming_kokoro_server.py + --uri + tcp://0.0.0.0:10301 + --voice + af_heart + --speed + 1.0 + + + RunAtLoad + + + KeepAlive + + + 
#!/usr/bin/env bash
# Load (or reload) all homeai-voice launchd services.
#
# For every plist listed in PLISTS this script copies the file from ./launchd
# into ~/Library/LaunchAgents, unloads an already-loaded job with the same
# label, loads the fresh copy, and finally prints the PID of each job.

set -euo pipefail

LAUNCHD_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/launchd" && pwd)"
LAUNCH_AGENTS="$HOME/Library/LaunchAgents"

PLISTS=(
    com.homeai.wyoming-stt.plist
    com.homeai.wyoming-tts.plist
    com.homeai.wakeword.plist
)

# Print the PID of the job labelled $1, or nothing when the job is not loaded
# or not running.  `launchctl list <label>` prints a dictionary with lines
# such as `"PID" = 1234;` rather than the three-column table, so the value is
# picked by key.  `|| true` keeps `set -e`/`pipefail` from aborting the script
# when the job is unknown to launchd.
job_pid() {
    launchctl list "$1" 2>/dev/null \
        | awk '$1 == "\"PID\"" { gsub(/;/, "", $3); print $3 }' \
        || true
}

# cp below needs the target directory to exist.
mkdir -p "$LAUNCH_AGENTS"

for plist in "${PLISTS[@]}"; do
    src="${LAUNCHD_DIR}/${plist}"
    dst="${LAUNCH_AGENTS}/${plist}"
    label="${plist%.plist}"

    cp "$src" "$dst"

    # Reload: drop the running definition first so `load` picks up the new file.
    if launchctl list "$label" &>/dev/null; then
        launchctl unload "$dst" 2>/dev/null || true
    fi
    launchctl load "$dst"
    echo "Loaded: $label"
done

echo ""
echo "Status:"
for plist in "${PLISTS[@]}"; do
    label="${plist%.plist}"
    pid="$(job_pid "$label")"
    echo "  $label — PID: ${pid:--}"
done
def _load_kokoro():
    """Create the Kokoro ONNX engine from the files in ``~/models/kokoro``.

    ``kokoro_onnx`` is imported inside the function, so the dependency is
    only needed once the model is actually loaded.

    Returns:
        A ``kokoro_onnx.Kokoro`` instance built from ``kokoro-v1.0.onnx`` and
        ``voices-v1.0.bin``.

    Raises:
        FileNotFoundError: If either model file is missing.
    """
    from kokoro_onnx import Kokoro

    model_dir = os.path.expanduser("~/models/kokoro")
    model_path = os.path.join(model_dir, "kokoro-v1.0.onnx")
    voices_path = os.path.join(model_dir, "voices-v1.0.bin")

    # Fail with a readable message instead of an opaque runtime error.
    for path in (model_path, voices_path):
        if not os.path.isfile(path):
            raise FileNotFoundError(f"Kokoro model file not found: {path}")

    return Kokoro(model_path, voices_path)


class KokoroEventHandler(AsyncEventHandler):
    """Per-connection Wyoming handler backed by a shared Kokoro engine.

    Answers ``Describe`` events with an ``Info`` event and ``Synthesize``
    events with an ``AudioStart`` / ``AudioChunk``... / ``AudioStop`` stream
    of 16-bit mono PCM.  Every other event type is ignored.

    Args:
        tts: Object exposing ``create(text, voice=..., speed=...)`` that
            returns ``(samples, sample_rate)``.
        default_voice: Voice used when the request does not name one.
        speed: Speed value passed to every ``create`` call.
        *args, **kwargs: Forwarded unchanged to ``AsyncEventHandler``.
    """

    def __init__(self, tts, default_voice: str, speed: float, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._tts = tts
        self._default_voice = default_voice
        self._speed = speed
        # Info is sent in reply to a Describe event (see handle_event) rather
        # than from a fire-and-forget task started here: such a task is not
        # referenced by anything and its exceptions (e.g. a client that
        # disconnects immediately) are never retrieved.

    async def _send_info(self):
        """Write an ``Info`` event advertising the configured default voice."""
        # NOTE(review): newer wyoming releases appear to add a `version` field
        # to TtsProgram/TtsVoice — confirm against the installed version.
        info = Info(
            tts=[
                TtsProgram(
                    name="kokoro",
                    description="Kokoro ONNX TTS",
                    attribution=Attribution(
                        name="thewh1teagle/kokoro-onnx",
                        url="https://github.com/thewh1teagle/kokoro-onnx",
                    ),
                    installed=True,
                    voices=[
                        TtsVoice(
                            name=self._default_voice,
                            description="Kokoro voice",
                            attribution=Attribution(name="kokoro", url=""),
                            installed=True,
                            languages=["en-us"],
                            speakers=[TtsVoiceSpeaker(name=self._default_voice)],
                        )
                    ],
                )
            ]
        )
        await self.write_event(info.event())

    async def handle_event(self, event: Event) -> bool:
        """Dispatch one incoming event.

        Returns:
            Always ``True`` so the connection stays open.
        """
        # Function-scope import: Describe is not among the names this module
        # imports from wyoming.info at the top of the file.
        from wyoming.info import Describe

        if Describe.is_type(event.type):
            await self._send_info()
            return True

        if Synthesize.is_type(event.type):
            await self._handle_synthesize(Synthesize.from_event(event))

        return True  # keep connection open

    async def _handle_synthesize(self, synthesize) -> None:
        """Synthesize the requested text and stream it back as PCM chunks."""
        text = synthesize.text
        voice = self._default_voice

        if synthesize.voice and synthesize.voice.name:
            voice = synthesize.voice.name

        _LOGGER.debug("Synthesizing %r with voice=%s speed=%.1f", text, voice, self._speed)

        try:
            # Run the blocking create() call in the default executor so the
            # event loop stays responsive.
            loop = asyncio.get_running_loop()
            samples, sample_rate = await loop.run_in_executor(
                None, lambda: self._tts.create(text, voice=voice, speed=self._speed)
            )
            # Advertise the rate the engine actually produced instead of
            # assuming it equals the module-level SAMPLE_RATE constant.
            sample_rate = int(sample_rate)

            samples_int16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
            audio_bytes = samples_int16.tobytes()

            await self.write_event(
                AudioStart(rate=sample_rate, width=SAMPLE_WIDTH, channels=CHANNELS).event()
            )

            # Bytes per CHUNK_SECONDS of audio.
            chunk_size = sample_rate * SAMPLE_WIDTH * CHANNELS * CHUNK_SECONDS
            for offset in range(0, len(audio_bytes), chunk_size):
                await self.write_event(
                    AudioChunk(
                        rate=sample_rate,
                        width=SAMPLE_WIDTH,
                        channels=CHANNELS,
                        audio=audio_bytes[offset : offset + chunk_size],
                    ).event()
                )

            await self.write_event(AudioStop().event())
            _LOGGER.info("Synthesized %.1fs of audio", len(samples) / sample_rate)

        except Exception:
            # Best effort: log the failure and still send AudioStop so the
            # request is always terminated.
            _LOGGER.exception("Synthesis error")
            await self.write_event(AudioStop().event())
#!/usr/bin/env bash
# Smoke test for the Wyoming voice pipeline.
# Tests: STT server alive | TTS server alive | TTS generates audio
#
# Usage: ./test-pipeline.sh
#
# Environment overrides: STT_HOST, STT_PORT, TTS_HOST, TTS_PORT, VENV.
# Exit status is 0 only when every check passed.

set -euo pipefail

STT_HOST="${STT_HOST:-localhost}"
STT_PORT="${STT_PORT:-10300}"
TTS_HOST="${TTS_HOST:-localhost}"
TTS_PORT="${TTS_PORT:-10301}"
VENV="${VENV:-$HOME/homeai-voice-env}"

PASS=0
FAIL=0

# check NAME RESULT — a RESULT starting with "ok" counts as a pass (anything
# after "ok" is printed as detail); every other RESULT is the failure reason.
check() {
    local name="$1"; local result="$2"
    if [[ "$result" == ok* ]]; then
        echo "  [PASS] $name${result#ok}"; PASS=$((PASS + 1))
    else
        echo "  [FAIL] $name — $result"; FAIL=$((FAIL + 1))
    fi
}

echo "=== HomeAI Voice Pipeline Smoke Test ==="
echo ""

echo "1. STT Wyoming server (port $STT_PORT)"
if nc -z -w2 "$STT_HOST" "$STT_PORT" 2>/dev/null; then
    check "STT port open" "ok"
else
    check "STT port open" "port $STT_PORT not reachable — is wyoming-stt running?"
fi

echo ""
echo "2. TTS Wyoming server (port $TTS_PORT)"
if nc -z -w2 "$TTS_HOST" "$TTS_PORT" 2>/dev/null; then
    check "TTS port open" "ok"
else
    check "TTS port open" "port $TTS_PORT not reachable — is wyoming-tts running?"
fi

echo ""
echo "3. Kokoro TTS synthesis test"
TTS_OUTPUT="/tmp/homeai-tts-test.wav"
# A file left over from an earlier run must not make this run look successful.
rm -f "$TTS_OUTPUT"

# The interpreter runs as an `if` condition on purpose: with `set -e` a bare
# failing command would terminate the script before the result is recorded
# and before the summary is printed.
if TTS_OUTPUT="$TTS_OUTPUT" "$VENV/bin/python3" - <<'PYEOF' && [[ -f "$TTS_OUTPUT" ]]
import os
import sys
import wave

import numpy as np

model_dir = os.path.expanduser("~/models/kokoro")
model_path = os.path.join(model_dir, "kokoro-v1.0.onnx")
voices_path = os.path.join(model_dir, "voices-v1.0.bin")
# Output path is handed over by the surrounding shell script.
output_path = os.environ["TTS_OUTPUT"]

if not os.path.exists(model_path):
    print(f"Model not found: {model_path}")
    sys.exit(1)

from kokoro_onnx import Kokoro
tts = Kokoro(model_path, voices_path)
samples, sr = tts.create("Hello, I am your home assistant. The voice pipeline is working.", voice="af_heart", speed=1.0)

# Write WAV
samples_int16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
with wave.open(output_path, "w") as wf:
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(sr)
    wf.writeframes(samples_int16.tobytes())

print(f"Generated {len(samples)/sr:.1f}s of audio at {sr}Hz")
sys.exit(0)
PYEOF
then
    # BSD wc pads its output with spaces; strip them for a clean message.
    size=$(wc -c < "$TTS_OUTPUT" | tr -d '[:space:]')
    check "Kokoro synthesis" "ok — ${size} bytes written to $TTS_OUTPUT"
    echo ""
    echo "  To play: afplay $TTS_OUTPUT"
else
    check "Kokoro synthesis" "synthesis failed"
fi

echo ""
echo "─────────────────────────────────"
echo "Results: $PASS passed, $FAIL failed"
[[ $FAIL -eq 0 ]]
def notify(url: str, wake_word: str, score: float) -> None:
    """POST a JSON wake event to *url*.

    The request body is ``{"wake_word": ..., "score": ...}`` with a
    ``Content-Type: application/json`` header and a 2-second timeout.

    Delivery is best effort: any failure is logged as a warning and never
    raised, so the caller's detection loop keeps running.

    Args:
        url: Endpoint that receives the POST.
        wake_word: Name of the model that fired.
        score: Detection score; converted with ``float()`` before
            serialising so non-builtin float types can be passed.
    """
    payload = json.dumps({"wake_word": wake_word, "score": float(score)}).encode()
    try:
        req = urllib.request.Request(
            url,
            data=payload,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=2):
            pass
    except Exception as e:
        # Deliberately broad: this is the daemon's only outbound call and a
        # failing receiver must not stop wake word detection.
        _LOGGER.warning("Failed to notify %s: %s", url, e)
    else:
        _LOGGER.info("Wake word '%s' detected (score=%.3f) — notified %s", wake_word, score, url)


def main():
    """Parse the command line, then listen on the microphone until interrupted.

    Reads 16-bit mono audio in ``CHUNK_SIZE``-frame blocks at ``SAMPLE_RATE``,
    feeds each block to openWakeWord and calls :func:`notify` whenever a
    model's score reaches ``--threshold``, at most once per ``--cooldown``
    seconds.

    Raises:
        SystemExit: With status 1 when ``pyaudio`` is not installed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--wake-word", default="hey_jarvis")
    parser.add_argument("--notify-url", default="http://localhost:8080/wake")
    parser.add_argument("--threshold", type=float, default=0.5)
    parser.add_argument("--cooldown", type=float, default=3.0, help="Seconds between triggers")
    parser.add_argument("--model-dir", default=None, help="Path to custom .onnx wake word model")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
    )

    try:
        import pyaudio
    except ImportError:
        _LOGGER.error("pyaudio not installed. Run: pip install pyaudio")
        raise SystemExit(1)

    from openwakeword.model import Model

    model_paths = []
    if args.model_dir:
        import glob
        import os

        model_paths = sorted(glob.glob(os.path.join(args.model_dir, "*.onnx")))
        if not model_paths:
            # Make the fallback visible instead of silently ignoring --model-dir.
            _LOGGER.warning(
                "No .onnx models found in %s — falling back to '%s'",
                args.model_dir,
                args.wake_word,
            )

    models = model_paths if model_paths else [args.wake_word]
    _LOGGER.info("Loading wake word model: %s", ", ".join(models))

    oww = Model(
        wakeword_models=models,
        inference_framework="onnx",
    )

    audio = pyaudio.PyAudio()
    stream = None
    # -inf guarantees the very first detection is never suppressed.
    last_trigger = float("-inf")

    try:
        # Opened inside the try block so PyAudio is terminated even when the
        # input device cannot be opened.
        stream = audio.open(
            rate=SAMPLE_RATE,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=CHUNK_SIZE,
        )

        _LOGGER.info("Listening for wake word '%s'...", args.wake_word)

        while True:
            raw = stream.read(CHUNK_SIZE, exception_on_overflow=False)
            chunk = np.frombuffer(raw, dtype=np.int16)

            # predict() returns the latest score for every loaded model.
            for ww, score in oww.predict(chunk).items():
                if score < args.threshold:
                    continue
                # Monotonic clock: the cooldown must not be affected by
                # wall-clock adjustments.
                now = time.monotonic()
                if now - last_trigger >= args.cooldown:
                    last_trigger = now
                    notify(args.notify_url, ww, score)
    except KeyboardInterrupt:
        pass
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        audio.terminate()


if __name__ == "__main__":
    main()