From 60eb89ea42d7713688664f6d9f5827d3637a907e Mon Sep 17 00:00:00 2001 From: Aodhan Collins Date: Tue, 17 Mar 2026 19:15:46 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20character=20system=20v2=20=E2=80=94=20s?= =?UTF-8?q?chema=20upgrade,=20memory=20system,=20per-character=20TTS=20rou?= =?UTF-8?q?ting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Character schema v2: background, dialogue_style, appearance, skills, gaze_presets with automatic v1→v2 migration. LLM-assisted character creation via Character MCP server. Two-tier memory system (personal per-character + general shared) with budget-based injection into LLM system prompt. Per-character TTS voice routing via state file — Wyoming TTS server reads active config to route between Kokoro (local) and ElevenLabs (cloud PCM 24kHz). Dashboard: memories page, conversation history, character profile on cards, auto-TTS engine selection from character config. Also includes VTube Studio expression bridge and ComfyUI API guide. 
Co-Authored-By: Claude Opus 4.6 --- .env.example | 7 +- CLAUDE.md | 96 +++- TODO.md | 88 ++- .../openclaw_conversation/const.py | 2 +- .../openclaw_conversation/conversation.py | 15 +- .../launchd/com.homeai.openclaw-bridge.plist | 2 + .../launchd/com.homeai.openclaw.plist | 2 + homeai-agent/openclaw-http-bridge.py | 381 +++++++++++-- .../launchd/com.homeai.dashboard.plist | 2 + homeai-dashboard/schema/character.schema.json | 48 +- homeai-dashboard/src/App.jsx | 13 + homeai-dashboard/src/components/ChatPanel.jsx | 20 +- .../src/components/ConversationList.jsx | 70 +++ .../src/components/MessageBubble.jsx | 100 +++- .../src/components/SettingsDrawer.jsx | 52 +- .../src/components/ThinkingIndicator.jsx | 15 +- .../src/hooks/useActiveCharacter.js | 28 + homeai-dashboard/src/hooks/useChat.js | 107 +++- .../src/hooks/useConversations.js | 66 +++ homeai-dashboard/src/hooks/useTtsPlayback.js | 6 +- homeai-dashboard/src/lib/SchemaValidator.js | 36 ++ homeai-dashboard/src/lib/api.js | 34 +- homeai-dashboard/src/lib/constants.js | 8 + homeai-dashboard/src/lib/conversationApi.js | 25 + homeai-dashboard/src/lib/memoryApi.js | 45 ++ homeai-dashboard/src/pages/Characters.jsx | 247 ++++++-- homeai-dashboard/src/pages/Chat.jsx | 185 +++--- homeai-dashboard/src/pages/Editor.jsx | 460 +++++++++++++-- homeai-dashboard/src/pages/Memories.jsx | 346 +++++++++++ homeai-dashboard/vite.config.js | 539 +++++++++++++++++- homeai-images/API_GUIDE.md | 219 +++++++ .../launchd/com.homeai.preload-models.plist | 14 +- homeai-llm/scripts/preload-models.sh | 80 ++- .../launchd/com.homeai.vtube-bridge.plist | 40 ++ homeai-visual/scripts/test-expressions.py | 170 ++++++ homeai-visual/setup.sh | 100 ++-- homeai-visual/vtube-bridge.py | 454 +++++++++++++++ .../launchd/com.homeai.wyoming-tts.plist | 6 + homeai-voice/tts/wyoming_kokoro_server.py | 127 ++++- 39 files changed, 3846 insertions(+), 409 deletions(-) create mode 100644 homeai-dashboard/src/components/ConversationList.jsx create mode 100644 
homeai-dashboard/src/hooks/useActiveCharacter.js create mode 100644 homeai-dashboard/src/hooks/useConversations.js create mode 100644 homeai-dashboard/src/lib/conversationApi.js create mode 100644 homeai-dashboard/src/lib/memoryApi.js create mode 100644 homeai-dashboard/src/pages/Memories.jsx create mode 100644 homeai-images/API_GUIDE.md create mode 100644 homeai-visual/launchd/com.homeai.vtube-bridge.plist create mode 100644 homeai-visual/scripts/test-expressions.py create mode 100644 homeai-visual/vtube-bridge.py diff --git a/.env.example b/.env.example index c58eddb..d442b20 100644 --- a/.env.example +++ b/.env.example @@ -9,6 +9,7 @@ OPENAI_API_KEY= DEEPSEEK_API_KEY= GEMINI_API_KEY= ELEVENLABS_API_KEY= +GAZE_API_KEY= # ─── Data & Paths ────────────────────────────────────────────────────────────── DATA_DIR=${HOME}/homeai-data @@ -40,10 +41,14 @@ OPEN_WEBUI_URL=http://localhost:3030 OLLAMA_PRIMARY_MODEL=llama3.3:70b OLLAMA_FAST_MODEL=qwen2.5:7b +# Medium model kept warm for voice pipeline (override per persona) +# Used by preload-models.sh keep-warm daemon +HOMEAI_MEDIUM_MODEL=qwen3.5:35b-a3b + # ─── P3: Voice ───────────────────────────────────────────────────────────────── WYOMING_STT_URL=tcp://localhost:10300 WYOMING_TTS_URL=tcp://localhost:10301 -ELEVENLABS_API_KEY= # Create at elevenlabs.io if using elevenlabs TTS engine +# ELEVENLABS_API_KEY is set above in API Keys section # ─── P4: Agent ───────────────────────────────────────────────────────────────── OPENCLAW_URL=http://localhost:8080 diff --git a/CLAUDE.md b/CLAUDE.md index 352482e..f7bd348 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -26,6 +26,7 @@ All AI inference runs locally on this machine. No cloud dependency required (clo ### AI & LLM - **Ollama** — local LLM runtime (target models: Llama 3.3 70B, Qwen 2.5 72B) +- **Model keep-warm daemon** — `preload-models.sh` runs as a loop, checks every 5 min, re-pins evicted models with `keep_alive=-1`. 
Keeps `qwen2.5:7b` (small/fast) and `$HOMEAI_MEDIUM_MODEL` (default: `qwen3.5:35b-a3b`) always loaded in VRAM. Medium model is configurable via env var for per-persona model assignment. - **Open WebUI** — browser-based chat interface, runs as Docker container ### Image Generation @@ -35,7 +36,8 @@ All AI inference runs locally on this machine. No cloud dependency required (clo ### Speech - **Whisper.cpp** — speech-to-text, optimised for Apple Silicon/Neural Engine -- **Kokoro TTS** — fast, lightweight text-to-speech (primary, low-latency) +- **Kokoro TTS** — fast, lightweight text-to-speech (primary, low-latency, local) +- **ElevenLabs TTS** — cloud voice cloning/synthesis (per-character voice ID, routed via state file) - **Chatterbox TTS** — voice cloning engine (Apple Silicon MPS optimised) - **Qwen3-TTS** — alternative voice cloning via MLX - **openWakeWord** — always-on wake word detection @@ -49,11 +51,13 @@ All AI inference runs locally on this machine. No cloud dependency required (clo ### AI Agent / Orchestration - **OpenClaw** — primary AI agent layer; receives voice commands, calls tools, manages personality - **n8n** — visual workflow automation (Docker), chains AI actions -- **mem0** — long-term memory layer for the AI character +- **Character Memory System** — two-tier JSON-based memories (personal per-character + general shared), injected into LLM system prompt with budget truncation ### Character & Personality -- **Character Manager** (built — see `character-manager.jsx`) — single config UI for personality, prompts, models, Live2D mappings, and notes -- Character config exports to JSON, consumed by OpenClaw system prompt and pipeline +- **Character Schema v2** — JSON spec with background, dialogue_style, appearance, skills, gaze_presets (v1 auto-migrated) +- **HomeAI Dashboard** — unified web app: character editor, chat, memory manager, service dashboard +- **Character MCP Server** — LLM-assisted character creation via Fandom wiki/Wikipedia lookup 
(Docker) +- Character config stored as JSON files in `~/homeai-data/characters/`, consumed by bridge for system prompt construction ### Visual Representation - **VTube Studio** — Live2D model display on desktop (macOS) and mobile (iOS/Android) @@ -85,47 +89,79 @@ All AI inference runs locally on this machine. No cloud dependency required (clo ESP32-S3-BOX-3 (room) → Wake word detected (openWakeWord, runs locally on device or Mac Mini) → Audio streamed to Mac Mini via Wyoming Satellite - → Whisper.cpp transcribes speech to text - → OpenClaw receives text + context - → Ollama LLM generates response (with character persona from system prompt) - → mem0 updates long-term memory + → Whisper MLX transcribes speech to text + → HA conversation agent → OpenClaw HTTP Bridge + → Bridge resolves character (satellite_id → character mapping) + → Bridge builds system prompt (profile + memories) and writes TTS config to state file + → OpenClaw CLI → Ollama LLM generates response → Response dispatched: - → Kokoro/Chatterbox renders TTS audio + → Wyoming TTS reads state file → routes to Kokoro (local) or ElevenLabs (cloud) → Audio sent back to ESP32-S3-BOX-3 (spoken response) → VTube Studio API triggered (expression + lip sync on desktop/mobile) → Home Assistant action called if applicable (lights, music, etc.) ``` +### Timeout Strategy + +The HTTP bridge checks Ollama `/api/ps` before each request to determine if the LLM is already loaded: + +| Layer | Warm (model loaded) | Cold (model loading) | +|---|---|---| +| HA conversation component | 200s | 200s | +| OpenClaw HTTP bridge | 60s | 180s | +| OpenClaw agent | 60s | 60s | + +The keep-warm daemon ensures models stay loaded, so cold starts should be rare (only after Ollama restarts or VRAM pressure). + --- ## Character System -The AI assistant has a defined personality managed via the Character Manager tool. +The AI assistant has a defined personality managed via the HomeAI Dashboard (character editor + memory manager). 
-Key config surfaces: -- **System prompt** — injected into every Ollama request -- **Voice clone reference** — `.wav` file path for Chatterbox/Qwen3-TTS -- **Live2D expression mappings** — idle, speaking, thinking, happy, error states -- **VTube Studio WebSocket triggers** — JSON map of events to expressions +### Character Schema v2 + +Each character is a JSON file in `~/homeai-data/characters/` with: +- **System prompt** — core personality, injected into every LLM request +- **Profile fields** — background, appearance, dialogue_style, skills array +- **TTS config** — engine (kokoro/elevenlabs), kokoro_voice, elevenlabs_voice_id, elevenlabs_model, speed +- **GAZE presets** — array of `{preset, trigger}` for image generation styles - **Custom prompt rules** — trigger/response overrides for specific contexts -- **mem0** — persistent memory that evolves over time -Character config JSON (exported from Character Manager) is the single source of truth consumed by all pipeline components. +### Memory System + +Two-tier memory stored as JSON in `~/homeai-data/memories/`: +- **Personal memories** (`personal/{character_id}.json`) — per-character, about user interactions +- **General memories** (`general.json`) — shared operational knowledge (tool usage, device info, routines) + +Memories are injected into the system prompt by the bridge with budget truncation (personal: 4000 chars, general: 3000 chars, newest first). + +### TTS Voice Routing + +The bridge writes the active character's TTS config to `~/homeai-data/active-tts-voice.json` before each request. The Wyoming TTS server reads this state file to determine which engine/voice to use: +- **Kokoro** — local, fast, uses `kokoro_voice` field (e.g., `af_heart`) +- **ElevenLabs** — cloud, uses `elevenlabs_voice_id` + `elevenlabs_model`, returns PCM 24kHz + +This works for both ESP32/HA pipeline and dashboard chat. --- ## Project Priorities -1. 
**Foundation** — Docker stack up (Home Assistant, Open WebUI, Portainer, Uptime Kuma) -2. **LLM** — Ollama running with target models, Open WebUI connected -3. **Voice pipeline** — Whisper → Ollama → Kokoro → Wyoming → Home Assistant -4. **OpenClaw** — installed, onboarded, connected to Ollama and Home Assistant -5. **ESP32-S3-BOX-3** — ESPHome flash, Wyoming Satellite, LVGL face -6. **Character system** — system prompt wired up, mem0 integrated, voice cloned -7. **VTube Studio** — model loaded, WebSocket API bridge written as OpenClaw skill -8. **ComfyUI** — image generation online, character-consistent model workflows -9. **Extended integrations** — n8n workflows, Music Assistant, Snapcast, Gitea, code-server -10. **Polish** — Authelia, Tailscale hardening, mobile companion, iOS widgets +1. **Foundation** — Docker stack up (Home Assistant, Open WebUI, Portainer, Uptime Kuma) ✅ +2. **LLM** — Ollama running with target models, Open WebUI connected ✅ +3. **Voice pipeline** — Whisper → Ollama → Kokoro → Wyoming → Home Assistant ✅ +4. **OpenClaw** — installed, onboarded, connected to Ollama and Home Assistant ✅ +5. **ESP32-S3-BOX-3** — ESPHome flash, Wyoming Satellite, display faces ✅ +6. **Character system** — schema v2, dashboard editor, memory system, per-character TTS routing ✅ +7. **Animated visual** — PNG/GIF character visual for the web assistant (initial visual layer) +8. **Android app** — companion app for mobile access to the assistant +9. **ComfyUI** — image generation online, character-consistent model workflows +10. **Extended integrations** — n8n workflows, Music Assistant, Snapcast, Gitea, code-server +11. 
**Polish** — Authelia, Tailscale hardening, iOS widgets + +### Stretch Goals +- **Live2D / VTube Studio** — full Live2D model with WebSocket API bridge (requires learning Live2D tooling) --- @@ -133,7 +169,11 @@ Character config JSON (exported from Character Manager) is the single source of - All Docker compose files: `~/server/docker/` - OpenClaw skills: `~/.openclaw/skills/` -- Character configs: `~/.openclaw/characters/` +- Character configs: `~/homeai-data/characters/` +- Character memories: `~/homeai-data/memories/` +- Conversation history: `~/homeai-data/conversations/` +- Active TTS state: `~/homeai-data/active-tts-voice.json` +- Satellite → character map: `~/homeai-data/satellite-map.json` - Whisper models: `~/models/whisper/` - Ollama models: managed by Ollama at `~/.ollama/models/` - ComfyUI models: `~/ComfyUI/models/` diff --git a/TODO.md b/TODO.md index 2547bae..15b45b2 100644 --- a/TODO.md +++ b/TODO.md @@ -26,7 +26,7 @@ - [x] Register local GGUF models via Modelfiles (no download): llama3.3:70b, qwen3:32b, codestral:22b, qwen2.5:7b - [x] Register additional models: EVA-LLaMA-3.33-70B, Midnight-Miqu-70B, QwQ-32B, Qwen3.5-35B, Qwen3-Coder-30B, Qwen3-VL-30B, GLM-4.6V-Flash, DeepSeek-R1-8B, gemma-3-27b - [x] Add qwen3.5:35b-a3b (MoE, Q8_0) — 26.7 tok/s, recommended for voice pipeline -- [x] Write model preload script + launchd service (keeps voice model in VRAM permanently) +- [x] Write model keep-warm daemon + launchd service (pins qwen2.5:7b + $HOMEAI_MEDIUM_MODEL in VRAM, checks every 5 min) - [x] Deploy Open WebUI via Docker compose (port 3030) - [x] Verify Open WebUI connected to Ollama, all models available - [x] Run pipeline benchmark (homeai-voice/scripts/benchmark_pipeline.py) — STT/LLM/TTS latency profiled @@ -82,7 +82,7 @@ - [x] Verify full voice → agent → HA action flow - [x] Add OpenClaw to Uptime Kuma monitors (Manual user action required) -### P5 · homeai-character *(can start alongside P4)* +### P5 · homeai-dashboard *(character system + 
dashboard)* - [x] Define and write `schema/character.schema.json` (v1) - [x] Write `characters/aria.json` — default character @@ -100,6 +100,15 @@ - [x] Add character profile management to dashboard — store/switch character configs with attached profile images - [x] Add TTS voice preview in character editor — Kokoro preview via OpenClaw bridge with loading state, custom text, stop control - [x] Merge homeai-character + homeai-desktop into unified homeai-dashboard (services, chat, characters, editor) +- [x] Upgrade character schema to v2 — background, dialogue_style, appearance, skills, gaze_presets (auto-migrate v1) +- [x] Add LLM-assisted character creation via Character MCP server (Fandom/Wikipedia lookup) +- [x] Add character memory system — personal (per-character) + general (shared) memories with dashboard UI +- [x] Add conversation history with per-conversation persistence +- [x] Wire character_id through full pipeline (dashboard → bridge → LLM system prompt) +- [x] Add TTS text cleaning — strip tags, asterisks, emojis, markdown before synthesis +- [x] Add per-character TTS voice routing — bridge writes state file, Wyoming server reads it +- [x] Add ElevenLabs TTS support in Wyoming server — cloud voice synthesis via state file routing +- [x] Dashboard auto-selects character's TTS engine/voice (Kokoro or ElevenLabs) - [ ] Deploy dashboard as Docker container or static site on Mac Mini --- @@ -123,50 +132,71 @@ - [ ] Flash remaining units (bedroom, kitchen) - [ ] Document MAC address → room name mapping +### P6b · homeai-rpi (Kitchen Satellite) + +- [x] Set up Wyoming Satellite on Raspberry Pi 5 (SELBINA) with ReSpeaker 2-Mics pHAT +- [x] Write setup.sh — full Pi provisioning (venv, drivers, systemd, scripts) +- [x] Write deploy.sh — remote deploy/manage from Mac Mini (push-wrapper, test-logs, etc.) 
+- [x] Write satellite_wrapper.py — monkey-patches fixing TTS echo, writer race, streaming timeout +- [x] Test multi-command voice loop without freezing + --- ## Phase 5 — Visual Layer ### P7 · homeai-visual -- [ ] Install VTube Studio (Mac App Store) -- [ ] Enable WebSocket API on port 8001 -- [ ] Source/purchase a Live2D model (nizima.com or booth.pm) -- [ ] Load model in VTube Studio -- [ ] Create hotkeys for all 8 expression states -- [ ] Write `skills/vtube_studio` SKILL.md + implementation -- [ ] Run auth flow — click Allow in VTube Studio, save token -- [ ] Test all 8 expressions via test script -- [ ] Update `aria.json` with real VTube Studio hotkey IDs -- [ ] Write `lipsync.py` amplitude-based helper -- [ ] Integrate lip sync into OpenClaw TTS dispatch -- [ ] Test full pipeline: voice → thinking expression → speaking with lip sync +#### VTube Studio Expression Bridge +- [x] Write `vtube-bridge.py` — persistent WebSocket ↔ HTTP bridge daemon (port 8002) +- [x] Write `vtube-ctl` CLI wrapper + OpenClaw skill (`~/.openclaw/skills/vtube-studio/`) +- [x] Wire expression triggers into `openclaw-http-bridge.py` (thinking → idle, speaking → idle) +- [x] Add amplitude-based lip sync to `wyoming_kokoro_server.py` (RMS → MouthOpen parameter) +- [x] Write `test-expressions.py` — auth flow, expression cycle, lip sync sweep, latency test +- [x] Write launchd plist + setup.sh for venv creation and service registration +- [ ] Install VTube Studio from Mac App Store, enable WebSocket API (port 8001) +- [ ] Source/purchase Live2D model, load in VTube Studio +- [ ] Create 8 expression hotkeys, record UUIDs +- [ ] Run `setup.sh` to create venv, install websockets, load launchd service +- [ ] Run `vtube-ctl auth` — click Allow in VTube Studio +- [ ] Update `aria.json` with real hotkey UUIDs (replace placeholders) +- [ ] Run `test-expressions.py --all` — verify expressions + lip sync + latency - [ ] Set up VTube Studio mobile (iPhone/iPad) on Tailnet +#### Web Visuals 
(Dashboard) +- [ ] Design PNG/GIF character visuals for web assistant (idle, thinking, speaking, etc.) +- [ ] Integrate animated visuals into homeai-dashboard chat view +- [ ] Sync visual state to voice pipeline events (listening, processing, responding) +- [ ] Add expression transitions and idle animations + +### P8 · homeai-android + +- [ ] Build Android companion app for mobile assistant access +- [ ] Integrate with OpenClaw bridge API (chat, TTS, STT) +- [ ] Add character visual display +- [ ] Push notification support via ntfy/FCM + --- ## Phase 6 — Image Generation -### P8 · homeai-images +### P9 · homeai-images (ComfyUI) - [ ] Clone ComfyUI to `~/ComfyUI/`, install deps in venv - [ ] Verify MPS is detected at launch - [ ] Write and load launchd plist (`com.homeai.comfyui.plist`) -- [ ] Download SDXL base model -- [ ] Download Flux.1-schnell -- [ ] Download ControlNet models (canny, depth) +- [ ] Download SDXL base model + Flux.1-schnell + ControlNet models - [ ] Test generation via ComfyUI web UI (port 8188) -- [ ] Build and export `quick.json`, `portrait.json`, `scene.json`, `upscale.json` workflows +- [ ] Build and export workflow JSONs (quick, portrait, scene, upscale) - [ ] Write `skills/comfyui` SKILL.md + implementation -- [ ] Test skill: "Generate a portrait of Aria looking happy" - [ ] Collect character reference images for LoRA training -- [ ] Train SDXL LoRA with kohya_ss, verify character consistency - [ ] Add ComfyUI to Uptime Kuma monitors --- ## Phase 7 — Extended Integrations & Polish +### P10 · Integrations & Polish + - [ ] Deploy Music Assistant (Docker), integrate with Home Assistant - [ ] Write `skills/music` SKILL.md for OpenClaw - [ ] Deploy Snapcast server on Mac Mini @@ -183,10 +213,24 @@ --- +## Stretch Goals + +### Live2D / VTube Studio + +- [ ] Learn Live2D modelling toolchain (Live2D Cubism Editor) +- [ ] Install VTube Studio (Mac App Store), enable WebSocket API on port 8001 +- [ ] Source/commission a Live2D model (nizima.com or 
booth.pm) +- [ ] Create hotkeys for expression states +- [ ] Write `skills/vtube_studio` SKILL.md + implementation +- [ ] Write `lipsync.py` amplitude-based helper +- [ ] Integrate lip sync into OpenClaw TTS dispatch +- [ ] Set up VTube Studio mobile (iPhone/iPad) on Tailnet + +--- + ## Open Decisions - [ ] Confirm character name (determines wake word training) -- [ ] Live2D model: purchase off-the-shelf or commission custom? - [ ] mem0 backend: Chroma (simple) vs Qdrant Docker (better semantic search)? - [ ] Snapcast output: ESP32 built-in speakers or dedicated audio hardware per room? - [ ] Authelia user store: local file vs LDAP? diff --git a/homeai-agent/custom_components/openclaw_conversation/const.py b/homeai-agent/custom_components/openclaw_conversation/const.py index c2f7411..a33fb57 100644 --- a/homeai-agent/custom_components/openclaw_conversation/const.py +++ b/homeai-agent/custom_components/openclaw_conversation/const.py @@ -12,7 +12,7 @@ CONF_TIMEOUT = "timeout" DEFAULT_HOST = "10.0.0.101" DEFAULT_PORT = 8081 # OpenClaw HTTP Bridge (not 8080 gateway) DEFAULT_AGENT = "main" -DEFAULT_TIMEOUT = 120 +DEFAULT_TIMEOUT = 200 # Must exceed bridge cold timeout (180s) # API endpoints OPENCLAW_API_PATH = "/api/agent/message" diff --git a/homeai-agent/custom_components/openclaw_conversation/conversation.py b/homeai-agent/custom_components/openclaw_conversation/conversation.py index a09d379..c15c7b3 100644 --- a/homeai-agent/custom_components/openclaw_conversation/conversation.py +++ b/homeai-agent/custom_components/openclaw_conversation/conversation.py @@ -77,12 +77,16 @@ class OpenClawAgent(AbstractConversationAgent): _LOGGER.debug("Processing message: %s", text) try: - response_text = await self._call_openclaw(text) - + response_text = await self._call_openclaw( + text, + satellite_id=getattr(user_input, "satellite_id", None), + device_id=getattr(user_input, "device_id", None), + ) + # Create proper IntentResponse for Home Assistant intent_response = 
IntentResponse(language=user_input.language or "en") intent_response.async_set_speech(response_text) - + return ConversationResult( response=intent_response, conversation_id=conversation_id, @@ -96,13 +100,14 @@ class OpenClawAgent(AbstractConversationAgent): conversation_id=conversation_id, ) - async def _call_openclaw(self, message: str) -> str: + async def _call_openclaw(self, message: str, satellite_id: str = None, device_id: str = None) -> str: """Call OpenClaw API and return the response.""" url = f"http://{self.host}:{self.port}{OPENCLAW_API_PATH}" - + payload = { "message": message, "agent": self.agent_name, + "satellite_id": satellite_id or device_id, } session = async_get_clientsession(self.hass) diff --git a/homeai-agent/launchd/com.homeai.openclaw-bridge.plist b/homeai-agent/launchd/com.homeai.openclaw-bridge.plist index 2d85ef6..785178b 100644 --- a/homeai-agent/launchd/com.homeai.openclaw-bridge.plist +++ b/homeai-agent/launchd/com.homeai.openclaw-bridge.plist @@ -35,6 +35,8 @@ PATH /opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin + ELEVENLABS_API_KEY + REDACTED_ROTATE_ME_AND_LOAD_FROM_ENV diff --git a/homeai-agent/launchd/com.homeai.openclaw.plist b/homeai-agent/launchd/com.homeai.openclaw.plist index 95f264e..7e9d2a2 100644 --- a/homeai-agent/launchd/com.homeai.openclaw.plist +++ b/homeai-agent/launchd/com.homeai.openclaw.plist @@ -28,6 +28,8 @@ eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJmZGQ1NzZlYWNkMTU0ZTY2ODY1OTkzYTlhNTIxM2FmNyIsImlhdCI6MTc3MjU4ODYyOCwiZXhwIjoyMDg3OTQ4NjI4fQ.CTAU1EZgpVLp_aRnk4vg6cQqwS5N-p8jQkAAXTxFmLY HASS_TOKEN eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJmZGQ1NzZlYWNkMTU0ZTY2ODY1OTkzYTlhNTIxM2FmNyIsImlhdCI6MTc3MjU4ODYyOCwiZXhwIjoyMDg3OTQ4NjI4fQ.CTAU1EZgpVLp_aRnk4vg6cQqwS5N-p8jQkAAXTxFmLY + GAZE_API_KEY + REDACTED_ROTATE_ME_AND_LOAD_FROM_ENV RunAtLoad diff --git a/homeai-agent/openclaw-http-bridge.py b/homeai-agent/openclaw-http-bridge.py index 2e1c6e9..82482c4
100644 --- a/homeai-agent/openclaw-http-bridge.py +++ b/homeai-agent/openclaw-http-bridge.py @@ -24,9 +24,12 @@ Endpoints: import argparse import json +import os import subprocess import sys import asyncio +import urllib.request +import threading from http.server import HTTPServer, BaseHTTPRequestHandler from socketserver import ThreadingMixIn from urllib.parse import urlparse @@ -40,19 +43,222 @@ from wyoming.asr import Transcribe, Transcript from wyoming.audio import AudioStart, AudioChunk, AudioStop from wyoming.info import Info +# Timeout settings (seconds) +TIMEOUT_WARM = 120 # Model already loaded in VRAM +TIMEOUT_COLD = 180 # Model needs loading first (~10-20s load + inference) +OLLAMA_PS_URL = "http://localhost:11434/api/ps" +VTUBE_BRIDGE_URL = "http://localhost:8002" -def load_character_prompt() -> str: - """Load the active character system prompt.""" - character_path = Path.home() / ".openclaw" / "characters" / "aria.json" + +def _vtube_fire_and_forget(path: str, data: dict): + """Send a non-blocking POST to the VTube Studio bridge. 
Failures are silent.""" + def _post(): + try: + body = json.dumps(data).encode() + req = urllib.request.Request( + f"{VTUBE_BRIDGE_URL}{path}", + data=body, + headers={"Content-Type": "application/json"}, + method="POST", + ) + urllib.request.urlopen(req, timeout=2) + except Exception: + pass # bridge may not be running — that's fine + threading.Thread(target=_post, daemon=True).start() + + +def is_model_warm() -> bool: + """Check if the default Ollama model is already loaded in VRAM.""" + try: + req = urllib.request.Request(OLLAMA_PS_URL) + with urllib.request.urlopen(req, timeout=2) as resp: + data = json.loads(resp.read()) + return len(data.get("models", [])) > 0 + except Exception: + # If we can't reach Ollama, assume cold (safer longer timeout) + return False + + +CHARACTERS_DIR = Path("/Users/aodhan/homeai-data/characters") +SATELLITE_MAP_PATH = Path("/Users/aodhan/homeai-data/satellite-map.json") +MEMORIES_DIR = Path("/Users/aodhan/homeai-data/memories") +ACTIVE_TTS_VOICE_PATH = Path("/Users/aodhan/homeai-data/active-tts-voice.json") + + +def clean_text_for_tts(text: str) -> str: + """Strip content that shouldn't be spoken: tags, asterisks, emojis, markdown.""" + # Remove HTML/XML tags and their content for common non-spoken tags + text = re.sub(r'<[^>]+>', '', text) + # Remove content between asterisks (actions/emphasis markup like *sighs*) + text = re.sub(r'\*[^*]+\*', '', text) + # Remove markdown bold/italic markers that might remain + text = re.sub(r'[*_]{1,3}', '', text) + # Remove markdown headers + text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE) + # Remove markdown links [text](url) → keep text + text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) + # Remove bare URLs + text = re.sub(r'https?://\S+', '', text) + # Remove code blocks and inline code + text = re.sub(r'```[\s\S]*?```', '', text) + text = re.sub(r'`[^`]+`', '', text) + # Remove emojis + text = re.sub( + r'[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF' + 
r'\U0001F1E0-\U0001F1FF\U0001F900-\U0001F9FF\U0001FA00-\U0001FAFF' + r'\U00002702-\U000027B0\U0000FE00-\U0000FE0F\U0000200D' + r'\U00002600-\U000026FF\U00002300-\U000023FF]+', '', text + ) + # Collapse multiple spaces/newlines + text = re.sub(r'\n{2,}', '\n', text) + text = re.sub(r'[ \t]{2,}', ' ', text) + return text.strip() + + +def load_satellite_map() -> dict: + """Load the satellite-to-character mapping.""" + try: + with open(SATELLITE_MAP_PATH) as f: + return json.load(f) + except Exception: + return {"default": "aria_default", "satellites": {}} + + +def set_active_tts_voice(character_id: str, tts_config: dict): + """Write the active TTS config to a state file for the Wyoming TTS server to read.""" + try: + ACTIVE_TTS_VOICE_PATH.parent.mkdir(parents=True, exist_ok=True) + state = { + "character_id": character_id, + "engine": tts_config.get("engine", "kokoro"), + "kokoro_voice": tts_config.get("kokoro_voice", ""), + "elevenlabs_voice_id": tts_config.get("elevenlabs_voice_id", ""), + "elevenlabs_model": tts_config.get("elevenlabs_model", "eleven_multilingual_v2"), + "speed": tts_config.get("speed", 1), + } + with open(ACTIVE_TTS_VOICE_PATH, "w") as f: + json.dump(state, f) + except Exception as e: + print(f"[OpenClaw Bridge] Warning: could not write active TTS config: {e}") + + +def resolve_character_id(satellite_id: str = None) -> str: + """Resolve a satellite ID to a character profile ID.""" + sat_map = load_satellite_map() + if satellite_id and satellite_id in sat_map.get("satellites", {}): + return sat_map["satellites"][satellite_id] + return sat_map.get("default", "aria_default") + + +def load_character(character_id: str = None) -> dict: + """Load a character profile by ID. 
Returns the full character data dict.""" + if not character_id: + character_id = resolve_character_id() + safe_id = character_id.replace("/", "_") + character_path = CHARACTERS_DIR / f"{safe_id}.json" if not character_path.exists(): - return "" + return {} try: with open(character_path) as f: - data = json.load(f) - return data.get("system_prompt", "") + profile = json.load(f) + return profile.get("data", {}) except Exception: + return {} + + +def load_character_prompt(satellite_id: str = None, character_id: str = None) -> str: + """Load the full system prompt for a character, resolved by satellite or explicit ID. + Builds a rich prompt from system_prompt + profile fields (background, dialogue_style, etc.).""" + if not character_id: + character_id = resolve_character_id(satellite_id) + char = load_character(character_id) + if not char: return "" + sections = [] + + # Core system prompt + prompt = char.get("system_prompt", "") + if prompt: + sections.append(prompt) + + # Character profile fields + profile_parts = [] + if char.get("background"): + profile_parts.append(f"## Background\n{char['background']}") + if char.get("appearance"): + profile_parts.append(f"## Appearance\n{char['appearance']}") + if char.get("dialogue_style"): + profile_parts.append(f"## Dialogue Style\n{char['dialogue_style']}") + if char.get("skills"): + skills = char["skills"] + if isinstance(skills, list): + skills_text = ", ".join(skills[:15]) + else: + skills_text = str(skills) + profile_parts.append(f"## Skills & Interests\n{skills_text}") + if profile_parts: + sections.append("[Character Profile]\n" + "\n\n".join(profile_parts)) + + # Character metadata + meta_lines = [] + if char.get("display_name"): + meta_lines.append(f"Your name is: {char['display_name']}") + # Support both v1 (gaze_preset string) and v2 (gaze_presets array) + gaze_presets = char.get("gaze_presets", []) + if gaze_presets and isinstance(gaze_presets, list): + for gp in gaze_presets: + preset = gp.get("preset", "") + 
trigger = gp.get("trigger", "self-portrait") + if preset: + meta_lines.append(f"GAZE preset '{preset}' — use for: {trigger}") + elif char.get("gaze_preset"): + meta_lines.append(f"Your gaze_preset for self-portraits is: {char['gaze_preset']}") + if meta_lines: + sections.append("[Character Metadata]\n" + "\n".join(meta_lines)) + + # Memories (personal + general) + personal, general = load_memories(character_id) + if personal: + sections.append("[Personal Memories]\n" + "\n".join(f"- {m}" for m in personal)) + if general: + sections.append("[General Knowledge]\n" + "\n".join(f"- {m}" for m in general)) + + return "\n\n".join(sections) + + +def load_memories(character_id: str) -> tuple[list[str], list[str]]: + """Load personal (per-character) and general memories. + Returns (personal_contents, general_contents) truncated to fit context budget.""" + PERSONAL_BUDGET = 4000 # max chars for personal memories in prompt + GENERAL_BUDGET = 3000 # max chars for general memories in prompt + + def _read_memories(path: Path, budget: int) -> list[str]: + try: + with open(path) as f: + data = json.load(f) + except Exception: + return [] + memories = data.get("memories", []) + # Sort newest first + memories.sort(key=lambda m: m.get("createdAt", ""), reverse=True) + result = [] + used = 0 + for m in memories: + content = m.get("content", "").strip() + if not content: + continue + if used + len(content) > budget: + break + result.append(content) + used += len(content) + return result + + safe_id = character_id.replace("/", "_") + personal = _read_memories(MEMORIES_DIR / "personal" / f"{safe_id}.json", PERSONAL_BUDGET) + general = _read_memories(MEMORIES_DIR / "general.json", GENERAL_BUDGET) + return personal, general + class OpenClawBridgeHandler(BaseHTTPRequestHandler): """HTTP request handler for OpenClaw bridge.""" @@ -95,44 +301,78 @@ class OpenClawBridgeHandler(BaseHTTPRequestHandler): self._send_json_response(404, {"error": "Not found"}) def _handle_tts_request(self): - 
"""Handle TTS request and return wav audio.""" + """Handle TTS request and return audio. Routes to Kokoro or ElevenLabs based on engine.""" content_length = int(self.headers.get("Content-Length", 0)) if content_length == 0: self._send_json_response(400, {"error": "Empty body"}) return - + try: body = self.rfile.read(content_length).decode() data = json.loads(body) except json.JSONDecodeError: self._send_json_response(400, {"error": "Invalid JSON"}) return - + text = data.get("text", "Hello, this is a test.") - # Strip emojis so TTS doesn't try to read them out - text = re.sub( - r'[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF' - r'\U0001F1E0-\U0001F1FF\U0001F900-\U0001F9FF\U0001FA00-\U0001FAFF' - r'\U00002702-\U000027B0\U0000FE00-\U0000FE0F\U0000200D' - r'\U00002600-\U000026FF\U00002300-\U000023FF]+', '', text - ).strip() + text = clean_text_for_tts(text) voice = data.get("voice", "af_heart") - + engine = data.get("engine", "kokoro") + try: - # Run the async Wyoming client - audio_bytes = asyncio.run(self._synthesize_audio(text, voice)) - - # Send WAV response + # Signal avatar: speaking + _vtube_fire_and_forget("/expression", {"event": "speaking"}) + + if engine == "elevenlabs": + audio_bytes, content_type = self._synthesize_elevenlabs(text, voice, data.get("model")) + else: + # Default: local Kokoro via Wyoming + audio_bytes = asyncio.run(self._synthesize_audio(text, voice)) + content_type = "audio/wav" + + # Signal avatar: idle + _vtube_fire_and_forget("/expression", {"event": "idle"}) + self.send_response(200) - self.send_header("Content-Type", "audio/wav") - # Allow CORS for local testing from Vite + self.send_header("Content-Type", content_type) self.send_header("Access-Control-Allow-Origin", "*") self.end_headers() self.wfile.write(audio_bytes) - + except Exception as e: + _vtube_fire_and_forget("/expression", {"event": "error"}) self._send_json_response(500, {"error": str(e)}) + def _synthesize_elevenlabs(self, text: str, voice_id: str, 
model: str = None) -> tuple[bytes, str]: + """Call ElevenLabs TTS API and return (audio_bytes, content_type).""" + api_key = os.environ.get("ELEVENLABS_API_KEY", "") + if not api_key: + raise RuntimeError("ELEVENLABS_API_KEY not set in environment") + if not voice_id: + raise RuntimeError("No ElevenLabs voice ID provided") + + model = model or "eleven_multilingual_v2" + url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}" + payload = json.dumps({ + "text": text, + "model_id": model, + "voice_settings": {"stability": 0.5, "similarity_boost": 0.75}, + }).encode() + + req = urllib.request.Request( + url, + data=payload, + headers={ + "Content-Type": "application/json", + "xi-api-key": api_key, + "Accept": "audio/mpeg", + }, + method="POST", + ) + with urllib.request.urlopen(req, timeout=30) as resp: + audio_bytes = resp.read() + return audio_bytes, "audio/mpeg" + def do_OPTIONS(self): """Handle CORS preflight requests.""" self.send_response(204) @@ -264,6 +504,43 @@ class OpenClawBridgeHandler(BaseHTTPRequestHandler): print(f"[OpenClaw Bridge] Wake word detected: {wake_word_data.get('wake_word', 'unknown')}") self._send_json_response(200, {"status": "ok", "message": "Wake word received"}) + @staticmethod + def _call_openclaw(message: str, agent: str, timeout: int) -> str: + """Call OpenClaw CLI and return stdout.""" + result = subprocess.run( + ["/opt/homebrew/bin/openclaw", "agent", "--message", message, "--agent", agent], + capture_output=True, + text=True, + timeout=timeout, + check=True, + ) + return result.stdout.strip() + + @staticmethod + def _needs_followup(response: str) -> bool: + """Detect if the model promised to act but didn't actually do it. 
+ Returns True if the response looks like a 'will do' without a result.""" + if not response: + return False + resp_lower = response.lower() + # If the response contains a URL or JSON-like output, it probably completed + if "http://" in response or "https://" in response or '"status"' in response: + return False + # If it contains a tool result indicator (ha-ctl output, gaze-ctl output) + if any(kw in resp_lower for kw in ["image_url", "seed", "entity_id", "state:", "turned on", "turned off"]): + return False + # Detect promise-like language without substance + promise_phrases = [ + "let me", "i'll ", "i will ", "sure thing", "sure,", "right away", + "generating", "one moment", "working on", "hang on", "just a moment", + "on it", "let me generate", "let me create", + ] + has_promise = any(phrase in resp_lower for phrase in promise_phrases) + # Short responses with promise language are likely incomplete + if has_promise and len(response) < 200: + return True + return False + def _handle_agent_request(self): """Handle agent message request.""" content_length = int(self.headers.get("Content-Length", 0)) @@ -280,29 +557,63 @@ class OpenClawBridgeHandler(BaseHTTPRequestHandler): message = data.get("message") agent = data.get("agent", "main") + satellite_id = data.get("satellite_id") + explicit_character_id = data.get("character_id") if not message: self._send_json_response(400, {"error": "Message is required"}) return - # Inject system prompt - system_prompt = load_character_prompt() + # Resolve character: explicit ID > satellite mapping > default + if explicit_character_id: + character_id = explicit_character_id + else: + character_id = resolve_character_id(satellite_id) + system_prompt = load_character_prompt(character_id=character_id) + + # Set the active TTS config for the Wyoming server to pick up + char = load_character(character_id) + tts_config = char.get("tts", {}) + if tts_config: + set_active_tts_voice(character_id, tts_config) + engine = 
tts_config.get("engine", "kokoro") + voice_label = tts_config.get("kokoro_voice", "") if engine == "kokoro" else tts_config.get("elevenlabs_voice_id", "") + print(f"[OpenClaw Bridge] Active TTS: {engine} / {voice_label}") + + if satellite_id: + print(f"[OpenClaw Bridge] Satellite: {satellite_id} → character: {character_id}") + elif explicit_character_id: + print(f"[OpenClaw Bridge] Character: {character_id}") if system_prompt: message = f"System Context: {system_prompt}\n\nUser Request: {message}" + # Check if model is warm to set appropriate timeout + warm = is_model_warm() + timeout = TIMEOUT_WARM if warm else TIMEOUT_COLD + print(f"[OpenClaw Bridge] Model {'warm' if warm else 'cold'}, timeout={timeout}s") + + # Signal avatar: thinking + _vtube_fire_and_forget("/expression", {"event": "thinking"}) + # Call OpenClaw CLI (use full path for launchd compatibility) try: - result = subprocess.run( - ["/opt/homebrew/bin/openclaw", "agent", "--message", message, "--agent", agent], - capture_output=True, - text=True, - timeout=120, - check=True - ) - response_text = result.stdout.strip() + response_text = self._call_openclaw(message, agent, timeout) + + # Re-prompt if the model promised to act but didn't call a tool. + # Detect "I'll do X" / "Let me X" responses that lack any result. + if self._needs_followup(response_text): + print(f"[OpenClaw Bridge] Response looks like a promise without action, re-prompting") + followup = ( + "You just said you would do something but didn't actually call the exec tool. " + "Do NOT explain what you will do — call the tool NOW using exec and return the result." 
+ ) + response_text = self._call_openclaw(followup, agent, timeout) + + # Signal avatar: idle (TTS handler will override to 'speaking' if voice is used) + _vtube_fire_and_forget("/expression", {"event": "idle"}) self._send_json_response(200, {"response": response_text}) except subprocess.TimeoutExpired: - self._send_json_response(504, {"error": "OpenClaw command timed out"}) + self._send_json_response(504, {"error": f"OpenClaw command timed out after {timeout}s (model was {'warm' if warm else 'cold'})"}) except subprocess.CalledProcessError as e: error_msg = e.stderr.strip() if e.stderr else "OpenClaw command failed" self._send_json_response(500, {"error": error_msg}) diff --git a/homeai-dashboard/launchd/com.homeai.dashboard.plist b/homeai-dashboard/launchd/com.homeai.dashboard.plist index 7235ef9..7c3bc69 100644 --- a/homeai-dashboard/launchd/com.homeai.dashboard.plist +++ b/homeai-dashboard/launchd/com.homeai.dashboard.plist @@ -24,6 +24,8 @@ /opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin HOME /Users/aodhan + GAZE_API_KEY + e63401f17e4845e1059f830267f839fe7fc7b6083b1cb1730863318754d799f4 RunAtLoad diff --git a/homeai-dashboard/schema/character.schema.json b/homeai-dashboard/schema/character.schema.json index bd524dc..ff222d0 100644 --- a/homeai-dashboard/schema/character.schema.json +++ b/homeai-dashboard/schema/character.schema.json @@ -1,15 +1,24 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "title": "HomeAI Character Config", - "version": "1", + "version": "2", "type": "object", "required": ["schema_version", "name", "system_prompt", "tts"], "properties": { - "schema_version": { "type": "integer", "const": 1 }, + "schema_version": { "type": "integer", "enum": [1, 2] }, "name": { "type": "string" }, "display_name": { "type": "string" }, "description": { "type": "string" }, + "background": { "type": "string", "description": "Backstory, lore, or general prompt enrichment" }, + "dialogue_style": { "type": "string", "description": "How the persona 
speaks or reacts, with example lines" }, + "appearance": { "type": "string", "description": "Physical description, also used for image prompting" }, + "skills": { + "type": "array", + "description": "Topics the persona specialises in or enjoys talking about", + "items": { "type": "string" } + }, + "system_prompt": { "type": "string" }, "model_overrides": { @@ -31,35 +40,21 @@ "voice_ref_path": { "type": "string" }, "kokoro_voice": { "type": "string" }, "elevenlabs_voice_id": { "type": "string" }, + "elevenlabs_voice_name": { "type": "string" }, "elevenlabs_model": { "type": "string", "default": "eleven_monolingual_v1" }, "speed": { "type": "number", "default": 1.0 } } }, - "live2d_expressions": { - "type": "object", - "description": "Maps semantic state to VTube Studio hotkey ID", - "properties": { - "idle": { "type": "string" }, - "listening": { "type": "string" }, - "thinking": { "type": "string" }, - "speaking": { "type": "string" }, - "happy": { "type": "string" }, - "sad": { "type": "string" }, - "surprised": { "type": "string" }, - "error": { "type": "string" } - } - }, - - "vtube_ws_triggers": { - "type": "object", - "description": "VTube Studio WebSocket actions keyed by event name", - "additionalProperties": { + "gaze_presets": { + "type": "array", + "description": "GAZE image generation presets with trigger conditions", + "items": { "type": "object", + "required": ["preset"], "properties": { - "type": { "type": "string", "enum": ["hotkey", "parameter"] }, - "id": { "type": "string" }, - "value": { "type": "number" } + "preset": { "type": "string" }, + "trigger": { "type": "string", "default": "self-portrait" } } } }, @@ -78,5 +73,6 @@ }, "notes": { "type": "string" } - } -} \ No newline at end of file + }, + "additionalProperties": true +} diff --git a/homeai-dashboard/src/App.jsx b/homeai-dashboard/src/App.jsx index b2694b5..f1cb18e 100644 --- a/homeai-dashboard/src/App.jsx +++ b/homeai-dashboard/src/App.jsx @@ -3,6 +3,7 @@ import Dashboard from 
'./pages/Dashboard'; import Chat from './pages/Chat'; import Characters from './pages/Characters'; import Editor from './pages/Editor'; +import Memories from './pages/Memories'; function NavItem({ to, children, icon }) { return ( @@ -77,6 +78,17 @@ function Layout({ children }) { Characters + + + + } + > + Memories + +
} /> } />
} /> +
} />
} /> diff --git a/homeai-dashboard/src/components/ChatPanel.jsx b/homeai-dashboard/src/components/ChatPanel.jsx index 47e8994..e1a602a 100644 --- a/homeai-dashboard/src/components/ChatPanel.jsx +++ b/homeai-dashboard/src/components/ChatPanel.jsx @@ -2,8 +2,10 @@ import { useEffect, useRef } from 'react' import MessageBubble from './MessageBubble' import ThinkingIndicator from './ThinkingIndicator' -export default function ChatPanel({ messages, isLoading, onReplay }) { +export default function ChatPanel({ messages, isLoading, onReplay, character }) { const bottomRef = useRef(null) + const name = character?.name || 'AI' + const image = character?.image || null useEffect(() => { bottomRef.current?.scrollIntoView({ behavior: 'smooth' }) @@ -13,10 +15,14 @@ export default function ChatPanel({ messages, isLoading, onReplay }) { return (
-
- AI -
-

Hi, I'm Aria

+ {image ? ( + {name} + ) : ( +
+ {name[0]} +
+ )} +

Hi, I'm {name}

Type a message or press the mic to talk

@@ -26,9 +32,9 @@ export default function ChatPanel({ messages, isLoading, onReplay }) { return (
{messages.map((msg) => ( - + ))} - {isLoading && } + {isLoading && }
) diff --git a/homeai-dashboard/src/components/ConversationList.jsx b/homeai-dashboard/src/components/ConversationList.jsx new file mode 100644 index 0000000..34b15d9 --- /dev/null +++ b/homeai-dashboard/src/components/ConversationList.jsx @@ -0,0 +1,70 @@ +function timeAgo(dateStr) { + if (!dateStr) return '' + const diff = Date.now() - new Date(dateStr).getTime() + const mins = Math.floor(diff / 60000) + if (mins < 1) return 'just now' + if (mins < 60) return `${mins}m ago` + const hours = Math.floor(mins / 60) + if (hours < 24) return `${hours}h ago` + const days = Math.floor(hours / 24) + return `${days}d ago` +} + +export default function ConversationList({ conversations, activeId, onCreate, onSelect, onDelete }) { + return ( +
+ {/* New chat button */} +
+ +
+ + {/* Conversation list */} +
+ {conversations.length === 0 ? ( +

No conversations yet

+ ) : ( + conversations.map(conv => ( +
onSelect(conv.id)} + className={`group flex items-start gap-2 px-3 py-2.5 cursor-pointer border-b border-gray-800/50 transition-colors ${ + conv.id === activeId + ? 'bg-gray-800 text-white' + : 'text-gray-400 hover:bg-gray-800/50 hover:text-gray-200' + }`} + > +
+

+ {conv.title || 'New conversation'} +

+
+ {conv.characterName && ( + {conv.characterName} + )} + {timeAgo(conv.updatedAt)} +
+
+ +
+ )) + )} +
+
+ ) +} diff --git a/homeai-dashboard/src/components/MessageBubble.jsx b/homeai-dashboard/src/components/MessageBubble.jsx index fb2af38..b0b8f6b 100644 --- a/homeai-dashboard/src/components/MessageBubble.jsx +++ b/homeai-dashboard/src/components/MessageBubble.jsx @@ -1,14 +1,100 @@ -export default function MessageBubble({ message, onReplay }) { +import { useState } from 'react' + +function Avatar({ character }) { + const name = character?.name || 'AI' + const image = character?.image || null + + if (image) { + return {name} + } + + return ( +
+ {name[0]} +
+ ) +} + +function ImageOverlay({ src, onClose }) { + return ( +
+ Full size e.stopPropagation()} + /> + +
+ ) +} + +const IMAGE_URL_RE = /(https?:\/\/[^\s]+\.(?:png|jpg|jpeg|gif|webp))/gi + +function RichContent({ text }) { + const [overlayImage, setOverlayImage] = useState(null) + const parts = [] + let lastIndex = 0 + let match + + IMAGE_URL_RE.lastIndex = 0 + while ((match = IMAGE_URL_RE.exec(text)) !== null) { + if (match.index > lastIndex) { + parts.push({ type: 'text', value: text.slice(lastIndex, match.index) }) + } + parts.push({ type: 'image', value: match[1] }) + lastIndex = IMAGE_URL_RE.lastIndex + } + if (lastIndex < text.length) { + parts.push({ type: 'text', value: text.slice(lastIndex) }) + } + + if (parts.length === 1 && parts[0].type === 'text') { + return <>{text} + } + + return ( + <> + {parts.map((part, i) => + part.type === 'image' ? ( + + ) : ( + {part.value} + ) + )} + {overlayImage && setOverlayImage(null)} />} + + ) +} + +export default function MessageBubble({ message, onReplay, character }) { const isUser = message.role === 'user' return (
- {!isUser && ( -
- AI -
- )} + {!isUser && }
- {message.content} + {isUser ? message.content : }
{!isUser && !message.isError && onReplay && (
+ {/* TTS Engine */} +
+ + +
+ {/* Voice */}
- + {isKokoro ? ( + + ) : ( +
+ onUpdate('voice', e.target.value)} + className="w-full bg-gray-800 text-gray-200 text-sm rounded-lg px-3 py-2 border border-gray-700 focus:outline-none focus:border-indigo-500" + placeholder={settings.ttsEngine === 'elevenlabs' ? 'ElevenLabs voice ID' : 'Voice identifier'} + readOnly + /> +

+ Set via active character profile +

+
+ )}
{/* Auto TTS */} diff --git a/homeai-dashboard/src/components/ThinkingIndicator.jsx b/homeai-dashboard/src/components/ThinkingIndicator.jsx index 852e44b..dc5a55a 100644 --- a/homeai-dashboard/src/components/ThinkingIndicator.jsx +++ b/homeai-dashboard/src/components/ThinkingIndicator.jsx @@ -1,9 +1,16 @@ -export default function ThinkingIndicator() { +export default function ThinkingIndicator({ character }) { + const name = character?.name || 'AI' + const image = character?.image || null + return (
-
- AI -
+ {image ? ( + {name} + ) : ( +
+ {name[0]} +
+ )}
diff --git a/homeai-dashboard/src/hooks/useActiveCharacter.js b/homeai-dashboard/src/hooks/useActiveCharacter.js new file mode 100644 index 0000000..941f1b5 --- /dev/null +++ b/homeai-dashboard/src/hooks/useActiveCharacter.js @@ -0,0 +1,28 @@ +import { useState, useEffect } from 'react' + +const ACTIVE_KEY = 'homeai_active_character' + +export function useActiveCharacter() { + const [character, setCharacter] = useState(null) + + useEffect(() => { + const activeId = localStorage.getItem(ACTIVE_KEY) + if (!activeId) return + + fetch(`/api/characters/${activeId}`) + .then(r => r.ok ? r.json() : null) + .then(profile => { + if (profile) { + setCharacter({ + id: profile.id, + name: profile.data.display_name || profile.data.name || 'AI', + image: profile.image || null, + tts: profile.data.tts || null, + }) + } + }) + .catch(() => {}) + }, []) + + return character +} diff --git a/homeai-dashboard/src/hooks/useChat.js b/homeai-dashboard/src/hooks/useChat.js index c015cb5..591c79e 100644 --- a/homeai-dashboard/src/hooks/useChat.js +++ b/homeai-dashboard/src/hooks/useChat.js @@ -1,45 +1,124 @@ -import { useState, useCallback } from 'react' +import { useState, useCallback, useEffect, useRef } from 'react' import { sendMessage } from '../lib/api' +import { getConversation, saveConversation } from '../lib/conversationApi' -export function useChat() { +export function useChat(conversationId, conversationMeta, onConversationUpdate) { const [messages, setMessages] = useState([]) const [isLoading, setIsLoading] = useState(false) + const [isLoadingConv, setIsLoadingConv] = useState(false) + const convRef = useRef(null) + const idRef = useRef(conversationId) - const send = useCallback(async (text) => { + // Keep idRef in sync + useEffect(() => { idRef.current = conversationId }, [conversationId]) + + // Load conversation from server when ID changes + useEffect(() => { + if (!conversationId) { + setMessages([]) + convRef.current = null + return + } + + let cancelled = false + 
setIsLoadingConv(true) + + getConversation(conversationId).then(conv => { + if (cancelled) return + if (conv) { + convRef.current = conv + setMessages(conv.messages || []) + } else { + convRef.current = null + setMessages([]) + } + setIsLoadingConv(false) + }).catch(() => { + if (!cancelled) { + convRef.current = null + setMessages([]) + setIsLoadingConv(false) + } + }) + + return () => { cancelled = true } + }, [conversationId]) + + // Persist conversation to server + const persist = useCallback(async (updatedMessages, title, overrideId) => { + const id = overrideId || idRef.current + if (!id) return + const now = new Date().toISOString() + const conv = { + id, + title: title || convRef.current?.title || '', + characterId: conversationMeta?.characterId || convRef.current?.characterId || '', + characterName: conversationMeta?.characterName || convRef.current?.characterName || '', + createdAt: convRef.current?.createdAt || now, + updatedAt: now, + messages: updatedMessages, + } + convRef.current = conv + await saveConversation(conv).catch(() => {}) + if (onConversationUpdate) { + onConversationUpdate(id, { + title: conv.title, + updatedAt: conv.updatedAt, + messageCount: conv.messages.length, + }) + } + }, [conversationMeta, onConversationUpdate]) + + // send accepts an optional overrideId for when the conversation was just created + const send = useCallback(async (text, overrideId) => { if (!text.trim() || isLoading) return null - const userMsg = { id: Date.now(), role: 'user', content: text.trim(), timestamp: new Date() } - setMessages((prev) => [...prev, userMsg]) + const userMsg = { id: Date.now(), role: 'user', content: text.trim(), timestamp: new Date().toISOString() } + const isFirstMessage = messages.length === 0 + const newMessages = [...messages, userMsg] + setMessages(newMessages) setIsLoading(true) try { - const response = await sendMessage(text.trim()) + const response = await sendMessage(text.trim(), conversationMeta?.characterId || null) const 
assistantMsg = { id: Date.now() + 1, role: 'assistant', content: response, - timestamp: new Date(), + timestamp: new Date().toISOString(), } - setMessages((prev) => [...prev, assistantMsg]) + const allMessages = [...newMessages, assistantMsg] + setMessages(allMessages) + + const title = isFirstMessage + ? text.trim().slice(0, 80) + (text.trim().length > 80 ? '...' : '') + : undefined + await persist(allMessages, title, overrideId) + return response } catch (err) { const errorMsg = { id: Date.now() + 1, role: 'assistant', content: `Error: ${err.message}`, - timestamp: new Date(), + timestamp: new Date().toISOString(), isError: true, } - setMessages((prev) => [...prev, errorMsg]) + const allMessages = [...newMessages, errorMsg] + setMessages(allMessages) + await persist(allMessages, undefined, overrideId) return null } finally { setIsLoading(false) } - }, [isLoading]) + }, [isLoading, messages, persist]) - const clearHistory = useCallback(() => { + const clearHistory = useCallback(async () => { setMessages([]) - }, []) + if (idRef.current) { + await persist([], undefined) + } + }, [persist]) - return { messages, isLoading, send, clearHistory } + return { messages, isLoading, isLoadingConv, send, clearHistory } } diff --git a/homeai-dashboard/src/hooks/useConversations.js b/homeai-dashboard/src/hooks/useConversations.js new file mode 100644 index 0000000..8dd36a1 --- /dev/null +++ b/homeai-dashboard/src/hooks/useConversations.js @@ -0,0 +1,66 @@ +import { useState, useEffect, useCallback } from 'react' +import { listConversations, saveConversation, deleteConversation as deleteConv } from '../lib/conversationApi' + +const ACTIVE_KEY = 'homeai_active_conversation' + +export function useConversations() { + const [conversations, setConversations] = useState([]) + const [activeId, setActiveId] = useState(() => localStorage.getItem(ACTIVE_KEY) || null) + const [isLoading, setIsLoading] = useState(true) + + const loadList = useCallback(async () => { + try { + const list = 
await listConversations() + setConversations(list) + } catch { + setConversations([]) + } finally { + setIsLoading(false) + } + }, []) + + useEffect(() => { loadList() }, [loadList]) + + const select = useCallback((id) => { + setActiveId(id) + if (id) { + localStorage.setItem(ACTIVE_KEY, id) + } else { + localStorage.removeItem(ACTIVE_KEY) + } + }, []) + + const create = useCallback(async (characterId, characterName) => { + const id = `conv_${Date.now()}` + const now = new Date().toISOString() + const conv = { + id, + title: '', + characterId: characterId || '', + characterName: characterName || '', + createdAt: now, + updatedAt: now, + messages: [], + } + await saveConversation(conv) + setConversations(prev => [{ ...conv, messageCount: 0 }, ...prev]) + select(id) + return id + }, [select]) + + const remove = useCallback(async (id) => { + await deleteConv(id) + setConversations(prev => prev.filter(c => c.id !== id)) + if (activeId === id) { + select(null) + } + }, [activeId, select]) + + const updateMeta = useCallback((id, updates) => { + setConversations(prev => prev.map(c => + c.id === id ? 
{ ...c, ...updates } : c + )) + }, []) + + return { conversations, activeId, isLoading, select, create, remove, updateMeta, refresh: loadList } +} diff --git a/homeai-dashboard/src/hooks/useTtsPlayback.js b/homeai-dashboard/src/hooks/useTtsPlayback.js index 52d325d..199bf61 100644 --- a/homeai-dashboard/src/hooks/useTtsPlayback.js +++ b/homeai-dashboard/src/hooks/useTtsPlayback.js @@ -1,7 +1,7 @@ import { useState, useRef, useCallback } from 'react' import { synthesize } from '../lib/api' -export function useTtsPlayback(voice) { +export function useTtsPlayback(voice, engine = 'kokoro', model = null) { const [isPlaying, setIsPlaying] = useState(false) const audioCtxRef = useRef(null) const sourceRef = useRef(null) @@ -23,7 +23,7 @@ export function useTtsPlayback(voice) { setIsPlaying(true) try { - const audioData = await synthesize(text, voice) + const audioData = await synthesize(text, voice, engine, model) const ctx = getAudioContext() if (ctx.state === 'suspended') await ctx.resume() @@ -42,7 +42,7 @@ export function useTtsPlayback(voice) { console.error('TTS playback error:', err) setIsPlaying(false) } - }, [voice]) + }, [voice, engine, model]) const stop = useCallback(() => { if (sourceRef.current) { diff --git a/homeai-dashboard/src/lib/SchemaValidator.js b/homeai-dashboard/src/lib/SchemaValidator.js index c28b1fb..7bfdd4c 100644 --- a/homeai-dashboard/src/lib/SchemaValidator.js +++ b/homeai-dashboard/src/lib/SchemaValidator.js @@ -4,7 +4,43 @@ import schema from '../../schema/character.schema.json' const ajv = new Ajv({ allErrors: true, strict: false }) const validate = ajv.compile(schema) +/** + * Migrate a v1 character config to v2 in-place. + * Removes live2d/vtube fields, converts gaze_preset to gaze_presets array, + * and initialises new persona fields. 
+ */ +export function migrateV1toV2(config) { + config.schema_version = 2 + + // Remove deprecated fields + delete config.live2d_expressions + delete config.vtube_ws_triggers + + // Convert single gaze_preset string → gaze_presets array + if ('gaze_preset' in config) { + const old = config.gaze_preset + config.gaze_presets = old ? [{ preset: old, trigger: 'self-portrait' }] : [] + delete config.gaze_preset + } + if (!config.gaze_presets) { + config.gaze_presets = [] + } + + // Initialise new fields if absent + if (config.background === undefined) config.background = '' + if (config.dialogue_style === undefined) config.dialogue_style = '' + if (config.appearance === undefined) config.appearance = '' + if (config.skills === undefined) config.skills = [] + + return config +} + export function validateCharacter(config) { + // Auto-migrate v1 → v2 + if (config.schema_version === 1 || config.schema_version === undefined) { + migrateV1toV2(config) + } + const valid = validate(config) if (!valid) { throw new Error(ajv.errorsText(validate.errors)) diff --git a/homeai-dashboard/src/lib/api.js b/homeai-dashboard/src/lib/api.js index 63daa68..7718d05 100644 --- a/homeai-dashboard/src/lib/api.js +++ b/homeai-dashboard/src/lib/api.js @@ -1,8 +1,30 @@ -export async function sendMessage(text) { - const res = await fetch('/api/agent/message', { +const MAX_RETRIES = 3 +const RETRY_DELAY_MS = 2000 + +async function fetchWithRetry(url, options, retries = MAX_RETRIES) { + for (let attempt = 1; attempt <= retries; attempt++) { + try { + const res = await fetch(url, options) + if (res.status === 502 && attempt < retries) { + // Bridge unreachable — wait and retry + await new Promise(r => setTimeout(r, RETRY_DELAY_MS * attempt)) + continue + } + return res + } catch (err) { + if (attempt >= retries) throw err + await new Promise(r => setTimeout(r, RETRY_DELAY_MS * attempt)) + } + } +} + +export async function sendMessage(text, characterId = null) { + const payload = { message: text, 
agent: 'main' } + if (characterId) payload.character_id = characterId + const res = await fetchWithRetry('/api/agent/message', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ message: text, agent: 'main' }), + body: JSON.stringify(payload), }) if (!res.ok) { const err = await res.json().catch(() => ({ error: 'Request failed' })) @@ -12,11 +34,13 @@ export async function sendMessage(text) { return data.response } -export async function synthesize(text, voice) { +export async function synthesize(text, voice, engine = 'kokoro', model = null) { + const payload = { text, voice, engine } + if (model) payload.model = model const res = await fetch('/api/tts', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ text, voice }), + body: JSON.stringify(payload), }) if (!res.ok) throw new Error('TTS failed') return await res.arrayBuffer() diff --git a/homeai-dashboard/src/lib/constants.js b/homeai-dashboard/src/lib/constants.js index 4851015..f3bacf8 100644 --- a/homeai-dashboard/src/lib/constants.js +++ b/homeai-dashboard/src/lib/constants.js @@ -30,7 +30,15 @@ export const VOICES = [ { id: 'bm_lewis', label: 'Lewis (M, UK)' }, ] +export const TTS_ENGINES = [ + { id: 'kokoro', label: 'Kokoro (local)' }, + { id: 'chatterbox', label: 'Chatterbox (voice clone)' }, + { id: 'qwen3', label: 'Qwen3 TTS' }, + { id: 'elevenlabs', label: 'ElevenLabs (cloud)' }, +] + export const DEFAULT_SETTINGS = { + ttsEngine: 'kokoro', voice: DEFAULT_VOICE, autoTts: true, sttMode: 'bridge', diff --git a/homeai-dashboard/src/lib/conversationApi.js b/homeai-dashboard/src/lib/conversationApi.js new file mode 100644 index 0000000..c638b58 --- /dev/null +++ b/homeai-dashboard/src/lib/conversationApi.js @@ -0,0 +1,25 @@ +export async function listConversations() { + const res = await fetch('/api/conversations') + if (!res.ok) throw new Error(`Failed to list conversations: ${res.status}`) + return res.json() +} + +export 
async function getConversation(id) { + const res = await fetch(`/api/conversations/${encodeURIComponent(id)}`) + if (!res.ok) return null + return res.json() +} + +export async function saveConversation(conversation) { + const res = await fetch('/api/conversations', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(conversation), + }) + if (!res.ok) throw new Error(`Failed to save conversation: ${res.status}`) +} + +export async function deleteConversation(id) { + const res = await fetch(`/api/conversations/${encodeURIComponent(id)}`, { method: 'DELETE' }) + if (!res.ok) throw new Error(`Failed to delete conversation: ${res.status}`) +} diff --git a/homeai-dashboard/src/lib/memoryApi.js b/homeai-dashboard/src/lib/memoryApi.js new file mode 100644 index 0000000..e1378d6 --- /dev/null +++ b/homeai-dashboard/src/lib/memoryApi.js @@ -0,0 +1,45 @@ +export async function getPersonalMemories(characterId) { + const res = await fetch(`/api/memories/personal/${encodeURIComponent(characterId)}`) + if (!res.ok) return { characterId, memories: [] } + return res.json() +} + +export async function savePersonalMemory(characterId, memory) { + const res = await fetch(`/api/memories/personal/${encodeURIComponent(characterId)}`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(memory), + }) + if (!res.ok) throw new Error(`Failed to save memory: ${res.status}`) + return res.json() +} + +export async function deletePersonalMemory(characterId, memoryId) { + const res = await fetch(`/api/memories/personal/${encodeURIComponent(characterId)}/${encodeURIComponent(memoryId)}`, { + method: 'DELETE', + }) + if (!res.ok) throw new Error(`Failed to delete memory: ${res.status}`) +} + +export async function getGeneralMemories() { + const res = await fetch('/api/memories/general') + if (!res.ok) return { memories: [] } + return res.json() +} + +export async function saveGeneralMemory(memory) { + const res = 
await fetch('/api/memories/general', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(memory), + }) + if (!res.ok) throw new Error(`Failed to save memory: ${res.status}`) + return res.json() +} + +export async function deleteGeneralMemory(memoryId) { + const res = await fetch(`/api/memories/general/${encodeURIComponent(memoryId)}`, { + method: 'DELETE', + }) + if (!res.ok) throw new Error(`Failed to delete memory: ${res.status}`) +} diff --git a/homeai-dashboard/src/pages/Characters.jsx b/homeai-dashboard/src/pages/Characters.jsx index 3425b49..5bab01a 100644 --- a/homeai-dashboard/src/pages/Characters.jsx +++ b/homeai-dashboard/src/pages/Characters.jsx @@ -1,23 +1,9 @@ -import { useState, useEffect } from 'react'; +import { useState, useEffect, useCallback } from 'react'; import { useNavigate } from 'react-router-dom'; import { validateCharacter } from '../lib/SchemaValidator'; -const STORAGE_KEY = 'homeai_characters'; const ACTIVE_KEY = 'homeai_active_character'; -function loadProfiles() { - try { - const raw = localStorage.getItem(STORAGE_KEY); - return raw ? 
JSON.parse(raw) : []; - } catch { - return []; - } -} - -function saveProfiles(profiles) { - localStorage.setItem(STORAGE_KEY, JSON.stringify(profiles)); -} - function getActiveId() { return localStorage.getItem(ACTIVE_KEY) || null; } @@ -27,15 +13,52 @@ function setActiveId(id) { } export default function Characters() { - const [profiles, setProfiles] = useState(loadProfiles); + const [profiles, setProfiles] = useState([]); const [activeId, setActive] = useState(getActiveId); const [error, setError] = useState(null); const [dragOver, setDragOver] = useState(false); + const [loading, setLoading] = useState(true); + const [satMap, setSatMap] = useState({ default: '', satellites: {} }); + const [newSatId, setNewSatId] = useState(''); + const [newSatChar, setNewSatChar] = useState(''); const navigate = useNavigate(); + // Load profiles and satellite map on mount useEffect(() => { - saveProfiles(profiles); - }, [profiles]); + Promise.all([ + fetch('/api/characters').then(r => r.json()), + fetch('/api/satellite-map').then(r => r.json()), + ]) + .then(([chars, map]) => { + setProfiles(chars); + setSatMap(map); + setLoading(false); + }) + .catch(err => { setError(`Failed to load: ${err.message}`); setLoading(false); }); + }, []); + + const saveSatMap = useCallback(async (updated) => { + setSatMap(updated); + await fetch('/api/satellite-map', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(updated), + }); + }, []); + + const saveProfile = useCallback(async (profile) => { + const res = await fetch('/api/characters', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(profile), + }); + if (!res.ok) throw new Error('Failed to save profile'); + }, []); + + const deleteProfile = useCallback(async (id) => { + const safeId = id.replace(/[^a-zA-Z0-9_\-\.]/g, '_'); + await fetch(`/api/characters/${safeId}`, { method: 'DELETE' }); + }, []); const handleImport = (e) => { const files = 
Array.from(e.target?.files || []); @@ -47,12 +70,14 @@ export default function Characters() { files.forEach(file => { if (!file.name.endsWith('.json')) return; const reader = new FileReader(); - reader.onload = (ev) => { + reader.onload = async (ev) => { try { const data = JSON.parse(ev.target.result); validateCharacter(data); const id = data.name + '_' + Date.now(); - setProfiles(prev => [...prev, { id, data, image: null, addedAt: new Date().toISOString() }]); + const profile = { id, data, image: null, addedAt: new Date().toISOString() }; + await saveProfile(profile); + setProfiles(prev => [...prev, profile]); setError(null); } catch (err) { setError(`Import failed for ${file.name}: ${err.message}`); @@ -73,15 +98,17 @@ export default function Characters() { const file = e.target.files[0]; if (!file) return; const reader = new FileReader(); - reader.onload = (ev) => { - setProfiles(prev => - prev.map(p => p.id === profileId ? { ...p, image: ev.target.result } : p) - ); + reader.onload = async (ev) => { + const updated = profiles.map(p => p.id === profileId ? 
{ ...p, image: ev.target.result } : p); + const profile = updated.find(p => p.id === profileId); + if (profile) await saveProfile(profile); + setProfiles(updated); }; reader.readAsDataURL(file); }; - const removeProfile = (id) => { + const removeProfile = async (id) => { + await deleteProfile(id); setProfiles(prev => prev.filter(p => p.id !== id)); if (activeId === id) { setActive(null); @@ -92,6 +119,28 @@ export default function Characters() { const activateProfile = (id) => { setActive(id); setActiveId(id); + + // Sync active character's TTS settings to chat settings + const profile = profiles.find(p => p.id === id); + if (profile?.data?.tts) { + const tts = profile.data.tts; + const engine = tts.engine || 'kokoro'; + let voice; + if (engine === 'kokoro') voice = tts.kokoro_voice || 'af_heart'; + else if (engine === 'elevenlabs') voice = tts.elevenlabs_voice_id || ''; + else if (engine === 'chatterbox') voice = tts.voice_ref_path || ''; + else voice = ''; + + try { + const raw = localStorage.getItem('homeai_dashboard_settings'); + const settings = raw ? JSON.parse(raw) : {}; + localStorage.setItem('homeai_dashboard_settings', JSON.stringify({ + ...settings, + ttsEngine: engine, + voice: voice, + })); + } catch { /* ignore */ } + } }; const exportProfile = (profile) => { @@ -125,13 +174,28 @@ export default function Characters() { )}

- +
+ + +
{error && ( @@ -158,7 +222,11 @@ export default function Characters() {
{/* Profile grid */} - {profiles.length === 0 ? ( + {loading ? ( +
+

Loading characters...

+
+ ) : profiles.length === 0 ? (
@@ -230,11 +298,32 @@ export default function Characters() { {char.model_overrides?.primary || 'default'} - {char.tts?.kokoro_voice && ( + {char.tts?.engine === 'kokoro' && char.tts?.kokoro_voice && ( {char.tts.kokoro_voice} )} + {char.tts?.engine === 'elevenlabs' && char.tts?.elevenlabs_voice_id && ( + + {char.tts.elevenlabs_voice_name || char.tts.elevenlabs_voice_id.slice(0, 8) + '…'} + + )} + {char.tts?.engine === 'chatterbox' && char.tts?.voice_ref_path && ( + + {char.tts.voice_ref_path.split('/').pop()} + + )} + {(() => { + const defaultPreset = char.gaze_presets?.find(gp => gp.trigger === 'self-portrait')?.preset + || char.gaze_presets?.[0]?.preset + || char.gaze_preset + || null; + return defaultPreset ? ( + + {defaultPreset} + + ) : null; + })()}
@@ -287,6 +376,96 @@ export default function Characters() { })}
)} + + {/* Satellite Assignment */} + {!loading && profiles.length > 0 && ( +
+
+

Satellite Routing

+

Assign characters to voice satellites. Unmapped satellites use the default.

+
+ + {/* Default character */} +
+ + +
+ + {/* Per-satellite assignments */} + {Object.entries(satMap.satellites || {}).map(([satId, charId]) => ( +
+ {satId} + + +
+ ))} + + {/* Add new satellite */} +
+ setNewSatId(e.target.value)} + placeholder="Satellite ID (from bridge log)" + className="w-32 shrink-0 bg-gray-800 text-gray-200 text-sm rounded-lg px-3 py-2 border border-gray-700 focus:outline-none focus:border-indigo-500 font-mono" + /> + + +
+
+ )}
); } diff --git a/homeai-dashboard/src/pages/Chat.jsx b/homeai-dashboard/src/pages/Chat.jsx index 08817a5..c80ec1b 100644 --- a/homeai-dashboard/src/pages/Chat.jsx +++ b/homeai-dashboard/src/pages/Chat.jsx @@ -1,115 +1,146 @@ -import { useState, useEffect, useCallback } from 'react' +import { useState, useCallback } from 'react' import ChatPanel from '../components/ChatPanel' import InputBar from '../components/InputBar' import StatusIndicator from '../components/StatusIndicator' import SettingsDrawer from '../components/SettingsDrawer' +import ConversationList from '../components/ConversationList' import { useSettings } from '../hooks/useSettings' import { useBridgeHealth } from '../hooks/useBridgeHealth' import { useChat } from '../hooks/useChat' import { useTtsPlayback } from '../hooks/useTtsPlayback' import { useVoiceInput } from '../hooks/useVoiceInput' +import { useActiveCharacter } from '../hooks/useActiveCharacter' +import { useConversations } from '../hooks/useConversations' export default function Chat() { const { settings, updateSetting } = useSettings() const isOnline = useBridgeHealth() - const { messages, isLoading, send, clearHistory } = useChat() - const { isPlaying, speak, stop } = useTtsPlayback(settings.voice) + const character = useActiveCharacter() + const { + conversations, activeId, isLoading: isLoadingList, + select, create, remove, updateMeta, + } = useConversations() + + const convMeta = { + characterId: character?.id || '', + characterName: character?.name || '', + } + + const { messages, isLoading, isLoadingConv, send, clearHistory } = useChat(activeId, convMeta, updateMeta) + + // Use character's TTS config if available, fall back to global settings + const ttsEngine = character?.tts?.engine || settings.ttsEngine + const ttsVoice = ttsEngine === 'elevenlabs' + ? (character?.tts?.elevenlabs_voice_id || settings.voice) + : (character?.tts?.kokoro_voice || settings.voice) + const ttsModel = ttsEngine === 'elevenlabs' ? 
(character?.tts?.elevenlabs_model || null) : null + const { isPlaying, speak, stop } = useTtsPlayback(ttsVoice, ttsEngine, ttsModel) const { isRecording, isTranscribing, startRecording, stopRecording } = useVoiceInput(settings.sttMode) const [settingsOpen, setSettingsOpen] = useState(false) - // Send a message and optionally speak the response const handleSend = useCallback(async (text) => { - const response = await send(text) + // Auto-create a conversation if none is active + let newId = null + if (!activeId) { + newId = await create(convMeta.characterId, convMeta.characterName) + } + const response = await send(text, newId) if (response && settings.autoTts) { speak(response) } - }, [send, settings.autoTts, speak]) + }, [activeId, create, convMeta, send, settings.autoTts, speak]) - // Toggle voice recording const handleVoiceToggle = useCallback(async () => { if (isRecording) { const text = await stopRecording() - if (text) { - handleSend(text) - } + if (text) handleSend(text) } else { startRecording() } }, [isRecording, stopRecording, startRecording, handleSend]) - // Space bar push-to-talk when input not focused - useEffect(() => { - const handleKeyDown = (e) => { - if (e.code === 'Space' && e.target.tagName !== 'TEXTAREA' && e.target.tagName !== 'INPUT') { - e.preventDefault() - handleVoiceToggle() - } - } - window.addEventListener('keydown', handleKeyDown) - return () => window.removeEventListener('keydown', handleKeyDown) - }, [handleVoiceToggle]) + const handleNewChat = useCallback(() => { + create(convMeta.characterId, convMeta.characterName) + }, [create, convMeta]) return ( -
- {/* Status bar */} -
-
- - - {isOnline === null ? 'Connecting...' : isOnline ? 'Connected' : 'Offline'} - -
-
- {messages.length > 0 && ( - - )} - {isPlaying && ( - - )} - -
-
+
+ {/* Conversation sidebar */} + {/* Chat area */} - +
+ {/* Status bar */} +
+
+ + + {isOnline === null ? 'Connecting...' : isOnline ? 'Connected' : 'Offline'} + +
+
+ {messages.length > 0 && ( + + )} + {isPlaying && ( + + )} + +
+
- {/* Input */} - + {/* Messages */} + - {/* Settings drawer */} - setSettingsOpen(false)} - settings={settings} - onUpdate={updateSetting} - /> + {/* Input */} + + + {/* Settings drawer */} + setSettingsOpen(false)} + settings={settings} + onUpdate={updateSetting} + /> +
) } diff --git a/homeai-dashboard/src/pages/Editor.jsx b/homeai-dashboard/src/pages/Editor.jsx index f34c81c..8af0c91 100644 --- a/homeai-dashboard/src/pages/Editor.jsx +++ b/homeai-dashboard/src/pages/Editor.jsx @@ -1,14 +1,18 @@ import React, { useState, useEffect, useRef } from 'react'; -import { validateCharacter } from '../lib/SchemaValidator'; +import { validateCharacter, migrateV1toV2 } from '../lib/SchemaValidator'; const DEFAULT_CHARACTER = { - schema_version: 1, - name: "aria", - display_name: "Aria", - description: "Default HomeAI assistant persona", - system_prompt: "You are Aria, a warm, curious, and helpful AI assistant living in the home. You speak naturally and conversationally — never robotic. You are knowledgeable but never condescending. You remember the people you live with and build on those memories over time. Keep responses concise when controlling smart home devices; be more expressive in casual conversation. Never break character.", + schema_version: 2, + name: "", + display_name: "", + description: "", + background: "", + dialogue_style: "", + appearance: "", + skills: [], + system_prompt: "", model_overrides: { - primary: "llama3.3:70b", + primary: "qwen3.5:35b-a3b", fast: "qwen2.5:7b" }, tts: { @@ -16,24 +20,8 @@ const DEFAULT_CHARACTER = { kokoro_voice: "af_heart", speed: 1.0 }, - live2d_expressions: { - idle: "expr_idle", - listening: "expr_listening", - thinking: "expr_thinking", - speaking: "expr_speaking", - happy: "expr_happy", - sad: "expr_sad", - surprised: "expr_surprised", - error: "expr_error" - }, - vtube_ws_triggers: { - thinking: { type: "hotkey", id: "expr_thinking" }, - speaking: { type: "hotkey", id: "expr_speaking" }, - idle: { type: "hotkey", id: "expr_idle" } - }, - custom_rules: [ - { trigger: "good morning", response: "Good morning! 
How did you sleep?", condition: "time_of_day == morning" } - ], + gaze_presets: [], + custom_rules: [], notes: "" }; @@ -43,7 +31,12 @@ export default function Editor() { if (editData) { sessionStorage.removeItem('edit_character'); try { - return JSON.parse(editData); + const parsed = JSON.parse(editData); + // Auto-migrate v1 data + if (parsed.schema_version === 1 || !parsed.schema_version) { + migrateV1toV2(parsed); + } + return parsed; } catch { return DEFAULT_CHARACTER; } @@ -52,6 +45,7 @@ export default function Editor() { }); const [error, setError] = useState(null); const [saved, setSaved] = useState(false); + const isEditing = !!sessionStorage.getItem('edit_character_profile_id'); // TTS preview state const [ttsState, setTtsState] = useState('idle'); @@ -65,6 +59,19 @@ export default function Editor() { const [elevenLabsModels, setElevenLabsModels] = useState([]); const [isLoadingElevenLabs, setIsLoadingElevenLabs] = useState(false); + // GAZE presets state (from API) + const [availableGazePresets, setAvailableGazePresets] = useState([]); + const [isLoadingGaze, setIsLoadingGaze] = useState(false); + + // Character lookup state + const [lookupName, setLookupName] = useState(''); + const [lookupFranchise, setLookupFranchise] = useState(''); + const [isLookingUp, setIsLookingUp] = useState(false); + const [lookupDone, setLookupDone] = useState(false); + + // Skills input state + const [newSkill, setNewSkill] = useState(''); + const fetchElevenLabsData = async (key) => { if (!key) return; setIsLoadingElevenLabs(true); @@ -95,6 +102,16 @@ export default function Editor() { } }, [character.tts.engine]); + // Fetch GAZE presets on mount + useEffect(() => { + setIsLoadingGaze(true); + fetch('/api/gaze/presets') + .then(r => r.ok ? 
r.json() : { presets: [] }) + .then(data => setAvailableGazePresets(data.presets || [])) + .catch(() => {}) + .finally(() => setIsLoadingGaze(false)); + }, []); + useEffect(() => { return () => { if (audioRef.current) { audioRef.current.pause(); audioRef.current = null; } @@ -119,27 +136,35 @@ export default function Editor() { } }; - const handleSaveToProfiles = () => { + const handleSaveToProfiles = async () => { try { validateCharacter(character); setError(null); const profileId = sessionStorage.getItem('edit_character_profile_id'); - const storageKey = 'homeai_characters'; - const raw = localStorage.getItem(storageKey); - let profiles = raw ? JSON.parse(raw) : []; + let profile; if (profileId) { - profiles = profiles.map(p => - p.id === profileId ? { ...p, data: character } : p - ); - sessionStorage.removeItem('edit_character_profile_id'); + const res = await fetch('/api/characters'); + const profiles = await res.json(); + const existing = profiles.find(p => p.id === profileId); + profile = existing + ? 
{ ...existing, data: character } + : { id: profileId, data: character, image: null, addedAt: new Date().toISOString() }; + // Keep the profile ID in sessionStorage so subsequent saves update the same file } else { const id = character.name + '_' + Date.now(); - profiles.push({ id, data: character, image: null, addedAt: new Date().toISOString() }); + profile = { id, data: character, image: null, addedAt: new Date().toISOString() }; + // Store the new ID so subsequent saves update the same file + sessionStorage.setItem('edit_character_profile_id', profile.id); } - localStorage.setItem(storageKey, JSON.stringify(profiles)); + await fetch('/api/characters', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(profile), + }); + setSaved(true); setTimeout(() => setSaved(false), 2000); } catch (err) { @@ -164,6 +189,59 @@ export default function Editor() { reader.readAsText(file); }; + // Character lookup from MCP + const handleCharacterLookup = async () => { + if (!lookupName || !lookupFranchise) return; + setIsLookingUp(true); + setError(null); + try { + const res = await fetch('/api/character-lookup', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ name: lookupName, franchise: lookupFranchise }), + }); + if (!res.ok) { + const err = await res.json().catch(() => ({ error: 'Lookup failed' })); + throw new Error(err.error || `Lookup returned ${res.status}`); + } + const data = await res.json(); + + // Build dialogue_style from personality + notable quotes + let dialogueStyle = data.personality || ''; + if (data.notable_quotes?.length) { + dialogueStyle += '\n\nExample dialogue:\n' + data.notable_quotes.map(q => `"${q}"`).join('\n'); + } + + // Filter abilities to clean text-only entries (skip image captions) + const skills = (data.abilities || []) + .filter(a => a.length > 20 && !a.includes('.jpg') && !a.includes('.png')) + .slice(0, 10); + + // Auto-generate system prompt + const 
promptName = character.display_name || lookupName; + const personality = data.personality ? data.personality.split('.').slice(0, 3).join('.') + '.' : ''; + const systemPrompt = `You are ${promptName} from ${lookupFranchise}. ${personality} Stay in character at all times. Respond naturally and conversationally.`; + + setCharacter(prev => ({ + ...prev, + name: prev.name || lookupName.toLowerCase().replace(/\s+/g, '_'), + display_name: prev.display_name || lookupName, + description: data.description ? data.description.split('.').slice(0, 2).join('.') + '.' : prev.description, + background: data.background || prev.background, + appearance: data.appearance || prev.appearance, + dialogue_style: dialogueStyle || prev.dialogue_style, + skills: skills.length > 0 ? skills : prev.skills, + system_prompt: prev.system_prompt || systemPrompt, + })); + + setLookupDone(true); + } catch (err) { + setError(`Character lookup failed: ${err.message}`); + } finally { + setIsLookingUp(false); + } + }; + const handleChange = (field, value) => { setCharacter(prev => ({ ...prev, [field]: value })); }; @@ -175,6 +253,50 @@ export default function Editor() { })); }; + // Skills helpers + const addSkill = () => { + const trimmed = newSkill.trim(); + if (!trimmed) return; + setCharacter(prev => ({ + ...prev, + skills: [...(prev.skills || []), trimmed] + })); + setNewSkill(''); + }; + + const removeSkill = (index) => { + setCharacter(prev => { + const updated = [...(prev.skills || [])]; + updated.splice(index, 1); + return { ...prev, skills: updated }; + }); + }; + + // GAZE preset helpers + const addGazePreset = () => { + setCharacter(prev => ({ + ...prev, + gaze_presets: [...(prev.gaze_presets || []), { preset: '', trigger: 'self-portrait' }] + })); + }; + + const removeGazePreset = (index) => { + setCharacter(prev => { + const updated = [...(prev.gaze_presets || [])]; + updated.splice(index, 1); + return { ...prev, gaze_presets: updated }; + }); + }; + + const handleGazePresetChange = (index, 
field, value) => { + setCharacter(prev => { + const updated = [...(prev.gaze_presets || [])]; + updated[index] = { ...updated[index], [field]: value }; + return { ...prev, gaze_presets: updated }; + }); + }; + + // Custom rules helpers const handleRuleChange = (index, field, value) => { setCharacter(prev => { const newRules = [...(prev.custom_rules || [])]; @@ -198,37 +320,40 @@ export default function Editor() { }); }; + // TTS preview const stopPreview = () => { - if (audioRef.current) { - audioRef.current.pause(); - audioRef.current = null; - } - if (objectUrlRef.current) { - URL.revokeObjectURL(objectUrlRef.current); - objectUrlRef.current = null; - } + if (audioRef.current) { audioRef.current.pause(); audioRef.current = null; } + if (objectUrlRef.current) { URL.revokeObjectURL(objectUrlRef.current); objectUrlRef.current = null; } window.speechSynthesis.cancel(); setTtsState('idle'); }; const previewTTS = async () => { stopPreview(); - const text = previewText || `Hi, I am ${character.display_name}. This is a preview of my voice.`; + const text = previewText || `Hi, I am ${character.display_name || character.name}. 
This is a preview of my voice.`; + const engine = character.tts.engine; - if (character.tts.engine === 'kokoro') { + let bridgeBody = null; + if (engine === 'kokoro') { + bridgeBody = { text, voice: character.tts.kokoro_voice, engine: 'kokoro' }; + } else if (engine === 'elevenlabs' && character.tts.elevenlabs_voice_id) { + bridgeBody = { text, voice: character.tts.elevenlabs_voice_id, engine: 'elevenlabs', model: character.tts.elevenlabs_model }; + } + + if (bridgeBody) { setTtsState('loading'); let blob; try { const response = await fetch('/api/tts', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ text, voice: character.tts.kokoro_voice }) + body: JSON.stringify(bridgeBody) }); if (!response.ok) throw new Error('TTS bridge returned ' + response.status); blob = await response.blob(); } catch (err) { setTtsState('idle'); - setError(`Kokoro preview failed: ${err.message}. Falling back to browser TTS.`); + setError(`${engine} preview failed: ${err.message}. Falling back to browser TTS.`); runBrowserTTS(text); return; } @@ -269,7 +394,9 @@ export default function Editor() {

Character Editor

- Editing: {character.display_name || character.name} + {character.display_name || character.name + ? `Editing: ${character.display_name || character.name}` + : 'New character'}

@@ -311,6 +438,64 @@ export default function Editor() { {error && (
{error} + +
+ )} + + {/* Character Lookup — auto-fill from fictional character wiki */} + {!isEditing && ( +
+
+ + + +

Auto-fill from Character

+
+

Fetch character data from Fandom/Wikipedia to auto-populate fields. You can edit everything after.

+
+
+ + setLookupName(e.target.value)} + placeholder="e.g. Tifa Lockhart" + /> +
+
+ + setLookupFranchise(e.target.value)} + placeholder="e.g. Final Fantasy VII" + /> +
+ +
+ {lookupDone && ( +

Fields populated from wiki data. Review and edit below.

+ )}
)} @@ -324,11 +509,11 @@ export default function Editor() {
- handleChange('display_name', e.target.value)} /> + handleChange('display_name', e.target.value)} />
- handleChange('description', e.target.value)} /> + handleChange('description', e.target.value)} />
@@ -359,7 +544,14 @@ export default function Editor() {
{elevenLabsVoices.length > 0 ? ( - { + const voiceId = e.target.value; + const voice = elevenLabsVoices.find(v => v.voice_id === voiceId); + setCharacter(prev => ({ + ...prev, + tts: { ...prev.tts, elevenlabs_voice_id: voiceId, elevenlabs_voice_name: voice?.name || '' } + })); + }}> {elevenLabsVoices.map(v => ( @@ -439,7 +631,7 @@ export default function Editor() { className={inputClass} value={previewText} onChange={(e) => setPreviewText(e.target.value)} - placeholder={`Hi, I am ${character.display_name}. This is a preview of my voice.`} + placeholder={`Hi, I am ${character.display_name || character.name || 'your character'}. This is a preview of my voice.`} />
@@ -474,7 +666,9 @@ export default function Editor() {

{character.tts.engine === 'kokoro' ? 'Previews via local Kokoro TTS bridge (port 8081).' - : 'Uses browser TTS for preview. Local TTS available with Kokoro engine.'} + : character.tts.engine === 'elevenlabs' + ? 'Previews via ElevenLabs through bridge.' + : 'Uses browser TTS for preview. Local TTS available with Kokoro engine.'}

@@ -483,25 +677,154 @@ export default function Editor() {

System Prompt

- {character.system_prompt.length} chars + {(character.system_prompt || '').length} chars