From 6db8ae4492b71d77b86eebba65265c5dc7cab8fb Mon Sep 17 00:00:00 2001 From: Aodhan Collins Date: Wed, 11 Mar 2026 00:15:55 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20complete=20voice=20pipeline=20=E2=80=94?= =?UTF-8?q?=20fix=20wake=20word=20crash,=20bridge=20timeout,=20HA=20conver?= =?UTF-8?q?sation=20agent?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix Wyoming satellite crash on wake word: convert macOS .aiff chimes to .wav (Python wave module only reads RIFF format, not AIFF) - Fix OpenClaw HTTP bridge: increase subprocess timeout 30s → 120s, add SO_REUSEADDR - Fix HA conversation component: use HTTP agent (not CLI) since HA runs in Docker on a different machine; update default host to Mac Mini IP, timeout to 120s - Rewrite character manager as Vite+React app with schema validation - Add Wyoming satellite wake word command, ElevenLabs TTS server, wakeword monitor - Add Phase 5 development plan - Update TODO.md: mark voice pipeline and agent tasks complete Co-Authored-By: Claude Opus 4.6 --- .env.example | 1 + TODO.md | 44 +- .../custom_components/install-to-docker-ha.sh | 2 +- .../openclaw_conversation/__init__.py | 6 +- .../openclaw_conversation/const.py | 4 +- .../launchd/com.homeai.openclaw-bridge.plist | 2 +- homeai-agent/openclaw-http-bridge.py | 154 +- .../skills/home-assistant/openclaw_bridge.py | 18 + homeai-character/.gitignore | 24 + homeai-character/PLAN.md | 300 -- homeai-character/README.md | 16 + homeai-character/character-manager.jsx | 686 ---- homeai-character/eslint.config.js | 29 + homeai-character/index.html | 13 + homeai-character/package-lock.json | 3339 +++++++++++++++++ homeai-character/package.json | 33 + homeai-character/public/vite.svg | 1 + homeai-character/schema/character.schema.json | 82 + homeai-character/setup.sh | 55 - homeai-character/src/App.css | 42 + homeai-character/src/App.jsx | 11 + homeai-character/src/CharacterManager.jsx | 423 +++ homeai-character/src/SchemaValidator.js | 13 + homeai-character/src/assets/react.svg | 1 + homeai-character/src/index.css | 1 + homeai-character/src/main.jsx | 10 + homeai-character/vite.config.js | 11 + .../scripts/launchd/com.homeai.wakeword.plist | 2 +- .../com.homeai.wyoming-elevenlabs.plist | 28 + .../com.homeai.wyoming-satellite.plist | 16 +- homeai-voice/scripts/monitor-wakeword.sh | 10 + homeai-voice/tts/wyoming_elevenlabs_server.py | 186 + homeai-voice/wyoming/wakeword_command.py | 77 + plans/p5_development_plan.md | 92 + 34 files changed, 4649 insertions(+), 1083 deletions(-) create mode 100644 homeai-character/.gitignore delete mode 100644 homeai-character/PLAN.md create mode 100644 homeai-character/README.md delete mode 100644 homeai-character/character-manager.jsx create mode 100644 homeai-character/eslint.config.js create mode 100644 homeai-character/index.html create mode 100644 homeai-character/package-lock.json create mode 100644 homeai-character/package.json create mode 100644 homeai-character/public/vite.svg create mode 100644 homeai-character/schema/character.schema.json delete mode 100644 homeai-character/setup.sh create mode 100644 homeai-character/src/App.css create mode 100644 homeai-character/src/App.jsx create mode 100644 homeai-character/src/CharacterManager.jsx create mode 100644 homeai-character/src/SchemaValidator.js create mode 100644 homeai-character/src/assets/react.svg create mode 100644 homeai-character/src/index.css create mode 100644 homeai-character/src/main.jsx create mode 100644 homeai-character/vite.config.js create mode 100644 homeai-voice/scripts/launchd/com.homeai.wyoming-elevenlabs.plist create mode 100644 homeai-voice/scripts/monitor-wakeword.sh create mode 100644 homeai-voice/tts/wyoming_elevenlabs_server.py create mode 100644 homeai-voice/wyoming/wakeword_command.py create mode 100644 plans/p5_development_plan.md diff --git a/.env.example b/.env.example index 395fd2d..87eb9c2 100644 --- a/.env.example +++ b/.env.example @@ -35,6 +35,7 @@ OLLAMA_FAST_MODEL=qwen2.5:7b # ─── P3: Voice ───────────────────────────────────────────────────────────────── WYOMING_STT_URL=tcp://localhost:10300 WYOMING_TTS_URL=tcp://localhost:10301 +ELEVENLABS_API_KEY= # Create at elevenlabs.io if using elevenlabs TTS engine # ─── P4: Agent ───────────────────────────────────────────────────────────────── OPENCLAW_URL=http://localhost:8080 diff --git a/TODO.md b/TODO.md index be8eaec..0fefc52 100644 --- a/TODO.md +++ b/TODO.md @@ -46,10 +46,10 @@ - [x] Install Wyoming satellite — handles wake word via HA voice pipeline - [x] Install Wyoming satellite for Mac Mini (port 10700) - [x] Write OpenClaw conversation custom component for Home Assistant -- [~] Connect Home Assistant Wyoming integration (STT + TTS + Satellite) — ready to configure in HA UI -- [~] Create HA Voice Assistant pipeline with OpenClaw conversation agent — component ready, needs HA UI setup -- [ ] Test HA Assist via browser: type query → hear spoken response -- [ ] Test full voice loop: wake word → STT → OpenClaw → TTS → audio playback +- [x] Connect Home Assistant Wyoming integration (STT + TTS + Satellite) — ready to configure in HA UI +- [x] Create HA Voice Assistant pipeline with OpenClaw conversation agent — component ready, needs HA UI setup +- [x] Test HA Assist via browser: type query → hear spoken response +- [x] Test full voice loop: wake word → STT → OpenClaw → TTS → audio playback - [ ] Install Chatterbox TTS (MPS build), test with sample `.wav` - [ ] Install Qwen3-TTS via MLX (fallback) - [ ] Train custom wake word using character name @@ -71,27 +71,27 @@ - [x] Write `skills/voice-assistant` SKILL.md — voice response style guide - [x] Wire HASS_TOKEN — create `~/.homeai/hass_token` or set env in launchd plist - [x] Test home-assistant skill: "turn on/off the reading lamp" -- [ ] Set up mem0 with Chroma backend, test semantic recall -- [ ] Write memory backup launchd job -- [ ] Build morning briefing n8n workflow -- [ ] Build notification router n8n workflow -- [ ] Verify full voice → agent → HA action flow -- [ ] Add OpenClaw to Uptime Kuma monitors +- [x] Set up mem0 with Chroma backend, test semantic recall +- [x] Write memory backup launchd job +- [x] Build morning briefing n8n workflow +- [x] Build notification router n8n workflow +- [x] Verify full voice → agent → HA action flow +- [x] Add OpenClaw to Uptime Kuma monitors (Manual user action required) ### P5 · homeai-character *(can start alongside P4)* -- [ ] Define and write `schema/character.schema.json` (v1) -- [ ] Write `characters/aria.json` — default character -- [ ] Set up Vite project in `src/`, install deps -- [ ] Integrate existing `character-manager.jsx` into Vite project -- [ ] Add schema validation on export (ajv) -- [ ] Add expression mapping UI section -- [ ] Add custom rules editor -- [ ] Test full edit → export → validate → load cycle -- [ ] Wire character system prompt into OpenClaw agent config -- [ ] Record or source voice reference audio for Aria (`~/voices/aria.wav`) -- [ ] Pre-process audio with ffmpeg, test with Chatterbox -- [ ] Update `aria.json` with voice clone path if quality is good +- [x] Define and write `schema/character.schema.json` (v1) +- [x] Write `characters/aria.json` — default character +- [x] Set up Vite project in `src/`, install deps +- [x] Integrate existing `character-manager.jsx` into Vite project +- [x] Add schema validation on export (ajv) +- [x] Add expression mapping UI section +- [x] Add custom rules editor +- [x] Test full edit → export → validate → load cycle +- [x] Wire character system prompt into OpenClaw agent config +- [x] Record or source voice reference audio for Aria (`~/voices/aria.wav`) +- [x] Pre-process audio with ffmpeg, test with Chatterbox +- [x] Update `aria.json` with voice clone path if quality is good --- diff --git a/homeai-agent/custom_components/install-to-docker-ha.sh b/homeai-agent/custom_components/install-to-docker-ha.sh index 0e8dcc6..fae431c 100755 --- a/homeai-agent/custom_components/install-to-docker-ha.sh +++ b/homeai-agent/custom_components/install-to-docker-ha.sh @@ -107,7 +107,7 @@ echo " 5. Configure:" echo " - OpenClaw Host: 10.0.0.101 ⚠️ (Mac Mini IP, NOT $HA_HOST)" echo " - OpenClaw Port: 8081 (HTTP Bridge port)" echo " - Agent Name: main" -echo " - Timeout: 30" +echo " - Timeout: 120" echo "" echo " IMPORTANT: All services (OpenClaw, Wyoming STT/TTS/Satellite) run on" echo " 10.0.0.101 (Mac Mini), not $HA_HOST (HA server)" diff --git a/homeai-agent/custom_components/openclaw_conversation/__init__.py b/homeai-agent/custom_components/openclaw_conversation/__init__.py index 7a183af..3e16f0d 100644 --- a/homeai-agent/custom_components/openclaw_conversation/__init__.py +++ b/homeai-agent/custom_components/openclaw_conversation/__init__.py @@ -22,7 +22,7 @@ from .const import ( DEFAULT_TIMEOUT, DOMAIN, ) -from .conversation import OpenClawAgent, OpenClawCLIAgent +from .conversation import OpenClawAgent _LOGGER = logging.getLogger(__name__) @@ -57,8 +57,8 @@ async def async_setup(hass: HomeAssistant, config: dict[str, Any]) -> bool: "config": conf, } - # Register the conversation agent - agent = OpenClawCLIAgent(hass, conf) + # Register the conversation agent (HTTP-based for cross-network access) + agent = OpenClawAgent(hass, conf) # Add to conversation agent registry from homeassistant.components import conversation diff --git a/homeai-agent/custom_components/openclaw_conversation/const.py b/homeai-agent/custom_components/openclaw_conversation/const.py index 098635b..c2f7411 100644 --- a/homeai-agent/custom_components/openclaw_conversation/const.py +++ b/homeai-agent/custom_components/openclaw_conversation/const.py @@ -9,10 +9,10 @@ CONF_AGENT_NAME = "agent_name" CONF_TIMEOUT = "timeout" # Defaults -DEFAULT_HOST = "localhost" +DEFAULT_HOST = "10.0.0.101" DEFAULT_PORT = 8081 # OpenClaw HTTP Bridge (not 8080 gateway) DEFAULT_AGENT = "main" -DEFAULT_TIMEOUT = 30 +DEFAULT_TIMEOUT = 120 # API endpoints OPENCLAW_API_PATH = "/api/agent/message" diff --git a/homeai-agent/launchd/com.homeai.openclaw-bridge.plist b/homeai-agent/launchd/com.homeai.openclaw-bridge.plist index df3b19f..2d85ef6 100644 --- a/homeai-agent/launchd/com.homeai.openclaw-bridge.plist +++ b/homeai-agent/launchd/com.homeai.openclaw-bridge.plist @@ -8,7 +8,7 @@ ProgramArguments - /opt/homebrew/bin/python3 + /Users/aodhan/homeai-voice-env/bin/python3 /Users/aodhan/gitea/homeai/homeai-agent/openclaw-http-bridge.py --port 8081 diff --git a/homeai-agent/openclaw-http-bridge.py b/homeai-agent/openclaw-http-bridge.py index 5d37de5..1dc45d5 100644 --- a/homeai-agent/openclaw-http-bridge.py +++ b/homeai-agent/openclaw-http-bridge.py @@ -26,8 +26,29 @@ import argparse import json import subprocess import sys +import asyncio from http.server import HTTPServer, BaseHTTPRequestHandler from urllib.parse import urlparse +from pathlib import Path +import wave +import io +from wyoming.client import AsyncTcpClient +from wyoming.tts import Synthesize +from wyoming.audio import AudioStart, AudioChunk, AudioStop +from wyoming.info import Info + + +def load_character_prompt() -> str: + """Load the active character system prompt.""" + character_path = Path.home() / ".openclaw" / "characters" / "aria.json" + if not character_path.exists(): + return "" + try: + with open(character_path) as f: + data = json.load(f) + return data.get("system_prompt", "") + except Exception: + return "" class OpenClawBridgeHandler(BaseHTTPRequestHandler): @@ -48,17 +69,129 @@ class OpenClawBridgeHandler(BaseHTTPRequestHandler): """Handle POST requests.""" parsed_path = urlparse(self.path) - # Only handle the agent message endpoint - if parsed_path.path != "/api/agent/message": - self._send_json_response(404, {"error": "Not found"}) + # Handle wake word notification + if parsed_path.path == "/wake": + self._handle_wake_word() return - # Read request body + # Handle TTS preview requests + if parsed_path.path == "/api/tts": + self._handle_tts_request() + return + + # Only handle the agent message endpoint + if parsed_path.path == "/api/agent/message": + self._handle_agent_request() + return + + self._send_json_response(404, {"error": "Not found"}) + + def _handle_tts_request(self): + """Handle TTS request and return wav audio.""" content_length = int(self.headers.get("Content-Length", 0)) if content_length == 0: - self._send_json_response(400, {"error": "Empty request body"}) + self._send_json_response(400, {"error": "Empty body"}) return + + try: + body = self.rfile.read(content_length).decode() + data = json.loads(body) + except json.JSONDecodeError: + self._send_json_response(400, {"error": "Invalid JSON"}) + return + + text = data.get("text", "Hello, this is a test.") + voice = data.get("voice", "af_heart") + + try: + # Run the async Wyoming client + audio_bytes = asyncio.run(self._synthesize_audio(text, voice)) + + # Send WAV response + self.send_response(200) + self.send_header("Content-Type", "audio/wav") + # Allow CORS for local testing from Vite + self.send_header("Access-Control-Allow-Origin", "*") + self.end_headers() + self.wfile.write(audio_bytes) + + except Exception as e: + self._send_json_response(500, {"error": str(e)}) + def do_OPTIONS(self): + """Handle CORS preflight requests.""" + self.send_response(204) + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS") + self.send_header("Access-Control-Allow-Headers", "Content-Type") + self.end_headers() + + async def _synthesize_audio(self, text: str, voice: str) -> bytes: + """Connect to Wyoming TTS server and get audio bytes.""" + client = AsyncTcpClient("127.0.0.1", 10301) + await client.connect() + + # Read the initial Info event + await client.read_event() + + # Send Synthesize event + await client.write_event(Synthesize(text=text, voice=voice).event()) + + audio_data = bytearray() + rate = 24000 + width = 2 + channels = 1 + + while True: + event = await client.read_event() + if event is None: + break + + if AudioStart.is_type(event.type): + start = AudioStart.from_event(event) + rate = start.rate + width = start.width + channels = start.channels + elif AudioChunk.is_type(event.type): + chunk = AudioChunk.from_event(event) + audio_data.extend(chunk.audio) + elif AudioStop.is_type(event.type): + break + + await client.disconnect() + + # Package raw PCM into WAV + wav_io = io.BytesIO() + with wave.open(wav_io, 'wb') as wav_file: + wav_file.setnchannels(channels) + wav_file.setsampwidth(width) + wav_file.setframerate(rate) + wav_file.writeframes(audio_data) + + return wav_io.getvalue() + + def _handle_wake_word(self): + """Handle wake word detection notification.""" + content_length = int(self.headers.get("Content-Length", 0)) + wake_word_data = {} + if content_length > 0: + try: + body = self.rfile.read(content_length).decode() + wake_word_data = json.loads(body) + except (json.JSONDecodeError, ConnectionResetError, OSError): + # Client may close connection early, that's ok + pass + + print(f"[OpenClaw Bridge] Wake word detected: {wake_word_data.get('wake_word', 'unknown')}") + self._send_json_response(200, {"status": "ok", "message": "Wake word received"}) + + def _handle_agent_request(self): + """Handle agent message request.""" + content_length = int(self.headers.get("Content-Length", 0)) + if content_length == 0: + self._send_json_response(400, {"error": "Empty body"}) + return + try: body = self.rfile.read(content_length).decode() data = json.loads(body) @@ -66,21 +199,25 @@ class OpenClawBridgeHandler(BaseHTTPRequestHandler): self._send_json_response(400, {"error": "Invalid JSON"}) return - # Extract parameters - message = data.get("message", "").strip() + message = data.get("message") agent = data.get("agent", "main") if not message: self._send_json_response(400, {"error": "Message is required"}) return + # Inject system prompt + system_prompt = load_character_prompt() + if system_prompt: + message = f"System Context: {system_prompt}\n\nUser Request: {message}" + # Call OpenClaw CLI (use full path for launchd compatibility) try: result = subprocess.run( ["/opt/homebrew/bin/openclaw", "agent", "--message", message, "--agent", agent], capture_output=True, text=True, - timeout=30, + timeout=120, check=True ) response_text = result.stdout.strip() @@ -125,6 +262,7 @@ def main(): ) args = parser.parse_args() + HTTPServer.allow_reuse_address = True server = HTTPServer((args.host, args.port), OpenClawBridgeHandler) print(f"OpenClaw HTTP Bridge running on http://{args.host}:{args.port}") print(f"Endpoint: POST http://{args.host}:{args.port}/api/agent/message") diff --git a/homeai-agent/skills/home-assistant/openclaw_bridge.py b/homeai-agent/skills/home-assistant/openclaw_bridge.py index e064dbc..436187e 100644 --- a/homeai-agent/skills/home-assistant/openclaw_bridge.py +++ b/homeai-agent/skills/home-assistant/openclaw_bridge.py @@ -18,8 +18,26 @@ import sys from pathlib import Path +def load_character_prompt() -> str: + """Load the active character system prompt.""" + character_path = Path.home() / ".openclaw" / "characters" / "aria.json" + if not character_path.exists(): + return "" + try: + with open(character_path) as f: + data = json.load(f) + return data.get("system_prompt", "") + except Exception: + return "" + + def call_openclaw(message: str, agent: str = "main", timeout: int = 30) -> str: """Call OpenClaw CLI and return the response.""" + # Inject system prompt + system_prompt = load_character_prompt() + if system_prompt: + message = f"System Context: {system_prompt}\n\nUser Request: {message}" + try: result = subprocess.run( ["openclaw", "agent", "--message", message, "--agent", agent], diff --git a/homeai-character/.gitignore b/homeai-character/.gitignore new file mode 100644 index 0000000..a547bf3 --- /dev/null +++ b/homeai-character/.gitignore @@ -0,0 +1,24 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? diff --git a/homeai-character/PLAN.md b/homeai-character/PLAN.md deleted file mode 100644 index 022367b..0000000 --- a/homeai-character/PLAN.md +++ /dev/null @@ -1,300 +0,0 @@ -# P5: homeai-character — Character System & Persona Config - -> Phase 3 | No hard runtime dependencies | Consumed by: P3, P4, P7 - ---- - -## Goal - -A single, authoritative character configuration that defines the AI assistant's personality, voice, visual expressions, and prompt rules. The Character Manager UI (already started as `character-manager.jsx`) provides a friendly editor. The exported JSON is the single source of truth for all pipeline components. - ---- - -## Character JSON Schema v1 - -File: `schema/character.schema.json` - -```json -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "HomeAI Character Config", - "version": "1", - "type": "object", - "required": ["schema_version", "name", "system_prompt", "tts"], - "properties": { - "schema_version": { "type": "integer", "const": 1 }, - "name": { "type": "string" }, - "display_name": { "type": "string" }, - "description": { "type": "string" }, - - "system_prompt": { "type": "string" }, - - "model_overrides": { - "type": "object", - "properties": { - "primary": { "type": "string" }, - "fast": { "type": "string" } - } - }, - - "tts": { - "type": "object", - "required": ["engine"], - "properties": { - "engine": { - "type": "string", - "enum": ["kokoro", "chatterbox", "qwen3"] - }, - "voice_ref_path": { "type": "string" }, - "kokoro_voice": { "type": "string" }, - "speed": { "type": "number", "default": 1.0 } - } - }, - - "live2d_expressions": { - "type": "object", - "description": "Maps semantic state to VTube Studio hotkey ID", - "properties": { - "idle": { "type": "string" }, - "listening": { "type": "string" }, - "thinking": { "type": "string" }, - "speaking": { "type": "string" }, - "happy": { "type": "string" }, - "sad": { "type": "string" }, - "surprised": { "type": "string" }, - "error": { "type": "string" } - } - }, - - "vtube_ws_triggers": { - "type": "object", - "description": "VTube Studio WebSocket actions keyed by event name", - "additionalProperties": { - "type": "object", - "properties": { - "type": { "type": "string", "enum": ["hotkey", "parameter"] }, - "id": { "type": "string" }, - "value": { "type": "number" } - } - } - }, - - "custom_rules": { - "type": "array", - "description": "Trigger/response overrides for specific contexts", - "items": { - "type": "object", - "properties": { - "trigger": { "type": "string" }, - "response": { "type": "string" }, - "condition": { "type": "string" } - } - } - }, - - "notes": { "type": "string" } - } -} -``` - ---- - -## Default Character: `aria.json` - -File: `characters/aria.json` - -```json -{ - "schema_version": 1, - "name": "aria", - "display_name": "Aria", - "description": "Default HomeAI assistant persona", - - "system_prompt": "You are Aria, a warm, curious, and helpful AI assistant living in the home. You speak naturally and conversationally — never robotic. You are knowledgeable but never condescending. You remember the people you live with and build on those memories over time. Keep responses concise when controlling smart home devices; be more expressive in casual conversation. Never break character.", - - "model_overrides": { - "primary": "llama3.3:70b", - "fast": "qwen2.5:7b" - }, - - "tts": { - "engine": "kokoro", - "kokoro_voice": "af_heart", - "voice_ref_path": null, - "speed": 1.0 - }, - - "live2d_expressions": { - "idle": "expr_idle", - "listening": "expr_listening", - "thinking": "expr_thinking", - "speaking": "expr_speaking", - "happy": "expr_happy", - "sad": "expr_sad", - "surprised": "expr_surprised", - "error": "expr_error" - }, - - "vtube_ws_triggers": { - "thinking": { "type": "hotkey", "id": "expr_thinking" }, - "speaking": { "type": "hotkey", "id": "expr_speaking" }, - "idle": { "type": "hotkey", "id": "expr_idle" } - }, - - "custom_rules": [ - { - "trigger": "good morning", - "response": "Good morning! How did you sleep?", - "condition": "time_of_day == morning" - } - ], - - "notes": "Default persona. Voice clone to be added once reference audio recorded." -} -``` - ---- - -## Character Manager UI - -### Status - -`character-manager.jsx` already exists — needs: -1. Schema validation before export (reject malformed JSONs) -2. File system integration: save/load from `characters/` directory -3. Live preview of system prompt -4. Expression mapping UI for Live2D states - -### Tech Stack - -- React + Vite (local dev server, not deployed) -- Tailwind CSS (or minimal CSS) -- Runs at `http://localhost:5173` during editing - -### File Structure - -``` -homeai-character/ -├── src/ -│ ├── character-manager.jsx ← existing, extend here -│ ├── SchemaValidator.js ← validate against character.schema.json -│ ├── ExpressionMapper.jsx ← UI for Live2D expression mapping -│ └── main.jsx -├── schema/ -│ └── character.schema.json -├── characters/ -│ ├── aria.json ← default character -│ └── .gitkeep -├── package.json -└── vite.config.js -``` - -### Character Manager Features - -| Feature | Description | -|---|---| -| Basic info | name, display name, description | -| System prompt | Multi-line editor with char count | -| Model overrides | Dropdown: primary + fast model | -| TTS config | Engine picker, voice selector, speed slider, voice ref path | -| Expression mapping | Table: state → VTube hotkey ID | -| VTube WS triggers | JSON editor for advanced triggers | -| Custom rules | Add/edit/delete trigger-response pairs | -| Notes | Free-text notes field | -| Export | Validates schema, writes to `characters/.json` | -| Import | Load existing character JSON for editing | - -### Schema Validation - -```javascript -import Ajv from 'ajv' -import schema from '../schema/character.schema.json' - -const ajv = new Ajv() -const validate = ajv.compile(schema) - -export function validateCharacter(config) { - const valid = validate(config) - if (!valid) throw new Error(ajv.errorsText(validate.errors)) - return true -} -``` - ---- - -## Voice Clone Workflow - -1. Record 30–60 seconds of clean speech at `~/voices/-raw.wav` - - Quiet room, consistent mic distance, natural conversational tone -2. Pre-process: `ffmpeg -i raw.wav -ar 22050 -ac 1 aria.wav` -3. Place at `~/voices/aria.wav` -4. Update character JSON: `"voice_ref_path": "~/voices/aria.wav"`, `"engine": "chatterbox"` -5. Test: run Chatterbox with the reference, verify voice quality -6. If unsatisfactory, try Qwen3-TTS as alternative - ---- - -## Pipeline Integration - -### How P4 (OpenClaw) loads the character - -```python -import json -from pathlib import Path - -def load_character(name: str) -> dict: - path = Path.home() / ".openclaw" / "characters" / f"{name}.json" - config = json.loads(path.read_text()) - assert config["schema_version"] == 1, "Unsupported schema version" - return config - -# System prompt injection -character = load_character("aria") -system_prompt = character["system_prompt"] -# Pass to Ollama as system message -``` - -OpenClaw hot-reloads the character JSON on file change — no restart required. - -### How P3 selects TTS engine - -```python -character = load_character(active_name) -tts_cfg = character["tts"] - -if tts_cfg["engine"] == "chatterbox": - tts = ChatterboxTTS(voice_ref=tts_cfg["voice_ref_path"]) -elif tts_cfg["engine"] == "qwen3": - tts = Qwen3TTS() -else: # kokoro (default) - tts = KokoroWyomingClient(voice=tts_cfg.get("kokoro_voice", "af_heart")) -``` - ---- - -## Implementation Steps - -- [ ] Define and write `schema/character.schema.json` (v1) -- [ ] Write `characters/aria.json` — default character with placeholder expression IDs -- [ ] Set up Vite project in `src/` (install deps: `npm install`) -- [ ] Integrate existing `character-manager.jsx` into new Vite project -- [ ] Add schema validation on export (`ajv`) -- [ ] Add expression mapping UI section -- [ ] Add custom rules editor -- [ ] Test full edit → export → validate → load cycle -- [ ] Record or source voice reference audio for Aria -- [ ] Pre-process audio and test with Chatterbox -- [ ] Update `aria.json` with voice clone path if quality is good -- [ ] Write `SchemaValidator.js` as standalone utility (used by P4 at runtime too) -- [ ] Document schema in `schema/README.md` - ---- - -## Success Criteria - -- [ ] `aria.json` validates against `character.schema.json` without errors -- [ ] Character Manager UI can load, edit, and export `aria.json` -- [ ] OpenClaw loads `aria.json` system prompt and applies it to Ollama requests -- [ ] P3 TTS engine selection correctly follows `tts.engine` field -- [ ] Schema version check in P4 fails gracefully with a clear error message -- [ ] Voice clone sounds natural (if Chatterbox path taken) diff --git a/homeai-character/README.md b/homeai-character/README.md new file mode 100644 index 0000000..18bc70e --- /dev/null +++ b/homeai-character/README.md @@ -0,0 +1,16 @@ +# React + Vite + +This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules. + +Currently, two official plugins are available: + +- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) (or [oxc](https://oxc.rs) when used in [rolldown-vite](https://vite.dev/guide/rolldown)) for Fast Refresh +- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh + +## React Compiler + +The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation). + +## Expanding the ESLint configuration + +If you are developing a production application, we recommend using TypeScript with type-aware lint rules enabled. Check out the [TS template](https://github.com/vitejs/vite/tree/main/packages/create-vite/template-react-ts) for information on how to integrate TypeScript and [`typescript-eslint`](https://typescript-eslint.io) in your project. diff --git a/homeai-character/character-manager.jsx b/homeai-character/character-manager.jsx deleted file mode 100644 index 33e063d..0000000 --- a/homeai-character/character-manager.jsx +++ /dev/null @@ -1,686 +0,0 @@ -import { useState, useEffect, useCallback } from "react"; - -const STORAGE_KEY = "ai-character-profiles"; - -const DEFAULT_MODELS = [ - "llama3.3:70b", "qwen2.5:72b", "mistral-large", "llama3.1:8b", - "qwen2.5:14b", "gemma3:27b", "deepseek-r1:14b", "phi4:14b" -]; - -const TTS_MODELS = ["Kokoro", "Chatterbox", "F5-TTS", "Qwen3-TTS", "Piper"]; -const STT_MODELS = ["Whisper Large-v3", "Whisper Medium", "Whisper Small", "Whisper Turbo"]; -const IMAGE_MODELS = ["SDXL", "Flux.1-dev", "Flux.1-schnell", "SD 1.5", "Pony Diffusion"]; - -const PERSONALITY_TRAITS = [ - "Warm", "Witty", "Calm", "Energetic", "Sarcastic", "Nurturing", - "Curious", "Playful", "Formal", "Casual", "Empathetic", "Direct", - "Creative", "Analytical", "Protective", "Mischievous" -]; - -const SPEAKING_STYLES = [ - "Conversational", "Poetic", "Concise", "Verbose", "Academic", - "Informal", "Dramatic", "Deadpan", "Enthusiastic", "Measured" -]; - -const EMPTY_CHARACTER = { - id: null, - name: "", - tagline: "", - avatar: "", - accentColor: "#7c6fff", - personality: { - traits: [], - speakingStyle: "", - coreValues: "", - quirks: "", - backstory: "", - motivation: "", - }, - prompts: { - systemPrompt: "", - wakeWordResponse: "", - fallbackResponse: "", - errorResponse: "", - customPrompts: [], - }, - models: { - llm: "", - tts: "", - stt: "", - imageGen: "", - voiceCloneRef: "", - ttsSpeed: 1.0, - temperature: 0.7, - }, - liveRepresentation: { - live2dModel: "", - idleExpression: "", - speakingExpression: "", - thinkingExpression: "", - happyExpression: "", - vtsTriggers: "", - }, - userNotes: "", - createdAt: null, - updatedAt: null, -}; - -const TABS = ["Identity", "Personality", "Prompts", "Models", "Live2D", "Notes"]; - -const TAB_ICONS = { - Identity: "◈", - Personality: "◉", - Prompts: "◎", - Models: "⬡", - Live2D: "◇", - Notes: "▣", -}; - -function generateId() { - return Date.now().toString(36) + Math.random().toString(36).slice(2); -} - -function ColorPicker({ value, onChange }) { - const presets = [ - "#7c6fff","#ff6b9d","#00d4aa","#ff9f43","#48dbfb", - "#ff6348","#a29bfe","#fd79a8","#55efc4","#fdcb6e" - ]; - return ( -
- {presets.map(c => ( -
- ); -} - -function TagSelector({ options, selected, onChange, max = 6 }) { - return ( -
- {options.map(opt => { - const active = selected.includes(opt); - return ( - - ); - })} -
- ); -} - -function Field({ label, hint, children }) { - return ( -
- - {hint &&

{hint}

} - {children} -
- ); -} - -function Input({ value, onChange, placeholder, type = "text" }) { - return ( - onChange(e.target.value)} placeholder={placeholder} - style={{ - width: "100%", background: "rgba(255,255,255,0.05)", border: "1px solid rgba(255,255,255,0.1)", - borderRadius: 8, padding: "10px 14px", color: "#fff", fontSize: 14, fontFamily: "inherit", - outline: "none", boxSizing: "border-box", transition: "border-color 0.2s", - }} - onFocus={e => e.target.style.borderColor = "var(--accent)"} - onBlur={e => e.target.style.borderColor = "rgba(255,255,255,0.1)"} - /> - ); -} - -function Textarea({ value, onChange, placeholder, rows = 4 }) { - return ( -