Initial project structure and planning docs

Full project plan across 8 sub-projects (homeai-infra, homeai-llm, homeai-voice, homeai-agent, homeai-character, homeai-esp32, homeai-visual, homeai-images). Includes per-project PLAN.md files, top-level PROJECT_PLAN.md, and master TODO.md. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-04 01:11:37 +00:00
commit 38247d7cc4
11 changed files with 3060 additions and 0 deletions
--- a/homeai-character/PLAN.md
+++ b/homeai-character/PLAN.md
@@ -0,0 +1,300 @@
+# P5: homeai-character — Character System & Persona Config
+
+> Phase 3 | No hard runtime dependencies | Consumed by: P3, P4, P7
+
+---
+
+## Goal
+
+A single, authoritative character configuration that defines the AI assistant's personality, voice, visual expressions, and prompt rules. The Character Manager UI (already started as `character-manager.jsx`) provides a friendly editor. The exported JSON is the single source of truth for all pipeline components.
+
+---
+
+## Character JSON Schema v1
+
+File: `schema/character.schema.json`
+
+```json
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "HomeAI Character Config",
+  "version": "1",
+  "type": "object",
+  "required": ["schema_version", "name", "system_prompt", "tts"],
+  "properties": {
+    "schema_version": { "type": "integer", "const": 1 },
+    "name": { "type": "string" },
+    "display_name": { "type": "string" },
+    "description": { "type": "string" },
+
+    "system_prompt": { "type": "string" },
+
+    "model_overrides": {
+      "type": "object",
+      "properties": {
+        "primary": { "type": "string" },
+        "fast": { "type": "string" }
+      }
+    },
+
+    "tts": {
+      "type": "object",
+      "required": ["engine"],
+      "properties": {
+        "engine": {
+          "type": "string",
+          "enum": ["kokoro", "chatterbox", "qwen3"]
+        },
+        "voice_ref_path": { "type": ["string", "null"] },
+        "kokoro_voice": { "type": "string" },
+        "speed": { "type": "number", "default": 1.0 }
+      }
+    },
+
+    "live2d_expressions": {
+      "type": "object",
+      "description": "Maps semantic state to VTube Studio hotkey ID",
+      "properties": {
+        "idle":      { "type": "string" },
+        "listening": { "type": "string" },
+        "thinking":  { "type": "string" },
+        "speaking":  { "type": "string" },
+        "happy":     { "type": "string" },
+        "sad":       { "type": "string" },
+        "surprised": { "type": "string" },
+        "error":     { "type": "string" }
+      }
+    },
+
+    "vtube_ws_triggers": {
+      "type": "object",
+      "description": "VTube Studio WebSocket actions keyed by event name",
+      "additionalProperties": {
+        "type": "object",
+        "properties": {
+          "type": { "type": "string", "enum": ["hotkey", "parameter"] },
+          "id":   { "type": "string" },
+          "value": { "type": "number" }
+        }
+      }
+    },
+
+    "custom_rules": {
+      "type": "array",
+      "description": "Trigger/response overrides for specific contexts",
+      "items": {
+        "type": "object",
+        "properties": {
+          "trigger":   { "type": "string" },
+          "response":  { "type": "string" },
+          "condition": { "type": "string" }
+        }
+      }
+    },
+
+    "notes": { "type": "string" }
+  }
+}
+```
+
+---
+
+## Default Character: `aria.json`
+
+File: `characters/aria.json`
+
+```json
+{
+  "schema_version": 1,
+  "name": "aria",
+  "display_name": "Aria",
+  "description": "Default HomeAI assistant persona",
+
+  "system_prompt": "You are Aria, a warm, curious, and helpful AI assistant living in the home. You speak naturally and conversationally — never robotic. You are knowledgeable but never condescending. You remember the people you live with and build on those memories over time. Keep responses concise when controlling smart home devices; be more expressive in casual conversation. Never break character.",
+
+  "model_overrides": {
+    "primary": "llama3.3:70b",
+    "fast": "qwen2.5:7b"
+  },
+
+  "tts": {
+    "engine": "kokoro",
+    "kokoro_voice": "af_heart",
+    "voice_ref_path": null,
+    "speed": 1.0
+  },
+
+  "live2d_expressions": {
+    "idle":      "expr_idle",
+    "listening": "expr_listening",
+    "thinking":  "expr_thinking",
+    "speaking":  "expr_speaking",
+    "happy":     "expr_happy",
+    "sad":       "expr_sad",
+    "surprised": "expr_surprised",
+    "error":     "expr_error"
+  },
+
+  "vtube_ws_triggers": {
+    "thinking": { "type": "hotkey", "id": "expr_thinking" },
+    "speaking": { "type": "hotkey", "id": "expr_speaking" },
+    "idle":     { "type": "hotkey", "id": "expr_idle" }
+  },
+
+  "custom_rules": [
+    {
+      "trigger": "good morning",
+      "response": "Good morning! How did you sleep?",
+      "condition": "time_of_day == morning"
+    }
+  ],
+
+  "notes": "Default persona. Voice clone to be added once reference audio recorded."
+}
+```
+
+---
+
+## Character Manager UI
+
+### Status
+
+`character-manager.jsx` already exists — needs:
+1. Schema validation before export (reject malformed JSON)
+2. File system integration: save/load from `characters/` directory
+3. Live preview of system prompt
+4. Expression mapping UI for Live2D states
+
+### Tech Stack
+
+- React + Vite (local dev server, not deployed)
+- Tailwind CSS (or minimal CSS)
+- Runs at `http://localhost:5173` during editing
+
+### File Structure
+
+```
+homeai-character/
+├── src/
+│   ├── character-manager.jsx   ← existing, extend here
+│   ├── SchemaValidator.js      ← validate against character.schema.json
+│   ├── ExpressionMapper.jsx    ← UI for Live2D expression mapping
+│   └── main.jsx
+├── schema/
+│   └── character.schema.json
+├── characters/
+│   ├── aria.json               ← default character
+│   └── .gitkeep
+├── package.json
+└── vite.config.js
+```
+
+### Character Manager Features
+
+| Feature | Description |
+|---|---|
+| Basic info | name, display name, description |
+| System prompt | Multi-line editor with char count |
+| Model overrides | Dropdown: primary + fast model |
+| TTS config | Engine picker, voice selector, speed slider, voice ref path |
+| Expression mapping | Table: state → VTube hotkey ID |
+| VTube WS triggers | JSON editor for advanced triggers |
+| Custom rules | Add/edit/delete trigger-response pairs |
+| Notes | Free-text notes field |
+| Export | Validates schema, writes to `characters/<name>.json` |
+| Import | Load existing character JSON for editing |
+
+### Schema Validation
+
+```javascript
+import Ajv from 'ajv'
+import schema from '../schema/character.schema.json'
+
+const ajv = new Ajv()
+const validate = ajv.compile(schema)
+
+export function validateCharacter(config) {
+  const valid = validate(config)
+  if (!valid) throw new Error(ajv.errorsText(validate.errors))
+  return true
+}
+```
+
+---
+
+## Voice Clone Workflow
+
+1. Record 30–60 seconds of clean speech at `~/voices/<name>-raw.wav`
+   - Quiet room, consistent mic distance, natural conversational tone
+2. Pre-process: `ffmpeg -i ~/voices/aria-raw.wav -ar 22050 -ac 1 aria.wav`
+3. Place at `~/voices/aria.wav`
+4. Update character JSON: `"voice_ref_path": "~/voices/aria.wav"`, `"engine": "chatterbox"`
+5. Test: run Chatterbox with the reference, verify voice quality
+6. If unsatisfactory, try Qwen3-TTS as alternative
+
+---
+
+## Pipeline Integration
+
+### How P4 (OpenClaw) loads the character
+
+```python
+import json
+from pathlib import Path
+
+def load_character(name: str) -> dict:
+    path = Path.home() / ".openclaw" / "characters" / f"{name}.json"
+    config = json.loads(path.read_text())
+    assert config["schema_version"] == 1, "Unsupported schema version"
+    return config
+
+# System prompt injection: load the persona by name and read its prompt text.
+character = load_character("aria")
+# `system_prompt` is listed as required in the character schema, but
+# load_character only checks schema_version, so this lookup assumes the file
+# was validated on export.
+system_prompt = character["system_prompt"]
+# Pass to Ollama as system message
+```
+
+OpenClaw hot-reloads the character JSON on file change — no restart required.
+
+### How P3 selects TTS engine
+
+```python
+character = load_character(active_name)
+tts_cfg = character["tts"]
+
+if tts_cfg["engine"] == "chatterbox":
+    tts = ChatterboxTTS(voice_ref=tts_cfg["voice_ref_path"])
+elif tts_cfg["engine"] == "qwen3":
+    tts = Qwen3TTS()
+else:  # kokoro (default)
+    tts = KokoroWyomingClient(voice=tts_cfg.get("kokoro_voice", "af_heart"))
+```
+
+---
+
+## Implementation Steps
+
+- [ ] Define and write `schema/character.schema.json` (v1)
+- [ ] Write `characters/aria.json` — default character with placeholder expression IDs
+- [ ] Set up Vite project at the `homeai-character/` root, with sources in `src/` (install deps: `npm install`)
+- [ ] Integrate existing `character-manager.jsx` into new Vite project
+- [ ] Add schema validation on export (`ajv`)
+- [ ] Add expression mapping UI section
+- [ ] Add custom rules editor
+- [ ] Test full edit → export → validate → load cycle
+- [ ] Record or source voice reference audio for Aria
+- [ ] Pre-process audio and test with Chatterbox
+- [ ] Update `aria.json` with voice clone path if quality is good
+- [ ] Write `SchemaValidator.js` as standalone utility (used by P4 at runtime too)
+- [ ] Document schema in `schema/README.md`
+
+---
+
+## Success Criteria
+
+- [ ] `aria.json` validates against `character.schema.json` without errors
+- [ ] Character Manager UI can load, edit, and export `aria.json`
+- [ ] OpenClaw loads `aria.json` system prompt and applies it to Ollama requests
+- [ ] P3 TTS engine selection correctly follows `tts.engine` field
+- [ ] Schema version check in P4 fails gracefully with a clear error message
+- [ ] Voice clone sounds natural (if Chatterbox path taken)