|
|
|
|
@@ -24,9 +24,12 @@ Endpoints:
|
|
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import subprocess
|
|
|
|
|
import sys
|
|
|
|
|
import asyncio
|
|
|
|
|
import urllib.request
|
|
|
|
|
import threading
|
|
|
|
|
from http.server import HTTPServer, BaseHTTPRequestHandler
|
|
|
|
|
from socketserver import ThreadingMixIn
|
|
|
|
|
from urllib.parse import urlparse
|
|
|
|
|
@@ -40,19 +43,222 @@ from wyoming.asr import Transcribe, Transcript
|
|
|
|
|
from wyoming.audio import AudioStart, AudioChunk, AudioStop
|
|
|
|
|
from wyoming.info import Info
|
|
|
|
|
|
|
|
|
|
# Timeout settings (seconds)
|
|
|
|
|
TIMEOUT_WARM = 120 # Model already loaded in VRAM
|
|
|
|
|
TIMEOUT_COLD = 180 # Model needs loading first (~10-20s load + inference)
|
|
|
|
|
OLLAMA_PS_URL = "http://localhost:11434/api/ps"
|
|
|
|
|
VTUBE_BRIDGE_URL = "http://localhost:8002"
|
|
|
|
|
|
|
|
|
|
def load_character_prompt() -> str:
|
|
|
|
|
"""Load the active character system prompt."""
|
|
|
|
|
character_path = Path.home() / ".openclaw" / "characters" / "aria.json"
|
|
|
|
|
|
|
|
|
|
def _vtube_fire_and_forget(path: str, data: dict):
|
|
|
|
|
"""Send a non-blocking POST to the VTube Studio bridge. Failures are silent."""
|
|
|
|
|
def _post():
|
|
|
|
|
try:
|
|
|
|
|
body = json.dumps(data).encode()
|
|
|
|
|
req = urllib.request.Request(
|
|
|
|
|
f"{VTUBE_BRIDGE_URL}{path}",
|
|
|
|
|
data=body,
|
|
|
|
|
headers={"Content-Type": "application/json"},
|
|
|
|
|
method="POST",
|
|
|
|
|
)
|
|
|
|
|
urllib.request.urlopen(req, timeout=2)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass # bridge may not be running — that's fine
|
|
|
|
|
threading.Thread(target=_post, daemon=True).start()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_model_warm() -> bool:
|
|
|
|
|
"""Check if the default Ollama model is already loaded in VRAM."""
|
|
|
|
|
try:
|
|
|
|
|
req = urllib.request.Request(OLLAMA_PS_URL)
|
|
|
|
|
with urllib.request.urlopen(req, timeout=2) as resp:
|
|
|
|
|
data = json.loads(resp.read())
|
|
|
|
|
return len(data.get("models", [])) > 0
|
|
|
|
|
except Exception:
|
|
|
|
|
# If we can't reach Ollama, assume cold (safer longer timeout)
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CHARACTERS_DIR = Path("/Users/aodhan/homeai-data/characters")
|
|
|
|
|
SATELLITE_MAP_PATH = Path("/Users/aodhan/homeai-data/satellite-map.json")
|
|
|
|
|
MEMORIES_DIR = Path("/Users/aodhan/homeai-data/memories")
|
|
|
|
|
ACTIVE_TTS_VOICE_PATH = Path("/Users/aodhan/homeai-data/active-tts-voice.json")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def clean_text_for_tts(text: str) -> str:
|
|
|
|
|
"""Strip content that shouldn't be spoken: tags, asterisks, emojis, markdown."""
|
|
|
|
|
# Remove HTML/XML tags and their content for common non-spoken tags
|
|
|
|
|
text = re.sub(r'<[^>]+>', '', text)
|
|
|
|
|
# Remove content between asterisks (actions/emphasis markup like *sighs*)
|
|
|
|
|
text = re.sub(r'\*[^*]+\*', '', text)
|
|
|
|
|
# Remove markdown bold/italic markers that might remain
|
|
|
|
|
text = re.sub(r'[*_]{1,3}', '', text)
|
|
|
|
|
# Remove markdown headers
|
|
|
|
|
text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE)
|
|
|
|
|
# Remove markdown links [text](url) → keep text
|
|
|
|
|
text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
|
|
|
|
|
# Remove bare URLs
|
|
|
|
|
text = re.sub(r'https?://\S+', '', text)
|
|
|
|
|
# Remove code blocks and inline code
|
|
|
|
|
text = re.sub(r'```[\s\S]*?```', '', text)
|
|
|
|
|
text = re.sub(r'`[^`]+`', '', text)
|
|
|
|
|
# Remove emojis
|
|
|
|
|
text = re.sub(
|
|
|
|
|
r'[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF'
|
|
|
|
|
r'\U0001F1E0-\U0001F1FF\U0001F900-\U0001F9FF\U0001FA00-\U0001FAFF'
|
|
|
|
|
r'\U00002702-\U000027B0\U0000FE00-\U0000FE0F\U0000200D'
|
|
|
|
|
r'\U00002600-\U000026FF\U00002300-\U000023FF]+', '', text
|
|
|
|
|
)
|
|
|
|
|
# Collapse multiple spaces/newlines
|
|
|
|
|
text = re.sub(r'\n{2,}', '\n', text)
|
|
|
|
|
text = re.sub(r'[ \t]{2,}', ' ', text)
|
|
|
|
|
return text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_satellite_map() -> dict:
|
|
|
|
|
"""Load the satellite-to-character mapping."""
|
|
|
|
|
try:
|
|
|
|
|
with open(SATELLITE_MAP_PATH) as f:
|
|
|
|
|
return json.load(f)
|
|
|
|
|
except Exception:
|
|
|
|
|
return {"default": "aria_default", "satellites": {}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def set_active_tts_voice(character_id: str, tts_config: dict):
|
|
|
|
|
"""Write the active TTS config to a state file for the Wyoming TTS server to read."""
|
|
|
|
|
try:
|
|
|
|
|
ACTIVE_TTS_VOICE_PATH.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
state = {
|
|
|
|
|
"character_id": character_id,
|
|
|
|
|
"engine": tts_config.get("engine", "kokoro"),
|
|
|
|
|
"kokoro_voice": tts_config.get("kokoro_voice", ""),
|
|
|
|
|
"elevenlabs_voice_id": tts_config.get("elevenlabs_voice_id", ""),
|
|
|
|
|
"elevenlabs_model": tts_config.get("elevenlabs_model", "eleven_multilingual_v2"),
|
|
|
|
|
"speed": tts_config.get("speed", 1),
|
|
|
|
|
}
|
|
|
|
|
with open(ACTIVE_TTS_VOICE_PATH, "w") as f:
|
|
|
|
|
json.dump(state, f)
|
|
|
|
|
except Exception as e:
|
|
|
|
|
print(f"[OpenClaw Bridge] Warning: could not write active TTS config: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def resolve_character_id(satellite_id: str = None) -> str:
|
|
|
|
|
"""Resolve a satellite ID to a character profile ID."""
|
|
|
|
|
sat_map = load_satellite_map()
|
|
|
|
|
if satellite_id and satellite_id in sat_map.get("satellites", {}):
|
|
|
|
|
return sat_map["satellites"][satellite_id]
|
|
|
|
|
return sat_map.get("default", "aria_default")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_character(character_id: str = None) -> dict:
|
|
|
|
|
"""Load a character profile by ID. Returns the full character data dict."""
|
|
|
|
|
if not character_id:
|
|
|
|
|
character_id = resolve_character_id()
|
|
|
|
|
safe_id = character_id.replace("/", "_")
|
|
|
|
|
character_path = CHARACTERS_DIR / f"{safe_id}.json"
|
|
|
|
|
if not character_path.exists():
|
|
|
|
|
return ""
|
|
|
|
|
return {}
|
|
|
|
|
try:
|
|
|
|
|
with open(character_path) as f:
|
|
|
|
|
data = json.load(f)
|
|
|
|
|
return data.get("system_prompt", "")
|
|
|
|
|
profile = json.load(f)
|
|
|
|
|
return profile.get("data", {})
|
|
|
|
|
except Exception:
|
|
|
|
|
return {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_character_prompt(satellite_id: str = None, character_id: str = None) -> str:
|
|
|
|
|
"""Load the full system prompt for a character, resolved by satellite or explicit ID.
|
|
|
|
|
Builds a rich prompt from system_prompt + profile fields (background, dialogue_style, etc.)."""
|
|
|
|
|
if not character_id:
|
|
|
|
|
character_id = resolve_character_id(satellite_id)
|
|
|
|
|
char = load_character(character_id)
|
|
|
|
|
if not char:
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
sections = []
|
|
|
|
|
|
|
|
|
|
# Core system prompt
|
|
|
|
|
prompt = char.get("system_prompt", "")
|
|
|
|
|
if prompt:
|
|
|
|
|
sections.append(prompt)
|
|
|
|
|
|
|
|
|
|
# Character profile fields
|
|
|
|
|
profile_parts = []
|
|
|
|
|
if char.get("background"):
|
|
|
|
|
profile_parts.append(f"## Background\n{char['background']}")
|
|
|
|
|
if char.get("appearance"):
|
|
|
|
|
profile_parts.append(f"## Appearance\n{char['appearance']}")
|
|
|
|
|
if char.get("dialogue_style"):
|
|
|
|
|
profile_parts.append(f"## Dialogue Style\n{char['dialogue_style']}")
|
|
|
|
|
if char.get("skills"):
|
|
|
|
|
skills = char["skills"]
|
|
|
|
|
if isinstance(skills, list):
|
|
|
|
|
skills_text = ", ".join(skills[:15])
|
|
|
|
|
else:
|
|
|
|
|
skills_text = str(skills)
|
|
|
|
|
profile_parts.append(f"## Skills & Interests\n{skills_text}")
|
|
|
|
|
if profile_parts:
|
|
|
|
|
sections.append("[Character Profile]\n" + "\n\n".join(profile_parts))
|
|
|
|
|
|
|
|
|
|
# Character metadata
|
|
|
|
|
meta_lines = []
|
|
|
|
|
if char.get("display_name"):
|
|
|
|
|
meta_lines.append(f"Your name is: {char['display_name']}")
|
|
|
|
|
# Support both v1 (gaze_preset string) and v2 (gaze_presets array)
|
|
|
|
|
gaze_presets = char.get("gaze_presets", [])
|
|
|
|
|
if gaze_presets and isinstance(gaze_presets, list):
|
|
|
|
|
for gp in gaze_presets:
|
|
|
|
|
preset = gp.get("preset", "")
|
|
|
|
|
trigger = gp.get("trigger", "self-portrait")
|
|
|
|
|
if preset:
|
|
|
|
|
meta_lines.append(f"GAZE preset '{preset}' — use for: {trigger}")
|
|
|
|
|
elif char.get("gaze_preset"):
|
|
|
|
|
meta_lines.append(f"Your gaze_preset for self-portraits is: {char['gaze_preset']}")
|
|
|
|
|
if meta_lines:
|
|
|
|
|
sections.append("[Character Metadata]\n" + "\n".join(meta_lines))
|
|
|
|
|
|
|
|
|
|
# Memories (personal + general)
|
|
|
|
|
personal, general = load_memories(character_id)
|
|
|
|
|
if personal:
|
|
|
|
|
sections.append("[Personal Memories]\n" + "\n".join(f"- {m}" for m in personal))
|
|
|
|
|
if general:
|
|
|
|
|
sections.append("[General Knowledge]\n" + "\n".join(f"- {m}" for m in general))
|
|
|
|
|
|
|
|
|
|
return "\n\n".join(sections)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_memories(character_id: str) -> tuple[list[str], list[str]]:
|
|
|
|
|
"""Load personal (per-character) and general memories.
|
|
|
|
|
Returns (personal_contents, general_contents) truncated to fit context budget."""
|
|
|
|
|
PERSONAL_BUDGET = 4000 # max chars for personal memories in prompt
|
|
|
|
|
GENERAL_BUDGET = 3000 # max chars for general memories in prompt
|
|
|
|
|
|
|
|
|
|
def _read_memories(path: Path, budget: int) -> list[str]:
|
|
|
|
|
try:
|
|
|
|
|
with open(path) as f:
|
|
|
|
|
data = json.load(f)
|
|
|
|
|
except Exception:
|
|
|
|
|
return []
|
|
|
|
|
memories = data.get("memories", [])
|
|
|
|
|
# Sort newest first
|
|
|
|
|
memories.sort(key=lambda m: m.get("createdAt", ""), reverse=True)
|
|
|
|
|
result = []
|
|
|
|
|
used = 0
|
|
|
|
|
for m in memories:
|
|
|
|
|
content = m.get("content", "").strip()
|
|
|
|
|
if not content:
|
|
|
|
|
continue
|
|
|
|
|
if used + len(content) > budget:
|
|
|
|
|
break
|
|
|
|
|
result.append(content)
|
|
|
|
|
used += len(content)
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
safe_id = character_id.replace("/", "_")
|
|
|
|
|
personal = _read_memories(MEMORIES_DIR / "personal" / f"{safe_id}.json", PERSONAL_BUDGET)
|
|
|
|
|
general = _read_memories(MEMORIES_DIR / "general.json", GENERAL_BUDGET)
|
|
|
|
|
return personal, general
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class OpenClawBridgeHandler(BaseHTTPRequestHandler):
|
|
|
|
|
"""HTTP request handler for OpenClaw bridge."""
|
|
|
|
|
@@ -95,44 +301,78 @@ class OpenClawBridgeHandler(BaseHTTPRequestHandler):
|
|
|
|
|
self._send_json_response(404, {"error": "Not found"})
|
|
|
|
|
|
|
|
|
|
def _handle_tts_request(self):
|
|
|
|
|
"""Handle TTS request and return wav audio."""
|
|
|
|
|
"""Handle TTS request and return audio. Routes to Kokoro or ElevenLabs based on engine."""
|
|
|
|
|
content_length = int(self.headers.get("Content-Length", 0))
|
|
|
|
|
if content_length == 0:
|
|
|
|
|
self._send_json_response(400, {"error": "Empty body"})
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
body = self.rfile.read(content_length).decode()
|
|
|
|
|
data = json.loads(body)
|
|
|
|
|
except json.JSONDecodeError:
|
|
|
|
|
self._send_json_response(400, {"error": "Invalid JSON"})
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
text = data.get("text", "Hello, this is a test.")
|
|
|
|
|
# Strip emojis so TTS doesn't try to read them out
|
|
|
|
|
text = re.sub(
|
|
|
|
|
r'[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF'
|
|
|
|
|
r'\U0001F1E0-\U0001F1FF\U0001F900-\U0001F9FF\U0001FA00-\U0001FAFF'
|
|
|
|
|
r'\U00002702-\U000027B0\U0000FE00-\U0000FE0F\U0000200D'
|
|
|
|
|
r'\U00002600-\U000026FF\U00002300-\U000023FF]+', '', text
|
|
|
|
|
).strip()
|
|
|
|
|
text = clean_text_for_tts(text)
|
|
|
|
|
voice = data.get("voice", "af_heart")
|
|
|
|
|
|
|
|
|
|
engine = data.get("engine", "kokoro")
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
# Run the async Wyoming client
|
|
|
|
|
audio_bytes = asyncio.run(self._synthesize_audio(text, voice))
|
|
|
|
|
|
|
|
|
|
# Send WAV response
|
|
|
|
|
# Signal avatar: speaking
|
|
|
|
|
_vtube_fire_and_forget("/expression", {"event": "speaking"})
|
|
|
|
|
|
|
|
|
|
if engine == "elevenlabs":
|
|
|
|
|
audio_bytes, content_type = self._synthesize_elevenlabs(text, voice, data.get("model"))
|
|
|
|
|
else:
|
|
|
|
|
# Default: local Kokoro via Wyoming
|
|
|
|
|
audio_bytes = asyncio.run(self._synthesize_audio(text, voice))
|
|
|
|
|
content_type = "audio/wav"
|
|
|
|
|
|
|
|
|
|
# Signal avatar: idle
|
|
|
|
|
_vtube_fire_and_forget("/expression", {"event": "idle"})
|
|
|
|
|
|
|
|
|
|
self.send_response(200)
|
|
|
|
|
self.send_header("Content-Type", "audio/wav")
|
|
|
|
|
# Allow CORS for local testing from Vite
|
|
|
|
|
self.send_header("Content-Type", content_type)
|
|
|
|
|
self.send_header("Access-Control-Allow-Origin", "*")
|
|
|
|
|
self.end_headers()
|
|
|
|
|
self.wfile.write(audio_bytes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
_vtube_fire_and_forget("/expression", {"event": "error"})
|
|
|
|
|
self._send_json_response(500, {"error": str(e)})
|
|
|
|
|
|
|
|
|
|
def _synthesize_elevenlabs(self, text: str, voice_id: str, model: str = None) -> tuple[bytes, str]:
|
|
|
|
|
"""Call ElevenLabs TTS API and return (audio_bytes, content_type)."""
|
|
|
|
|
api_key = os.environ.get("ELEVENLABS_API_KEY", "")
|
|
|
|
|
if not api_key:
|
|
|
|
|
raise RuntimeError("ELEVENLABS_API_KEY not set in environment")
|
|
|
|
|
if not voice_id:
|
|
|
|
|
raise RuntimeError("No ElevenLabs voice ID provided")
|
|
|
|
|
|
|
|
|
|
model = model or "eleven_multilingual_v2"
|
|
|
|
|
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
|
|
|
|
|
payload = json.dumps({
|
|
|
|
|
"text": text,
|
|
|
|
|
"model_id": model,
|
|
|
|
|
"voice_settings": {"stability": 0.5, "similarity_boost": 0.75},
|
|
|
|
|
}).encode()
|
|
|
|
|
|
|
|
|
|
req = urllib.request.Request(
|
|
|
|
|
url,
|
|
|
|
|
data=payload,
|
|
|
|
|
headers={
|
|
|
|
|
"Content-Type": "application/json",
|
|
|
|
|
"xi-api-key": api_key,
|
|
|
|
|
"Accept": "audio/mpeg",
|
|
|
|
|
},
|
|
|
|
|
method="POST",
|
|
|
|
|
)
|
|
|
|
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
|
|
|
audio_bytes = resp.read()
|
|
|
|
|
return audio_bytes, "audio/mpeg"
|
|
|
|
|
|
|
|
|
|
def do_OPTIONS(self):
|
|
|
|
|
"""Handle CORS preflight requests."""
|
|
|
|
|
self.send_response(204)
|
|
|
|
|
@@ -264,6 +504,43 @@ class OpenClawBridgeHandler(BaseHTTPRequestHandler):
|
|
|
|
|
print(f"[OpenClaw Bridge] Wake word detected: {wake_word_data.get('wake_word', 'unknown')}")
|
|
|
|
|
self._send_json_response(200, {"status": "ok", "message": "Wake word received"})
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def _call_openclaw(message: str, agent: str, timeout: int) -> str:
|
|
|
|
|
"""Call OpenClaw CLI and return stdout."""
|
|
|
|
|
result = subprocess.run(
|
|
|
|
|
["/opt/homebrew/bin/openclaw", "agent", "--message", message, "--agent", agent],
|
|
|
|
|
capture_output=True,
|
|
|
|
|
text=True,
|
|
|
|
|
timeout=timeout,
|
|
|
|
|
check=True,
|
|
|
|
|
)
|
|
|
|
|
return result.stdout.strip()
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def _needs_followup(response: str) -> bool:
|
|
|
|
|
"""Detect if the model promised to act but didn't actually do it.
|
|
|
|
|
Returns True if the response looks like a 'will do' without a result."""
|
|
|
|
|
if not response:
|
|
|
|
|
return False
|
|
|
|
|
resp_lower = response.lower()
|
|
|
|
|
# If the response contains a URL or JSON-like output, it probably completed
|
|
|
|
|
if "http://" in response or "https://" in response or '"status"' in response:
|
|
|
|
|
return False
|
|
|
|
|
# If it contains a tool result indicator (ha-ctl output, gaze-ctl output)
|
|
|
|
|
if any(kw in resp_lower for kw in ["image_url", "seed", "entity_id", "state:", "turned on", "turned off"]):
|
|
|
|
|
return False
|
|
|
|
|
# Detect promise-like language without substance
|
|
|
|
|
promise_phrases = [
|
|
|
|
|
"let me", "i'll ", "i will ", "sure thing", "sure,", "right away",
|
|
|
|
|
"generating", "one moment", "working on", "hang on", "just a moment",
|
|
|
|
|
"on it", "let me generate", "let me create",
|
|
|
|
|
]
|
|
|
|
|
has_promise = any(phrase in resp_lower for phrase in promise_phrases)
|
|
|
|
|
# Short responses with promise language are likely incomplete
|
|
|
|
|
if has_promise and len(response) < 200:
|
|
|
|
|
return True
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
def _handle_agent_request(self):
|
|
|
|
|
"""Handle agent message request."""
|
|
|
|
|
content_length = int(self.headers.get("Content-Length", 0))
|
|
|
|
|
@@ -280,29 +557,63 @@ class OpenClawBridgeHandler(BaseHTTPRequestHandler):
|
|
|
|
|
|
|
|
|
|
message = data.get("message")
|
|
|
|
|
agent = data.get("agent", "main")
|
|
|
|
|
satellite_id = data.get("satellite_id")
|
|
|
|
|
explicit_character_id = data.get("character_id")
|
|
|
|
|
|
|
|
|
|
if not message:
|
|
|
|
|
self._send_json_response(400, {"error": "Message is required"})
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
# Inject system prompt
|
|
|
|
|
system_prompt = load_character_prompt()
|
|
|
|
|
# Resolve character: explicit ID > satellite mapping > default
|
|
|
|
|
if explicit_character_id:
|
|
|
|
|
character_id = explicit_character_id
|
|
|
|
|
else:
|
|
|
|
|
character_id = resolve_character_id(satellite_id)
|
|
|
|
|
system_prompt = load_character_prompt(character_id=character_id)
|
|
|
|
|
|
|
|
|
|
# Set the active TTS config for the Wyoming server to pick up
|
|
|
|
|
char = load_character(character_id)
|
|
|
|
|
tts_config = char.get("tts", {})
|
|
|
|
|
if tts_config:
|
|
|
|
|
set_active_tts_voice(character_id, tts_config)
|
|
|
|
|
engine = tts_config.get("engine", "kokoro")
|
|
|
|
|
voice_label = tts_config.get("kokoro_voice", "") if engine == "kokoro" else tts_config.get("elevenlabs_voice_id", "")
|
|
|
|
|
print(f"[OpenClaw Bridge] Active TTS: {engine} / {voice_label}")
|
|
|
|
|
|
|
|
|
|
if satellite_id:
|
|
|
|
|
print(f"[OpenClaw Bridge] Satellite: {satellite_id} → character: {character_id}")
|
|
|
|
|
elif explicit_character_id:
|
|
|
|
|
print(f"[OpenClaw Bridge] Character: {character_id}")
|
|
|
|
|
if system_prompt:
|
|
|
|
|
message = f"System Context: {system_prompt}\n\nUser Request: {message}"
|
|
|
|
|
|
|
|
|
|
# Check if model is warm to set appropriate timeout
|
|
|
|
|
warm = is_model_warm()
|
|
|
|
|
timeout = TIMEOUT_WARM if warm else TIMEOUT_COLD
|
|
|
|
|
print(f"[OpenClaw Bridge] Model {'warm' if warm else 'cold'}, timeout={timeout}s")
|
|
|
|
|
|
|
|
|
|
# Signal avatar: thinking
|
|
|
|
|
_vtube_fire_and_forget("/expression", {"event": "thinking"})
|
|
|
|
|
|
|
|
|
|
# Call OpenClaw CLI (use full path for launchd compatibility)
|
|
|
|
|
try:
|
|
|
|
|
result = subprocess.run(
|
|
|
|
|
["/opt/homebrew/bin/openclaw", "agent", "--message", message, "--agent", agent],
|
|
|
|
|
capture_output=True,
|
|
|
|
|
text=True,
|
|
|
|
|
timeout=120,
|
|
|
|
|
check=True
|
|
|
|
|
)
|
|
|
|
|
response_text = result.stdout.strip()
|
|
|
|
|
response_text = self._call_openclaw(message, agent, timeout)
|
|
|
|
|
|
|
|
|
|
# Re-prompt if the model promised to act but didn't call a tool.
|
|
|
|
|
# Detect "I'll do X" / "Let me X" responses that lack any result.
|
|
|
|
|
if self._needs_followup(response_text):
|
|
|
|
|
print(f"[OpenClaw Bridge] Response looks like a promise without action, re-prompting")
|
|
|
|
|
followup = (
|
|
|
|
|
"You just said you would do something but didn't actually call the exec tool. "
|
|
|
|
|
"Do NOT explain what you will do — call the tool NOW using exec and return the result."
|
|
|
|
|
)
|
|
|
|
|
response_text = self._call_openclaw(followup, agent, timeout)
|
|
|
|
|
|
|
|
|
|
# Signal avatar: idle (TTS handler will override to 'speaking' if voice is used)
|
|
|
|
|
_vtube_fire_and_forget("/expression", {"event": "idle"})
|
|
|
|
|
self._send_json_response(200, {"response": response_text})
|
|
|
|
|
except subprocess.TimeoutExpired:
|
|
|
|
|
self._send_json_response(504, {"error": "OpenClaw command timed out"})
|
|
|
|
|
self._send_json_response(504, {"error": f"OpenClaw command timed out after {timeout}s (model was {'warm' if warm else 'cold'})"})
|
|
|
|
|
except subprocess.CalledProcessError as e:
|
|
|
|
|
error_msg = e.stderr.strip() if e.stderr else "OpenClaw command failed"
|
|
|
|
|
self._send_json_response(500, {"error": error_msg})
|
|
|
|
|
|