- Deploy Music Assistant on Pi (10.0.0.199:8095) with host networking for Chromecast mDNS discovery, Spotify + SMB library support
- Switch primary LLM from Ollama to Claude Sonnet 4 (Anthropic API), local models remain as fallback
- Add model info tag under each assistant message in dashboard chat, persisted in conversation JSON
- Rewrite homeai-agent/setup.sh: loads .env, injects API keys into plists, symlinks plists to ~/Library/LaunchAgents/, smoke tests services
- Update install_service() in common.sh to use symlinks instead of copies
- Open UFW ports on Pi for Music Assistant (8095, 8097, 8927)
- Add ANTHROPIC_API_KEY to openclaw + bridge launchd plists

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
712 lines
27 KiB
Python
712 lines
27 KiB
Python
#!/usr/bin/env python3
|
|
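"""
OpenClaw HTTP Bridge

A simple HTTP server that translates HTTP POST requests to OpenClaw CLI calls.
This allows Home Assistant (running in Docker on a different machine) to
communicate with OpenClaw via HTTP.

Usage:
    python3 openclaw-http-bridge.py [--host 0.0.0.0] [--port 8081]

Endpoints:
    POST /api/agent/message
    {
        "message": "Your message here",
        "agent": "main",
        "satellite_id": "optional satellite ID",
        "character_id": "optional character ID"
    }

    Returns:
    {
        "response": "OpenClaw response text",
        "model": "model name reported for this request"
    }

    POST /api/tts   JSON {"text", "voice", "engine"}; returns synthesized audio
    POST /api/stt   WAV audio body; returns {"text": "..."}
    POST /wake      wake word notification
    GET  /status    health check
"""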
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import asyncio
|
|
import urllib.request
|
|
import threading
|
|
from http.server import HTTPServer, BaseHTTPRequestHandler
|
|
from socketserver import ThreadingMixIn
|
|
from urllib.parse import urlparse
|
|
from pathlib import Path
|
|
import wave
|
|
import io
|
|
import re
|
|
from wyoming.client import AsyncTcpClient
|
|
from wyoming.tts import Synthesize, SynthesizeVoice
|
|
from wyoming.asr import Transcribe, Transcript
|
|
from wyoming.audio import AudioStart, AudioChunk, AudioStop
|
|
from wyoming.info import Info
|
|
|
|
# --- Timeout settings (seconds) for a single OpenClaw CLI call ---
# Model already loaded in VRAM.
TIMEOUT_WARM = 120
# Model needs loading first (~10-20s load + inference).
TIMEOUT_COLD = 180

# Ollama endpoint queried to see whether any model is currently loaded.
OLLAMA_PS_URL = "http://localhost:11434/api/ps"
# Base URL of the VTube Studio bridge that receives avatar expression events.
VTUBE_BRIDGE_URL = "http://localhost:8002"
# Model name reported to clients when no mode-based override is active.
DEFAULT_MODEL = "anthropic/claude-sonnet-4-20250514"
|
|
|
|
|
|
def _vtube_fire_and_forget(path: str, data: dict):
|
|
"""Send a non-blocking POST to the VTube Studio bridge. Failures are silent."""
|
|
def _post():
|
|
try:
|
|
body = json.dumps(data).encode()
|
|
req = urllib.request.Request(
|
|
f"{VTUBE_BRIDGE_URL}{path}",
|
|
data=body,
|
|
headers={"Content-Type": "application/json"},
|
|
method="POST",
|
|
)
|
|
urllib.request.urlopen(req, timeout=2)
|
|
except Exception:
|
|
pass # bridge may not be running — that's fine
|
|
threading.Thread(target=_post, daemon=True).start()
|
|
|
|
|
|
def is_model_warm() -> bool:
    """Report whether Ollama currently has at least one model loaded.

    Queries OLLAMA_PS_URL with a 2-second timeout and inspects the "models"
    list in the JSON reply.

    Returns:
        True when the reply lists one or more models; False when it lists
        none or when the query fails for any reason.
    """
    try:
        request = urllib.request.Request(OLLAMA_PS_URL)
        with urllib.request.urlopen(request, timeout=2) as response:
            payload = json.loads(response.read())
        loaded_models = payload.get("models", [])
        return len(loaded_models) > 0
    except Exception:
        # Ollama unreachable or reply malformed: report "cold" so callers
        # pick the safer, longer timeout.
        return False
|
|
|
|
|
|
# Root of the HomeAI data directory; every state file below lives under it.
_HOMEAI_DATA_DIR = Path("/Users/aodhan/homeai-data")

# Directory of character profile JSON files.
CHARACTERS_DIR = _HOMEAI_DATA_DIR / "characters"
# JSON file mapping satellite IDs to character IDs.
SATELLITE_MAP_PATH = _HOMEAI_DATA_DIR / "satellite-map.json"
# Directory holding personal and general memory files.
MEMORIES_DIR = _HOMEAI_DATA_DIR / "memories"
# State file describing the TTS voice currently in use.
ACTIVE_TTS_VOICE_PATH = _HOMEAI_DATA_DIR / "active-tts-voice.json"
# State file holding the public/private mode configuration.
ACTIVE_MODE_PATH = _HOMEAI_DATA_DIR / "active-mode.json"

# Cloud provider model mappings for mode routing
CLOUD_MODELS = dict(
    anthropic="anthropic/claude-sonnet-4-20250514",
    openai="openai/gpt-4o",
)
|
|
|
|
|
|
def load_mode() -> dict:
    """Load the public/private mode configuration.

    Returns:
        The JSON object stored at ACTIVE_MODE_PATH. If the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object, the
        built-in default (private mode, anthropic provider) is returned.
    """
    default_mode = {"mode": "private", "cloud_provider": "anthropic", "overrides": {}}
    try:
        with open(ACTIVE_MODE_PATH) as f:
            mode_data = json.load(f)
    except Exception:
        return default_mode
    # Callers use .get() on the result, so a valid-but-non-object document
    # (list, string, number) must not be handed back.
    if not isinstance(mode_data, dict):
        return default_mode
    return mode_data
|
|
|
|
|
|
def resolve_model(mode_data: dict) -> str | None:
|
|
"""Resolve which model to use based on mode. Returns None for default (private/local)."""
|
|
mode = mode_data.get("mode", "private")
|
|
if mode == "private":
|
|
return None # Use OpenClaw default (ollama/qwen3.5:35b-a3b)
|
|
provider = mode_data.get("cloud_provider", "anthropic")
|
|
return CLOUD_MODELS.get(provider, CLOUD_MODELS["anthropic"])
|
|
|
|
|
|
def clean_text_for_tts(text: str) -> str:
    """Strip content that shouldn't be spoken: tags, asterisks, emojis, markdown.

    Order matters: code spans are removed first so that later passes (URL
    stripping in particular) cannot eat one of their backticks and leave an
    unbalanced delimiter that swallows unrelated text.

    Args:
        text: Raw model output, possibly containing markdown/HTML/emojis.

    Returns:
        The cleaned text with surrounding whitespace removed ("" for empty
        or None input).
    """
    if not text:
        return ""

    # Remove code blocks and inline code (before anything can unbalance them)
    text = re.sub(r'```[\s\S]*?```', '', text)
    text = re.sub(r'`[^`]+`', '', text)
    # Remove HTML/XML tags (the tag markup only; enclosed text is kept)
    text = re.sub(r'<[^>]+>', '', text)
    # Remove markdown links [text](url) → keep text
    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
    # Remove bare URLs
    text = re.sub(r'https?://\S+', '', text)
    # Unwrap **bold** so the emphasized words are still spoken
    text = re.sub(r'\*\*([^*\n]+)\*\*', r'\1', text)
    # Remove action markup like *sighs*. The asterisks must hug the text and
    # stay on one line, so "* bullet" lists and "2 * 3 * 4" keep their content.
    text = re.sub(r'\*(?!\s)[^*\n]+(?<!\s)\*', '', text)
    # Remove markdown bold/italic markers that might remain
    text = re.sub(r'[*_]{1,3}', '', text)
    # Remove markdown headers
    text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE)
    # Remove emojis
    text = re.sub(
        r'[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF'
        r'\U0001F1E0-\U0001F1FF\U0001F900-\U0001F9FF\U0001FA00-\U0001FAFF'
        r'\U00002702-\U000027B0\U0000FE00-\U0000FE0F\U0000200D'
        r'\U00002600-\U000026FF\U00002300-\U000023FF]+', '', text
    )
    # Collapse multiple spaces/newlines
    text = re.sub(r'\n{2,}', '\n', text)
    text = re.sub(r'[ \t]{2,}', ' ', text)
    return text.strip()
|
|
|
|
|
|
def load_satellite_map() -> dict:
    """Read the satellite-to-character mapping from SATELLITE_MAP_PATH.

    Returns:
        The parsed JSON document, or a default mapping (default character
        "aria_default", no satellites) when the file cannot be read or parsed.
    """
    try:
        raw_json = SATELLITE_MAP_PATH.read_text()
        return json.loads(raw_json)
    except Exception:
        return {"default": "aria_default", "satellites": {}}
|
|
|
|
|
|
def set_active_tts_voice(character_id: str, tts_config: dict):
    """Write the active TTS config to a state file for the Wyoming TTS server to read.

    The state is written to a temporary sibling file and then moved over
    ACTIVE_TTS_VOICE_PATH with os.replace(), so a concurrent reader never
    sees a partially written JSON document. Failures are logged, not raised.

    Args:
        character_id: ID of the character whose voice becomes active.
        tts_config: The character's "tts" settings; missing keys fall back
            to the defaults below.
    """
    tmp_path = None
    try:
        ACTIVE_TTS_VOICE_PATH.parent.mkdir(parents=True, exist_ok=True)
        state = {
            "character_id": character_id,
            "engine": tts_config.get("engine", "kokoro"),
            "kokoro_voice": tts_config.get("kokoro_voice", ""),
            "elevenlabs_voice_id": tts_config.get("elevenlabs_voice_id", ""),
            "elevenlabs_model": tts_config.get("elevenlabs_model", "eleven_multilingual_v2"),
            "speed": tts_config.get("speed", 1),
        }
        # Per-process/per-thread temp name: request handlers run concurrently
        # and must not clobber each other's half-written temp file.
        tmp_path = ACTIVE_TTS_VOICE_PATH.with_name(
            f"{ACTIVE_TTS_VOICE_PATH.name}.{os.getpid()}.{threading.get_ident()}.tmp"
        )
        with open(tmp_path, "w") as f:
            json.dump(state, f)
        os.replace(tmp_path, ACTIVE_TTS_VOICE_PATH)
    except Exception as e:
        print(f"[OpenClaw Bridge] Warning: could not write active TTS config: {e}")
        # Best-effort cleanup of a leftover temp file.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
|
|
|
|
|
|
def resolve_character_id(satellite_id: str = None) -> str:
    """Map a satellite ID to the character profile ID assigned to it.

    Args:
        satellite_id: Satellite identifier, or None/empty for "no satellite".

    Returns:
        The character ID mapped to the satellite when one exists, otherwise
        the map's "default" entry ("aria_default" if that is absent too).
    """
    mapping = load_satellite_map()
    if not satellite_id:
        return mapping.get("default", "aria_default")

    assignments = mapping.get("satellites", {})
    if satellite_id in assignments:
        return assignments[satellite_id]
    return mapping.get("default", "aria_default")
|
|
|
|
|
|
def load_character(character_id: str = None) -> dict:
    """Load the "data" section of a character profile.

    Args:
        character_id: Profile ID; when empty, the default character from the
            satellite map is used. Any "/" is replaced by "_" before the ID
            is turned into a file name.

    Returns:
        The profile's "data" dict, or {} when the profile file is missing,
        unreadable, or malformed.
    """
    resolved_id = character_id if character_id else resolve_character_id()
    file_stem = resolved_id.replace("/", "_")
    profile_file = CHARACTERS_DIR / (file_stem + ".json")

    if not profile_file.exists():
        return {}

    try:
        with open(profile_file) as handle:
            return json.load(handle).get("data", {})
    except Exception:
        return {}
|
|
|
|
|
|
def load_character_prompt(satellite_id: str = None, character_id: str = None) -> str:
    """Load the full system prompt for a character, resolved by satellite or explicit ID.

    Builds a rich prompt from system_prompt + profile fields (background,
    dialogue_style, etc.), character metadata, and stored memories.

    Args:
        satellite_id: Satellite used to look up the character when no
            explicit character_id is given.
        character_id: Explicit character profile ID; takes precedence.

    Returns:
        The prompt sections joined by blank lines, or "" when the character
        profile cannot be loaded.
    """
    if not character_id:
        character_id = resolve_character_id(satellite_id)
    char = load_character(character_id)
    if not char:
        return ""

    sections = []

    # Core system prompt
    prompt = char.get("system_prompt", "")
    if prompt:
        sections.append(prompt)

    # Character profile fields, in a fixed order
    profile_parts = [
        f"## {heading}\n{char[field]}"
        for field, heading in (
            ("background", "Background"),
            ("appearance", "Appearance"),
            ("dialogue_style", "Dialogue Style"),
        )
        if char.get(field)
    ]
    skills = char.get("skills")
    if skills:
        if isinstance(skills, list):
            # str() each entry: profiles are hand-edited JSON and a non-string
            # item would otherwise make join() raise.
            skills_text = ", ".join(str(skill) for skill in skills[:15])
        else:
            skills_text = str(skills)
        profile_parts.append(f"## Skills & Interests\n{skills_text}")
    if profile_parts:
        sections.append("[Character Profile]\n" + "\n\n".join(profile_parts))

    # Character metadata
    meta_lines = []
    if char.get("display_name"):
        meta_lines.append(f"Your name is: {char['display_name']}")
    # Support both v1 (gaze_preset string) and v2 (gaze_presets array)
    gaze_presets = char.get("gaze_presets", [])
    if gaze_presets and isinstance(gaze_presets, list):
        for gp in gaze_presets:
            if not isinstance(gp, dict):
                continue  # skip malformed entries instead of crashing on .get
            preset = gp.get("preset", "")
            trigger = gp.get("trigger", "self-portrait")
            if preset:
                meta_lines.append(f"GAZE preset '{preset}' — use for: {trigger}")
    elif char.get("gaze_preset"):
        meta_lines.append(f"Your gaze_preset for self-portraits is: {char['gaze_preset']}")
    if meta_lines:
        sections.append("[Character Metadata]\n" + "\n".join(meta_lines))

    # Memories (personal + general)
    personal, general = load_memories(character_id)
    if personal:
        sections.append("[Personal Memories]\n" + "\n".join(f"- {m}" for m in personal))
    if general:
        sections.append("[General Knowledge]\n" + "\n".join(f"- {m}" for m in general))

    return "\n\n".join(sections)
|
|
|
|
|
|
def load_memories(character_id: str) -> tuple[list[str], list[str]]:
    """Load personal (per-character) and general memories.

    Memories are taken newest first (by their "createdAt" value) until the
    next one would exceed the character budget for its category.

    Args:
        character_id: Character whose personal memory file is read; any "/"
            is replaced by "_" before it is used as a file name.

    Returns:
        (personal_contents, general_contents) truncated to fit context budget.
        Missing or malformed files yield empty lists.
    """
    PERSONAL_BUDGET = 4000  # max chars for personal memories in prompt
    GENERAL_BUDGET = 3000   # max chars for general memories in prompt

    def _read_memories(path: Path, budget: int) -> list[str]:
        """Return memory contents from *path*, newest first, within *budget* chars."""
        try:
            with open(path) as f:
                data = json.load(f)
        except Exception:
            return []

        # Tolerate hand-edited files: wrong root type, wrong "memories" type,
        # or non-object entries are treated as "no memories" / skipped.
        raw = data.get("memories", []) if isinstance(data, dict) else []
        if not isinstance(raw, list):
            return []
        memories = [m for m in raw if isinstance(m, dict)]

        # Sort newest first
        try:
            memories.sort(key=lambda m: m.get("createdAt", ""), reverse=True)
        except TypeError:
            pass  # mixed timestamp types: keep file order rather than fail

        result = []
        used = 0
        for m in memories:
            content = m.get("content", "")
            if not isinstance(content, str):
                continue
            content = content.strip()
            if not content:
                continue
            if used + len(content) > budget:
                break
            result.append(content)
            used += len(content)
        return result

    safe_id = character_id.replace("/", "_")
    personal = _read_memories(MEMORIES_DIR / "personal" / f"{safe_id}.json", PERSONAL_BUDGET)
    general = _read_memories(MEMORIES_DIR / "general.json", GENERAL_BUDGET)
    return personal, general
|
|
|
|
|
|
class OpenClawBridgeHandler(BaseHTTPRequestHandler):
|
|
"""HTTP request handler for OpenClaw bridge."""
|
|
|
|
def log_message(self, format, *args):
|
|
"""Log requests to stderr."""
|
|
print(f"[OpenClaw Bridge] {self.address_string()} - {format % args}")
|
|
|
|
def _send_json_response(self, status_code: int, data: dict):
|
|
"""Send a JSON response."""
|
|
self.send_response(status_code)
|
|
self.send_header("Content-Type", "application/json")
|
|
self.end_headers()
|
|
self.wfile.write(json.dumps(data).encode())
|
|
|
|
def do_POST(self):
|
|
"""Handle POST requests."""
|
|
parsed_path = urlparse(self.path)
|
|
|
|
# Handle wake word notification
|
|
if parsed_path.path == "/wake":
|
|
self._handle_wake_word()
|
|
return
|
|
|
|
# Handle TTS preview requests
|
|
if parsed_path.path == "/api/tts":
|
|
self._handle_tts_request()
|
|
return
|
|
|
|
# Handle STT requests
|
|
if parsed_path.path == "/api/stt":
|
|
self._handle_stt_request()
|
|
return
|
|
|
|
# Only handle the agent message endpoint
|
|
if parsed_path.path == "/api/agent/message":
|
|
self._handle_agent_request()
|
|
return
|
|
|
|
self._send_json_response(404, {"error": "Not found"})
|
|
|
|
def _handle_tts_request(self):
|
|
"""Handle TTS request and return audio. Routes to Kokoro or ElevenLabs based on engine."""
|
|
content_length = int(self.headers.get("Content-Length", 0))
|
|
if content_length == 0:
|
|
self._send_json_response(400, {"error": "Empty body"})
|
|
return
|
|
|
|
try:
|
|
body = self.rfile.read(content_length).decode()
|
|
data = json.loads(body)
|
|
except json.JSONDecodeError:
|
|
self._send_json_response(400, {"error": "Invalid JSON"})
|
|
return
|
|
|
|
text = data.get("text", "Hello, this is a test.")
|
|
text = clean_text_for_tts(text)
|
|
voice = data.get("voice", "af_heart")
|
|
engine = data.get("engine", "kokoro")
|
|
|
|
try:
|
|
# Signal avatar: speaking
|
|
_vtube_fire_and_forget("/expression", {"event": "speaking"})
|
|
|
|
if engine == "elevenlabs":
|
|
audio_bytes, content_type = self._synthesize_elevenlabs(text, voice, data.get("model"))
|
|
else:
|
|
# Default: local Kokoro via Wyoming
|
|
audio_bytes = asyncio.run(self._synthesize_audio(text, voice))
|
|
content_type = "audio/wav"
|
|
|
|
# Signal avatar: idle
|
|
_vtube_fire_and_forget("/expression", {"event": "idle"})
|
|
|
|
self.send_response(200)
|
|
self.send_header("Content-Type", content_type)
|
|
self.send_header("Access-Control-Allow-Origin", "*")
|
|
self.end_headers()
|
|
self.wfile.write(audio_bytes)
|
|
|
|
except Exception as e:
|
|
_vtube_fire_and_forget("/expression", {"event": "error"})
|
|
self._send_json_response(500, {"error": str(e)})
|
|
|
|
def _synthesize_elevenlabs(self, text: str, voice_id: str, model: str = None) -> tuple[bytes, str]:
|
|
"""Call ElevenLabs TTS API and return (audio_bytes, content_type)."""
|
|
api_key = os.environ.get("ELEVENLABS_API_KEY", "")
|
|
if not api_key:
|
|
raise RuntimeError("ELEVENLABS_API_KEY not set in environment")
|
|
if not voice_id:
|
|
raise RuntimeError("No ElevenLabs voice ID provided")
|
|
|
|
model = model or "eleven_multilingual_v2"
|
|
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
|
|
payload = json.dumps({
|
|
"text": text,
|
|
"model_id": model,
|
|
"voice_settings": {"stability": 0.5, "similarity_boost": 0.75},
|
|
}).encode()
|
|
|
|
req = urllib.request.Request(
|
|
url,
|
|
data=payload,
|
|
headers={
|
|
"Content-Type": "application/json",
|
|
"xi-api-key": api_key,
|
|
"Accept": "audio/mpeg",
|
|
},
|
|
method="POST",
|
|
)
|
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
audio_bytes = resp.read()
|
|
return audio_bytes, "audio/mpeg"
|
|
|
|
def do_OPTIONS(self):
|
|
"""Handle CORS preflight requests."""
|
|
self.send_response(204)
|
|
self.send_header("Access-Control-Allow-Origin", "*")
|
|
self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
|
|
self.send_header("Access-Control-Allow-Headers", "Content-Type")
|
|
self.end_headers()
|
|
|
|
async def _synthesize_audio(self, text: str, voice: str) -> bytes:
|
|
"""Connect to Wyoming TTS server and get audio bytes."""
|
|
client = AsyncTcpClient("127.0.0.1", 10301)
|
|
await client.connect()
|
|
|
|
# Read the initial Info event
|
|
await client.read_event()
|
|
|
|
# Send Synthesize event
|
|
await client.write_event(Synthesize(text=text, voice=SynthesizeVoice(name=voice)).event())
|
|
|
|
audio_data = bytearray()
|
|
rate = 24000
|
|
width = 2
|
|
channels = 1
|
|
|
|
while True:
|
|
event = await client.read_event()
|
|
if event is None:
|
|
break
|
|
|
|
if AudioStart.is_type(event.type):
|
|
start = AudioStart.from_event(event)
|
|
rate = start.rate
|
|
width = start.width
|
|
channels = start.channels
|
|
elif AudioChunk.is_type(event.type):
|
|
chunk = AudioChunk.from_event(event)
|
|
audio_data.extend(chunk.audio)
|
|
elif AudioStop.is_type(event.type):
|
|
break
|
|
|
|
await client.disconnect()
|
|
|
|
# Package raw PCM into WAV
|
|
wav_io = io.BytesIO()
|
|
with wave.open(wav_io, 'wb') as wav_file:
|
|
wav_file.setnchannels(channels)
|
|
wav_file.setsampwidth(width)
|
|
wav_file.setframerate(rate)
|
|
wav_file.writeframes(audio_data)
|
|
|
|
return wav_io.getvalue()
|
|
|
|
def _handle_stt_request(self):
|
|
"""Handle STT request — accept WAV audio, return transcribed text."""
|
|
content_length = int(self.headers.get("Content-Length", 0))
|
|
if content_length == 0:
|
|
self._send_json_response(400, {"error": "Empty body"})
|
|
return
|
|
|
|
try:
|
|
audio_bytes = self.rfile.read(content_length)
|
|
|
|
# Parse WAV to get PCM data and format
|
|
wav_io = io.BytesIO(audio_bytes)
|
|
with wave.open(wav_io, 'rb') as wav_file:
|
|
rate = wav_file.getframerate()
|
|
width = wav_file.getsampwidth()
|
|
channels = wav_file.getnchannels()
|
|
pcm_data = wav_file.readframes(wav_file.getnframes())
|
|
|
|
# Run the async Wyoming client
|
|
text = asyncio.run(self._transcribe_audio(pcm_data, rate, width, channels))
|
|
|
|
self.send_response(200)
|
|
self.send_header("Content-Type", "application/json")
|
|
self.send_header("Access-Control-Allow-Origin", "*")
|
|
self.end_headers()
|
|
self.wfile.write(json.dumps({"text": text}).encode())
|
|
|
|
except wave.Error as e:
|
|
self._send_json_response(400, {"error": f"Invalid WAV: {e}"})
|
|
except Exception as e:
|
|
self._send_json_response(500, {"error": str(e)})
|
|
|
|
async def _transcribe_audio(self, pcm_data: bytes, rate: int, width: int, channels: int) -> str:
|
|
"""Connect to Wyoming STT server and transcribe audio."""
|
|
client = AsyncTcpClient("127.0.0.1", 10300)
|
|
await client.connect()
|
|
|
|
# Send Transcribe request (STT server does not send an initial Info event)
|
|
await client.write_event(Transcribe(language="en").event())
|
|
|
|
# Send audio
|
|
await client.write_event(AudioStart(rate=rate, width=width, channels=channels).event())
|
|
|
|
# Send in chunks (1 second each)
|
|
bytes_per_second = rate * width * channels
|
|
for offset in range(0, len(pcm_data), bytes_per_second):
|
|
chunk = pcm_data[offset:offset + bytes_per_second]
|
|
await client.write_event(AudioChunk(rate=rate, width=width, channels=channels, audio=chunk).event())
|
|
|
|
await client.write_event(AudioStop().event())
|
|
|
|
# Read transcript
|
|
while True:
|
|
event = await client.read_event()
|
|
if event is None:
|
|
break
|
|
if Transcript.is_type(event.type):
|
|
transcript = Transcript.from_event(event)
|
|
await client.disconnect()
|
|
return transcript.text
|
|
|
|
await client.disconnect()
|
|
return ""
|
|
|
|
def _handle_wake_word(self):
|
|
"""Handle wake word detection notification."""
|
|
content_length = int(self.headers.get("Content-Length", 0))
|
|
wake_word_data = {}
|
|
if content_length > 0:
|
|
try:
|
|
body = self.rfile.read(content_length).decode()
|
|
wake_word_data = json.loads(body)
|
|
except (json.JSONDecodeError, ConnectionResetError, OSError):
|
|
# Client may close connection early, that's ok
|
|
pass
|
|
|
|
print(f"[OpenClaw Bridge] Wake word detected: {wake_word_data.get('wake_word', 'unknown')}")
|
|
self._send_json_response(200, {"status": "ok", "message": "Wake word received"})
|
|
|
|
@staticmethod
|
|
def _call_openclaw(message: str, agent: str, timeout: int, model: str = None) -> str:
|
|
"""Call OpenClaw CLI and return stdout."""
|
|
cmd = ["/opt/homebrew/bin/openclaw", "agent", "--message", message, "--agent", agent]
|
|
if model:
|
|
cmd.extend(["--model", model])
|
|
result = subprocess.run(
|
|
cmd,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=timeout,
|
|
check=True,
|
|
)
|
|
return result.stdout.strip()
|
|
|
|
@staticmethod
|
|
def _needs_followup(response: str) -> bool:
|
|
"""Detect if the model promised to act but didn't actually do it.
|
|
Returns True if the response looks like a 'will do' without a result."""
|
|
if not response:
|
|
return False
|
|
resp_lower = response.lower()
|
|
# If the response contains a URL or JSON-like output, it probably completed
|
|
if "http://" in response or "https://" in response or '"status"' in response:
|
|
return False
|
|
# If it contains a tool result indicator (ha-ctl output, gaze-ctl output)
|
|
if any(kw in resp_lower for kw in ["image_url", "seed", "entity_id", "state:", "turned on", "turned off"]):
|
|
return False
|
|
# Detect promise-like language without substance
|
|
promise_phrases = [
|
|
"let me", "i'll ", "i will ", "sure thing", "sure,", "right away",
|
|
"generating", "one moment", "working on", "hang on", "just a moment",
|
|
"on it", "let me generate", "let me create",
|
|
]
|
|
has_promise = any(phrase in resp_lower for phrase in promise_phrases)
|
|
# Short responses with promise language are likely incomplete
|
|
if has_promise and len(response) < 200:
|
|
return True
|
|
return False
|
|
|
|
def _handle_agent_request(self):
|
|
"""Handle agent message request."""
|
|
content_length = int(self.headers.get("Content-Length", 0))
|
|
if content_length == 0:
|
|
self._send_json_response(400, {"error": "Empty body"})
|
|
return
|
|
|
|
try:
|
|
body = self.rfile.read(content_length).decode()
|
|
data = json.loads(body)
|
|
except json.JSONDecodeError:
|
|
self._send_json_response(400, {"error": "Invalid JSON"})
|
|
return
|
|
|
|
message = data.get("message")
|
|
agent = data.get("agent", "main")
|
|
satellite_id = data.get("satellite_id")
|
|
explicit_character_id = data.get("character_id")
|
|
|
|
if not message:
|
|
self._send_json_response(400, {"error": "Message is required"})
|
|
return
|
|
|
|
# Resolve character: explicit ID > satellite mapping > default
|
|
if explicit_character_id:
|
|
character_id = explicit_character_id
|
|
else:
|
|
character_id = resolve_character_id(satellite_id)
|
|
system_prompt = load_character_prompt(character_id=character_id)
|
|
|
|
# Set the active TTS config for the Wyoming server to pick up
|
|
char = load_character(character_id)
|
|
tts_config = char.get("tts", {})
|
|
if tts_config:
|
|
set_active_tts_voice(character_id, tts_config)
|
|
engine = tts_config.get("engine", "kokoro")
|
|
voice_label = tts_config.get("kokoro_voice", "") if engine == "kokoro" else tts_config.get("elevenlabs_voice_id", "")
|
|
print(f"[OpenClaw Bridge] Active TTS: {engine} / {voice_label}")
|
|
|
|
if satellite_id:
|
|
print(f"[OpenClaw Bridge] Satellite: {satellite_id} → character: {character_id}")
|
|
elif explicit_character_id:
|
|
print(f"[OpenClaw Bridge] Character: {character_id}")
|
|
if system_prompt:
|
|
message = f"System Context: {system_prompt}\n\nUser Request: {message}"
|
|
|
|
# Load mode and resolve model routing
|
|
mode_data = load_mode()
|
|
model_override = resolve_model(mode_data)
|
|
active_model = model_override or DEFAULT_MODEL
|
|
if model_override:
|
|
print(f"[OpenClaw Bridge] Mode: PUBLIC → {model_override}")
|
|
else:
|
|
print(f"[OpenClaw Bridge] Mode: PRIVATE ({active_model})")
|
|
|
|
# Check if model is warm to set appropriate timeout
|
|
warm = is_model_warm()
|
|
timeout = TIMEOUT_WARM if warm else TIMEOUT_COLD
|
|
print(f"[OpenClaw Bridge] Model {'warm' if warm else 'cold'}, timeout={timeout}s")
|
|
|
|
# Signal avatar: thinking
|
|
_vtube_fire_and_forget("/expression", {"event": "thinking"})
|
|
|
|
# Call OpenClaw CLI (use full path for launchd compatibility)
|
|
try:
|
|
response_text = self._call_openclaw(message, agent, timeout, model=model_override)
|
|
|
|
# Re-prompt if the model promised to act but didn't call a tool.
|
|
# Detect "I'll do X" / "Let me X" responses that lack any result.
|
|
if self._needs_followup(response_text):
|
|
print(f"[OpenClaw Bridge] Response looks like a promise without action, re-prompting")
|
|
followup = (
|
|
"You just said you would do something but didn't actually call the exec tool. "
|
|
"Do NOT explain what you will do — call the tool NOW using exec and return the result."
|
|
)
|
|
response_text = self._call_openclaw(followup, agent, timeout, model=model_override)
|
|
|
|
# Signal avatar: idle (TTS handler will override to 'speaking' if voice is used)
|
|
_vtube_fire_and_forget("/expression", {"event": "idle"})
|
|
self._send_json_response(200, {"response": response_text, "model": active_model})
|
|
except subprocess.TimeoutExpired:
|
|
self._send_json_response(504, {"error": f"OpenClaw command timed out after {timeout}s (model was {'warm' if warm else 'cold'})"})
|
|
except subprocess.CalledProcessError as e:
|
|
error_msg = e.stderr.strip() if e.stderr else "OpenClaw command failed"
|
|
self._send_json_response(500, {"error": error_msg})
|
|
except FileNotFoundError:
|
|
self._send_json_response(500, {"error": "OpenClaw CLI not found"})
|
|
except Exception as e:
|
|
self._send_json_response(500, {"error": str(e)})
|
|
|
|
def do_GET(self):
|
|
"""Handle GET requests (health check)."""
|
|
parsed_path = urlparse(self.path)
|
|
|
|
if parsed_path.path == "/status" or parsed_path.path == "/":
|
|
self._send_json_response(200, {
|
|
"status": "ok",
|
|
"service": "OpenClaw HTTP Bridge",
|
|
"version": "1.0.0"
|
|
})
|
|
else:
|
|
self._send_json_response(404, {"error": "Not found"})
|
|
|
|
|
|
class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTPServer variant that serves each request on its own thread."""

    # Worker threads are daemons, so in-flight requests never keep the
    # process alive once the main thread exits.
    daemon_threads = True
|
|
|
|
|
|
def main():
    """Run the HTTP bridge server.

    Parses --port/--host, serves requests until interrupted with Ctrl+C,
    then closes the listening socket.
    """
    parser = argparse.ArgumentParser(description="OpenClaw HTTP Bridge")
    parser.add_argument(
        "--port",
        type=int,
        default=8081,
        help="Port to listen on (default: 8081)",
    )
    parser.add_argument(
        "--host",
        default="0.0.0.0",
        help="Host to bind to (default: 0.0.0.0)",
    )
    args = parser.parse_args()

    ThreadingHTTPServer.allow_reuse_address = True
    server = ThreadingHTTPServer((args.host, args.port), OpenClawBridgeHandler)
    print(f"OpenClaw HTTP Bridge running on http://{args.host}:{args.port}")
    print(f"Endpoint: POST http://{args.host}:{args.port}/api/agent/message")
    print("Press Ctrl+C to stop")

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nShutting down...")
    finally:
        # serve_forever() has already returned here, so shutdown() (meant to
        # be called from another thread while the loop is running) has
        # nothing to stop; what is still needed is releasing the socket.
        server.server_close()


if __name__ == "__main__":
    main()
|