feat: complete voice pipeline — fix wake word crash, bridge timeout, HA conversation agent
- Fix Wyoming satellite crash on wake word: convert macOS .aiff chimes to .wav (Python wave module only reads RIFF format, not AIFF) - Fix OpenClaw HTTP bridge: increase subprocess timeout 30s → 120s, add SO_REUSEADDR - Fix HA conversation component: use HTTP agent (not CLI) since HA runs in Docker on a different machine; update default host to Mac Mini IP, timeout to 120s - Rewrite character manager as Vite+React app with schema validation - Add Wyoming satellite wake word command, ElevenLabs TTS server, wakeword monitor - Add Phase 5 development plan - Update TODO.md: mark voice pipeline and agent tasks complete Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -26,8 +26,29 @@ import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import asyncio
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
from urllib.parse import urlparse
|
||||
from pathlib import Path
|
||||
import wave
|
||||
import io
|
||||
from wyoming.client import AsyncTcpClient
|
||||
from wyoming.tts import Synthesize
|
||||
from wyoming.audio import AudioStart, AudioChunk, AudioStop
|
||||
from wyoming.info import Info
|
||||
|
||||
|
||||
def load_character_prompt() -> str:
|
||||
"""Load the active character system prompt."""
|
||||
character_path = Path.home() / ".openclaw" / "characters" / "aria.json"
|
||||
if not character_path.exists():
|
||||
return ""
|
||||
try:
|
||||
with open(character_path) as f:
|
||||
data = json.load(f)
|
||||
return data.get("system_prompt", "")
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
class OpenClawBridgeHandler(BaseHTTPRequestHandler):
|
||||
@@ -48,17 +69,129 @@ class OpenClawBridgeHandler(BaseHTTPRequestHandler):
|
||||
"""Handle POST requests."""
|
||||
parsed_path = urlparse(self.path)
|
||||
|
||||
# Only handle the agent message endpoint
|
||||
if parsed_path.path != "/api/agent/message":
|
||||
self._send_json_response(404, {"error": "Not found"})
|
||||
# Handle wake word notification
|
||||
if parsed_path.path == "/wake":
|
||||
self._handle_wake_word()
|
||||
return
|
||||
|
||||
# Read request body
|
||||
# Handle TTS preview requests
|
||||
if parsed_path.path == "/api/tts":
|
||||
self._handle_tts_request()
|
||||
return
|
||||
|
||||
# Only handle the agent message endpoint
|
||||
if parsed_path.path == "/api/agent/message":
|
||||
self._handle_agent_request()
|
||||
return
|
||||
|
||||
self._send_json_response(404, {"error": "Not found"})
|
||||
|
||||
def _handle_tts_request(self):
|
||||
"""Handle TTS request and return wav audio."""
|
||||
content_length = int(self.headers.get("Content-Length", 0))
|
||||
if content_length == 0:
|
||||
self._send_json_response(400, {"error": "Empty request body"})
|
||||
self._send_json_response(400, {"error": "Empty body"})
|
||||
return
|
||||
|
||||
try:
|
||||
body = self.rfile.read(content_length).decode()
|
||||
data = json.loads(body)
|
||||
except json.JSONDecodeError:
|
||||
self._send_json_response(400, {"error": "Invalid JSON"})
|
||||
return
|
||||
|
||||
text = data.get("text", "Hello, this is a test.")
|
||||
voice = data.get("voice", "af_heart")
|
||||
|
||||
try:
|
||||
# Run the async Wyoming client
|
||||
audio_bytes = asyncio.run(self._synthesize_audio(text, voice))
|
||||
|
||||
# Send WAV response
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "audio/wav")
|
||||
# Allow CORS for local testing from Vite
|
||||
self.send_header("Access-Control-Allow-Origin", "*")
|
||||
self.end_headers()
|
||||
self.wfile.write(audio_bytes)
|
||||
|
||||
except Exception as e:
|
||||
self._send_json_response(500, {"error": str(e)})
|
||||
|
||||
def do_OPTIONS(self):
|
||||
"""Handle CORS preflight requests."""
|
||||
self.send_response(204)
|
||||
self.send_header("Access-Control-Allow-Origin", "*")
|
||||
self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
|
||||
self.send_header("Access-Control-Allow-Headers", "Content-Type")
|
||||
self.end_headers()
|
||||
|
||||
async def _synthesize_audio(self, text: str, voice: str) -> bytes:
|
||||
"""Connect to Wyoming TTS server and get audio bytes."""
|
||||
client = AsyncTcpClient("127.0.0.1", 10301)
|
||||
await client.connect()
|
||||
|
||||
# Read the initial Info event
|
||||
await client.read_event()
|
||||
|
||||
# Send Synthesize event
|
||||
await client.write_event(Synthesize(text=text, voice=voice).event())
|
||||
|
||||
audio_data = bytearray()
|
||||
rate = 24000
|
||||
width = 2
|
||||
channels = 1
|
||||
|
||||
while True:
|
||||
event = await client.read_event()
|
||||
if event is None:
|
||||
break
|
||||
|
||||
if AudioStart.is_type(event.type):
|
||||
start = AudioStart.from_event(event)
|
||||
rate = start.rate
|
||||
width = start.width
|
||||
channels = start.channels
|
||||
elif AudioChunk.is_type(event.type):
|
||||
chunk = AudioChunk.from_event(event)
|
||||
audio_data.extend(chunk.audio)
|
||||
elif AudioStop.is_type(event.type):
|
||||
break
|
||||
|
||||
await client.disconnect()
|
||||
|
||||
# Package raw PCM into WAV
|
||||
wav_io = io.BytesIO()
|
||||
with wave.open(wav_io, 'wb') as wav_file:
|
||||
wav_file.setnchannels(channels)
|
||||
wav_file.setsampwidth(width)
|
||||
wav_file.setframerate(rate)
|
||||
wav_file.writeframes(audio_data)
|
||||
|
||||
return wav_io.getvalue()
|
||||
|
||||
def _handle_wake_word(self):
|
||||
"""Handle wake word detection notification."""
|
||||
content_length = int(self.headers.get("Content-Length", 0))
|
||||
wake_word_data = {}
|
||||
if content_length > 0:
|
||||
try:
|
||||
body = self.rfile.read(content_length).decode()
|
||||
wake_word_data = json.loads(body)
|
||||
except (json.JSONDecodeError, ConnectionResetError, OSError):
|
||||
# Client may close connection early, that's ok
|
||||
pass
|
||||
|
||||
print(f"[OpenClaw Bridge] Wake word detected: {wake_word_data.get('wake_word', 'unknown')}")
|
||||
self._send_json_response(200, {"status": "ok", "message": "Wake word received"})
|
||||
|
||||
def _handle_agent_request(self):
|
||||
"""Handle agent message request."""
|
||||
content_length = int(self.headers.get("Content-Length", 0))
|
||||
if content_length == 0:
|
||||
self._send_json_response(400, {"error": "Empty body"})
|
||||
return
|
||||
|
||||
try:
|
||||
body = self.rfile.read(content_length).decode()
|
||||
data = json.loads(body)
|
||||
@@ -66,21 +199,25 @@ class OpenClawBridgeHandler(BaseHTTPRequestHandler):
|
||||
self._send_json_response(400, {"error": "Invalid JSON"})
|
||||
return
|
||||
|
||||
# Extract parameters
|
||||
message = data.get("message", "").strip()
|
||||
message = data.get("message")
|
||||
agent = data.get("agent", "main")
|
||||
|
||||
if not message:
|
||||
self._send_json_response(400, {"error": "Message is required"})
|
||||
return
|
||||
|
||||
# Inject system prompt
|
||||
system_prompt = load_character_prompt()
|
||||
if system_prompt:
|
||||
message = f"System Context: {system_prompt}\n\nUser Request: {message}"
|
||||
|
||||
# Call OpenClaw CLI (use full path for launchd compatibility)
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["/opt/homebrew/bin/openclaw", "agent", "--message", message, "--agent", agent],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
timeout=120,
|
||||
check=True
|
||||
)
|
||||
response_text = result.stdout.strip()
|
||||
@@ -125,6 +262,7 @@ def main():
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
HTTPServer.allow_reuse_address = True
|
||||
server = HTTPServer((args.host, args.port), OpenClawBridgeHandler)
|
||||
print(f"OpenClaw HTTP Bridge running on http://{args.host}:{args.port}")
|
||||
print(f"Endpoint: POST http://{args.host}:{args.port}/api/agent/message")
|
||||
|
||||
Reference in New Issue
Block a user