Files
homeai/homeai-voice/tts/wyoming_kokoro_server.py
Aodhan Collins 1bfd7fbd08 feat: character dashboard with TTS voice preview, fix Wyoming API compat
- Add HomeAI dashboard: service status monitor, character profile manager, character editor
- Add TTS voice preview in character editor (Kokoro via OpenClaw bridge → Wyoming)
  - Custom preview text, loading/playing states, stop control, speed via playbackRate
- Fix Wyoming API breaking changes: remove `version` from TtsVoice/TtsProgram,
  use SynthesizeVoice object instead of bare string in Synthesize calls
- Vite dev server proxies /api/tts and /api/health to avoid CORS issues

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 20:40:11 +00:00

146 lines
4.8 KiB
Python

#!/usr/bin/env python3
"""Wyoming TTS server backed by Kokoro ONNX.
Usage:
python wyoming_kokoro_server.py --uri tcp://0.0.0.0:10301 --voice af_heart
"""
import argparse
import asyncio
import logging
import os

import numpy as np
from wyoming.audio import AudioChunk, AudioStart, AudioStop
from wyoming.event import Event
from wyoming.info import Attribution, Describe, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
from wyoming.server import AsyncEventHandler, AsyncServer
from wyoming.tts import Synthesize
# Logger named after this module.
_LOGGER = logging.getLogger(__name__)

# PCM parameters used when describing synthesized audio to clients.
CHANNELS = 1
SAMPLE_WIDTH = 2  # bytes per sample (int16)
SAMPLE_RATE = 24_000  # samples per second
CHUNK_SECONDS = 1  # stream in 1-second chunks
def _load_kokoro():
    """Construct the Kokoro engine from the files under ``~/models/kokoro``.

    ``kokoro_onnx`` is imported inside the function rather than at module
    import time.

    Returns:
        A ``Kokoro`` instance built from ``kokoro-v1.0.onnx`` and
        ``voices-v1.0.bin`` in the model directory.
    """
    from kokoro_onnx import Kokoro

    model_dir = os.path.expanduser("~/models/kokoro")
    onnx_path = os.path.join(model_dir, "kokoro-v1.0.onnx")
    voices_path = os.path.join(model_dir, "voices-v1.0.bin")
    return Kokoro(onnx_path, voices_path)
class KokoroEventHandler(AsyncEventHandler):
    """Wyoming event handler that answers TTS requests using Kokoro.

    An ``Info`` event describing the server is pushed as soon as the handler
    is constructed, and again whenever the client sends ``Describe``. Each
    ``Synthesize`` event is answered with ``AudioStart``, a sequence of
    ``AudioChunk`` events holding int16 PCM, and a closing ``AudioStop``.
    """

    def __init__(self, tts, default_voice: str, speed: float, *args, **kwargs):
        """Store the synthesis settings and schedule the initial ``Info`` event.

        Args:
            tts: Engine whose ``create(text, voice=..., speed=...)`` returns a
                ``(samples, sample_rate)`` pair.
            default_voice: Voice used when a request does not name one; it is
                also the only voice advertised in ``Info``.
            speed: Speed value forwarded to every ``create`` call.
            *args: Forwarded to ``AsyncEventHandler``.
            **kwargs: Forwarded to ``AsyncEventHandler``.
        """
        super().__init__(*args, **kwargs)
        self._tts = tts
        self._default_voice = default_voice
        self._speed = speed
        # Send info immediately on connect. The task is stored so it cannot be
        # garbage-collected while pending, and a callback retrieves its result
        # so a failed send is logged instead of surfacing as an unretrieved
        # task exception.
        self._info_task = asyncio.ensure_future(self._send_info())
        self._info_task.add_done_callback(self._log_info_failure)

    @staticmethod
    def _log_info_failure(task) -> None:
        """Log the exception, if any, raised by the connect-time info task."""
        if task.cancelled():
            return
        error = task.exception()
        if error is not None:
            _LOGGER.warning("Failed to send info on connect: %r", error)

    def _build_info(self) -> Info:
        """Return the ``Info`` describing this server and its default voice."""
        voice = TtsVoice(
            name=self._default_voice,
            description="Kokoro voice",
            attribution=Attribution(name="kokoro", url=""),
            installed=True,
            languages=["en-us"],
            speakers=[TtsVoiceSpeaker(name=self._default_voice)],
        )
        program = TtsProgram(
            name="kokoro",
            description="Kokoro ONNX TTS",
            attribution=Attribution(
                name="thewh1teagle/kokoro-onnx",
                url="https://github.com/thewh1teagle/kokoro-onnx",
            ),
            installed=True,
            voices=[voice],
        )
        return Info(tts=[program])

    async def _send_info(self):
        """Write the server ``Info`` event to the client."""
        await self.write_event(self._build_info().event())

    async def handle_event(self, event: Event) -> bool:
        """Handle one client event.

        ``Describe`` is answered with ``Info``. ``Synthesize`` runs the engine
        in the default executor and streams the result as PCM. Any other
        event type is ignored.

        Args:
            event: The event received from the client.

        Returns:
            ``True`` to keep the connection open. ``False`` only when the
            closing ``AudioStop`` could not be written because of an
            ``OSError`` (for example, the client has disconnected).
        """
        if Describe.is_type(event.type):
            await self._send_info()
            return True

        if not Synthesize.is_type(event.type):
            return True  # keep connection open

        synthesize = Synthesize.from_event(event)
        text = synthesize.text
        voice = self._default_voice
        if synthesize.voice and synthesize.voice.name:
            voice = synthesize.voice.name
        _LOGGER.debug("Synthesizing %r with voice=%s speed=%.1f", text, voice, self._speed)

        seconds = None
        try:
            loop = asyncio.get_running_loop()
            samples, sample_rate = await loop.run_in_executor(
                None, lambda: self._tts.create(text, voice=voice, speed=self._speed)
            )
            # Label the stream with the rate the engine reported so the header
            # always matches the data; fall back to SAMPLE_RATE if none given.
            rate = int(sample_rate) if sample_rate else SAMPLE_RATE
            samples_int16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
            audio_bytes = samples_int16.tobytes()
            seconds = len(samples) / rate

            await self.write_event(
                AudioStart(rate=rate, width=SAMPLE_WIDTH, channels=CHANNELS).event()
            )
            chunk_size = rate * SAMPLE_WIDTH * CHANNELS * CHUNK_SECONDS
            for offset in range(0, len(audio_bytes), chunk_size):
                await self.write_event(
                    AudioChunk(
                        rate=rate,
                        width=SAMPLE_WIDTH,
                        channels=CHANNELS,
                        audio=audio_bytes[offset : offset + chunk_size],
                    ).event()
                )
        except Exception:
            _LOGGER.exception("Synthesis error")
            seconds = None

        # AudioStop is sent exactly once, on success and on failure alike, so
        # a client waiting for the end of the stream is always released.
        try:
            await self.write_event(AudioStop().event())
        except OSError:
            _LOGGER.debug("Could not send AudioStop; client connection is gone")
            return False

        if seconds is not None:
            _LOGGER.info("Synthesized %.1fs of audio", seconds)
        return True  # keep connection open
async def main():
    """Parse the command line, load Kokoro, and serve Wyoming TTS requests.

    Options: ``--uri`` (listen address), ``--voice`` (default voice),
    ``--speed`` (speed passed to each handler) and ``--debug`` (DEBUG-level
    logging). The model is loaded once and shared by every handler the
    server creates.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--uri", default="tcp://0.0.0.0:10301")
    cli.add_argument("--voice", default="af_heart")
    cli.add_argument("--speed", type=float, default=1.0)
    cli.add_argument("--debug", action="store_true")
    opts = cli.parse_args()

    if opts.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
    )

    _LOGGER.info("Loading Kokoro ONNX model...")
    engine = _load_kokoro()
    _LOGGER.info("Kokoro loaded. Starting Wyoming TTS on %s (voice=%s)", opts.uri, opts.voice)

    wyoming_server = AsyncServer.from_uri(opts.uri)
    # Every handler the server requests shares the single loaded engine.
    await wyoming_server.run(
        lambda reader, writer: KokoroEventHandler(
            engine, opts.voice, opts.speed, reader, writer
        )
    )


if __name__ == "__main__":
    asyncio.run(main())