feat: upgrade voice pipeline — MLX Whisper STT (20x faster), Qwen3.5 MoE LLM, fix HA tool calling

- Replace faster-whisper with wyoming-mlx-whisper (whisper-large-v3-turbo, MLX Metal GPU)
  STT latency: 8.4s → 400ms for short voice commands
- Add Qwen3.5-35B-A3B (MoE, 3B active params, Q8_0) to Ollama — 26.7 tok/s vs 5.4 tok/s (70B)
- Add model preload launchd service to pin voice model in VRAM permanently
- Fix HA tool calling: set commands.native=true, symlink ha-ctl to PATH
- Add pipeline benchmark script (STT/LLM/TTS latency profiling)
- Add service restart buttons and STT endpoint to dashboard
- Bind Vite dev server to 0.0.0.0 for LAN access

Total estimated pipeline latency: ~27s → ~4s

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-13 18:03:12 +00:00
parent 1bfd7fbd08
commit af6b7bd945
10 changed files with 721 additions and 27 deletions
--- a/homeai-character/vite.config.js
+++ b/homeai-character/vite.config.js
@@ -53,6 +53,70 @@ function healthCheckPlugin() {
          res.end(JSON.stringify({ status: 'offline', responseTime: null }));
        }
      });
+      // Service restart — runs launchctl or docker restart
+      server.middlewares.use('/api/service/restart', async (req, res) => {
+        if (req.method === 'OPTIONS') {
+          res.writeHead(204, { 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Methods': 'POST', 'Access-Control-Allow-Headers': 'Content-Type' });
+          res.end();
+          return;
+        }
+        if (req.method !== 'POST') {
+          res.writeHead(405);
+          res.end();
+          return;
+        }
+        try {
+          const chunks = [];
+          for await (const chunk of req) chunks.push(chunk);
+          const { type, id } = JSON.parse(Buffer.concat(chunks).toString());
+
+          if (!type || !id) {
+            res.writeHead(400, { 'Content-Type': 'application/json' });
+            res.end(JSON.stringify({ ok: false, error: 'Missing type or id' }));
+            return;
+          }
+
+          // Whitelist valid service IDs to prevent command injection
+          const ALLOWED_LAUNCHD = [
+            'gui/501/com.homeai.ollama',
+            'gui/501/com.homeai.openclaw',
+            'gui/501/com.homeai.openclaw-bridge',
+            'gui/501/com.homeai.wyoming-stt',
+            'gui/501/com.homeai.wyoming-tts',
+            'gui/501/com.homeai.wyoming-satellite',
+            'gui/501/com.homeai.character-dashboard',
+          ];
+          const ALLOWED_DOCKER = [
+            'homeai-open-webui',
+            'homeai-uptime-kuma',
+            'homeai-n8n',
+            'homeai-code-server',
+          ];
+
+          let cmd;
+          if (type === 'launchd' && ALLOWED_LAUNCHD.includes(id)) {
+            cmd = ['launchctl', 'kickstart', '-k', id];
+          } else if (type === 'docker' && ALLOWED_DOCKER.includes(id)) {
+            cmd = ['docker', 'restart', id];
+          } else {
+            res.writeHead(403, { 'Content-Type': 'application/json' });
+            res.end(JSON.stringify({ ok: false, error: 'Service not in allowed list' }));
+            return;
+          }
+
+          const { execFile } = await import('child_process');
+          const { promisify } = await import('util');
+          const execFileAsync = promisify(execFile);
+          const { stdout, stderr } = await execFileAsync(cmd[0], cmd.slice(1), { timeout: 30000 });
+
+          res.writeHead(200, { 'Content-Type': 'application/json' });
+          res.end(JSON.stringify({ ok: true, stdout: stdout.trim(), stderr: stderr.trim() }));
+        } catch (err) {
+          res.writeHead(500, { 'Content-Type': 'application/json' });
+          res.end(JSON.stringify({ ok: false, error: err.message }));
+        }
+      });
+
      // TTS preview proxy — forwards POST to OpenClaw bridge, returns audio
      server.middlewares.use('/api/tts', async (req, res) => {
        if (req.method !== 'POST') {
@@ -99,4 +163,7 @@ export default defineConfig({
    tailwindcss(),
    react(),
  ],
+  server: {
+    host: '0.0.0.0',
+  },
 })