Character schema v2: background, dialogue_style, appearance, skills, gaze_presets with automatic v1→v2 migration. LLM-assisted character creation via Character MCP server. Two-tier memory system (personal per-character + general shared) with budget-based injection into LLM system prompt. Per-character TTS voice routing via state file — Wyoming TTS server reads active config to route between Kokoro (local) and ElevenLabs (cloud PCM 24kHz). Dashboard: memories page, conversation history, character profile on cards, auto-TTS engine selection from character config. Also includes VTube Studio expression bridge and ComfyUI API guide. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
74 lines
2.0 KiB
Bash
Executable File
74 lines
2.0 KiB
Bash
Executable File
#!/bin/bash
#
# Keep voice pipeline models warm in Ollama VRAM.
#
# Runs as a loop — checks every CHECK_INTERVAL seconds and re-pins any
# model that got evicted. Only pins lightweight/MoE models — large dense
# models (70B) use Ollama's default expiry.

# Base URL of the local Ollama HTTP API.
readonly OLLAMA_URL="http://localhost:11434"

# Seconds between eviction checks.
readonly CHECK_INTERVAL=300

# Medium model can be overridden via env var (e.g. by persona config)
readonly HOMEAI_MEDIUM_MODEL="${HOMEAI_MEDIUM_MODEL:-qwen3.5:35b-a3b}"

# Models to keep warm, one entry per model: "name|description"
MODELS=(
  "qwen2.5:7b|small (4.7GB) — fast fallback"
  "${HOMEAI_MEDIUM_MODEL}|medium — persona default"
)
readonly -a MODELS
#######################################
# Poll the Ollama API until it answers, for up to ~60s (30 tries x 2s).
# Globals:   OLLAMA_URL (read)
# Arguments: none
# Returns:   0 once the API is reachable, 1 on timeout
#######################################
wait_for_ollama() {
  local attempt
  # {1..30} instead of $(seq 1 30): no subprocess, same 30 attempts.
  for attempt in {1..30}; do
    curl -sf "$OLLAMA_URL/api/tags" > /dev/null 2>&1 && return 0
    sleep 2
  done
  return 1
}
#######################################
# Check whether a model is currently resident in Ollama.
# Globals:   OLLAMA_URL (read)
# Arguments: $1 - model name as reported by /api/ps
# Returns:   0 if loaded; 1 otherwise (including API or parse failure)
#######################################
is_model_loaded() {
  local model="$1"
  # Pass the model name as argv[1] instead of interpolating it into the
  # Python source — a name containing quotes/backslashes cannot break
  # (or inject into) the script this way.
  curl -sf "$OLLAMA_URL/api/ps" 2>/dev/null \
    | python3 -c '
import json, sys
data = json.load(sys.stdin)
names = [m.get("name") for m in data.get("models", [])]
sys.exit(0 if sys.argv[1] in names else 1)
' "$model" 2>/dev/null
}
#######################################
# Ensure a model is resident in VRAM with infinite keep_alive.
# Globals:   OLLAMA_URL (read)
# Arguments: $1 - model name
#            $2 - human-readable description (for log output)
# Outputs:   status lines to stdout
# Returns:   0 if already loaded; otherwise always 0 (failure is logged,
#            not propagated — the caller's loop retries next interval)
#######################################
pin_model() {
  local model="$1"
  local desc="$2"

  if is_model_loaded "$model"; then
    echo "[keepwarm] $model already loaded — skipping"
    return 0
  fi

  echo "[keepwarm] Loading $model ($desc) with keep_alive=-1..."
  # keep_alive=-1 pins the model indefinitely; num_ctx=512 keeps the
  # warm-up request's KV-cache allocation tiny.
  # NOTE(review): model name is interpolated into the JSON body — names
  # come from the trusted MODELS list above, so no escaping is done here.
  # Test the command directly rather than inspecting $? afterwards.
  if curl -sf "$OLLAMA_URL/api/generate" \
      -d "{\"model\":\"$model\",\"prompt\":\"ready\",\"stream\":false,\"keep_alive\":-1,\"options\":{\"num_ctx\":512}}" \
      > /dev/null 2>&1; then
    echo "[keepwarm] $model pinned in VRAM"
  else
    echo "[keepwarm] ERROR: failed to load $model"
  fi
}
# --- Main loop ---

echo "[keepwarm] Starting model keep-warm daemon (interval: ${CHECK_INTERVAL}s)"

# Block until the Ollama API responds before entering the pin loop.
if ! wait_for_ollama; then
  echo "[keepwarm] ERROR: Ollama not reachable after 60s, exiting"
  exit 1
fi
echo "[keepwarm] Ollama is online"

# Forever: walk the model list, re-pin anything evicted, then sleep.
while :; do
  for spec in "${MODELS[@]}"; do
    # Split "name|description" with parameter expansion: name is the part
    # before the first '|', description is everything after it.
    pin_model "${spec%%|*}" "${spec#*|}"
  done

  sleep "$CHECK_INTERVAL"
done