Add self-deploying setup scripts for all sub-projects (P1-P8)

- Root setup.sh orchestrator with per-phase dispatch (./setup.sh p1..p8 | all | status)
- Makefile convenience targets (make infra, make llm, make status, etc.)
- scripts/common.sh: shared bash library for OS detection, Docker helpers,
  service management (launchd/systemd), package install, env management
- .env.example + .gitignore: shared config template and secret exclusions

P1 (homeai-infra): full implementation
- docker-compose.yml: Uptime Kuma, code-server, n8n
- Note: Home Assistant, Portainer, Gitea are pre-existing instances
- setup.sh: Docker install, homeai network, container health checks

P2 (homeai-llm): full implementation
- Ollama native install with CUDA/ROCm/Metal auto-detection
- launchd plist (macOS) + systemd service (Linux) for auto-start
- scripts/pull-models.sh: idempotent model puller from manifest
- scripts/benchmark.sh: tokens/sec measurement per model
- Open WebUI on port 3030 (avoids Gitea :3000 conflict)

P3-P8: working stubs with prerequisite checks and TODO sections

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Author: Aodhan Collins
Date:   2026-03-04 21:10:53 +00:00
Parent: 38247d7cc4
Commit: 7978eaea14
23 changed files with 2525 additions and 0 deletions
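
Every script below sources scripts/common.sh, which this commit adds but which is not reproduced in this excerpt. As a point of reference, here is a minimal sketch of a few of the helpers those scripts call (command_exists, die, detect_platform, and the log_* functions); the names match the call sites, but the bodies are illustrative assumptions rather than the committed implementation:

    #!/usr/bin/env bash
    # Hypothetical excerpt of scripts/common.sh (illustration only)

    command_exists() { command -v "$1" >/dev/null 2>&1; }

    log_info()    { printf '\033[0;36m[info]\033[0m %s\n' "$*"; }
    log_step()    { printf '\033[0;34m[....]\033[0m %s\n' "$*"; }
    log_warn()    { printf '\033[0;33m[warn]\033[0m %s\n' "$*"; }
    log_error()   { printf '\033[0;31m[fail]\033[0m %s\n' "$*" >&2; }
    log_success() { printf '\033[0;32m[ ok ]\033[0m %s\n' "$*"; }
    log_section() { printf '\n\033[1m== %s ==\033[0m\n' "$*"; }

    die() { log_error "$*"; exit 1; }

    detect_platform() {
      case "$(uname -s)" in
        Darwin) OS_TYPE="macos" ;;
        Linux)  OS_TYPE="linux" ;;
        *)      OS_TYPE="unknown" ;;
      esac
      export OS_TYPE
    }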

View File: homeai-llm/docker/.env.example

@@ -0,0 +1,7 @@
# homeai-llm Docker secrets
# Copy to .env — never commit .env
DATA_DIR=${HOME}/homeai-data
# Open WebUI
WEBUI_SECRET_KEY=changeme_random_32_char_string_here
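
A convenient way to turn the template into a working .env with a real secret (hand-editing the file works just as well; the sed one-liner is only a shortcut):

    cd homeai-llm/docker
    cp .env.example .env
    # Replace the placeholder with 32 random hex characters (hex needs no sed escaping)
    sed -i.bak "s/^WEBUI_SECRET_KEY=.*/WEBUI_SECRET_KEY=$(openssl rand -hex 16)/" .env && rm .env.bak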

View File: homeai-llm/docker/docker-compose.yml

@@ -0,0 +1,45 @@
---
# homeai-llm/docker/docker-compose.yml
# P2 — Open WebUI
#
# Ollama runs NATIVELY (not in Docker) for GPU acceleration.
# This compose file only starts the Open WebUI frontend.
#
# Prerequisites:
# - Ollama installed and running on the host at port 11434
# - `homeai` Docker network exists (created by P1 setup)
#
# Usage:
# docker compose -f docker/docker-compose.yml up -d
name: homeai-llm

services:
  # ─── Open WebUI ──────────────────────────────────────────────────────────────
  open-webui:
    container_name: homeai-open-webui
    image: ghcr.io/open-webui/open-webui:main
    restart: unless-stopped
    ports:
      - "3030:8080" # Exposed on 3030 to avoid conflict with Gitea (3000)
    volumes:
      - ${DATA_DIR:-~/homeai-data}/open-webui:/app/backend/data
    environment:
      # Connect to Ollama on the host
      - OLLAMA_BASE_URL=http://host.docker.internal:11434
      - WEBUI_SECRET_KEY=${WEBUI_SECRET_KEY:-changeme_random_32_char}
      - ENABLE_SIGNUP=true
      - DEFAULT_MODELS=llama3.3:70b
    extra_hosts:
      - "host.docker.internal:host-gateway" # Linux compat
    networks:
      - homeai
    labels:
      - homeai.service=open-webui
      - homeai.url=http://localhost:3030

networks:
  homeai:
    external: true
    name: homeai
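
Since the container reaches Ollama through host.docker.internal (mapped to the host gateway on Linux by the extra_hosts entry), a quick sanity check after docker compose up -d is to hit both endpoints from the host:

    curl -sf http://localhost:11434/api/version                      # native Ollama API on the host
    curl -sf -o /dev/null -w '%{http_code}\n' http://localhost:3030  # Open WebUI via the published port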

View File: homeai-llm/launchd/com.homeai.ollama.plist

@@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>Label</key>
    <string>com.homeai.ollama</string>
    <key>ProgramArguments</key>
    <array>
        <!-- Adjust the path if Ollama lives elsewhere, e.g. /opt/homebrew/bin/ollama
             for a Homebrew install on Apple Silicon -->
        <string>/usr/local/bin/ollama</string>
        <string>serve</string>
    </array>
    <key>EnvironmentVariables</key>
    <dict>
        <key>OLLAMA_HOST</key>
        <string>0.0.0.0:11434</string>
        <!-- Metal GPU is used automatically on Apple Silicon; no env var needed -->
    </dict>
    <key>RunAtLoad</key>
    <true/>
    <key>KeepAlive</key>
    <true/>
    <key>StandardOutPath</key>
    <string>/tmp/homeai-ollama.log</string>
    <key>StandardErrorPath</key>
    <string>/tmp/homeai-ollama-error.log</string>
    <key>ThrottleInterval</key>
    <integer>5</integer>
</dict>
</plist>
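
P2's setup.sh installs this plist through the install_service helper in common.sh; loading it by hand as a per-user LaunchAgent would look roughly like this (a system-wide LaunchDaemon under /Library/LaunchDaemons is the other option):

    cp homeai-llm/launchd/com.homeai.ollama.plist ~/Library/LaunchAgents/
    launchctl load -w ~/Library/LaunchAgents/com.homeai.ollama.plist
    launchctl list | grep com.homeai.ollama      # confirm the job is loaded
    curl -sf http://localhost:11434/api/version  # confirm the server answers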

View File: homeai-llm/ollama-models.txt

@@ -0,0 +1,21 @@
# Ollama model manifest
# One model per line. Lines starting with # are ignored.
# Format: <model>:<tag> or just <model> for latest
#
# Pull all models: bash scripts/pull-models.sh
# Pull specific: ollama pull <model>
# ─── Primary (main conversation) ───────────────────────────────────────────────
llama3.3:70b
# ─── Alternative primary ───────────────────────────────────────────────────────
qwen2.5:72b
# ─── Fast / low-latency (voice pipeline, quick tasks) ─────────────────────────
qwen2.5:7b
# ─── Code generation ───────────────────────────────────────────────────────────
qwen2.5-coder:32b
# ─── Embeddings (mem0 memory store) ────────────────────────────────────────────
nomic-embed-text
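
Because the puller ignores comments, blank lines, and anything after the first space, previewing exactly what will be fetched is a one-liner:

    grep -vE '^[[:space:]]*#|^[[:space:]]*$' homeai-llm/ollama-models.txt | awk '{print $1}'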

View File: homeai-llm/scripts/benchmark.sh

@@ -0,0 +1,88 @@
#!/usr/bin/env bash
# scripts/benchmark.sh — Benchmark Ollama model inference speed
#
# Measures tokens/sec for each installed model.
# Results written to benchmark-results.md
#
# Usage:
# bash scripts/benchmark.sh
# bash scripts/benchmark.sh qwen2.5:7b # benchmark one model
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)"
source "${REPO_DIR}/scripts/common.sh"

RESULTS_FILE="${SCRIPT_DIR}/../benchmark-results.md"
PROMPT="Tell me a short story about a robot who loves cooking. Keep it to exactly 200 words."

if ! command_exists ollama; then
  die "Ollama not found."
fi
if ! curl -sf http://localhost:11434 -o /dev/null; then
  die "Ollama is not running."
fi

# Millisecond timestamp. date's %N is GNU-only; fall back to python3 on macOS/BSD,
# where `date +%s%3N` leaves a literal "3N" in the output.
now_ms() {
  local t
  t="$(date +%s%3N)"
  if [[ "$t" == *N* ]]; then
    python3 -c 'import time; print(int(time.time() * 1000))'
  else
    echo "$t"
  fi
}

benchmark_model() {
  local model="$1"
  log_step "Benchmarking $model..."
  local start end elapsed
  start=$(now_ms)
  local response
  response=$(ollama run "$model" "$PROMPT" 2>&1) || {
    log_error "Model $model failed to run."
    echo "| $model | ERROR | — |"
    return
  }
  end=$(now_ms)
  elapsed=$(( end - start ))
  local word_count
  word_count=$(echo "$response" | wc -w)
  local tokens_est=$(( word_count * 4 / 3 )) # rough estimate: 1 token ≈ 0.75 words
  local elapsed_sec
  elapsed_sec=$(echo "scale=1; $elapsed / 1000" | bc)
  local tps
  tps=$(echo "scale=1; $tokens_est / ($elapsed / 1000)" | bc 2>/dev/null || echo "?")
  # Console line goes to stderr so that only the Markdown row reaches the results
  # file when this function is piped into tee below.
  printf " %-30s %6s tok/s (%ss)\n" "$model" "$tps" "$elapsed_sec" >&2
  echo "| \`$model\` | ${tps} tok/s | ${elapsed_sec}s |"
}

log_section "Ollama Benchmark"
log_info "Prompt: '$PROMPT'"
echo ""

if [[ -n "${1:-}" ]]; then
  models=("$@")
else
  # Get list of installed models (while-read loop instead of mapfile, which is
  # unavailable in macOS's default bash 3.2)
  models=()
  while IFS= read -r name; do
    models+=("$name")
  done < <(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}')
fi

if [[ ${#models[@]} -eq 0 ]]; then
  die "No models installed. Run: bash scripts/pull-models.sh"
fi

{
  echo "# Ollama Benchmark Results"
  echo "> Generated: $(date)"
  echo ""
  echo "| Model | Speed | Time for ~200 tok |"
  echo "|---|---|---|"
} > "$RESULTS_FILE"

for model in "${models[@]}"; do
  benchmark_model "$model" | tee -a "$RESULTS_FILE"
done
echo "" >> "$RESULTS_FILE"

log_success "Results written to $RESULTS_FILE"
echo ""
cat "$RESULTS_FILE"
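
The word-count heuristic above only approximates throughput. Ollama's generate endpoint reports exact token counts and timings (eval_count, and eval_duration in nanoseconds), so a more precise measurement can query the API directly; a sketch, assuming jq is installed:

    model="qwen2.5:7b"
    resp=$(curl -sf http://localhost:11434/api/generate \
      -d "{\"model\": \"${model}\", \"prompt\": \"Tell me a short story about a robot who loves cooking.\", \"stream\": false}")
    tokens=$(echo "$resp" | jq '.eval_count')
    nanos=$(echo "$resp" | jq '.eval_duration')
    echo "${model}: $(echo "scale=1; ${tokens} * 1000000000 / ${nanos}" | bc) tok/s"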

View File: homeai-llm/scripts/pull-models.sh

@@ -0,0 +1,86 @@
#!/usr/bin/env bash
# scripts/pull-models.sh — Pull all Ollama models from the manifest
#
# Usage:
# bash scripts/pull-models.sh # pull all models
# bash scripts/pull-models.sh nomic-embed-text # pull specific model
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)"
source "${REPO_DIR}/scripts/common.sh"

MANIFEST="${SCRIPT_DIR}/../ollama-models.txt"

if ! command_exists ollama; then
  die "Ollama not found. Run: bash homeai-llm/setup.sh first."
fi
if ! curl -sf http://localhost:11434 -o /dev/null; then
  die "Ollama is not running. Start it first."
fi

# If a specific model is given as arg, just pull that
if [[ $# -gt 0 ]]; then
  for model in "$@"; do
    log_info "Pulling $model..."
    ollama pull "$model"
    log_success "Pulled $model"
  done
  exit 0
fi

# Pull all models from manifest
log_section "Pulling Ollama models"
total=0; pulled=0; skipped=0; failed=0

while IFS= read -r line || [[ -n "$line" ]]; do
  # Skip comments and blank lines
  [[ "$line" =~ ^[[:space:]]*# ]] && continue
  [[ -z "${line// }" ]] && continue
  model="${line%% *}" # strip any trailing comment
  total=$((total + 1))

  # Check if model is already present
  if ollama list 2>/dev/null | grep -q "^${model%%:*}"; then
    tag="${model##*:}"
    model_name="${model%%:*}"
    if [[ "$tag" != "$model_name" ]]; then
      # Has explicit tag — check exact match
      if ollama list 2>/dev/null | grep -q "^${model_name}.*${tag}"; then
        log_info "Already present: $model — skipping"
        skipped=$((skipped + 1))
        continue
      fi
    else
      log_info "Already present: $model — skipping"
      skipped=$((skipped + 1))
      continue
    fi
  fi

  log_step "Pulling $model..."
  if ollama pull "$model"; then
    log_success "Pulled $model"
    pulled=$((pulled + 1))
  else
    log_error "Failed to pull $model"
    failed=$((failed + 1))
  fi
done < "$MANIFEST"

echo ""
log_info "Pull complete: ${pulled} pulled, ${skipped} already present, ${failed} failed (of ${total} total)"

if [[ $failed -gt 0 ]]; then
  log_warn "Some models failed to pull. Check your internet connection and retry."
  exit 1
fi

echo ""
log_info "Installed models:"
ollama list

View File: homeai-llm/setup.sh (new file, 227 lines)

@@ -0,0 +1,227 @@
#!/usr/bin/env bash
# homeai-llm/setup.sh — P2: Ollama + Open WebUI
#
# Installs Ollama natively (for GPU access), sets up auto-start,
# pulls models from the manifest, and starts Open WebUI in Docker.
#
# GPU support:
# Linux — CUDA (NVIDIA) or ROCm (AMD) or CPU fallback
# macOS — Metal (automatic for Apple Silicon)
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
# shellcheck source=../scripts/common.sh
source "${REPO_DIR}/scripts/common.sh"

COMPOSE_FILE="${SCRIPT_DIR}/docker/docker-compose.yml"
ENV_FILE="${SCRIPT_DIR}/docker/.env"
ENV_EXAMPLE="${SCRIPT_DIR}/docker/.env.example"
MANIFEST="${SCRIPT_DIR}/ollama-models.txt"

# ─── Pre-flight ────────────────────────────────────────────────────────────────
preflight() {
  log_section "P2 Preflight"
  detect_platform
  detect_gpu

  # Check P1 dependency (homeai Docker network must exist)
  if ! docker network inspect homeai &>/dev/null; then
    log_warn "Docker network 'homeai' not found. Has P1 been run?"
    log_warn "Run: ./setup.sh p1 first, or: docker network create homeai"
    if ! confirm "Create 'homeai' network now and continue?"; then
      die "Aborted. Run ./setup.sh p1 first."
    fi
    docker network create homeai
  fi

  # Bootstrap .env for Open WebUI
  if [[ ! -f "$ENV_FILE" && -f "$ENV_EXAMPLE" ]]; then
    cp "$ENV_EXAMPLE" "$ENV_FILE"
    log_warn "Created ${ENV_FILE} from .env.example"
    log_warn "Set WEBUI_SECRET_KEY in ${ENV_FILE} (run: openssl rand -hex 16)"
  fi

  # Create data dir
  load_env "$ENV_FILE" 2>/dev/null || true
  local data_dir="${DATA_DIR:-${HOME}/homeai-data}"
  mkdir -p "${data_dir}/open-webui"
}

# ─── Ollama Installation ───────────────────────────────────────────────────────
install_ollama() {
  log_section "Ollama"
  if command_exists ollama; then
    log_success "Ollama already installed: $(ollama --version 2>/dev/null || echo 'version unknown')"
    return
  fi

  log_info "Installing Ollama..."
  if [[ "$OS_TYPE" == "macos" ]]; then
    if command_exists brew; then
      brew install ollama
    else
      log_info "Downloading Ollama for macOS..."
      curl -fsSL https://ollama.com/install.sh | sh
    fi
  else
    # Linux — official install script handles CUDA/ROCm detection
    log_info "Downloading and running Ollama installer..."
    curl -fsSL https://ollama.com/install.sh | sh
  fi

  if ! command_exists ollama; then
    die "Ollama installation failed. Check the output above."
  fi
  log_success "Ollama installed: $(ollama --version 2>/dev/null || echo 'ok')"
}

# ─── Ollama Service ────────────────────────────────────────────────────────────
setup_ollama_service() {
  log_section "Ollama service"

  # Check if already running
  if curl -sf http://localhost:11434 -o /dev/null 2>/dev/null; then
    log_success "Ollama is already running."
    return
  fi

  install_service \
    "homeai-ollama" \
    "${SCRIPT_DIR}/systemd/homeai-ollama.service" \
    "${SCRIPT_DIR}/launchd/com.homeai.ollama.plist"

  # Give it a few seconds to start
  log_step "Waiting for Ollama to start..."
  local i=0
  while ! curl -sf http://localhost:11434 -o /dev/null 2>/dev/null; do
    sleep 2; i=$((i + 2))
    if [[ $i -ge 30 ]]; then
      log_warn "Ollama did not start within 30s. Trying to start manually..."
      ollama serve &>/dev/null &
      sleep 5
      break
    fi
  done

  if curl -sf http://localhost:11434 -o /dev/null 2>/dev/null; then
    log_success "Ollama is running."
  else
    die "Ollama failed to start. Check: ollama serve"
  fi
}

# ─── GPU Verification ──────────────────────────────────────────────────────────
verify_gpu() {
  log_section "GPU verification"
  local models_response
  models_response=$(curl -sf http://localhost:11434/api/tags 2>/dev/null || echo '{}')

  case "$GPU_TYPE" in
    metal)
      log_success "Apple Silicon Metal GPU — inference will be fast."
      ;;
    cuda)
      log_info "NVIDIA CUDA GPU detected: ${GPU_INFO:-unknown}"
      # Verify Ollama can see it. Note: `ollama run` auto-pulls qwen2.5:7b if it
      # is not installed yet, so this check may download the model first.
      if ollama run qwen2.5:7b "Say OK" &>/dev/null; then
        log_success "CUDA inference verified."
      else
        log_warn "Could not verify CUDA inference. Ollama may fall back to CPU."
      fi
      ;;
    rocm)
      log_info "AMD ROCm GPU detected: ${GPU_INFO:-unknown}"
      log_warn "ROCm support depends on your GPU and driver version."
      ;;
    none)
      log_warn "No GPU detected — Ollama will use CPU."
      log_warn "70B parameter models will be very slow on CPU. Consider qwen2.5:7b for testing."
      ;;
  esac
}

# ─── Pull Models ───────────────────────────────────────────────────────────────
pull_models() {
  log_section "Pulling models"
  if [[ ! -f "$MANIFEST" ]]; then
    log_warn "No model manifest at $MANIFEST — skipping model pull."
    return
  fi

  # On CPU-only, skip the big models and warn
  if [[ "$GPU_TYPE" == "none" ]]; then
    log_warn "CPU-only mode: skipping 70B models (too slow). Pulling small models only."
    log_warn "Edit $MANIFEST to select which models to pull, then run:"
    log_warn "  bash ${SCRIPT_DIR}/scripts/pull-models.sh"
    log_warn "Pulling only: qwen2.5:7b and nomic-embed-text"
    ollama pull qwen2.5:7b
    ollama pull nomic-embed-text
    return
  fi

  bash "${SCRIPT_DIR}/scripts/pull-models.sh"
}

# ─── Open WebUI ────────────────────────────────────────────────────────────────
start_open_webui() {
  log_section "Open WebUI"
  ensure_docker_running

  log_step "Pulling Open WebUI image..."
  docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" pull

  log_step "Starting Open WebUI..."
  docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" up -d

  wait_for_http "http://localhost:3030" "Open WebUI" 90
}

# ─── Register services ─────────────────────────────────────────────────────────
register_services() {
  write_env_service "OLLAMA_URL" "http://localhost:11434"
  write_env_service "OLLAMA_API_URL" "http://localhost:11434/v1"
  write_env_service "OPEN_WEBUI_URL" "http://localhost:3030"
  log_success "Service URLs written to ~/.env.services"
}

# ─── Summary ───────────────────────────────────────────────────────────────────
print_llm_summary() {
  local model_list
  model_list=$(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}' | tr '\n' ', ' | sed 's/,$//')

  print_summary "P2 LLM — Ready" \
    "Ollama API"    "http://localhost:11434" \
    "OpenAI compat" "http://localhost:11434/v1" \
    "Open WebUI"    "http://localhost:3030" \
    "GPU"           "${GPU_TYPE}" \
    "Models"        "${model_list:-none pulled yet}"

  echo "  Next steps:"
  echo "    1. Open http://localhost:3030 and create your admin account"
  # OLLAMA_PRIMARY_MODEL may be exported by common.sh or the env files; fall back
  # to the manifest's primary model so `set -u` cannot trip here.
  echo "    2. Test a chat with ${OLLAMA_PRIMARY_MODEL:-llama3.3:70b}"
  echo "    3. Run benchmark: bash ${SCRIPT_DIR}/scripts/benchmark.sh"
  echo "    4. Run: ./setup.sh p3 (Voice pipeline)"
  echo ""
}

# ─── Main ──────────────────────────────────────────────────────────────────────
main() {
  preflight
  install_ollama
  setup_ollama_service
  verify_gpu
  pull_models
  start_open_webui
  register_services
  print_llm_summary
}

main "$@"
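
Assuming write_env_service appends plain KEY=value lines to ~/.env.services (the helper lives in common.sh and is not shown here), later phases can discover P2's endpoints by sourcing that file:

    # e.g. inside a P3 script
    set -a                                # export everything the registry defines
    source "${HOME}/.env.services"
    set +a
    curl -sf "${OLLAMA_API_URL}/models"   # OpenAI-compatible model listing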

View File: homeai-llm/systemd/homeai-ollama.service

@@ -0,0 +1,26 @@
[Unit]
Description=Ollama AI inference server (HomeAI)
Documentation=https://ollama.com
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# Note: %i only expands to a real value in a templated unit (homeai-ollama@<user>.service).
# If this file is installed verbatim as homeai-ollama.service, replace %i with an actual
# user, or let the setup script substitute it.
User=%i
ExecStart=/usr/local/bin/ollama serve
Restart=always
RestartSec=5

# Environment
Environment=OLLAMA_HOST=0.0.0.0:11434
Environment=OLLAMA_MODELS=/usr/share/ollama/.ollama/models

# Limits
LimitNOFILE=65536

# CUDA GPU support
# Uncomment and set if you have multiple GPUs:
# Environment=CUDA_VISIBLE_DEVICES=0

[Install]
WantedBy=default.target
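
For reference, installing the unit by hand on Linux (the repo's install_service helper is expected to do the equivalent, including substituting a real user for %i):

    sudo cp homeai-llm/systemd/homeai-ollama.service /etc/systemd/system/
    sudo sed -i "s/^User=%i/User=${USER}/" /etc/systemd/system/homeai-ollama.service
    sudo systemctl daemon-reload
    sudo systemctl enable --now homeai-ollama.service
    journalctl -u homeai-ollama -f       # follow the service log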