Add self-deploying setup scripts for all sub-projects (P1-P8)
- Root setup.sh orchestrator with per-phase dispatch (./setup.sh p1..p8 | all | status)
- Makefile convenience targets (make infra, make llm, make status, etc.)
- scripts/common.sh: shared bash library for OS detection, Docker helpers, service management (launchd/systemd), package install, env management
- .env.example + .gitignore: shared config template and secret exclusions

P1 (homeai-infra): full implementation
- docker-compose.yml: Uptime Kuma, code-server, n8n
- Note: Home Assistant, Portainer, Gitea are pre-existing instances
- setup.sh: Docker install, homeai network, container health checks

P2 (homeai-llm): full implementation
- Ollama native install with CUDA/ROCm/Metal auto-detection
- launchd plist (macOS) + systemd service (Linux) for auto-start
- scripts/pull-models.sh: idempotent model puller from manifest (sketched below)
- scripts/benchmark.sh: tokens/sec measurement per model (also sketched below)
- Open WebUI on port 3030 (avoids Gitea :3000 conflict)

P3-P8: working stubs with prerequisite checks and TODO sections

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
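
Neither helper script named above appears in this diff, so the two sketches that follow are illustrative, not the committed code. First, a minimal idempotent puller in the spirit of scripts/pull-models.sh; the manifest format (one model tag per line, # comments allowed) is an assumption:

#!/usr/bin/env bash
# Illustrative sketch of scripts/pull-models.sh (not the committed version).
# Idempotent: models already reported by `ollama list` are skipped.
set -euo pipefail

MANIFEST="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/ollama-models.txt"

# Drop comments and blank lines, then pull whatever is missing.
grep -Ev '^[[:space:]]*(#|$)' "$MANIFEST" | while read -r model; do
  if ollama list | awk 'NR > 1 {print $1}' | grep -Fqx "$model"; then
    echo "[skip] $model already present"
  else
    echo "[pull] $model"
    ollama pull "$model"
  fi
done

Re-pulling an existing model is a fast no-op, so an exact-match miss (e.g. a manifest entry without an explicit :latest tag) costs little.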
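Likewise, scripts/benchmark.sh is not in the diff. One plausible approach to tokens/sec, sketched below, reads the eval_count and eval_duration (nanoseconds) fields that Ollama's /api/generate endpoint returns when streaming is disabled; the prompt and the loop over installed models are arbitrary choices:

#!/usr/bin/env bash
# Illustrative sketch of scripts/benchmark.sh (not the committed version).
set -euo pipefail

PROMPT="Explain what a mutex is in one short paragraph."

ollama list | awk 'NR > 1 {print $1}' | while read -r model; do
  # Embedding-only models reject /api/generate; skip them on error.
  resp=$(curl -sf http://localhost:11434/api/generate \
    -d "{\"model\": \"$model\", \"prompt\": \"$PROMPT\", \"stream\": false}") || continue
  # tokens/sec = eval_count / (eval_duration in ns / 1e9)
  echo "$resp" | awk -v m="$model" '
    match($0, /"eval_count":[0-9]+/)    { c = substr($0, RSTART + 13, RLENGTH - 13) }
    match($0, /"eval_duration":[0-9]+/) { d = substr($0, RSTART + 16, RLENGTH - 16) }
    END { if (d > 0) printf "%-28s %6.1f tokens/s\n", m, c / (d / 1e9) }'
done
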
homeai-llm/setup.sh (new file)

#!/usr/bin/env bash
# homeai-llm/setup.sh — P2: Ollama + Open WebUI
#
# Installs Ollama natively (for GPU access), sets up auto-start,
# pulls models from the manifest, and starts Open WebUI in Docker.
#
# GPU support:
#   Linux — CUDA (NVIDIA) or ROCm (AMD) or CPU fallback
#   macOS — Metal (automatic for Apple Silicon)

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
# shellcheck source=../scripts/common.sh
source "${REPO_DIR}/scripts/common.sh"
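# Helpers expected from common.sh, as used below: log_section/log_info/
# log_step/log_warn/log_success, die, confirm, command_exists, load_env,
# detect_platform (sets OS_TYPE), detect_gpu (sets GPU_TYPE/GPU_INFO),
# install_service, ensure_docker_running, docker_compose, wait_for_http,
# write_env_service, print_summary.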

COMPOSE_FILE="${SCRIPT_DIR}/docker/docker-compose.yml"
ENV_FILE="${SCRIPT_DIR}/docker/.env"
ENV_EXAMPLE="${SCRIPT_DIR}/docker/.env.example"
MANIFEST="${SCRIPT_DIR}/ollama-models.txt"

# ─── Pre-flight ────────────────────────────────────────────────────────────────
preflight() {
  log_section "P2 Preflight"
  detect_platform
  detect_gpu

  # Check P1 dependency (homeai Docker network must exist)
  if ! docker network inspect homeai &>/dev/null; then
    log_warn "Docker network 'homeai' not found. Has P1 been run?"
    log_warn "Run: ./setup.sh p1 first, or: docker network create homeai"
    if ! confirm "Create 'homeai' network now and continue?"; then
      die "Aborted. Run ./setup.sh p1 first."
    fi
    docker network create homeai
  fi

  # Bootstrap .env for Open WebUI
  if [[ ! -f "$ENV_FILE" && -f "$ENV_EXAMPLE" ]]; then
    cp "$ENV_EXAMPLE" "$ENV_FILE"
    log_warn "Created ${ENV_FILE} from .env.example"
    log_warn "Set WEBUI_SECRET_KEY in ${ENV_FILE} (run: openssl rand -hex 16)"
  fi

  # Create data dir
  load_env "$ENV_FILE" 2>/dev/null || true
  local data_dir="${DATA_DIR:-${HOME}/homeai-data}"
  mkdir -p "${data_dir}/open-webui"
}

# ─── Ollama Installation ───────────────────────────────────────────────────────
install_ollama() {
  log_section "Ollama"

  if command_exists ollama; then
    log_success "Ollama already installed: $(ollama --version 2>/dev/null || echo 'version unknown')"
    return
  fi

  log_info "Installing Ollama..."

  if [[ "$OS_TYPE" == "macos" ]]; then
    if command_exists brew; then
      brew install ollama
    else
      # The official install.sh targets Linux only, so without Homebrew
      # we point at the macOS app download instead.
      die "Homebrew not found. Install Ollama from https://ollama.com/download, then re-run this script."
    fi
  else
    # Linux — official install script handles CUDA/ROCm detection
    log_info "Downloading and running Ollama installer..."
    curl -fsSL https://ollama.com/install.sh | sh
  fi

  if ! command_exists ollama; then
    die "Ollama installation failed. Check the output above."
  fi

  log_success "Ollama installed: $(ollama --version 2>/dev/null || echo 'ok')"
}

# ─── Ollama Service ────────────────────────────────────────────────────────────
setup_ollama_service() {
  log_section "Ollama service"

  # Check if already running
  if curl -sf -o /dev/null http://localhost:11434; then
    log_success "Ollama is already running."
    return
  fi

  install_service \
    "homeai-ollama" \
    "${SCRIPT_DIR}/systemd/homeai-ollama.service" \
    "${SCRIPT_DIR}/launchd/com.homeai.ollama.plist"

  # Give it a few seconds to start
  log_step "Waiting for Ollama to start..."
  local i=0
  while ! curl -sf -o /dev/null http://localhost:11434; do
    sleep 2; i=$((i + 2))
    if [[ $i -ge 30 ]]; then
      log_warn "Ollama did not start within 30s. Trying to start manually..."
      ollama serve &>/dev/null &
      sleep 5
      break
    fi
  done

  if curl -sf -o /dev/null http://localhost:11434; then
    log_success "Ollama is running."
  else
    die "Ollama failed to start. Check: ollama serve"
  fi
}

# ─── GPU Verification ──────────────────────────────────────────────────────────
verify_gpu() {
  log_section "GPU verification"

  case "$GPU_TYPE" in
    metal)
      log_success "Apple Silicon Metal GPU — inference will be fast."
      ;;
    cuda)
      log_info "NVIDIA CUDA GPU detected: ${GPU_INFO:-unknown}"
      # Verify Ollama can see it (this pulls qwen2.5:7b if not yet present)
      if ollama run qwen2.5:7b "Say OK" &>/dev/null; then
        log_success "CUDA inference verified."
      else
        log_warn "Could not verify CUDA inference. Ollama may fall back to CPU."
      fi
      ;;
    rocm)
      log_info "AMD ROCm GPU detected: ${GPU_INFO:-unknown}"
      log_warn "ROCm support depends on your GPU and driver version."
      ;;
    none)
      log_warn "No GPU detected — Ollama will use CPU."
      log_warn "70B-parameter models will be very slow on CPU. Consider qwen2.5:7b for testing."
      ;;
  esac
}

# ─── Pull Models ───────────────────────────────────────────────────────────────
pull_models() {
  log_section "Pulling models"

  if [[ ! -f "$MANIFEST" ]]; then
    log_warn "No model manifest at $MANIFEST — skipping model pull."
    return
  fi

  # On CPU-only, skip the big models and warn
  if [[ "$GPU_TYPE" == "none" ]]; then
    log_warn "CPU-only mode: skipping 70B models (too slow). Pulling small models only."
    log_warn "Edit $MANIFEST to select which models to pull, then run:"
    log_warn "  bash ${SCRIPT_DIR}/scripts/pull-models.sh"
    log_warn "Pulling only: qwen2.5:7b and nomic-embed-text"
    ollama pull qwen2.5:7b
    ollama pull nomic-embed-text
    return
  fi

  bash "${SCRIPT_DIR}/scripts/pull-models.sh"
}

# ─── Open WebUI ────────────────────────────────────────────────────────────────
start_open_webui() {
  log_section "Open WebUI"

  ensure_docker_running

  log_step "Pulling Open WebUI image..."
  docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" pull

  log_step "Starting Open WebUI..."
  docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" up -d

  wait_for_http "http://localhost:3030" "Open WebUI" 90
}

# ─── Register services ─────────────────────────────────────────────────────────
register_services() {
  write_env_service "OLLAMA_URL" "http://localhost:11434"
  write_env_service "OLLAMA_API_URL" "http://localhost:11434/v1"
  write_env_service "OPEN_WEBUI_URL" "http://localhost:3030"
  log_success "Service URLs written to ~/.env.services"
}

# ─── Summary ───────────────────────────────────────────────────────────────────
print_llm_summary() {
  local model_list
  # Join installed model names with commas (skip the `ollama list` header row).
  model_list=$(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}' | tr '\n' ',' | sed 's/,$//')

  print_summary "P2 LLM — Ready" \
    "Ollama API" "http://localhost:11434" \
    "OpenAI compat" "http://localhost:11434/v1" \
    "Open WebUI" "http://localhost:3030" \
    "GPU" "${GPU_TYPE}" \
    "Models" "${model_list:-none pulled yet}"

  echo "  Next steps:"
  echo "    1. Open http://localhost:3030 and create your admin account"
  echo "    2. Test a chat with ${OLLAMA_PRIMARY_MODEL:-qwen2.5:7b}"
  echo "    3. Run benchmark: bash ${SCRIPT_DIR}/scripts/benchmark.sh"
  echo "    4. Run: ./setup.sh p3 (Voice pipeline)"
  echo ""
}

# ─── Main ──────────────────────────────────────────────────────────────────────
main() {
  preflight
  install_ollama
  setup_ollama_service
  verify_gpu
  pull_models
  start_open_webui
  register_services
  print_llm_summary
}

main "$@"
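
Once P2 is up, the OpenAI-compatible endpoint listed in the summary can be smoke-tested from the shell. A sketch; the model name assumes the qwen2.5:7b pull above:

curl -s http://localhost:11434/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen2.5:7b", "messages": [{"role": "user", "content": "Say OK"}]}'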