homeai/homeai-llm/setup.sh
Aodhan Collins 7978eaea14 Add self-deploying setup scripts for all sub-projects (P1-P8)
- Root setup.sh orchestrator with per-phase dispatch (./setup.sh p1..p8 | all | status)
- Makefile convenience targets (make infra, make llm, make status, etc.)
- scripts/common.sh: shared bash library for OS detection, Docker helpers,
  service management (launchd/systemd), package install, env management
- .env.example + .gitignore: shared config template and secret exclusions

P1 (homeai-infra): full implementation
- docker-compose.yml: Uptime Kuma, code-server, n8n
- Note: Home Assistant, Portainer, Gitea are pre-existing instances
- setup.sh: Docker install, homeai network, container health checks

P2 (homeai-llm): full implementation
- Ollama native install with CUDA/ROCm/Metal auto-detection
- launchd plist (macOS) + systemd service (Linux) for auto-start
- scripts/pull-models.sh: idempotent model puller from manifest
- scripts/benchmark.sh: tokens/sec measurement per model
- Open WebUI on port 3030 (avoids Gitea :3000 conflict)

P3-P8: working stubs with prerequisite checks and TODO sections

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-04 21:10:53 +00:00

#!/usr/bin/env bash
# homeai-llm/setup.sh — P2: Ollama + Open WebUI
#
# Installs Ollama natively (for GPU access), sets up auto-start,
# pulls models from the manifest, and starts Open WebUI in Docker.
#
# GPU support:
#   Linux — CUDA (NVIDIA) or ROCm (AMD) or CPU fallback
#   macOS — Metal (automatic for Apple Silicon)
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
# shellcheck source=../scripts/common.sh
source "${REPO_DIR}/scripts/common.sh"
COMPOSE_FILE="${SCRIPT_DIR}/docker/docker-compose.yml"
ENV_FILE="${SCRIPT_DIR}/docker/.env"
ENV_EXAMPLE="${SCRIPT_DIR}/docker/.env.example"
MANIFEST="${SCRIPT_DIR}/ollama-models.txt"
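# A plausible shape for ollama-models.txt (the real contents are
# project-specific; one Ollama tag per line with '#' comments is an
# assumption, shared by the puller sketch further down):
#   qwen2.5:7b
#   llama3.3:70b
#   nomic-embed-text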
# ─── Pre-flight ────────────────────────────────────────────────────────────────
preflight() {
  log_section "P2 Preflight"
  detect_platform
  detect_gpu

  # Check P1 dependency (homeai Docker network must exist)
  if ! docker network inspect homeai &>/dev/null; then
    log_warn "Docker network 'homeai' not found. Has P1 been run?"
    log_warn "Run: ./setup.sh p1 first, or: docker network create homeai"
    if ! confirm "Create 'homeai' network now and continue?"; then
      die "Aborted. Run ./setup.sh p1 first."
    fi
    docker network create homeai
  fi

  # Bootstrap .env for Open WebUI
  if [[ ! -f "$ENV_FILE" && -f "$ENV_EXAMPLE" ]]; then
    cp "$ENV_EXAMPLE" "$ENV_FILE"
    log_warn "Created ${ENV_FILE} from .env.example"
    log_warn "Set WEBUI_SECRET_KEY in ${ENV_FILE} (run: openssl rand -hex 16)"
  fi

  # Create data dir
  load_env "$ENV_FILE" 2>/dev/null || true
  local data_dir="${DATA_DIR:-${HOME}/homeai-data}"
  mkdir -p "${data_dir}/open-webui"
}
# ─── Ollama Installation ───────────────────────────────────────────────────────
install_ollama() {
  log_section "Ollama"
  if command_exists ollama; then
    log_success "Ollama already installed: $(ollama --version 2>/dev/null || echo 'version unknown')"
    return
  fi

  log_info "Installing Ollama..."
  if [[ "$OS_TYPE" == "macos" ]]; then
    if command_exists brew; then
      brew install ollama
    else
      # The official install.sh is Linux-only; without Homebrew, point the
      # user at the macOS download instead of piping a script that would abort.
      die "Homebrew not found. Install it, or download Ollama from https://ollama.com/download"
    fi
  else
    # Linux — official install script handles CUDA/ROCm detection
    log_info "Downloading and running Ollama installer..."
    curl -fsSL https://ollama.com/install.sh | sh
  fi

  if ! command_exists ollama; then
    die "Ollama installation failed. Check the output above."
  fi
  log_success "Ollama installed: $(ollama --version 2>/dev/null || echo 'ok')"
}
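# If the installer reported success but the check above died anyway, the
# binary may sit in a directory not on this shell's PATH; open a fresh
# shell (or extend PATH) and re-run this phase.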
# ─── Ollama Service ────────────────────────────────────────────────────────────
setup_ollama_service() {
  log_section "Ollama service"
  # Check if already running
  if curl -sf -o /dev/null http://localhost:11434; then
    log_success "Ollama is already running."
    return
  fi

  install_service \
    "homeai-ollama" \
    "${SCRIPT_DIR}/systemd/homeai-ollama.service" \
    "${SCRIPT_DIR}/launchd/com.homeai.ollama.plist"

  # Give it a few seconds to start
  log_step "Waiting for Ollama to start..."
  local i=0
  while ! curl -sf -o /dev/null http://localhost:11434; do
    sleep 2; i=$((i + 2))
    if [[ $i -ge 30 ]]; then
      log_warn "Ollama did not start within 30s. Trying to start manually..."
      ollama serve &>/dev/null &
      sleep 5
      break
    fi
  done

  if curl -sf -o /dev/null http://localhost:11434; then
    log_success "Ollama is running."
  else
    die "Ollama failed to start. Check: ollama serve"
  fi
}
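# Note: Ollama binds to 127.0.0.1:11434 by default. If the Open WebUI
# container can't reach it (typical on Linux, where a bridge-networked
# container doesn't see the host's loopback), setting OLLAMA_HOST=0.0.0.0
# in the service unit and pointing the container at the host IP is one fix.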
# ─── GPU Verification ──────────────────────────────────────────────────────────
verify_gpu() {
  log_section "GPU verification"
  case "$GPU_TYPE" in
    metal)
      log_success "Apple Silicon Metal GPU — inference will be fast."
      ;;
    cuda)
      log_info "NVIDIA CUDA GPU detected: ${GPU_INFO:-unknown}"
      # Verify Ollama can see it. Note: `ollama run` pulls the model first
      # if it is not already present, so this may download qwen2.5:7b.
      if ollama run qwen2.5:7b "Say OK" &>/dev/null; then
        log_success "CUDA inference verified."
      else
        log_warn "Could not verify CUDA inference. Ollama may fall back to CPU."
      fi
      ;;
    rocm)
      log_info "AMD ROCm GPU detected: ${GPU_INFO:-unknown}"
      log_warn "ROCm support depends on your GPU and driver version."
      ;;
    none)
      log_warn "No GPU detected — Ollama will use CPU."
      log_warn "70B parameter models will be very slow on CPU. Consider qwen2.5:7b for testing."
      ;;
  esac
}
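# To check GPU offload by hand once a model is loaded:
#   ollama ps    # the PROCESSOR column reports e.g. "100% GPU" or "100% CPU"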
# ─── Pull Models ───────────────────────────────────────────────────────────────
pull_models() {
  log_section "Pulling models"
  if [[ ! -f "$MANIFEST" ]]; then
    log_warn "No model manifest at $MANIFEST — skipping model pull."
    return
  fi

  # On CPU-only, skip the big models and warn
  if [[ "$GPU_TYPE" == "none" ]]; then
    log_warn "CPU-only mode: skipping 70B models (too slow). Pulling small models only."
    log_warn "Edit $MANIFEST to select which models to pull, then run:"
    log_warn "  bash ${SCRIPT_DIR}/scripts/pull-models.sh"
    log_warn "Pulling only: qwen2.5:7b and nomic-embed-text"
    ollama pull qwen2.5:7b
    ollama pull nomic-embed-text
    return
  fi

  bash "${SCRIPT_DIR}/scripts/pull-models.sh"
}
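# A minimal sketch of what scripts/pull-models.sh is assumed to do
# (idempotent: models already reported by `ollama list` are skipped):
#
#   grep -vE '^[[:space:]]*(#|$)' "$MANIFEST" | while read -r model; do
#     if ollama list | awk 'NR>1 {print $1}' | grep -qxF "$model"; then
#       echo "skip: ${model} (already pulled)"
#     else
#       ollama pull "$model"
#     fi
#   done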
# ─── Open WebUI ────────────────────────────────────────────────────────────────
start_open_webui() {
  log_section "Open WebUI"
  ensure_docker_running
  log_step "Pulling Open WebUI image..."
  docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" pull
  log_step "Starting Open WebUI..."
  docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" up -d
  wait_for_http "http://localhost:3030" "Open WebUI" 90
}
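# The compose file at docker/docker-compose.yml is assumed to look roughly
# like this (Open WebUI listens on 8080 inside the container, and
# OLLAMA_BASE_URL is Open WebUI's standard variable for the Ollama endpoint):
#
#   services:
#     open-webui:
#       image: ghcr.io/open-webui/open-webui:main
#       ports: ["3030:8080"]
#       environment:
#         - OLLAMA_BASE_URL=http://host.docker.internal:11434
#         - WEBUI_SECRET_KEY=${WEBUI_SECRET_KEY}
#       volumes:
#         - ${DATA_DIR}/open-webui:/app/backend/data
#       networks: [homeai]
#   networks:
#     homeai:
#       external: true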
# ─── Register services ─────────────────────────────────────────────────────────
register_services() {
  write_env_service "OLLAMA_URL" "http://localhost:11434"
  write_env_service "OLLAMA_API_URL" "http://localhost:11434/v1"
  write_env_service "OPEN_WEBUI_URL" "http://localhost:3030"
  log_success "Service URLs written to ~/.env.services"
}
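# Resulting entries in ~/.env.services (assuming write_env_service appends
# simple KEY=value lines):
#   OLLAMA_URL=http://localhost:11434
#   OLLAMA_API_URL=http://localhost:11434/v1
#   OPEN_WEBUI_URL=http://localhost:3030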
# ─── Summary ───────────────────────────────────────────────────────────────────
print_llm_summary() {
  local model_list
  model_list=$(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}' | tr '\n' ',' | sed 's/,/, /g; s/, $//')
  print_summary "P2 LLM — Ready" \
    "Ollama API"    "http://localhost:11434" \
    "OpenAI compat" "http://localhost:11434/v1" \
    "Open WebUI"    "http://localhost:3030" \
    "GPU"           "${GPU_TYPE}" \
    "Models"        "${model_list:-none pulled yet}"
  echo "  Next steps:"
  echo "    1. Open http://localhost:3030 and create your admin account"
  # OLLAMA_PRIMARY_MODEL may come from .env; a default avoids a set -u crash.
  echo "    2. Test a chat with ${OLLAMA_PRIMARY_MODEL:-qwen2.5:7b}"
  echo "    3. Run benchmark: bash ${SCRIPT_DIR}/scripts/benchmark.sh"
  echo "    4. Run: ./setup.sh p3 (Voice pipeline)"
  echo ""
}
# ─── Main ──────────────────────────────────────────────────────────────────────
main() {
  preflight
  install_ollama
  setup_ollama_service
  verify_gpu
  pull_models
  start_open_webui
  register_services
  print_llm_summary
}
main "$@"