#!/usr/bin/env bash
# homeai-llm/setup.sh — P2: Ollama + Open WebUI
#
# Installs Ollama natively (for GPU access), sets up auto-start,
# pulls models from the manifest, and starts Open WebUI in Docker.
#
# GPU support:
#   Linux — CUDA (NVIDIA) or ROCm (AMD) or CPU fallback
#   macOS — Metal (automatic for Apple Silicon)
#
# Requires: scripts/common.sh (log_*, die, confirm, detect_platform,
# detect_gpu, command_exists, load_env, install_service, docker_compose,
# ensure_docker_running, wait_for_http, write_env_service, print_summary).

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
# shellcheck source=../scripts/common.sh
source "${REPO_DIR}/scripts/common.sh"

readonly COMPOSE_FILE="${SCRIPT_DIR}/docker/docker-compose.yml"
readonly ENV_FILE="${SCRIPT_DIR}/docker/.env"
readonly ENV_EXAMPLE="${SCRIPT_DIR}/docker/.env.example"
readonly MANIFEST="${SCRIPT_DIR}/ollama-models.txt"

# Local Ollama API endpoint, used by every health check below.
readonly OLLAMA_LOCAL_URL="http://localhost:11434"

#######################################
# Return 0 if the local Ollama API answers, non-zero otherwise.
# Globals:   OLLAMA_LOCAL_URL (read)
# Outputs:   none (all output suppressed)
#######################################
ollama_is_up() {
  curl -sf -o /dev/null "$OLLAMA_LOCAL_URL" 2>/dev/null
}

# ─── Pre-flight ────────────────────────────────────────────────────────────────

#######################################
# Verify P1 prerequisites (homeai Docker network), bootstrap the Open WebUI
# .env file, and create the data directory.
# Globals:   ENV_FILE, ENV_EXAMPLE (read); DATA_DIR (read, via load_env)
#######################################
preflight() {
  log_section "P2 Preflight"
  detect_platform
  detect_gpu

  # Check P1 dependency (homeai Docker network must exist).
  if ! docker network inspect homeai &>/dev/null; then
    log_warn "Docker network 'homeai' not found. Has P1 been run?"
    log_warn "Run: ./setup.sh p1 first, or: docker network create homeai"
    if ! confirm "Create 'homeai' network now and continue?"; then
      die "Aborted. Run ./setup.sh p1 first."
    fi
    docker network create homeai
  fi

  # Bootstrap .env for Open WebUI from the checked-in example.
  if [[ ! -f "$ENV_FILE" && -f "$ENV_EXAMPLE" ]]; then
    cp "$ENV_EXAMPLE" "$ENV_FILE"
    log_warn "Created ${ENV_FILE} from .env.example"
    log_warn "Set WEBUI_SECRET_KEY in ${ENV_FILE} (run: openssl rand -hex 16)"
  fi

  # Create data dir. load_env failure is tolerated on purpose: the .env may
  # not exist yet on a first run, so fall back to the default DATA_DIR.
  load_env "$ENV_FILE" 2>/dev/null || true
  local data_dir="${DATA_DIR:-${HOME}/homeai-data}"
  mkdir -p "${data_dir}/open-webui"
}

# ─── Ollama Installation ───────────────────────────────────────────────────────

#######################################
# Install Ollama natively (Homebrew on macOS when available, otherwise the
# official install script, which handles CUDA/ROCm detection on Linux).
# Globals:   OS_TYPE (read, set by detect_platform)
#######################################
install_ollama() {
  log_section "Ollama"

  if command_exists ollama; then
    log_success "Ollama already installed: $(ollama --version 2>/dev/null || echo 'version unknown')"
    return
  fi

  log_info "Installing Ollama..."
  if [[ "$OS_TYPE" == "macos" ]]; then
    if command_exists brew; then
      brew install ollama
    else
      log_info "Downloading Ollama for macOS..."
      curl -fsSL https://ollama.com/install.sh | sh
    fi
  else
    # Linux — official install script handles CUDA/ROCm detection.
    log_info "Downloading and running Ollama installer..."
    curl -fsSL https://ollama.com/install.sh | sh
  fi

  if ! command_exists ollama; then
    die "Ollama installation failed. Check the output above."
  fi
  log_success "Ollama installed: $(ollama --version 2>/dev/null || echo 'ok')"
}

# ─── Ollama Service ────────────────────────────────────────────────────────────

#######################################
# Install the Ollama auto-start unit (systemd or launchd) and wait for the
# API to come up, falling back to a manual 'ollama serve' after 30s.
# Globals:   SCRIPT_DIR (read)
#######################################
setup_ollama_service() {
  log_section "Ollama service"

  # Check if already running.
  if ollama_is_up; then
    log_success "Ollama is already running."
    return
  fi

  install_service \
    "homeai-ollama" \
    "${SCRIPT_DIR}/systemd/homeai-ollama.service" \
    "${SCRIPT_DIR}/launchd/com.homeai.ollama.plist"

  # Give it a few seconds to start.
  log_step "Waiting for Ollama to start..."
  local i=0
  while ! ollama_is_up; do
    sleep 2
    i=$((i + 2))
    if [[ $i -ge 30 ]]; then
      log_warn "Ollama did not start within 30s. Trying to start manually..."
      ollama serve &>/dev/null &
      sleep 5
      break
    fi
  done

  if ollama_is_up; then
    log_success "Ollama is running."
  else
    die "Ollama failed to start. Check: ollama serve"
  fi
}

# ─── GPU Verification ──────────────────────────────────────────────────────────

#######################################
# Report the detected GPU and, for CUDA, run an inference smoke test — but
# only if the test model is already present locally: pull_models() has not
# run yet at this point in main(), and 'ollama run' on a missing model
# would trigger a multi-GB download.
# Globals:   GPU_TYPE, GPU_INFO (read, set by detect_gpu)
#######################################
verify_gpu() {
  log_section "GPU verification"

  case "$GPU_TYPE" in
    metal)
      log_success "Apple Silicon Metal GPU — inference will be fast."
      ;;
    cuda)
      log_info "NVIDIA CUDA GPU detected: ${GPU_INFO:-unknown}"
      # Verify Ollama can see it, but only with an already-pulled model.
      if ollama list 2>/dev/null | awk 'NR > 1 {print $1}' | grep -qx 'qwen2.5:7b'; then
        if ollama run qwen2.5:7b "Say OK" &>/dev/null; then
          log_success "CUDA inference verified."
        else
          log_warn "Could not verify CUDA inference. Ollama may fall back to CPU."
        fi
      else
        log_info "Skipping CUDA inference smoke test (qwen2.5:7b not pulled yet)."
      fi
      ;;
    rocm)
      log_info "AMD ROCm GPU detected: ${GPU_INFO:-unknown}"
      log_warn "ROCm support depends on your GPU and driver version."
      ;;
    none)
      log_warn "No GPU detected — Ollama will use CPU."
      log_warn "70B parameter models will be very slow on CPU. Consider qwen2.5:7b for testing."
      ;;
  esac
}

# ─── Pull Models ───────────────────────────────────────────────────────────────

#######################################
# Pull models listed in the manifest. On CPU-only hosts, pull just the two
# small models and tell the user how to pull the rest manually.
# Globals:   MANIFEST, SCRIPT_DIR, GPU_TYPE (read)
#######################################
pull_models() {
  log_section "Pulling models"

  if [[ ! -f "$MANIFEST" ]]; then
    log_warn "No model manifest at $MANIFEST — skipping model pull."
    return
  fi

  # On CPU-only, skip the big models and warn.
  if [[ "$GPU_TYPE" == "none" ]]; then
    log_warn "CPU-only mode: skipping 70B models (too slow). Pulling small models only."
    log_warn "Edit $MANIFEST to select which models to pull, then run:"
    log_warn "  bash ${SCRIPT_DIR}/scripts/pull-models.sh"
    log_warn "Pulling only: qwen2.5:7b and nomic-embed-text"
    ollama pull qwen2.5:7b
    ollama pull nomic-embed-text
    return
  fi

  bash "${SCRIPT_DIR}/scripts/pull-models.sh"
}

# ─── Open WebUI ────────────────────────────────────────────────────────────────

#######################################
# Pull and start the Open WebUI container, then wait for its HTTP endpoint.
# Globals:   COMPOSE_FILE, ENV_FILE (read)
#######################################
start_open_webui() {
  log_section "Open WebUI"
  ensure_docker_running

  log_step "Pulling Open WebUI image..."
  docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" pull

  log_step "Starting Open WebUI..."
  docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" up -d

  wait_for_http "http://localhost:3030" "Open WebUI" 90
}

# ─── Register services ─────────────────────────────────────────────────────────

#######################################
# Publish this phase's service URLs to ~/.env.services for later phases.
#######################################
register_services() {
  write_env_service "OLLAMA_URL" "http://localhost:11434"
  write_env_service "OLLAMA_API_URL" "http://localhost:11434/v1"
  write_env_service "OPEN_WEBUI_URL" "http://localhost:3030"
  log_success "Service URLs written to ~/.env.services"
}

# ─── Summary ───────────────────────────────────────────────────────────────────

#######################################
# Print the end-of-phase summary and next steps.
# Globals:   GPU_TYPE, SCRIPT_DIR (read);
#            OLLAMA_PRIMARY_MODEL (read if set; defaults to qwen2.5:7b —
#            this script never sets it, and set -u would otherwise abort)
#######################################
print_llm_summary() {
  local model_list
  # First column of 'ollama list' minus the header, joined with commas.
  # paste -sd, never emits a trailing delimiter, unlike the old tr|sed pipe.
  model_list=$(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}' | paste -sd, -)

  print_summary "P2 LLM — Ready" \
    "Ollama API" "http://localhost:11434" \
    "OpenAI compat" "http://localhost:11434/v1" \
    "Open WebUI" "http://localhost:3030" \
    "GPU" "${GPU_TYPE}" \
    "Models" "${model_list:-none pulled yet}"

  echo "  Next steps:"
  echo "    1. Open http://localhost:3030 and create your admin account"
  echo "    2. Test a chat with ${OLLAMA_PRIMARY_MODEL:-qwen2.5:7b}"
  echo "    3. Run benchmark: bash ${SCRIPT_DIR}/scripts/benchmark.sh"
  echo "    4. Run: ./setup.sh p3 (Voice pipeline)"
  echo ""
}

# ─── Main ──────────────────────────────────────────────────────────────────────

main() {
  preflight
  install_ollama
  setup_ollama_service
  verify_gpu
  pull_models
  start_open_webui
  register_services
  print_llm_summary
}

main "$@"