homeai/homeai-llm/setup.sh
Aodhan Collins 7978eaea14 Add self-deploying setup scripts for all sub-projects (P1-P8)
- Root setup.sh orchestrator with per-phase dispatch (./setup.sh p1..p8 | all | status)
- Makefile convenience targets (make infra, make llm, make status, etc.)
- scripts/common.sh: shared bash library for OS detection, Docker helpers,
  service management (launchd/systemd), package install, env management
- .env.example + .gitignore: shared config template and secret exclusions

P1 (homeai-infra): full implementation
- docker-compose.yml: Uptime Kuma, code-server, n8n
- Note: Home Assistant, Portainer, Gitea are pre-existing instances
- setup.sh: Docker install, homeai network, container health checks

P2 (homeai-llm): full implementation
- Ollama native install with CUDA/ROCm/Metal auto-detection
- launchd plist (macOS) + systemd service (Linux) for auto-start
- scripts/pull-models.sh: idempotent model puller from manifest
- scripts/benchmark.sh: tokens/sec measurement per model
- Open WebUI on port 3030 (avoids Gitea :3000 conflict)

P3-P8: working stubs with prerequisite checks and TODO sections

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-04 21:10:53 +00:00

#!/usr/bin/env bash
# homeai-llm/setup.sh — P2: Ollama + Open WebUI
#
# Installs Ollama natively (for GPU access), sets up auto-start,
# pulls models from the manifest, and starts Open WebUI in Docker.
#
# GPU support:
#   Linux — CUDA (NVIDIA) or ROCm (AMD) or CPU fallback
#   macOS — Metal (automatic for Apple Silicon)
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
# shellcheck source=../scripts/common.sh
source "${REPO_DIR}/scripts/common.sh"
COMPOSE_FILE="${SCRIPT_DIR}/docker/docker-compose.yml"
ENV_FILE="${SCRIPT_DIR}/docker/.env"
ENV_EXAMPLE="${SCRIPT_DIR}/docker/.env.example"
MANIFEST="${SCRIPT_DIR}/ollama-models.txt"
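# A plausible shape for ollama-models.txt (the real contents are
# project-specific; one Ollama tag per line with '#' comments is an
# assumption, shared by the puller sketch further down):
#   qwen2.5:7b
#   llama3.3:70b
#   nomic-embed-text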
# ─── Pre-flight ────────────────────────────────────────────────────────────────
preflight() {
  log_section "P2 Preflight"
  detect_platform
  detect_gpu

  # Check P1 dependency (homeai Docker network must exist)
  if ! docker network inspect homeai &>/dev/null; then
    log_warn "Docker network 'homeai' not found. Has P1 been run?"
    log_warn "Run: ./setup.sh p1 first, or: docker network create homeai"
    if ! confirm "Create 'homeai' network now and continue?"; then
      die "Aborted. Run ./setup.sh p1 first."
    fi
    docker network create homeai
  fi

  # Bootstrap .env for Open WebUI
  if [[ ! -f "$ENV_FILE" && -f "$ENV_EXAMPLE" ]]; then
    cp "$ENV_EXAMPLE" "$ENV_FILE"
    log_warn "Created ${ENV_FILE} from .env.example"
    log_warn "Set WEBUI_SECRET_KEY in ${ENV_FILE} (run: openssl rand -hex 16)"
  fi

  # Create data dir
  load_env "$ENV_FILE" 2>/dev/null || true
  local data_dir="${DATA_DIR:-${HOME}/homeai-data}"
  mkdir -p "${data_dir}/open-webui"
}
# ─── Ollama Installation ───────────────────────────────────────────────────────
install_ollama() {
  log_section "Ollama"
  if command_exists ollama; then
    log_success "Ollama already installed: $(ollama --version 2>/dev/null || echo 'version unknown')"
    return
  fi

  log_info "Installing Ollama..."
  if [[ "$OS_TYPE" == "macos" ]]; then
    if command_exists brew; then
      brew install ollama
    else
      # The official install.sh is Linux-only; without Homebrew, point the
      # user at the macOS download instead of piping a script that would abort.
      die "Homebrew not found. Install it, or download Ollama from https://ollama.com/download"
    fi
  else
    # Linux — official install script handles CUDA/ROCm detection
    log_info "Downloading and running Ollama installer..."
    curl -fsSL https://ollama.com/install.sh | sh
  fi

  if ! command_exists ollama; then
    die "Ollama installation failed. Check the output above."
  fi
  log_success "Ollama installed: $(ollama --version 2>/dev/null || echo 'ok')"
}
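# If the installer reported success but the check above died anyway, the
# binary may sit in a directory not on this shell's PATH; open a fresh
# shell (or extend PATH) and re-run this phase.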
# ─── Ollama Service ────────────────────────────────────────────────────────────
setup_ollama_service() {
  log_section "Ollama service"
  # Check if already running
  if curl -sf -o /dev/null http://localhost:11434; then
    log_success "Ollama is already running."
    return
  fi

  install_service \
    "homeai-ollama" \
    "${SCRIPT_DIR}/systemd/homeai-ollama.service" \
    "${SCRIPT_DIR}/launchd/com.homeai.ollama.plist"

  # Give it a few seconds to start
  log_step "Waiting for Ollama to start..."
  local i=0
  while ! curl -sf -o /dev/null http://localhost:11434; do
    sleep 2; i=$((i + 2))
    if [[ $i -ge 30 ]]; then
      log_warn "Ollama did not start within 30s. Trying to start manually..."
      ollama serve &>/dev/null &
      sleep 5
      break
    fi
  done

  if curl -sf -o /dev/null http://localhost:11434; then
    log_success "Ollama is running."
  else
    die "Ollama failed to start. Check: ollama serve"
  fi
}
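# Note: Ollama binds to 127.0.0.1:11434 by default. If the Open WebUI
# container can't reach it (typical on Linux, where a bridge-networked
# container doesn't see the host's loopback), setting OLLAMA_HOST=0.0.0.0
# in the service unit and pointing the container at the host IP is one fix.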
# ─── GPU Verification ──────────────────────────────────────────────────────────
verify_gpu() {
  log_section "GPU verification"
  case "$GPU_TYPE" in
    metal)
      log_success "Apple Silicon Metal GPU — inference will be fast."
      ;;
    cuda)
      log_info "NVIDIA CUDA GPU detected: ${GPU_INFO:-unknown}"
      # Verify Ollama can see it. Note: `ollama run` pulls the model first
      # if it is not already present, so this may download qwen2.5:7b.
      if ollama run qwen2.5:7b "Say OK" &>/dev/null; then
        log_success "CUDA inference verified."
      else
        log_warn "Could not verify CUDA inference. Ollama may fall back to CPU."
      fi
      ;;
    rocm)
      log_info "AMD ROCm GPU detected: ${GPU_INFO:-unknown}"
      log_warn "ROCm support depends on your GPU and driver version."
      ;;
    none)
      log_warn "No GPU detected — Ollama will use CPU."
      log_warn "70B parameter models will be very slow on CPU. Consider qwen2.5:7b for testing."
      ;;
  esac
}
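# To check GPU offload by hand once a model is loaded:
#   ollama ps    # the PROCESSOR column reports e.g. "100% GPU" or "100% CPU"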
# ─── Pull Models ───────────────────────────────────────────────────────────────
pull_models() {
  log_section "Pulling models"
  if [[ ! -f "$MANIFEST" ]]; then
    log_warn "No model manifest at $MANIFEST — skipping model pull."
    return
  fi

  # On CPU-only, skip the big models and warn
  if [[ "$GPU_TYPE" == "none" ]]; then
    log_warn "CPU-only mode: skipping 70B models (too slow). Pulling small models only."
    log_warn "Edit $MANIFEST to select which models to pull, then run:"
    log_warn "  bash ${SCRIPT_DIR}/scripts/pull-models.sh"
    log_warn "Pulling only: qwen2.5:7b and nomic-embed-text"
    ollama pull qwen2.5:7b
    ollama pull nomic-embed-text
    return
  fi

  bash "${SCRIPT_DIR}/scripts/pull-models.sh"
}
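# A minimal sketch of what scripts/pull-models.sh is assumed to do
# (idempotent: models already reported by `ollama list` are skipped):
#
#   grep -vE '^[[:space:]]*(#|$)' "$MANIFEST" | while read -r model; do
#     if ollama list | awk 'NR>1 {print $1}' | grep -qxF "$model"; then
#       echo "skip: ${model} (already pulled)"
#     else
#       ollama pull "$model"
#     fi
#   done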
# ─── Open WebUI ────────────────────────────────────────────────────────────────
start_open_webui() {
  log_section "Open WebUI"
  ensure_docker_running
  log_step "Pulling Open WebUI image..."
  docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" pull
  log_step "Starting Open WebUI..."
  docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" up -d
  wait_for_http "http://localhost:3030" "Open WebUI" 90
}
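# The compose file at docker/docker-compose.yml is assumed to look roughly
# like this (Open WebUI listens on 8080 inside the container, and
# OLLAMA_BASE_URL is Open WebUI's standard variable for the Ollama endpoint):
#
#   services:
#     open-webui:
#       image: ghcr.io/open-webui/open-webui:main
#       ports: ["3030:8080"]
#       environment:
#         - OLLAMA_BASE_URL=http://host.docker.internal:11434
#         - WEBUI_SECRET_KEY=${WEBUI_SECRET_KEY}
#       volumes:
#         - ${DATA_DIR}/open-webui:/app/backend/data
#       networks: [homeai]
#   networks:
#     homeai:
#       external: true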
# ─── Register services ─────────────────────────────────────────────────────────
register_services() {
  write_env_service "OLLAMA_URL" "http://localhost:11434"
  write_env_service "OLLAMA_API_URL" "http://localhost:11434/v1"
  write_env_service "OPEN_WEBUI_URL" "http://localhost:3030"
  log_success "Service URLs written to ~/.env.services"
}
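# Resulting entries in ~/.env.services (assuming write_env_service appends
# simple KEY=value lines):
#   OLLAMA_URL=http://localhost:11434
#   OLLAMA_API_URL=http://localhost:11434/v1
#   OPEN_WEBUI_URL=http://localhost:3030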
# ─── Summary ───────────────────────────────────────────────────────────────────
print_llm_summary() {
  local model_list
  model_list=$(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}' | tr '\n' ',' | sed 's/,/, /g; s/, $//')
  print_summary "P2 LLM — Ready" \
    "Ollama API"    "http://localhost:11434" \
    "OpenAI compat" "http://localhost:11434/v1" \
    "Open WebUI"    "http://localhost:3030" \
    "GPU"           "${GPU_TYPE}" \
    "Models"        "${model_list:-none pulled yet}"
  echo "  Next steps:"
  echo "    1. Open http://localhost:3030 and create your admin account"
  # OLLAMA_PRIMARY_MODEL may come from .env; a default avoids a set -u crash.
  echo "    2. Test a chat with ${OLLAMA_PRIMARY_MODEL:-qwen2.5:7b}"
  echo "    3. Run benchmark: bash ${SCRIPT_DIR}/scripts/benchmark.sh"
  echo "    4. Run: ./setup.sh p3 (Voice pipeline)"
  echo ""
}
# ─── Main ──────────────────────────────────────────────────────────────────────
main() {
  preflight
  install_ollama
  setup_ollama_service
  verify_gpu
  pull_models
  start_open_webui
  register_services
  print_llm_summary
}
main "$@"