Add self-deploying setup scripts for all sub-projects (P1-P8)

- Root setup.sh orchestrator with per-phase dispatch (./setup.sh p1..p8 | all | status)
- Makefile convenience targets (make infra, make llm, make status, etc.)
- scripts/common.sh: shared bash library for OS detection, Docker helpers,
  service management (launchd/systemd), package install, env management
- .env.example + .gitignore: shared config template and secret exclusions

P1 (homeai-infra): full implementation
- docker-compose.yml: Uptime Kuma, code-server, n8n
- Note: Home Assistant, Portainer, Gitea are pre-existing instances
- setup.sh: Docker install, homeai network, container health checks

P2 (homeai-llm): full implementation
- Ollama native install with CUDA/ROCm/Metal auto-detection
- launchd plist (macOS) + systemd service (Linux) for auto-start
- scripts/pull-models.sh: idempotent model puller from manifest
- scripts/benchmark.sh: tokens/sec measurement per model
- Open WebUI on port 3030 (avoids Gitea :3000 conflict)

P3-P8: working stubs with prerequisite checks and TODO sections

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Author: Aodhan Collins
Date:   2026-03-04 21:10:53 +00:00
Parent: 38247d7cc4
Commit: 7978eaea14
23 changed files with 2525 additions and 0 deletions
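
Every script below sources scripts/common.sh, which this commit adds but which is not reproduced in this excerpt. As a point of reference, here is a minimal sketch of a few of the helpers those scripts call (command_exists, die, detect_platform, and the log_* functions); the names match the call sites, but the bodies are illustrative assumptions rather than the committed implementation:

    #!/usr/bin/env bash
    # Hypothetical excerpt of scripts/common.sh (illustration only)

    command_exists() { command -v "$1" >/dev/null 2>&1; }

    log_info()    { printf '\033[0;36m[info]\033[0m %s\n' "$*"; }
    log_step()    { printf '\033[0;34m[....]\033[0m %s\n' "$*"; }
    log_warn()    { printf '\033[0;33m[warn]\033[0m %s\n' "$*"; }
    log_error()   { printf '\033[0;31m[fail]\033[0m %s\n' "$*" >&2; }
    log_success() { printf '\033[0;32m[ ok ]\033[0m %s\n' "$*"; }
    log_section() { printf '\n\033[1m== %s ==\033[0m\n' "$*"; }

    die() { log_error "$*"; exit 1; }

    detect_platform() {
      case "$(uname -s)" in
        Darwin) OS_TYPE="macos" ;;
        Linux)  OS_TYPE="linux" ;;
        *)      OS_TYPE="unknown" ;;
      esac
      export OS_TYPE
    }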

View File: homeai-llm/docker/.env.example

@@ -0,0 +1,7 @@
# homeai-llm Docker secrets
# Copy to .env — never commit .env
DATA_DIR=${HOME}/homeai-data
# Open WebUI
WEBUI_SECRET_KEY=changeme_random_32_char_string_here
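
A convenient way to turn the template into a working .env with a real secret (hand-editing the file works just as well; the sed one-liner is only a shortcut):

    cd homeai-llm/docker
    cp .env.example .env
    # Replace the placeholder with 32 random hex characters (hex needs no sed escaping)
    sed -i.bak "s/^WEBUI_SECRET_KEY=.*/WEBUI_SECRET_KEY=$(openssl rand -hex 16)/" .env && rm .env.bak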

View File: homeai-llm/docker/docker-compose.yml

@@ -0,0 +1,45 @@
---
# homeai-llm/docker/docker-compose.yml
# P2 — Open WebUI
#
# Ollama runs NATIVELY (not in Docker) for GPU acceleration.
# This compose file only starts the Open WebUI frontend.
#
# Prerequisites:
# - Ollama installed and running on the host at port 11434
# - `homeai` Docker network exists (created by P1 setup)
#
# Usage:
# docker compose -f docker/docker-compose.yml up -d
name: homeai-llm

services:
  # ─── Open WebUI ──────────────────────────────────────────────────────────────
  open-webui:
    container_name: homeai-open-webui
    image: ghcr.io/open-webui/open-webui:main
    restart: unless-stopped
    ports:
      - "3030:8080" # Exposed on 3030 to avoid conflict with Gitea (3000)
    volumes:
      - ${DATA_DIR:-~/homeai-data}/open-webui:/app/backend/data
    environment:
      # Connect to Ollama on the host
      - OLLAMA_BASE_URL=http://host.docker.internal:11434
      - WEBUI_SECRET_KEY=${WEBUI_SECRET_KEY:-changeme_random_32_char}
      - ENABLE_SIGNUP=true
      - DEFAULT_MODELS=llama3.3:70b
    extra_hosts:
      - "host.docker.internal:host-gateway" # Linux compat
    networks:
      - homeai
    labels:
      - homeai.service=open-webui
      - homeai.url=http://localhost:3030

networks:
  homeai:
    external: true
    name: homeai
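
Since the container reaches Ollama through host.docker.internal (mapped to the host gateway on Linux by the extra_hosts entry), a quick sanity check after docker compose up -d is to hit both endpoints from the host:

    curl -sf http://localhost:11434/api/version                      # native Ollama API on the host
    curl -sf -o /dev/null -w '%{http_code}\n' http://localhost:3030  # Open WebUI via the published port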

View File: homeai-llm/launchd/com.homeai.ollama.plist

@@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>Label</key>
    <string>com.homeai.ollama</string>
    <key>ProgramArguments</key>
    <array>
        <!-- Adjust the path if Ollama lives elsewhere, e.g. /opt/homebrew/bin/ollama
             for a Homebrew install on Apple Silicon -->
        <string>/usr/local/bin/ollama</string>
        <string>serve</string>
    </array>
    <key>EnvironmentVariables</key>
    <dict>
        <key>OLLAMA_HOST</key>
        <string>0.0.0.0:11434</string>
        <!-- Metal GPU is used automatically on Apple Silicon; no env var needed -->
    </dict>
    <key>RunAtLoad</key>
    <true/>
    <key>KeepAlive</key>
    <true/>
    <key>StandardOutPath</key>
    <string>/tmp/homeai-ollama.log</string>
    <key>StandardErrorPath</key>
    <string>/tmp/homeai-ollama-error.log</string>
    <key>ThrottleInterval</key>
    <integer>5</integer>
</dict>
</plist>
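
P2's setup.sh installs this plist through the install_service helper in common.sh; loading it by hand as a per-user LaunchAgent would look roughly like this (a system-wide LaunchDaemon under /Library/LaunchDaemons is the other option):

    cp homeai-llm/launchd/com.homeai.ollama.plist ~/Library/LaunchAgents/
    launchctl load -w ~/Library/LaunchAgents/com.homeai.ollama.plist
    launchctl list | grep com.homeai.ollama      # confirm the job is loaded
    curl -sf http://localhost:11434/api/version  # confirm the server answers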

View File: homeai-llm/ollama-models.txt

@@ -0,0 +1,21 @@
# Ollama model manifest
# One model per line. Lines starting with # are ignored.
# Format: <model>:<tag> or just <model> for latest
#
# Pull all models: bash scripts/pull-models.sh
# Pull specific: ollama pull <model>
# ─── Primary (main conversation) ───────────────────────────────────────────────
llama3.3:70b
# ─── Alternative primary ───────────────────────────────────────────────────────
qwen2.5:72b
# ─── Fast / low-latency (voice pipeline, quick tasks) ─────────────────────────
qwen2.5:7b
# ─── Code generation ───────────────────────────────────────────────────────────
qwen2.5-coder:32b
# ─── Embeddings (mem0 memory store) ────────────────────────────────────────────
nomic-embed-text
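
Because the puller ignores comments, blank lines, and anything after the first space, previewing exactly what will be fetched is a one-liner:

    grep -vE '^[[:space:]]*#|^[[:space:]]*$' homeai-llm/ollama-models.txt | awk '{print $1}'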

View File: homeai-llm/scripts/benchmark.sh

@@ -0,0 +1,88 @@
#!/usr/bin/env bash
# scripts/benchmark.sh — Benchmark Ollama model inference speed
#
# Measures tokens/sec for each installed model.
# Results written to benchmark-results.md
#
# Usage:
# bash scripts/benchmark.sh
# bash scripts/benchmark.sh qwen2.5:7b # benchmark one model
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)"
source "${REPO_DIR}/scripts/common.sh"

RESULTS_FILE="${SCRIPT_DIR}/../benchmark-results.md"
PROMPT="Tell me a short story about a robot who loves cooking. Keep it to exactly 200 words."

if ! command_exists ollama; then
  die "Ollama not found."
fi
if ! curl -sf http://localhost:11434 -o /dev/null; then
  die "Ollama is not running."
fi

# Millisecond timestamp. date's %N is GNU-only; fall back to python3 on macOS/BSD,
# where `date +%s%3N` leaves a literal "3N" in the output.
now_ms() {
  local t
  t="$(date +%s%3N)"
  if [[ "$t" == *N* ]]; then
    python3 -c 'import time; print(int(time.time() * 1000))'
  else
    echo "$t"
  fi
}

benchmark_model() {
  local model="$1"
  log_step "Benchmarking $model..."
  local start end elapsed
  start=$(now_ms)
  local response
  response=$(ollama run "$model" "$PROMPT" 2>&1) || {
    log_error "Model $model failed to run."
    echo "| $model | ERROR | — |"
    return
  }
  end=$(now_ms)
  elapsed=$(( end - start ))
  local word_count
  word_count=$(echo "$response" | wc -w)
  local tokens_est=$(( word_count * 4 / 3 )) # rough estimate: 1 token ≈ 0.75 words
  local elapsed_sec
  elapsed_sec=$(echo "scale=1; $elapsed / 1000" | bc)
  local tps
  tps=$(echo "scale=1; $tokens_est / ($elapsed / 1000)" | bc 2>/dev/null || echo "?")
  # Console line goes to stderr so that only the Markdown row reaches the results
  # file when this function is piped into tee below.
  printf " %-30s %6s tok/s (%ss)\n" "$model" "$tps" "$elapsed_sec" >&2
  echo "| \`$model\` | ${tps} tok/s | ${elapsed_sec}s |"
}

log_section "Ollama Benchmark"
log_info "Prompt: '$PROMPT'"
echo ""

if [[ -n "${1:-}" ]]; then
  models=("$@")
else
  # Get list of installed models (while-read loop instead of mapfile, which is
  # unavailable in macOS's default bash 3.2)
  models=()
  while IFS= read -r name; do
    models+=("$name")
  done < <(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}')
fi

if [[ ${#models[@]} -eq 0 ]]; then
  die "No models installed. Run: bash scripts/pull-models.sh"
fi

{
  echo "# Ollama Benchmark Results"
  echo "> Generated: $(date)"
  echo ""
  echo "| Model | Speed | Time for ~200 tok |"
  echo "|---|---|---|"
} > "$RESULTS_FILE"

for model in "${models[@]}"; do
  benchmark_model "$model" | tee -a "$RESULTS_FILE"
done
echo "" >> "$RESULTS_FILE"

log_success "Results written to $RESULTS_FILE"
echo ""
cat "$RESULTS_FILE"
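
The word-count heuristic above only approximates throughput. Ollama's generate endpoint reports exact token counts and timings (eval_count, and eval_duration in nanoseconds), so a more precise measurement can query the API directly; a sketch, assuming jq is installed:

    model="qwen2.5:7b"
    resp=$(curl -sf http://localhost:11434/api/generate \
      -d "{\"model\": \"${model}\", \"prompt\": \"Tell me a short story about a robot who loves cooking.\", \"stream\": false}")
    tokens=$(echo "$resp" | jq '.eval_count')
    nanos=$(echo "$resp" | jq '.eval_duration')
    echo "${model}: $(echo "scale=1; ${tokens} * 1000000000 / ${nanos}" | bc) tok/s"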

View File: homeai-llm/scripts/pull-models.sh

@@ -0,0 +1,86 @@
#!/usr/bin/env bash
# scripts/pull-models.sh — Pull all Ollama models from the manifest
#
# Usage:
# bash scripts/pull-models.sh # pull all models
# bash scripts/pull-models.sh nomic-embed-text # pull specific model
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)"
source "${REPO_DIR}/scripts/common.sh"

MANIFEST="${SCRIPT_DIR}/../ollama-models.txt"

if ! command_exists ollama; then
  die "Ollama not found. Run: bash homeai-llm/setup.sh first."
fi
if ! curl -sf http://localhost:11434 -o /dev/null; then
  die "Ollama is not running. Start it first."
fi

# If a specific model is given as arg, just pull that
if [[ $# -gt 0 ]]; then
  for model in "$@"; do
    log_info "Pulling $model..."
    ollama pull "$model"
    log_success "Pulled $model"
  done
  exit 0
fi

# Pull all models from manifest
log_section "Pulling Ollama models"
total=0; pulled=0; skipped=0; failed=0

while IFS= read -r line || [[ -n "$line" ]]; do
  # Skip comments and blank lines
  [[ "$line" =~ ^[[:space:]]*# ]] && continue
  [[ -z "${line// }" ]] && continue
  model="${line%% *}" # strip any trailing comment
  total=$((total + 1))

  # Check if model is already present
  if ollama list 2>/dev/null | grep -q "^${model%%:*}"; then
    tag="${model##*:}"
    model_name="${model%%:*}"
    if [[ "$tag" != "$model_name" ]]; then
      # Has explicit tag — check exact match
      if ollama list 2>/dev/null | grep -q "^${model_name}.*${tag}"; then
        log_info "Already present: $model — skipping"
        skipped=$((skipped + 1))
        continue
      fi
    else
      log_info "Already present: $model — skipping"
      skipped=$((skipped + 1))
      continue
    fi
  fi

  log_step "Pulling $model..."
  if ollama pull "$model"; then
    log_success "Pulled $model"
    pulled=$((pulled + 1))
  else
    log_error "Failed to pull $model"
    failed=$((failed + 1))
  fi
done < "$MANIFEST"

echo ""
log_info "Pull complete: ${pulled} pulled, ${skipped} already present, ${failed} failed (of ${total} total)"

if [[ $failed -gt 0 ]]; then
  log_warn "Some models failed to pull. Check your internet connection and retry."
  exit 1
fi

echo ""
log_info "Installed models:"
ollama list

View File: homeai-llm/setup.sh (new file, 227 lines)

@@ -0,0 +1,227 @@
#!/usr/bin/env bash
# homeai-llm/setup.sh — P2: Ollama + Open WebUI
#
# Installs Ollama natively (for GPU access), sets up auto-start,
# pulls models from the manifest, and starts Open WebUI in Docker.
#
# GPU support:
# Linux — CUDA (NVIDIA) or ROCm (AMD) or CPU fallback
# macOS — Metal (automatic for Apple Silicon)
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
# shellcheck source=../scripts/common.sh
source "${REPO_DIR}/scripts/common.sh"

COMPOSE_FILE="${SCRIPT_DIR}/docker/docker-compose.yml"
ENV_FILE="${SCRIPT_DIR}/docker/.env"
ENV_EXAMPLE="${SCRIPT_DIR}/docker/.env.example"
MANIFEST="${SCRIPT_DIR}/ollama-models.txt"

# ─── Pre-flight ────────────────────────────────────────────────────────────────
preflight() {
  log_section "P2 Preflight"
  detect_platform
  detect_gpu

  # Check P1 dependency (homeai Docker network must exist)
  if ! docker network inspect homeai &>/dev/null; then
    log_warn "Docker network 'homeai' not found. Has P1 been run?"
    log_warn "Run: ./setup.sh p1 first, or: docker network create homeai"
    if ! confirm "Create 'homeai' network now and continue?"; then
      die "Aborted. Run ./setup.sh p1 first."
    fi
    docker network create homeai
  fi

  # Bootstrap .env for Open WebUI
  if [[ ! -f "$ENV_FILE" && -f "$ENV_EXAMPLE" ]]; then
    cp "$ENV_EXAMPLE" "$ENV_FILE"
    log_warn "Created ${ENV_FILE} from .env.example"
    log_warn "Set WEBUI_SECRET_KEY in ${ENV_FILE} (run: openssl rand -hex 16)"
  fi

  # Create data dir
  load_env "$ENV_FILE" 2>/dev/null || true
  local data_dir="${DATA_DIR:-${HOME}/homeai-data}"
  mkdir -p "${data_dir}/open-webui"
}

# ─── Ollama Installation ───────────────────────────────────────────────────────
install_ollama() {
  log_section "Ollama"
  if command_exists ollama; then
    log_success "Ollama already installed: $(ollama --version 2>/dev/null || echo 'version unknown')"
    return
  fi

  log_info "Installing Ollama..."
  if [[ "$OS_TYPE" == "macos" ]]; then
    if command_exists brew; then
      brew install ollama
    else
      log_info "Downloading Ollama for macOS..."
      curl -fsSL https://ollama.com/install.sh | sh
    fi
  else
    # Linux — official install script handles CUDA/ROCm detection
    log_info "Downloading and running Ollama installer..."
    curl -fsSL https://ollama.com/install.sh | sh
  fi

  if ! command_exists ollama; then
    die "Ollama installation failed. Check the output above."
  fi
  log_success "Ollama installed: $(ollama --version 2>/dev/null || echo 'ok')"
}

# ─── Ollama Service ────────────────────────────────────────────────────────────
setup_ollama_service() {
  log_section "Ollama service"

  # Check if already running
  if curl -sf http://localhost:11434 -o /dev/null 2>/dev/null; then
    log_success "Ollama is already running."
    return
  fi

  install_service \
    "homeai-ollama" \
    "${SCRIPT_DIR}/systemd/homeai-ollama.service" \
    "${SCRIPT_DIR}/launchd/com.homeai.ollama.plist"

  # Give it a few seconds to start
  log_step "Waiting for Ollama to start..."
  local i=0
  while ! curl -sf http://localhost:11434 -o /dev/null 2>/dev/null; do
    sleep 2; i=$((i + 2))
    if [[ $i -ge 30 ]]; then
      log_warn "Ollama did not start within 30s. Trying to start manually..."
      ollama serve &>/dev/null &
      sleep 5
      break
    fi
  done

  if curl -sf http://localhost:11434 -o /dev/null 2>/dev/null; then
    log_success "Ollama is running."
  else
    die "Ollama failed to start. Check: ollama serve"
  fi
}

# ─── GPU Verification ──────────────────────────────────────────────────────────
verify_gpu() {
  log_section "GPU verification"
  local models_response
  models_response=$(curl -sf http://localhost:11434/api/tags 2>/dev/null || echo '{}')

  case "$GPU_TYPE" in
    metal)
      log_success "Apple Silicon Metal GPU — inference will be fast."
      ;;
    cuda)
      log_info "NVIDIA CUDA GPU detected: ${GPU_INFO:-unknown}"
      # Verify Ollama can see it. Note: `ollama run` auto-pulls qwen2.5:7b if it
      # is not installed yet, so this check may download the model first.
      if ollama run qwen2.5:7b "Say OK" &>/dev/null; then
        log_success "CUDA inference verified."
      else
        log_warn "Could not verify CUDA inference. Ollama may fall back to CPU."
      fi
      ;;
    rocm)
      log_info "AMD ROCm GPU detected: ${GPU_INFO:-unknown}"
      log_warn "ROCm support depends on your GPU and driver version."
      ;;
    none)
      log_warn "No GPU detected — Ollama will use CPU."
      log_warn "70B parameter models will be very slow on CPU. Consider qwen2.5:7b for testing."
      ;;
  esac
}

# ─── Pull Models ───────────────────────────────────────────────────────────────
pull_models() {
  log_section "Pulling models"
  if [[ ! -f "$MANIFEST" ]]; then
    log_warn "No model manifest at $MANIFEST — skipping model pull."
    return
  fi

  # On CPU-only, skip the big models and warn
  if [[ "$GPU_TYPE" == "none" ]]; then
    log_warn "CPU-only mode: skipping 70B models (too slow). Pulling small models only."
    log_warn "Edit $MANIFEST to select which models to pull, then run:"
    log_warn "  bash ${SCRIPT_DIR}/scripts/pull-models.sh"
    log_warn "Pulling only: qwen2.5:7b and nomic-embed-text"
    ollama pull qwen2.5:7b
    ollama pull nomic-embed-text
    return
  fi

  bash "${SCRIPT_DIR}/scripts/pull-models.sh"
}

# ─── Open WebUI ────────────────────────────────────────────────────────────────
start_open_webui() {
  log_section "Open WebUI"
  ensure_docker_running

  log_step "Pulling Open WebUI image..."
  docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" pull

  log_step "Starting Open WebUI..."
  docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" up -d

  wait_for_http "http://localhost:3030" "Open WebUI" 90
}

# ─── Register services ─────────────────────────────────────────────────────────
register_services() {
  write_env_service "OLLAMA_URL" "http://localhost:11434"
  write_env_service "OLLAMA_API_URL" "http://localhost:11434/v1"
  write_env_service "OPEN_WEBUI_URL" "http://localhost:3030"
  log_success "Service URLs written to ~/.env.services"
}

# ─── Summary ───────────────────────────────────────────────────────────────────
print_llm_summary() {
  local model_list
  model_list=$(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}' | tr '\n' ', ' | sed 's/,$//')

  print_summary "P2 LLM — Ready" \
    "Ollama API"    "http://localhost:11434" \
    "OpenAI compat" "http://localhost:11434/v1" \
    "Open WebUI"    "http://localhost:3030" \
    "GPU"           "${GPU_TYPE}" \
    "Models"        "${model_list:-none pulled yet}"

  echo "  Next steps:"
  echo "    1. Open http://localhost:3030 and create your admin account"
  # OLLAMA_PRIMARY_MODEL may be exported by common.sh or the env files; fall back
  # to the manifest's primary model so `set -u` cannot trip here.
  echo "    2. Test a chat with ${OLLAMA_PRIMARY_MODEL:-llama3.3:70b}"
  echo "    3. Run benchmark: bash ${SCRIPT_DIR}/scripts/benchmark.sh"
  echo "    4. Run: ./setup.sh p3 (Voice pipeline)"
  echo ""
}

# ─── Main ──────────────────────────────────────────────────────────────────────
main() {
  preflight
  install_ollama
  setup_ollama_service
  verify_gpu
  pull_models
  start_open_webui
  register_services
  print_llm_summary
}

main "$@"
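
Assuming write_env_service appends plain KEY=value lines to ~/.env.services (the helper lives in common.sh and is not shown here), later phases can discover P2's endpoints by sourcing that file:

    # e.g. inside a P3 script
    set -a                                # export everything the registry defines
    source "${HOME}/.env.services"
    set +a
    curl -sf "${OLLAMA_API_URL}/models"   # OpenAI-compatible model listing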

View File: homeai-llm/systemd/homeai-ollama.service

@@ -0,0 +1,26 @@
[Unit]
Description=Ollama AI inference server (HomeAI)
Documentation=https://ollama.com
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# Note: %i only expands to a real value in a templated unit (homeai-ollama@<user>.service).
# If this file is installed verbatim as homeai-ollama.service, replace %i with an actual
# user, or let the setup script substitute it.
User=%i
ExecStart=/usr/local/bin/ollama serve
Restart=always
RestartSec=5

# Environment
Environment=OLLAMA_HOST=0.0.0.0:11434
Environment=OLLAMA_MODELS=/usr/share/ollama/.ollama/models

# Limits
LimitNOFILE=65536

# CUDA GPU support
# Uncomment and set if you have multiple GPUs:
# Environment=CUDA_VISIBLE_DEVICES=0

[Install]
WantedBy=default.target
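
For reference, installing the unit by hand on Linux (the repo's install_service helper is expected to do the equivalent, including substituting a real user for %i):

    sudo cp homeai-llm/systemd/homeai-ollama.service /etc/systemd/system/
    sudo sed -i "s/^User=%i/User=${USER}/" /etc/systemd/system/homeai-ollama.service
    sudo systemctl daemon-reload
    sudo systemctl enable --now homeai-ollama.service
    journalctl -u homeai-ollama -f       # follow the service log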