Add self-deploying setup scripts for all sub-projects (P1-P8)
- Root setup.sh orchestrator with per-phase dispatch (./setup.sh p1..p8 | all | status)
- Makefile convenience targets (make infra, make llm, make status, etc.)
- scripts/common.sh: shared bash library for OS detection, Docker helpers,
  service management (launchd/systemd), package install, env management
- .env.example + .gitignore: shared config template and secret exclusions

P1 (homeai-infra): full implementation
- docker-compose.yml: Uptime Kuma, code-server, n8n
- Note: Home Assistant, Portainer, Gitea are pre-existing instances
- setup.sh: Docker install, homeai network, container health checks

P2 (homeai-llm): full implementation
- Ollama native install with CUDA/ROCm/Metal auto-detection
- launchd plist (macOS) + systemd service (Linux) for auto-start
- scripts/pull-models.sh: idempotent model puller from manifest
- scripts/benchmark.sh: tokens/sec measurement per model
- Open WebUI on port 3030 (avoids Gitea :3000 conflict)

P3-P8: working stubs with prerequisite checks and TODO sections

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
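The root orchestrator and Makefile referenced above are not part of this diff. For orientation only, a minimal sketch of the per-phase dispatch described in the first bullet could look like this (hypothetical; only the P1/P2 directory names are known from this commit):

    #!/usr/bin/env bash
    # setup.sh (root) — hypothetical dispatch sketch, not part of this commit
    set -euo pipefail

    case "${1:-}" in
      p1)     bash homeai-infra/setup.sh ;;
      p2)     bash homeai-llm/setup.sh ;;
      # p3..p8 dispatch elided
      all)    for p in p1 p2; do "$0" "$p"; done ;;   # extend through p8
      status) docker ps --filter label=homeai.service \
                --format 'table {{.Names}}\t{{.Status}}\t{{.Ports}}' ;;
      *)      echo "Usage: ./setup.sh p1..p8 | all | status" >&2; exit 1 ;;
    esac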
homeai-llm/docker/.env.example (new file)
@@ -0,0 +1,7 @@
# homeai-llm Docker secrets
# Copy to .env — never commit .env

DATA_DIR=${HOME}/homeai-data

# Open WebUI
WEBUI_SECRET_KEY=changeme_random_32_char_string_here
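To bootstrap the file by hand (setup.sh below does the copy automatically and suggests the same key command):

    cp homeai-llm/docker/.env.example homeai-llm/docker/.env
    openssl rand -hex 16    # paste the output into WEBUI_SECRET_KEY in .env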
homeai-llm/docker/docker-compose.yml (new file)
@@ -0,0 +1,45 @@
---
# homeai-llm/docker/docker-compose.yml
# P2 — Open WebUI
#
# Ollama runs NATIVELY (not in Docker) for GPU acceleration.
# This compose file only starts the Open WebUI frontend.
#
# Prerequisites:
#   - Ollama installed and running on the host at port 11434
#   - `homeai` Docker network exists (created by P1 setup)
#
# Usage:
#   docker compose -f docker/docker-compose.yml up -d

name: homeai-llm

services:
  # ─── Open WebUI ──────────────────────────────────────────────────────────────
  open-webui:
    container_name: homeai-open-webui
    image: ghcr.io/open-webui/open-webui:main
    restart: unless-stopped
    ports:
      - "3030:8080"   # Exposed on 3030 to avoid conflict with Gitea (3000)
    volumes:
      - ${DATA_DIR:-~/homeai-data}/open-webui:/app/backend/data
    environment:
      # Connect to Ollama on the host
      - OLLAMA_BASE_URL=http://host.docker.internal:11434
      - WEBUI_SECRET_KEY=${WEBUI_SECRET_KEY:-changeme_random_32_char}
      - ENABLE_SIGNUP=true
      - DEFAULT_MODELS=llama3.3:70b
    extra_hosts:
      - "host.docker.internal:host-gateway"   # Linux compat
    networks:
      - homeai
    labels:
      - homeai.service=open-webui
      - homeai.url=http://localhost:3030

networks:
  homeai:
    external: true
    name: homeai
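A quick host-side check that both halves are up once the compose file is applied (illustrative; assumes Ollama was already started by the P2 setup):

    docker compose -f homeai-llm/docker/docker-compose.yml up -d
    curl -sf http://localhost:11434/api/tags          # native Ollama lists installed models
    curl -sf -o /dev/null http://localhost:3030       # Open WebUI responds once the container is up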
homeai-llm/launchd/com.homeai.ollama.plist (new file)
@@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>Label</key>
    <string>com.homeai.ollama</string>

    <key>ProgramArguments</key>
    <array>
        <!-- Adjust if ollama is installed elsewhere, e.g. /opt/homebrew/bin/ollama on Apple Silicon Homebrew installs -->
        <string>/usr/local/bin/ollama</string>
        <string>serve</string>
    </array>

    <key>EnvironmentVariables</key>
    <dict>
        <key>OLLAMA_HOST</key>
        <string>0.0.0.0:11434</string>
        <!-- Metal GPU is used automatically on Apple Silicon; no env var needed -->
    </dict>

    <key>RunAtLoad</key>
    <true/>

    <key>KeepAlive</key>
    <true/>

    <key>StandardOutPath</key>
    <string>/tmp/homeai-ollama.log</string>

    <key>StandardErrorPath</key>
    <string>/tmp/homeai-ollama-error.log</string>

    <key>ThrottleInterval</key>
    <integer>5</integer>
</dict>
</plist>
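install_service in scripts/common.sh is what loads this plist during setup (setup.sh passes it the path); the manual macOS equivalent would be roughly:

    cp homeai-llm/launchd/com.homeai.ollama.plist ~/Library/LaunchAgents/
    launchctl load ~/Library/LaunchAgents/com.homeai.ollama.plist
    launchctl list | grep com.homeai.ollama    # confirm it is loaded
    curl -sf http://localhost:11434            # responds "Ollama is running"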
homeai-llm/ollama-models.txt (new file)
@@ -0,0 +1,21 @@
# Ollama model manifest
# One model per line. Lines starting with # are ignored.
# Format: <model>:<tag> or just <model> for latest
#
# Pull all models:  bash scripts/pull-models.sh
# Pull specific:    ollama pull <model>

# ─── Primary (main conversation) ───────────────────────────────────────────────
llama3.3:70b

# ─── Alternative primary ───────────────────────────────────────────────────────
qwen2.5:72b

# ─── Fast / low-latency (voice pipeline, quick tasks) ─────────────────────────
qwen2.5:7b

# ─── Code generation ───────────────────────────────────────────────────────────
qwen2.5-coder:32b

# ─── Embeddings (mem0 memory store) ────────────────────────────────────────────
nomic-embed-text
homeai-llm/scripts/benchmark.sh (new file)
@@ -0,0 +1,88 @@
#!/usr/bin/env bash
# scripts/benchmark.sh — Benchmark Ollama model inference speed
#
# Measures tokens/sec for each installed model.
# Results written to benchmark-results.md
#
# Usage:
#   bash scripts/benchmark.sh
#   bash scripts/benchmark.sh qwen2.5:7b   # benchmark one model
#
# Note: listing installed models uses mapfile, which requires bash >= 4
# (macOS ships 3.2; install a newer bash via Homebrew).

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)"
source "${REPO_DIR}/scripts/common.sh"

RESULTS_FILE="${SCRIPT_DIR}/../benchmark-results.md"
PROMPT="Tell me a short story about a robot who loves cooking. Keep it to exactly 200 words."

if ! command_exists ollama; then
    die "Ollama not found."
fi

if ! curl -sf http://localhost:11434 -o /dev/null; then
    die "Ollama is not running."
fi

# Millisecond timestamp. GNU date supports %N; BSD/macOS date does not,
# so fall back to whole-second resolution there.
now_ms() {
    local t
    t=$(date +%s%3N)
    if [[ "$t" == *N* ]]; then
        t=$(( $(date +%s) * 1000 ))
    fi
    echo "$t"
}

benchmark_model() {
    local model="$1"
    log_step "Benchmarking $model..."

    local start end elapsed
    start=$(now_ms)

    local response
    response=$(ollama run "$model" "$PROMPT" 2>&1) || {
        log_error "Model $model failed to run."
        echo "| $model | ERROR | — |"
        return
    }

    end=$(now_ms)
    elapsed=$(( end - start ))

    local word_count
    word_count=$(echo "$response" | wc -w)
    local tokens_est=$(( word_count * 4 / 3 ))   # rough estimate: 1 token ≈ 0.75 words
    local elapsed_sec
    elapsed_sec=$(echo "scale=1; $elapsed / 1000" | bc)
    local tps
    tps=$(echo "scale=1; $tokens_est / ($elapsed / 1000)" | bc 2>/dev/null || echo "?")

    # Progress line goes to stderr so only the markdown table row reaches the results file
    printf " %-30s %6s tok/s (%ss)\n" "$model" "$tps" "$elapsed_sec" >&2
    echo "| \`$model\` | ${tps} tok/s | ${elapsed_sec}s |"
}

log_section "Ollama Benchmark"
log_info "Prompt: '$PROMPT'"
echo ""

if [[ -n "${1:-}" ]]; then
    models=("$@")
else
    # Get list of installed models
    mapfile -t models < <(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}')
fi

if [[ ${#models[@]} -eq 0 ]]; then
    die "No models installed. Run: bash scripts/pull-models.sh"
fi

{
    echo "# Ollama Benchmark Results"
    echo "> Generated: $(date)"
    echo ""
    echo "| Model | Speed | Time for ~200 tok |"
    echo "|---|---|---|"
} > "$RESULTS_FILE"

for model in "${models[@]}"; do
    benchmark_model "$model" | tee -a "$RESULTS_FILE"
done

echo "" >> "$RESULTS_FILE"

log_success "Results written to $RESULTS_FILE"
echo ""
cat "$RESULTS_FILE"
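The generated benchmark-results.md is a small markdown table; with a couple of manifest models installed it has roughly this shape (placeholders, not measurements):

    # Ollama Benchmark Results
    > Generated: <date>

    | Model | Speed | Time for ~200 tok |
    |---|---|---|
    | `qwen2.5:7b` | <n> tok/s | <n>s |
    | `llama3.3:70b` | <n> tok/s | <n>s |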
homeai-llm/scripts/pull-models.sh (new file)
@@ -0,0 +1,86 @@
#!/usr/bin/env bash
# scripts/pull-models.sh — Pull all Ollama models from the manifest
#
# Usage:
#   bash scripts/pull-models.sh                    # pull all models
#   bash scripts/pull-models.sh nomic-embed-text   # pull specific model

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)"
source "${REPO_DIR}/scripts/common.sh"

MANIFEST="${SCRIPT_DIR}/../ollama-models.txt"

if ! command_exists ollama; then
    die "Ollama not found. Run: bash homeai-llm/setup.sh first."
fi

if ! curl -sf http://localhost:11434 -o /dev/null; then
    die "Ollama is not running. Start it first."
fi

# If a specific model is given as arg, just pull that
if [[ $# -gt 0 ]]; then
    for model in "$@"; do
        log_info "Pulling $model..."
        ollama pull "$model"
        log_success "Pulled $model"
    done
    exit 0
fi

# Pull all models from manifest
log_section "Pulling Ollama models"

total=0; pulled=0; skipped=0; failed=0

while IFS= read -r line || [[ -n "$line" ]]; do
    # Skip comments and blank lines
    [[ "$line" =~ ^[[:space:]]*# ]] && continue
    [[ -z "${line// }" ]] && continue

    model="${line%% *}"   # strip any trailing comment
    total=$((total + 1))

    # Check if model is already present
    if ollama list 2>/dev/null | grep -q "^${model%%:*}"; then
        tag="${model##*:}"
        model_name="${model%%:*}"
        if [[ "$tag" != "$model_name" ]]; then
            # Has explicit tag — check exact match
            if ollama list 2>/dev/null | grep -q "^${model_name}.*${tag}"; then
                log_info "Already present: $model — skipping"
                skipped=$((skipped + 1))
                continue
            fi
        else
            log_info "Already present: $model — skipping"
            skipped=$((skipped + 1))
            continue
        fi
    fi

    log_step "Pulling $model..."
    if ollama pull "$model"; then
        log_success "Pulled $model"
        pulled=$((pulled + 1))
    else
        log_error "Failed to pull $model"
        failed=$((failed + 1))
    fi

done < "$MANIFEST"

echo ""
log_info "Pull complete: ${pulled} pulled, ${skipped} already present, ${failed} failed (of ${total} total)"

if [[ $failed -gt 0 ]]; then
    log_warn "Some models failed to pull. Check your internet connection and retry."
    exit 1
fi

echo ""
log_info "Installed models:"
ollama list
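Because already-present models are skipped, the manifest can be extended and the script re-run without re-downloading anything (the model name below is illustrative):

    echo "mistral:7b" >> homeai-llm/ollama-models.txt   # hypothetical new entry
    bash homeai-llm/scripts/pull-models.sh              # pulls only what is missing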
homeai-llm/setup.sh (new file)
@@ -0,0 +1,227 @@
#!/usr/bin/env bash
# homeai-llm/setup.sh — P2: Ollama + Open WebUI
#
# Installs Ollama natively (for GPU access), sets up auto-start,
# pulls models from the manifest, and starts Open WebUI in Docker.
#
# GPU support:
#   Linux — CUDA (NVIDIA) or ROCm (AMD) or CPU fallback
#   macOS — Metal (automatic for Apple Silicon)

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
# shellcheck source=../scripts/common.sh
source "${REPO_DIR}/scripts/common.sh"

COMPOSE_FILE="${SCRIPT_DIR}/docker/docker-compose.yml"
ENV_FILE="${SCRIPT_DIR}/docker/.env"
ENV_EXAMPLE="${SCRIPT_DIR}/docker/.env.example"
MANIFEST="${SCRIPT_DIR}/ollama-models.txt"

# ─── Pre-flight ────────────────────────────────────────────────────────────────
preflight() {
    log_section "P2 Preflight"
    detect_platform
    detect_gpu

    # Check P1 dependency (homeai Docker network must exist)
    if ! docker network inspect homeai &>/dev/null; then
        log_warn "Docker network 'homeai' not found. Has P1 been run?"
        log_warn "Run: ./setup.sh p1 first, or: docker network create homeai"
        if ! confirm "Create 'homeai' network now and continue?"; then
            die "Aborted. Run ./setup.sh p1 first."
        fi
        docker network create homeai
    fi

    # Bootstrap .env for Open WebUI
    if [[ ! -f "$ENV_FILE" && -f "$ENV_EXAMPLE" ]]; then
        cp "$ENV_EXAMPLE" "$ENV_FILE"
        log_warn "Created ${ENV_FILE} from .env.example"
        log_warn "Set WEBUI_SECRET_KEY in ${ENV_FILE} (run: openssl rand -hex 16)"
    fi

    # Create data dir
    load_env "$ENV_FILE" 2>/dev/null || true
    local data_dir="${DATA_DIR:-${HOME}/homeai-data}"
    mkdir -p "${data_dir}/open-webui"
}

# ─── Ollama Installation ───────────────────────────────────────────────────────
install_ollama() {
    log_section "Ollama"

    if command_exists ollama; then
        log_success "Ollama already installed: $(ollama --version 2>/dev/null || echo 'version unknown')"
        return
    fi

    log_info "Installing Ollama..."

    if [[ "$OS_TYPE" == "macos" ]]; then
        if command_exists brew; then
            brew install ollama
        else
            # The ollama.com install script supports Linux only
            die "Homebrew not found. Install Ollama from https://ollama.com/download/mac (or install Homebrew), then re-run this script."
        fi
    else
        # Linux — official install script handles CUDA/ROCm detection
        log_info "Downloading and running Ollama installer..."
        curl -fsSL https://ollama.com/install.sh | sh
    fi

    if ! command_exists ollama; then
        die "Ollama installation failed. Check the output above."
    fi

    log_success "Ollama installed: $(ollama --version 2>/dev/null || echo 'ok')"
}

# ─── Ollama Service ────────────────────────────────────────────────────────────
setup_ollama_service() {
    log_section "Ollama service"

    # Check if already running
    if curl -sf http://localhost:11434 -o /dev/null 2>/dev/null; then
        log_success "Ollama is already running."
        return
    fi

    install_service \
        "homeai-ollama" \
        "${SCRIPT_DIR}/systemd/homeai-ollama.service" \
        "${SCRIPT_DIR}/launchd/com.homeai.ollama.plist"

    # Give it a few seconds to start
    log_step "Waiting for Ollama to start..."
    local i=0
    while ! curl -sf http://localhost:11434 -o /dev/null 2>/dev/null; do
        sleep 2; i=$((i + 2))
        if [[ $i -ge 30 ]]; then
            log_warn "Ollama did not start within 30s. Trying to start manually..."
            ollama serve &>/dev/null &
            sleep 5
            break
        fi
    done

    if curl -sf http://localhost:11434 -o /dev/null 2>/dev/null; then
        log_success "Ollama is running."
    else
        die "Ollama failed to start. Check: ollama serve"
    fi
}

# ─── GPU Verification ──────────────────────────────────────────────────────────
verify_gpu() {
    log_section "GPU verification"

    local models_response
    models_response=$(curl -sf http://localhost:11434/api/tags 2>/dev/null || echo '{}')

    case "$GPU_TYPE" in
        metal)
            log_success "Apple Silicon Metal GPU — inference will be fast."
            ;;
        cuda)
            log_info "NVIDIA CUDA GPU detected: ${GPU_INFO:-unknown}"
            # Verify Ollama can see it (ollama run pulls qwen2.5:7b first if it is not present yet)
            if ollama run qwen2.5:7b "Say OK" &>/dev/null; then
                log_success "CUDA inference verified."
            else
                log_warn "Could not verify CUDA inference. Ollama may fall back to CPU."
            fi
            ;;
        rocm)
            log_info "AMD ROCm GPU detected: ${GPU_INFO:-unknown}"
            log_warn "ROCm support depends on your GPU and driver version."
            ;;
        none)
            log_warn "No GPU detected — Ollama will use CPU."
            log_warn "70B parameter models will be very slow on CPU. Consider qwen2.5:7b for testing."
            ;;
    esac
}

# ─── Pull Models ───────────────────────────────────────────────────────────────
pull_models() {
    log_section "Pulling models"

    if [[ ! -f "$MANIFEST" ]]; then
        log_warn "No model manifest at $MANIFEST — skipping model pull."
        return
    fi

    # On CPU-only, skip the big models and warn
    if [[ "$GPU_TYPE" == "none" ]]; then
        log_warn "CPU-only mode: skipping 70B models (too slow). Pulling small models only."
        log_warn "Edit $MANIFEST to select which models to pull, then run:"
        log_warn "  bash ${SCRIPT_DIR}/scripts/pull-models.sh"
        log_warn "Pulling only: qwen2.5:7b and nomic-embed-text"
        ollama pull qwen2.5:7b
        ollama pull nomic-embed-text
        return
    fi

    bash "${SCRIPT_DIR}/scripts/pull-models.sh"
}

# ─── Open WebUI ────────────────────────────────────────────────────────────────
start_open_webui() {
    log_section "Open WebUI"

    ensure_docker_running

    log_step "Pulling Open WebUI image..."
    docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" pull

    log_step "Starting Open WebUI..."
    docker_compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" up -d

    wait_for_http "http://localhost:3030" "Open WebUI" 90
}

# ─── Register services ─────────────────────────────────────────────────────────
register_services() {
    write_env_service "OLLAMA_URL" "http://localhost:11434"
    write_env_service "OLLAMA_API_URL" "http://localhost:11434/v1"
    write_env_service "OPEN_WEBUI_URL" "http://localhost:3030"
    log_success "Service URLs written to ~/.env.services"
}

# ─── Summary ───────────────────────────────────────────────────────────────────
print_llm_summary() {
    local model_list
    model_list=$(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}' | tr '\n' ',' | sed 's/,$//')

    print_summary "P2 LLM — Ready" \
        "Ollama API"    "http://localhost:11434" \
        "OpenAI compat" "http://localhost:11434/v1" \
        "Open WebUI"    "http://localhost:3030" \
        "GPU"           "${GPU_TYPE}" \
        "Models"        "${model_list:-none pulled yet}"

    echo "  Next steps:"
    # Default to the manifest's primary model if common.sh does not set OLLAMA_PRIMARY_MODEL
    echo "    1. Open http://localhost:3030 and create your admin account"
    echo "    2. Test a chat with ${OLLAMA_PRIMARY_MODEL:-llama3.3:70b}"
    echo "    3. Run benchmark: bash ${SCRIPT_DIR}/scripts/benchmark.sh"
    echo "    4. Run: ./setup.sh p3 (Voice pipeline)"
    echo ""
}

# ─── Main ──────────────────────────────────────────────────────────────────────
main() {
    preflight
    install_ollama
    setup_ollama_service
    verify_gpu
    pull_models
    start_open_webui
    register_services
    print_llm_summary
}

main "$@"
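Per the commit message, P2 can be run through the root orchestrator, the Makefile target, or directly:

    ./setup.sh p2               # from the repo root
    make llm                    # equivalent Makefile target
    bash homeai-llm/setup.sh    # run the phase script on its own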
homeai-llm/systemd/homeai-ollama.service (new file)
@@ -0,0 +1,26 @@
[Unit]
Description=Ollama AI inference server (HomeAI)
Documentation=https://ollama.com
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# The Linux installer creates the 'ollama' user; the %i specifier only expands in template units
User=ollama
ExecStart=/usr/local/bin/ollama serve
Restart=always
RestartSec=5

# Environment
Environment=OLLAMA_HOST=0.0.0.0:11434
Environment=OLLAMA_MODELS=/usr/share/ollama/.ollama/models

# Limits
LimitNOFILE=65536

# CUDA GPU support
# Uncomment and set if you have multiple GPUs:
# Environment=CUDA_VISIBLE_DEVICES=0

[Install]
WantedBy=default.target
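On Linux this unit is installed by install_service during setup; done by hand it would be approximately:

    sudo cp homeai-llm/systemd/homeai-ollama.service /etc/systemd/system/
    sudo systemctl daemon-reload
    sudo systemctl enable --now homeai-ollama
    systemctl status homeai-ollama --no-pager
    curl -sf http://localhost:11434     # responds "Ollama is running"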