Files
homeai/homeai-rpi/setup.sh
Aodhan Collins 1e52c002c2 feat: Raspberry Pi 5 kitchen satellite — Wyoming voice satellite with ReSpeaker pHAT
Add full Pi 5 satellite setup with ReSpeaker 2-Mics pHAT for kitchen
voice control via Wyoming protocol. Includes satellite_wrapper.py that
monkey-patches WakeStreamingSatellite to fix four compounding bugs:

- TTS echo suppression: mutes wake word detection while speaker plays
- Server writer race fix: checks _writer before streaming, re-arms on None
- Streaming timeout: auto-recovers after 30s if pipeline hangs
- Error recovery: resets streaming state on server Error events

Also includes Pi 5 hardware workarounds (wm8960 overlay, stereo-only
audio wrappers, ALSA mixer calibration) and deploy.sh with fast
iteration commands (--push-wrapper, --test-logs).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 20:09:47 +00:00

528 lines
20 KiB
Bash
Executable File

#!/usr/bin/env bash
# homeai-rpi/setup.sh — Bootstrap a Raspberry Pi as a Wyoming Satellite
#
# Run this ON the Pi (or push via deploy.sh from Mac Mini):
# curl -sL http://10.0.0.101:3000/aodhan/homeai/raw/branch/main/homeai-rpi/setup.sh | bash
# — or —
# ./setup.sh
#
# Prerequisites:
# - Raspberry Pi 5 with Raspberry Pi OS (Bookworm)
# - ReSpeaker 2-Mics pHAT installed and driver loaded (card shows in aplay -l)
# - Network connectivity to Mac Mini (10.0.0.101)
# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# ─── Configuration ──────────────────────────────────────────────────────────
# Satellite identity. Each of these three values may be overridden from the
# environment so the same script can bootstrap a satellite for another room
# without being edited, e.g.:
#   curl -sL <url>/setup.sh | SATELLITE_NAME=homeai-lounge SATELLITE_AREA=Lounge bash
# When the variable is unset or empty the original default is used.
SATELLITE_NAME="${SATELLITE_NAME:-homeai-kitchen}"
SATELLITE_AREA="${SATELLITE_AREA:-Kitchen}"
MAC_MINI_IP="10.0.0.101"

# ReSpeaker 2-Mics pHAT — card 2 on Pi 5
# Using plughw for automatic format conversion (sample rate, channels)
MIC_DEVICE="plughw:2,0"
SPK_DEVICE="plughw:2,0"

# Wyoming satellite port (unique per satellite if running multiple)
SATELLITE_PORT="${SATELLITE_PORT:-10700}"

# Directories (everything the satellite needs lives under INSTALL_DIR)
INSTALL_DIR="${HOME}/homeai-satellite"
VENV_DIR="${INSTALL_DIR}/venv"
SOUNDS_DIR="${INSTALL_DIR}/sounds"

# Colors — stored as backslash escapes; they are only turned into real ANSI
# sequences when printed with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'
# Logging helpers. Each one prints a single colour-tagged line built from all
# of its arguments; `echo -e` expands the backslash escapes stored in the
# colour variables.
#
# Progress messages go to stdout.
log_info() { echo -e "${BLUE}[INFO]${NC} $*"; }
log_ok() { echo -e "${GREEN}[OK]${NC} $*"; }
log_step() { echo -e "${CYAN}[STEP]${NC} $*"; }

# Diagnostics go to stderr so they stay visible (and separable) when stdout is
# redirected or captured.
log_warn() { echo -e "${YELLOW}[WARN]${NC} $*" >&2; }

# log_error prints the message and then terminates the script with status 1.
log_error() {
  echo -e "${RED}[ERROR]${NC} $*" >&2
  exit 1
}
# ─── Preflight checks ──────────────────────────────────────────────────────
log_step "Preflight checks..."

# Check we're on a Pi (advisory only — a mismatch just warns)
if ! grep -qi "raspberry\|bcm" /proc/cpuinfo 2>/dev/null; then
  log_warn "This doesn't look like a Raspberry Pi — proceeding anyway"
fi

# Check ReSpeaker is available.
# The card is accepted under either driver name: the seeed-voicecard name, or
# a name containing "wm8960" — this script itself later switches the Pi to the
# wm8960-soundcard overlay, so on a re-run the card may no longer be listed as
# "seeed-2mic-voicecard".
# The listing is captured first instead of piping straight into `grep -q`, so
# an early grep exit cannot fail the pipeline under `pipefail`.
sound_cards="$(aplay -l 2>/dev/null || true)"
if ! grep -Eqi 'seeed-2mic-voicecard|wm8960' <<< "${sound_cards}"; then
  log_error "ReSpeaker 2-Mics pHAT not found in aplay -l. Is the driver loaded?"
fi
log_ok "ReSpeaker 2-Mics pHAT detected"

# Check Python 3
if ! command -v python3 &>/dev/null; then
  log_error "python3 not found. Install with: sudo apt install python3 python3-venv python3-pip"
fi
log_ok "Python $(python3 --version | cut -d' ' -f2)"
# ─── Install system dependencies ───────────────────────────────────────────
log_step "Installing system dependencies..."
sudo apt-get update -qq

# Debian packages required by the satellite and its audio wrappers.
apt_packages=(
  python3-venv
  python3-pip
  alsa-utils
  sox
  libsox-fmt-all
  libopenblas0
)

# A non-zero exit from apt is tolerated here: pre-existing DKMS/kernel issues
# (e.g. seeed-voicecard failing to build against a pending kernel update) can
# make apt report errors even though our packages installed successfully.
if ! sudo apt-get install -y -qq "${apt_packages[@]}" 2>/dev/null; then
  log_warn "apt-get returned errors (likely pre-existing kernel/DKMS issue — continuing)"
fi

# Because apt's status was ignored, confirm the tools we rely on really exist.
for required_tool in sox arecord aplay; do
  if ! command -v "${required_tool}" &>/dev/null; then
    log_error "${required_tool} not found after install"
  fi
done
log_ok "System dependencies installed"
# ─── Create install directory ───────────────────────────────────────────────
log_step "Setting up ${INSTALL_DIR}..."
mkdir -p "${INSTALL_DIR}" "${SOUNDS_DIR}"

# ─── Create Python venv ────────────────────────────────────────────────────
# The venv is only created when its directory does not exist yet, so re-runs
# reuse the existing environment.
[[ -d "${VENV_DIR}" ]] || {
  log_step "Creating Python virtual environment..."
  python3 -m venv "${VENV_DIR}"
}

# Activate it so the plain `pip` below is the venv's pip.
source "${VENV_DIR}/bin/activate"
pip install --upgrade pip setuptools wheel -q

# ─── Install Wyoming Satellite + openWakeWord ──────────────────────────────
# Each entry is "<label shown in the log>|<pip package name>"; packages are
# installed one at a time, in this order.
for py_entry in \
  "Wyoming Satellite|wyoming-satellite" \
  "openWakeWord|openwakeword" \
  "numpy|numpy"; do
  log_step "Installing ${py_entry%%|*}..."
  pip install "${py_entry#*|}" -q
done
log_ok "All Python packages installed"
# ─── Copy wakeword command script ──────────────────────────────────────────
# Writes wakeword_command.py into INSTALL_DIR and marks it executable.
# The heredoc delimiter is quoted, so the Python source below is emitted
# literally (no shell expansion); its indentation is significant.
log_step "Installing wake word detection script..."
cat > "${INSTALL_DIR}/wakeword_command.py" << 'PYEOF'
#!/usr/bin/env python3
"""Wake word detection command for Wyoming Satellite.

The satellite feeds raw 16kHz 16-bit mono audio via stdin.
This script reads that audio, runs openWakeWord, and prints
the wake word name to stdout when detected.

Usage (called by wyoming-satellite --wake-command):
    python wakeword_command.py [--wake-word hey_jarvis] [--threshold 0.3]
"""

import argparse
import logging
import sys
import time

import numpy as np

_LOGGER = logging.getLogger(__name__)

SAMPLE_RATE = 16000
CHUNK_SIZE = 1280  # ~80ms at 16kHz — recommended by openWakeWord


def main():
    """Read audio chunks from stdin and print the wake word on detection."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--wake-word", default="hey_jarvis")
    parser.add_argument("--threshold", type=float, default=0.5)
    parser.add_argument("--cooldown", type=float, default=3.0)
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.WARNING,
        format="%(asctime)s %(levelname)s %(message)s",
        stream=sys.stderr,
    )

    # Imported after argument parsing so the model library is only loaded
    # once the arguments are known to be valid.
    from openwakeword.model import Model

    oww = Model(
        wakeword_models=[args.wake_word],
        inference_framework="onnx",
    )

    # Monotonic clock: the cooldown must not be affected by wall-clock steps
    # (e.g. an NTP correction). -inf guarantees the first detection is never
    # suppressed by the cooldown.
    last_trigger = float("-inf")
    bytes_per_chunk = CHUNK_SIZE * 2  # 16-bit = 2 bytes per sample

    _LOGGER.debug("Wake word command ready, reading audio from stdin")

    try:
        while True:
            raw = sys.stdin.buffer.read(bytes_per_chunk)
            if not raw:
                break  # EOF — the satellite closed our stdin
            if len(raw) < bytes_per_chunk:
                # Zero-pad a short final read up to a full chunk.
                raw = raw + b'\x00' * (bytes_per_chunk - len(raw))

            chunk = np.frombuffer(raw, dtype=np.int16)
            oww.predict(chunk)

            for ww, scores in oww.prediction_buffer.items():
                score = scores[-1] if scores else 0.0
                if score >= args.threshold:
                    now = time.monotonic()
                    if now - last_trigger >= args.cooldown:
                        last_trigger = now
                        # stdout carries the detection; logging goes to stderr.
                        print(ww, flush=True)
                        _LOGGER.debug("Wake word detected: %s (score=%.3f)", ww, score)
    except (KeyboardInterrupt, BrokenPipeError):
        pass


if __name__ == "__main__":
    main()
PYEOF
chmod +x "${INSTALL_DIR}/wakeword_command.py"
log_ok "Wake word script installed"
# ─── Copy satellite wrapper ──────────────────────────────────────────────
# Writes satellite_wrapper.py into INSTALL_DIR and marks it executable.
# The heredoc delimiter is quoted, so the Python source below is emitted
# literally (no shell expansion); its indentation is significant.
log_step "Installing satellite wrapper (echo suppression + writer resilience)..."
cat > "${INSTALL_DIR}/satellite_wrapper.py" << 'WRAPEOF'
#!/usr/bin/env python3
"""Wyoming Satellite wrapper — echo suppression, writer resilience, streaming timeout.

Monkey-patches WakeStreamingSatellite to fix four compounding bugs that cause
the satellite to freeze after the first voice command:

1. TTS Echo: Mic picks up speaker audio → false wake word trigger
2. Server Writer Race: _writer is None when wake word fires → silent drop
3. No Streaming Timeout: stuck in is_streaming=True forever
4. Error events don't reset streaming state in upstream code
"""

import asyncio
import logging
import time

from wyoming.audio import AudioChunk, AudioStart, AudioStop
from wyoming.error import Error
from wyoming.wake import Detection
from wyoming_satellite.satellite import WakeStreamingSatellite

_LOGGER = logging.getLogger()

# Seconds to keep wake detection muted after the server signals end of TTS.
_GRACE_SECONDS = 5.0
# Upper bound on how long wake detection may stay muted.
_MAX_MUTE_SECONDS = 45.0
# Seconds of streaming after which the stream is abandoned and reset.
_STREAMING_TIMEOUT = 30.0

# Keep references to the upstream implementations before patching.
_orig_event_from_server = WakeStreamingSatellite.event_from_server
_orig_event_from_mic = WakeStreamingSatellite.event_from_mic
_orig_event_from_wake = WakeStreamingSatellite.event_from_wake
_orig_trigger_detection = WakeStreamingSatellite.trigger_detection
_orig_trigger_transcript = WakeStreamingSatellite.trigger_transcript


async def _patched_trigger_detection(self, detection):
    """Mute wake detection, then run the upstream detection trigger."""
    self._speaker_mute_start = time.monotonic()
    self._speaker_active = True
    _LOGGER.debug("Speaker active (awake.wav) — wake detection muted")
    await _orig_trigger_detection(self, detection)


async def _patched_trigger_transcript(self, transcript):
    """Mute wake detection, then run the upstream transcript trigger."""
    self._speaker_active = True
    _LOGGER.debug("Speaker active (done.wav) — wake detection muted")
    await _orig_trigger_transcript(self, transcript)


async def _patched_event_from_server(self, event):
    """Track TTS start/stop for muting and reset streaming on Error events."""
    if AudioStart.is_type(event.type):
        self._speaker_active = True
        self._speaker_mute_start = time.monotonic()
        # A new playback is starting: drop any unmute deadline left over from
        # a previous AudioStop so it cannot unmute in the middle of this one.
        self._speaker_unmute_at = None
        _LOGGER.debug("Speaker active (TTS) — wake detection muted")
    elif AudioStop.is_type(event.type):
        self._speaker_unmute_at = time.monotonic() + _GRACE_SECONDS
        _LOGGER.debug("TTS finished — will unmute wake in %.1fs", _GRACE_SECONDS)

    if Error.is_type(event.type) and self.is_streaming:
        _LOGGER.warning("Error from server while streaming — resetting")
        self.is_streaming = False

    await _orig_event_from_server(self, event)

    # After an Error (and with streaming off), re-arm wake word detection.
    if Error.is_type(event.type) and not self.is_streaming:
        await self.trigger_streaming_stop()
        await self._send_wake_detect()
        _LOGGER.info("Waiting for wake word (after error)")


async def _patched_event_from_mic(self, event, audio_bytes=None):
    """Apply the streaming timeout and the speaker mute before upstream handling."""
    if self.is_streaming:
        elapsed = time.monotonic() - getattr(self, "_streaming_start_time", 0)
        if elapsed > _STREAMING_TIMEOUT:
            _LOGGER.warning(
                "Streaming timeout (%.0fs) — no Transcript received, resetting",
                elapsed,
            )
            self.is_streaming = False
            await self.event_to_server(AudioStop().event())
            await self.trigger_streaming_stop()
            await self._send_wake_detect()
            _LOGGER.info("Waiting for wake word (after timeout)")
            return

    if getattr(self, "_speaker_active", False) and not self.is_streaming:
        now = time.monotonic()
        unmute_at = getattr(self, "_speaker_unmute_at", None)
        if unmute_at and now >= unmute_at:
            self._speaker_active = False
            self._speaker_unmute_at = None
            _LOGGER.debug("Wake detection unmuted (grace period elapsed)")
        elif now - getattr(self, "_speaker_mute_start", now) > _MAX_MUTE_SECONDS:
            self._speaker_active = False
            self._speaker_unmute_at = None
            _LOGGER.warning("Wake detection force-unmuted (max mute timeout)")
        elif AudioChunk.is_type(event.type):
            # Still muted: drop mic audio so it never reaches wake detection.
            return

    await _orig_event_from_mic(self, event, audio_bytes)


async def _patched_event_from_wake(self, event):
    """Start streaming on a detection, but only when a server writer exists."""
    if self.is_streaming:
        return

    if Detection.is_type(event.type):
        if self._writer is None:
            _LOGGER.warning(
                "Wake word detected but no server connection — re-arming"
            )
            await self._send_wake_detect()
            return

        self.is_streaming = True
        self._streaming_start_time = time.monotonic()
        _LOGGER.debug("Streaming audio")
        await self._send_run_pipeline()
        await self.forward_event(event)
        await self.trigger_detection(Detection.from_event(event))
        await self.trigger_streaming_start()


# Install the patches.
WakeStreamingSatellite.event_from_server = _patched_event_from_server
WakeStreamingSatellite.event_from_mic = _patched_event_from_mic
WakeStreamingSatellite.event_from_wake = _patched_event_from_wake
WakeStreamingSatellite.trigger_detection = _patched_trigger_detection
WakeStreamingSatellite.trigger_transcript = _patched_trigger_transcript

# Class-level defaults for the state the patched methods read and write.
WakeStreamingSatellite._speaker_active = False
WakeStreamingSatellite._speaker_unmute_at = None
WakeStreamingSatellite._speaker_mute_start = 0.0
WakeStreamingSatellite._streaming_start_time = 0.0


if __name__ == "__main__":
    from wyoming_satellite.__main__ import main

    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        pass
WRAPEOF
chmod +x "${INSTALL_DIR}/satellite_wrapper.py"
log_ok "Satellite wrapper installed"
# ─── Download wake word model ──────────────────────────────────────────────
log_step "Downloading hey_jarvis wake word model..."

# Python snippet executed with the venv interpreter; it asks openWakeWord to
# download the hey_jarvis model and then prints a confirmation line.
model_download_py="
import openwakeword
openwakeword.utils.download_models(model_names=['hey_jarvis'])
print('Model downloaded')
"

# stderr is merged into stdout so lines mentioning "device_discovery" can be
# filtered out of the combined output.
"${VENV_DIR}/bin/python3" -c "${model_download_py}" 2>&1 \
  | grep -v "device_discovery"
log_ok "Wake word model ready"
# ─── Create mic capture wrapper ────────────────────────────────────────────
# The heredocs in this section are unquoted on purpose: ${MIC_DEVICE} and
# ${SPK_DEVICE} are expanded now, so the generated scripts use the devices
# configured at the top of this file instead of a second hard-coded copy.
# Nothing else in the heredoc bodies is subject to shell expansion.
log_step "Creating mic capture wrapper (stereo → mono conversion)..."
cat > "${INSTALL_DIR}/mic-capture.sh" << MICEOF
#!/bin/bash
# Record stereo from ReSpeaker WM8960, convert to mono 16kHz 16-bit for Wyoming
arecord -D ${MIC_DEVICE} -r 16000 -c 2 -f S16_LE -t raw -q - | sox -t raw -r 16000 -c 2 -b 16 -e signed-integer - -t raw -r 16000 -c 1 -b 16 -e signed-integer -
MICEOF
chmod +x "${INSTALL_DIR}/mic-capture.sh"
log_ok "Mic capture wrapper installed"

# ─── Create speaker playback wrapper ──────────────────────────────────────
log_step "Creating speaker playback wrapper (mono → stereo conversion)..."
cat > "${INSTALL_DIR}/speaker-playback.sh" << SPKEOF
#!/bin/bash
# Convert mono 24kHz 16-bit input to stereo for WM8960 playback
sox -t raw -r 24000 -c 1 -b 16 -e signed-integer - -t raw -r 24000 -c 2 -b 16 -e signed-integer - | aplay -D ${SPK_DEVICE} -r 24000 -c 2 -f S16_LE -t raw -q -
SPKEOF
chmod +x "${INSTALL_DIR}/speaker-playback.sh"
log_ok "Speaker playback wrapper installed"
# ─── Fix ReSpeaker overlay for Pi 5 ────────────────────────────────────────
log_step "Configuring wm8960-soundcard overlay (Pi 5 compatible)..."

# Disable the seeed-voicecard service (loads wrong overlay for Pi 5)
if systemctl is-enabled seeed-voicecard.service &>/dev/null; then
  sudo systemctl disable seeed-voicecard.service 2>/dev/null || true
  log_info "Disabled seeed-voicecard service"
fi

# Add upstream wm8960-soundcard overlay to config.txt if not present.
# The pattern is anchored to the start of the line so that a commented-out
# entry ("#dtoverlay=wm8960-soundcard") is not mistaken for an active one.
boot_config="/boot/firmware/config.txt"
if ! grep -Eq '^[[:space:]]*dtoverlay=wm8960-soundcard([[:space:],]|$)' "${boot_config}" 2>/dev/null; then
  echo "dtoverlay=wm8960-soundcard" | sudo tee -a "${boot_config}" > /dev/null
  log_info "Added wm8960-soundcard overlay to /boot/firmware/config.txt"
fi

# Load overlay now if not already active. Both dtoverlay calls are
# best-effort: failures are ignored.
if ! dtoverlay -l 2>/dev/null | grep -q wm8960-soundcard; then
  sudo dtoverlay -r seeed-2mic-voicecard 2>/dev/null || true
  sudo dtoverlay wm8960-soundcard 2>/dev/null || true
fi
log_ok "Audio overlay configured"
# ─── Generate feedback sounds ──────────────────────────────────────────────
log_step "Generating feedback sounds..."

# Must be plain 16-bit PCM WAV — Python wave module can't read WAVE_FORMAT_EXTENSIBLE
# Each spec is "<file stem>:<sine frequency in Hz>":
#   awake.wav — 0.15 s tone at 800 Hz (awake chime)
#   done.wav  — 0.15 s tone at 600 Hz (done chime)
# A sox failure only produces a warning; setup continues either way.
for chime_spec in "awake:800" "done:600"; do
  chime_name="${chime_spec%%:*}"
  chime_hz="${chime_spec##*:}"
  sox -n -r 16000 -b 16 -c 1 -e signed-integer "${SOUNDS_DIR}/${chime_name}.wav" \
    synth 0.15 sin "${chime_hz}" fade t 0.01 0.15 0.05 \
    vol 0.5 \
    2>/dev/null || log_warn "Could not generate ${chime_name}.wav (sox issue)"
done
log_ok "Feedback sounds ready"
# ─── Set ALSA mixer defaults ───────────────────────────────────────────────
log_step "Configuring ALSA mixer for ReSpeaker..."

#######################################
# Extract the ALSA card (index or id) from a PCM device string.
# Arguments: $1 - device such as "plughw:2,0" or "plughw:CARD=name,DEV=0"
# Outputs:   the card part ("2" / "name") on stdout
#######################################
alsa_card_of() {
  local device=$1
  local card=${device#*:} # drop the "plughw:" style prefix
  card=${card%%,*}        # drop the ",<device>" suffix
  card=${card#CARD=}      # accept the CARD=<id> spelling too
  printf '%s\n' "${card}"
}

# Use the cards named by the configured devices rather than a hard-coded
# index, so changing SPK_DEVICE / MIC_DEVICE also retargets the mixer.
playback_card="$(alsa_card_of "${SPK_DEVICE}")"
capture_card="$(alsa_card_of "${MIC_DEVICE}")"

# Every amixer call below is best-effort: a missing control is ignored.

# Playback — 80% volume, unmute
amixer -c "${playback_card}" sset 'Playback' 80% unmute 2>/dev/null || true
amixer -c "${playback_card}" sset 'Speaker' 80% unmute 2>/dev/null || true

# Capture — max out capture volume
amixer -c "${capture_card}" sset 'Capture' 100% cap 2>/dev/null || true

# Enable mic input boost (critical — without this, signal is near-silent)
amixer -c "${capture_card}" cset name='Left Input Mixer Boost Switch' on 2>/dev/null || true
amixer -c "${capture_card}" cset name='Right Input Mixer Boost Switch' on 2>/dev/null || true

# Mic preamp boost to +13dB (1 of 3 — higher causes clipping)
amixer -c "${capture_card}" cset name='Left Input Boost Mixer LINPUT1 Volume' 1 2>/dev/null || true
amixer -c "${capture_card}" cset name='Right Input Boost Mixer RINPUT1 Volume' 1 2>/dev/null || true

# ADC capture volume — moderate to avoid clipping (max=255)
amixer -c "${capture_card}" cset name='ADC PCM Capture Volume' 180,180 2>/dev/null || true
log_ok "ALSA mixer configured"
# ─── Install systemd service ───────────────────────────────────────────────
log_step "Installing systemd service..."

# Account the service runs as. USER is not guaranteed to be set in every
# invocation context, and under `set -u` an unset USER would abort the script
# here, after all earlier steps had already run — so fall back to `id -un`.
service_user="${USER:-$(id -un)}"

# The heredoc is unquoted: the ${...} values are expanded now, and each "\\"
# becomes a single "\" in the unit file (systemd's line continuation).
sudo tee /etc/systemd/system/homeai-satellite.service > /dev/null << SVCEOF
[Unit]
Description=HomeAI Wyoming Satellite (${SATELLITE_AREA})
After=network-online.target sound.target
Wants=network-online.target
[Service]
Type=simple
User=${service_user}
WorkingDirectory=${INSTALL_DIR}
ExecStart=${VENV_DIR}/bin/python3 ${INSTALL_DIR}/satellite_wrapper.py \\
--uri tcp://0.0.0.0:${SATELLITE_PORT} \\
--name "${SATELLITE_NAME}" \\
--area "${SATELLITE_AREA}" \\
--mic-command ${INSTALL_DIR}/mic-capture.sh \\
--snd-command ${INSTALL_DIR}/speaker-playback.sh \\
--mic-command-rate 16000 \\
--mic-command-width 2 \\
--mic-command-channels 1 \\
--snd-command-rate 24000 \\
--snd-command-width 2 \\
--snd-command-channels 1 \\
--wake-command "${VENV_DIR}/bin/python3 ${INSTALL_DIR}/wakeword_command.py --wake-word hey_jarvis --threshold 0.5" \\
--wake-command-rate 16000 \\
--wake-command-width 2 \\
--wake-command-channels 1 \\
--awake-wav ${SOUNDS_DIR}/awake.wav \\
--done-wav ${SOUNDS_DIR}/done.wav
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target
SVCEOF

# Pick up the new unit, enable it at boot, and (re)start it now.
sudo systemctl daemon-reload
sudo systemctl enable homeai-satellite.service
sudo systemctl restart homeai-satellite.service
log_ok "systemd service installed and started"
# ─── Verify ────────────────────────────────────────────────────────────────
log_step "Verifying satellite..."

# Give the freshly restarted service a moment before asking for its state.
sleep 2
if ! systemctl is-active --quiet homeai-satellite.service; then
  log_warn "Satellite may not have started cleanly. Check logs:"
  echo " journalctl -u homeai-satellite.service -f"
else
  log_ok "Satellite is running!"
fi

# Closing banner (coloured) followed by the plain-text summary.
banner_rule="═══════════════════════════════════════════════════════════════"
printf '\n'
printf '%b\n' \
  "${GREEN}${banner_rule}${NC}" \
  "${GREEN} HomeAI Kitchen Satellite — Setup Complete${NC}" \
  "${GREEN}${banner_rule}${NC}"

# Unquoted heredoc: the configuration values and the host's first reported
# address (hostname -I) are substituted when the summary is printed.
cat << SUMMARY

 Satellite: ${SATELLITE_NAME} (${SATELLITE_AREA})
 Port: ${SATELLITE_PORT}
 Mic: ${MIC_DEVICE} (ReSpeaker 2-Mics)
 Speaker: ${SPK_DEVICE} (ReSpeaker 3.5mm)
 Wake word: hey_jarvis

 Next steps:
 1. In Home Assistant, go to Settings → Devices & Services → Add Integration
 2. Search for 'Wyoming Protocol'
 3. Enter host: $(hostname -I | awk '{print $1}') port: ${SATELLITE_PORT}
 4. Assign the HomeAI voice pipeline to this satellite

 Useful commands:
 journalctl -u homeai-satellite.service -f # live logs
 sudo systemctl restart homeai-satellite # restart
 sudo systemctl status homeai-satellite # status
 arecord -D ${MIC_DEVICE} -d 3 -f S16_LE -r 16000 /tmp/test.wav # test mic
 aplay -D ${SPK_DEVICE} /tmp/test.wav # test speaker

SUMMARY