homeai/homeai-rpi/setup.sh

#!/usr/bin/env bash
# homeai-rpi/setup.sh — Bootstrap a Raspberry Pi as a Wyoming Satellite
#
# Run this ON the Pi (or push via deploy.sh from Mac Mini):
#   curl -sL http://10.0.0.101:3000/aodhan/homeai/raw/branch/main/homeai-rpi/setup.sh | bash
#   — or —
#   ./setup.sh
#
# Prerequisites:
#   - Raspberry Pi 5 with Raspberry Pi OS (Bookworm)
#   - ReSpeaker 2-Mics pHAT installed and driver loaded (card shows in aplay -l)
#   - Network connectivity to Mac Mini (10.0.0.101)

set -euo pipefail

# ─── Configuration ──────────────────────────────────────────────────────────
#
# Each value below can be overridden from the environment, for example:
#   SATELLITE_NAME=homeai-office SATELLITE_AREA=Office SATELLITE_PORT=10701 ./setup.sh
# An unset or empty variable falls back to the default shown here.

# Name and area passed to the satellite (--name / --area)
SATELLITE_NAME="${SATELLITE_NAME:-homeai-kitchen}"
SATELLITE_AREA="${SATELLITE_AREA:-Kitchen}"
MAC_MINI_IP="${MAC_MINI_IP:-10.0.0.101}"

# ReSpeaker 2-Mics pHAT — card 2 on Pi 5
# Using plughw for automatic format conversion (sample rate, channels)
MIC_DEVICE="${MIC_DEVICE:-plughw:2,0}"
SPK_DEVICE="${SPK_DEVICE:-plughw:2,0}"

# Wyoming satellite port (unique per satellite if running multiple)
SATELLITE_PORT="${SATELLITE_PORT:-10700}"

# Directories — the venv and sounds directories always live under INSTALL_DIR
INSTALL_DIR="${INSTALL_DIR:-${HOME}/homeai-satellite}"
VENV_DIR="${INSTALL_DIR}/venv"
SOUNDS_DIR="${INSTALL_DIR}/sounds"

# Colors — ANSI escape sequences stored as text; they are turned into real
# escape bytes by printf's %b (and by `echo -e` elsewhere in this script).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Logging helpers. Each prints a coloured tag followed by all of its arguments
# joined by spaces. The message is printed with %s, so backslashes in it are
# shown literally rather than interpreted as escapes.
#   log_info / log_ok / log_step — progress output on stdout
#   log_warn                     — diagnostic on stderr, script continues
#   log_error                    — diagnostic on stderr, then exits with status 1
log_info()  { printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$*"; }
log_ok()    { printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$*"; }
log_warn()  { printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*" >&2; }
log_error() { printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*" >&2; exit 1; }
log_step()  { printf '%b[STEP]%b %s\n' "${CYAN}" "${NC}" "$*"; }

# ─── Preflight checks ──────────────────────────────────────────────────────

log_step "Preflight checks..."

# Check we're on a Pi (advisory only — a mismatch just prints a warning)
if ! grep -qi "raspberry\|bcm" /proc/cpuinfo 2>/dev/null; then
  log_warn "This doesn't look like a Raspberry Pi — proceeding anyway"
fi

# Check ReSpeaker is available.
# The card list is captured first and searched afterwards: with `pipefail`
# active, piping straight into `grep -q` can turn a successful match into a
# failure when grep exits early and aplay is killed by SIGPIPE.
# Both card names are accepted: seeed-2mic-voicecard (vendor driver) and
# wm8960-soundcard (the upstream overlay this script switches to further down),
# so the script can be re-run after that switch has taken effect.
sound_cards="$(aplay -l 2>/dev/null || true)"
if ! grep -Eq "seeed-2mic-voicecard|wm8960-soundcard" <<< "${sound_cards}"; then
  log_error "ReSpeaker 2-Mics pHAT not found in aplay -l. Is the driver loaded?"
fi
log_ok "ReSpeaker 2-Mics pHAT detected"

# Check Python 3
if ! command -v python3 &>/dev/null; then
  log_error "python3 not found. Install with: sudo apt install python3 python3-venv python3-pip"
fi
log_ok "Python $(python3 --version | cut -d' ' -f2)"

# ─── Install system dependencies ───────────────────────────────────────────

log_step "Installing system dependencies..."
sudo apt-get update -qq

# Packages required by the satellite, the audio wrappers and the chime generator
apt_packages=(
  python3-venv
  python3-pip
  alsa-utils
  sox
  libsox-fmt-all
  libopenblas0
)

# A non-zero exit from apt is tolerated on purpose: unrelated DKMS/kernel
# trouble (e.g. seeed-voicecard failing to build against a pending kernel
# update) makes apt report an error even when these packages installed fine.
if ! sudo apt-get install -y -qq "${apt_packages[@]}" 2>/dev/null; then
  log_warn "apt-get returned errors (likely pre-existing kernel/DKMS issue — continuing)"
fi

# The real gate: the tools used later must actually be on PATH
for required_tool in sox arecord aplay; do
  if ! command -v "${required_tool}" &>/dev/null; then
    log_error "${required_tool} not found after install"
  fi
done
log_ok "System dependencies installed"

# ─── Create install directory ───────────────────────────────────────────────

log_step "Setting up ${INSTALL_DIR}..."
mkdir -p "${INSTALL_DIR}" "${SOUNDS_DIR}"

# ─── Create Python venv ────────────────────────────────────────────────────

# An existing venv directory is reused as-is; it is only created when missing.
[[ -d "${VENV_DIR}" ]] || {
  log_step "Creating Python virtual environment..."
  python3 -m venv "${VENV_DIR}"
}

# Activate the venv for the rest of this script so plain `pip` targets it,
# then bring the packaging tools up to date.
. "${VENV_DIR}/bin/activate"
pip install -q --upgrade pip setuptools wheel

# ─── Install Wyoming Satellite + openWakeWord ──────────────────────────────

# Display labels and PyPI package names, index-aligned. Packages are installed
# one at a time, in this order, each announced by its own step message.
pip_labels=("Wyoming Satellite" "openWakeWord" "numpy")
pip_packages=(wyoming-satellite openwakeword numpy)

for pkg_index in "${!pip_packages[@]}"; do
  log_step "Installing ${pip_labels[pkg_index]}..."
  pip install "${pip_packages[pkg_index]}" -q
done

log_ok "All Python packages installed"

# ─── Copy wakeword command script ──────────────────────────────────────────

log_step "Installing wake word detection script..."
# Write the wake word helper into the install directory. The heredoc delimiter
# is quoted ('PYEOF'), so the Python source below is written verbatim with no
# shell expansion.
#
# What the generated helper does:
#   - reads raw audio from stdin in chunks of 1280 16-bit samples (a short
#     final chunk is zero-padded),
#   - feeds each chunk to an openWakeWord model using the onnx framework,
#   - prints the wake word name to stdout whenever the newest score reaches
#     --threshold (default 0.5), at most once per --cooldown seconds
#     (default 3.0),
#   - logs to stderr, at DEBUG level only when --debug is given.
cat > "${INSTALL_DIR}/wakeword_command.py" << 'PYEOF'
#!/usr/bin/env python3
"""Wake word detection command for Wyoming Satellite.

The satellite feeds raw 16kHz 16-bit mono audio via stdin.
This script reads that audio, runs openWakeWord, and prints
the wake word name to stdout when detected.

Usage (called by wyoming-satellite --wake-command):
    python wakeword_command.py [--wake-word hey_jarvis] [--threshold 0.3]
"""

import argparse
import sys
import numpy as np
import logging

_LOGGER = logging.getLogger(__name__)

SAMPLE_RATE = 16000
CHUNK_SIZE = 1280  # ~80ms at 16kHz — recommended by openWakeWord


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--wake-word", default="hey_jarvis")
    parser.add_argument("--threshold", type=float, default=0.5)
    parser.add_argument("--cooldown", type=float, default=3.0)
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.WARNING,
        format="%(asctime)s %(levelname)s %(message)s",
        stream=sys.stderr,
    )

    import openwakeword
    from openwakeword.model import Model

    oww = Model(
        wakeword_models=[args.wake_word],
        inference_framework="onnx",
    )

    import time
    last_trigger = 0.0
    bytes_per_chunk = CHUNK_SIZE * 2  # 16-bit = 2 bytes per sample

    _LOGGER.debug("Wake word command ready, reading audio from stdin")

    try:
        while True:
            raw = sys.stdin.buffer.read(bytes_per_chunk)
            if not raw:
                break
            if len(raw) < bytes_per_chunk:
                raw = raw + b'\x00' * (bytes_per_chunk - len(raw))

            chunk = np.frombuffer(raw, dtype=np.int16)
            oww.predict(chunk)

            for ww, scores in oww.prediction_buffer.items():
                score = scores[-1] if scores else 0.0
                if score >= args.threshold:
                    now = time.time()
                    if now - last_trigger >= args.cooldown:
                        last_trigger = now
                        print(ww, flush=True)
                        _LOGGER.debug("Wake word detected: %s (score=%.3f)", ww, score)
    except (KeyboardInterrupt, BrokenPipeError):
        pass


if __name__ == "__main__":
    main()
PYEOF
# Mark the helper executable so it can also be run directly via its shebang
chmod +x "${INSTALL_DIR}/wakeword_command.py"
log_ok "Wake word script installed"

# ─── Copy satellite wrapper ──────────────────────────────────────────────

log_step "Installing satellite wrapper (echo suppression + writer resilience)..."
# Write the satellite entry point into the install directory. The heredoc
# delimiter is quoted ('WRAPEOF'), so the Python source below is written
# verbatim with no shell expansion.
#
# What the generated wrapper does:
#   - replaces five WakeStreamingSatellite methods (event_from_server,
#     event_from_mic, event_from_wake, trigger_detection, trigger_transcript)
#     with patched versions, keeping references to the originals,
#   - drops mic AudioChunk events while the speaker is marked active and the
#     satellite is not streaming; the mute is lifted _GRACE_SECONDS after an
#     AudioStop from the server, or forcibly after _MAX_MUTE_SECONDS,
#   - re-arms wake detection instead of starting a stream when a detection
#     arrives while self._writer is None,
#   - stops a stream that has been running longer than _STREAMING_TIMEOUT,
#   - clears is_streaming when the server sends an Error while streaming,
#   - when run as a script, calls wyoming_satellite.__main__.main via
#     asyncio.run, so it accepts the same command-line arguments.
cat > "${INSTALL_DIR}/satellite_wrapper.py" << 'WRAPEOF'
#!/usr/bin/env python3
"""Wyoming Satellite wrapper — echo suppression, writer resilience, streaming timeout.

Monkey-patches WakeStreamingSatellite to fix three compounding bugs that cause
the satellite to freeze after the first voice command:

1. TTS Echo: Mic picks up speaker audio → false wake word trigger
2. Server Writer Race: _writer is None when wake word fires → silent drop
3. No Streaming Timeout: stuck in is_streaming=True forever
4. Error events don't reset streaming state in upstream code
"""

import asyncio
import logging
import time

from wyoming.audio import AudioChunk, AudioStart, AudioStop
from wyoming.error import Error
from wyoming.wake import Detection

from wyoming_satellite.satellite import WakeStreamingSatellite

_LOGGER = logging.getLogger()

_GRACE_SECONDS = 5.0
_MAX_MUTE_SECONDS = 45.0
_STREAMING_TIMEOUT = 30.0

_orig_event_from_server = WakeStreamingSatellite.event_from_server
_orig_event_from_mic = WakeStreamingSatellite.event_from_mic
_orig_event_from_wake = WakeStreamingSatellite.event_from_wake
_orig_trigger_detection = WakeStreamingSatellite.trigger_detection
_orig_trigger_transcript = WakeStreamingSatellite.trigger_transcript


async def _patched_trigger_detection(self, detection):
    self._speaker_mute_start = time.monotonic()
    self._speaker_active = True
    _LOGGER.debug("Speaker active (awake.wav) — wake detection muted")
    await _orig_trigger_detection(self, detection)


async def _patched_trigger_transcript(self, transcript):
    self._speaker_active = True
    _LOGGER.debug("Speaker active (done.wav) — wake detection muted")
    await _orig_trigger_transcript(self, transcript)


async def _patched_event_from_server(self, event):
    if AudioStart.is_type(event.type):
        self._speaker_active = True
        self._speaker_mute_start = time.monotonic()
        _LOGGER.debug("Speaker active (TTS) — wake detection muted")
    elif AudioStop.is_type(event.type):
        self._speaker_unmute_at = time.monotonic() + _GRACE_SECONDS
        _LOGGER.debug("TTS finished — will unmute wake in %.1fs", _GRACE_SECONDS)

    if Error.is_type(event.type) and self.is_streaming:
        _LOGGER.warning("Error from server while streaming — resetting")
        self.is_streaming = False

    await _orig_event_from_server(self, event)

    if Error.is_type(event.type) and not self.is_streaming:
        await self.trigger_streaming_stop()
        await self._send_wake_detect()
        _LOGGER.info("Waiting for wake word (after error)")


async def _patched_event_from_mic(self, event, audio_bytes=None):
    if self.is_streaming:
        elapsed = time.monotonic() - getattr(self, "_streaming_start_time", 0)
        if elapsed > _STREAMING_TIMEOUT:
            _LOGGER.warning(
                "Streaming timeout (%.0fs) — no Transcript received, resetting",
                elapsed,
            )
            self.is_streaming = False
            await self.event_to_server(AudioStop().event())
            await self.trigger_streaming_stop()
            await self._send_wake_detect()
            _LOGGER.info("Waiting for wake word (after timeout)")
            return

    if getattr(self, "_speaker_active", False) and not self.is_streaming:
        now = time.monotonic()
        unmute_at = getattr(self, "_speaker_unmute_at", None)
        if unmute_at and now >= unmute_at:
            self._speaker_active = False
            self._speaker_unmute_at = None
            _LOGGER.debug("Wake detection unmuted (grace period elapsed)")
        elif now - getattr(self, "_speaker_mute_start", now) > _MAX_MUTE_SECONDS:
            self._speaker_active = False
            self._speaker_unmute_at = None
            _LOGGER.warning("Wake detection force-unmuted (max mute timeout)")
        elif AudioChunk.is_type(event.type):
            return

    await _orig_event_from_mic(self, event, audio_bytes)


async def _patched_event_from_wake(self, event):
    if self.is_streaming:
        return
    if Detection.is_type(event.type):
        if self._writer is None:
            _LOGGER.warning(
                "Wake word detected but no server connection — re-arming"
            )
            await self._send_wake_detect()
            return
        self.is_streaming = True
        self._streaming_start_time = time.monotonic()
        _LOGGER.debug("Streaming audio")
        await self._send_run_pipeline()
        await self.forward_event(event)
        await self.trigger_detection(Detection.from_event(event))
        await self.trigger_streaming_start()


WakeStreamingSatellite.event_from_server = _patched_event_from_server
WakeStreamingSatellite.event_from_mic = _patched_event_from_mic
WakeStreamingSatellite.event_from_wake = _patched_event_from_wake
WakeStreamingSatellite.trigger_detection = _patched_trigger_detection
WakeStreamingSatellite.trigger_transcript = _patched_trigger_transcript
WakeStreamingSatellite._speaker_active = False
WakeStreamingSatellite._speaker_unmute_at = None
WakeStreamingSatellite._speaker_mute_start = 0.0
WakeStreamingSatellite._streaming_start_time = 0.0

if __name__ == "__main__":
    from wyoming_satellite.__main__ import main

    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        pass
WRAPEOF
# Mark the wrapper executable so it can also be run directly via its shebang
chmod +x "${INSTALL_DIR}/satellite_wrapper.py"
log_ok "Satellite wrapper installed"

# ─── Download wake word model ──────────────────────────────────────────────

log_step "Downloading hey_jarvis wake word model..."

# Python snippet run with the venv interpreter: fetches the hey_jarvis model
# files through openWakeWord and prints a confirmation line.
model_fetch_code="
import openwakeword
openwakeword.utils.download_models(model_names=['hey_jarvis'])
print('Model downloaded')
"

# stderr is merged into stdout so lines mentioning "device_discovery" can be
# filtered out of the combined output.
"${VENV_DIR}/bin/python3" -c "${model_fetch_code}" 2>&1 \
  | grep -v "device_discovery"
log_ok "Wake word model ready"

# ─── Create mic capture wrapper ────────────────────────────────────────────

log_step "Creating mic capture wrapper (stereo → mono conversion)..."

# The capture device is taken from MIC_DEVICE instead of being hard-coded, so
# the generated script always uses the device configured at the top of this
# file (and reported in the final summary). ${var@Q} yields a shell-quoted
# copy of the value, safe to paste into the generated script text.
mic_device_quoted="${MIC_DEVICE@Q}"

# Unquoted heredoc delimiter: ${mic_device_quoted} is expanded while the file
# is written; nothing else in the body is subject to expansion.
cat > "${INSTALL_DIR}/mic-capture.sh" << MICEOF
#!/bin/bash
# Record stereo from ReSpeaker WM8960, convert to mono 16kHz 16-bit for Wyoming
arecord -D ${mic_device_quoted} -r 16000 -c 2 -f S16_LE -t raw -q - | sox -t raw -r 16000 -c 2 -b 16 -e signed-integer - -t raw -r 16000 -c 1 -b 16 -e signed-integer -
MICEOF
chmod +x "${INSTALL_DIR}/mic-capture.sh"
log_ok "Mic capture wrapper installed"

# ─── Create speaker playback wrapper ──────────────────────────────────────

log_step "Creating speaker playback wrapper (mono → stereo conversion)..."

# The playback device is taken from SPK_DEVICE instead of being hard-coded, so
# the generated script always uses the device configured at the top of this
# file (and reported in the final summary). ${var@Q} yields a shell-quoted
# copy of the value, safe to paste into the generated script text.
spk_device_quoted="${SPK_DEVICE@Q}"

# Unquoted heredoc delimiter: ${spk_device_quoted} is expanded while the file
# is written; nothing else in the body is subject to expansion.
cat > "${INSTALL_DIR}/speaker-playback.sh" << SPKEOF
#!/bin/bash
# Convert mono 24kHz 16-bit input to stereo for WM8960 playback
sox -t raw -r 24000 -c 1 -b 16 -e signed-integer - -t raw -r 24000 -c 2 -b 16 -e signed-integer - | aplay -D ${spk_device_quoted} -r 24000 -c 2 -f S16_LE -t raw -q -
SPKEOF
chmod +x "${INSTALL_DIR}/speaker-playback.sh"
log_ok "Speaker playback wrapper installed"

# ─── Fix ReSpeaker overlay for Pi 5 ────────────────────────────────────────

log_step "Configuring wm8960-soundcard overlay (Pi 5 compatible)..."

# Disable the seeed-voicecard service (loads wrong overlay for Pi 5).
# Failure to disable is tolerated; this step is best-effort.
if systemctl is-enabled seeed-voicecard.service &>/dev/null; then
  sudo systemctl disable seeed-voicecard.service 2>/dev/null || true
  log_info "Disabled seeed-voicecard service"
fi

# Add upstream wm8960-soundcard overlay to config.txt if not present.
# The pattern is anchored to the start of the line so that a commented-out
# entry (e.g. "#dtoverlay=wm8960-soundcard") does not count as present.
if ! grep -Eq '^[[:space:]]*dtoverlay=wm8960-soundcard' /boot/firmware/config.txt 2>/dev/null; then
  sudo bash -c 'echo "dtoverlay=wm8960-soundcard" >> /boot/firmware/config.txt'
  log_info "Added wm8960-soundcard overlay to /boot/firmware/config.txt"
fi

# Load overlay now if not already active.
# The overlay list is captured before it is searched: with `pipefail` active,
# piping straight into `grep -q` can report failure on a successful match when
# grep exits early and the producer is killed by SIGPIPE.
loaded_overlays="$(dtoverlay -l 2>/dev/null || true)"
if ! grep -q "wm8960-soundcard" <<< "${loaded_overlays}"; then
  # Both calls are best-effort; errors are deliberately ignored.
  sudo dtoverlay -r seeed-2mic-voicecard 2>/dev/null || true
  sudo dtoverlay wm8960-soundcard 2>/dev/null || true
fi

log_ok "Audio overlay configured"

# ─── Generate feedback sounds ──────────────────────────────────────────────

log_step "Generating feedback sounds..."

# Output format shared by both chimes. Must be plain 16-bit PCM WAV — Python
# wave module can't read WAVE_FORMAT_EXTENSIBLE.
chime_format=(-r 16000 -b 16 -c 1 -e signed-integer)
# Envelope and level shared by both chimes
chime_shape=(fade t 0.01 0.15 0.05 vol 0.5)

# Awake chime — 0.15 s sine tone at 800 Hz. Failure only produces a warning.
if ! sox -n "${chime_format[@]}" "${SOUNDS_DIR}/awake.wav" \
  synth 0.15 sin 800 "${chime_shape[@]}" 2>/dev/null; then
  log_warn "Could not generate awake.wav (sox issue)"
fi

# Done chime — 0.15 s sine tone at 600 Hz. Failure only produces a warning.
if ! sox -n "${chime_format[@]}" "${SOUNDS_DIR}/done.wav" \
  synth 0.15 sin 600 "${chime_shape[@]}" 2>/dev/null; then
  log_warn "Could not generate done.wav (sox issue)"
fi

log_ok "Feedback sounds ready"

# ─── Set ALSA mixer defaults ───────────────────────────────────────────────

log_step "Configuring ALSA mixer for ReSpeaker..."

# Run one amixer command against card 2. Every setting is best-effort:
# error output is discarded and a failing control never aborts the script.
respeaker_mixer() {
  amixer -c 2 "$@" 2>/dev/null || true
}

# Playback — 80% volume, unmute
respeaker_mixer sset 'Playback' 80% unmute
respeaker_mixer sset 'Speaker' 80% unmute

# Capture — max out capture volume
respeaker_mixer sset 'Capture' 100% cap

# Enable mic input boost (critical — without this, signal is near-silent)
respeaker_mixer cset name='Left Input Mixer Boost Switch' on
respeaker_mixer cset name='Right Input Mixer Boost Switch' on

# Mic preamp boost to +13dB (1 of 3 — higher causes clipping)
respeaker_mixer cset name='Left Input Boost Mixer LINPUT1 Volume' 1
respeaker_mixer cset name='Right Input Boost Mixer RINPUT1 Volume' 1

# ADC capture volume — moderate to avoid clipping (max=255)
respeaker_mixer cset name='ADC PCM Capture Volume' 180,180

log_ok "ALSA mixer configured"

# ─── Install systemd service ───────────────────────────────────────────────

log_step "Installing systemd service..."

# Write the unit file as root via `sudo tee` (tee's own stdout is discarded).
# The heredoc delimiter is unquoted, so every ${...} below is expanded now,
# while the file is written — the unit ends up containing literal paths, the
# satellite name/area/port and the current value of ${USER}. Each `\\` is
# written to the file as a single backslash, i.e. a line continuation inside
# ExecStart.
#
# The service runs satellite_wrapper.py with the venv interpreter, using the
# generated mic/speaker wrapper scripts as audio commands (mic and wake audio
# at 16000 Hz, speaker audio at 24000 Hz, all 2 bytes wide and 1 channel),
# the wake word helper with --wake-word hey_jarvis --threshold 0.5, and the
# generated awake/done chimes. It is restarted after 5 s whenever it exits.
sudo tee /etc/systemd/system/homeai-satellite.service > /dev/null << SVCEOF
[Unit]
Description=HomeAI Wyoming Satellite (${SATELLITE_AREA})
After=network-online.target sound.target
Wants=network-online.target

[Service]
Type=simple
User=${USER}
WorkingDirectory=${INSTALL_DIR}
ExecStart=${VENV_DIR}/bin/python3 ${INSTALL_DIR}/satellite_wrapper.py \\
    --uri tcp://0.0.0.0:${SATELLITE_PORT} \\
    --name "${SATELLITE_NAME}" \\
    --area "${SATELLITE_AREA}" \\
    --mic-command ${INSTALL_DIR}/mic-capture.sh \\
    --snd-command ${INSTALL_DIR}/speaker-playback.sh \\
    --mic-command-rate 16000 \\
    --mic-command-width 2 \\
    --mic-command-channels 1 \\
    --snd-command-rate 24000 \\
    --snd-command-width 2 \\
    --snd-command-channels 1 \\
    --wake-command "${VENV_DIR}/bin/python3 ${INSTALL_DIR}/wakeword_command.py --wake-word hey_jarvis --threshold 0.5" \\
    --wake-command-rate 16000 \\
    --wake-command-width 2 \\
    --wake-command-channels 1 \\
    --awake-wav ${SOUNDS_DIR}/awake.wav \\
    --done-wav ${SOUNDS_DIR}/done.wav
Restart=always
RestartSec=5

[Install]
WantedBy=multi-user.target
SVCEOF

# Make systemd pick up the new/changed unit, enable it at boot, and (re)start
# it now so a re-run of this script applies the updated definition.
sudo systemctl daemon-reload
sudo systemctl enable homeai-satellite.service
sudo systemctl restart homeai-satellite.service

log_ok "systemd service installed and started"

# ─── Verify ────────────────────────────────────────────────────────────────

log_step "Verifying satellite..."
# Give the freshly restarted service a moment before asking for its state
sleep 2

# A service that is not active only produces a warning; the summary below is
# printed either way.
if systemctl is-active --quiet homeai-satellite.service; then
  log_ok "Satellite is running!"
else
  log_warn "Satellite may not have started cleanly. Check logs:"
  echo "  journalctl -u homeai-satellite.service -f"
fi

# Summary banner. The area shown comes from SATELLITE_AREA so the banner
# matches the configured satellite rather than always saying "Kitchen".
echo ""
echo -e "${GREEN}═══════════════════════════════════════════════════════════════${NC}"
echo -e "${GREEN} HomeAI ${SATELLITE_AREA} Satellite — Setup Complete${NC}"
echo -e "${GREEN}═══════════════════════════════════════════════════════════════${NC}"
echo ""
echo "  Satellite:   ${SATELLITE_NAME} (${SATELLITE_AREA})"
echo "  Port:        ${SATELLITE_PORT}"
echo "  Mic:         ${MIC_DEVICE} (ReSpeaker 2-Mics)"
echo "  Speaker:     ${SPK_DEVICE} (ReSpeaker 3.5mm)"
echo "  Wake word:   hey_jarvis"
echo ""
echo "  Next steps:"
echo "    1. In Home Assistant, go to Settings → Devices & Services → Add Integration"
echo "    2. Search for 'Wyoming Protocol'"
# The host shown is the first address reported by `hostname -I`
echo "    3. Enter host: $(hostname -I | awk '{print $1}')  port: ${SATELLITE_PORT}"
echo "    4. Assign the HomeAI voice pipeline to this satellite"
echo ""
echo "  Useful commands:"
echo "    journalctl -u homeai-satellite.service -f    # live logs"
echo "    sudo systemctl restart homeai-satellite       # restart"
echo "    sudo systemctl status homeai-satellite        # status"
echo "    arecord -D ${MIC_DEVICE} -d 3 -f S16_LE -r 16000 /tmp/test.wav  # test mic"
echo "    aplay -D ${SPK_DEVICE} /tmp/test.wav          # test speaker"
echo ""