diff --git a/homeai-esp32/PLAN.md b/homeai-esp32/PLAN.md index 857bfe3..0816133 100644 --- a/homeai-esp32/PLAN.md +++ b/homeai-esp32/PLAN.md @@ -64,6 +64,9 @@ ESP32-S3-BOX-3 | Display Reset | GPIO48 | inverted | | Backlight | GPIO47 | LEDC PWM | | Left top button | GPIO0 | strapping pin — mute toggle / factory reset | +| Sensor dock I2C SCL | GPIO40 | sensor bus (AHT-30, AT581x radar) | +| Sensor dock I2C SDA | GPIO41 | sensor bus (AHT-30, AT581x radar) | +| Radar presence output | GPIO21 | AT581x digital detection pin | --- @@ -102,7 +105,18 @@ On-device `micro_wake_word` component with `hey_jarvis` model. Can optionally be ### Display -`ili9xxx` platform with model `S3BOX`. Uses `update_interval: never` — display updates are triggered by scripts on voice assistant state changes. Static 320×240 PNG images for each state are compiled into firmware. +`ili9xxx` platform with model `S3BOX`. Uses `update_interval: never` — display updates are triggered by scripts on voice assistant state changes. Static 320×240 PNG images for each state are compiled into firmware. No text overlays — voice-only interaction. + +Screen auto-dims after a configurable idle timeout (default 1 min, adjustable 1–60 min via HA entity). Wakes on voice activity or radar presence detection. + +### Sensor Dock (ESP32-S3-BOX-3-SENSOR) + +Optional accessory dock connected via secondary I2C bus (GPIO40/41, 100kHz): + +- **AHT-30** (temp/humidity) — `aht10` component with variant AHT20, 30s update interval +- **AT581x mmWave radar** — presence detection via GPIO21, I2C for settings config +- **Radar RF switch** — toggle radar on/off from HA +- Radar configured on boot: sensing_distance=600, trigger_keep=5s, hw_frontend_reset=true ### Voice Assistant @@ -202,7 +216,7 @@ cd ~/gitea/homeai/homeai-esp32 ## Known Constraints -- **Memory**: voice_assistant + micro_wake_word + display is near the limit. Do NOT add Bluetooth or LVGL widgets — they will cause crashes. 
+- **Memory**: voice_assistant + micro_wake_word + display + sensor dock is near the limit. Do NOT add Bluetooth or LVGL widgets — they will cause crashes. - **WiFi**: 2.4GHz only. 5GHz networks are not supported. - **Speaker**: 1W built-in. Volume capped at 85% to avoid distortion. - **Display**: Static PNGs compiled into firmware. To change images, reflash via OTA (~1-2 min). diff --git a/homeai-esp32/deploy.sh b/homeai-esp32/deploy.sh index 11f39d3..f0e0c1b 100755 --- a/homeai-esp32/deploy.sh +++ b/homeai-esp32/deploy.sh @@ -116,7 +116,28 @@ check_images() { Place 320x240 PNGs in ${ILLUSTRATIONS_DIR}/ or use --regen-images to generate placeholders." fi - log_ok "All ${#REQUIRED_IMAGES[@]} illustrations present in illustrations/" + # Resize any images that aren't 320x240 + local resized=0 + for name in "${REQUIRED_IMAGES[@]}"; do + local img_path="${ILLUSTRATIONS_DIR}/${name}.png" + local dims + dims=$("${PYTHON}" -c "from PIL import Image; im=Image.open('${img_path}'); print(f'{im.width}x{im.height}')") + if [[ "$dims" != "320x240" ]]; then + log_warn "${name}.png is ${dims}, resizing to 320x240..." 
+ "${PYTHON}" -c " +from PIL import Image +im = Image.open('${img_path}') +im = im.resize((320, 240), Image.LANCZOS) +im.save('${img_path}') +" + resized=$((resized + 1)) + fi + done + if [[ $resized -gt 0 ]]; then + log_ok "Resized ${resized} image(s) to 320x240" + fi + + log_ok "All ${#REQUIRED_IMAGES[@]} illustrations present and 320x240" for name in "${REQUIRED_IMAGES[@]}"; do local size size=$(wc -c < "${ILLUSTRATIONS_DIR}/${name}.png" | tr -d ' ') @@ -208,10 +229,8 @@ if $REGEN_IMAGES; then generate_faces fi -# Check existing images if deploying with --images-only (or always before deploy) -if $IMAGES_ONLY; then - check_images -fi +# Check existing images (verify present + resize if not 320x240) +check_images # Validate only if $VALIDATE_ONLY; then diff --git a/homeai-esp32/esphome/homeai-living-room.yaml b/homeai-esp32/esphome/homeai-living-room.yaml index fed3544..b0f4fcf 100644 --- a/homeai-esp32/esphome/homeai-living-room.yaml +++ b/homeai-esp32/esphome/homeai-living-room.yaml @@ -33,8 +33,8 @@ substitutions: voice_assist_muted_phase_id: "12" voice_assist_timer_finished_phase_id: "20" - font_glyphsets: "GF_Latin_Core" font_family: Figtree + font_glyphsets: "GF_Latin_Core" esphome: name: ${name} @@ -45,6 +45,11 @@ esphome: priority: 600 then: - script.execute: draw_display + - at581x.settings: + id: radar + hw_frontend_reset: true + sensing_distance: 600 + trigger_keep: 5000ms - delay: 30s - if: condition: @@ -136,6 +141,14 @@ binary_sensor: - ON for at least 10s then: - button.press: factory_reset_btn + - platform: gpio + pin: GPIO21 + name: Presence + id: radar_presence + device_class: occupancy + on_press: + - script.execute: screen_wake + - script.execute: screen_idle_timer # --- Display backlight --- @@ -157,8 +170,13 @@ light: # --- Audio hardware --- i2c: - scl: GPIO18 - sda: GPIO8 + - id: audio_bus + scl: GPIO18 + sda: GPIO8 + - id: sensor_bus + scl: GPIO40 + sda: GPIO41 + frequency: 100kHz i2s_audio: - id: i2s_audio_bus @@ -171,12 +189,14 @@ 
i2s_audio: audio_adc: - platform: es7210 id: es7210_adc + i2c_id: audio_bus bits_per_sample: 16bit sample_rate: 16000 audio_dac: - platform: es8311 id: es8311_dac + i2c_id: audio_bus bits_per_sample: 16bit sample_rate: 48000 @@ -265,25 +285,11 @@ voice_assistant: volume_multiplier: 2.0 on_listening: - lambda: id(voice_assistant_phase) = ${voice_assist_listening_phase_id}; - - text_sensor.template.publish: - id: text_request - state: "..." - - text_sensor.template.publish: - id: text_response - state: "..." - script.execute: draw_display on_stt_vad_end: - lambda: id(voice_assistant_phase) = ${voice_assist_thinking_phase_id}; - script.execute: draw_display - on_stt_end: - - text_sensor.template.publish: - id: text_request - state: !lambda return x; - - script.execute: draw_display on_tts_start: - - text_sensor.template.publish: - id: text_response - state: !lambda return x; - lambda: id(voice_assistant_phase) = ${voice_assist_replying_phase_id}; - script.execute: draw_display on_end: @@ -305,12 +311,6 @@ voice_assistant: - micro_wake_word.start: - script.execute: set_idle_or_mute_phase - script.execute: draw_display - - text_sensor.template.publish: - id: text_request - state: "" - - text_sensor.template.publish: - id: text_response - state: "" on_error: - if: condition: @@ -371,36 +371,43 @@ script: - lambda: | switch(id(voice_assistant_phase)) { case ${voice_assist_listening_phase_id}: + id(screen_wake).execute(); id(s3_box_lcd).show_page(listening_page); id(s3_box_lcd).update(); break; case ${voice_assist_thinking_phase_id}: + id(screen_wake).execute(); id(s3_box_lcd).show_page(thinking_page); id(s3_box_lcd).update(); break; case ${voice_assist_replying_phase_id}: + id(screen_wake).execute(); id(s3_box_lcd).show_page(replying_page); id(s3_box_lcd).update(); break; case ${voice_assist_error_phase_id}: + id(screen_wake).execute(); id(s3_box_lcd).show_page(error_page); id(s3_box_lcd).update(); break; case ${voice_assist_muted_phase_id}: 
id(s3_box_lcd).show_page(muted_page); id(s3_box_lcd).update(); + id(screen_idle_timer).execute(); break; case ${voice_assist_not_ready_phase_id}: id(s3_box_lcd).show_page(no_ha_page); id(s3_box_lcd).update(); break; case ${voice_assist_timer_finished_phase_id}: + id(screen_wake).execute(); id(s3_box_lcd).show_page(timer_finished_page); id(s3_box_lcd).update(); break; default: id(s3_box_lcd).show_page(idle_page); id(s3_box_lcd).update(); + id(screen_idle_timer).execute(); } else: - display.page.show: no_ha_page @@ -545,6 +552,23 @@ script: else: - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id}; + - id: screen_idle_timer + mode: restart + then: + - delay: !lambda return id(screen_off_delay).state * 60000; + - light.turn_off: led + + - id: screen_wake + mode: restart + then: + - if: + condition: + light.is_off: led + then: + - light.turn_on: + id: led + brightness: 100% + # --- Switches --- switch: @@ -556,6 +580,10 @@ switch: restore_mode: RESTORE_DEFAULT_ON entity_category: config disabled_by_default: true + - platform: at581x + at581x_id: radar + name: Radar RF + entity_category: config - platform: template name: Mute id: mute @@ -646,6 +674,46 @@ select: then: - micro_wake_word.start +# --- Screen idle timeout (minutes) --- + +number: + - platform: template + name: Screen off delay + id: screen_off_delay + icon: "mdi:timer-outline" + entity_category: config + unit_of_measurement: min + optimistic: true + restore_value: true + min_value: 1 + max_value: 60 + step: 1 + initial_value: 1 + +# --- Sensor dock (ESP32-S3-BOX-3-SENSOR) --- + +sensor: + - platform: aht10 + variant: AHT20 + i2c_id: sensor_bus + temperature: + name: Temperature + filters: + - sliding_window_moving_average: + window_size: 5 + send_every: 5 + humidity: + name: Humidity + filters: + - sliding_window_moving_average: + window_size: 5 + send_every: 5 + update_interval: 30s + +at581x: + i2c_id: sensor_bus + id: radar + # --- Global variables --- globals: @@ -719,26 +787,9 @@ 
image: type: RGB transparency: alpha_channel -# --- Fonts --- +# --- Fonts (timer widget only) --- font: - - file: - type: gfonts - family: ${font_family} - weight: 300 - italic: true - id: font_request - size: 15 - glyphsets: - - ${font_glyphsets} - - file: - type: gfonts - family: ${font_family} - weight: 300 - id: font_response - size: 15 - glyphsets: - - ${font_glyphsets} - file: type: gfonts family: ${font_family} @@ -748,28 +799,6 @@ font: glyphsets: - ${font_glyphsets} -# --- Text sensors (request/response display) --- - -text_sensor: - - id: text_request - platform: template - on_value: - lambda: |- - if(id(text_request).state.length()>32) { - std::string name = id(text_request).state.c_str(); - std::string truncated = esphome::str_truncate(name.c_str(),31); - id(text_request).state = (truncated+"...").c_str(); - } - - id: text_response - platform: template - on_value: - lambda: |- - if(id(text_response).state.length()>32) { - std::string name = id(text_response).state.c_str(); - std::string truncated = esphome::str_truncate(name.c_str(),31); - id(text_response).state = (truncated+"...").c_str(); - } - # --- Colors --- color: @@ -825,20 +854,11 @@ display: lambda: |- it.fill(id(thinking_color)); it.image((it.get_width() / 2), (it.get_height() / 2), id(casita_thinking), ImageAlign::CENTER); - it.filled_rectangle(20, 20, 280, 30, Color::WHITE); - it.rectangle(20, 20, 280, 30, Color::BLACK); - it.printf(30, 25, id(font_request), Color::BLACK, "%s", id(text_request).state.c_str()); id(draw_timer_timeline).execute(); - id: replying_page lambda: |- it.fill(id(replying_color)); it.image((it.get_width() / 2), (it.get_height() / 2), id(casita_replying), ImageAlign::CENTER); - it.filled_rectangle(20, 20, 280, 30, Color::WHITE); - it.rectangle(20, 20, 280, 30, Color::BLACK); - it.filled_rectangle(20, 190, 280, 30, Color::WHITE); - it.rectangle(20, 190, 280, 30, Color::BLACK); - it.printf(30, 25, id(font_request), Color::BLACK, "%s", id(text_request).state.c_str()); - 
it.printf(30, 195, id(font_response), Color::BLACK, "%s", id(text_response).state.c_str()); id(draw_timer_timeline).execute(); - id: timer_finished_page lambda: |- diff --git a/homeai-rpi/deploy.sh b/homeai-rpi/deploy.sh new file mode 100755 index 0000000..e4c9c0a --- /dev/null +++ b/homeai-rpi/deploy.sh @@ -0,0 +1,159 @@ +#!/usr/bin/env bash +# homeai-rpi/deploy.sh — Deploy/manage Wyoming Satellite on Raspberry Pi from Mac Mini +# +# Usage: +# ./deploy.sh — full setup (push + install on Pi) +# ./deploy.sh --status — check satellite status +# ./deploy.sh --restart — restart satellite service +# ./deploy.sh --logs — tail satellite logs +# ./deploy.sh --test-audio — record 3s from mic, play back through speaker +# ./deploy.sh --update — update Python packages only + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# ─── Pi connection ────────────────────────────────────────────────────────── + +PI_HOST="SELBINA.local" +PI_USER="aodhan" +PI_SSH="${PI_USER}@${PI_HOST}" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +CYAN='\033[0;36m' +NC='\033[0m' + +log_info() { echo -e "${BLUE}[INFO]${NC} $*"; } +log_ok() { echo -e "${GREEN}[OK]${NC} $*"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +log_error() { echo -e "${RED}[ERROR]${NC} $*"; exit 1; } +log_step() { echo -e "${CYAN}[STEP]${NC} $*"; } + +# ─── SSH helpers ──────────────────────────────────────────────────────────── + +pi_ssh() { + ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=accept-new "${PI_SSH}" "$@" +} + +pi_scp() { + scp -o ConnectTimeout=5 -o StrictHostKeyChecking=accept-new "$@" +} + +check_connectivity() { + log_step "Checking connectivity to ${PI_HOST}..." + if ! ping -c 1 -t 3 "${PI_HOST}" &>/dev/null; then + log_error "Cannot reach ${PI_HOST}. Is the Pi on?" + fi + if ! pi_ssh "echo ok" &>/dev/null; then + log_error "SSH to ${PI_SSH} failed. 
Set up SSH keys: + ssh-copy-id ${PI_SSH}" + fi + log_ok "Connected to ${PI_HOST}" +} + +# ─── Commands ─────────────────────────────────────────────────────────────── + +cmd_setup() { + check_connectivity + + log_step "Pushing setup script to Pi..." + pi_scp "${SCRIPT_DIR}/setup.sh" "${PI_SSH}:~/homeai-satellite-setup.sh" + + log_step "Running setup on Pi..." + pi_ssh "chmod +x ~/homeai-satellite-setup.sh && ~/homeai-satellite-setup.sh" + + log_ok "Setup complete!" +} + +cmd_status() { + check_connectivity + log_step "Satellite status:" + pi_ssh "systemctl status homeai-satellite.service --no-pager" || true +} + +cmd_restart() { + check_connectivity + log_step "Restarting satellite..." + pi_ssh "sudo systemctl restart homeai-satellite.service" + sleep 2 + pi_ssh "systemctl is-active homeai-satellite.service" && log_ok "Satellite running" || log_warn "Satellite not active" +} + +cmd_logs() { + check_connectivity + log_info "Tailing satellite logs (Ctrl+C to stop)..." + pi_ssh "journalctl -u homeai-satellite.service -f --no-hostname" +} + +cmd_test_audio() { + check_connectivity + log_step "Recording 3 seconds from mic..." + pi_ssh "arecord -D plughw:2,0 -d 3 -f S16_LE -r 16000 -c 1 /tmp/homeai-test.wav 2>/dev/null" + log_step "Playing back through speaker..." + pi_ssh "aplay -D plughw:2,0 /tmp/homeai-test.wav 2>/dev/null" + log_ok "Audio test complete. Did you hear yourself?" +} + +cmd_update() { + check_connectivity + log_step "Updating Python packages on Pi..." + pi_ssh "source ~/homeai-satellite/venv/bin/activate && pip install --upgrade wyoming-satellite openwakeword -q" + + log_step "Pushing latest scripts..." + pi_scp "${SCRIPT_DIR}/satellite_wrapper.py" "${PI_SSH}:~/homeai-satellite/satellite_wrapper.py" + pi_ssh "sudo systemctl restart homeai-satellite.service" + + log_ok "Updated and restarted" +} + +cmd_push_wrapper() { + check_connectivity + log_step "Pushing satellite_wrapper.py..." 
+ pi_scp "${SCRIPT_DIR}/satellite_wrapper.py" "${PI_SSH}:~/homeai-satellite/satellite_wrapper.py" + log_step "Restarting satellite..." + pi_ssh "sudo systemctl restart homeai-satellite.service" + sleep 2 + pi_ssh "systemctl is-active homeai-satellite.service" && log_ok "Satellite running" || log_warn "Satellite not active — check logs" +} + +cmd_test_logs() { + check_connectivity + log_info "Filtered satellite logs — key events only (Ctrl+C to stop)..." + pi_ssh "journalctl -u homeai-satellite.service -f --no-hostname" \ + | grep --line-buffered -iE \ + 'Waiting for wake|Streaming audio|transcript|synthesize|Speaker active|unmute|_writer|timeout|error|Error|Wake word detected|re-arming|resetting' +} + +# ─── Main ─────────────────────────────────────────────────────────────────── + +case "${1:-}" in + --status) cmd_status ;; + --restart) cmd_restart ;; + --logs) cmd_logs ;; + --test-audio) cmd_test_audio ;; + --test-logs) cmd_test_logs ;; + --update) cmd_update ;; + --push-wrapper) cmd_push_wrapper ;; + --help|-h) + echo "Usage: $0 [command]" + echo "" + echo "Commands:" + echo " (none) Full setup — push and install satellite on Pi" + echo " --status Check satellite service status" + echo " --restart Restart satellite service" + echo " --logs Tail satellite logs (live, all)" + echo " --test-logs Tail filtered logs (key events only)" + echo " --test-audio Record 3s from mic, play back on speaker" + echo " --push-wrapper Push satellite_wrapper.py and restart (fast iteration)" + echo " --update Update packages and restart" + echo " --help Show this help" + echo "" + echo "Pi: ${PI_SSH} (${PI_HOST})" + ;; + "") cmd_setup ;; + *) log_error "Unknown command: $1. Use --help for usage." 
;; +esac diff --git a/homeai-rpi/satellite_wrapper.py b/homeai-rpi/satellite_wrapper.py new file mode 100644 index 0000000..51d1007 --- /dev/null +++ b/homeai-rpi/satellite_wrapper.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python3 +"""Wyoming Satellite wrapper — echo suppression, writer resilience, streaming timeout. + +Monkey-patches WakeStreamingSatellite to fix three compounding bugs that cause +the satellite to freeze after the first voice command: + +1. TTS Echo: Mic picks up speaker audio → false wake word trigger → Whisper + hallucinates on silence. Fix: mute mic→wake forwarding while speaker is active. + +2. Server Writer Race: HA disconnects after first command, _writer becomes None. + If wake word fires before HA reconnects, _send_run_pipeline() silently drops + the event → satellite stuck in is_streaming=True forever. + Fix: check _writer before entering streaming mode; re-arm wake if no server. + +3. No Streaming Timeout: Once stuck in streaming mode, there's no recovery. + Fix: auto-reset after 30s if no Transcript arrives. + +4. Error events don't reset streaming state in upstream code. + Fix: reset is_streaming on Error events from server. + +Usage: python3 satellite_wrapper.py +""" + +import asyncio +import logging +import time + +from wyoming.audio import AudioChunk, AudioStart, AudioStop +from wyoming.error import Error +from wyoming.wake import Detection + +from wyoming_satellite.satellite import WakeStreamingSatellite + +_LOGGER = logging.getLogger() + +# ─── Tuning constants ──────────────────────────────────────────────────────── + +# How long to keep wake muted after the last AudioStop from the server. +# Must be long enough for sox→aplay buffer to drain (~1-2s) plus audio decay. +_GRACE_SECONDS = 5.0 + +# Safety valve — unmute even if no AudioStop arrives (e.g. long TTS response). +_MAX_MUTE_SECONDS = 45.0 + +# Max time in streaming mode without receiving a Transcript or Error. +# Prevents permanent freeze if server never responds. 
+_STREAMING_TIMEOUT = 30.0 + +# ─── Save original methods ─────────────────────────────────────────────────── + +_orig_event_from_server = WakeStreamingSatellite.event_from_server +_orig_event_from_mic = WakeStreamingSatellite.event_from_mic +_orig_event_from_wake = WakeStreamingSatellite.event_from_wake +_orig_trigger_detection = WakeStreamingSatellite.trigger_detection +_orig_trigger_transcript = WakeStreamingSatellite.trigger_transcript + +# ─── Patch A: Mute wake on awake.wav ───────────────────────────────────────── + + +async def _patched_trigger_detection(self, detection): + """Mute wake word detection when awake.wav starts playing.""" + self._speaker_mute_start = time.monotonic() + self._speaker_active = True + _LOGGER.debug("Speaker active (awake.wav) — wake detection muted") + await _orig_trigger_detection(self, detection) + + +# ─── Patch B: Mute wake on done.wav ────────────────────────────────────────── + + +async def _patched_trigger_transcript(self, transcript): + """Keep muted through done.wav playback.""" + self._speaker_active = True + _LOGGER.debug("Speaker active (done.wav) — wake detection muted") + await _orig_trigger_transcript(self, transcript) + + +# ─── Patch C: Echo tracking + error recovery ───────────────────────────────── + + +async def _patched_event_from_server(self, event): + """Track TTS audio for echo suppression; reset streaming on errors.""" + # Echo suppression: track when speaker is active + if AudioStart.is_type(event.type): + self._speaker_active = True + self._speaker_mute_start = time.monotonic() + _LOGGER.debug("Speaker active (TTS) — wake detection muted") + elif AudioStop.is_type(event.type): + self._speaker_unmute_at = time.monotonic() + _GRACE_SECONDS + _LOGGER.debug( + "TTS finished — will unmute wake in %.1fs", _GRACE_SECONDS + ) + + # Error recovery: reset streaming state if server reports an error + if Error.is_type(event.type) and self.is_streaming: + _LOGGER.warning("Error from server while streaming — 
resetting") + self.is_streaming = False + + # Call original handler (plays done.wav, forwards TTS audio, etc.) + await _orig_event_from_server(self, event) + + # After original handler: if Error arrived, re-arm wake detection + if Error.is_type(event.type) and not self.is_streaming: + await self.trigger_streaming_stop() + await self._send_wake_detect() + _LOGGER.info("Waiting for wake word (after error)") + + +# ─── Patch D: Echo suppression + streaming timeout ─────────────────────────── + + +async def _patched_event_from_mic(self, event, audio_bytes=None): + """Drop mic audio during speaker playback; timeout stuck streaming.""" + + # --- Streaming timeout --- + if self.is_streaming: + elapsed = time.monotonic() - getattr(self, "_streaming_start_time", 0) + if elapsed > _STREAMING_TIMEOUT: + _LOGGER.warning( + "Streaming timeout (%.0fs) — no Transcript received, resetting", + elapsed, + ) + self.is_streaming = False + # Tell server we're done sending audio + await self.event_to_server(AudioStop().event()) + await self.trigger_streaming_stop() + await self._send_wake_detect() + _LOGGER.info("Waiting for wake word (after timeout)") + return + + # --- Echo suppression --- + if getattr(self, "_speaker_active", False) and not self.is_streaming: + now = time.monotonic() + + # Check if grace period has elapsed after AudioStop + unmute_at = getattr(self, "_speaker_unmute_at", None) + if unmute_at and now >= unmute_at: + self._speaker_active = False + self._speaker_unmute_at = None + _LOGGER.debug("Wake detection unmuted (grace period elapsed)") + # Safety valve — don't stay muted forever + elif now - getattr(self, "_speaker_mute_start", now) > _MAX_MUTE_SECONDS: + self._speaker_active = False + self._speaker_unmute_at = None + _LOGGER.warning("Wake detection force-unmuted (max mute timeout)") + elif AudioChunk.is_type(event.type): + # Drop this mic chunk — don't feed speaker audio to wake word + return + + await _orig_event_from_mic(self, event, audio_bytes) + + +# ─── 
Patch E: Writer check before streaming (THE CRITICAL FIX) ─────────────── + + +async def _patched_event_from_wake(self, event): + """Check server connection before entering streaming mode.""" + if self.is_streaming: + return + + if Detection.is_type(event.type): + # THE FIX: If no server connection, don't enter streaming mode. + # Without this, _send_run_pipeline() silently drops the RunPipeline + # event, and the satellite is stuck in is_streaming=True forever. + if self._writer is None: + _LOGGER.warning( + "Wake word detected but no server connection — re-arming" + ) + await self._send_wake_detect() + return + + self.is_streaming = True + self._streaming_start_time = time.monotonic() + _LOGGER.debug("Streaming audio") + await self._send_run_pipeline() + await self.forward_event(event) + await self.trigger_detection(Detection.from_event(event)) + await self.trigger_streaming_start() + + +# ─── Apply patches ─────────────────────────────────────────────────────────── + +WakeStreamingSatellite.event_from_server = _patched_event_from_server +WakeStreamingSatellite.event_from_mic = _patched_event_from_mic +WakeStreamingSatellite.event_from_wake = _patched_event_from_wake +WakeStreamingSatellite.trigger_detection = _patched_trigger_detection +WakeStreamingSatellite.trigger_transcript = _patched_trigger_transcript + +# Instance attributes (set as class defaults so they exist before __init__) +WakeStreamingSatellite._speaker_active = False +WakeStreamingSatellite._speaker_unmute_at = None +WakeStreamingSatellite._speaker_mute_start = 0.0 +WakeStreamingSatellite._streaming_start_time = 0.0 + +# ─── Run the original main ─────────────────────────────────────────────────── + +if __name__ == "__main__": + from wyoming_satellite.__main__ import main + + try: + asyncio.run(main()) + except KeyboardInterrupt: + pass diff --git a/homeai-rpi/setup.sh b/homeai-rpi/setup.sh new file mode 100755 index 0000000..08b2c73 --- /dev/null +++ b/homeai-rpi/setup.sh @@ -0,0 +1,527 @@ 
+#!/usr/bin/env bash +# homeai-rpi/setup.sh — Bootstrap a Raspberry Pi as a Wyoming Satellite +# +# Run this ON the Pi (or push via deploy.sh from Mac Mini): +# curl -sL http://10.0.0.101:3000/aodhan/homeai/raw/branch/main/homeai-rpi/setup.sh | bash +# — or — +# ./setup.sh +# +# Prerequisites: +# - Raspberry Pi 5 with Raspberry Pi OS (Bookworm) +# - ReSpeaker 2-Mics pHAT installed and driver loaded (card shows in aplay -l) +# - Network connectivity to Mac Mini (10.0.0.101) + +set -euo pipefail + +# ─── Configuration ────────────────────────────────────────────────────────── + +SATELLITE_NAME="homeai-kitchen" +SATELLITE_AREA="Kitchen" +MAC_MINI_IP="10.0.0.101" + +# ReSpeaker 2-Mics pHAT — card 2 on Pi 5 +# Using plughw for automatic format conversion (sample rate, channels) +MIC_DEVICE="plughw:2,0" +SPK_DEVICE="plughw:2,0" + +# Wyoming satellite port (unique per satellite if running multiple) +SATELLITE_PORT="10700" + +# Directories +INSTALL_DIR="${HOME}/homeai-satellite" +VENV_DIR="${INSTALL_DIR}/venv" +SOUNDS_DIR="${INSTALL_DIR}/sounds" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +CYAN='\033[0;36m' +NC='\033[0m' + +log_info() { echo -e "${BLUE}[INFO]${NC} $*"; } +log_ok() { echo -e "${GREEN}[OK]${NC} $*"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +log_error() { echo -e "${RED}[ERROR]${NC} $*"; exit 1; } +log_step() { echo -e "${CYAN}[STEP]${NC} $*"; } + +# ─── Preflight checks ────────────────────────────────────────────────────── + +log_step "Preflight checks..." + +# Check we're on a Pi +if ! grep -qi "raspberry\|bcm" /proc/cpuinfo 2>/dev/null; then + log_warn "This doesn't look like a Raspberry Pi — proceeding anyway" +fi + +# Check ReSpeaker is available +if ! aplay -l 2>/dev/null | grep -q "seeed-2mic-voicecard"; then + log_error "ReSpeaker 2-Mics pHAT not found in aplay -l. Is the driver loaded?" +fi +log_ok "ReSpeaker 2-Mics pHAT detected" + +# Check Python 3 +if ! 
command -v python3 &>/dev/null; then + log_error "python3 not found. Install with: sudo apt install python3 python3-venv python3-pip" +fi +log_ok "Python $(python3 --version | cut -d' ' -f2)" + +# ─── Install system dependencies ─────────────────────────────────────────── + +log_step "Installing system dependencies..." +sudo apt-get update -qq +# Allow non-zero exit — pre-existing DKMS/kernel issues (e.g. seeed-voicecard +# failing to build against a pending kernel update) can cause apt to return +# errors even though our packages installed successfully. +sudo apt-get install -y -qq \ + python3-venv \ + python3-pip \ + alsa-utils \ + sox \ + libsox-fmt-all \ + libopenblas0 \ + 2>/dev/null || log_warn "apt-get returned errors (likely pre-existing kernel/DKMS issue — continuing)" + +# Verify the packages we actually need are present +for cmd in sox arecord aplay; do + command -v "$cmd" &>/dev/null || log_error "${cmd} not found after install" +done +log_ok "System dependencies installed" + +# ─── Create install directory ─────────────────────────────────────────────── + +log_step "Setting up ${INSTALL_DIR}..." +mkdir -p "${INSTALL_DIR}" "${SOUNDS_DIR}" + +# ─── Create Python venv ──────────────────────────────────────────────────── + +if [[ ! -d "${VENV_DIR}" ]]; then + log_step "Creating Python virtual environment..." + python3 -m venv "${VENV_DIR}" +fi + +source "${VENV_DIR}/bin/activate" +pip install --upgrade pip setuptools wheel -q + +# ─── Install Wyoming Satellite + openWakeWord ────────────────────────────── + +log_step "Installing Wyoming Satellite..." +pip install wyoming-satellite -q + +log_step "Installing openWakeWord..." +pip install openwakeword -q + +log_step "Installing numpy..." +pip install numpy -q + +log_ok "All Python packages installed" + +# ─── Copy wakeword command script ────────────────────────────────────────── + +log_step "Installing wake word detection script..." 
+cat > "${INSTALL_DIR}/wakeword_command.py" << 'PYEOF' +#!/usr/bin/env python3 +"""Wake word detection command for Wyoming Satellite. + +The satellite feeds raw 16kHz 16-bit mono audio via stdin. +This script reads that audio, runs openWakeWord, and prints +the wake word name to stdout when detected. + +Usage (called by wyoming-satellite --wake-command): + python wakeword_command.py [--wake-word hey_jarvis] [--threshold 0.3] +""" + +import argparse +import sys +import numpy as np +import logging + +_LOGGER = logging.getLogger(__name__) + +SAMPLE_RATE = 16000 +CHUNK_SIZE = 1280 # ~80ms at 16kHz — recommended by openWakeWord + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--wake-word", default="hey_jarvis") + parser.add_argument("--threshold", type=float, default=0.5) + parser.add_argument("--cooldown", type=float, default=3.0) + parser.add_argument("--debug", action="store_true") + args = parser.parse_args() + + logging.basicConfig( + level=logging.DEBUG if args.debug else logging.WARNING, + format="%(asctime)s %(levelname)s %(message)s", + stream=sys.stderr, + ) + + import openwakeword + from openwakeword.model import Model + + oww = Model( + wakeword_models=[args.wake_word], + inference_framework="onnx", + ) + + import time + last_trigger = 0.0 + bytes_per_chunk = CHUNK_SIZE * 2 # 16-bit = 2 bytes per sample + + _LOGGER.debug("Wake word command ready, reading audio from stdin") + + try: + while True: + raw = sys.stdin.buffer.read(bytes_per_chunk) + if not raw: + break + if len(raw) < bytes_per_chunk: + raw = raw + b'\x00' * (bytes_per_chunk - len(raw)) + + chunk = np.frombuffer(raw, dtype=np.int16) + oww.predict(chunk) + + for ww, scores in oww.prediction_buffer.items(): + score = scores[-1] if scores else 0.0 + if score >= args.threshold: + now = time.time() + if now - last_trigger >= args.cooldown: + last_trigger = now + print(ww, flush=True) + _LOGGER.debug("Wake word detected: %s (score=%.3f)", ww, score) + except 
(KeyboardInterrupt, BrokenPipeError):
+        pass
+
+
+if __name__ == "__main__":
+    main()
+PYEOF
+chmod +x "${INSTALL_DIR}/wakeword_command.py"
+log_ok "Wake word script installed"
+
+# ─── Copy satellite wrapper ──────────────────────────────────────────────
+
+log_step "Installing satellite wrapper (echo suppression + writer resilience)..."
+cat > "${INSTALL_DIR}/satellite_wrapper.py" << 'WRAPEOF'
+#!/usr/bin/env python3
+"""Wyoming Satellite wrapper — echo suppression, writer resilience, streaming timeout.
+
+Monkey-patches WakeStreamingSatellite to fix four compounding bugs that cause
+the satellite to freeze after the first voice command:
+
+1. TTS Echo: Mic picks up speaker audio → false wake word trigger
+2. Server Writer Race: _writer is None when wake word fires → silent drop
+3. No Streaming Timeout: stuck in is_streaming=True forever
+4. Error events don't reset streaming state in upstream code
+"""
+
+import asyncio
+import logging
+import time
+
+from wyoming.audio import AudioChunk, AudioStart, AudioStop
+from wyoming.error import Error
+from wyoming.wake import Detection
+
+from wyoming_satellite.satellite import WakeStreamingSatellite
+
+_LOGGER = logging.getLogger()
+
+# Tunables: post-TTS deafness window, hard cap on total mute time (safety
+# valve if an AudioStop is lost), and how long to wait for a transcript.
+_GRACE_SECONDS = 5.0
+_MAX_MUTE_SECONDS = 45.0
+_STREAMING_TIMEOUT = 30.0
+
+# Keep references to the upstream implementations so each patch can delegate.
+_orig_event_from_server = WakeStreamingSatellite.event_from_server
+_orig_event_from_mic = WakeStreamingSatellite.event_from_mic
+_orig_event_from_wake = WakeStreamingSatellite.event_from_wake
+_orig_trigger_detection = WakeStreamingSatellite.trigger_detection
+_orig_trigger_transcript = WakeStreamingSatellite.trigger_transcript
+
+
+async def _patched_trigger_detection(self, detection):
+    # The awake.wav feedback sound is about to play — mute wake detection so
+    # the mic does not re-trigger on our own speaker output (bug 1).
+    self._speaker_mute_start = time.monotonic()
+    self._speaker_active = True
+    _LOGGER.debug("Speaker active (awake.wav) — wake detection muted")
+    await _orig_trigger_detection(self, detection)
+
+
+async def _patched_trigger_transcript(self, transcript):
+    # done.wav is about to play; mute wake detection for the same reason.
+    # NOTE(review): unlike _patched_trigger_detection this does not refresh
+    # _speaker_mute_start — confirm the max-mute timer is meant to keep
+    # running from the earlier detection.
+    self._speaker_active = True
+    _LOGGER.debug("Speaker active (done.wav) — wake detection muted")
+    await _orig_trigger_transcript(self, transcript)
+
+
+async def _patched_event_from_server(self, event):
+    # Track TTS playback so speaker echo cannot retrigger the wake word
+    # (bug 1), and reset streaming state on server errors (bug 4).
+    if AudioStart.is_type(event.type):
+        self._speaker_active = True
+        self._speaker_mute_start = time.monotonic()
+        _LOGGER.debug("Speaker active (TTS) — wake detection muted")
+    elif AudioStop.is_type(event.type):
+        # Stay muted for a grace period: audio may still be draining out of
+        # the playback pipeline after the server reports it stopped.
+        self._speaker_unmute_at = time.monotonic() + _GRACE_SECONDS
+        _LOGGER.debug("TTS finished — will unmute wake in %.1fs", _GRACE_SECONDS)
+
+    if Error.is_type(event.type) and self.is_streaming:
+        _LOGGER.warning("Error from server while streaming — resetting")
+        self.is_streaming = False
+
+    await _orig_event_from_server(self, event)
+
+    if Error.is_type(event.type) and not self.is_streaming:
+        # Upstream does not re-arm after an error; do it here so the
+        # satellite goes back to listening for the wake word.
+        await self.trigger_streaming_stop()
+        await self._send_wake_detect()
+        _LOGGER.info("Waiting for wake word (after error)")
+
+
+async def _patched_event_from_mic(self, event, audio_bytes=None):
+    # Watchdog for bug 3: if streaming has run too long without a
+    # Transcript, force-reset and re-arm wake detection.
+    if self.is_streaming:
+        elapsed = time.monotonic() - getattr(self, "_streaming_start_time", 0)
+        if elapsed > _STREAMING_TIMEOUT:
+            _LOGGER.warning(
+                "Streaming timeout (%.0fs) — no Transcript received, resetting",
+                elapsed,
+            )
+            self.is_streaming = False
+            await self.event_to_server(AudioStop().event())
+            await self.trigger_streaming_stop()
+            await self._send_wake_detect()
+            _LOGGER.info("Waiting for wake word (after timeout)")
+            return
+
+    # Echo suppression: while the speaker is (or was just) active, drop mic
+    # audio chunks instead of forwarding them to wake-word detection.
+    if getattr(self, "_speaker_active", False) and not self.is_streaming:
+        now = time.monotonic()
+        unmute_at = getattr(self, "_speaker_unmute_at", None)
+        if unmute_at and now >= unmute_at:
+            self._speaker_active = False
+            self._speaker_unmute_at = None
+            _LOGGER.debug("Wake detection unmuted (grace period elapsed)")
+        elif now - getattr(self, "_speaker_mute_start", now) > _MAX_MUTE_SECONDS:
+            # Safety valve: never stay muted forever if AudioStop got lost.
+            self._speaker_active = False
+            self._speaker_unmute_at = None
+            _LOGGER.warning("Wake detection force-unmuted (max mute timeout)")
+        elif AudioChunk.is_type(event.type):
+            return
+
+    await _orig_event_from_mic(self, event, audio_bytes)
+
+
+async def _patched_event_from_wake(self, event):
+    if self.is_streaming:
+        return
+    if Detection.is_type(event.type):
+        # Bug 2: upstream silently drops the detection when the server
+        # connection is down; instead, log it and re-arm the wake service.
+        if self._writer is None:
+            _LOGGER.warning(
+                "Wake word detected but no server connection — re-arming"
+            )
+            await self._send_wake_detect()
+            return
+        self.is_streaming = True
+        self._streaming_start_time = time.monotonic()
+        _LOGGER.debug("Streaming audio")
+        await self._send_run_pipeline()
+        await self.forward_event(event)
+        await self.trigger_detection(Detection.from_event(event))
+        await self.trigger_streaming_start()
+
+
+# Install the patches and give the new attributes class-level defaults so
+# getattr() lookups succeed before the first detection.
+WakeStreamingSatellite.event_from_server = _patched_event_from_server
+WakeStreamingSatellite.event_from_mic = _patched_event_from_mic
+WakeStreamingSatellite.event_from_wake = _patched_event_from_wake
+WakeStreamingSatellite.trigger_detection = _patched_trigger_detection
+WakeStreamingSatellite.trigger_transcript = _patched_trigger_transcript
+WakeStreamingSatellite._speaker_active = False
+WakeStreamingSatellite._speaker_unmute_at = None
+WakeStreamingSatellite._speaker_mute_start = 0.0
+WakeStreamingSatellite._streaming_start_time = 0.0
+
+if __name__ == "__main__":
+    from wyoming_satellite.__main__ import main
+
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        pass
+WRAPEOF
+chmod +x "${INSTALL_DIR}/satellite_wrapper.py"
+log_ok "Satellite wrapper installed"
+
+# ─── Download wake word model ──────────────────────────────────────────────
+
+log_step "Downloading hey_jarvis wake word model..."
+# NOTE(review): the grep -v filter only drops noisy "device_discovery" lines;
+# the 'Model downloaded' print always survives it, so the pipeline exits 0
+# on success even under pipefail — confirm set options at top of script.
+"${VENV_DIR}/bin/python3" -c "
+import openwakeword
+openwakeword.utils.download_models(model_names=['hey_jarvis'])
+print('Model downloaded')
+" 2>&1 | grep -v "device_discovery"
+log_ok "Wake word model ready"
+
+# ─── Create mic capture wrapper ────────────────────────────────────────────
+
+log_step "Creating mic capture wrapper (stereo → mono conversion)..."
+cat > "${INSTALL_DIR}/mic-capture.sh" << 'MICEOF'
+#!/bin/bash
+# Record stereo from ReSpeaker WM8960, convert to mono 16kHz 16-bit for Wyoming
+arecord -D plughw:2,0 -r 16000 -c 2 -f S16_LE -t raw -q - | sox -t raw -r 16000 -c 2 -b 16 -e signed-integer - -t raw -r 16000 -c 1 -b 16 -e signed-integer -
+MICEOF
+chmod +x "${INSTALL_DIR}/mic-capture.sh"
+# NOTE(review): ALSA card index 2 is hard-coded in both wrappers (plughw:2,0);
+# confirm it matches the ${MIC_DEVICE}/${SPK_DEVICE} values echoed in the
+# summary, and that card numbering is stable across reboots.
+log_ok "Mic capture wrapper installed"
+
+# ─── Create speaker playback wrapper ──────────────────────────────────────
+
+log_step "Creating speaker playback wrapper (mono → stereo conversion)..."
+cat > "${INSTALL_DIR}/speaker-playback.sh" << 'SPKEOF'
+#!/bin/bash
+# Convert mono 24kHz 16-bit input to stereo for WM8960 playback
+sox -t raw -r 24000 -c 1 -b 16 -e signed-integer - -t raw -r 24000 -c 2 -b 16 -e signed-integer - | aplay -D plughw:2,0 -r 24000 -c 2 -f S16_LE -t raw -q -
+SPKEOF
+chmod +x "${INSTALL_DIR}/speaker-playback.sh"
+log_ok "Speaker playback wrapper installed"
+
+# ─── Fix ReSpeaker overlay for Pi 5 ──────────────────────────────────────────
+
+log_step "Configuring wm8960-soundcard overlay (Pi 5 compatible)..."
+
+# Disable the seeed-voicecard service (loads wrong overlay for Pi 5)
+if systemctl is-enabled seeed-voicecard.service &>/dev/null; then
+    sudo systemctl disable seeed-voicecard.service 2>/dev/null || true
+    log_info "Disabled seeed-voicecard service"
+fi
+
+# Add upstream wm8960-soundcard overlay to config.txt if not present
+if ! grep -q "dtoverlay=wm8960-soundcard" /boot/firmware/config.txt 2>/dev/null; then
+    sudo bash -c 'echo "dtoverlay=wm8960-soundcard" >> /boot/firmware/config.txt'
+    log_info "Added wm8960-soundcard overlay to /boot/firmware/config.txt"
+fi
+
+# Load overlay now if not already active.
+# Removing the old seeed overlay first is best-effort; failures are ignored
+# because the config.txt entry above makes the change permanent on reboot.
+if ! dtoverlay -l 2>/dev/null | grep -q wm8960-soundcard; then
+    sudo dtoverlay -r seeed-2mic-voicecard 2>/dev/null || true
+    sudo dtoverlay wm8960-soundcard 2>/dev/null || true
+fi
+
+log_ok "Audio overlay configured"
+
+# ─── Generate feedback sounds ──────────────────────────────────────────────
+
+log_step "Generating feedback sounds..."
+
+# Must be plain 16-bit PCM WAV — Python wave module can't read WAVE_FORMAT_EXTENSIBLE
+# Awake chime — short 800 Hz tone with quick fade in/out
+sox -n -r 16000 -b 16 -c 1 -e signed-integer "${SOUNDS_DIR}/awake.wav" \
+    synth 0.15 sin 800 fade t 0.01 0.15 0.05 \
+    vol 0.5 \
+    2>/dev/null || log_warn "Could not generate awake.wav (sox issue)"
+
+# Done chime — short 600 Hz tone with quick fade in/out
+sox -n -r 16000 -b 16 -c 1 -e signed-integer "${SOUNDS_DIR}/done.wav" \
+    synth 0.15 sin 600 fade t 0.01 0.15 0.05 \
+    vol 0.5 \
+    2>/dev/null || log_warn "Could not generate done.wav (sox issue)"
+
+log_ok "Feedback sounds ready"
+
+# ─── Set ALSA mixer defaults ─────────────────────────────────────────────────
+
+log_step "Configuring ALSA mixer for ReSpeaker..."
+
+# All mixer calls are deliberately best-effort (2>/dev/null || true) so a
+# missing control name on a different driver version won't abort the script.
+
+# Playback — 80% volume, unmute
+amixer -c 2 sset 'Playback' 80% unmute 2>/dev/null || true
+amixer -c 2 sset 'Speaker' 80% unmute 2>/dev/null || true
+
+# Capture — max out capture volume
+amixer -c 2 sset 'Capture' 100% cap 2>/dev/null || true
+
+# Enable mic input boost (critical — without this, signal is near-silent)
+amixer -c 2 cset name='Left Input Mixer Boost Switch' on 2>/dev/null || true
+amixer -c 2 cset name='Right Input Mixer Boost Switch' on 2>/dev/null || true
+
+# Mic preamp boost to +13dB (level 1 on the 0-3 scale — higher causes clipping)
+amixer -c 2 cset name='Left Input Boost Mixer LINPUT1 Volume' 1 2>/dev/null || true
+amixer -c 2 cset name='Right Input Boost Mixer RINPUT1 Volume' 1 2>/dev/null || true
+
+# ADC capture volume — moderate to avoid clipping (max=255)
+amixer -c 2 cset name='ADC PCM Capture Volume' 180,180 2>/dev/null || true
+
+log_ok "ALSA mixer configured"
+
+# ─── Install systemd service ─────────────────────────────────────────────────
+
+log_step "Installing systemd service..."
+
+# NOTE: SVCEOF is intentionally unquoted so ${VENV_DIR}, ${SATELLITE_*} etc.
+# expand now; each \\ becomes a literal \ (line continuation) in the unit file.
+sudo tee /etc/systemd/system/homeai-satellite.service > /dev/null << SVCEOF
+[Unit]
+Description=HomeAI Wyoming Satellite (${SATELLITE_AREA})
+After=network-online.target sound.target
+Wants=network-online.target
+
+[Service]
+Type=simple
+User=${USER}
+WorkingDirectory=${INSTALL_DIR}
+ExecStart=${VENV_DIR}/bin/python3 ${INSTALL_DIR}/satellite_wrapper.py \\
+    --uri tcp://0.0.0.0:${SATELLITE_PORT} \\
+    --name "${SATELLITE_NAME}" \\
+    --area "${SATELLITE_AREA}" \\
+    --mic-command ${INSTALL_DIR}/mic-capture.sh \\
+    --snd-command ${INSTALL_DIR}/speaker-playback.sh \\
+    --mic-command-rate 16000 \\
+    --mic-command-width 2 \\
+    --mic-command-channels 1 \\
+    --snd-command-rate 24000 \\
+    --snd-command-width 2 \\
+    --snd-command-channels 1 \\
+    --wake-command "${VENV_DIR}/bin/python3 ${INSTALL_DIR}/wakeword_command.py --wake-word hey_jarvis --threshold 0.5" \\
+    --wake-command-rate 16000 \\
+    --wake-command-width 2 \\
+    --wake-command-channels 1 \\
+    --awake-wav ${SOUNDS_DIR}/awake.wav \\
+    --done-wav ${SOUNDS_DIR}/done.wav
+Restart=always
+RestartSec=5
+
+[Install]
+WantedBy=multi-user.target
+SVCEOF
+
+sudo systemctl daemon-reload
+sudo systemctl enable homeai-satellite.service
+sudo systemctl restart homeai-satellite.service
+
+log_ok "systemd service installed and started"
+
+# ─── Verify ──────────────────────────────────────────────────────────────────
+
+log_step "Verifying satellite..."
+# Give the service a moment to crash-loop before checking; a clean 'active'
+# here is only a smoke test, not proof the audio pipeline works.
+sleep 2
+
+if systemctl is-active --quiet homeai-satellite.service; then
+    log_ok "Satellite is running!"
+else
+    log_warn "Satellite may not have started cleanly. Check logs:"
+    echo "  journalctl -u homeai-satellite.service -f"
+fi
+
+echo ""
+echo -e "${GREEN}═══════════════════════════════════════════════════════════════${NC}"
+echo -e "${GREEN}  HomeAI Kitchen Satellite — Setup Complete${NC}"
+echo -e "${GREEN}═══════════════════════════════════════════════════════════════${NC}"
+echo ""
+echo "  Satellite:  ${SATELLITE_NAME} (${SATELLITE_AREA})"
+echo "  Port:       ${SATELLITE_PORT}"
+echo "  Mic:        ${MIC_DEVICE} (ReSpeaker 2-Mics)"
+echo "  Speaker:    ${SPK_DEVICE} (ReSpeaker 3.5mm)"
+echo "  Wake word:  hey_jarvis"
+echo ""
+echo "  Next steps:"
+echo "    1. In Home Assistant, go to Settings → Devices & Services → Add Integration"
+echo "    2. Search for 'Wyoming Protocol'"
+echo "    3. Enter host: $(hostname -I | awk '{print $1}')  port: ${SATELLITE_PORT}"
+echo "    4. Assign the HomeAI voice pipeline to this satellite"
+echo ""
+echo "  Useful commands:"
+echo "    journalctl -u homeai-satellite.service -f   # live logs"
+echo "    sudo systemctl restart homeai-satellite     # restart"
+echo "    sudo systemctl status homeai-satellite      # status"
+echo "    arecord -D ${MIC_DEVICE} -d 3 -f S16_LE -r 16000 /tmp/test.wav   # test mic"
+echo "    aplay -D ${SPK_DEVICE} /tmp/test.wav        # test speaker"
+echo ""
diff --git a/homeai-rpi/speaker-playback.sh b/homeai-rpi/speaker-playback.sh
new file mode 100755
index 0000000..789de39
--- /dev/null
+++ b/homeai-rpi/speaker-playback.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Convert mono 24kHz 16-bit input to stereo for WM8960 playback
+sox -t raw -r 24000 -c 1 -b 16 -e signed-integer - -t raw -r 24000 -c 2 -b 16 -e signed-integer - | aplay -D plughw:2,0 -r 24000 -c 2 -f S16_LE -t raw -q -