---
# HomeAI Living Room Satellite — ESP32-S3-BOX-3
# Based on official ESPHome voice assistant config
# https://github.com/esphome/wake-word-voice-assistants
#
# Overview of how the pieces below fit together:
#   * `voice_assistant_phase` (global) holds one of the phase ids from the
#     substitutions; every state change writes it and runs `draw_display`.
#   * `draw_display` (script) maps init/Wi-Fi/API state and the phase to a
#     display page and forces a redraw (the display never self-updates).
#   * `wake_word_engine_location` (select) decides whether wake word detection
#     runs via micro_wake_word or via voice_assistant continuous mode.

substitutions:
  name: homeai-living-room
  friendly_name: HomeAI Living Room

  # Face illustrations — compiled into firmware (320x240 PNG)
  loading_illustration_file: illustrations/loading.png
  idle_illustration_file: illustrations/idle.png
  listening_illustration_file: illustrations/listening.png
  thinking_illustration_file: illustrations/thinking.png
  replying_illustration_file: illustrations/replying.png
  error_illustration_file: illustrations/error.png
  timer_finished_illustration_file: illustrations/timer_finished.png

  # Dark background for all states (matches HomeAI dashboard theme)
  loading_illustration_background_color: "000000"
  idle_illustration_background_color: "000000"
  listening_illustration_background_color: "000000"
  thinking_illustration_background_color: "000000"
  replying_illustration_background_color: "000000"
  error_illustration_background_color: "000000"

  # Phase ids stored in the `voice_assistant_phase` global and used as the
  # `case` labels in the `draw_display` script.
  voice_assist_idle_phase_id: "1"
  voice_assist_listening_phase_id: "2"
  voice_assist_thinking_phase_id: "3"
  voice_assist_replying_phase_id: "4"
  voice_assist_not_ready_phase_id: "10"
  voice_assist_error_phase_id: "11"
  voice_assist_muted_phase_id: "12"
  voice_assist_timer_finished_phase_id: "20"

  font_glyphsets: "GF_Latin_Core"
  font_family: Figtree

esphome:
  name: ${name}
  friendly_name: ${friendly_name}
  min_version: 2025.5.0
  name_add_mac_suffix: false
  on_boot:
    priority: 600
    then:
      - script.execute: draw_display
      # If initialisation has not been cleared 30 s after boot, clear it here
      # so that draw_display stops showing the initializing page.
      - delay: 30s
      - if:
          condition:
            lambda: return id(init_in_progress);
          then:
            - lambda: id(init_in_progress) = false;
            - script.execute: draw_display

esp32:
  board: esp32s3box
  flash_size: 16MB
  cpu_frequency: 240MHz
  framework:
    type: esp-idf
    sdkconfig_options:
      CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240: "y"
      CONFIG_ESP32S3_DATA_CACHE_64KB: "y"
      CONFIG_ESP32S3_DATA_CACHE_LINE_64B: "y"

psram:
  mode: octal
  speed: 80MHz

wifi:
  ssid: !secret wifi_ssid
  password: !secret wifi_password
  ap:
    ssid: "HomeAI Fallback"
  on_connect:
    - script.execute: draw_display
  on_disconnect:
    - script.execute: draw_display

captive_portal:

api:
  encryption:
    key: !secret api_key
  # Prevent device from rebooting if HA connection drops temporarily
  reboot_timeout: 0s
  on_client_connected:
    - script.execute: draw_display
  on_client_disconnected:
    # Debounce: wait 5s before showing "HA not found" to avoid flicker on brief drops
    - delay: 5s
    - if:
        condition:
          not:
            api.connected:
        then:
          - script.execute: draw_display

ota:
  - platform: esphome
    id: ota_esphome

logger:
  hardware_uart: USB_SERIAL_JTAG

button:
  - platform: factory_reset
    id: factory_reset_btn
    internal: true

binary_sensor:
  - platform: gpio
    pin:
      number: GPIO0
      ignore_strapping_warning: true
      mode: INPUT_PULLUP
      inverted: true
    id: left_top_button
    internal: true
    on_multi_click:
      # Short press: dismiss timer / toggle mute
      - timing:
          - ON for at least 50ms
          - OFF for at least 50ms
        then:
          - if:
              condition:
                switch.is_on: timer_ringing
              then:
                - switch.turn_off: timer_ringing
              else:
                - switch.toggle: mute
      # Long press (10s): factory reset
      - timing:
          - ON for at least 10s
        then:
          - button.press: factory_reset_btn

# --- Display backlight ---
output:
  - platform: ledc
    pin: GPIO47
    id: backlight_output

light:
  - platform: monochromatic
    id: led
    name: Screen
    icon: "mdi:television"
    entity_category: config
    output: backlight_output
    restore_mode: RESTORE_DEFAULT_ON
    default_transition_length: 250ms

# --- Audio hardware ---
i2c:
  scl: GPIO18
  sda: GPIO8

i2s_audio:
  - id: i2s_audio_bus
    i2s_lrclk_pin:
      number: GPIO45
      ignore_strapping_warning: true
    i2s_bclk_pin: GPIO17
    i2s_mclk_pin: GPIO2

audio_adc:
  - platform: es7210
    id: es7210_adc
    bits_per_sample: 16bit
    sample_rate: 16000

audio_dac:
  - platform: es8311
    id: es8311_dac
    bits_per_sample: 16bit
    sample_rate: 48000

microphone:
  - platform: i2s_audio
    id: box_mic
    sample_rate: 16000
    i2s_din_pin: GPIO16
    bits_per_sample: 16bit
    adc_type: external

speaker:
  - platform: i2s_audio
    id: box_speaker
    i2s_dout_pin: GPIO15
    dac_type: external
    sample_rate: 48000
    bits_per_sample: 16bit
    channel: left
    audio_dac: es8311_dac
    buffer_duration: 100ms

media_player:
  - platform: speaker
    name: None
    id: speaker_media_player
    volume_min: 0.5
    volume_max: 0.85
    announcement_pipeline:
      speaker: box_speaker
      format: FLAC
      sample_rate: 48000
      num_channels: 1
    files:
      - id: timer_finished_sound
        file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/timer_finished.flac
    on_announcement:
      # Stop wake word detection while the microphone is capturing.
      - if:
          condition:
            - microphone.is_capturing:
          then:
            - script.execute: stop_wake_word
            # In "In Home Assistant" mode, wait for the voice assistant to stop.
            - if:
                condition:
                  - lambda: return id(wake_word_engine_location).current_option() == "In Home Assistant";
                then:
                  - wait_until:
                      - not:
                          voice_assistant.is_running:
      # Announcement without a running voice assistant: show the muted page.
      - if:
          condition:
            not:
              voice_assistant.is_running:
          then:
            - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
            - script.execute: draw_display
    on_idle:
      # Playback finished and no pipeline running: resume wake word and redraw.
      - if:
          condition:
            not:
              voice_assistant.is_running:
          then:
            - script.execute: start_wake_word
            - script.execute: set_idle_or_mute_phase
            - script.execute: draw_display

# --- Wake word (on-device) ---
micro_wake_word:
  id: mww
  models:
    - hey_jarvis
  on_wake_word_detected:
    - voice_assistant.start:
        wake_word: !lambda return wake_word;

# --- Voice assistant ---
voice_assistant:
  id: va
  microphone: box_mic
  media_player: speaker_media_player
  micro_wake_word: mww
  noise_suppression_level: 2
  auto_gain: 31dBFS
  volume_multiplier: 2.0
  on_listening:
    - lambda: id(voice_assistant_phase) = ${voice_assist_listening_phase_id};
    # Reset both text boxes to a placeholder for the new interaction.
    - text_sensor.template.publish:
        id: text_request
        state: "..."
    - text_sensor.template.publish:
        id: text_response
        state: "..."
    - script.execute: draw_display
  on_stt_vad_end:
    - lambda: id(voice_assistant_phase) = ${voice_assist_thinking_phase_id};
    - script.execute: draw_display
  on_stt_end:
    - text_sensor.template.publish:
        id: text_request
        state: !lambda return x;
    - script.execute: draw_display
  on_tts_start:
    - text_sensor.template.publish:
        id: text_response
        state: !lambda return x;
    - lambda: id(voice_assistant_phase) = ${voice_assist_replying_phase_id};
    - script.execute: draw_display
  on_end:
    # Wait up to 0.5 s for the announcement to start ...
    - wait_until:
        condition:
          - media_player.is_announcing:
        timeout: 0.5s
    # ... then wait until neither the announcement nor the speaker is active.
    - wait_until:
        - and:
            - not:
                media_player.is_announcing:
            - not:
                speaker.is_playing:
    - if:
        condition:
          - lambda: return id(wake_word_engine_location).current_option() == "On device";
        then:
          - lambda: id(va).set_use_wake_word(false);
          - micro_wake_word.start:
    - script.execute: set_idle_or_mute_phase
    - script.execute: draw_display
    - text_sensor.template.publish:
        id: text_request
        state: ""
    - text_sensor.template.publish:
        id: text_response
        state: ""
  on_error:
    # Errors are ignored while initialisation is still in progress.
    - if:
        condition:
          lambda: return !id(init_in_progress);
        then:
          # Show the error page for 1 s, then fall back to idle or muted.
          - lambda: id(voice_assistant_phase) = ${voice_assist_error_phase_id};
          - script.execute: draw_display
          - delay: 1s
          - if:
              condition:
                switch.is_off: mute
              then:
                - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
              else:
                - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
          - script.execute: draw_display
  on_client_connected:
    - lambda: id(init_in_progress) = false;
    - script.execute: start_wake_word
    - script.execute: set_idle_or_mute_phase
    - script.execute: draw_display
  on_client_disconnected:
    - script.execute: stop_wake_word
    - lambda: id(voice_assistant_phase) = ${voice_assist_not_ready_phase_id};
    - script.execute: draw_display
  on_timer_started:
    - script.execute: draw_display
  on_timer_cancelled:
    - script.execute: draw_display
  on_timer_updated:
    - script.execute: draw_display
  on_timer_tick:
    - script.execute: draw_display
  on_timer_finished:
    - switch.turn_on: timer_ringing
    - wait_until:
        media_player.is_announcing:
    - lambda: id(voice_assistant_phase) = ${voice_assist_timer_finished_phase_id};
    - script.execute: draw_display

# --- Scripts ---
script:
  # Select the page to show and force a redraw. Precedence:
  # initializing > no Wi-Fi > no Home Assistant API > page for the current phase.
  - id: draw_display
    then:
      - if:
          condition:
            lambda: return !id(init_in_progress);
          then:
            - if:
                condition:
                  wifi.connected:
                then:
                  - if:
                      condition:
                        api.connected:
                      then:
                        - lambda: |
                            switch (id(voice_assistant_phase)) {
                              case ${voice_assist_listening_phase_id}:
                                id(s3_box_lcd).show_page(listening_page);
                                break;
                              case ${voice_assist_thinking_phase_id}:
                                id(s3_box_lcd).show_page(thinking_page);
                                break;
                              case ${voice_assist_replying_phase_id}:
                                id(s3_box_lcd).show_page(replying_page);
                                break;
                              case ${voice_assist_error_phase_id}:
                                id(s3_box_lcd).show_page(error_page);
                                break;
                              case ${voice_assist_muted_phase_id}:
                                id(s3_box_lcd).show_page(muted_page);
                                break;
                              case ${voice_assist_not_ready_phase_id}:
                                id(s3_box_lcd).show_page(no_ha_page);
                                break;
                              case ${voice_assist_timer_finished_phase_id}:
                                id(s3_box_lcd).show_page(timer_finished_page);
                                break;
                              default:
                                // Idle phase and any unknown value.
                                id(s3_box_lcd).show_page(idle_page);
                            }
                            // Every branch redraws, so do it once here.
                            id(s3_box_lcd).update();
                      else:
                        - display.page.show: no_ha_page
                        - component.update: s3_box_lcd
                else:
                  - display.page.show: no_wifi_page
                  - component.update: s3_box_lcd
          else:
            - display.page.show: initializing_page
            - component.update: s3_box_lcd

  # Store the ACTIVE timer with the fewest seconds left in
  # `global_first_active_timer`. Leaves the global untouched when no timer is
  # active (callers check `global_is_timer_active` first).
  - id: fetch_first_active_timer
    then:
      - lambda: |
          const auto &timers = id(va).get_timers();
          const voice_assistant::Timer *soonest = nullptr;
          for (const auto &entry : timers) {
            const auto &candidate = entry.second;
            // Inactive timers must never be selected, not even as the seed.
            if (!candidate.is_active) {
              continue;
            }
            if (soonest == nullptr || candidate.seconds_left <= soonest->seconds_left) {
              soonest = &candidate;
            }
          }
          if (soonest != nullptr) {
            id(global_first_active_timer) = *soonest;
          }

  # Set `global_is_timer_active` to true when at least one timer is active.
  - id: check_if_timers_active
    then:
      - lambda: |
          const auto &timers = id(va).get_timers();
          bool any_active = false;
          for (const auto &entry : timers) {
            if (entry.second.is_active) {
              any_active = true;
              break;
            }
          }
          id(global_is_timer_active) = any_active;

  # Store the timer with the fewest seconds left (active or not) in
  # `global_first_timer`. Does nothing when there are no timers.
  - id: fetch_first_timer
    then:
      - lambda: |
          const auto &timers = id(va).get_timers();
          if (timers.empty()) {
            return;
          }
          auto soonest = timers.begin()->second;
          for (const auto &entry : timers) {
            if (entry.second.seconds_left <= soonest.seconds_left) {
              soonest = entry.second;
            }
          }
          id(global_first_timer) = soonest;

  # Set `global_is_timer` to true when ANY timer exists, active or not.
  # This must differ from check_if_timers_active: draw_timer_timeline uses it
  # in its `else if` branch to draw a timer when none is active.
  - id: check_if_timers
    then:
      - lambda: |
          const auto &timers = id(va).get_timers();
          id(global_is_timer) = !timers.empty();

  # Draw a progress bar along the bottom edge (y = 225..239) for the soonest
  # active timer, or — when none is active — for the soonest timer overall
  # using `paused_timer_color`.
  - id: draw_timer_timeline
    then:
      - lambda: |
          id(check_if_timers_active).execute();
          id(check_if_timers).execute();
          if (id(global_is_timer_active)) {
            id(fetch_first_active_timer).execute();
            // max(..., 1) guards against division by zero.
            int active_pixels = round(
              320 * id(global_first_active_timer).seconds_left
              / max(id(global_first_active_timer).total_seconds, static_cast<uint32_t>(1)));
            if (active_pixels > 0) {
              id(s3_box_lcd).filled_rectangle(0, 225, 320, 15, Color::WHITE);
              id(s3_box_lcd).filled_rectangle(0, 226, active_pixels, 13, id(active_timer_color));
            }
          } else if (id(global_is_timer)) {
            id(fetch_first_timer).execute();
            int active_pixels = round(
              320 * id(global_first_timer).seconds_left
              / max(id(global_first_timer).total_seconds, static_cast<uint32_t>(1)));
            if (active_pixels > 0) {
              id(s3_box_lcd).filled_rectangle(0, 225, 320, 15, Color::WHITE);
              id(s3_box_lcd).filled_rectangle(0, 226, active_pixels, 13, id(paused_timer_color));
            }
          }

  # Draw a boxed countdown for the soonest active timer:
  # "HH:MM" when at least one hour is left, otherwise "MM:SS".
  - id: draw_active_timer_widget
    then:
      - lambda: |
          id(check_if_timers_active).execute();
          if (id(global_is_timer_active)) {
            id(s3_box_lcd).filled_rectangle(80, 40, 160, 50, Color::WHITE);
            id(s3_box_lcd).rectangle(80, 40, 160, 50, Color::BLACK);

            id(fetch_first_active_timer).execute();
            int hours_left = floor(id(global_first_active_timer).seconds_left / 3600);
            int minutes_left = floor((id(global_first_active_timer).seconds_left - hours_left * 3600) / 60);
            int seconds_left = id(global_first_active_timer).seconds_left - hours_left * 3600 - minutes_left * 60;
            // Zero-pad each component to two digits.
            auto display_hours = (hours_left < 10 ? "0" : "") + std::to_string(hours_left);
            auto display_minute = (minutes_left < 10 ? "0" : "") + std::to_string(minutes_left);
            auto display_seconds = (seconds_left < 10 ? "0" : "") + std::to_string(seconds_left);

            std::string display_string = "";
            if (hours_left > 0) {
              display_string = display_hours + ":" + display_minute;
            } else {
              display_string = display_minute + ":" + display_seconds;
            }
            id(s3_box_lcd).printf(120, 47, id(font_timer), Color::BLACK, "%s", display_string.c_str());
          }

  # Start wake word detection with the engine chosen in
  # `wake_word_engine_location`; does nothing while the assistant is running.
  - id: start_wake_word
    then:
      - if:
          condition:
            and:
              - not:
                  - voice_assistant.is_running:
              - lambda: return id(wake_word_engine_location).current_option() == "On device";
          then:
            - lambda: id(va).set_use_wake_word(false);
            - micro_wake_word.start:
      - if:
          condition:
            and:
              - not:
                  - voice_assistant.is_running:
              - lambda: return id(wake_word_engine_location).current_option() == "In Home Assistant";
          then:
            - lambda: id(va).set_use_wake_word(true);
            - voice_assistant.start_continuous:

  # Stop whichever wake word engine is selected.
  - id: stop_wake_word
    then:
      - if:
          condition:
            lambda: return id(wake_word_engine_location).current_option() == "In Home Assistant";
          then:
            - lambda: id(va).set_use_wake_word(false);
            - voice_assistant.stop:
      - if:
          condition:
            lambda: return id(wake_word_engine_location).current_option() == "On device";
          then:
            - micro_wake_word.stop:

  # Set the phase to idle, or to muted when the mute switch is on.
  - id: set_idle_or_mute_phase
    then:
      - if:
          condition:
            switch.is_off: mute
          then:
            - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
          else:
            - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};

# --- Switches ---
switch:
  - platform: gpio
    name: Speaker Enable
    pin:
      number: GPIO46
      ignore_strapping_warning: true
    restore_mode: RESTORE_DEFAULT_ON
    entity_category: config
    disabled_by_default: true

  - platform: template
    name: Mute
    id: mute
    icon: "mdi:microphone-off"
    optimistic: true
    restore_mode: RESTORE_DEFAULT_OFF
    entity_category: config
    on_turn_off:
      - microphone.unmute:
      - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
      - script.execute: draw_display
    on_turn_on:
      - microphone.mute:
      - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
      - script.execute: draw_display

  # Internal switch that is on while the timer-finished sound is repeating.
  - platform: template
    id: timer_ringing
    optimistic: true
    internal: true
    restore_mode: ALWAYS_OFF
    on_turn_off:
      # Turn repeat off, reset the playlist delay and stop the announcement.
      - lambda: |-
          id(speaker_media_player)
            ->make_call()
            .set_command(media_player::MediaPlayerCommand::MEDIA_PLAYER_COMMAND_REPEAT_OFF)
            .set_announcement(true)
            .perform();
          id(speaker_media_player)->set_playlist_delay_ms(speaker::AudioPipelineType::ANNOUNCEMENT, 0);
      - media_player.stop:
          announcement: true
    on_turn_on:
      # Repeat the timer sound with a 1000 ms playlist delay between plays.
      - lambda: |-
          id(speaker_media_player)
            ->make_call()
            .set_command(media_player::MediaPlayerCommand::MEDIA_PLAYER_COMMAND_REPEAT_ONE)
            .set_announcement(true)
            .perform();
          id(speaker_media_player)->set_playlist_delay_ms(speaker::AudioPipelineType::ANNOUNCEMENT, 1000);
      - media_player.speaker.play_on_device_media_file:
          media_file: timer_finished_sound
          announcement: true
      # Stop ringing automatically after 15 minutes.
      - delay: 15min
      - switch.turn_off: timer_ringing

# --- Wake word engine location selector ---
select:
  - platform: template
    entity_category: config
    name: Wake word engine location
    id: wake_word_engine_location
    icon: "mdi:account-voice"
    optimistic: true
    restore_value: true
    options:
      - In Home Assistant
      - On device
    initial_option: On device
    on_value:
      - if:
          condition:
            lambda: return !id(init_in_progress);
          then:
            # Only switch engines once the phase is idle or muted.
            - wait_until:
                lambda: |-
                  return id(voice_assistant_phase) == ${voice_assist_muted_phase_id}
                      || id(voice_assistant_phase) == ${voice_assist_idle_phase_id};
            - if:
                condition:
                  lambda: return x == "In Home Assistant";
                then:
                  - micro_wake_word.stop
                  - delay: 500ms
                  - if:
                      condition:
                        switch.is_off: mute
                      then:
                        - lambda: id(va).set_use_wake_word(true);
                        - voice_assistant.start_continuous:
            - if:
                condition:
                  lambda: return x == "On device";
                then:
                  - lambda: id(va).set_use_wake_word(false);
                  - voice_assistant.stop
                  - delay: 500ms
                  - if:
                      condition:
                        switch.is_off: mute
                      then:
                        - micro_wake_word.start

# --- Global variables ---
globals:
  # True from boot until the voice assistant client connects or the 30 s
  # on_boot fallback fires.
  - id: init_in_progress
    type: bool
    restore_value: false
    initial_value: "true"
  # Current phase id (see substitutions); read by draw_display.
  - id: voice_assistant_phase
    type: int
    restore_value: false
    initial_value: ${voice_assist_not_ready_phase_id}
  # Results of the timer helper scripts.
  - id: global_first_active_timer
    type: voice_assistant::Timer
    restore_value: false
  - id: global_is_timer_active
    type: bool
    restore_value: false
  - id: global_first_timer
    type: voice_assistant::Timer
    restore_value: false
  - id: global_is_timer
    type: bool
    restore_value: false

# --- Display images ---
image:
  - file: ${error_illustration_file}
    id: casita_error
    resize: 320x240
    type: RGB
    transparency: alpha_channel
  - file: ${idle_illustration_file}
    id: casita_idle
    resize: 320x240
    type: RGB
    transparency: alpha_channel
  - file: ${listening_illustration_file}
    id: casita_listening
    resize: 320x240
    type: RGB
    transparency: alpha_channel
  - file: ${thinking_illustration_file}
    id: casita_thinking
    resize: 320x240
    type: RGB
    transparency: alpha_channel
  - file: ${replying_illustration_file}
    id: casita_replying
    resize: 320x240
    type: RGB
    transparency: alpha_channel
  - file: ${timer_finished_illustration_file}
    id: casita_timer_finished
    resize: 320x240
    type: RGB
    transparency: alpha_channel
  - file: ${loading_illustration_file}
    id: casita_initializing
    resize: 320x240
    type: RGB
    transparency: alpha_channel
  - file: https://github.com/esphome/wake-word-voice-assistants/raw/main/error_box_illustrations/error-no-wifi.png
    id: error_no_wifi
    resize: 320x240
    type: RGB
    transparency: alpha_channel
  - file: https://github.com/esphome/wake-word-voice-assistants/raw/main/error_box_illustrations/error-no-ha.png
    id: error_no_ha
    resize: 320x240
    type: RGB
    transparency: alpha_channel

# --- Fonts ---
font:
  - file:
      type: gfonts
      family: ${font_family}
      weight: 300
      italic: true
    id: font_request
    size: 15
    glyphsets:
      - ${font_glyphsets}
  - file:
      type: gfonts
      family: ${font_family}
      weight: 300
    id: font_response
    size: 15
    glyphsets:
      - ${font_glyphsets}
  - file:
      type: gfonts
      family: ${font_family}
      weight: 300
    id: font_timer
    size: 30
    glyphsets:
      - ${font_glyphsets}

# --- Text sensors (request/response display) ---
# Values longer than 32 characters are cut to 31 characters plus "..." so they
# fit the on-screen text boxes.
text_sensor:
  - id: text_request
    platform: template
    on_value:
      lambda: |-
        if(id(text_request).state.length()>32) {
          std::string name = id(text_request).state.c_str();
          std::string truncated = esphome::str_truncate(name.c_str(),31);
          id(text_request).state = (truncated+"...").c_str();
        }
  - id: text_response
    platform: template
    on_value:
      lambda: |-
        if(id(text_response).state.length()>32) {
          std::string name = id(text_response).state.c_str();
          std::string truncated = esphome::str_truncate(name.c_str(),31);
          id(text_response).state = (truncated+"...").c_str();
        }

# --- Colors ---
color:
  - id: idle_color
    hex: ${idle_illustration_background_color}
  - id: listening_color
    hex: ${listening_illustration_background_color}
  - id: thinking_color
    hex: ${thinking_illustration_background_color}
  - id: replying_color
    hex: ${replying_illustration_background_color}
  - id: loading_color
    hex: ${loading_illustration_background_color}
  - id: error_color
    hex: ${error_illustration_background_color}
  - id: active_timer_color
    hex: "26ed3a"
  - id: paused_timer_color
    hex: "3b89e3"

# --- SPI + Display ---
spi:
  - id: spi_bus
    clk_pin: 7
    mosi_pin: 6

display:
  - platform: ili9xxx
    id: s3_box_lcd
    model: S3BOX
    invert_colors: false
    data_rate: 40MHz
    cs_pin: 5
    dc_pin: 4
    reset_pin:
      number: 48
      inverted: true
    # Redraws happen only when draw_display requests them.
    update_interval: never
    pages:
      - id: idle_page
        lambda: |-
          it.fill(id(idle_color));
          it.image((it.get_width() / 2), (it.get_height() / 2), id(casita_idle), ImageAlign::CENTER);
          id(draw_timer_timeline).execute();
          id(draw_active_timer_widget).execute();
      - id: listening_page
        lambda: |-
          it.fill(id(listening_color));
          it.image((it.get_width() / 2), (it.get_height() / 2), id(casita_listening), ImageAlign::CENTER);
          id(draw_timer_timeline).execute();
      - id: thinking_page
        lambda: |-
          it.fill(id(thinking_color));
          it.image((it.get_width() / 2), (it.get_height() / 2), id(casita_thinking), ImageAlign::CENTER);
          // Request text box (top).
          it.filled_rectangle(20, 20, 280, 30, Color::WHITE);
          it.rectangle(20, 20, 280, 30, Color::BLACK);
          it.printf(30, 25, id(font_request), Color::BLACK, "%s", id(text_request).state.c_str());
          id(draw_timer_timeline).execute();
      - id: replying_page
        lambda: |-
          it.fill(id(replying_color));
          it.image((it.get_width() / 2), (it.get_height() / 2), id(casita_replying), ImageAlign::CENTER);
          // Request text box (top) and response text box (bottom).
          it.filled_rectangle(20, 20, 280, 30, Color::WHITE);
          it.rectangle(20, 20, 280, 30, Color::BLACK);
          it.filled_rectangle(20, 190, 280, 30, Color::WHITE);
          it.rectangle(20, 190, 280, 30, Color::BLACK);
          it.printf(30, 25, id(font_request), Color::BLACK, "%s", id(text_request).state.c_str());
          it.printf(30, 195, id(font_response), Color::BLACK, "%s", id(text_response).state.c_str());
          id(draw_timer_timeline).execute();
      - id: timer_finished_page
        lambda: |-
          it.fill(id(idle_color));
          it.image((it.get_width() / 2), (it.get_height() / 2), id(casita_timer_finished), ImageAlign::CENTER);
      - id: error_page
        lambda: |-
          it.fill(id(error_color));
          it.image((it.get_width() / 2), (it.get_height() / 2), id(casita_error), ImageAlign::CENTER);
      - id: no_ha_page
        lambda: |-
          it.image((it.get_width() / 2), (it.get_height() / 2), id(error_no_ha), ImageAlign::CENTER);
      - id: no_wifi_page
        lambda: |-
          it.image((it.get_width() / 2), (it.get_height() / 2), id(error_no_wifi), ImageAlign::CENTER);
      - id: initializing_page
        lambda: |-
          it.fill(id(loading_color));
          it.image((it.get_width() / 2), (it.get_height() / 2), id(casita_initializing), ImageAlign::CENTER);
      - id: muted_page
        lambda: |-
          it.fill(Color::BLACK);
          id(draw_timer_timeline).execute();
          id(draw_active_timer_widget).execute();