From f42448d50123e376205df17bc295917e89d943f5 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Fri, 5 Jul 2024 10:12:55 +1000
Subject: [PATCH 1/9] WIP start on accepting two streams in our audio output

---
 src/drivers/bluetooth.cpp                  | 87 +++++++++++++---------
 src/drivers/include/drivers/bluetooth.hpp  | 13 ++--
 src/drivers/include/drivers/i2s_dac.hpp    |  5 +-
 src/drivers/include/drivers/pcm_buffer.hpp |  9 ++-
 src/drivers/pcm_buffer.cpp                 | 24 ++++--
 5 files changed, 88 insertions(+), 50 deletions(-)
diff --git a/src/drivers/bluetooth.cpp b/src/drivers/bluetooth.cpp
index 412cba1f..23c4f8d8 100644
--- a/src/drivers/bluetooth.cpp
+++ b/src/drivers/bluetooth.cpp
@@ -37,7 +37,8 @@ namespace drivers {
 
 [[maybe_unused]] static constexpr char kTag[] = "bluetooth";
 
-DRAM_ATTR static PcmBuffer* sStream = nullptr;
+DRAM_ATTR static PcmBuffer* sStream1 = nullptr;
+DRAM_ATTR static PcmBuffer* sStream2 = nullptr;
 DRAM_ATTR static std::atomic<float> sVolumeFactor = 1.f;
 
 static tasks::WorkerPool* sBgWorker;
@@ -96,13 +97,15 @@ IRAM_ATTR auto a2dp_data_cb(uint8_t* buf, int32_t buf_size) -> int32_t {
   if (buf == nullptr || buf_size <= 0) {
     return 0;
   }
-  PcmBuffer* stream = sStream;
-  if (stream == nullptr) {
+  PcmBuffer* stream1 = sStream1;
+  PcmBuffer* stream2 = sStream2;
+  if (stream1 == nullptr || stream2 == nullptr) {
     return 0;
   }
 
   int16_t* samples = reinterpret_cast<int16_t*>(buf);
-  stream->receive({samples, static_cast<size_t>(buf_size / 2)}, false);
+  stream1->receive({samples, static_cast<size_t>(buf_size / 2)}, false, false);
+  stream2->receive({samples, static_cast<size_t>(buf_size / 2)}, true, false);
 
   // Apply software volume scaling.
   float factor = sVolumeFactor.load();
@@ -181,14 +184,16 @@ auto Bluetooth::PreferredDevice() -> std::optional<bluetooth::MacAndName> {
   return bluetooth::BluetoothState::preferred_device();
 }
 
-auto Bluetooth::SetSource(PcmBuffer* src) -> void {
+auto Bluetooth::SetSources(PcmBuffer* src1, PcmBuffer* src2) -> void {
   auto lock = bluetooth::BluetoothState::lock();
-  if (src == bluetooth::BluetoothState::source()) {
+  PcmBuffer *cur1, *cur2;
+  std::tie(cur1, cur2) = bluetooth::BluetoothState::sources();
+  if (src1 == cur1 && src2 == cur2) {
     return;
   }
-  bluetooth::BluetoothState::source(src);
+  bluetooth::BluetoothState::sources(src1, src2);
   tinyfsm::FsmList<bluetooth::BluetoothState>::dispatch(
-      bluetooth::events::SourceChanged{});
+      bluetooth::events::SourcesChanged{});
 }
 
 auto Bluetooth::SetVolumeFactor(float f) -> void {
@@ -348,7 +353,6 @@ std::optional<MacAndName> BluetoothState::sPreferredDevice_{};
 std::optional<MacAndName> BluetoothState::sConnectingDevice_{};
 int BluetoothState::sConnectAttemptsRemaining_{0};
 
-std::atomic<PcmBuffer*> BluetoothState::sSource_;
 std::function<void(Event)> BluetoothState::sEventHandler_;
 
 auto BluetoothState::Init(NvsStorage& storage) -> void {
@@ -377,12 +381,13 @@ auto BluetoothState::preferred_device(std::optional<MacAndName> addr) -> void {
   sPreferredDevice_ = addr;
 }
 
-auto BluetoothState::source() -> PcmBuffer* {
-  return sSource_.load();
+auto BluetoothState::sources() -> std::pair<PcmBuffer*, PcmBuffer*> {
+  return {sStream1, sStream2};
 }
 
-auto BluetoothState::source(PcmBuffer* src) -> void {
-  sSource_.store(src);
+auto BluetoothState::sources(PcmBuffer* src1, PcmBuffer* src2) -> void {
+  sStream1 = src1;
+  sStream2 = src2;
 }
 
 auto BluetoothState::event_handler(std::function<void(Event)> cb) -> void {
@@ -508,11 +513,13 @@ void Disabled::react(const events::Enable&) {
   // AVRCP Target
   err = esp_avrc_tg_init();
   if (err != ESP_OK) {
-    ESP_LOGE(kTag, "Error during target init: %s %d", esp_err_to_name(err), err);
+    ESP_LOGE(kTag, "Error during target init: %s %d", esp_err_to_name(err),
+             err);
   }
   err = esp_avrc_tg_register_callback(avrcp_tg_cb);
   if (err != ESP_OK) {
-    ESP_LOGE(kTag, "Error registering AVRC tg callback: %s %d", esp_err_to_name(err), err);
+    ESP_LOGE(kTag, "Error registering AVRC tg callback: %s %d",
+             esp_err_to_name(err), err);
   }
 
   // Set the supported passthrough commands on the tg
@@ -522,19 +529,20 @@ void Disabled::react(const events::Enable&) {
   do {
     // Sleep for a bit
     vTaskDelay(pdMS_TO_TICKS(10));
-    err = esp_avrc_tg_get_psth_cmd_filter(ESP_AVRC_PSTH_FILTER_ALLOWED_CMD, &psth);
+    err = esp_avrc_tg_get_psth_cmd_filter(ESP_AVRC_PSTH_FILTER_ALLOWED_CMD,
+                                          &psth);
   } while (err != ESP_OK);
 
-  err = esp_avrc_tg_set_psth_cmd_filter(ESP_AVRC_PSTH_FILTER_SUPPORTED_CMD, &psth);
+  err = esp_avrc_tg_set_psth_cmd_filter(ESP_AVRC_PSTH_FILTER_SUPPORTED_CMD,
+                                        &psth);
   if (err != ESP_OK) {
     ESP_LOGE(kTag, "Error: %s %d", esp_err_to_name(err), err);
   }
   esp_avrc_rn_evt_cap_mask_t evt_set = {0};
   esp_avrc_rn_evt_bit_mask_operation(ESP_AVRC_BIT_MASK_OP_SET, &evt_set,
-                                      ESP_AVRC_RN_VOLUME_CHANGE);
+                                     ESP_AVRC_RN_VOLUME_CHANGE);
   assert(esp_avrc_tg_set_rn_evt_cap(&evt_set) == ESP_OK);
 
-
   // Initialise A2DP. This handles streaming audio. Currently ESP-IDF's SBC
   // encoder only supports 2 channels of interleaved 16 bit samples, at
   // 44.1kHz, so there is no additional configuration to be done for the
@@ -724,9 +732,8 @@ void Connected::react(const events::PreferredDeviceChanged& ev) {
   transit<Connecting>();
 }
 
-void Connected::react(const events::SourceChanged& ev) {
-  sStream = sSource_;
-  if (sStream != nullptr) {
+void Connected::react(const events::SourcesChanged& ev) {
+  if (sStream1 != nullptr && sStream2 != nullptr) {
     ESP_LOGI(kTag, "checking source is ready");
     esp_a2d_media_ctrl(ESP_A2D_MEDIA_CTRL_CHECK_SRC_RDY);
   } else {
@@ -775,7 +782,8 @@ void Connected::react(events::internal::Avrc ev) {
   switch (ev.type) {
     case ESP_AVRC_CT_CONNECTION_STATE_EVT:
       if (ev.param.conn_stat.connected) {
-        auto err = esp_avrc_ct_send_register_notification_cmd(4, ESP_AVRC_RN_VOLUME_CHANGE, 0);
+        auto err = esp_avrc_ct_send_register_notification_cmd(
+            4, ESP_AVRC_RN_VOLUME_CHANGE, 0);
         if (err != ESP_OK) {
           ESP_LOGE(kTag, "Error: %s %d", esp_err_to_name(err), err);
         }
@@ -787,15 +795,20 @@ void Connected::react(events::internal::Avrc ev) {
     case ESP_AVRC_CT_REMOTE_FEATURES_EVT:
       // The remote device is telling us about its capabilities! We don't
       // currently care about any of them.
-      ESP_LOGI(kTag, "Recieved capabilitites: %lu", ev.param.rmt_feats.feat_mask);
+      ESP_LOGI(kTag, "Recieved capabilitites: %lu",
+               ev.param.rmt_feats.feat_mask);
       break;
     case ESP_AVRC_CT_CHANGE_NOTIFY_EVT:
       if (ev.param.change_ntf.event_id == ESP_AVRC_RN_VOLUME_CHANGE) {
         if (sEventHandler_) {
-          std::invoke(sEventHandler_, bluetooth::RemoteVolumeChanged{.new_vol = ev.param.change_ntf.event_parameter.volume});
+          std::invoke(
+              sEventHandler_,
+              bluetooth::RemoteVolumeChanged{
+                  .new_vol = ev.param.change_ntf.event_parameter.volume});
         }
         // Resubscribe to volume facts
-        auto err = esp_avrc_ct_send_register_notification_cmd(4, ESP_AVRC_RN_VOLUME_CHANGE, 0);
+        auto err = esp_avrc_ct_send_register_notification_cmd(
+            4, ESP_AVRC_RN_VOLUME_CHANGE, 0);
         if (err != ESP_OK) {
           ESP_LOGE(kTag, "Error: %s %d", esp_err_to_name(err), err);
         }
@@ -809,16 +822,20 @@ void Connected::react(events::internal::Avrc ev) {
 void Connected::react(const events::internal::Avrctg ev) {
   switch (ev.type) {
     case ESP_AVRC_TG_CONNECTION_STATE_EVT:
-      ESP_LOGI(kTag, "Got connection event. Connected: %s", ev.param.conn_stat.connected ? "true" : "false");
+      ESP_LOGI(kTag, "Got connection event. Connected: %s",
+               ev.param.conn_stat.connected ? "true" : "false");
       if (ev.param.conn_stat.connected) {
       }
       break;
     case ESP_AVRC_TG_REMOTE_FEATURES_EVT:
-      ESP_LOGI(kTag, "Got remote features feat flag %d", ev.param.rmt_feats.ct_feat_flag);
-      ESP_LOGI(kTag, "Got remote features feat mask %lu", ev.param.rmt_feats.feat_mask);
+      ESP_LOGI(kTag, "Got remote features feat flag %d",
+               ev.param.rmt_feats.ct_feat_flag);
+      ESP_LOGI(kTag, "Got remote features feat mask %lu",
+               ev.param.rmt_feats.feat_mask);
       break;
     case ESP_AVRC_TG_PASSTHROUGH_CMD_EVT:
-      ESP_LOGI(kTag, "Got passthrough event keycode: %x, %d", ev.param.psth_cmd.key_code, ev.param.psth_cmd.key_state);
+      ESP_LOGI(kTag, "Got passthrough event keycode: %x, %d",
+               ev.param.psth_cmd.key_code, ev.param.psth_cmd.key_state);
       if (ev.param.psth_cmd.key_state == 1 && sEventHandler_) {
         switch (ev.param.psth_cmd.key_code) {
           case ESP_AVRC_PT_CMD_PLAY:
@@ -840,7 +857,8 @@ void Connected::react(const events::internal::Avrctg ev) {
             std::invoke(sEventHandler_, bluetooth::SimpleEvent::kBackward);
             break;
           default:
-            ESP_LOGI(kTag, "Unhandled passthrough cmd. Key code: %d", ev.param.psth_cmd.key_code);
+            ESP_LOGI(kTag, "Unhandled passthrough cmd. Key code: %d",
+                     ev.param.psth_cmd.key_code);
         }
       }
       break;
@@ -848,14 +866,15 @@ void Connected::react(const events::internal::Avrctg ev) {
       if (ev.param.reg_ntf.event_id == ESP_AVRC_RN_VOLUME_CHANGE) {
         // TODO: actually do this lol
         esp_avrc_rn_param_t rn_param;
-        rn_param.volume = 64; 
+        rn_param.volume = 64;
         auto err = esp_avrc_tg_send_rn_rsp(ESP_AVRC_RN_VOLUME_CHANGE,
-                                ESP_AVRC_RN_RSP_INTERIM, &rn_param);
+                                           ESP_AVRC_RN_RSP_INTERIM, &rn_param);
         if (err != ESP_OK) {
           ESP_LOGE(kTag, "Error: %s %d", esp_err_to_name(err), err);
         }
       } else {
-        ESP_LOGW(kTag, "unhandled AVRC TG Register Notification event: %u", ev.param.reg_ntf.event_id);
+        ESP_LOGW(kTag, "unhandled AVRC TG Register Notification event: %u",
+                 ev.param.reg_ntf.event_id);
       }
       break;
     }
diff --git a/src/drivers/include/drivers/bluetooth.hpp b/src/drivers/include/drivers/bluetooth.hpp
index 94a85263..b3b12ffc 100644
--- a/src/drivers/include/drivers/bluetooth.hpp
+++ b/src/drivers/include/drivers/bluetooth.hpp
@@ -43,7 +43,7 @@ class Bluetooth {
   auto SetPreferredDevice(std::optional<bluetooth::MacAndName> dev) -> void;
   auto PreferredDevice() -> std::optional<bluetooth::MacAndName>;
 
-  auto SetSource(PcmBuffer*) -> void;
+  auto SetSources(PcmBuffer*, PcmBuffer*) -> void;
   auto SetVolumeFactor(float) -> void;
 
   auto SetEventHandler(std::function<void(bluetooth::Event)> cb) -> void;
@@ -57,7 +57,7 @@ struct Disable : public tinyfsm::Event {};
 
 struct ConnectTimedOut : public tinyfsm::Event {};
 struct PreferredDeviceChanged : public tinyfsm::Event {};
-struct SourceChanged : public tinyfsm::Event {};
+struct SourcesChanged : public tinyfsm::Event {};
 struct DeviceDiscovered : public tinyfsm::Event {
   const Device& device;
 };
@@ -118,8 +118,8 @@ class BluetoothState : public tinyfsm::Fsm<BluetoothState> {
   static auto discovery() -> bool;
   static auto discovery(bool) -> void;
 
-  static auto source() -> PcmBuffer*;
-  static auto source(PcmBuffer*) -> void;
+  static auto sources() -> std::pair<PcmBuffer*, PcmBuffer*>;
+  static auto sources(PcmBuffer*, PcmBuffer*) -> void;
 
   static auto event_handler(std::function<void(Event)>) -> void;
 
@@ -132,7 +132,7 @@ class BluetoothState : public tinyfsm::Fsm<BluetoothState> {
   virtual void react(const events::Disable& ev) = 0;
   virtual void react(const events::ConnectTimedOut& ev){};
   virtual void react(const events::PreferredDeviceChanged& ev){};
-  virtual void react(const events::SourceChanged& ev){};
+  virtual void react(const events::SourcesChanged& ev){};
 
   virtual void react(const events::DeviceDiscovered&);
 
@@ -152,7 +152,6 @@ class BluetoothState : public tinyfsm::Fsm<BluetoothState> {
   static std::optional<bluetooth::MacAndName> sConnectingDevice_;
   static int sConnectAttemptsRemaining_;
 
-  static std::atomic<PcmBuffer*> sSource_;
   static std::function<void(Event)> sEventHandler_;
 
   auto connect(const bluetooth::MacAndName&) -> bool;
@@ -205,7 +204,7 @@ class Connected : public BluetoothState {
   void exit() override;
 
   void react(const events::PreferredDeviceChanged& ev) override;
-  void react(const events::SourceChanged& ev) override;
+  void react(const events::SourcesChanged& ev) override;
 
   void react(const events::Disable& ev) override;
   void react(events::internal::Gap ev) override;
diff --git a/src/drivers/include/drivers/i2s_dac.hpp b/src/drivers/include/drivers/i2s_dac.hpp
index cf9258c0..0fe462b4 100644
--- a/src/drivers/include/drivers/i2s_dac.hpp
+++ b/src/drivers/include/drivers/i2s_dac.hpp
@@ -40,7 +40,7 @@ constexpr size_t kI2SBufferLengthFrames = 1024;
  */
 class I2SDac {
  public:
-  static auto create(IGpios& expander, PcmBuffer&) -> std::optional<I2SDac*>;
+  static auto create(IGpios& expander, PcmBuffer&, PcmBuffer&) -> std::optional<I2SDac*>;
 
   I2SDac(IGpios& gpio, PcmBuffer&, i2s_chan_handle_t i2s_handle);
   ~I2SDac();
@@ -77,7 +77,8 @@ class I2SDac {
   auto set_channel(bool) -> void;
 
   IGpios& gpio_;
-  PcmBuffer& buffer_;
+  PcmBuffer& buffer1_;
+  PcmBuffer& buffer2_;
   i2s_chan_handle_t i2s_handle_;
 
   bool i2s_active_;
diff --git a/src/drivers/include/drivers/pcm_buffer.hpp b/src/drivers/include/drivers/pcm_buffer.hpp
index 6630f720..27e9eec6 100644
--- a/src/drivers/include/drivers/pcm_buffer.hpp
+++ b/src/drivers/include/drivers/pcm_buffer.hpp
@@ -35,8 +35,13 @@ class PcmBuffer {
    * Fills the given span with samples. If enough samples are available in
    * the buffer, then the span will be filled with samples from the buffer. Any
    * shortfall is made up by padding the given span with zeroes.
+   *
+   * If `mix` is set to true then, instead of overwriting the destination span,
+   * the retrieved samples will be mixed into any existing samples contained
+   * within the destination. This mixing uses a naive sum approach, and so may
+   * introduce clipping.
    */
-  auto receive(std::span<int16_t>, bool isr) -> BaseType_t;
+  auto receive(std::span<int16_t>, bool mix, bool isr) -> BaseType_t;
 
   auto clear() -> void;
   auto isEmpty() -> bool;
@@ -58,7 +63,7 @@ class PcmBuffer {
   PcmBuffer& operator=(const PcmBuffer&) = delete;
 
  private:
-  auto readSingle(std::span<int16_t>, bool isr)
+  auto readSingle(std::span<int16_t>, bool mix, bool isr)
       -> std::pair<size_t, BaseType_t>;
 
   StaticRingbuffer_t meta_;
diff --git a/src/drivers/pcm_buffer.cpp b/src/drivers/pcm_buffer.cpp
index 3f4a0443..b619cefb 100644
--- a/src/drivers/pcm_buffer.cpp
+++ b/src/drivers/pcm_buffer.cpp
@@ -44,14 +44,15 @@ auto PcmBuffer::send(std::span<const int16_t> data) -> void {
   sent_ += data.size();
 }
 
-IRAM_ATTR auto PcmBuffer::receive(std::span<int16_t> dest, bool isr)
+IRAM_ATTR auto PcmBuffer::receive(std::span<int16_t> dest, bool mix, bool isr)
     -> BaseType_t {
   size_t first_read = 0, second_read = 0;
   BaseType_t ret1 = false, ret2 = false;
-  std::tie(first_read, ret1) = readSingle(dest, isr);
+  std::tie(first_read, ret1) = readSingle(dest, mix, isr);
 
   if (first_read < dest.size()) {
-    std::tie(second_read, ret2) = readSingle(dest.subspan(first_read), isr);
+    std::tie(second_read, ret2) =
+        readSingle(dest.subspan(first_read), mix, isr);
   }
 
   size_t total_read = first_read + second_read;
@@ -86,7 +87,9 @@ auto PcmBuffer::totalReceived() -> uint32_t {
   return received_;
 }
 
-IRAM_ATTR auto PcmBuffer::readSingle(std::span<int16_t> dest, bool isr)
+IRAM_ATTR auto PcmBuffer::readSingle(std::span<int16_t> dest,
+                                     bool mix,
+                                     bool isr)
     -> std::pair<size_t, BaseType_t> {
   BaseType_t ret;
   size_t read_bytes = 0;
@@ -104,7 +107,18 @@ IRAM_ATTR auto PcmBuffer::readSingle(std::span<int16_t> dest, bool isr)
     return {read_samples, ret};
   }
 
-  std::memcpy(dest.data(), data, read_bytes);
+  if (mix) {
+    for (size_t i = 0; i < read_samples; i++) {
+      // Sum the two samples in a 32 bit field so that the addition is always
+      // safe.
+      int32_t sum = static_cast<int32_t>(dest[i]) +
+                    static_cast<int32_t>(reinterpret_cast<int16_t*>(data)[i]);
+      // Clip back into the range of a single sample.
+      dest[i] = std::clamp<int32_t>(sum, INT16_MIN, INT16_MAX);
+    }
+  } else {
+    std::memcpy(dest.data(), data, read_bytes);
+  }
 
   if (isr) {
     vRingbufferReturnItem(ringbuf_, data);

From 41e0605f17a784e8f125b3ad10ddfe5ef63337d9 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Mon, 8 Jul 2024 15:06:43 +1000
Subject: [PATCH 2/9] Give PcmBuffer pairs a name, and wire them up in the
 audio stack

---
 src/drivers/bluetooth.cpp                  | 35 ++++++++++------------
 src/drivers/i2s_dac.cpp                    | 20 +++++++------
 src/drivers/include/drivers/bluetooth.hpp  |  6 ++--
 src/drivers/include/drivers/i2s_dac.hpp    |  8 ++---
 src/drivers/include/drivers/pcm_buffer.hpp | 12 ++++++++
 src/drivers/pcm_buffer.cpp                 |  2 +-
 src/tangara/audio/audio_fsm.cpp            | 33 ++++++++++++--------
 src/tangara/audio/audio_fsm.hpp            |  2 +-
 src/tangara/audio/bt_audio_output.cpp      |  8 ++---
 src/tangara/audio/bt_audio_output.hpp      |  4 +--
 src/tangara/audio/i2s_audio_output.cpp     |  6 ++--
 src/tangara/audio/i2s_audio_output.hpp     |  4 +--
 12 files changed, 79 insertions(+), 61 deletions(-)

diff --git a/src/drivers/bluetooth.cpp b/src/drivers/bluetooth.cpp
index 23c4f8d8..acb38ce4 100644
--- a/src/drivers/bluetooth.cpp
+++ b/src/drivers/bluetooth.cpp
@@ -37,8 +37,7 @@ namespace drivers {
 
 [[maybe_unused]] static constexpr char kTag[] = "bluetooth";
 
-DRAM_ATTR static PcmBuffer* sStream1 = nullptr;
-DRAM_ATTR static PcmBuffer* sStream2 = nullptr;
+DRAM_ATTR static OutputBuffers* sStreams = nullptr;
 DRAM_ATTR static std::atomic<float> sVolumeFactor = 1.f;
 
 static tasks::WorkerPool* sBgWorker;
@@ -97,15 +96,16 @@ IRAM_ATTR auto a2dp_data_cb(uint8_t* buf, int32_t buf_size) -> int32_t {
   if (buf == nullptr || buf_size <= 0) {
     return 0;
   }
-  PcmBuffer* stream1 = sStream1;
-  PcmBuffer* stream2 = sStream2;
-  if (stream1 == nullptr || stream2 == nullptr) {
+  OutputBuffers* streams = sStreams;
+  if (streams == nullptr) {
     return 0;
   }
 
   int16_t* samples = reinterpret_cast<int16_t*>(buf);
-  stream1->receive({samples, static_cast<size_t>(buf_size / 2)}, false, false);
-  stream2->receive({samples, static_cast<size_t>(buf_size / 2)}, true, false);
+  streams->first.receive({samples, static_cast<size_t>(buf_size / 2)}, false,
+                         false);
+  streams->second.receive({samples, static_cast<size_t>(buf_size / 2)}, true,
+                          false);
 
   // Apply software volume scaling.
   float factor = sVolumeFactor.load();
@@ -184,14 +184,13 @@ auto Bluetooth::PreferredDevice() -> std::optional<bluetooth::MacAndName> {
   return bluetooth::BluetoothState::preferred_device();
 }
 
-auto Bluetooth::SetSources(PcmBuffer* src1, PcmBuffer* src2) -> void {
+auto Bluetooth::SetSources(OutputBuffers* src) -> void {
   auto lock = bluetooth::BluetoothState::lock();
-  PcmBuffer *cur1, *cur2;
-  std::tie(cur1, cur2) = bluetooth::BluetoothState::sources();
-  if (src1 == cur1 && src2 == cur2) {
+  OutputBuffers* cur = bluetooth::BluetoothState::sources();
+  if (src == cur) {
     return;
   }
-  bluetooth::BluetoothState::sources(src1, src2);
+  bluetooth::BluetoothState::sources(src);
   tinyfsm::FsmList<bluetooth::BluetoothState>::dispatch(
       bluetooth::events::SourcesChanged{});
 }
@@ -381,13 +380,12 @@ auto BluetoothState::preferred_device(std::optional<MacAndName> addr) -> void {
   sPreferredDevice_ = addr;
 }
 
-auto BluetoothState::sources() -> std::pair<PcmBuffer*, PcmBuffer*> {
-  return {sStream1, sStream2};
+auto BluetoothState::sources() -> OutputBuffers* {
+  return sStreams;
 }
 
-auto BluetoothState::sources(PcmBuffer* src1, PcmBuffer* src2) -> void {
-  sStream1 = src1;
-  sStream2 = src2;
+auto BluetoothState::sources(OutputBuffers* src) -> void {
+  sStreams = src;
 }
 
 auto BluetoothState::event_handler(std::function<void(Event)> cb) -> void {
@@ -715,7 +713,6 @@ void Connected::entry() {
                        sPreferredDevice_->mac != stored_pref->mac)) {
     sStorage_->PreferredBluetoothDevice(sPreferredDevice_);
   }
-  // TODO: if we already have a source, immediately start playing
 }
 
 void Connected::exit() {
@@ -733,7 +730,7 @@ void Connected::react(const events::PreferredDeviceChanged& ev) {
 }
 
 void Connected::react(const events::SourcesChanged& ev) {
-  if (sStream1 != nullptr && sStream2 != nullptr) {
+  if (sStreams != nullptr) {
     ESP_LOGI(kTag, "checking source is ready");
     esp_a2d_media_ctrl(ESP_A2D_MEDIA_CTRL_CHECK_SRC_RDY);
   } else {
diff --git a/src/drivers/i2s_dac.cpp b/src/drivers/i2s_dac.cpp
index b1044896..4e2e171a 100644
--- a/src/drivers/i2s_dac.cpp
+++ b/src/drivers/i2s_dac.cpp
@@ -52,10 +52,12 @@ extern "C" IRAM_ATTR auto callback(i2s_chan_handle_t handle,
   assert(event->size % 4 == 0);
 
   uint8_t* buf = *reinterpret_cast<uint8_t**>(event->data);
-  auto* src = reinterpret_cast<PcmBuffer*>(user_ctx);
+  auto* src = reinterpret_cast<OutputBuffers*>(user_ctx);
 
-  BaseType_t ret =
-      src->receive({reinterpret_cast<int16_t*>(buf), event->size / 2}, true);
+  BaseType_t ret1 = src->first.receive(
+      {reinterpret_cast<int16_t*>(buf), event->size / 2}, false, true);
+  BaseType_t ret2 = src->second.receive(
+      {reinterpret_cast<int16_t*>(buf), event->size / 2}, true, true);
 
   // The ESP32's I2S peripheral has a different endianness to its processors.
   // ESP-IDF handles this difference for stereo channels, but not for mono
@@ -70,10 +72,10 @@ extern "C" IRAM_ATTR auto callback(i2s_chan_handle_t handle,
     }
   }
 
-  return ret;
+  return ret1 || ret2;
 }
 
-auto I2SDac::create(IGpios& expander, PcmBuffer& buf)
+auto I2SDac::create(IGpios& expander, OutputBuffers& bufs)
     -> std::optional<I2SDac*> {
   i2s_chan_handle_t i2s_handle;
   i2s_chan_config_t channel_config{
@@ -90,7 +92,7 @@ auto I2SDac::create(IGpios& expander, PcmBuffer& buf)
   // First, instantiate the instance so it can do all of its power on
   // configuration.
   std::unique_ptr<I2SDac> dac =
-      std::make_unique<I2SDac>(expander, buf, i2s_handle);
+      std::make_unique<I2SDac>(expander, bufs, i2s_handle);
 
   // Whilst we wait for the initial boot, we can work on installing the I2S
   // driver.
@@ -122,14 +124,14 @@ auto I2SDac::create(IGpios& expander, PcmBuffer& buf)
       .on_sent = callback,
       .on_send_q_ovf = NULL,
   };
-  i2s_channel_register_event_callback(i2s_handle, &callbacks, &buf);
+  i2s_channel_register_event_callback(i2s_handle, &callbacks, &bufs);
 
   return dac.release();
 }
 
-I2SDac::I2SDac(IGpios& gpio, PcmBuffer& buf, i2s_chan_handle_t i2s_handle)
+I2SDac::I2SDac(IGpios& gpio, OutputBuffers& bufs, i2s_chan_handle_t i2s_handle)
     : gpio_(gpio),
-      buffer_(buf),
+      buffers_(bufs),
       i2s_handle_(i2s_handle),
       i2s_active_(false),
       clock_config_(I2S_STD_CLK_DEFAULT_CONFIG(48000)),
diff --git a/src/drivers/include/drivers/bluetooth.hpp b/src/drivers/include/drivers/bluetooth.hpp
index b3b12ffc..eaecfb2b 100644
--- a/src/drivers/include/drivers/bluetooth.hpp
+++ b/src/drivers/include/drivers/bluetooth.hpp
@@ -43,7 +43,7 @@ class Bluetooth {
   auto SetPreferredDevice(std::optional<bluetooth::MacAndName> dev) -> void;
   auto PreferredDevice() -> std::optional<bluetooth::MacAndName>;
 
-  auto SetSources(PcmBuffer*, PcmBuffer*) -> void;
+  auto SetSources(OutputBuffers*) -> void;
   auto SetVolumeFactor(float) -> void;
 
   auto SetEventHandler(std::function<void(bluetooth::Event)> cb) -> void;
@@ -118,8 +118,8 @@ class BluetoothState : public tinyfsm::Fsm<BluetoothState> {
   static auto discovery() -> bool;
   static auto discovery(bool) -> void;
 
-  static auto sources() -> std::pair<PcmBuffer*, PcmBuffer*>;
-  static auto sources(PcmBuffer*, PcmBuffer*) -> void;
+  static auto sources() -> OutputBuffers*;
+  static auto sources(OutputBuffers*) -> void;
 
   static auto event_handler(std::function<void(Event)>) -> void;
 
diff --git a/src/drivers/include/drivers/i2s_dac.hpp b/src/drivers/include/drivers/i2s_dac.hpp
index 0fe462b4..891acb56 100644
--- a/src/drivers/include/drivers/i2s_dac.hpp
+++ b/src/drivers/include/drivers/i2s_dac.hpp
@@ -40,9 +40,10 @@ constexpr size_t kI2SBufferLengthFrames = 1024;
  */
 class I2SDac {
  public:
-  static auto create(IGpios& expander, PcmBuffer&, PcmBuffer&) -> std::optional<I2SDac*>;
+  static auto create(IGpios& expander, OutputBuffers&)
+      -> std::optional<I2SDac*>;
 
-  I2SDac(IGpios& gpio, PcmBuffer&, i2s_chan_handle_t i2s_handle);
+  I2SDac(IGpios& gpio, OutputBuffers&, i2s_chan_handle_t i2s_handle);
   ~I2SDac();
 
   auto SetPaused(bool) -> void;
@@ -77,8 +78,7 @@ class I2SDac {
   auto set_channel(bool) -> void;
 
   IGpios& gpio_;
-  PcmBuffer& buffer1_;
-  PcmBuffer& buffer2_;
+  OutputBuffers& buffers_;
   i2s_chan_handle_t i2s_handle_;
 
   bool i2s_active_;
diff --git a/src/drivers/include/drivers/pcm_buffer.hpp b/src/drivers/include/drivers/pcm_buffer.hpp
index 27e9eec6..968c3398 100644
--- a/src/drivers/include/drivers/pcm_buffer.hpp
+++ b/src/drivers/include/drivers/pcm_buffer.hpp
@@ -74,4 +74,16 @@ class PcmBuffer {
   RingbufHandle_t ringbuf_;
 };
 
+/*
+ * Convenience type for a pair of PcmBuffers. Each audio output handles mixing
+ * streams together to ensure that low-latency sounds in one channel (e.g. a
+ * system notification bleep) aren't delayed by a large audio buffer in the
+ * other channel (e.g. a long-running track).
+ *
+ * By convention, the first buffer of this pair is used for tracks, whilst the
+ * second is reserved for 'system sounds'; usually TTS, but potentially maybe
+ * other informative noises.
+ */
+using OutputBuffers = std::pair<PcmBuffer, PcmBuffer>;
+
 }  // namespace drivers
diff --git a/src/drivers/pcm_buffer.cpp b/src/drivers/pcm_buffer.cpp
index b619cefb..1d2bab1e 100644
--- a/src/drivers/pcm_buffer.cpp
+++ b/src/drivers/pcm_buffer.cpp
@@ -56,7 +56,7 @@ IRAM_ATTR auto PcmBuffer::receive(std::span<int16_t> dest, bool mix, bool isr)
   }
 
   size_t total_read = first_read + second_read;
-  if (total_read < dest.size()) {
+  if (total_read < dest.size() && !mix) {
     std::fill_n(dest.begin() + total_read, dest.size() - total_read, 0);
   }
 
diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp
index 80611082..8f04c6c1 100644
--- a/src/tangara/audio/audio_fsm.cpp
+++ b/src/tangara/audio/audio_fsm.cpp
@@ -59,10 +59,16 @@ std::shared_ptr<IAudioOutput> AudioState::sOutput;
 std::shared_ptr<I2SAudioOutput> AudioState::sI2SOutput;
 std::shared_ptr<BluetoothAudioOutput> AudioState::sBtOutput;
 
-// Two seconds of samples for two channels, at a representative sample rate.
-constexpr size_t kDrainLatencySamples = 48000 * 2 * 2;
+// For tracks, keep about two seconds' worth of samples at 2ch 48kHz. This
+// is more headroom than we need for small playback, but it doesn't hurt to
+// keep some PSRAM in our pockets for a rainy day.
+constexpr size_t kTrackDrainLatencySamples = 48000 * 2 * 2;
 
-std::unique_ptr<drivers::PcmBuffer> AudioState::sDrainBuffer;
+// For system sounds, we intentionally choose codecs that are very fast to
+// decode. This lets us get away with a much smaller drain buffer.
+constexpr size_t kSystemDrainLatencySamples = 48000;
+
+std::unique_ptr<drivers::OutputBuffers> AudioState::sDrainBuffers;
 std::optional<IAudioOutput::Format> AudioState::sDrainFormat;
 
 StreamCues AudioState::sStreamCues;
@@ -237,11 +243,11 @@ void AudioState::react(const system_fsm::BluetoothEvent& ev) {
         break;
     }
   }
-  if (std::holds_alternative<drivers::bluetooth::RemoteVolumeChanged>(ev.event)) {
-    auto volume_chg = std::get<drivers::bluetooth::RemoteVolumeChanged>(ev.event).new_vol;
-        events::Ui().Dispatch(RemoteVolumeChanged{
-          .value = volume_chg
-        });
+  if (std::holds_alternative<drivers::bluetooth::RemoteVolumeChanged>(
+          ev.event)) {
+    auto volume_chg =
+        std::get<drivers::bluetooth::RemoteVolumeChanged>(ev.event).new_vol;
+    events::Ui().Dispatch(RemoteVolumeChanged{.value = volume_chg});
   }
 }
 
@@ -354,12 +360,13 @@ namespace states {
 void Uninitialised::react(const system_fsm::BootComplete& ev) {
   sServices = ev.services;
 
-  sDrainBuffer = std::make_unique<drivers::PcmBuffer>(kDrainLatencySamples);
+  sDrainBuffers = std::make_unique<drivers::OutputBuffers>(
+      kTrackDrainLatencySamples, kSystemDrainLatencySamples);
 
   sStreamFactory.reset(new FatfsStreamFactory(*sServices));
-  sI2SOutput.reset(new I2SAudioOutput(sServices->gpios(), *sDrainBuffer));
+  sI2SOutput.reset(new I2SAudioOutput(sServices->gpios(), *sDrainBuffers));
   sBtOutput.reset(new BluetoothAudioOutput(
-      sServices->bluetooth(), *sDrainBuffer, sServices->bg_worker()));
+      sServices->bluetooth(), *sDrainBuffers, sServices->bg_worker()));
 
   auto& nvs = sServices->nvs();
   sI2SOutput->SetMaxVolume(nvs.AmpMaxVolume());
@@ -390,7 +397,7 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) {
       .left_bias = nvs.AmpLeftBias(),
   });
 
-  sSampleProcessor.reset(new SampleProcessor(*sDrainBuffer));
+  sSampleProcessor.reset(new SampleProcessor(sDrainBuffers->first));
   sSampleProcessor->SetOutput(sOutput);
 
   sDecoder.reset(Decoder::Start(sSampleProcessor));
@@ -507,7 +514,7 @@ void Playback::react(const system_fsm::SdStateChanged& ev) {
 }
 
 void Playback::react(const internal::StreamHeartbeat& ev) {
-  sStreamCues.update(sDrainBuffer->totalReceived());
+  sStreamCues.update(sDrainBuffers->first.totalReceived());
 
   if (sStreamCues.hasStream()) {
     emitPlaybackUpdate(false);
diff --git a/src/tangara/audio/audio_fsm.hpp b/src/tangara/audio/audio_fsm.hpp
index f949ce8a..1e5184b5 100644
--- a/src/tangara/audio/audio_fsm.hpp
+++ b/src/tangara/audio/audio_fsm.hpp
@@ -81,7 +81,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
   static std::shared_ptr<BluetoothAudioOutput> sBtOutput;
   static std::shared_ptr<IAudioOutput> sOutput;
 
-  static std::unique_ptr<drivers::PcmBuffer> sDrainBuffer;
+  static std::unique_ptr<drivers::OutputBuffers> sDrainBuffers;
 
   static StreamCues sStreamCues;
   static std::optional<IAudioOutput::Format> sDrainFormat;
diff --git a/src/tangara/audio/bt_audio_output.cpp b/src/tangara/audio/bt_audio_output.cpp
index 616a385f..54547622 100644
--- a/src/tangara/audio/bt_audio_output.cpp
+++ b/src/tangara/audio/bt_audio_output.cpp
@@ -33,11 +33,11 @@ namespace audio {
 static constexpr uint16_t kVolumeRange = 60;
 
 BluetoothAudioOutput::BluetoothAudioOutput(drivers::Bluetooth& bt,
-                                           drivers::PcmBuffer& buffer,
+                                           drivers::OutputBuffers& bufs,
                                            tasks::WorkerPool& p)
     : IAudioOutput(),
       bluetooth_(bt),
-      buffer_(buffer),
+      buffers_(bufs),
       bg_worker_(p),
       volume_() {}
 
@@ -45,9 +45,9 @@ BluetoothAudioOutput::~BluetoothAudioOutput() {}
 
 auto BluetoothAudioOutput::changeMode(Modes mode) -> void {
   if (mode == Modes::kOnPlaying) {
-    bluetooth_.SetSource(&buffer_);
+    bluetooth_.SetSources(&buffers_);
   } else {
-    bluetooth_.SetSource(nullptr);
+    bluetooth_.SetSources(nullptr);
   }
 }
 
diff --git a/src/tangara/audio/bt_audio_output.hpp b/src/tangara/audio/bt_audio_output.hpp
index f22f330a..53d2c1a4 100644
--- a/src/tangara/audio/bt_audio_output.hpp
+++ b/src/tangara/audio/bt_audio_output.hpp
@@ -25,7 +25,7 @@ namespace audio {
 class BluetoothAudioOutput : public IAudioOutput {
  public:
   BluetoothAudioOutput(drivers::Bluetooth& bt,
-                       drivers::PcmBuffer& buf,
+                       drivers::OutputBuffers& bufs,
                        tasks::WorkerPool&);
   ~BluetoothAudioOutput();
 
@@ -54,7 +54,7 @@ class BluetoothAudioOutput : public IAudioOutput {
 
  private:
   drivers::Bluetooth& bluetooth_;
-  drivers::PcmBuffer& buffer_;
+  drivers::OutputBuffers& buffers_;
   tasks::WorkerPool& bg_worker_;
 
   uint16_t volume_;
diff --git a/src/tangara/audio/i2s_audio_output.cpp b/src/tangara/audio/i2s_audio_output.cpp
index 8222b8c9..55c8bdb8 100644
--- a/src/tangara/audio/i2s_audio_output.cpp
+++ b/src/tangara/audio/i2s_audio_output.cpp
@@ -42,10 +42,10 @@ static constexpr uint16_t kLineLevelVolume = 0x13d;
 static constexpr uint16_t kDefaultVolume = 0x100;
 
 I2SAudioOutput::I2SAudioOutput(drivers::IGpios& expander,
-                               drivers::PcmBuffer& buffer)
+                               drivers::OutputBuffers& buffers)
     : IAudioOutput(),
       expander_(expander),
-      buffer_(buffer),
+      buffers_(buffers),
       dac_(),
       current_mode_(Modes::kOff),
       current_config_(),
@@ -72,7 +72,7 @@ auto I2SAudioOutput::changeMode(Modes mode) -> void {
   if (was_off) {
     // Ensure an I2SDac instance actually exists.
     if (!dac_) {
-      auto instance = drivers::I2SDac::create(expander_, buffer_);
+      auto instance = drivers::I2SDac::create(expander_, buffers_);
       if (!instance) {
         return;
       }
diff --git a/src/tangara/audio/i2s_audio_output.hpp b/src/tangara/audio/i2s_audio_output.hpp
index 35d888b9..2b768ddd 100644
--- a/src/tangara/audio/i2s_audio_output.hpp
+++ b/src/tangara/audio/i2s_audio_output.hpp
@@ -21,7 +21,7 @@ namespace audio {
 
 class I2SAudioOutput : public IAudioOutput {
  public:
-  I2SAudioOutput(drivers::IGpios&, drivers::PcmBuffer&);
+  I2SAudioOutput(drivers::IGpios&, drivers::OutputBuffers&);
 
   auto SetMaxVolume(uint16_t) -> void;
   auto SetVolumeDb(uint16_t) -> void;
@@ -51,7 +51,7 @@ class I2SAudioOutput : public IAudioOutput {
 
  private:
   drivers::IGpios& expander_;
-  drivers::PcmBuffer& buffer_;
+  drivers::OutputBuffers& buffers_;
 
   std::unique_ptr<drivers::I2SDac> dac_;
 

From 370d1853b5d099de28c032def4ce3e53b7d735ad Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Tue, 9 Jul 2024 14:41:02 +1000
Subject: [PATCH 3/9] Break FatfsStreamFactory's dep on ServiceLocator

---
 src/tangara/audio/audio_fsm.cpp            |  3 ++-
 src/tangara/audio/fatfs_stream_factory.cpp | 12 ++++++------
 src/tangara/audio/fatfs_stream_factory.hpp | 11 +++++------
 src/tangara/database/database.cpp          |  6 ++++++
 src/tangara/database/database.hpp          | 10 ++++++++++
 src/tangara/system_fsm/service_locator.hpp |  2 +-
 6 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp
index 8f04c6c1..ad60ab86 100644
--- a/src/tangara/audio/audio_fsm.cpp
+++ b/src/tangara/audio/audio_fsm.cpp
@@ -363,7 +363,8 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) {
   sDrainBuffers = std::make_unique<drivers::OutputBuffers>(
       kTrackDrainLatencySamples, kSystemDrainLatencySamples);
 
-  sStreamFactory.reset(new FatfsStreamFactory(*sServices));
+  sStreamFactory.reset(
+      new FatfsStreamFactory(sServices->database(), sServices->tag_parser()));
   sI2SOutput.reset(new I2SAudioOutput(sServices->gpios(), *sDrainBuffers));
   sBtOutput.reset(new BluetoothAudioOutput(
       sServices->bluetooth(), *sDrainBuffers, sServices->bg_worker()));
diff --git a/src/tangara/audio/fatfs_stream_factory.cpp b/src/tangara/audio/fatfs_stream_factory.cpp
index 80677b2d..735ec134 100644
--- a/src/tangara/audio/fatfs_stream_factory.cpp
+++ b/src/tangara/audio/fatfs_stream_factory.cpp
@@ -10,7 +10,6 @@
 #include <memory>
 #include <string>
 
-#include "database/database.hpp"
 #include "esp_log.h"
 #include "ff.h"
 #include "freertos/portmacro.h"
@@ -19,10 +18,10 @@
 #include "audio/audio_source.hpp"
 #include "audio/fatfs_source.hpp"
 #include "codec.hpp"
+#include "database/database.hpp"
 #include "database/tag_parser.hpp"
 #include "database/track.hpp"
 #include "drivers/spi.hpp"
-#include "system_fsm/service_locator.hpp"
 #include "tasks.hpp"
 #include "types.hpp"
 
@@ -30,12 +29,13 @@
 
 namespace audio {
 
-FatfsStreamFactory::FatfsStreamFactory(system_fsm::ServiceLocator& services)
-    : services_(services) {}
+FatfsStreamFactory::FatfsStreamFactory(database::Handle&& handle,
+                                       database::ITagParser& parser)
+    : db_(handle), tag_parser_(parser) {}
 
 auto FatfsStreamFactory::create(database::TrackId id, uint32_t offset)
     -> std::shared_ptr<TaggedStream> {
-  auto db = services_.database().lock();
+  auto db = db_.lock();
   if (!db) {
     return {};
   }
@@ -48,7 +48,7 @@ auto FatfsStreamFactory::create(database::TrackId id, uint32_t offset)
 
 auto FatfsStreamFactory::create(std::string path, uint32_t offset)
     -> std::shared_ptr<TaggedStream> {
-  auto tags = services_.tag_parser().ReadAndParseTags(path);
+  auto tags = tag_parser_.ReadAndParseTags(path);
   if (!tags) {
     ESP_LOGE(kTag, "failed to read tags");
     return {};
diff --git a/src/tangara/audio/fatfs_stream_factory.hpp b/src/tangara/audio/fatfs_stream_factory.hpp
index 858d2131..84073d2d 100644
--- a/src/tangara/audio/fatfs_stream_factory.hpp
+++ b/src/tangara/audio/fatfs_stream_factory.hpp
@@ -6,23 +6,21 @@
 
 #pragma once
 
-#include <stdint.h>
 #include <cstddef>
 #include <cstdint>
 #include <future>
 #include <memory>
 #include <string>
 
-#include "database/database.hpp"
-#include "database/track.hpp"
 #include "ff.h"
 #include "freertos/portmacro.h"
 
 #include "audio/audio_source.hpp"
 #include "codec.hpp"
+#include "database/database.hpp"
 #include "database/future_fetcher.hpp"
 #include "database/tag_parser.hpp"
-#include "system_fsm/service_locator.hpp"
+#include "database/track.hpp"
 #include "tasks.hpp"
 #include "types.hpp"
 
@@ -33,7 +31,7 @@ namespace audio {
  */
 class FatfsStreamFactory {
  public:
-  explicit FatfsStreamFactory(system_fsm::ServiceLocator&);
+  explicit FatfsStreamFactory(database::Handle&&, database::ITagParser&);
 
   auto create(database::TrackId, uint32_t offset = 0)
       -> std::shared_ptr<TaggedStream>;
@@ -47,7 +45,8 @@ class FatfsStreamFactory {
   auto ContainerToStreamType(database::Container)
       -> std::optional<codecs::StreamType>;
 
-  system_fsm::ServiceLocator& services_;
+  database::Handle db_;
+  database::ITagParser& tag_parser_;
 };
 
 }  // namespace audio
diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp
index cf1430b3..85700431 100644
--- a/src/tangara/database/database.cpp
+++ b/src/tangara/database/database.cpp
@@ -684,6 +684,12 @@ auto Database::countRecords(const SearchKey& c) -> size_t {
   return count;
 }
 
+Handle::Handle(std::shared_ptr<Database>& db) : db_(db) {}
+
+auto Handle::lock() -> std::shared_ptr<Database> {
+  return db_;
+}
+
 auto SearchKey::startKey() const -> std::string_view {
   if (key) {
     return *key;
diff --git a/src/tangara/database/database.hpp b/src/tangara/database/database.hpp
index d2de7c72..c2e72568 100644
--- a/src/tangara/database/database.hpp
+++ b/src/tangara/database/database.hpp
@@ -128,6 +128,16 @@ class Database {
   auto countRecords(const SearchKey& c) -> size_t;
 };
 
+class Handle {
+ public:
+  Handle(std::shared_ptr<Database>& db);
+
+  auto lock() -> std::shared_ptr<Database>;
+
+ private:
+  std::shared_ptr<Database>& db_;
+};
+
 /*
  * Container for the data needed to iterate through database records. This is a
  * lower-level type that the higher-level iterators are built from; most users
diff --git a/src/tangara/system_fsm/service_locator.hpp b/src/tangara/system_fsm/service_locator.hpp
index 3d136f3a..d441fa70 100644
--- a/src/tangara/system_fsm/service_locator.hpp
+++ b/src/tangara/system_fsm/service_locator.hpp
@@ -92,7 +92,7 @@ class ServiceLocator {
 
   auto haptics(std::unique_ptr<drivers::Haptics> i) { haptics_ = std::move(i); }
 
-  auto database() -> std::weak_ptr<database::Database> { return database_; }
+  auto database() -> database::Handle { return database_; }
 
   auto database(std::unique_ptr<database::Database> i) {
     database_ = std::move(i);

From 9475d10d1000c7e21a7ea311b0c8ee6a72ef46c4 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Fri, 19 Jul 2024 13:59:30 +1000
Subject: [PATCH 4/9] WIP initial tts player wiring

---
 src/tangara/audio/audio_fsm.cpp |  6 ++++++
 src/tangara/tts/player.cpp      | 24 +++++++++++++++++++++
 src/tangara/tts/player.hpp      | 38 +++++++++++++++++++++++++++++++++
 src/tangara/tts/provider.cpp    | 23 ++++++++++++++++++++
 src/tangara/tts/provider.hpp    | 17 +++++++++++++++
 5 files changed, 108 insertions(+)
 create mode 100644 src/tangara/tts/player.cpp
 create mode 100644 src/tangara/tts/player.hpp

diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp
index ad60ab86..dbf1954c 100644
--- a/src/tangara/audio/audio_fsm.cpp
+++ b/src/tangara/audio/audio_fsm.cpp
@@ -43,6 +43,7 @@
 #include "sample.hpp"
 #include "system_fsm/service_locator.hpp"
 #include "system_fsm/system_events.hpp"
+#include "tts/player.hpp"
 
 namespace audio {
 
@@ -369,6 +370,11 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) {
   sBtOutput.reset(new BluetoothAudioOutput(
       sServices->bluetooth(), *sDrainBuffers, sServices->bg_worker()));
 
+  auto& tts_provider = sServices->tts();
+  auto tts_player = std::make_unique<tts::Player>(
+      sServices->bg_worker(), sDrainBuffers->second, *sStreamFactory);
+  tts_provider.player(std::move(tts_player));
+
   auto& nvs = sServices->nvs();
   sI2SOutput->SetMaxVolume(nvs.AmpMaxVolume());
   sI2SOutput->SetVolume(nvs.AmpCurrentVolume());
diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp
new file mode 100644
index 00000000..70959992
--- /dev/null
+++ b/src/tangara/tts/player.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2024 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#include "tts/player.hpp"
+
+#include "esp_log.h"
+
+namespace tts {
+
+[[maybe_unused]] static constexpr char kTag[] = "ttsplay";
+
+Player::Player(tasks::WorkerPool& worker,
+               drivers::PcmBuffer& output,
+               audio::FatfsStreamFactory& factory)
+    : bg_(worker), stream_factory_(factory), output_(output) {}
+
+auto Player::playFile(const std::string& path) -> void {
+  ESP_LOGI(kTag, "playing '%s'", path.c_str());
+}
+
+}  // namespace tts
diff --git a/src/tangara/tts/player.hpp b/src/tangara/tts/player.hpp
new file mode 100644
index 00000000..a132b9cd
--- /dev/null
+++ b/src/tangara/tts/player.hpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2024 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <string>
+
+#include "audio/fatfs_stream_factory.hpp"
+#include "drivers/pcm_buffer.hpp"
+#include "tasks.hpp"
+
+namespace tts {
+
+/*
+ * A TTS Player is the output stage of the TTS pipeline. It receives a stream
+ * of filenames that should be played, and handles decoding these files and
+ * sending them to the output buffer.
+ */
+class Player {
+ public:
+  Player(tasks::WorkerPool&, drivers::PcmBuffer&, audio::FatfsStreamFactory&);
+
+  auto playFile(const std::string& path) -> void;
+
+  // Not copyable or movable.
+  Player(const Player&) = delete;
+  Player& operator=(const Player&) = delete;
+
+ private:
+  tasks::WorkerPool& bg_;
+  audio::FatfsStreamFactory& stream_factory_;
+  drivers::PcmBuffer& output_;
+};
+
+}  // namespace tts
diff --git a/src/tangara/tts/provider.cpp b/src/tangara/tts/provider.cpp
index 7d33bae6..24229233 100644
--- a/src/tangara/tts/provider.cpp
+++ b/src/tangara/tts/provider.cpp
@@ -5,21 +5,40 @@
  */
 
 #include "tts/provider.hpp"
+#include <stdint.h>
 
+#include <ios>
 #include <optional>
+#include <sstream>
 #include <string>
 #include <variant>
 
+#include "drivers/storage.hpp"
 #include "esp_log.h"
 
+#include "komihash.h"
 #include "tts/events.hpp"
 
 namespace tts {
 
 [[maybe_unused]] static constexpr char kTag[] = "tts";
 
+static const char* kTtsPath = "/.tangara-tts/";
+
+static auto textToFile(const std::string& text) -> std::optional<std::string> {
+  uint64_t hash = komihash(text.data(), text.size(), 0);
+  std::stringstream stream;
+  stream << drivers::kStoragePath << kTtsPath;
+  stream << std::hex << hash;
+  return stream.str();
+}
+
 Provider::Provider() {}
 
+auto Provider::player(std::unique_ptr<Player> p) -> void {
+  player_ = std::move(p);
+}
+
 auto Provider::feed(const Event& e) -> void {
   if (std::holds_alternative<SimpleEvent>(e)) {
     // ESP_LOGI(kTag, "context changed");
@@ -31,6 +50,10 @@ auto Provider::feed(const Event& e) -> void {
       // ESP_LOGI(kTag, "new selection: '%s', interactive? %i",
       // ev.new_selection->description.value_or("").c_str(),
       // ev.new_selection->is_interactive);
+      std::string new_desc = ev.new_selection->description.value_or("");
+      if (player_) {
+        player_->playFile(textToFile(new_desc).value_or(""));
+      }
     }
   }
 }
diff --git a/src/tangara/tts/provider.hpp b/src/tangara/tts/provider.hpp
index 59f61a6c..8fe143cc 100644
--- a/src/tangara/tts/provider.hpp
+++ b/src/tangara/tts/provider.hpp
@@ -6,18 +6,35 @@
 
 #pragma once
 
+#include <memory>
 #include <optional>
 #include <string>
 #include <variant>
 
 #include "tts/events.hpp"
+#include "tts/player.hpp"
 
 namespace tts {
 
+/*
+ * A TTS Provider is responsible for receiving system events that may be
+ * relevant to TTS, and digesting them into discrete 'utterances' that can be
+ * used to generate audio feedback.
+ */
 class Provider {
  public:
   Provider();
+
+  auto player(std::unique_ptr<Player>) -> void;
+
   auto feed(const Event&) -> void;
+
+  // Not copyable or movable.
+  Provider(const Provider&) = delete;
+  Provider& operator=(const Provider&) = delete;
+
+ private:
+  std::unique_ptr<Player> player_;
 };
 
 }  // namespace tts

From d0b739c66ef11a6c16f99cad6957a1782236fd8c Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Mon, 9 Sep 2024 16:39:22 +1000
Subject: [PATCH 5/9] Play basic wav files in response to tts prompts

It's currently quite limited (no stereo or sample rate conversion,
multiple messages clobber each other, only plays if music is playing),
but we're getting there!
---
 src/tangara/audio/audio_fsm.cpp |  2 +-
 src/tangara/tts/player.cpp      | 50 +++++++++++++++++++++++++++++++++
 src/tangara/tts/provider.cpp    |  3 +-
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp
index ee7215cb..dac04f75 100644
--- a/src/tangara/audio/audio_fsm.cpp
+++ b/src/tangara/audio/audio_fsm.cpp
@@ -226,7 +226,7 @@ void AudioState::react(const internal::StreamStarted& ev) {
   }
 
   sStreamCues.addCue(ev.track, ev.cue_at_sample);
-  sStreamCues.update(sDrainBuffer->totalReceived());
+  sStreamCues.update(sDrainBuffers->first.totalReceived());
 
   if (!sIsPaused && !is_in_state<states::Playback>()) {
     transit<states::Playback>();
diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp
index 70959992..3fcd88bc 100644
--- a/src/tangara/tts/player.cpp
+++ b/src/tangara/tts/player.cpp
@@ -6,7 +6,10 @@
 
 #include "tts/player.hpp"
 
+#include "codec.hpp"
 #include "esp_log.h"
+#include "sample.hpp"
+#include "types.hpp"
 
 namespace tts {
 
@@ -19,6 +22,53 @@ Player::Player(tasks::WorkerPool& worker,
 
 auto Player::playFile(const std::string& path) -> void {
   ESP_LOGI(kTag, "playing '%s'", path.c_str());
+  bg_.Dispatch<void>([=]() {
+    auto stream = stream_factory_.create(path);
+    if (!stream) {
+      ESP_LOGE(kTag, "creating stream failed");
+      return;
+    }
+    if (stream->type() != codecs::StreamType::kWav) {
+      ESP_LOGE(kTag, "stream was unsupported type");
+      return;
+    }
+    auto decoder = codecs::CreateCodecForType(stream->type());
+    if (!decoder) {
+      ESP_LOGE(kTag, "creating decoder failed");
+      return;
+    }
+    std::unique_ptr<codecs::ICodec> codec{*decoder};
+    auto open_res = codec->OpenStream(stream, 0);
+    if (open_res.has_error()) {
+      ESP_LOGE(kTag, "opening stream failed");
+      return;
+    }
+    // if (open_res->sample_rate_hz != 48000 || open_res->num_channels != 2) {
+    // ESP_LOGE(kTag, "stream format is wrong (was %u channels @ %lu hz)",
+    // open_res->num_channels, open_res->sample_rate_hz);
+    // return;
+    // }
+    sample::Sample decode_buf[4096];
+    for (;;) {
+      auto decode_res = codec->DecodeTo(decode_buf);
+      if (decode_res.has_error()) {
+        ESP_LOGE(kTag, "decoding error");
+        return;
+      }
+      if (decode_res->is_stream_finished) {
+        break;
+      }
+
+      std::span<sample::Sample> decode_span{decode_buf,
+                                            decode_res->samples_written};
+      while (!decode_span.empty()) {
+        size_t sent = output_.send(decode_span);
+        decode_span = decode_span.subspan(sent);
+      }
+    }
+
+    ESP_LOGI(kTag, "finished playing okay");
+  });
 }
 
 }  // namespace tts
diff --git a/src/tangara/tts/provider.cpp b/src/tangara/tts/provider.cpp
index 24229233..b7c1e55d 100644
--- a/src/tangara/tts/provider.cpp
+++ b/src/tangara/tts/provider.cpp
@@ -28,8 +28,7 @@ static const char* kTtsPath = "/.tangara-tts/";
 static auto textToFile(const std::string& text) -> std::optional<std::string> {
   uint64_t hash = komihash(text.data(), text.size(), 0);
   std::stringstream stream;
-  stream << drivers::kStoragePath << kTtsPath;
-  stream << std::hex << hash;
+  stream << kTtsPath << std::hex << hash << ".wav";
   return stream.str();
 }
 

From 542ebc65317ac4744a4b96c3131dace5bda10314 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Wed, 11 Sep 2024 12:57:04 +1000
Subject: [PATCH 6/9] Play TTS files in response to TTS prompts, but it's
 legible now

 - input files are upsampled and padded to stereo before playback
 - any in-progress playback is cancelled before playing a new file
---
 src/tangara/audio/processor.cpp |  33 +++++----
 src/tangara/audio/processor.hpp |  56 +++++++-------
 src/tangara/tts/player.cpp      | 125 +++++++++++++++++++++++++++-----
 src/tangara/tts/player.hpp      |   7 ++
 src/tangara/tts/provider.cpp    |   2 +-
 5 files changed, 162 insertions(+), 61 deletions(-)

diff --git a/src/tangara/audio/processor.cpp b/src/tangara/audio/processor.cpp
index aa2604b5..2fa7f78e 100644
--- a/src/tangara/audio/processor.cpp
+++ b/src/tangara/audio/processor.cpp
@@ -347,34 +347,39 @@ auto SampleProcessor::discardCommand(Args& command) -> void {
   // End of stream commands can just be dropped without further action.
 }
 
-SampleProcessor::Buffer::Buffer()
-    : buffer_(reinterpret_cast<sample::Sample*>(
-                  heap_caps_calloc(kSampleBufferLength,
-                                   sizeof(sample::Sample),
-                                   MALLOC_CAP_DMA)),
-              kSampleBufferLength),
+Buffer::Buffer(std::span<sample::Sample> storage)
+    : storage_(nullptr), buffer_(storage), samples_in_buffer_() {}
+
+Buffer::Buffer()
+    : storage_(reinterpret_cast<sample::Sample*>(
+          heap_caps_calloc(kSampleBufferLength,
+                           sizeof(sample::Sample),
+                           MALLOC_CAP_DMA))),
+      buffer_(storage_, kSampleBufferLength),
       samples_in_buffer_() {}
 
-SampleProcessor::Buffer::~Buffer() {
-  heap_caps_free(buffer_.data());
+Buffer::~Buffer() {
+  if (storage_) {
+    heap_caps_free(storage_);
+  }
 }
 
-auto SampleProcessor::Buffer::writeAcquire() -> std::span<sample::Sample> {
+auto Buffer::writeAcquire() -> std::span<sample::Sample> {
   return buffer_.subspan(samples_in_buffer_.size());
 }
 
-auto SampleProcessor::Buffer::writeCommit(size_t samples) -> void {
+auto Buffer::writeCommit(size_t samples) -> void {
   if (samples == 0) {
     return;
   }
   samples_in_buffer_ = buffer_.first(samples + samples_in_buffer_.size());
 }
 
-auto SampleProcessor::Buffer::readAcquire() -> std::span<sample::Sample> {
+auto Buffer::readAcquire() -> std::span<sample::Sample> {
   return samples_in_buffer_;
 }
 
-auto SampleProcessor::Buffer::readCommit(size_t samples) -> void {
+auto Buffer::readCommit(size_t samples) -> void {
   if (samples == 0) {
     return;
   }
@@ -389,11 +394,11 @@ auto SampleProcessor::Buffer::readCommit(size_t samples) -> void {
   }
 }
 
-auto SampleProcessor::Buffer::isEmpty() -> bool {
+auto Buffer::isEmpty() -> bool {
   return samples_in_buffer_.empty();
 }
 
-auto SampleProcessor::Buffer::clear() -> void {
+auto Buffer::clear() -> void {
   samples_in_buffer_ = {};
 }
 
diff --git a/src/tangara/audio/processor.hpp b/src/tangara/audio/processor.hpp
index 45e05291..52bace95 100644
--- a/src/tangara/audio/processor.hpp
+++ b/src/tangara/audio/processor.hpp
@@ -22,6 +22,35 @@
 
 namespace audio {
 
+/* Utility for managing buffering samples between digital filters. */
+class Buffer {
+ public:
+  Buffer(std::span<sample::Sample> storage);
+  Buffer();
+  ~Buffer();
+
+  /* Returns a span of the unused space within the buffer. */
+  auto writeAcquire() -> std::span<sample::Sample>;
+  /* Signals how many samples were just added to the writeAcquire span. */
+  auto writeCommit(size_t) -> void;
+
+  /* Returns a span of the samples stored within the buffer. */
+  auto readAcquire() -> std::span<sample::Sample>;
+  /* Signals how many samples from the readAcquire span were consumed. */
+  auto readCommit(size_t) -> void;
+
+  auto isEmpty() -> bool;
+  auto clear() -> void;
+
+  Buffer(const Buffer&) = delete;
+  Buffer& operator=(const Buffer&) = delete;
+
+ private:
+  sample::Sample* storage_;
+  std::span<sample::Sample> buffer_;
+  std::span<sample::Sample> samples_in_buffer_;
+};
+
 /*
  * Handle to a persistent task that converts samples between formats (sample
  * rate, channels, bits per sample), in order to put samples in the preferred
@@ -87,33 +116,6 @@ class SampleProcessor {
   StreamBufferHandle_t source_;
   drivers::PcmBuffer& sink_;
 
-  /* Internal utility for managing buffering samples between our filters. */
-  class Buffer {
-   public:
-    Buffer();
-    ~Buffer();
-
-    /* Returns a span of the unused space within the buffer. */
-    auto writeAcquire() -> std::span<sample::Sample>;
-    /* Signals how many samples were just added to the writeAcquire span. */
-    auto writeCommit(size_t) -> void;
-
-    /* Returns a span of the samples stored within the buffer. */
-    auto readAcquire() -> std::span<sample::Sample>;
-    /* Signals how many samples from the readAcquire span were consumed. */
-    auto readCommit(size_t) -> void;
-
-    auto isEmpty() -> bool;
-    auto clear() -> void;
-
-    Buffer(const Buffer&) = delete;
-    Buffer& operator=(const Buffer&) = delete;
-
-   private:
-    std::span<sample::Sample> buffer_;
-    std::span<sample::Sample> samples_in_buffer_;
-  };
-
   Buffer input_buffer_;
   Buffer resampled_buffer_;
   Buffer output_buffer_;
diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp
index 3fcd88bc..b5b99b5d 100644
--- a/src/tangara/tts/player.cpp
+++ b/src/tangara/tts/player.cpp
@@ -6,8 +6,12 @@
 
 #include "tts/player.hpp"
 
+#include "audio/processor.hpp"
+#include "audio/resample.hpp"
 #include "codec.hpp"
 #include "esp_log.h"
+#include "freertos/projdefs.h"
+#include "portmacro.h"
 #include "sample.hpp"
 #include "types.hpp"
 
@@ -18,57 +22,140 @@ namespace tts {
 Player::Player(tasks::WorkerPool& worker,
                drivers::PcmBuffer& output,
                audio::FatfsStreamFactory& factory)
-    : bg_(worker), stream_factory_(factory), output_(output) {}
+    : bg_(worker), stream_factory_(factory), output_(output), play_count_(0) {}
 
 auto Player::playFile(const std::string& path) -> void {
   ESP_LOGI(kTag, "playing '%s'", path.c_str());
-  bg_.Dispatch<void>([=]() {
+  int this_play = ++play_count_;
+
+  bg_.Dispatch<void>([=, this]() {
     auto stream = stream_factory_.create(path);
     if (!stream) {
       ESP_LOGE(kTag, "creating stream failed");
       return;
     }
+
+    // FIXME: Rather than hardcoding WAV support only, we should work out a
+    // proper subset of 'low memory' decoders that can all be used for TTS
+    // playback.
     if (stream->type() != codecs::StreamType::kWav) {
       ESP_LOGE(kTag, "stream was unsupported type");
       return;
     }
+
     auto decoder = codecs::CreateCodecForType(stream->type());
     if (!decoder) {
       ESP_LOGE(kTag, "creating decoder failed");
       return;
     }
+
     std::unique_ptr<codecs::ICodec> codec{*decoder};
     auto open_res = codec->OpenStream(stream, 0);
     if (open_res.has_error()) {
       ESP_LOGE(kTag, "opening stream failed");
       return;
     }
-    // if (open_res->sample_rate_hz != 48000 || open_res->num_channels != 2) {
-    // ESP_LOGE(kTag, "stream format is wrong (was %u channels @ %lu hz)",
-    // open_res->num_channels, open_res->sample_rate_hz);
-    // return;
-    // }
-    sample::Sample decode_buf[4096];
-    for (;;) {
-      auto decode_res = codec->DecodeTo(decode_buf);
+
+    decodeToSink(*open_res, std::move(codec), this_play);
+  });
+}
+
+auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
+                          std::unique_ptr<codecs::ICodec> codec,
+                          int play_count) -> void {
+  // Set up buffers to hold samples between the intermediary parts of
+  // processing. We can just use the stack for these, since this method is
+  // called only from background workers, which have enormous stacks.
+  sample::Sample decode_storage[4096];
+  audio::Buffer decode_buf(decode_storage);
+
+  sample::Sample resample_storage[4096];
+  audio::Buffer resample_buf(resample_storage);
+
+  sample::Sample stereo_storage[4096];
+  audio::Buffer stereo_buf(stereo_storage);
+
+  // Work out what processing the codec's output needs.
+  std::unique_ptr<audio::Resampler> resampler;
+  if (format.sample_rate_hz != 48000) {
+    resampler = std::make_unique<audio::Resampler>(format.sample_rate_hz, 48000,
+                                                   format.num_channels);
+  }
+  bool double_samples = format.num_channels == 1;
+
+  // FIXME: This decode-and-process loop is substantially the same as the audio
+  // processor's filter loop. Ideally we should refactor both of these loops to
+  // reuse code, however I'm holding off on doing this until we've implemented
+  // more advanced audio processing features in the audio processor (EQ, tempo
+  // shifting, etc.) as it's not clear to me yet how much the two codepaths will
+  // be diverging later anyway.
+  while (codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() ||
+         !stereo_buf.isEmpty()) {
+    if (play_count != play_count_) {
+      // FIXME: This is a little unsafe and could maybe take out the first few
+      // samples of the next file.
+      output_.clear();
+      break;
+    }
+    if (codec) {
+      auto decode_res = codec->DecodeTo(decode_buf.writeAcquire());
       if (decode_res.has_error()) {
         ESP_LOGE(kTag, "decoding error");
-        return;
+        break;
       }
+      decode_buf.writeCommit(decode_res->samples_written);
       if (decode_res->is_stream_finished) {
-        break;
+        codec.reset();
       }
+    }
+
+    if (!decode_buf.isEmpty()) {
+      auto resample_input = decode_buf.readAcquire();
+      auto resample_output = resample_buf.writeAcquire();
 
-      std::span<sample::Sample> decode_span{decode_buf,
-                                            decode_res->samples_written};
-      while (!decode_span.empty()) {
-        size_t sent = output_.send(decode_span);
-        decode_span = decode_span.subspan(sent);
+      size_t read, wrote;
+      if (resampler) {
+        std::tie(read, wrote) =
+            resampler->Process(resample_input, resample_output, false);
+      } else {
+        read = wrote = std::min(resample_input.size(), resample_output.size());
+        std::copy_n(resample_input.begin(), read, resample_output.begin());
       }
+
+      decode_buf.readCommit(read);
+      resample_buf.writeCommit(wrote);
     }
 
-    ESP_LOGI(kTag, "finished playing okay");
-  });
+    if (!resample_buf.isEmpty()) {
+      auto channels_input = resample_buf.readAcquire();
+      auto channels_output = stereo_buf.writeAcquire();
+      size_t read, wrote;
+      if (double_samples) {
+        wrote = channels_output.size();
+        read = wrote / 2;
+        if (read > channels_input.size()) {
+          read = channels_input.size();
+          wrote = read * 2;
+        }
+        for (size_t i = 0; i < read; i++) {
+          channels_output[i * 2] = channels_input[i];
+          channels_output[(i * 2) + 1] = channels_input[i];
+        }
+      } else {
+        read = wrote = std::min(channels_input.size(), channels_output.size());
+        std::copy_n(channels_input.begin(), read, channels_output.begin());
+      }
+      resample_buf.readCommit(read);
+      stereo_buf.writeCommit(wrote);
+    }
+
+    // The mixin PcmBuffer should almost always be draining, so we can force
+    // samples into it more aggressively than with the main music PcmBuffer.
+    while (!stereo_buf.isEmpty()) {
+      size_t sent = output_.send(stereo_buf.readAcquire());
+      stereo_buf.readCommit(sent);
+    }
+  }
 }
 
 }  // namespace tts
diff --git a/src/tangara/tts/player.hpp b/src/tangara/tts/player.hpp
index a132b9cd..0a3ba723 100644
--- a/src/tangara/tts/player.hpp
+++ b/src/tangara/tts/player.hpp
@@ -9,6 +9,7 @@
 #include <string>
 
 #include "audio/fatfs_stream_factory.hpp"
+#include "codec.hpp"
 #include "drivers/pcm_buffer.hpp"
 #include "tasks.hpp"
 
@@ -33,6 +34,12 @@ class Player {
   tasks::WorkerPool& bg_;
   audio::FatfsStreamFactory& stream_factory_;
   drivers::PcmBuffer& output_;
+
+  std::atomic<int> play_count_;
+
+  auto decodeToSink(const codecs::ICodec::OutputFormat&,
+                    std::unique_ptr<codecs::ICodec>,
+                    int play_count) -> void;
 };
 
 }  // namespace tts
diff --git a/src/tangara/tts/provider.cpp b/src/tangara/tts/provider.cpp
index b7c1e55d..2b1dd4e6 100644
--- a/src/tangara/tts/provider.cpp
+++ b/src/tangara/tts/provider.cpp
@@ -28,7 +28,7 @@ static const char* kTtsPath = "/.tangara-tts/";
 static auto textToFile(const std::string& text) -> std::optional<std::string> {
   uint64_t hash = komihash(text.data(), text.size(), 0);
   std::stringstream stream;
-  stream << kTtsPath << std::hex << hash << ".wav";
+  stream << kTtsPath << std::hex << hash;
   return stream.str();
 }
 

From c51709f99ff5456a5863ca39ff893f823a3642d4 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Thu, 12 Sep 2024 10:44:26 +1000
Subject: [PATCH 7/9] Pause and unpause the current audio output in response to
 TTS

---
 src/drivers/include/drivers/pcm_buffer.hpp |   3 +
 src/drivers/pcm_buffer.cpp                 |  14 ++-
 src/tangara/audio/audio_events.hpp         |   5 +-
 src/tangara/audio/audio_fsm.cpp            |  26 +++++-
 src/tangara/audio/audio_fsm.hpp            |   4 +
 src/tangara/tts/player.cpp                 | 102 ++++++++++++++-------
 src/tangara/tts/player.hpp                 |   8 +-
 7 files changed, 119 insertions(+), 43 deletions(-)

diff --git a/src/drivers/include/drivers/pcm_buffer.hpp b/src/drivers/include/drivers/pcm_buffer.hpp
index 4e5fa041..6b38be94 100644
--- a/src/drivers/include/drivers/pcm_buffer.hpp
+++ b/src/drivers/include/drivers/pcm_buffer.hpp
@@ -49,6 +49,7 @@ class PcmBuffer {
 
   auto clear() -> void;
   auto isEmpty() -> bool;
+  auto suspend(bool) -> void;
 
   /*
    * How many samples have been added to this buffer since it was created. This
@@ -75,6 +76,8 @@ class PcmBuffer {
 
   std::atomic<uint32_t> sent_;
   std::atomic<uint32_t> received_;
+  std::atomic<bool> suspended_;
+
   RingbufHandle_t ringbuf_;
 };
 
diff --git a/src/drivers/pcm_buffer.cpp b/src/drivers/pcm_buffer.cpp
index 1e416301..bc58d4b9 100644
--- a/src/drivers/pcm_buffer.cpp
+++ b/src/drivers/pcm_buffer.cpp
@@ -25,7 +25,8 @@ namespace drivers {
 
 [[maybe_unused]] static const char kTag[] = "pcmbuf";
 
-PcmBuffer::PcmBuffer(size_t size_in_samples) : sent_(0), received_(0) {
+PcmBuffer::PcmBuffer(size_t size_in_samples)
+    : sent_(0), received_(0), suspended_(false) {
   size_t size_in_bytes = size_in_samples * sizeof(int16_t);
   ESP_LOGI(kTag, "allocating pcm buffer of size %u (%uKiB)", size_in_samples,
            size_in_bytes / 1024);
@@ -51,6 +52,13 @@ auto PcmBuffer::send(std::span<const int16_t> data) -> size_t {
 
 IRAM_ATTR auto PcmBuffer::receive(std::span<int16_t> dest, bool mix, bool isr)
     -> BaseType_t {
+  if (suspended_) {
+    if (!mix) {
+      std::fill_n(dest.begin(), dest.size(), 0);
+    }
+    return false;
+  }
+
   size_t first_read = 0, second_read = 0;
   BaseType_t ret1 = false, ret2 = false;
   std::tie(first_read, ret1) = readSingle(dest, mix, isr);
@@ -86,6 +94,10 @@ auto PcmBuffer::isEmpty() -> bool {
          xRingbufferGetCurFreeSize(ringbuf_);
 }
 
+auto PcmBuffer::suspend(bool s) -> void {
+  suspended_ = s;
+}
+
 auto PcmBuffer::totalSent() -> uint32_t {
   return sent_;
 }
diff --git a/src/tangara/audio/audio_events.hpp b/src/tangara/audio/audio_events.hpp
index 91bcf48b..56d150b2 100644
--- a/src/tangara/audio/audio_events.hpp
+++ b/src/tangara/audio/audio_events.hpp
@@ -144,8 +144,11 @@ struct OutputModeChanged : tinyfsm::Event {
   std::optional<drivers::NvsStorage::Output> set_to;
 };
 
-namespace internal {
+struct TtsPlaybackChanged : tinyfsm::Event {
+  bool is_playing;
+};
 
+namespace internal {
 struct DecodingStarted : tinyfsm::Event {
   std::shared_ptr<TrackInfo> track;
 };
diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp
index dac04f75..1daf568e 100644
--- a/src/tangara/audio/audio_fsm.cpp
+++ b/src/tangara/audio/audio_fsm.cpp
@@ -76,6 +76,7 @@ std::optional<IAudioOutput::Format> AudioState::sDrainFormat;
 StreamCues AudioState::sStreamCues;
 
 bool AudioState::sIsPaused = true;
+bool AudioState::sIsTtsPlaying = false;
 
 auto AudioState::emitPlaybackUpdate(bool paused) -> void {
   std::optional<uint32_t> position;
@@ -191,6 +192,11 @@ void AudioState::react(const TogglePlayPause& ev) {
   }
 }
 
+void AudioState::react(const TtsPlaybackChanged& ev) {
+  sIsTtsPlaying = ev.is_playing;
+  updateOutputMode();
+}
+
 void AudioState::react(const internal::DecodingFinished& ev) {
   // If we just finished playing whatever's at the front of the queue, then we
   // need to advanve and start playing the next one ASAP in order to continue
@@ -369,8 +375,8 @@ void AudioState::react(const OutputModeChanged& ev) {
       sOutput = sI2SOutput;
       break;
   }
-  sOutput->mode(IAudioOutput::Modes::kOnPaused);
   sSampleProcessor->SetOutput(sOutput);
+  updateOutputMode();
 
   // Bluetooth volume isn't 'changed' until we've connected to a device.
   if (new_mode == drivers::NvsStorage::Output::kHeadphones) {
@@ -381,6 +387,14 @@ void AudioState::react(const OutputModeChanged& ev) {
   }
 }
 
+auto AudioState::updateOutputMode() -> void {
+  if (is_in_state<states::Playback>() || sIsTtsPlaying) {
+    sOutput->mode(IAudioOutput::Modes::kOnPlaying);
+  } else {
+    sOutput->mode(IAudioOutput::Modes::kOnPaused);
+  }
+}
+
 auto AudioState::commitVolume() -> void {
   auto mode = sServices->nvs().OutputMode();
   auto vol = sOutput->GetVolume();
@@ -402,6 +416,7 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) {
 
   sDrainBuffers = std::make_unique<drivers::OutputBuffers>(
       kTrackDrainLatencySamples, kSystemDrainLatencySamples);
+  sDrainBuffers->first.suspend(true);
 
   sStreamFactory.reset(
       new FatfsStreamFactory(sServices->database(), sServices->tag_parser()));
@@ -454,6 +469,10 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) {
 static const char kQueueKey[] = "audio:queue";
 static const char kCurrentFileKey[] = "audio:current";
 
+auto Standby::entry() -> void {
+  updateOutputMode();
+}
+
 void Standby::react(const system_fsm::KeyLockChanged& ev) {
   if (!ev.locking) {
     return;
@@ -539,7 +558,8 @@ static void heartbeat(TimerHandle_t) {
 
 void Playback::entry() {
   ESP_LOGI(kTag, "audio output resumed");
-  sOutput->mode(IAudioOutput::Modes::kOnPlaying);
+  sDrainBuffers->first.suspend(false);
+  updateOutputMode();
   emitPlaybackUpdate(false);
 
   if (!sHeartbeatTimer) {
@@ -552,7 +572,7 @@ void Playback::entry() {
 void Playback::exit() {
   ESP_LOGI(kTag, "audio output paused");
   xTimerStop(sHeartbeatTimer, portMAX_DELAY);
-  sOutput->mode(IAudioOutput::Modes::kOnPaused);
+  sDrainBuffers->first.suspend(true);
   emitPlaybackUpdate(true);
 }
 
diff --git a/src/tangara/audio/audio_fsm.hpp b/src/tangara/audio/audio_fsm.hpp
index 134d9ffd..bc3feb55 100644
--- a/src/tangara/audio/audio_fsm.hpp
+++ b/src/tangara/audio/audio_fsm.hpp
@@ -48,6 +48,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
   void react(const PlaySineWave&);
   void react(const SetTrack&);
   void react(const TogglePlayPause&);
+  void react(const TtsPlaybackChanged&);
 
   void react(const internal::DecodingFinished&);
   void react(const internal::StreamStarted&);
@@ -70,6 +71,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
   virtual void react(const system_fsm::HasPhonesChanged&);
 
  protected:
+  auto updateOutputMode() -> void;
   auto emitPlaybackUpdate(bool paused) -> void;
   auto commitVolume() -> void;
 
@@ -88,6 +90,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
   static std::optional<IAudioOutput::Format> sDrainFormat;
 
   static bool sIsPaused;
+  static bool sIsTtsPlaying;
 };
 
 namespace states {
@@ -102,6 +105,7 @@ class Uninitialised : public AudioState {
 
 class Standby : public AudioState {
  public:
+  void entry() override;
   void react(const system_fsm::KeyLockChanged&) override;
   void react(const system_fsm::SdStateChanged&) override;
 
diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp
index b5b99b5d..a803ce57 100644
--- a/src/tangara/tts/player.cpp
+++ b/src/tangara/tts/player.cpp
@@ -5,11 +5,14 @@
  */
 
 #include "tts/player.hpp"
+#include <mutex>
 
+#include "audio/audio_events.hpp"
 #include "audio/processor.hpp"
 #include "audio/resample.hpp"
 #include "codec.hpp"
 #include "esp_log.h"
+#include "events/event_queue.hpp"
 #include "freertos/projdefs.h"
 #include "portmacro.h"
 #include "sample.hpp"
@@ -22,47 +25,70 @@ namespace tts {
 Player::Player(tasks::WorkerPool& worker,
                drivers::PcmBuffer& output,
                audio::FatfsStreamFactory& factory)
-    : bg_(worker), stream_factory_(factory), output_(output), play_count_(0) {}
+    : bg_(worker),
+      stream_factory_(factory),
+      output_(output),
+      stream_playing_(false),
+      stream_cancelled_(false) {}
 
 auto Player::playFile(const std::string& path) -> void {
   ESP_LOGI(kTag, "playing '%s'", path.c_str());
-  int this_play = ++play_count_;
 
   bg_.Dispatch<void>([=, this]() {
-    auto stream = stream_factory_.create(path);
-    if (!stream) {
-      ESP_LOGE(kTag, "creating stream failed");
-      return;
+    // Interrupt current playback
+    {
+      std::scoped_lock<std::mutex> lock{new_stream_mutex_};
+      if (stream_playing_) {
+        stream_cancelled_ = true;
+        stream_playing_.wait(true);
+      }
+      stream_cancelled_ = false;
+      stream_playing_ = true;
     }
 
-    // FIXME: Rather than hardcoding WAV support only, we should work out a
-    // proper subset of 'low memory' decoders that can all be used for TTS
-    // playback.
-    if (stream->type() != codecs::StreamType::kWav) {
-      ESP_LOGE(kTag, "stream was unsupported type");
-      return;
-    }
+    openAndDecode(path);
 
-    auto decoder = codecs::CreateCodecForType(stream->type());
-    if (!decoder) {
-      ESP_LOGE(kTag, "creating decoder failed");
-      return;
+    if (!stream_cancelled_) {
+      events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = false});
     }
+    stream_playing_ = false;
+    stream_playing_.notify_all();
+  });
+}
 
-    std::unique_ptr<codecs::ICodec> codec{*decoder};
-    auto open_res = codec->OpenStream(stream, 0);
-    if (open_res.has_error()) {
-      ESP_LOGE(kTag, "opening stream failed");
-      return;
-    }
+auto Player::openAndDecode(const std::string& path) -> void {
+  auto stream = stream_factory_.create(path);
+  if (!stream) {
+    ESP_LOGE(kTag, "creating stream failed");
+    return;
+  }
 
-    decodeToSink(*open_res, std::move(codec), this_play);
-  });
+  // FIXME: Rather than hardcoding WAV support only, we should work out a
+  // proper subset of 'low memory' decoders that can all be used for TTS
+  // playback.
+  if (stream->type() != codecs::StreamType::kWav) {
+    ESP_LOGE(kTag, "stream was unsupported type");
+    return;
+  }
+
+  auto decoder = codecs::CreateCodecForType(stream->type());
+  if (!decoder) {
+    ESP_LOGE(kTag, "creating decoder failed");
+    return;
+  }
+
+  std::unique_ptr<codecs::ICodec> codec{*decoder};
+  auto open_res = codec->OpenStream(stream, 0);
+  if (open_res.has_error()) {
+    ESP_LOGE(kTag, "opening stream failed");
+    return;
+  }
+
+  decodeToSink(*open_res, std::move(codec));
 }
 
 auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
-                          std::unique_ptr<codecs::ICodec> codec,
-                          int play_count) -> void {
+                          std::unique_ptr<codecs::ICodec> codec) -> void {
   // Set up buffers to hold samples between the intermediary parts of
   // processing. We can just use the stack for these, since this method is
   // called only from background workers, which have enormous stacks.
@@ -83,20 +109,18 @@ auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
   }
   bool double_samples = format.num_channels == 1;
 
+  // Start our playback (wait for previous to end?)
+  events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = true});
+
   // FIXME: This decode-and-process loop is substantially the same as the audio
   // processor's filter loop. Ideally we should refactor both of these loops to
   // reuse code, however I'm holding off on doing this until we've implemented
   // more advanced audio processing features in the audio processor (EQ, tempo
   // shifting, etc.) as it's not clear to me yet how much the two codepaths will
   // be diverging later anyway.
-  while (codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() ||
-         !stereo_buf.isEmpty()) {
-    if (play_count != play_count_) {
-      // FIXME: This is a little unsafe and could maybe take out the first few
-      // samples of the next file.
-      output_.clear();
-      break;
-    }
+  while ((codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() ||
+          !stereo_buf.isEmpty()) &&
+         !stream_cancelled_) {
     if (codec) {
       auto decode_res = codec->DecodeTo(decode_buf.writeAcquire());
       if (decode_res.has_error()) {
@@ -156,6 +180,14 @@ auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
       stereo_buf.readCommit(sent);
     }
   }
+
+  while (!output_.isEmpty()) {
+    if (stream_cancelled_) {
+      output_.clear();
+    } else {
+      vTaskDelay(pdMS_TO_TICKS(100));
+    }
+  }
 }
 
 }  // namespace tts
diff --git a/src/tangara/tts/player.hpp b/src/tangara/tts/player.hpp
index 0a3ba723..47479007 100644
--- a/src/tangara/tts/player.hpp
+++ b/src/tangara/tts/player.hpp
@@ -35,11 +35,13 @@ class Player {
   audio::FatfsStreamFactory& stream_factory_;
   drivers::PcmBuffer& output_;
 
-  std::atomic<int> play_count_;
+  std::mutex new_stream_mutex_;
+  std::atomic<bool> stream_playing_;
+  std::atomic<bool> stream_cancelled_;
 
+  auto openAndDecode(const std::string& path) -> void;
   auto decodeToSink(const codecs::ICodec::OutputFormat&,
-                    std::unique_ptr<codecs::ICodec>,
-                    int play_count) -> void;
+                    std::unique_ptr<codecs::ICodec>) -> void;
 };
 
 }  // namespace tts

From d8b9e65e68214b4aa2bb24ddae3602d5788bc469 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Thu, 12 Sep 2024 11:44:20 +1000
Subject: [PATCH 8/9] Clean up some tts logging and descriptions

---
 lua/main_menu.lua                          |  2 ++
 src/codecs/wav.cpp                         |  2 --
 src/tangara/audio/fatfs_stream_factory.cpp |  1 -
 src/tangara/tts/player.cpp                 | 15 +++++++--------
 src/tangara/tts/player.hpp                 |  4 ++--
 src/tangara/tts/provider.cpp               | 13 +++++++++++--
 6 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/lua/main_menu.lua b/lua/main_menu.lua
index a6b46a8a..f95b0360 100644
--- a/lua/main_menu.lua
+++ b/lua/main_menu.lua
@@ -155,6 +155,7 @@ return widgets.MenuScreen:new {
       })
     end)
     files_btn:Image { src = img.files }
+    widgets.Description(files_btn, "File browser")
     theme.set_style(files_btn, "menu_icon")
 
     local settings_btn = bottom_bar:Button {}
@@ -162,6 +163,7 @@ return widgets.MenuScreen:new {
       backstack.push(require("settings"):new())
     end)
     settings_btn:Image { src = img.settings }
+    widgets.Description(settings_btn, "Settings")
     theme.set_style(settings_btn, "menu_icon")
   end,
 }
diff --git a/src/codecs/wav.cpp b/src/codecs/wav.cpp
index f5b9d789..746f44ca 100644
--- a/src/codecs/wav.cpp
+++ b/src/codecs/wav.cpp
@@ -137,8 +137,6 @@ auto WavDecoder::OpenStream(std::shared_ptr<IStream> input, uint32_t offset)
   // uint32_t file_size = bytes_to_u32(buffer_span.subspan(4, 4)) + 8;
 
   std::string fmt_header = bytes_to_str(buffer_span.subspan(12, 4));
-  ESP_LOGI(kTag, "fmt header found? %s",
-           (fmt_header.starts_with("fmt")) ? "yes" : "no");
   if (!fmt_header.starts_with("fmt")) {
     ESP_LOGW(kTag, "Could not find format chunk");
     return cpp::fail(Error::kMalformedData);
diff --git a/src/tangara/audio/fatfs_stream_factory.cpp b/src/tangara/audio/fatfs_stream_factory.cpp
index 735ec134..94f22ae9 100644
--- a/src/tangara/audio/fatfs_stream_factory.cpp
+++ b/src/tangara/audio/fatfs_stream_factory.cpp
@@ -50,7 +50,6 @@ auto FatfsStreamFactory::create(std::string path, uint32_t offset)
     -> std::shared_ptr<TaggedStream> {
   auto tags = tag_parser_.ReadAndParseTags(path);
   if (!tags) {
-    ESP_LOGE(kTag, "failed to read tags");
     return {};
   }
 
diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp
index a803ce57..46e8c48a 100644
--- a/src/tangara/tts/player.cpp
+++ b/src/tangara/tts/player.cpp
@@ -31,11 +31,9 @@ Player::Player(tasks::WorkerPool& worker,
       stream_playing_(false),
       stream_cancelled_(false) {}
 
-auto Player::playFile(const std::string& path) -> void {
-  ESP_LOGI(kTag, "playing '%s'", path.c_str());
-
+auto Player::playFile(const std::string& text, const std::string& file)
+    -> void {
   bg_.Dispatch<void>([=, this]() {
-    // Interrupt current playback
     {
       std::scoped_lock<std::mutex> lock{new_stream_mutex_};
       if (stream_playing_) {
@@ -46,7 +44,7 @@ auto Player::playFile(const std::string& path) -> void {
       stream_playing_ = true;
     }
 
-    openAndDecode(path);
+    openAndDecode(text, file);
 
     if (!stream_cancelled_) {
       events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = false});
@@ -56,10 +54,11 @@ auto Player::playFile(const std::string& path) -> void {
   });
 }
 
-auto Player::openAndDecode(const std::string& path) -> void {
+auto Player::openAndDecode(const std::string& text, const std::string& path)
+    -> void {
   auto stream = stream_factory_.create(path);
   if (!stream) {
-    ESP_LOGE(kTag, "creating stream failed");
+    ESP_LOGW(kTag, "missing '%s' for '%s'", path.c_str(), text.c_str());
     return;
   }
 
@@ -67,7 +66,7 @@ auto Player::openAndDecode(const std::string& path) -> void {
   // proper subset of 'low memory' decoders that can all be used for TTS
   // playback.
   if (stream->type() != codecs::StreamType::kWav) {
-    ESP_LOGE(kTag, "stream was unsupported type");
+    ESP_LOGE(kTag, "'%s' has unsupported encoding", path.c_str());
     return;
   }
 
diff --git a/src/tangara/tts/player.hpp b/src/tangara/tts/player.hpp
index 47479007..d28da474 100644
--- a/src/tangara/tts/player.hpp
+++ b/src/tangara/tts/player.hpp
@@ -24,7 +24,7 @@ class Player {
  public:
   Player(tasks::WorkerPool&, drivers::PcmBuffer&, audio::FatfsStreamFactory&);
 
-  auto playFile(const std::string& path) -> void;
+  auto playFile(const std::string& text, const std::string& path) -> void;
 
   // Not copyable or movable.
   Player(const Player&) = delete;
@@ -39,7 +39,7 @@ class Player {
   std::atomic<bool> stream_playing_;
   std::atomic<bool> stream_cancelled_;
 
-  auto openAndDecode(const std::string& path) -> void;
+  auto openAndDecode(const std::string& text, const std::string& path) -> void;
   auto decodeToSink(const codecs::ICodec::OutputFormat&,
                     std::unique_ptr<codecs::ICodec>) -> void;
 };
diff --git a/src/tangara/tts/provider.cpp b/src/tangara/tts/provider.cpp
index 2b1dd4e6..d19500e0 100644
--- a/src/tangara/tts/provider.cpp
+++ b/src/tangara/tts/provider.cpp
@@ -49,9 +49,18 @@ auto Provider::feed(const Event& e) -> void {
       // ESP_LOGI(kTag, "new selection: '%s', interactive? %i",
       // ev.new_selection->description.value_or("").c_str(),
       // ev.new_selection->is_interactive);
-      std::string new_desc = ev.new_selection->description.value_or("");
+      auto text = ev.new_selection->description;
+      if (!text) {
+        ESP_LOGW(kTag, "missing description for element");
+        return;
+      }
+      auto file = textToFile(*text);
+      if (!file) {
+        return;
+      }
+
       if (player_) {
-        player_->playFile(textToFile(new_desc).value_or(""));
+        player_->playFile(*text, *file);
       }
     }
   }

From 7d1266404d8e876ba909d101eb316868036b423c Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Tue, 17 Sep 2024 11:57:32 +1000
Subject: [PATCH 9/9] Move SPI ISR back to CPU0

See if this does anything for the intermittent SPI acquire hangs
---
 src/drivers/spi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/drivers/spi.cpp b/src/drivers/spi.cpp
index 40487197..632fe89f 100644
--- a/src/drivers/spi.cpp
+++ b/src/drivers/spi.cpp
@@ -41,7 +41,7 @@ esp_err_t init_spi(void) {
       // manages its own use of DMA-capable memory.
       .max_transfer_sz = 4096,
       .flags = SPICOMMON_BUSFLAG_MASTER | SPICOMMON_BUSFLAG_IOMUX_PINS,
-      .isr_cpu_id = ESP_INTR_CPU_AFFINITY_1,
+      .isr_cpu_id = ESP_INTR_CPU_AFFINITY_0,
       .intr_flags = ESP_INTR_FLAG_LOWMED | ESP_INTR_FLAG_IRAM,
   };