From f42448d50123e376205df17bc295917e89d943f5 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Fri, 5 Jul 2024 10:12:55 +1000 Subject: [PATCH 1/9] WIP start on accepting two streams in our audio output --- src/drivers/bluetooth.cpp | 87 +++++++++++++--------- src/drivers/include/drivers/bluetooth.hpp | 13 ++-- src/drivers/include/drivers/i2s_dac.hpp | 5 +- src/drivers/include/drivers/pcm_buffer.hpp | 9 ++- src/drivers/pcm_buffer.cpp | 24 ++++-- 5 files changed, 88 insertions(+), 50 deletions(-) diff --git a/src/drivers/bluetooth.cpp b/src/drivers/bluetooth.cpp index 412cba1f..23c4f8d8 100644 --- a/src/drivers/bluetooth.cpp +++ b/src/drivers/bluetooth.cpp @@ -37,7 +37,8 @@ namespace drivers { [[maybe_unused]] static constexpr char kTag[] = "bluetooth"; -DRAM_ATTR static PcmBuffer* sStream = nullptr; +DRAM_ATTR static PcmBuffer* sStream1 = nullptr; +DRAM_ATTR static PcmBuffer* sStream2 = nullptr; DRAM_ATTR static std::atomic sVolumeFactor = 1.f; static tasks::WorkerPool* sBgWorker; @@ -96,13 +97,15 @@ IRAM_ATTR auto a2dp_data_cb(uint8_t* buf, int32_t buf_size) -> int32_t { if (buf == nullptr || buf_size <= 0) { return 0; } - PcmBuffer* stream = sStream; - if (stream == nullptr) { + PcmBuffer* stream1 = sStream1; + PcmBuffer* stream2 = sStream2; + if (stream1 == nullptr || stream2 == nullptr) { return 0; } int16_t* samples = reinterpret_cast(buf); - stream->receive({samples, static_cast(buf_size / 2)}, false); + stream1->receive({samples, static_cast(buf_size / 2)}, false, false); + stream2->receive({samples, static_cast(buf_size / 2)}, true, false); // Apply software volume scaling. 
float factor = sVolumeFactor.load(); @@ -181,14 +184,16 @@ auto Bluetooth::PreferredDevice() -> std::optional { return bluetooth::BluetoothState::preferred_device(); } -auto Bluetooth::SetSource(PcmBuffer* src) -> void { +auto Bluetooth::SetSources(PcmBuffer* src1, PcmBuffer* src2) -> void { auto lock = bluetooth::BluetoothState::lock(); - if (src == bluetooth::BluetoothState::source()) { + PcmBuffer *cur1, *cur2; + std::tie(cur1, cur2) = bluetooth::BluetoothState::sources(); + if (src1 == cur1 && src2 == cur2) { return; } - bluetooth::BluetoothState::source(src); + bluetooth::BluetoothState::sources(src1, src2); tinyfsm::FsmList::dispatch( - bluetooth::events::SourceChanged{}); + bluetooth::events::SourcesChanged{}); } auto Bluetooth::SetVolumeFactor(float f) -> void { @@ -348,7 +353,6 @@ std::optional BluetoothState::sPreferredDevice_{}; std::optional BluetoothState::sConnectingDevice_{}; int BluetoothState::sConnectAttemptsRemaining_{0}; -std::atomic BluetoothState::sSource_; std::function BluetoothState::sEventHandler_; auto BluetoothState::Init(NvsStorage& storage) -> void { @@ -377,12 +381,13 @@ auto BluetoothState::preferred_device(std::optional addr) -> void { sPreferredDevice_ = addr; } -auto BluetoothState::source() -> PcmBuffer* { - return sSource_.load(); +auto BluetoothState::sources() -> std::pair { + return {sStream1, sStream2}; } -auto BluetoothState::source(PcmBuffer* src) -> void { - sSource_.store(src); +auto BluetoothState::sources(PcmBuffer* src1, PcmBuffer* src2) -> void { + sStream1 = src1; + sStream2 = src2; } auto BluetoothState::event_handler(std::function cb) -> void { @@ -508,11 +513,13 @@ void Disabled::react(const events::Enable&) { // AVRCP Target err = esp_avrc_tg_init(); if (err != ESP_OK) { - ESP_LOGE(kTag, "Error during target init: %s %d", esp_err_to_name(err), err); + ESP_LOGE(kTag, "Error during target init: %s %d", esp_err_to_name(err), + err); } err = esp_avrc_tg_register_callback(avrcp_tg_cb); if (err != ESP_OK) { - 
ESP_LOGE(kTag, "Error registering AVRC tg callback: %s %d", esp_err_to_name(err), err); + ESP_LOGE(kTag, "Error registering AVRC tg callback: %s %d", + esp_err_to_name(err), err); } // Set the supported passthrough commands on the tg @@ -522,19 +529,20 @@ void Disabled::react(const events::Enable&) { do { // Sleep for a bit vTaskDelay(pdMS_TO_TICKS(10)); - err = esp_avrc_tg_get_psth_cmd_filter(ESP_AVRC_PSTH_FILTER_ALLOWED_CMD, &psth); + err = esp_avrc_tg_get_psth_cmd_filter(ESP_AVRC_PSTH_FILTER_ALLOWED_CMD, + &psth); } while (err != ESP_OK); - err = esp_avrc_tg_set_psth_cmd_filter(ESP_AVRC_PSTH_FILTER_SUPPORTED_CMD, &psth); + err = esp_avrc_tg_set_psth_cmd_filter(ESP_AVRC_PSTH_FILTER_SUPPORTED_CMD, + &psth); if (err != ESP_OK) { ESP_LOGE(kTag, "Error: %s %d", esp_err_to_name(err), err); } esp_avrc_rn_evt_cap_mask_t evt_set = {0}; esp_avrc_rn_evt_bit_mask_operation(ESP_AVRC_BIT_MASK_OP_SET, &evt_set, - ESP_AVRC_RN_VOLUME_CHANGE); + ESP_AVRC_RN_VOLUME_CHANGE); assert(esp_avrc_tg_set_rn_evt_cap(&evt_set) == ESP_OK); - // Initialise A2DP. This handles streaming audio. 
Currently ESP-IDF's SBC // encoder only supports 2 channels of interleaved 16 bit samples, at // 44.1kHz, so there is no additional configuration to be done for the @@ -724,9 +732,8 @@ void Connected::react(const events::PreferredDeviceChanged& ev) { transit(); } -void Connected::react(const events::SourceChanged& ev) { - sStream = sSource_; - if (sStream != nullptr) { +void Connected::react(const events::SourcesChanged& ev) { + if (sStream1 != nullptr && sStream2 != nullptr) { ESP_LOGI(kTag, "checking source is ready"); esp_a2d_media_ctrl(ESP_A2D_MEDIA_CTRL_CHECK_SRC_RDY); } else { @@ -775,7 +782,8 @@ void Connected::react(events::internal::Avrc ev) { switch (ev.type) { case ESP_AVRC_CT_CONNECTION_STATE_EVT: if (ev.param.conn_stat.connected) { - auto err = esp_avrc_ct_send_register_notification_cmd(4, ESP_AVRC_RN_VOLUME_CHANGE, 0); + auto err = esp_avrc_ct_send_register_notification_cmd( + 4, ESP_AVRC_RN_VOLUME_CHANGE, 0); if (err != ESP_OK) { ESP_LOGE(kTag, "Error: %s %d", esp_err_to_name(err), err); } @@ -787,15 +795,20 @@ void Connected::react(events::internal::Avrc ev) { case ESP_AVRC_CT_REMOTE_FEATURES_EVT: // The remote device is telling us about its capabilities! We don't // currently care about any of them. 
- ESP_LOGI(kTag, "Recieved capabilitites: %lu", ev.param.rmt_feats.feat_mask); + ESP_LOGI(kTag, "Recieved capabilitites: %lu", + ev.param.rmt_feats.feat_mask); break; case ESP_AVRC_CT_CHANGE_NOTIFY_EVT: if (ev.param.change_ntf.event_id == ESP_AVRC_RN_VOLUME_CHANGE) { if (sEventHandler_) { - std::invoke(sEventHandler_, bluetooth::RemoteVolumeChanged{.new_vol = ev.param.change_ntf.event_parameter.volume}); + std::invoke( + sEventHandler_, + bluetooth::RemoteVolumeChanged{ + .new_vol = ev.param.change_ntf.event_parameter.volume}); } // Resubscribe to volume facts - auto err = esp_avrc_ct_send_register_notification_cmd(4, ESP_AVRC_RN_VOLUME_CHANGE, 0); + auto err = esp_avrc_ct_send_register_notification_cmd( + 4, ESP_AVRC_RN_VOLUME_CHANGE, 0); if (err != ESP_OK) { ESP_LOGE(kTag, "Error: %s %d", esp_err_to_name(err), err); } @@ -809,16 +822,20 @@ void Connected::react(events::internal::Avrc ev) { void Connected::react(const events::internal::Avrctg ev) { switch (ev.type) { case ESP_AVRC_TG_CONNECTION_STATE_EVT: - ESP_LOGI(kTag, "Got connection event. Connected: %s", ev.param.conn_stat.connected ? "true" : "false"); + ESP_LOGI(kTag, "Got connection event. Connected: %s", + ev.param.conn_stat.connected ? 
"true" : "false"); if (ev.param.conn_stat.connected) { } break; case ESP_AVRC_TG_REMOTE_FEATURES_EVT: - ESP_LOGI(kTag, "Got remote features feat flag %d", ev.param.rmt_feats.ct_feat_flag); - ESP_LOGI(kTag, "Got remote features feat mask %lu", ev.param.rmt_feats.feat_mask); + ESP_LOGI(kTag, "Got remote features feat flag %d", + ev.param.rmt_feats.ct_feat_flag); + ESP_LOGI(kTag, "Got remote features feat mask %lu", + ev.param.rmt_feats.feat_mask); break; case ESP_AVRC_TG_PASSTHROUGH_CMD_EVT: - ESP_LOGI(kTag, "Got passthrough event keycode: %x, %d", ev.param.psth_cmd.key_code, ev.param.psth_cmd.key_state); + ESP_LOGI(kTag, "Got passthrough event keycode: %x, %d", + ev.param.psth_cmd.key_code, ev.param.psth_cmd.key_state); if (ev.param.psth_cmd.key_state == 1 && sEventHandler_) { switch (ev.param.psth_cmd.key_code) { case ESP_AVRC_PT_CMD_PLAY: @@ -840,7 +857,8 @@ void Connected::react(const events::internal::Avrctg ev) { std::invoke(sEventHandler_, bluetooth::SimpleEvent::kBackward); break; default: - ESP_LOGI(kTag, "Unhandled passthrough cmd. Key code: %d", ev.param.psth_cmd.key_code); + ESP_LOGI(kTag, "Unhandled passthrough cmd. 
Key code: %d", + ev.param.psth_cmd.key_code); } } break; @@ -848,14 +866,15 @@ void Connected::react(const events::internal::Avrctg ev) { if (ev.param.reg_ntf.event_id == ESP_AVRC_RN_VOLUME_CHANGE) { // TODO: actually do this lol esp_avrc_rn_param_t rn_param; - rn_param.volume = 64; + rn_param.volume = 64; auto err = esp_avrc_tg_send_rn_rsp(ESP_AVRC_RN_VOLUME_CHANGE, - ESP_AVRC_RN_RSP_INTERIM, &rn_param); + ESP_AVRC_RN_RSP_INTERIM, &rn_param); if (err != ESP_OK) { ESP_LOGE(kTag, "Error: %s %d", esp_err_to_name(err), err); } } else { - ESP_LOGW(kTag, "unhandled AVRC TG Register Notification event: %u", ev.param.reg_ntf.event_id); + ESP_LOGW(kTag, "unhandled AVRC TG Register Notification event: %u", + ev.param.reg_ntf.event_id); } break; } diff --git a/src/drivers/include/drivers/bluetooth.hpp b/src/drivers/include/drivers/bluetooth.hpp index 94a85263..b3b12ffc 100644 --- a/src/drivers/include/drivers/bluetooth.hpp +++ b/src/drivers/include/drivers/bluetooth.hpp @@ -43,7 +43,7 @@ class Bluetooth { auto SetPreferredDevice(std::optional dev) -> void; auto PreferredDevice() -> std::optional; - auto SetSource(PcmBuffer*) -> void; + auto SetSources(PcmBuffer*, PcmBuffer*) -> void; auto SetVolumeFactor(float) -> void; auto SetEventHandler(std::function cb) -> void; @@ -57,7 +57,7 @@ struct Disable : public tinyfsm::Event {}; struct ConnectTimedOut : public tinyfsm::Event {}; struct PreferredDeviceChanged : public tinyfsm::Event {}; -struct SourceChanged : public tinyfsm::Event {}; +struct SourcesChanged : public tinyfsm::Event {}; struct DeviceDiscovered : public tinyfsm::Event { const Device& device; }; @@ -118,8 +118,8 @@ class BluetoothState : public tinyfsm::Fsm { static auto discovery() -> bool; static auto discovery(bool) -> void; - static auto source() -> PcmBuffer*; - static auto source(PcmBuffer*) -> void; + static auto sources() -> std::pair; + static auto sources(PcmBuffer*, PcmBuffer*) -> void; static auto event_handler(std::function) -> void; @@ -132,7 +132,7 
@@ class BluetoothState : public tinyfsm::Fsm { virtual void react(const events::Disable& ev) = 0; virtual void react(const events::ConnectTimedOut& ev){}; virtual void react(const events::PreferredDeviceChanged& ev){}; - virtual void react(const events::SourceChanged& ev){}; + virtual void react(const events::SourcesChanged& ev){}; virtual void react(const events::DeviceDiscovered&); @@ -152,7 +152,6 @@ class BluetoothState : public tinyfsm::Fsm { static std::optional sConnectingDevice_; static int sConnectAttemptsRemaining_; - static std::atomic sSource_; static std::function sEventHandler_; auto connect(const bluetooth::MacAndName&) -> bool; @@ -205,7 +204,7 @@ class Connected : public BluetoothState { void exit() override; void react(const events::PreferredDeviceChanged& ev) override; - void react(const events::SourceChanged& ev) override; + void react(const events::SourcesChanged& ev) override; void react(const events::Disable& ev) override; void react(events::internal::Gap ev) override; diff --git a/src/drivers/include/drivers/i2s_dac.hpp b/src/drivers/include/drivers/i2s_dac.hpp index cf9258c0..0fe462b4 100644 --- a/src/drivers/include/drivers/i2s_dac.hpp +++ b/src/drivers/include/drivers/i2s_dac.hpp @@ -40,7 +40,7 @@ constexpr size_t kI2SBufferLengthFrames = 1024; */ class I2SDac { public: - static auto create(IGpios& expander, PcmBuffer&) -> std::optional; + static auto create(IGpios& expander, PcmBuffer&, PcmBuffer&) -> std::optional; I2SDac(IGpios& gpio, PcmBuffer&, i2s_chan_handle_t i2s_handle); ~I2SDac(); @@ -77,7 +77,8 @@ class I2SDac { auto set_channel(bool) -> void; IGpios& gpio_; - PcmBuffer& buffer_; + PcmBuffer& buffer1_; + PcmBuffer& buffer2_; i2s_chan_handle_t i2s_handle_; bool i2s_active_; diff --git a/src/drivers/include/drivers/pcm_buffer.hpp b/src/drivers/include/drivers/pcm_buffer.hpp index 6630f720..27e9eec6 100644 --- a/src/drivers/include/drivers/pcm_buffer.hpp +++ b/src/drivers/include/drivers/pcm_buffer.hpp @@ -35,8 +35,13 @@ class 
PcmBuffer { * Fills the given span with samples. If enough samples are available in * the buffer, then the span will be filled with samples from the buffer. Any * shortfall is made up by padding the given span with zeroes. + * + * If `mix` is set to true then, instead of overwriting the destination span, + * the retrieved samples will be mixed into any existing samples contained + * within the destination. This mixing uses a naive sum approach, and so may + * introduce clipping. */ - auto receive(std::span, bool isr) -> BaseType_t; + auto receive(std::span, bool mix, bool isr) -> BaseType_t; auto clear() -> void; auto isEmpty() -> bool; @@ -58,7 +63,7 @@ class PcmBuffer { PcmBuffer& operator=(const PcmBuffer&) = delete; private: - auto readSingle(std::span, bool isr) + auto readSingle(std::span, bool mix, bool isr) -> std::pair; StaticRingbuffer_t meta_; diff --git a/src/drivers/pcm_buffer.cpp b/src/drivers/pcm_buffer.cpp index 3f4a0443..b619cefb 100644 --- a/src/drivers/pcm_buffer.cpp +++ b/src/drivers/pcm_buffer.cpp @@ -44,14 +44,15 @@ auto PcmBuffer::send(std::span data) -> void { sent_ += data.size(); } -IRAM_ATTR auto PcmBuffer::receive(std::span dest, bool isr) +IRAM_ATTR auto PcmBuffer::receive(std::span dest, bool mix, bool isr) -> BaseType_t { size_t first_read = 0, second_read = 0; BaseType_t ret1 = false, ret2 = false; - std::tie(first_read, ret1) = readSingle(dest, isr); + std::tie(first_read, ret1) = readSingle(dest, mix, isr); if (first_read < dest.size()) { - std::tie(second_read, ret2) = readSingle(dest.subspan(first_read), isr); + std::tie(second_read, ret2) = + readSingle(dest.subspan(first_read), mix, isr); } size_t total_read = first_read + second_read; @@ -86,7 +87,9 @@ auto PcmBuffer::totalReceived() -> uint32_t { return received_; } -IRAM_ATTR auto PcmBuffer::readSingle(std::span dest, bool isr) +IRAM_ATTR auto PcmBuffer::readSingle(std::span dest, + bool mix, + bool isr) -> std::pair { BaseType_t ret; size_t read_bytes = 0; @@ -104,7 +107,18 
@@ IRAM_ATTR auto PcmBuffer::readSingle(std::span dest, bool isr) return {read_samples, ret}; } - std::memcpy(dest.data(), data, read_bytes); + if (mix) { + for (size_t i = 0; i < read_samples; i++) { + // Sum the two samples in a 32 bit field so that the addition is always + // safe. + int32_t sum = static_cast(dest[i]) + + static_cast(reinterpret_cast(data)[i]); + // Clip back into the range of a single sample. + dest[i] = std::clamp(sum, INT16_MIN, INT16_MAX); + } + } else { + std::memcpy(dest.data(), data, read_bytes); + } if (isr) { vRingbufferReturnItem(ringbuf_, data); From 41e0605f17a784e8f125b3ad10ddfe5ef63337d9 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Mon, 8 Jul 2024 15:06:43 +1000 Subject: [PATCH 2/9] Give PcmBuffer pairs a name, and wire them up in the audio stack --- src/drivers/bluetooth.cpp | 35 ++++++++++------------ src/drivers/i2s_dac.cpp | 20 +++++++------ src/drivers/include/drivers/bluetooth.hpp | 6 ++-- src/drivers/include/drivers/i2s_dac.hpp | 8 ++--- src/drivers/include/drivers/pcm_buffer.hpp | 12 ++++++++ src/drivers/pcm_buffer.cpp | 2 +- src/tangara/audio/audio_fsm.cpp | 33 ++++++++++++-------- src/tangara/audio/audio_fsm.hpp | 2 +- src/tangara/audio/bt_audio_output.cpp | 8 ++--- src/tangara/audio/bt_audio_output.hpp | 4 +-- src/tangara/audio/i2s_audio_output.cpp | 6 ++-- src/tangara/audio/i2s_audio_output.hpp | 4 +-- 12 files changed, 79 insertions(+), 61 deletions(-) diff --git a/src/drivers/bluetooth.cpp b/src/drivers/bluetooth.cpp index 23c4f8d8..acb38ce4 100644 --- a/src/drivers/bluetooth.cpp +++ b/src/drivers/bluetooth.cpp @@ -37,8 +37,7 @@ namespace drivers { [[maybe_unused]] static constexpr char kTag[] = "bluetooth"; -DRAM_ATTR static PcmBuffer* sStream1 = nullptr; -DRAM_ATTR static PcmBuffer* sStream2 = nullptr; +DRAM_ATTR static OutputBuffers* sStreams = nullptr; DRAM_ATTR static std::atomic sVolumeFactor = 1.f; static tasks::WorkerPool* sBgWorker; @@ -97,15 +96,16 @@ IRAM_ATTR auto a2dp_data_cb(uint8_t* buf, int32_t 
buf_size) -> int32_t { if (buf == nullptr || buf_size <= 0) { return 0; } - PcmBuffer* stream1 = sStream1; - PcmBuffer* stream2 = sStream2; - if (stream1 == nullptr || stream2 == nullptr) { + OutputBuffers* streams = sStreams; + if (streams == nullptr) { return 0; } int16_t* samples = reinterpret_cast(buf); - stream1->receive({samples, static_cast(buf_size / 2)}, false, false); - stream2->receive({samples, static_cast(buf_size / 2)}, true, false); + streams->first.receive({samples, static_cast(buf_size / 2)}, false, + false); + streams->second.receive({samples, static_cast(buf_size / 2)}, true, + false); // Apply software volume scaling. float factor = sVolumeFactor.load(); @@ -184,14 +184,13 @@ auto Bluetooth::PreferredDevice() -> std::optional { return bluetooth::BluetoothState::preferred_device(); } -auto Bluetooth::SetSources(PcmBuffer* src1, PcmBuffer* src2) -> void { +auto Bluetooth::SetSources(OutputBuffers* src) -> void { auto lock = bluetooth::BluetoothState::lock(); - PcmBuffer *cur1, *cur2; - std::tie(cur1, cur2) = bluetooth::BluetoothState::sources(); - if (src1 == cur1 && src2 == cur2) { + OutputBuffers* cur = bluetooth::BluetoothState::sources(); + if (src == cur) { return; } - bluetooth::BluetoothState::sources(src1, src2); + bluetooth::BluetoothState::sources(src); tinyfsm::FsmList::dispatch( bluetooth::events::SourcesChanged{}); } @@ -381,13 +380,12 @@ auto BluetoothState::preferred_device(std::optional addr) -> void { sPreferredDevice_ = addr; } -auto BluetoothState::sources() -> std::pair { - return {sStream1, sStream2}; +auto BluetoothState::sources() -> OutputBuffers* { + return sStreams; } -auto BluetoothState::sources(PcmBuffer* src1, PcmBuffer* src2) -> void { - sStream1 = src1; - sStream2 = src2; +auto BluetoothState::sources(OutputBuffers* src) -> void { + sStreams = src; } auto BluetoothState::event_handler(std::function cb) -> void { @@ -715,7 +713,6 @@ void Connected::entry() { sPreferredDevice_->mac != stored_pref->mac)) { 
sStorage_->PreferredBluetoothDevice(sPreferredDevice_); } - // TODO: if we already have a source, immediately start playing } void Connected::exit() { @@ -733,7 +730,7 @@ void Connected::react(const events::PreferredDeviceChanged& ev) { } void Connected::react(const events::SourcesChanged& ev) { - if (sStream1 != nullptr && sStream2 != nullptr) { + if (sStreams != nullptr) { ESP_LOGI(kTag, "checking source is ready"); esp_a2d_media_ctrl(ESP_A2D_MEDIA_CTRL_CHECK_SRC_RDY); } else { diff --git a/src/drivers/i2s_dac.cpp b/src/drivers/i2s_dac.cpp index b1044896..4e2e171a 100644 --- a/src/drivers/i2s_dac.cpp +++ b/src/drivers/i2s_dac.cpp @@ -52,10 +52,12 @@ extern "C" IRAM_ATTR auto callback(i2s_chan_handle_t handle, assert(event->size % 4 == 0); uint8_t* buf = *reinterpret_cast(event->data); - auto* src = reinterpret_cast(user_ctx); + auto* src = reinterpret_cast(user_ctx); - BaseType_t ret = - src->receive({reinterpret_cast(buf), event->size / 2}, true); + BaseType_t ret1 = src->first.receive( + {reinterpret_cast(buf), event->size / 2}, false, true); + BaseType_t ret2 = src->second.receive( + {reinterpret_cast(buf), event->size / 2}, true, true); // The ESP32's I2S peripheral has a different endianness to its processors. // ESP-IDF handles this difference for stereo channels, but not for mono @@ -70,10 +72,10 @@ extern "C" IRAM_ATTR auto callback(i2s_chan_handle_t handle, } } - return ret; + return ret1 || ret2; } -auto I2SDac::create(IGpios& expander, PcmBuffer& buf) +auto I2SDac::create(IGpios& expander, OutputBuffers& bufs) -> std::optional { i2s_chan_handle_t i2s_handle; i2s_chan_config_t channel_config{ @@ -90,7 +92,7 @@ auto I2SDac::create(IGpios& expander, PcmBuffer& buf) // First, instantiate the instance so it can do all of its power on // configuration. std::unique_ptr dac = - std::make_unique(expander, buf, i2s_handle); + std::make_unique(expander, bufs, i2s_handle); // Whilst we wait for the initial boot, we can work on installing the I2S // driver. 
@@ -122,14 +124,14 @@ auto I2SDac::create(IGpios& expander, PcmBuffer& buf) .on_sent = callback, .on_send_q_ovf = NULL, }; - i2s_channel_register_event_callback(i2s_handle, &callbacks, &buf); + i2s_channel_register_event_callback(i2s_handle, &callbacks, &bufs); return dac.release(); } -I2SDac::I2SDac(IGpios& gpio, PcmBuffer& buf, i2s_chan_handle_t i2s_handle) +I2SDac::I2SDac(IGpios& gpio, OutputBuffers& bufs, i2s_chan_handle_t i2s_handle) : gpio_(gpio), - buffer_(buf), + buffers_(bufs), i2s_handle_(i2s_handle), i2s_active_(false), clock_config_(I2S_STD_CLK_DEFAULT_CONFIG(48000)), diff --git a/src/drivers/include/drivers/bluetooth.hpp b/src/drivers/include/drivers/bluetooth.hpp index b3b12ffc..eaecfb2b 100644 --- a/src/drivers/include/drivers/bluetooth.hpp +++ b/src/drivers/include/drivers/bluetooth.hpp @@ -43,7 +43,7 @@ class Bluetooth { auto SetPreferredDevice(std::optional dev) -> void; auto PreferredDevice() -> std::optional; - auto SetSources(PcmBuffer*, PcmBuffer*) -> void; + auto SetSources(OutputBuffers*) -> void; auto SetVolumeFactor(float) -> void; auto SetEventHandler(std::function cb) -> void; @@ -118,8 +118,8 @@ class BluetoothState : public tinyfsm::Fsm { static auto discovery() -> bool; static auto discovery(bool) -> void; - static auto sources() -> std::pair; - static auto sources(PcmBuffer*, PcmBuffer*) -> void; + static auto sources() -> OutputBuffers*; + static auto sources(OutputBuffers*) -> void; static auto event_handler(std::function) -> void; diff --git a/src/drivers/include/drivers/i2s_dac.hpp b/src/drivers/include/drivers/i2s_dac.hpp index 0fe462b4..891acb56 100644 --- a/src/drivers/include/drivers/i2s_dac.hpp +++ b/src/drivers/include/drivers/i2s_dac.hpp @@ -40,9 +40,10 @@ constexpr size_t kI2SBufferLengthFrames = 1024; */ class I2SDac { public: - static auto create(IGpios& expander, PcmBuffer&, PcmBuffer&) -> std::optional; + static auto create(IGpios& expander, OutputBuffers&) + -> std::optional; - I2SDac(IGpios& gpio, PcmBuffer&, 
i2s_chan_handle_t i2s_handle); + I2SDac(IGpios& gpio, OutputBuffers&, i2s_chan_handle_t i2s_handle); ~I2SDac(); auto SetPaused(bool) -> void; @@ -77,8 +78,7 @@ class I2SDac { auto set_channel(bool) -> void; IGpios& gpio_; - PcmBuffer& buffer1_; - PcmBuffer& buffer2_; + OutputBuffers& buffers_; i2s_chan_handle_t i2s_handle_; bool i2s_active_; diff --git a/src/drivers/include/drivers/pcm_buffer.hpp b/src/drivers/include/drivers/pcm_buffer.hpp index 27e9eec6..968c3398 100644 --- a/src/drivers/include/drivers/pcm_buffer.hpp +++ b/src/drivers/include/drivers/pcm_buffer.hpp @@ -74,4 +74,16 @@ class PcmBuffer { RingbufHandle_t ringbuf_; }; +/* + * Convenience type for a pair of PcmBuffers. Each audio output handles mixing + * streams together to ensure that low-latency sounds in one channel (e.g. a + * system notification bleep) aren't delayed by a large audio buffer in the + * other channel (e.g. a long-running track). + * + * By convention, the first buffer of this pair is used for tracks, whilst the + * second is reserved for 'system sounds'; usually TTS, but potentially + * other informative noises. 
+ */ +using OutputBuffers = std::pair; + } // namespace drivers diff --git a/src/drivers/pcm_buffer.cpp b/src/drivers/pcm_buffer.cpp index b619cefb..1d2bab1e 100644 --- a/src/drivers/pcm_buffer.cpp +++ b/src/drivers/pcm_buffer.cpp @@ -56,7 +56,7 @@ IRAM_ATTR auto PcmBuffer::receive(std::span dest, bool mix, bool isr) } size_t total_read = first_read + second_read; - if (total_read < dest.size()) { + if (total_read < dest.size() && !mix) { std::fill_n(dest.begin() + total_read, dest.size() - total_read, 0); } diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp index 80611082..8f04c6c1 100644 --- a/src/tangara/audio/audio_fsm.cpp +++ b/src/tangara/audio/audio_fsm.cpp @@ -59,10 +59,16 @@ std::shared_ptr AudioState::sOutput; std::shared_ptr AudioState::sI2SOutput; std::shared_ptr AudioState::sBtOutput; -// Two seconds of samples for two channels, at a representative sample rate. -constexpr size_t kDrainLatencySamples = 48000 * 2 * 2; +// For tracks, keep about two seconds' worth of samples at 2ch 48kHz. This +// is more headroom than we need for small playback, but it doesn't hurt to +// keep some PSRAM in our pockets for a rainy day. +constexpr size_t kTrackDrainLatencySamples = 48000 * 2 * 2; -std::unique_ptr AudioState::sDrainBuffer; +// For system sounds, we intentionally choose codecs that are very fast to +// decode. This lets us get away with a much smaller drain buffer. 
+constexpr size_t kSystemDrainLatencySamples = 48000; + +std::unique_ptr AudioState::sDrainBuffers; std::optional AudioState::sDrainFormat; StreamCues AudioState::sStreamCues; @@ -237,11 +243,11 @@ void AudioState::react(const system_fsm::BluetoothEvent& ev) { break; } } - if (std::holds_alternative(ev.event)) { - auto volume_chg = std::get(ev.event).new_vol; - events::Ui().Dispatch(RemoteVolumeChanged{ - .value = volume_chg - }); + if (std::holds_alternative( + ev.event)) { + auto volume_chg = + std::get(ev.event).new_vol; + events::Ui().Dispatch(RemoteVolumeChanged{.value = volume_chg}); } } @@ -354,12 +360,13 @@ namespace states { void Uninitialised::react(const system_fsm::BootComplete& ev) { sServices = ev.services; - sDrainBuffer = std::make_unique(kDrainLatencySamples); + sDrainBuffers = std::make_unique( + kTrackDrainLatencySamples, kSystemDrainLatencySamples); sStreamFactory.reset(new FatfsStreamFactory(*sServices)); - sI2SOutput.reset(new I2SAudioOutput(sServices->gpios(), *sDrainBuffer)); + sI2SOutput.reset(new I2SAudioOutput(sServices->gpios(), *sDrainBuffers)); sBtOutput.reset(new BluetoothAudioOutput( - sServices->bluetooth(), *sDrainBuffer, sServices->bg_worker())); + sServices->bluetooth(), *sDrainBuffers, sServices->bg_worker())); auto& nvs = sServices->nvs(); sI2SOutput->SetMaxVolume(nvs.AmpMaxVolume()); @@ -390,7 +397,7 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) { .left_bias = nvs.AmpLeftBias(), }); - sSampleProcessor.reset(new SampleProcessor(*sDrainBuffer)); + sSampleProcessor.reset(new SampleProcessor(sDrainBuffers->first)); sSampleProcessor->SetOutput(sOutput); sDecoder.reset(Decoder::Start(sSampleProcessor)); @@ -507,7 +514,7 @@ void Playback::react(const system_fsm::SdStateChanged& ev) { } void Playback::react(const internal::StreamHeartbeat& ev) { - sStreamCues.update(sDrainBuffer->totalReceived()); + sStreamCues.update(sDrainBuffers->first.totalReceived()); if (sStreamCues.hasStream()) { emitPlaybackUpdate(false); 
diff --git a/src/tangara/audio/audio_fsm.hpp b/src/tangara/audio/audio_fsm.hpp index f949ce8a..1e5184b5 100644 --- a/src/tangara/audio/audio_fsm.hpp +++ b/src/tangara/audio/audio_fsm.hpp @@ -81,7 +81,7 @@ class AudioState : public tinyfsm::Fsm { static std::shared_ptr sBtOutput; static std::shared_ptr sOutput; - static std::unique_ptr sDrainBuffer; + static std::unique_ptr sDrainBuffers; static StreamCues sStreamCues; static std::optional sDrainFormat; diff --git a/src/tangara/audio/bt_audio_output.cpp b/src/tangara/audio/bt_audio_output.cpp index 616a385f..54547622 100644 --- a/src/tangara/audio/bt_audio_output.cpp +++ b/src/tangara/audio/bt_audio_output.cpp @@ -33,11 +33,11 @@ namespace audio { static constexpr uint16_t kVolumeRange = 60; BluetoothAudioOutput::BluetoothAudioOutput(drivers::Bluetooth& bt, - drivers::PcmBuffer& buffer, + drivers::OutputBuffers& bufs, tasks::WorkerPool& p) : IAudioOutput(), bluetooth_(bt), - buffer_(buffer), + buffers_(bufs), bg_worker_(p), volume_() {} @@ -45,9 +45,9 @@ BluetoothAudioOutput::~BluetoothAudioOutput() {} auto BluetoothAudioOutput::changeMode(Modes mode) -> void { if (mode == Modes::kOnPlaying) { - bluetooth_.SetSource(&buffer_); + bluetooth_.SetSources(&buffers_); } else { - bluetooth_.SetSource(nullptr); + bluetooth_.SetSources(nullptr); } } diff --git a/src/tangara/audio/bt_audio_output.hpp b/src/tangara/audio/bt_audio_output.hpp index f22f330a..53d2c1a4 100644 --- a/src/tangara/audio/bt_audio_output.hpp +++ b/src/tangara/audio/bt_audio_output.hpp @@ -25,7 +25,7 @@ namespace audio { class BluetoothAudioOutput : public IAudioOutput { public: BluetoothAudioOutput(drivers::Bluetooth& bt, - drivers::PcmBuffer& buf, + drivers::OutputBuffers& bufs, tasks::WorkerPool&); ~BluetoothAudioOutput(); @@ -54,7 +54,7 @@ class BluetoothAudioOutput : public IAudioOutput { private: drivers::Bluetooth& bluetooth_; - drivers::PcmBuffer& buffer_; + drivers::OutputBuffers& buffers_; tasks::WorkerPool& bg_worker_; uint16_t volume_; diff 
--git a/src/tangara/audio/i2s_audio_output.cpp b/src/tangara/audio/i2s_audio_output.cpp index 8222b8c9..55c8bdb8 100644 --- a/src/tangara/audio/i2s_audio_output.cpp +++ b/src/tangara/audio/i2s_audio_output.cpp @@ -42,10 +42,10 @@ static constexpr uint16_t kLineLevelVolume = 0x13d; static constexpr uint16_t kDefaultVolume = 0x100; I2SAudioOutput::I2SAudioOutput(drivers::IGpios& expander, - drivers::PcmBuffer& buffer) + drivers::OutputBuffers& buffers) : IAudioOutput(), expander_(expander), - buffer_(buffer), + buffers_(buffers), dac_(), current_mode_(Modes::kOff), current_config_(), @@ -72,7 +72,7 @@ auto I2SAudioOutput::changeMode(Modes mode) -> void { if (was_off) { // Ensure an I2SDac instance actually exists. if (!dac_) { - auto instance = drivers::I2SDac::create(expander_, buffer_); + auto instance = drivers::I2SDac::create(expander_, buffers_); if (!instance) { return; } diff --git a/src/tangara/audio/i2s_audio_output.hpp b/src/tangara/audio/i2s_audio_output.hpp index 35d888b9..2b768ddd 100644 --- a/src/tangara/audio/i2s_audio_output.hpp +++ b/src/tangara/audio/i2s_audio_output.hpp @@ -21,7 +21,7 @@ namespace audio { class I2SAudioOutput : public IAudioOutput { public: - I2SAudioOutput(drivers::IGpios&, drivers::PcmBuffer&); + I2SAudioOutput(drivers::IGpios&, drivers::OutputBuffers&); auto SetMaxVolume(uint16_t) -> void; auto SetVolumeDb(uint16_t) -> void; @@ -51,7 +51,7 @@ class I2SAudioOutput : public IAudioOutput { private: drivers::IGpios& expander_; - drivers::PcmBuffer& buffer_; + drivers::OutputBuffers& buffers_; std::unique_ptr dac_; From 370d1853b5d099de28c032def4ce3e53b7d735ad Mon Sep 17 00:00:00 2001 From: jacqueline Date: Tue, 9 Jul 2024 14:41:02 +1000 Subject: [PATCH 3/9] Break FatfsStreamFactory's dep on ServiceLocator --- src/tangara/audio/audio_fsm.cpp | 3 ++- src/tangara/audio/fatfs_stream_factory.cpp | 12 ++++++------ src/tangara/audio/fatfs_stream_factory.hpp | 11 +++++------ src/tangara/database/database.cpp | 6 ++++++ 
src/tangara/database/database.hpp | 10 ++++++++++ src/tangara/system_fsm/service_locator.hpp | 2 +- 6 files changed, 30 insertions(+), 14 deletions(-) diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp index 8f04c6c1..ad60ab86 100644 --- a/src/tangara/audio/audio_fsm.cpp +++ b/src/tangara/audio/audio_fsm.cpp @@ -363,7 +363,8 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) { sDrainBuffers = std::make_unique( kTrackDrainLatencySamples, kSystemDrainLatencySamples); - sStreamFactory.reset(new FatfsStreamFactory(*sServices)); + sStreamFactory.reset( + new FatfsStreamFactory(sServices->database(), sServices->tag_parser())); sI2SOutput.reset(new I2SAudioOutput(sServices->gpios(), *sDrainBuffers)); sBtOutput.reset(new BluetoothAudioOutput( sServices->bluetooth(), *sDrainBuffers, sServices->bg_worker())); diff --git a/src/tangara/audio/fatfs_stream_factory.cpp b/src/tangara/audio/fatfs_stream_factory.cpp index 80677b2d..735ec134 100644 --- a/src/tangara/audio/fatfs_stream_factory.cpp +++ b/src/tangara/audio/fatfs_stream_factory.cpp @@ -10,7 +10,6 @@ #include #include -#include "database/database.hpp" #include "esp_log.h" #include "ff.h" #include "freertos/portmacro.h" @@ -19,10 +18,10 @@ #include "audio/audio_source.hpp" #include "audio/fatfs_source.hpp" #include "codec.hpp" +#include "database/database.hpp" #include "database/tag_parser.hpp" #include "database/track.hpp" #include "drivers/spi.hpp" -#include "system_fsm/service_locator.hpp" #include "tasks.hpp" #include "types.hpp" @@ -30,12 +29,13 @@ namespace audio { -FatfsStreamFactory::FatfsStreamFactory(system_fsm::ServiceLocator& services) - : services_(services) {} +FatfsStreamFactory::FatfsStreamFactory(database::Handle&& handle, + database::ITagParser& parser) + : db_(handle), tag_parser_(parser) {} auto FatfsStreamFactory::create(database::TrackId id, uint32_t offset) -> std::shared_ptr { - auto db = services_.database().lock(); + auto db = db_.lock(); if (!db) { return 
{}; } @@ -48,7 +48,7 @@ auto FatfsStreamFactory::create(database::TrackId id, uint32_t offset) auto FatfsStreamFactory::create(std::string path, uint32_t offset) -> std::shared_ptr { - auto tags = services_.tag_parser().ReadAndParseTags(path); + auto tags = tag_parser_.ReadAndParseTags(path); if (!tags) { ESP_LOGE(kTag, "failed to read tags"); return {}; diff --git a/src/tangara/audio/fatfs_stream_factory.hpp b/src/tangara/audio/fatfs_stream_factory.hpp index 858d2131..84073d2d 100644 --- a/src/tangara/audio/fatfs_stream_factory.hpp +++ b/src/tangara/audio/fatfs_stream_factory.hpp @@ -6,23 +6,21 @@ #pragma once -#include #include #include #include #include #include -#include "database/database.hpp" -#include "database/track.hpp" #include "ff.h" #include "freertos/portmacro.h" #include "audio/audio_source.hpp" #include "codec.hpp" +#include "database/database.hpp" #include "database/future_fetcher.hpp" #include "database/tag_parser.hpp" -#include "system_fsm/service_locator.hpp" +#include "database/track.hpp" #include "tasks.hpp" #include "types.hpp" @@ -33,7 +31,7 @@ namespace audio { */ class FatfsStreamFactory { public: - explicit FatfsStreamFactory(system_fsm::ServiceLocator&); + explicit FatfsStreamFactory(database::Handle&&, database::ITagParser&); auto create(database::TrackId, uint32_t offset = 0) -> std::shared_ptr; @@ -47,7 +45,8 @@ class FatfsStreamFactory { auto ContainerToStreamType(database::Container) -> std::optional; - system_fsm::ServiceLocator& services_; + database::Handle db_; + database::ITagParser& tag_parser_; }; } // namespace audio diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp index cf1430b3..85700431 100644 --- a/src/tangara/database/database.cpp +++ b/src/tangara/database/database.cpp @@ -684,6 +684,12 @@ auto Database::countRecords(const SearchKey& c) -> size_t { return count; } +Handle::Handle(std::shared_ptr& db) : db_(db) {} + +auto Handle::lock() -> std::shared_ptr { + return db_; +} + auto 
SearchKey::startKey() const -> std::string_view { if (key) { return *key; diff --git a/src/tangara/database/database.hpp b/src/tangara/database/database.hpp index d2de7c72..c2e72568 100644 --- a/src/tangara/database/database.hpp +++ b/src/tangara/database/database.hpp @@ -128,6 +128,16 @@ class Database { auto countRecords(const SearchKey& c) -> size_t; }; +class Handle { + public: + Handle(std::shared_ptr& db); + + auto lock() -> std::shared_ptr; + + private: + std::shared_ptr& db_; +}; + /* * Container for the data needed to iterate through database records. This is a * lower-level type that the higher-level iterators are built from; most users diff --git a/src/tangara/system_fsm/service_locator.hpp b/src/tangara/system_fsm/service_locator.hpp index 3d136f3a..d441fa70 100644 --- a/src/tangara/system_fsm/service_locator.hpp +++ b/src/tangara/system_fsm/service_locator.hpp @@ -92,7 +92,7 @@ class ServiceLocator { auto haptics(std::unique_ptr i) { haptics_ = std::move(i); } - auto database() -> std::weak_ptr { return database_; } + auto database() -> database::Handle { return database_; } auto database(std::unique_ptr i) { database_ = std::move(i); From 9475d10d1000c7e21a7ea311b0c8ee6a72ef46c4 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Fri, 19 Jul 2024 13:59:30 +1000 Subject: [PATCH 4/9] WIP initial tts player wiring --- src/tangara/audio/audio_fsm.cpp | 6 ++++++ src/tangara/tts/player.cpp | 24 +++++++++++++++++++++ src/tangara/tts/player.hpp | 38 +++++++++++++++++++++++++++++++++ src/tangara/tts/provider.cpp | 23 ++++++++++++++++++++ src/tangara/tts/provider.hpp | 17 +++++++++++++++ 5 files changed, 108 insertions(+) create mode 100644 src/tangara/tts/player.cpp create mode 100644 src/tangara/tts/player.hpp diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp index ad60ab86..dbf1954c 100644 --- a/src/tangara/audio/audio_fsm.cpp +++ b/src/tangara/audio/audio_fsm.cpp @@ -43,6 +43,7 @@ #include "sample.hpp" #include 
"system_fsm/service_locator.hpp" #include "system_fsm/system_events.hpp" +#include "tts/player.hpp" namespace audio { @@ -369,6 +370,11 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) { sBtOutput.reset(new BluetoothAudioOutput( sServices->bluetooth(), *sDrainBuffers, sServices->bg_worker())); + auto& tts_provider = sServices->tts(); + auto tts_player = std::make_unique( + sServices->bg_worker(), sDrainBuffers->second, *sStreamFactory); + tts_provider.player(std::move(tts_player)); + auto& nvs = sServices->nvs(); sI2SOutput->SetMaxVolume(nvs.AmpMaxVolume()); sI2SOutput->SetVolume(nvs.AmpCurrentVolume()); diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp new file mode 100644 index 00000000..70959992 --- /dev/null +++ b/src/tangara/tts/player.cpp @@ -0,0 +1,24 @@ +/* + * Copyright 2024 jacqueline + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "tts/player.hpp" + +#include "esp_log.h" + +namespace tts { + +[[maybe_unused]] static constexpr char kTag[] = "ttsplay"; + +Player::Player(tasks::WorkerPool& worker, + drivers::PcmBuffer& output, + audio::FatfsStreamFactory& factory) + : bg_(worker), stream_factory_(factory), output_(output) {} + +auto Player::playFile(const std::string& path) -> void { + ESP_LOGI(kTag, "playing '%s'", path.c_str()); +} + +} // namespace tts diff --git a/src/tangara/tts/player.hpp b/src/tangara/tts/player.hpp new file mode 100644 index 00000000..a132b9cd --- /dev/null +++ b/src/tangara/tts/player.hpp @@ -0,0 +1,38 @@ +/* + * Copyright 2024 jacqueline + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include + +#include "audio/fatfs_stream_factory.hpp" +#include "drivers/pcm_buffer.hpp" +#include "tasks.hpp" + +namespace tts { + +/* + * A TTS Player is the output stage of the TTS pipeline. It receives a stream + * of filenames that should be played, and handles decoding these files and + * sending them to the output buffer. 
+ */ +class Player { + public: + Player(tasks::WorkerPool&, drivers::PcmBuffer&, audio::FatfsStreamFactory&); + + auto playFile(const std::string& path) -> void; + + // Not copyable or movable. + Player(const Player&) = delete; + Player& operator=(const Player&) = delete; + + private: + tasks::WorkerPool& bg_; + audio::FatfsStreamFactory& stream_factory_; + drivers::PcmBuffer& output_; +}; + +} // namespace tts diff --git a/src/tangara/tts/provider.cpp b/src/tangara/tts/provider.cpp index 7d33bae6..24229233 100644 --- a/src/tangara/tts/provider.cpp +++ b/src/tangara/tts/provider.cpp @@ -5,21 +5,40 @@ */ #include "tts/provider.hpp" +#include +#include #include +#include #include #include +#include "drivers/storage.hpp" #include "esp_log.h" +#include "komihash.h" #include "tts/events.hpp" namespace tts { [[maybe_unused]] static constexpr char kTag[] = "tts"; +static const char* kTtsPath = "/.tangara-tts/"; + +static auto textToFile(const std::string& text) -> std::optional { + uint64_t hash = komihash(text.data(), text.size(), 0); + std::stringstream stream; + stream << drivers::kStoragePath << kTtsPath; + stream << std::hex << hash; + return stream.str(); +} + Provider::Provider() {} +auto Provider::player(std::unique_ptr p) -> void { + player_ = std::move(p); +} + auto Provider::feed(const Event& e) -> void { if (std::holds_alternative(e)) { // ESP_LOGI(kTag, "context changed"); @@ -31,6 +50,10 @@ auto Provider::feed(const Event& e) -> void { // ESP_LOGI(kTag, "new selection: '%s', interactive? 
%i", // ev.new_selection->description.value_or("").c_str(), // ev.new_selection->is_interactive); + std::string new_desc = ev.new_selection->description.value_or(""); + if (player_) { + player_->playFile(textToFile(new_desc).value_or("")); + } } } } diff --git a/src/tangara/tts/provider.hpp b/src/tangara/tts/provider.hpp index 59f61a6c..8fe143cc 100644 --- a/src/tangara/tts/provider.hpp +++ b/src/tangara/tts/provider.hpp @@ -6,18 +6,35 @@ #pragma once +#include #include #include #include #include "tts/events.hpp" +#include "tts/player.hpp" namespace tts { +/* + * A TTS Provider is responsible for receiving system events that may be + * relevant to TTS, and digesting them into discrete 'utterances' that can be + * used to generate audio feedback. + */ class Provider { public: Provider(); + + auto player(std::unique_ptr) -> void; + auto feed(const Event&) -> void; + + // Not copyable or movable. + Provider(const Provider&) = delete; + Provider& operator=(const Provider&) = delete; + + private: + std::unique_ptr player_; }; } // namespace tts From d0b739c66ef11a6c16f99cad6957a1782236fd8c Mon Sep 17 00:00:00 2001 From: jacqueline Date: Mon, 9 Sep 2024 16:39:22 +1000 Subject: [PATCH 5/9] Play basic wav files in response to tts prompts It's currently quite limited (no stereo or sample rate conversion, multiple messages clobber each other, only plays if music is playing), but we're getting there! 
--- src/tangara/audio/audio_fsm.cpp | 2 +- src/tangara/tts/player.cpp | 50 +++++++++++++++++++++++++++++++++ src/tangara/tts/provider.cpp | 3 +- 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp index ee7215cb..dac04f75 100644 --- a/src/tangara/audio/audio_fsm.cpp +++ b/src/tangara/audio/audio_fsm.cpp @@ -226,7 +226,7 @@ void AudioState::react(const internal::StreamStarted& ev) { } sStreamCues.addCue(ev.track, ev.cue_at_sample); - sStreamCues.update(sDrainBuffer->totalReceived()); + sStreamCues.update(sDrainBuffers->first.totalReceived()); if (!sIsPaused && !is_in_state()) { transit(); diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp index 70959992..3fcd88bc 100644 --- a/src/tangara/tts/player.cpp +++ b/src/tangara/tts/player.cpp @@ -6,7 +6,10 @@ #include "tts/player.hpp" +#include "codec.hpp" #include "esp_log.h" +#include "sample.hpp" +#include "types.hpp" namespace tts { @@ -19,6 +22,53 @@ Player::Player(tasks::WorkerPool& worker, auto Player::playFile(const std::string& path) -> void { ESP_LOGI(kTag, "playing '%s'", path.c_str()); + bg_.Dispatch([=]() { + auto stream = stream_factory_.create(path); + if (!stream) { + ESP_LOGE(kTag, "creating stream failed"); + return; + } + if (stream->type() != codecs::StreamType::kWav) { + ESP_LOGE(kTag, "stream was unsupported type"); + return; + } + auto decoder = codecs::CreateCodecForType(stream->type()); + if (!decoder) { + ESP_LOGE(kTag, "creating decoder failed"); + return; + } + std::unique_ptr codec{*decoder}; + auto open_res = codec->OpenStream(stream, 0); + if (open_res.has_error()) { + ESP_LOGE(kTag, "opening stream failed"); + return; + } + // if (open_res->sample_rate_hz != 48000 || open_res->num_channels != 2) { + // ESP_LOGE(kTag, "stream format is wrong (was %u channels @ %lu hz)", + // open_res->num_channels, open_res->sample_rate_hz); + // return; + // } + sample::Sample decode_buf[4096]; + for (;;) { + auto 
decode_res = codec->DecodeTo(decode_buf); + if (decode_res.has_error()) { + ESP_LOGE(kTag, "decoding error"); + return; + } + if (decode_res->is_stream_finished) { + break; + } + + std::span decode_span{decode_buf, + decode_res->samples_written}; + while (!decode_span.empty()) { + size_t sent = output_.send(decode_span); + decode_span = decode_span.subspan(sent); + } + } + + ESP_LOGI(kTag, "finished playing okay"); + }); } } // namespace tts diff --git a/src/tangara/tts/provider.cpp b/src/tangara/tts/provider.cpp index 24229233..b7c1e55d 100644 --- a/src/tangara/tts/provider.cpp +++ b/src/tangara/tts/provider.cpp @@ -28,8 +28,7 @@ static const char* kTtsPath = "/.tangara-tts/"; static auto textToFile(const std::string& text) -> std::optional { uint64_t hash = komihash(text.data(), text.size(), 0); std::stringstream stream; - stream << drivers::kStoragePath << kTtsPath; - stream << std::hex << hash; + stream << kTtsPath << std::hex << hash << ".wav"; return stream.str(); } From 542ebc65317ac4744a4b96c3131dace5bda10314 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Wed, 11 Sep 2024 12:57:04 +1000 Subject: [PATCH 6/9] Play TTS files in response to TTS prompts, but it's legible now - input files are upsampled and padded to stereo before playback - any in-progress playback is cancelled before playing a new file --- src/tangara/audio/processor.cpp | 33 +++++---- src/tangara/audio/processor.hpp | 56 +++++++------- src/tangara/tts/player.cpp | 125 +++++++++++++++++++++++++++----- src/tangara/tts/player.hpp | 7 ++ src/tangara/tts/provider.cpp | 2 +- 5 files changed, 162 insertions(+), 61 deletions(-) diff --git a/src/tangara/audio/processor.cpp b/src/tangara/audio/processor.cpp index aa2604b5..2fa7f78e 100644 --- a/src/tangara/audio/processor.cpp +++ b/src/tangara/audio/processor.cpp @@ -347,34 +347,39 @@ auto SampleProcessor::discardCommand(Args& command) -> void { // End of stream commands can just be dropped without further action.
} -SampleProcessor::Buffer::Buffer() - : buffer_(reinterpret_cast( - heap_caps_calloc(kSampleBufferLength, - sizeof(sample::Sample), - MALLOC_CAP_DMA)), - kSampleBufferLength), +Buffer::Buffer(std::span storage) + : storage_(nullptr), buffer_(storage), samples_in_buffer_() {} + +Buffer::Buffer() + : storage_(reinterpret_cast( + heap_caps_calloc(kSampleBufferLength, + sizeof(sample::Sample), + MALLOC_CAP_DMA))), + buffer_(storage_, kSampleBufferLength), samples_in_buffer_() {} -SampleProcessor::Buffer::~Buffer() { - heap_caps_free(buffer_.data()); +Buffer::~Buffer() { + if (storage_) { + heap_caps_free(storage_); + } } -auto SampleProcessor::Buffer::writeAcquire() -> std::span { +auto Buffer::writeAcquire() -> std::span { return buffer_.subspan(samples_in_buffer_.size()); } -auto SampleProcessor::Buffer::writeCommit(size_t samples) -> void { +auto Buffer::writeCommit(size_t samples) -> void { if (samples == 0) { return; } samples_in_buffer_ = buffer_.first(samples + samples_in_buffer_.size()); } -auto SampleProcessor::Buffer::readAcquire() -> std::span { +auto Buffer::readAcquire() -> std::span { return samples_in_buffer_; } -auto SampleProcessor::Buffer::readCommit(size_t samples) -> void { +auto Buffer::readCommit(size_t samples) -> void { if (samples == 0) { return; } @@ -389,11 +394,11 @@ auto SampleProcessor::Buffer::readCommit(size_t samples) -> void { } } -auto SampleProcessor::Buffer::isEmpty() -> bool { +auto Buffer::isEmpty() -> bool { return samples_in_buffer_.empty(); } -auto SampleProcessor::Buffer::clear() -> void { +auto Buffer::clear() -> void { samples_in_buffer_ = {}; } diff --git a/src/tangara/audio/processor.hpp b/src/tangara/audio/processor.hpp index 45e05291..52bace95 100644 --- a/src/tangara/audio/processor.hpp +++ b/src/tangara/audio/processor.hpp @@ -22,6 +22,35 @@ namespace audio { +/* Utility for managing buffering samples between digital filters. 
*/ +class Buffer { + public: + Buffer(std::span storage); + Buffer(); + ~Buffer(); + + /* Returns a span of the unused space within the buffer. */ + auto writeAcquire() -> std::span; + /* Signals how many samples were just added to the writeAcquire span. */ + auto writeCommit(size_t) -> void; + + /* Returns a span of the samples stored within the buffer. */ + auto readAcquire() -> std::span; + /* Signals how many samples from the readAcquire span were consumed. */ + auto readCommit(size_t) -> void; + + auto isEmpty() -> bool; + auto clear() -> void; + + Buffer(const Buffer&) = delete; + Buffer& operator=(const Buffer&) = delete; + + private: + sample::Sample* storage_; + std::span buffer_; + std::span samples_in_buffer_; +}; + /* * Handle to a persistent task that converts samples between formats (sample * rate, channels, bits per sample), in order to put samples in the preferred @@ -87,33 +116,6 @@ class SampleProcessor { StreamBufferHandle_t source_; drivers::PcmBuffer& sink_; - /* Internal utility for managing buffering samples between our filters. */ - class Buffer { - public: - Buffer(); - ~Buffer(); - - /* Returns a span of the unused space within the buffer. */ - auto writeAcquire() -> std::span; - /* Signals how many samples were just added to the writeAcquire span. */ - auto writeCommit(size_t) -> void; - - /* Returns a span of the samples stored within the buffer. */ - auto readAcquire() -> std::span; - /* Signals how many samples from the readAcquire span were consumed. 
*/ - auto readCommit(size_t) -> void; - - auto isEmpty() -> bool; - auto clear() -> void; - - Buffer(const Buffer&) = delete; - Buffer& operator=(const Buffer&) = delete; - - private: - std::span buffer_; - std::span samples_in_buffer_; - }; - Buffer input_buffer_; Buffer resampled_buffer_; Buffer output_buffer_; diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp index 3fcd88bc..b5b99b5d 100644 --- a/src/tangara/tts/player.cpp +++ b/src/tangara/tts/player.cpp @@ -6,8 +6,12 @@ #include "tts/player.hpp" +#include "audio/processor.hpp" +#include "audio/resample.hpp" #include "codec.hpp" #include "esp_log.h" +#include "freertos/projdefs.h" +#include "portmacro.h" #include "sample.hpp" #include "types.hpp" @@ -18,57 +22,140 @@ namespace tts { Player::Player(tasks::WorkerPool& worker, drivers::PcmBuffer& output, audio::FatfsStreamFactory& factory) - : bg_(worker), stream_factory_(factory), output_(output) {} + : bg_(worker), stream_factory_(factory), output_(output), play_count_(0) {} auto Player::playFile(const std::string& path) -> void { ESP_LOGI(kTag, "playing '%s'", path.c_str()); - bg_.Dispatch([=]() { + int this_play = ++play_count_; + + bg_.Dispatch([=, this]() { auto stream = stream_factory_.create(path); if (!stream) { ESP_LOGE(kTag, "creating stream failed"); return; } + + // FIXME: Rather than hardcoding WAV support only, we should work out a + // proper subset of 'low memory' decoders that can all be used for TTS + // playback. 
if (stream->type() != codecs::StreamType::kWav) { ESP_LOGE(kTag, "stream was unsupported type"); return; } + auto decoder = codecs::CreateCodecForType(stream->type()); if (!decoder) { ESP_LOGE(kTag, "creating decoder failed"); return; } + std::unique_ptr codec{*decoder}; auto open_res = codec->OpenStream(stream, 0); if (open_res.has_error()) { ESP_LOGE(kTag, "opening stream failed"); return; } - // if (open_res->sample_rate_hz != 48000 || open_res->num_channels != 2) { - // ESP_LOGE(kTag, "stream format is wrong (was %u channels @ %lu hz)", - // open_res->num_channels, open_res->sample_rate_hz); - // return; - // } - sample::Sample decode_buf[4096]; - for (;;) { - auto decode_res = codec->DecodeTo(decode_buf); + + decodeToSink(*open_res, std::move(codec), this_play); + }); +} + +auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format, + std::unique_ptr codec, + int play_count) -> void { + // Set up buffers to hold samples between the intermediary parts of + // processing. We can just use the stack for these, since this method is + // called only from background workers, which have enormous stacks. + sample::Sample decode_storage[4096]; + audio::Buffer decode_buf(decode_storage); + + sample::Sample resample_storage[4096]; + audio::Buffer resample_buf(resample_storage); + + sample::Sample stereo_storage[4096]; + audio::Buffer stereo_buf(stereo_storage); + + // Work out what processing the codec's output needs. + std::unique_ptr resampler; + if (format.sample_rate_hz != 48000) { + resampler = std::make_unique(format.sample_rate_hz, 48000, + format.num_channels); + } + bool double_samples = format.num_channels == 1; + + // FIXME: This decode-and-process loop is substantially the same as the audio + // processor's filter loop. Ideally we should refactor both of these loops to + // reuse code, however I'm holding off on doing this until we've implemented + // more advanced audio processing features in the audio processor (EQ, tempo + // shifting, etc.) 
as it's not clear to me yet how much the two codepaths will + // be diverging later anyway. + while (codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() || + !stereo_buf.isEmpty()) { + if (play_count != play_count_) { + // FIXME: This is a little unsafe and could maybe take out the first few + // samples of the next file. + output_.clear(); + break; + } + if (codec) { + auto decode_res = codec->DecodeTo(decode_buf.writeAcquire()); if (decode_res.has_error()) { ESP_LOGE(kTag, "decoding error"); - return; + break; } + decode_buf.writeCommit(decode_res->samples_written); if (decode_res->is_stream_finished) { - break; + codec.reset(); } + } + + if (!decode_buf.isEmpty()) { + auto resample_input = decode_buf.readAcquire(); + auto resample_output = resample_buf.writeAcquire(); - std::span decode_span{decode_buf, - decode_res->samples_written}; - while (!decode_span.empty()) { - size_t sent = output_.send(decode_span); - decode_span = decode_span.subspan(sent); + size_t read, wrote; + if (resampler) { + std::tie(read, wrote) = + resampler->Process(resample_input, resample_output, false); + } else { + read = wrote = std::min(resample_input.size(), resample_output.size()); + std::copy_n(resample_input.begin(), read, resample_output.begin()); } + + decode_buf.readCommit(read); + resample_buf.writeCommit(wrote); } - ESP_LOGI(kTag, "finished playing okay"); - }); + if (!resample_buf.isEmpty()) { + auto channels_input = resample_buf.readAcquire(); + auto channels_output = stereo_buf.writeAcquire(); + size_t read, wrote; + if (double_samples) { + wrote = channels_output.size(); + read = wrote / 2; + if (read > channels_input.size()) { + read = channels_input.size(); + wrote = read * 2; + } + for (size_t i = 0; i < read; i++) { + channels_output[i * 2] = channels_input[i]; + channels_output[(i * 2) + 1] = channels_input[i]; + } + } else { + read = wrote = std::min(channels_input.size(), channels_output.size()); + std::copy_n(channels_input.begin(), read, 
channels_output.begin()); + } + resample_buf.readCommit(read); + stereo_buf.writeCommit(wrote); + } + + // The mixin PcmBuffer should almost always be draining, so we can force + // samples into it more aggressively than with the main music PcmBuffer. + while (!stereo_buf.isEmpty()) { + size_t sent = output_.send(stereo_buf.readAcquire()); + stereo_buf.readCommit(sent); + } + } } } // namespace tts diff --git a/src/tangara/tts/player.hpp b/src/tangara/tts/player.hpp index a132b9cd..0a3ba723 100644 --- a/src/tangara/tts/player.hpp +++ b/src/tangara/tts/player.hpp @@ -9,6 +9,7 @@ #include #include "audio/fatfs_stream_factory.hpp" +#include "codec.hpp" #include "drivers/pcm_buffer.hpp" #include "tasks.hpp" @@ -33,6 +34,12 @@ class Player { tasks::WorkerPool& bg_; audio::FatfsStreamFactory& stream_factory_; drivers::PcmBuffer& output_; + + std::atomic play_count_; + + auto decodeToSink(const codecs::ICodec::OutputFormat&, + std::unique_ptr, + int play_count) -> void; }; } // namespace tts diff --git a/src/tangara/tts/provider.cpp b/src/tangara/tts/provider.cpp index b7c1e55d..2b1dd4e6 100644 --- a/src/tangara/tts/provider.cpp +++ b/src/tangara/tts/provider.cpp @@ -28,7 +28,7 @@ static const char* kTtsPath = "/.tangara-tts/"; static auto textToFile(const std::string& text) -> std::optional { uint64_t hash = komihash(text.data(), text.size(), 0); std::stringstream stream; - stream << kTtsPath << std::hex << hash << ".wav"; + stream << kTtsPath << std::hex << hash; return stream.str(); } From c51709f99ff5456a5863ca39ff893f823a3642d4 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Thu, 12 Sep 2024 10:44:26 +1000 Subject: [PATCH 7/9] Pause and unpause the current audio output in response to TTS --- src/drivers/include/drivers/pcm_buffer.hpp | 3 + src/drivers/pcm_buffer.cpp | 14 ++- src/tangara/audio/audio_events.hpp | 5 +- src/tangara/audio/audio_fsm.cpp | 26 +++++- src/tangara/audio/audio_fsm.hpp | 4 + src/tangara/tts/player.cpp | 102 ++++++++++++++------- 
src/tangara/tts/player.hpp | 8 +- 7 files changed, 119 insertions(+), 43 deletions(-) diff --git a/src/drivers/include/drivers/pcm_buffer.hpp b/src/drivers/include/drivers/pcm_buffer.hpp index 4e5fa041..6b38be94 100644 --- a/src/drivers/include/drivers/pcm_buffer.hpp +++ b/src/drivers/include/drivers/pcm_buffer.hpp @@ -49,6 +49,7 @@ class PcmBuffer { auto clear() -> void; auto isEmpty() -> bool; + auto suspend(bool) -> void; /* * How many samples have been added to this buffer since it was created. This @@ -75,6 +76,8 @@ class PcmBuffer { std::atomic sent_; std::atomic received_; + std::atomic suspended_; + RingbufHandle_t ringbuf_; }; diff --git a/src/drivers/pcm_buffer.cpp b/src/drivers/pcm_buffer.cpp index 1e416301..bc58d4b9 100644 --- a/src/drivers/pcm_buffer.cpp +++ b/src/drivers/pcm_buffer.cpp @@ -25,7 +25,8 @@ namespace drivers { [[maybe_unused]] static const char kTag[] = "pcmbuf"; -PcmBuffer::PcmBuffer(size_t size_in_samples) : sent_(0), received_(0) { +PcmBuffer::PcmBuffer(size_t size_in_samples) + : sent_(0), received_(0), suspended_(false) { size_t size_in_bytes = size_in_samples * sizeof(int16_t); ESP_LOGI(kTag, "allocating pcm buffer of size %u (%uKiB)", size_in_samples, size_in_bytes / 1024); @@ -51,6 +52,13 @@ auto PcmBuffer::send(std::span data) -> size_t { IRAM_ATTR auto PcmBuffer::receive(std::span dest, bool mix, bool isr) -> BaseType_t { + if (suspended_) { + if (!mix) { + std::fill_n(dest.begin(), dest.size(), 0); + } + return false; + } + size_t first_read = 0, second_read = 0; BaseType_t ret1 = false, ret2 = false; std::tie(first_read, ret1) = readSingle(dest, mix, isr); @@ -86,6 +94,10 @@ auto PcmBuffer::isEmpty() -> bool { xRingbufferGetCurFreeSize(ringbuf_); } +auto PcmBuffer::suspend(bool s) -> void { + suspended_ = s; +} + auto PcmBuffer::totalSent() -> uint32_t { return sent_; } diff --git a/src/tangara/audio/audio_events.hpp b/src/tangara/audio/audio_events.hpp index 91bcf48b..56d150b2 100644 --- a/src/tangara/audio/audio_events.hpp 
+++ b/src/tangara/audio/audio_events.hpp @@ -144,8 +144,11 @@ struct OutputModeChanged : tinyfsm::Event { std::optional set_to; }; -namespace internal { +struct TtsPlaybackChanged : tinyfsm::Event { + bool is_playing; +}; +namespace internal { struct DecodingStarted : tinyfsm::Event { std::shared_ptr track; }; diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp index dac04f75..1daf568e 100644 --- a/src/tangara/audio/audio_fsm.cpp +++ b/src/tangara/audio/audio_fsm.cpp @@ -76,6 +76,7 @@ std::optional AudioState::sDrainFormat; StreamCues AudioState::sStreamCues; bool AudioState::sIsPaused = true; +bool AudioState::sIsTtsPlaying = false; auto AudioState::emitPlaybackUpdate(bool paused) -> void { std::optional position; @@ -191,6 +192,11 @@ void AudioState::react(const TogglePlayPause& ev) { } } +void AudioState::react(const TtsPlaybackChanged& ev) { + sIsTtsPlaying = ev.is_playing; + updateOutputMode(); +} + void AudioState::react(const internal::DecodingFinished& ev) { // If we just finished playing whatever's at the front of the queue, then we // need to advanve and start playing the next one ASAP in order to continue @@ -369,8 +375,8 @@ void AudioState::react(const OutputModeChanged& ev) { sOutput = sI2SOutput; break; } - sOutput->mode(IAudioOutput::Modes::kOnPaused); sSampleProcessor->SetOutput(sOutput); + updateOutputMode(); // Bluetooth volume isn't 'changed' until we've connected to a device. 
if (new_mode == drivers::NvsStorage::Output::kHeadphones) { @@ -381,6 +387,14 @@ void AudioState::react(const OutputModeChanged& ev) { } } +auto AudioState::updateOutputMode() -> void { + if (is_in_state() || sIsTtsPlaying) { + sOutput->mode(IAudioOutput::Modes::kOnPlaying); + } else { + sOutput->mode(IAudioOutput::Modes::kOnPaused); + } +} + auto AudioState::commitVolume() -> void { auto mode = sServices->nvs().OutputMode(); auto vol = sOutput->GetVolume(); @@ -402,6 +416,7 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) { sDrainBuffers = std::make_unique( kTrackDrainLatencySamples, kSystemDrainLatencySamples); + sDrainBuffers->first.suspend(true); sStreamFactory.reset( new FatfsStreamFactory(sServices->database(), sServices->tag_parser())); @@ -454,6 +469,10 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) { static const char kQueueKey[] = "audio:queue"; static const char kCurrentFileKey[] = "audio:current"; +auto Standby::entry() -> void { + updateOutputMode(); +} + void Standby::react(const system_fsm::KeyLockChanged& ev) { if (!ev.locking) { return; @@ -539,7 +558,8 @@ static void heartbeat(TimerHandle_t) { void Playback::entry() { ESP_LOGI(kTag, "audio output resumed"); - sOutput->mode(IAudioOutput::Modes::kOnPlaying); + sDrainBuffers->first.suspend(false); + updateOutputMode(); emitPlaybackUpdate(false); if (!sHeartbeatTimer) { @@ -552,7 +572,7 @@ void Playback::entry() { void Playback::exit() { ESP_LOGI(kTag, "audio output paused"); xTimerStop(sHeartbeatTimer, portMAX_DELAY); - sOutput->mode(IAudioOutput::Modes::kOnPaused); + sDrainBuffers->first.suspend(true); emitPlaybackUpdate(true); } diff --git a/src/tangara/audio/audio_fsm.hpp b/src/tangara/audio/audio_fsm.hpp index 134d9ffd..bc3feb55 100644 --- a/src/tangara/audio/audio_fsm.hpp +++ b/src/tangara/audio/audio_fsm.hpp @@ -48,6 +48,7 @@ class AudioState : public tinyfsm::Fsm { void react(const PlaySineWave&); void react(const SetTrack&); void react(const 
TogglePlayPause&); + void react(const TtsPlaybackChanged&); void react(const internal::DecodingFinished&); void react(const internal::StreamStarted&); @@ -70,6 +71,7 @@ class AudioState : public tinyfsm::Fsm { virtual void react(const system_fsm::HasPhonesChanged&); protected: + auto updateOutputMode() -> void; auto emitPlaybackUpdate(bool paused) -> void; auto commitVolume() -> void; @@ -88,6 +90,7 @@ class AudioState : public tinyfsm::Fsm { static std::optional sDrainFormat; static bool sIsPaused; + static bool sIsTtsPlaying; }; namespace states { @@ -102,6 +105,7 @@ class Uninitialised : public AudioState { class Standby : public AudioState { public: + void entry() override; void react(const system_fsm::KeyLockChanged&) override; void react(const system_fsm::SdStateChanged&) override; diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp index b5b99b5d..a803ce57 100644 --- a/src/tangara/tts/player.cpp +++ b/src/tangara/tts/player.cpp @@ -5,11 +5,14 @@ */ #include "tts/player.hpp" +#include +#include "audio/audio_events.hpp" #include "audio/processor.hpp" #include "audio/resample.hpp" #include "codec.hpp" #include "esp_log.h" +#include "events/event_queue.hpp" #include "freertos/projdefs.h" #include "portmacro.h" #include "sample.hpp" @@ -22,47 +25,70 @@ namespace tts { Player::Player(tasks::WorkerPool& worker, drivers::PcmBuffer& output, audio::FatfsStreamFactory& factory) - : bg_(worker), stream_factory_(factory), output_(output), play_count_(0) {} + : bg_(worker), + stream_factory_(factory), + output_(output), + stream_playing_(false), + stream_cancelled_(false) {} auto Player::playFile(const std::string& path) -> void { ESP_LOGI(kTag, "playing '%s'", path.c_str()); - int this_play = ++play_count_; bg_.Dispatch([=, this]() { - auto stream = stream_factory_.create(path); - if (!stream) { - ESP_LOGE(kTag, "creating stream failed"); - return; + // Interrupt current playback + { + std::scoped_lock lock{new_stream_mutex_}; + if (stream_playing_) { + 
stream_cancelled_ = true; + stream_playing_.wait(true); + } + stream_cancelled_ = false; + stream_playing_ = true; } - // FIXME: Rather than hardcoding WAV support only, we should work out a - // proper subset of 'low memory' decoders that can all be used for TTS - // playback. - if (stream->type() != codecs::StreamType::kWav) { - ESP_LOGE(kTag, "stream was unsupported type"); - return; - } + openAndDecode(path); - auto decoder = codecs::CreateCodecForType(stream->type()); - if (!decoder) { - ESP_LOGE(kTag, "creating decoder failed"); - return; + if (!stream_cancelled_) { + events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = false}); } + stream_playing_ = false; + stream_playing_.notify_all(); + }); +} - std::unique_ptr codec{*decoder}; - auto open_res = codec->OpenStream(stream, 0); - if (open_res.has_error()) { - ESP_LOGE(kTag, "opening stream failed"); - return; - } +auto Player::openAndDecode(const std::string& path) -> void { + auto stream = stream_factory_.create(path); + if (!stream) { + ESP_LOGE(kTag, "creating stream failed"); + return; + } - decodeToSink(*open_res, std::move(codec), this_play); - }); + // FIXME: Rather than hardcoding WAV support only, we should work out a + // proper subset of 'low memory' decoders that can all be used for TTS + // playback. 
+ if (stream->type() != codecs::StreamType::kWav) { + ESP_LOGE(kTag, "stream was unsupported type"); + return; + } + + auto decoder = codecs::CreateCodecForType(stream->type()); + if (!decoder) { + ESP_LOGE(kTag, "creating decoder failed"); + return; + } + + std::unique_ptr codec{*decoder}; + auto open_res = codec->OpenStream(stream, 0); + if (open_res.has_error()) { + ESP_LOGE(kTag, "opening stream failed"); + return; + } + + decodeToSink(*open_res, std::move(codec)); } auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format, - std::unique_ptr codec, - int play_count) -> void { + std::unique_ptr codec) -> void { // Set up buffers to hold samples between the intermediary parts of // processing. We can just use the stack for these, since this method is // called only from background workers, which have enormous stacks. @@ -83,20 +109,18 @@ auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format, } bool double_samples = format.num_channels == 1; + // Start our playback (wait for previous to end?) + events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = true}); + // FIXME: This decode-and-process loop is substantially the same as the audio // processor's filter loop. Ideally we should refactor both of these loops to // reuse code, however I'm holding off on doing this until we've implemented // more advanced audio processing features in the audio processor (EQ, tempo // shifting, etc.) as it's not clear to me yet how much the two codepaths will // be diverging later anyway. - while (codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() || - !stereo_buf.isEmpty()) { - if (play_count != play_count_) { - // FIXME: This is a little unsafe and could maybe take out the first few - // samples of the next file. 
- output_.clear(); - break; - } + while ((codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() || + !stereo_buf.isEmpty()) && + !stream_cancelled_) { if (codec) { auto decode_res = codec->DecodeTo(decode_buf.writeAcquire()); if (decode_res.has_error()) { @@ -156,6 +180,14 @@ auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format, stereo_buf.readCommit(sent); } } + + while (!output_.isEmpty()) { + if (stream_cancelled_) { + output_.clear(); + } else { + vTaskDelay(pdMS_TO_TICKS(100)); + } + } } } // namespace tts diff --git a/src/tangara/tts/player.hpp b/src/tangara/tts/player.hpp index 0a3ba723..47479007 100644 --- a/src/tangara/tts/player.hpp +++ b/src/tangara/tts/player.hpp @@ -35,11 +35,13 @@ class Player { audio::FatfsStreamFactory& stream_factory_; drivers::PcmBuffer& output_; - std::atomic play_count_; + std::mutex new_stream_mutex_; + std::atomic stream_playing_; + std::atomic stream_cancelled_; + auto openAndDecode(const std::string& path) -> void; auto decodeToSink(const codecs::ICodec::OutputFormat&, - std::unique_ptr, - int play_count) -> void; + std::unique_ptr) -> void; }; } // namespace tts From d8b9e65e68214b4aa2bb24ddae3602d5788bc469 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Thu, 12 Sep 2024 11:44:20 +1000 Subject: [PATCH 8/9] Clean up some tts logging and descriptions --- lua/main_menu.lua | 2 ++ src/codecs/wav.cpp | 2 -- src/tangara/audio/fatfs_stream_factory.cpp | 1 - src/tangara/tts/player.cpp | 15 +++++++-------- src/tangara/tts/player.hpp | 4 ++-- src/tangara/tts/provider.cpp | 13 +++++++++++-- 6 files changed, 22 insertions(+), 15 deletions(-) diff --git a/lua/main_menu.lua b/lua/main_menu.lua index a6b46a8a..f95b0360 100644 --- a/lua/main_menu.lua +++ b/lua/main_menu.lua @@ -155,6 +155,7 @@ return widgets.MenuScreen:new { }) end) files_btn:Image { src = img.files } + widgets.Description(files_btn, "File browser") theme.set_style(files_btn, "menu_icon") local settings_btn = bottom_bar:Button {} @@ -162,6 +163,7 @@ 
return widgets.MenuScreen:new { backstack.push(require("settings"):new()) end) settings_btn:Image { src = img.settings } + widgets.Description(settings_btn, "Settings") theme.set_style(settings_btn, "menu_icon") end, } diff --git a/src/codecs/wav.cpp b/src/codecs/wav.cpp index f5b9d789..746f44ca 100644 --- a/src/codecs/wav.cpp +++ b/src/codecs/wav.cpp @@ -137,8 +137,6 @@ auto WavDecoder::OpenStream(std::shared_ptr input, uint32_t offset) // uint32_t file_size = bytes_to_u32(buffer_span.subspan(4, 4)) + 8; std::string fmt_header = bytes_to_str(buffer_span.subspan(12, 4)); - ESP_LOGI(kTag, "fmt header found? %s", - (fmt_header.starts_with("fmt")) ? "yes" : "no"); if (!fmt_header.starts_with("fmt")) { ESP_LOGW(kTag, "Could not find format chunk"); return cpp::fail(Error::kMalformedData); diff --git a/src/tangara/audio/fatfs_stream_factory.cpp b/src/tangara/audio/fatfs_stream_factory.cpp index 735ec134..94f22ae9 100644 --- a/src/tangara/audio/fatfs_stream_factory.cpp +++ b/src/tangara/audio/fatfs_stream_factory.cpp @@ -50,7 +50,6 @@ auto FatfsStreamFactory::create(std::string path, uint32_t offset) -> std::shared_ptr { auto tags = tag_parser_.ReadAndParseTags(path); if (!tags) { - ESP_LOGE(kTag, "failed to read tags"); return {}; } diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp index a803ce57..46e8c48a 100644 --- a/src/tangara/tts/player.cpp +++ b/src/tangara/tts/player.cpp @@ -31,11 +31,9 @@ Player::Player(tasks::WorkerPool& worker, stream_playing_(false), stream_cancelled_(false) {} -auto Player::playFile(const std::string& path) -> void { - ESP_LOGI(kTag, "playing '%s'", path.c_str()); - +auto Player::playFile(const std::string& text, const std::string& file) + -> void { bg_.Dispatch([=, this]() { - // Interrupt current playback { std::scoped_lock lock{new_stream_mutex_}; if (stream_playing_) { @@ -46,7 +44,7 @@ auto Player::playFile(const std::string& path) -> void { stream_playing_ = true; } - openAndDecode(path); + openAndDecode(text, file); 
if (!stream_cancelled_) { events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = false}); @@ -56,10 +54,11 @@ auto Player::playFile(const std::string& path) -> void { }); } -auto Player::openAndDecode(const std::string& path) -> void { +auto Player::openAndDecode(const std::string& text, const std::string& path) + -> void { auto stream = stream_factory_.create(path); if (!stream) { - ESP_LOGE(kTag, "creating stream failed"); + ESP_LOGW(kTag, "missing '%s' for '%s'", path.c_str(), text.c_str()); return; } @@ -67,7 +66,7 @@ auto Player::openAndDecode(const std::string& path) -> void { // proper subset of 'low memory' decoders that can all be used for TTS // playback. if (stream->type() != codecs::StreamType::kWav) { - ESP_LOGE(kTag, "stream was unsupported type"); + ESP_LOGE(kTag, "'%s' has unsupported encoding", path.c_str()); return; } diff --git a/src/tangara/tts/player.hpp b/src/tangara/tts/player.hpp index 47479007..d28da474 100644 --- a/src/tangara/tts/player.hpp +++ b/src/tangara/tts/player.hpp @@ -24,7 +24,7 @@ class Player { public: Player(tasks::WorkerPool&, drivers::PcmBuffer&, audio::FatfsStreamFactory&); - auto playFile(const std::string& path) -> void; + auto playFile(const std::string& text, const std::string& path) -> void; // Not copyable or movable. Player(const Player&) = delete; @@ -39,7 +39,7 @@ class Player { std::atomic stream_playing_; std::atomic stream_cancelled_; - auto openAndDecode(const std::string& path) -> void; + auto openAndDecode(const std::string& text, const std::string& path) -> void; auto decodeToSink(const codecs::ICodec::OutputFormat&, std::unique_ptr) -> void; }; diff --git a/src/tangara/tts/provider.cpp b/src/tangara/tts/provider.cpp index 2b1dd4e6..d19500e0 100644 --- a/src/tangara/tts/provider.cpp +++ b/src/tangara/tts/provider.cpp @@ -49,9 +49,18 @@ auto Provider::feed(const Event& e) -> void { // ESP_LOGI(kTag, "new selection: '%s', interactive? 
%i", // ev.new_selection->description.value_or("").c_str(), // ev.new_selection->is_interactive); - std::string new_desc = ev.new_selection->description.value_or(""); + auto text = ev.new_selection->description; + if (!text) { + ESP_LOGW(kTag, "missing description for element"); + return; + } + auto file = textToFile(*text); + if (!file) { + return; + } + if (player_) { - player_->playFile(textToFile(new_desc).value_or("")); + player_->playFile(*text, *file); } } } From 7d1266404d8e876ba909d101eb316868036b423c Mon Sep 17 00:00:00 2001 From: jacqueline Date: Tue, 17 Sep 2024 11:57:32 +1000 Subject: [PATCH 9/9] Move SPI ISR back to CPU0 See if this does anything for the intermittent SPI acquire hangs --- src/drivers/spi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/drivers/spi.cpp b/src/drivers/spi.cpp index 40487197..632fe89f 100644 --- a/src/drivers/spi.cpp +++ b/src/drivers/spi.cpp @@ -41,7 +41,7 @@ esp_err_t init_spi(void) { // manages its own use of DMA-capable memory. .max_transfer_sz = 4096, .flags = SPICOMMON_BUSFLAG_MASTER | SPICOMMON_BUSFLAG_IOMUX_PINS, - .isr_cpu_id = ESP_INTR_CPU_AFFINITY_1, + .isr_cpu_id = ESP_INTR_CPU_AFFINITY_0, .intr_flags = ESP_INTR_FLAG_LOWMED | ESP_INTR_FLAG_IRAM, };