diff --git a/lua/main_menu.lua b/lua/main_menu.lua index 8c5c227c..164ba650 100644 --- a/lua/main_menu.lua +++ b/lua/main_menu.lua @@ -155,6 +155,7 @@ return widgets.MenuScreen:new { }) end) files_btn:Image { src = img.files } + widgets.Description(files_btn, "File browser") theme.set_subject(files_btn, "menu_icon") local settings_btn = bottom_bar:Button {} @@ -162,6 +163,7 @@ return widgets.MenuScreen:new { backstack.push(require("settings"):new()) end) settings_btn:Image { src = img.settings } + widgets.Description(settings_btn, "Settings") theme.set_subject(settings_btn, "menu_icon") end, } diff --git a/src/codecs/wav.cpp b/src/codecs/wav.cpp index f5b9d789..746f44ca 100644 --- a/src/codecs/wav.cpp +++ b/src/codecs/wav.cpp @@ -137,8 +137,6 @@ auto WavDecoder::OpenStream(std::shared_ptr input, uint32_t offset) // uint32_t file_size = bytes_to_u32(buffer_span.subspan(4, 4)) + 8; std::string fmt_header = bytes_to_str(buffer_span.subspan(12, 4)); - ESP_LOGI(kTag, "fmt header found? %s", - (fmt_header.starts_with("fmt")) ? 
"yes" : "no"); if (!fmt_header.starts_with("fmt")) { ESP_LOGW(kTag, "Could not find format chunk"); return cpp::fail(Error::kMalformedData); diff --git a/src/drivers/bluetooth.cpp b/src/drivers/bluetooth.cpp index 8ec30395..3da5dd0c 100644 --- a/src/drivers/bluetooth.cpp +++ b/src/drivers/bluetooth.cpp @@ -38,7 +38,7 @@ namespace drivers { [[maybe_unused]] static constexpr char kTag[] = "bluetooth"; -DRAM_ATTR static PcmBuffer* sStream = nullptr; +DRAM_ATTR static OutputBuffers* sStreams = nullptr; DRAM_ATTR static std::atomic sVolumeFactor = 1.f; static tasks::WorkerPool* sBgWorker; @@ -97,13 +97,16 @@ IRAM_ATTR auto a2dp_data_cb(uint8_t* buf, int32_t buf_size) -> int32_t { if (buf == nullptr || buf_size <= 0) { return 0; } - PcmBuffer* stream = sStream; - if (stream == nullptr) { + OutputBuffers* streams = sStreams; + if (streams == nullptr) { return 0; } int16_t* samples = reinterpret_cast(buf); - stream->receive({samples, static_cast(buf_size / 2)}, false); + streams->first.receive({samples, static_cast(buf_size / 2)}, false, + false); + streams->second.receive({samples, static_cast(buf_size / 2)}, true, + false); // Apply software volume scaling. 
float factor = sVolumeFactor.load(); @@ -141,14 +144,14 @@ auto Bluetooth::enabled() -> bool { return !bluetooth::BluetoothState::is_in_state(); } -auto Bluetooth::source(PcmBuffer* src) -> void { - if (src == sStream) { +auto Bluetooth::sources(OutputBuffers* src) -> void { + auto lock = bluetooth::BluetoothState::lock(); + if (src == sStreams) { return; } - auto lock = bluetooth::BluetoothState::lock(); - sStream = src; + sStreams = src; tinyfsm::FsmList::dispatch( - bluetooth::events::SourceChanged{}); + bluetooth::events::SourcesChanged{}); } auto Bluetooth::softVolume(float f) -> void { @@ -771,8 +774,8 @@ void Connected::react(const events::PairedDeviceChanged& ev) { } } -void Connected::react(const events::SourceChanged& ev) { - if (sStream != nullptr) { +void Connected::react(const events::SourcesChanged& ev) { + if (sStreams != nullptr) { ESP_LOGI(kTag, "checking source is ready"); esp_a2d_media_ctrl(ESP_A2D_MEDIA_CTRL_CHECK_SRC_RDY); } else { diff --git a/src/drivers/i2s_dac.cpp b/src/drivers/i2s_dac.cpp index 9c9bb793..46bf8e80 100644 --- a/src/drivers/i2s_dac.cpp +++ b/src/drivers/i2s_dac.cpp @@ -52,10 +52,12 @@ extern "C" IRAM_ATTR auto callback(i2s_chan_handle_t handle, assert(event->size % 4 == 0); uint8_t* buf = reinterpret_cast(event->dma_buf); - auto* src = reinterpret_cast(user_ctx); + auto* src = reinterpret_cast(user_ctx); - BaseType_t ret = - src->receive({reinterpret_cast(buf), event->size / 2}, true); + BaseType_t ret1 = src->first.receive( + {reinterpret_cast(buf), event->size / 2}, false, true); + BaseType_t ret2 = src->second.receive( + {reinterpret_cast(buf), event->size / 2}, true, true); // The ESP32's I2S peripheral has a different endianness to its processors. 
// ESP-IDF handles this difference for stereo channels, but not for mono @@ -70,10 +72,10 @@ extern "C" IRAM_ATTR auto callback(i2s_chan_handle_t handle, } } - return ret; + return ret1 || ret2; } -auto I2SDac::create(IGpios& expander, PcmBuffer& buf) +auto I2SDac::create(IGpios& expander, OutputBuffers& bufs) -> std::optional { i2s_chan_handle_t i2s_handle; i2s_chan_config_t channel_config{ @@ -90,7 +92,7 @@ auto I2SDac::create(IGpios& expander, PcmBuffer& buf) // First, instantiate the instance so it can do all of its power on // configuration. std::unique_ptr dac = - std::make_unique(expander, buf, i2s_handle); + std::make_unique(expander, bufs, i2s_handle); // Whilst we wait for the initial boot, we can work on installing the I2S // driver. @@ -122,14 +124,14 @@ auto I2SDac::create(IGpios& expander, PcmBuffer& buf) .on_sent = callback, .on_send_q_ovf = NULL, }; - i2s_channel_register_event_callback(i2s_handle, &callbacks, &buf); + i2s_channel_register_event_callback(i2s_handle, &callbacks, &bufs); return dac.release(); } -I2SDac::I2SDac(IGpios& gpio, PcmBuffer& buf, i2s_chan_handle_t i2s_handle) +I2SDac::I2SDac(IGpios& gpio, OutputBuffers& bufs, i2s_chan_handle_t i2s_handle) : gpio_(gpio), - buffer_(buf), + buffers_(bufs), i2s_handle_(i2s_handle), i2s_active_(false), clock_config_(I2S_STD_CLK_DEFAULT_CONFIG(48000)), diff --git a/src/drivers/include/drivers/bluetooth.hpp b/src/drivers/include/drivers/bluetooth.hpp index 449812d6..99c71e52 100644 --- a/src/drivers/include/drivers/bluetooth.hpp +++ b/src/drivers/include/drivers/bluetooth.hpp @@ -45,7 +45,7 @@ class Bluetooth { auto enable(bool en) -> void; auto enabled() -> bool; - auto source(PcmBuffer*) -> void; + auto sources(OutputBuffers*) -> void; auto softVolume(float) -> void; enum class ConnectionState { @@ -98,7 +98,7 @@ struct Disable : public tinyfsm::Event {}; struct ConnectTimedOut : public tinyfsm::Event {}; struct PairedDeviceChanged : public tinyfsm::Event {}; -struct SourceChanged : public 
tinyfsm::Event {}; +struct SourcesChanged : public tinyfsm::Event {}; struct DeviceDiscovered : public tinyfsm::Event { const Device& device; }; @@ -172,7 +172,7 @@ class BluetoothState : public tinyfsm::Fsm { virtual void react(const events::Disable& ev) = 0; virtual void react(const events::ConnectTimedOut& ev){}; virtual void react(const events::PairedDeviceChanged& ev){}; - virtual void react(const events::SourceChanged& ev){}; + virtual void react(const events::SourcesChanged& ev){}; virtual void react(const events::DeviceDiscovered&); @@ -243,7 +243,7 @@ class Connected : public BluetoothState { void exit() override; void react(const events::PairedDeviceChanged& ev) override; - void react(const events::SourceChanged& ev) override; + void react(const events::SourcesChanged& ev) override; void react(const events::Disable& ev) override; void react(events::internal::Gap ev) override; diff --git a/src/drivers/include/drivers/i2s_dac.hpp b/src/drivers/include/drivers/i2s_dac.hpp index cf9258c0..891acb56 100644 --- a/src/drivers/include/drivers/i2s_dac.hpp +++ b/src/drivers/include/drivers/i2s_dac.hpp @@ -40,9 +40,10 @@ constexpr size_t kI2SBufferLengthFrames = 1024; */ class I2SDac { public: - static auto create(IGpios& expander, PcmBuffer&) -> std::optional; + static auto create(IGpios& expander, OutputBuffers&) + -> std::optional; - I2SDac(IGpios& gpio, PcmBuffer&, i2s_chan_handle_t i2s_handle); + I2SDac(IGpios& gpio, OutputBuffers&, i2s_chan_handle_t i2s_handle); ~I2SDac(); auto SetPaused(bool) -> void; @@ -77,7 +78,7 @@ class I2SDac { auto set_channel(bool) -> void; IGpios& gpio_; - PcmBuffer& buffer_; + OutputBuffers& buffers_; i2s_chan_handle_t i2s_handle_; bool i2s_active_; diff --git a/src/drivers/include/drivers/pcm_buffer.hpp b/src/drivers/include/drivers/pcm_buffer.hpp index 8f53317e..6b38be94 100644 --- a/src/drivers/include/drivers/pcm_buffer.hpp +++ b/src/drivers/include/drivers/pcm_buffer.hpp @@ -39,11 +39,17 @@ class PcmBuffer { * Fills the given 
span with samples. If enough samples are available in * the buffer, then the span will be filled with samples from the buffer. Any * shortfall is made up by padding the given span with zeroes. + * + * If `mix` is set to true then, instead of overwriting the destination span, + * the retrieved samples will be mixed into any existing samples contained + * within the destination. This mixing uses a naive sum approach, and so may + * introduce clipping. */ - auto receive(std::span, bool isr) -> BaseType_t; + auto receive(std::span, bool mix, bool isr) -> BaseType_t; auto clear() -> void; auto isEmpty() -> bool; + auto suspend(bool) -> void; /* * How many samples have been added to this buffer since it was created. This @@ -62,7 +68,7 @@ class PcmBuffer { PcmBuffer& operator=(const PcmBuffer&) = delete; private: - auto readSingle(std::span, bool isr) + auto readSingle(std::span, bool mix, bool isr) -> std::pair; StaticRingbuffer_t meta_; @@ -70,7 +76,21 @@ class PcmBuffer { std::atomic sent_; std::atomic received_; + std::atomic suspended_; + RingbufHandle_t ringbuf_; }; +/* + * Convenience type for a pair of PcmBuffers. Each audio output handles mixing + * streams together to ensure that low-latency sounds in one channel (e.g. a + * system notification bleep) aren't delayed by a large audio buffer in the + * other channel (e.g. a long-running track). + * + * By convention, the first buffer of this pair is used for tracks, whilst the + * second is reserved for 'system sounds'; usually TTS, but potentially maybe + * other informative noises. 
+ */ +using OutputBuffers = std::pair; + } // namespace drivers diff --git a/src/drivers/pcm_buffer.cpp b/src/drivers/pcm_buffer.cpp index 071f5cea..bc58d4b9 100644 --- a/src/drivers/pcm_buffer.cpp +++ b/src/drivers/pcm_buffer.cpp @@ -25,7 +25,8 @@ namespace drivers { [[maybe_unused]] static const char kTag[] = "pcmbuf"; -PcmBuffer::PcmBuffer(size_t size_in_samples) : sent_(0), received_(0) { +PcmBuffer::PcmBuffer(size_t size_in_samples) + : sent_(0), received_(0), suspended_(false) { size_t size_in_bytes = size_in_samples * sizeof(int16_t); ESP_LOGI(kTag, "allocating pcm buffer of size %u (%uKiB)", size_in_samples, size_in_bytes / 1024); @@ -49,18 +50,26 @@ auto PcmBuffer::send(std::span data) -> size_t { return data.size(); } -IRAM_ATTR auto PcmBuffer::receive(std::span dest, bool isr) +IRAM_ATTR auto PcmBuffer::receive(std::span dest, bool mix, bool isr) -> BaseType_t { + if (suspended_) { + if (!mix) { + std::fill_n(dest.begin(), dest.size(), 0); + } + return false; + } + size_t first_read = 0, second_read = 0; BaseType_t ret1 = false, ret2 = false; - std::tie(first_read, ret1) = readSingle(dest, isr); + std::tie(first_read, ret1) = readSingle(dest, mix, isr); if (first_read < dest.size()) { - std::tie(second_read, ret2) = readSingle(dest.subspan(first_read), isr); + std::tie(second_read, ret2) = + readSingle(dest.subspan(first_read), mix, isr); } size_t total_read = first_read + second_read; - if (total_read < dest.size()) { + if (total_read < dest.size() && !mix) { std::fill_n(dest.begin() + total_read, dest.size() - total_read, 0); } @@ -85,6 +94,10 @@ auto PcmBuffer::isEmpty() -> bool { xRingbufferGetCurFreeSize(ringbuf_); } +auto PcmBuffer::suspend(bool s) -> void { + suspended_ = s; +} + auto PcmBuffer::totalSent() -> uint32_t { return sent_; } @@ -93,7 +106,9 @@ auto PcmBuffer::totalReceived() -> uint32_t { return received_; } -IRAM_ATTR auto PcmBuffer::readSingle(std::span dest, bool isr) +IRAM_ATTR auto PcmBuffer::readSingle(std::span dest, + bool mix, 
+ bool isr) -> std::pair { BaseType_t ret; size_t read_bytes = 0; @@ -111,7 +126,18 @@ IRAM_ATTR auto PcmBuffer::readSingle(std::span dest, bool isr) return {read_samples, ret}; } - std::memcpy(dest.data(), data, read_bytes); + if (mix) { + for (size_t i = 0; i < read_samples; i++) { + // Sum the two samples in a 32 bit field so that the addition is always + // safe. + int32_t sum = static_cast(dest[i]) + + static_cast(reinterpret_cast(data)[i]); + // Clip back into the range of a single sample. + dest[i] = std::clamp(sum, INT16_MIN, INT16_MAX); + } + } else { + std::memcpy(dest.data(), data, read_bytes); + } if (isr) { vRingbufferReturnItem(ringbuf_, data); diff --git a/src/tangara/audio/audio_events.hpp b/src/tangara/audio/audio_events.hpp index 91bcf48b..56d150b2 100644 --- a/src/tangara/audio/audio_events.hpp +++ b/src/tangara/audio/audio_events.hpp @@ -144,8 +144,11 @@ struct OutputModeChanged : tinyfsm::Event { std::optional set_to; }; -namespace internal { +struct TtsPlaybackChanged : tinyfsm::Event { + bool is_playing; +}; +namespace internal { struct DecodingStarted : tinyfsm::Event { std::shared_ptr track; }; diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp index 5a91c6f9..1daf568e 100644 --- a/src/tangara/audio/audio_fsm.cpp +++ b/src/tangara/audio/audio_fsm.cpp @@ -44,6 +44,7 @@ #include "sample.hpp" #include "system_fsm/service_locator.hpp" #include "system_fsm/system_events.hpp" +#include "tts/player.hpp" namespace audio { @@ -60,15 +61,22 @@ std::shared_ptr AudioState::sOutput; std::shared_ptr AudioState::sI2SOutput; std::shared_ptr AudioState::sBtOutput; -// Two seconds of samples for two channels, at a representative sample rate. -constexpr size_t kDrainLatencySamples = 48000 * 2 * 2; +// For tracks, keep about two seconds' worth of samples at 2ch 48kHz. This +// is more headroom than we need for small playback, but it doesn't hurt to +// keep some PSRAM in our pockets for a rainy day. 
+constexpr size_t kTrackDrainLatencySamples = 48000 * 2 * 2; -std::unique_ptr AudioState::sDrainBuffer; +// For system sounds, we intentionally choose codecs that are very fast to +// decode. This lets us get away with a much smaller drain buffer. +constexpr size_t kSystemDrainLatencySamples = 48000; + +std::unique_ptr AudioState::sDrainBuffers; std::optional AudioState::sDrainFormat; StreamCues AudioState::sStreamCues; bool AudioState::sIsPaused = true; +bool AudioState::sIsTtsPlaying = false; auto AudioState::emitPlaybackUpdate(bool paused) -> void { std::optional position; @@ -184,6 +192,11 @@ void AudioState::react(const TogglePlayPause& ev) { } } +void AudioState::react(const TtsPlaybackChanged& ev) { + sIsTtsPlaying = ev.is_playing; + updateOutputMode(); +} + void AudioState::react(const internal::DecodingFinished& ev) { // If we just finished playing whatever's at the front of the queue, then we // need to advanve and start playing the next one ASAP in order to continue @@ -219,7 +232,7 @@ void AudioState::react(const internal::StreamStarted& ev) { } sStreamCues.addCue(ev.track, ev.cue_at_sample); - sStreamCues.update(sDrainBuffer->totalReceived()); + sStreamCues.update(sDrainBuffers->first.totalReceived()); if (!sIsPaused && !is_in_state()) { transit(); @@ -362,8 +375,8 @@ void AudioState::react(const OutputModeChanged& ev) { sOutput = sI2SOutput; break; } - sOutput->mode(IAudioOutput::Modes::kOnPaused); sSampleProcessor->SetOutput(sOutput); + updateOutputMode(); // Bluetooth volume isn't 'changed' until we've connected to a device. 
if (new_mode == drivers::NvsStorage::Output::kHeadphones) { @@ -374,6 +387,14 @@ void AudioState::react(const OutputModeChanged& ev) { } } +auto AudioState::updateOutputMode() -> void { + if (is_in_state() || sIsTtsPlaying) { + sOutput->mode(IAudioOutput::Modes::kOnPlaying); + } else { + sOutput->mode(IAudioOutput::Modes::kOnPaused); + } +} + auto AudioState::commitVolume() -> void { auto mode = sServices->nvs().OutputMode(); auto vol = sOutput->GetVolume(); @@ -393,13 +414,20 @@ namespace states { void Uninitialised::react(const system_fsm::BootComplete& ev) { sServices = ev.services; - sDrainBuffer = std::make_unique(kDrainLatencySamples); + sDrainBuffers = std::make_unique( + kTrackDrainLatencySamples, kSystemDrainLatencySamples); + sDrainBuffers->first.suspend(true); sStreamFactory.reset( new FatfsStreamFactory(sServices->database(), sServices->tag_parser())); - sI2SOutput.reset(new I2SAudioOutput(sServices->gpios(), *sDrainBuffer)); + sI2SOutput.reset(new I2SAudioOutput(sServices->gpios(), *sDrainBuffers)); sBtOutput.reset(new BluetoothAudioOutput( - sServices->bluetooth(), *sDrainBuffer, sServices->bg_worker())); + sServices->bluetooth(), *sDrainBuffers, sServices->bg_worker())); + + auto& tts_provider = sServices->tts(); + auto tts_player = std::make_unique( + sServices->bg_worker(), sDrainBuffers->second, *sStreamFactory); + tts_provider.player(std::move(tts_player)); auto& nvs = sServices->nvs(); sI2SOutput->SetMaxVolume(nvs.AmpMaxVolume()); @@ -430,7 +458,7 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) { .left_bias = nvs.AmpLeftBias(), }); - sSampleProcessor.reset(new SampleProcessor(*sDrainBuffer)); + sSampleProcessor.reset(new SampleProcessor(sDrainBuffers->first)); sSampleProcessor->SetOutput(sOutput); sDecoder.reset(Decoder::Start(sSampleProcessor)); @@ -441,6 +469,10 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) { static const char kQueueKey[] = "audio:queue"; static const char kCurrentFileKey[] = 
"audio:current"; +auto Standby::entry() -> void { + updateOutputMode(); +} + void Standby::react(const system_fsm::KeyLockChanged& ev) { if (!ev.locking) { return; @@ -526,7 +558,8 @@ static void heartbeat(TimerHandle_t) { void Playback::entry() { ESP_LOGI(kTag, "audio output resumed"); - sOutput->mode(IAudioOutput::Modes::kOnPlaying); + sDrainBuffers->first.suspend(false); + updateOutputMode(); emitPlaybackUpdate(false); if (!sHeartbeatTimer) { @@ -539,7 +572,7 @@ void Playback::entry() { void Playback::exit() { ESP_LOGI(kTag, "audio output paused"); xTimerStop(sHeartbeatTimer, portMAX_DELAY); - sOutput->mode(IAudioOutput::Modes::kOnPaused); + sDrainBuffers->first.suspend(true); emitPlaybackUpdate(true); } @@ -550,7 +583,7 @@ void Playback::react(const system_fsm::SdStateChanged& ev) { } void Playback::react(const internal::StreamHeartbeat& ev) { - sStreamCues.update(sDrainBuffer->totalReceived()); + sStreamCues.update(sDrainBuffers->first.totalReceived()); if (sStreamCues.hasStream()) { emitPlaybackUpdate(false); diff --git a/src/tangara/audio/audio_fsm.hpp b/src/tangara/audio/audio_fsm.hpp index 0644375f..bc3feb55 100644 --- a/src/tangara/audio/audio_fsm.hpp +++ b/src/tangara/audio/audio_fsm.hpp @@ -48,6 +48,7 @@ class AudioState : public tinyfsm::Fsm { void react(const PlaySineWave&); void react(const SetTrack&); void react(const TogglePlayPause&); + void react(const TtsPlaybackChanged&); void react(const internal::DecodingFinished&); void react(const internal::StreamStarted&); @@ -70,6 +71,7 @@ class AudioState : public tinyfsm::Fsm { virtual void react(const system_fsm::HasPhonesChanged&); protected: + auto updateOutputMode() -> void; auto emitPlaybackUpdate(bool paused) -> void; auto commitVolume() -> void; @@ -82,12 +84,13 @@ class AudioState : public tinyfsm::Fsm { static std::shared_ptr sBtOutput; static std::shared_ptr sOutput; - static std::unique_ptr sDrainBuffer; + static std::unique_ptr sDrainBuffers; static StreamCues sStreamCues; static 
std::optional sDrainFormat; static bool sIsPaused; + static bool sIsTtsPlaying; }; namespace states { @@ -102,6 +105,7 @@ class Uninitialised : public AudioState { class Standby : public AudioState { public: + void entry() override; void react(const system_fsm::KeyLockChanged&) override; void react(const system_fsm::SdStateChanged&) override; diff --git a/src/tangara/audio/bt_audio_output.cpp b/src/tangara/audio/bt_audio_output.cpp index 336fc758..c6c64fd1 100644 --- a/src/tangara/audio/bt_audio_output.cpp +++ b/src/tangara/audio/bt_audio_output.cpp @@ -36,11 +36,11 @@ static constexpr uint16_t kVolumeRange = 60; using ConnectionState = drivers::Bluetooth::ConnectionState; BluetoothAudioOutput::BluetoothAudioOutput(drivers::Bluetooth& bt, - drivers::PcmBuffer& buffer, + drivers::OutputBuffers& bufs, tasks::WorkerPool& p) : IAudioOutput(), bluetooth_(bt), - buffer_(buffer), + buffers_(bufs), bg_worker_(p), volume_() {} @@ -48,9 +48,9 @@ BluetoothAudioOutput::~BluetoothAudioOutput() {} auto BluetoothAudioOutput::changeMode(Modes mode) -> void { if (mode == Modes::kOnPlaying) { - bluetooth_.source(&buffer_); + bluetooth_.sources(&buffers_); } else { - bluetooth_.source(nullptr); + bluetooth_.sources(nullptr); } } diff --git a/src/tangara/audio/bt_audio_output.hpp b/src/tangara/audio/bt_audio_output.hpp index f22f330a..53d2c1a4 100644 --- a/src/tangara/audio/bt_audio_output.hpp +++ b/src/tangara/audio/bt_audio_output.hpp @@ -25,7 +25,7 @@ namespace audio { class BluetoothAudioOutput : public IAudioOutput { public: BluetoothAudioOutput(drivers::Bluetooth& bt, - drivers::PcmBuffer& buf, + drivers::OutputBuffers& bufs, tasks::WorkerPool&); ~BluetoothAudioOutput(); @@ -54,7 +54,7 @@ class BluetoothAudioOutput : public IAudioOutput { private: drivers::Bluetooth& bluetooth_; - drivers::PcmBuffer& buffer_; + drivers::OutputBuffers& buffers_; tasks::WorkerPool& bg_worker_; uint16_t volume_; diff --git a/src/tangara/audio/fatfs_stream_factory.cpp 
b/src/tangara/audio/fatfs_stream_factory.cpp index 735ec134..94f22ae9 100644 --- a/src/tangara/audio/fatfs_stream_factory.cpp +++ b/src/tangara/audio/fatfs_stream_factory.cpp @@ -50,7 +50,6 @@ auto FatfsStreamFactory::create(std::string path, uint32_t offset) -> std::shared_ptr { auto tags = tag_parser_.ReadAndParseTags(path); if (!tags) { - ESP_LOGE(kTag, "failed to read tags"); return {}; } diff --git a/src/tangara/audio/i2s_audio_output.cpp b/src/tangara/audio/i2s_audio_output.cpp index 8222b8c9..55c8bdb8 100644 --- a/src/tangara/audio/i2s_audio_output.cpp +++ b/src/tangara/audio/i2s_audio_output.cpp @@ -42,10 +42,10 @@ static constexpr uint16_t kLineLevelVolume = 0x13d; static constexpr uint16_t kDefaultVolume = 0x100; I2SAudioOutput::I2SAudioOutput(drivers::IGpios& expander, - drivers::PcmBuffer& buffer) + drivers::OutputBuffers& buffers) : IAudioOutput(), expander_(expander), - buffer_(buffer), + buffers_(buffers), dac_(), current_mode_(Modes::kOff), current_config_(), @@ -72,7 +72,7 @@ auto I2SAudioOutput::changeMode(Modes mode) -> void { if (was_off) { // Ensure an I2SDac instance actually exists. 
if (!dac_) { - auto instance = drivers::I2SDac::create(expander_, buffer_); + auto instance = drivers::I2SDac::create(expander_, buffers_); if (!instance) { return; } diff --git a/src/tangara/audio/i2s_audio_output.hpp b/src/tangara/audio/i2s_audio_output.hpp index 35d888b9..2b768ddd 100644 --- a/src/tangara/audio/i2s_audio_output.hpp +++ b/src/tangara/audio/i2s_audio_output.hpp @@ -21,7 +21,7 @@ namespace audio { class I2SAudioOutput : public IAudioOutput { public: - I2SAudioOutput(drivers::IGpios&, drivers::PcmBuffer&); + I2SAudioOutput(drivers::IGpios&, drivers::OutputBuffers&); auto SetMaxVolume(uint16_t) -> void; auto SetVolumeDb(uint16_t) -> void; @@ -51,7 +51,7 @@ class I2SAudioOutput : public IAudioOutput { private: drivers::IGpios& expander_; - drivers::PcmBuffer& buffer_; + drivers::OutputBuffers& buffers_; std::unique_ptr dac_; diff --git a/src/tangara/audio/processor.cpp b/src/tangara/audio/processor.cpp index aa2604b5..2fa7f78e 100644 --- a/src/tangara/audio/processor.cpp +++ b/src/tangara/audio/processor.cpp @@ -347,34 +347,39 @@ auto SampleProcessor::discardCommand(Args& command) -> void { // End of stream commands can just be dropped without further action. 
} -SampleProcessor::Buffer::Buffer() - : buffer_(reinterpret_cast( - heap_caps_calloc(kSampleBufferLength, - sizeof(sample::Sample), - MALLOC_CAP_DMA)), - kSampleBufferLength), +Buffer::Buffer(std::span storage) + : storage_(nullptr), buffer_(storage), samples_in_buffer_() {} + +Buffer::Buffer() + : storage_(reinterpret_cast( + heap_caps_calloc(kSampleBufferLength, + sizeof(sample::Sample), + MALLOC_CAP_DMA))), + buffer_(storage_, kSampleBufferLength), samples_in_buffer_() {} -SampleProcessor::Buffer::~Buffer() { - heap_caps_free(buffer_.data()); +Buffer::~Buffer() { + if (storage_) { + heap_caps_free(storage_); + } } -auto SampleProcessor::Buffer::writeAcquire() -> std::span { +auto Buffer::writeAcquire() -> std::span { return buffer_.subspan(samples_in_buffer_.size()); } -auto SampleProcessor::Buffer::writeCommit(size_t samples) -> void { +auto Buffer::writeCommit(size_t samples) -> void { if (samples == 0) { return; } samples_in_buffer_ = buffer_.first(samples + samples_in_buffer_.size()); } -auto SampleProcessor::Buffer::readAcquire() -> std::span { +auto Buffer::readAcquire() -> std::span { return samples_in_buffer_; } -auto SampleProcessor::Buffer::readCommit(size_t samples) -> void { +auto Buffer::readCommit(size_t samples) -> void { if (samples == 0) { return; } @@ -389,11 +394,11 @@ auto SampleProcessor::Buffer::readCommit(size_t samples) -> void { } } -auto SampleProcessor::Buffer::isEmpty() -> bool { +auto Buffer::isEmpty() -> bool { return samples_in_buffer_.empty(); } -auto SampleProcessor::Buffer::clear() -> void { +auto Buffer::clear() -> void { samples_in_buffer_ = {}; } diff --git a/src/tangara/audio/processor.hpp b/src/tangara/audio/processor.hpp index 45e05291..52bace95 100644 --- a/src/tangara/audio/processor.hpp +++ b/src/tangara/audio/processor.hpp @@ -22,6 +22,35 @@ namespace audio { +/* Utility for managing buffering samples between digital filters. 
*/ +class Buffer { + public: + Buffer(std::span storage); + Buffer(); + ~Buffer(); + + /* Returns a span of the unused space within the buffer. */ + auto writeAcquire() -> std::span; + /* Signals how many samples were just added to the writeAcquire span. */ + auto writeCommit(size_t) -> void; + + /* Returns a span of the samples stored within the buffer. */ + auto readAcquire() -> std::span; + /* Signals how many samples from the readAcquire span were consumed. */ + auto readCommit(size_t) -> void; + + auto isEmpty() -> bool; + auto clear() -> void; + + Buffer(const Buffer&) = delete; + Buffer& operator=(const Buffer&) = delete; + + private: + sample::Sample* storage_; + std::span buffer_; + std::span samples_in_buffer_; +}; + /* * Handle to a persistent task that converts samples between formats (sample * rate, channels, bits per sample), in order to put samples in the preferred @@ -87,33 +116,6 @@ class SampleProcessor { StreamBufferHandle_t source_; drivers::PcmBuffer& sink_; - /* Internal utility for managing buffering samples between our filters. */ - class Buffer { - public: - Buffer(); - ~Buffer(); - - /* Returns a span of the unused space within the buffer. */ - auto writeAcquire() -> std::span; - /* Signals how many samples were just added to the writeAcquire span. */ - auto writeCommit(size_t) -> void; - - /* Returns a span of the samples stored within the buffer. */ - auto readAcquire() -> std::span; - /* Signals how many samples from the readAcquire span were consumed. 
*/ - auto readCommit(size_t) -> void; - - auto isEmpty() -> bool; - auto clear() -> void; - - Buffer(const Buffer&) = delete; - Buffer& operator=(const Buffer&) = delete; - - private: - std::span buffer_; - std::span samples_in_buffer_; - }; - Buffer input_buffer_; Buffer resampled_buffer_; Buffer output_buffer_; diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp new file mode 100644 index 00000000..46e8c48a --- /dev/null +++ b/src/tangara/tts/player.cpp @@ -0,0 +1,192 @@ +/* + * Copyright 2024 jacqueline + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "tts/player.hpp" +#include + +#include "audio/audio_events.hpp" +#include "audio/processor.hpp" +#include "audio/resample.hpp" +#include "codec.hpp" +#include "esp_log.h" +#include "events/event_queue.hpp" +#include "freertos/projdefs.h" +#include "portmacro.h" +#include "sample.hpp" +#include "types.hpp" + +namespace tts { + +[[maybe_unused]] static constexpr char kTag[] = "ttsplay"; + +Player::Player(tasks::WorkerPool& worker, + drivers::PcmBuffer& output, + audio::FatfsStreamFactory& factory) + : bg_(worker), + stream_factory_(factory), + output_(output), + stream_playing_(false), + stream_cancelled_(false) {} + +auto Player::playFile(const std::string& text, const std::string& file) + -> void { + bg_.Dispatch([=, this]() { + { + std::scoped_lock lock{new_stream_mutex_}; + if (stream_playing_) { + stream_cancelled_ = true; + stream_playing_.wait(true); + } + stream_cancelled_ = false; + stream_playing_ = true; + } + + openAndDecode(text, file); + + if (!stream_cancelled_) { + events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = false}); + } + stream_playing_ = false; + stream_playing_.notify_all(); + }); +} + +auto Player::openAndDecode(const std::string& text, const std::string& path) + -> void { + auto stream = stream_factory_.create(path); + if (!stream) { + ESP_LOGW(kTag, "missing '%s' for '%s'", path.c_str(), text.c_str()); + return; + } + + // FIXME: Rather than 
hardcoding WAV support only, we should work out a
+  // proper subset of 'low memory' decoders that can all be used for TTS
+  // playback.
+  if (stream->type() != codecs::StreamType::kWav) {
+    ESP_LOGE(kTag, "'%s' has unsupported encoding", path.c_str());
+    return;
+  }
+
+  auto decoder = codecs::CreateCodecForType(stream->type());
+  if (!decoder) {
+    ESP_LOGE(kTag, "creating decoder failed");
+    return;
+  }
+
+  std::unique_ptr codec{*decoder};
+  auto open_res = codec->OpenStream(stream, 0);
+  if (open_res.has_error()) {
+    ESP_LOGE(kTag, "opening stream failed");
+    return;
+  }
+
+  decodeToSink(*open_res, std::move(codec));
+}
+
+/*
+ * Decodes everything `codec` produces and pushes it into the output
+ * PcmBuffer, converting on the way: the stream is resampled when its sample
+ * rate is not 48000 Hz, and every sample is duplicated when the stream has a
+ * single channel. Dispatches TtsPlaybackChanged{is_playing = true} before
+ * decoding starts, and returns once the output buffer has emptied (or has
+ * been cleared because the stream was cancelled).
+ */
+auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
+                          std::unique_ptr codec) -> void {
+  // Set up buffers to hold samples between the intermediary parts of
+  // processing. We can just use the stack for these, since this method is
+  // called only from background workers, which have enormous stacks.
+  sample::Sample decode_storage[4096];
+  audio::Buffer decode_buf(decode_storage);
+
+  sample::Sample resample_storage[4096];
+  audio::Buffer resample_buf(resample_storage);
+
+  sample::Sample stereo_storage[4096];
+  audio::Buffer stereo_buf(stereo_storage);
+
+  // Work out what processing the codec's output needs.
+  std::unique_ptr resampler;
+  if (format.sample_rate_hz != 48000) {
+    resampler = std::make_unique(format.sample_rate_hz, 48000,
+                                 format.num_channels);
+  }
+  bool double_samples = format.num_channels == 1;
+
+  // Start our playback (wait for previous to end?)
+  events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = true});
+
+  // FIXME: This decode-and-process loop is substantially the same as the audio
+  // processor's filter loop. Ideally we should refactor both of these loops to
+  // reuse code, however I'm holding off on doing this until we've implemented
+  // more advanced audio processing features in the audio processor (EQ, tempo
+  // shifting, etc.) as it's not clear to me yet how much the two codepaths will
+  // be diverging later anyway.
+  while ((codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() ||
+          !stereo_buf.isEmpty()) &&
+         !stream_cancelled_) {
+    // Stage 1: pull more samples out of the codec. The codec is dropped once
+    // it reports the end of the stream, which is what lets the outer loop
+    // finish after the intermediate buffers have emptied.
+    if (codec) {
+      auto decode_res = codec->DecodeTo(decode_buf.writeAcquire());
+      if (decode_res.has_error()) {
+        ESP_LOGE(kTag, "decoding error");
+        break;
+      }
+      decode_buf.writeCommit(decode_res->samples_written);
+      if (decode_res->is_stream_finished) {
+        codec.reset();
+      }
+    }
+
+    // Stage 2: resample, or copy straight through when no resampler was
+    // created above.
+    if (!decode_buf.isEmpty()) {
+      auto resample_input = decode_buf.readAcquire();
+      auto resample_output = resample_buf.writeAcquire();
+
+      size_t read, wrote;
+      if (resampler) {
+        // NOTE(review): the last argument is always false here. If it is an
+        // end-of-stream flag (confirm against the resampler's interface), the
+        // resampler is never told that the codec has finished.
+        std::tie(read, wrote) =
+            resampler->Process(resample_input, resample_output, false);
+      } else {
+        read = wrote = std::min(resample_input.size(), resample_output.size());
+        std::copy_n(resample_input.begin(), read, resample_output.begin());
+      }
+
+      decode_buf.readCommit(read);
+      resample_buf.writeCommit(wrote);
+    }
+
+    // Stage 3: make sure the samples handed to the sink have two channels.
+    if (!resample_buf.isEmpty()) {
+      auto channels_input = resample_buf.readAcquire();
+      auto channels_output = stereo_buf.writeAcquire();
+      size_t read, wrote;
+      if (double_samples) {
+        // Each input sample becomes two output samples. Derive `wrote` from
+        // `read` so that an odd amount of free space can never commit a
+        // sample that was not actually written.
+        read = std::min(channels_input.size(), channels_output.size() / 2);
+        wrote = read * 2;
+        for (size_t i = 0; i < read; i++) {
+          channels_output[i * 2] = channels_input[i];
+          channels_output[(i * 2) + 1] = channels_input[i];
+        }
+      } else {
+        read = wrote = std::min(channels_input.size(), channels_output.size());
+        std::copy_n(channels_input.begin(), read, channels_output.begin());
+      }
+      resample_buf.readCommit(read);
+      stereo_buf.writeCommit(wrote);
+    }
+
+    // The mixin PcmBuffer should almost always be draining, so we can force
+    // samples into it more aggressively than with the main music PcmBuffer.
+    // Still give up as soon as the stream is cancelled: if the sink ever
+    // stops accepting samples, this loop would otherwise spin without ever
+    // noticing the cancellation.
+    while (!stereo_buf.isEmpty() && !stream_cancelled_) {
+      size_t sent = output_.send(stereo_buf.readAcquire());
+      stereo_buf.readCommit(sent);
+    }
+  }
+
+  // Wait for the sink to play out what was queued. A cancelled stream drops
+  // whatever is still queued instead of waiting for it.
+  while (!output_.isEmpty()) {
+    if (stream_cancelled_) {
+      output_.clear();
+    } else {
+      vTaskDelay(pdMS_TO_TICKS(100));
+    }
+  }
+}
+
+}  // namespace tts
diff --git a/src/tangara/tts/player.hpp b/src/tangara/tts/player.hpp
new file mode 100644
index 00000000..d28da474
--- /dev/null
+++ b/src/tangara/tts/player.hpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2024 jacqueline
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include
+
+#include "audio/fatfs_stream_factory.hpp"
+#include "codec.hpp"
+#include "drivers/pcm_buffer.hpp"
+#include "tasks.hpp"
+
+namespace tts {
+
+/*
+ * A TTS Player is the output stage of the TTS pipeline. It receives a stream
+ * of filenames that should be played, and handles decoding these files and
+ * sending them to the output buffer.
+ */
+class Player {
+ public:
+  Player(tasks::WorkerPool&, drivers::PcmBuffer&, audio::FatfsStreamFactory&);
+
+  auto playFile(const std::string& text, const std::string& path) -> void;
+
+  // Not copyable or movable.
+  Player(const Player&) = delete;
+  Player& operator=(const Player&) = delete;
+
+ private:
+  tasks::WorkerPool& bg_;
+  audio::FatfsStreamFactory& stream_factory_;
+  drivers::PcmBuffer& output_;
+
+  std::mutex new_stream_mutex_;
+  std::atomic stream_playing_;
+  std::atomic stream_cancelled_;
+
+  auto openAndDecode(const std::string& text, const std::string& path) -> void;
+  auto decodeToSink(const codecs::ICodec::OutputFormat&,
+                    std::unique_ptr) -> void;
+};
+
+}  // namespace tts
diff --git a/src/tangara/tts/provider.cpp b/src/tangara/tts/provider.cpp
index 7d33bae6..d19500e0 100644
--- a/src/tangara/tts/provider.cpp
+++ b/src/tangara/tts/provider.cpp
@@ -5,21 +5,39 @@
  */
 
 #include "tts/provider.hpp"
 
+#include
+#include
 #include
+#include
 #include
 #include
 
+#include "drivers/storage.hpp"
 #include "esp_log.h"
+#include "komihash.h"
 
 #include "tts/events.hpp"
 
 namespace tts {
 
 [[maybe_unused]] static constexpr char kTag[] = "tts";
 
+// Prefix placed in front of every file name produced by textToFile().
+static const char* kTtsPath = "/.tangara-tts/";
+
+// Maps the text of an utterance to a file path: kTtsPath followed by the
+// 64-bit komihash of the text (last argument 0), written in lowercase
+// hexadecimal without padding. As written this always yields a value; the
+// optional return type is what lets callers treat "no file" as a distinct
+// outcome.
+static auto textToFile(const std::string& text) -> std::optional {
+  uint64_t hash = komihash(text.data(), text.size(), 0);
+  std::stringstream stream;
+  stream << kTtsPath << std::hex << hash;
+  return stream.str();
+}
+
 Provider::Provider() {}
 
+// Stores `p` in player_, taking ownership of it and replacing whatever was
+// held before.
+auto Provider::player(std::unique_ptr p) -> void {
+  player_ = std::move(p);
+}
+
 auto Provider::feed(const Event& e) -> void {
   if (std::holds_alternative(e)) {
     // ESP_LOGI(kTag, "context changed");
@@ -31,6 +49,19 @@ auto Provider::feed(const Event& e) -> void {
       // ESP_LOGI(kTag, "new selection: '%s', interactive? 
%i",
       //          ev.new_selection->description.value_or("").c_str(),
       //          ev.new_selection->is_interactive);
+      // Speak the newly selected element: its description is the utterance
+      // text, and the file to play is derived from that text. Elements with
+      // no description are skipped with a warning, and nothing is played
+      // when no player has been attached.
+      auto text = ev.new_selection->description;
+      if (!text) {
+        ESP_LOGW(kTag, "missing description for element");
+        return;
+      }
+      auto file = textToFile(*text);
+      if (!file) {
+        return;
+      }
+
+      if (player_) {
+        player_->playFile(*text, *file);
+      }
     }
   }
 }
diff --git a/src/tangara/tts/provider.hpp b/src/tangara/tts/provider.hpp
index 59f61a6c..8fe143cc 100644
--- a/src/tangara/tts/provider.hpp
+++ b/src/tangara/tts/provider.hpp
@@ -6,18 +6,35 @@
 
 #pragma once
 
+#include
 #include
 #include
 #include
 
 #include "tts/events.hpp"
+#include "tts/player.hpp"
 
 namespace tts {
 
+/*
+ * A TTS Provider is responsible for receiving system events that may be
+ * relevant to TTS, and digesting them into discrete 'utterances' that can be
+ * used to generate audio feedback.
+ */
 class Provider {
  public:
   Provider();
+
+  // Hands the Provider the Player it should send utterances to. The Provider
+  // takes ownership of it.
+  auto player(std::unique_ptr) -> void;
+
+  // Processes a single event. A new selection that carries a description is
+  // forwarded to the player, when one has been set.
   auto feed(const Event&) -> void;
+
+  // Not copyable or movable.
+  Provider(const Provider&) = delete;
+  Provider& operator=(const Provider&) = delete;
+
+ private:
+  // Set through player(); feed() checks it for null before every use.
+  std::unique_ptr player_;
 };
 
 }  // namespace tts