diff --git a/src/drivers/include/drivers/pcm_buffer.hpp b/src/drivers/include/drivers/pcm_buffer.hpp index 4e5fa041..6b38be94 100644 --- a/src/drivers/include/drivers/pcm_buffer.hpp +++ b/src/drivers/include/drivers/pcm_buffer.hpp @@ -49,6 +49,7 @@ class PcmBuffer { auto clear() -> void; auto isEmpty() -> bool; + auto suspend(bool) -> void; /* * How many samples have been added to this buffer since it was created. This @@ -75,6 +76,8 @@ class PcmBuffer { std::atomic sent_; std::atomic received_; + std::atomic suspended_; + RingbufHandle_t ringbuf_; }; diff --git a/src/drivers/pcm_buffer.cpp b/src/drivers/pcm_buffer.cpp index 1e416301..bc58d4b9 100644 --- a/src/drivers/pcm_buffer.cpp +++ b/src/drivers/pcm_buffer.cpp @@ -25,7 +25,8 @@ namespace drivers { [[maybe_unused]] static const char kTag[] = "pcmbuf"; -PcmBuffer::PcmBuffer(size_t size_in_samples) : sent_(0), received_(0) { +PcmBuffer::PcmBuffer(size_t size_in_samples) + : sent_(0), received_(0), suspended_(false) { size_t size_in_bytes = size_in_samples * sizeof(int16_t); ESP_LOGI(kTag, "allocating pcm buffer of size %u (%uKiB)", size_in_samples, size_in_bytes / 1024); @@ -51,6 +52,13 @@ auto PcmBuffer::send(std::span data) -> size_t { IRAM_ATTR auto PcmBuffer::receive(std::span dest, bool mix, bool isr) -> BaseType_t { + if (suspended_) { + if (!mix) { + std::fill_n(dest.begin(), dest.size(), 0); + } + return false; + } + size_t first_read = 0, second_read = 0; BaseType_t ret1 = false, ret2 = false; std::tie(first_read, ret1) = readSingle(dest, mix, isr); @@ -86,6 +94,10 @@ auto PcmBuffer::isEmpty() -> bool { xRingbufferGetCurFreeSize(ringbuf_); } +auto PcmBuffer::suspend(bool s) -> void { + suspended_ = s; +} + auto PcmBuffer::totalSent() -> uint32_t { return sent_; } diff --git a/src/tangara/audio/audio_events.hpp b/src/tangara/audio/audio_events.hpp index 91bcf48b..56d150b2 100644 --- a/src/tangara/audio/audio_events.hpp +++ b/src/tangara/audio/audio_events.hpp @@ -144,8 +144,11 @@ struct OutputModeChanged : tinyfsm::Event { std::optional set_to; }; -namespace internal { +struct TtsPlaybackChanged : tinyfsm::Event { + bool is_playing; +}; +namespace internal { struct DecodingStarted : tinyfsm::Event { std::shared_ptr track; }; diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp index dac04f75..1daf568e 100644 --- a/src/tangara/audio/audio_fsm.cpp +++ b/src/tangara/audio/audio_fsm.cpp @@ -76,6 +76,7 @@ std::optional AudioState::sDrainFormat; StreamCues AudioState::sStreamCues; bool AudioState::sIsPaused = true; +bool AudioState::sIsTtsPlaying = false; auto AudioState::emitPlaybackUpdate(bool paused) -> void { std::optional position; @@ -191,6 +192,11 @@ void AudioState::react(const TogglePlayPause& ev) { } } +void AudioState::react(const TtsPlaybackChanged& ev) { + sIsTtsPlaying = ev.is_playing; + updateOutputMode(); +} + void AudioState::react(const internal::DecodingFinished& ev) { // If we just finished playing whatever's at the front of the queue, then we // need to advanve and start playing the next one ASAP in order to continue @@ -369,8 +375,8 @@ void AudioState::react(const OutputModeChanged& ev) { sOutput = sI2SOutput; break; } - sOutput->mode(IAudioOutput::Modes::kOnPaused); sSampleProcessor->SetOutput(sOutput); + updateOutputMode(); // Bluetooth volume isn't 'changed' until we've connected to a device. if (new_mode == drivers::NvsStorage::Output::kHeadphones) { @@ -381,6 +387,14 @@ void AudioState::react(const OutputModeChanged& ev) { } } +auto AudioState::updateOutputMode() -> void { + if (is_in_state() || sIsTtsPlaying) { + sOutput->mode(IAudioOutput::Modes::kOnPlaying); + } else { + sOutput->mode(IAudioOutput::Modes::kOnPaused); + } +} + auto AudioState::commitVolume() -> void { auto mode = sServices->nvs().OutputMode(); auto vol = sOutput->GetVolume(); @@ -402,6 +416,7 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) { sDrainBuffers = std::make_unique( kTrackDrainLatencySamples, kSystemDrainLatencySamples); + sDrainBuffers->first.suspend(true); sStreamFactory.reset( new FatfsStreamFactory(sServices->database(), sServices->tag_parser())); @@ -454,6 +469,10 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) { static const char kQueueKey[] = "audio:queue"; static const char kCurrentFileKey[] = "audio:current"; +auto Standby::entry() -> void { + updateOutputMode(); +} + void Standby::react(const system_fsm::KeyLockChanged& ev) { if (!ev.locking) { return; @@ -539,7 +558,8 @@ static void heartbeat(TimerHandle_t) { void Playback::entry() { ESP_LOGI(kTag, "audio output resumed"); - sOutput->mode(IAudioOutput::Modes::kOnPlaying); + sDrainBuffers->first.suspend(false); + updateOutputMode(); emitPlaybackUpdate(false); if (!sHeartbeatTimer) { @@ -552,7 +572,7 @@ void Playback::entry() { void Playback::exit() { ESP_LOGI(kTag, "audio output paused"); xTimerStop(sHeartbeatTimer, portMAX_DELAY); - sOutput->mode(IAudioOutput::Modes::kOnPaused); + sDrainBuffers->first.suspend(true); emitPlaybackUpdate(true); } diff --git a/src/tangara/audio/audio_fsm.hpp b/src/tangara/audio/audio_fsm.hpp index 134d9ffd..bc3feb55 100644 --- a/src/tangara/audio/audio_fsm.hpp +++ b/src/tangara/audio/audio_fsm.hpp @@ -48,6 +48,7 @@ class AudioState : public tinyfsm::Fsm { void react(const PlaySineWave&); void react(const SetTrack&); void react(const TogglePlayPause&); + void react(const TtsPlaybackChanged&); void react(const internal::DecodingFinished&); void react(const internal::StreamStarted&); @@ -70,6 +71,7 @@ class AudioState : public tinyfsm::Fsm { virtual void react(const system_fsm::HasPhonesChanged&); protected: + auto updateOutputMode() -> void; auto emitPlaybackUpdate(bool paused) -> void; auto commitVolume() -> void; @@ -88,6 +90,7 @@ class AudioState : public tinyfsm::Fsm { static std::optional sDrainFormat; static bool sIsPaused; + static bool sIsTtsPlaying; }; namespace states { @@ -102,6 +105,7 @@ class Uninitialised : public AudioState { class Standby : public AudioState { public: + void entry() override; void react(const system_fsm::KeyLockChanged&) override; void react(const system_fsm::SdStateChanged&) override; diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp index b5b99b5d..a803ce57 100644 --- a/src/tangara/tts/player.cpp +++ b/src/tangara/tts/player.cpp @@ -5,11 +5,14 @@ */ #include "tts/player.hpp" +#include +#include "audio/audio_events.hpp" #include "audio/processor.hpp" #include "audio/resample.hpp" #include "codec.hpp" #include "esp_log.h" +#include "events/event_queue.hpp" #include "freertos/projdefs.h" #include "portmacro.h" #include "sample.hpp" @@ -22,47 +25,70 @@ namespace tts { Player::Player(tasks::WorkerPool& worker, drivers::PcmBuffer& output, audio::FatfsStreamFactory& factory) - : bg_(worker), stream_factory_(factory), output_(output), play_count_(0) {} + : bg_(worker), + stream_factory_(factory), + output_(output), + stream_playing_(false), + stream_cancelled_(false) {} auto Player::playFile(const std::string& path) -> void { ESP_LOGI(kTag, "playing '%s'", path.c_str()); - int this_play = ++play_count_; bg_.Dispatch([=, this]() { - auto stream = stream_factory_.create(path); - if (!stream) { - ESP_LOGE(kTag, "creating stream failed"); - return; + // Interrupt current playback + { + std::scoped_lock lock{new_stream_mutex_}; + if (stream_playing_) { + stream_cancelled_ = true; + stream_playing_.wait(true); + } + stream_cancelled_ = false; + stream_playing_ = true; } - // FIXME: Rather than hardcoding WAV support only, we should work out a - // proper subset of 'low memory' decoders that can all be used for TTS - // playback. - if (stream->type() != codecs::StreamType::kWav) { - ESP_LOGE(kTag, "stream was unsupported type"); - return; - } + openAndDecode(path); - auto decoder = codecs::CreateCodecForType(stream->type()); - if (!decoder) { - ESP_LOGE(kTag, "creating decoder failed"); - return; + if (!stream_cancelled_) { + events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = false}); } + stream_playing_ = false; + stream_playing_.notify_all(); + }); +} - std::unique_ptr codec{*decoder}; - auto open_res = codec->OpenStream(stream, 0); - if (open_res.has_error()) { - ESP_LOGE(kTag, "opening stream failed"); - return; - } +auto Player::openAndDecode(const std::string& path) -> void { + auto stream = stream_factory_.create(path); + if (!stream) { + ESP_LOGE(kTag, "creating stream failed"); + return; + } - decodeToSink(*open_res, std::move(codec), this_play); - }); + // FIXME: Rather than hardcoding WAV support only, we should work out a + // proper subset of 'low memory' decoders that can all be used for TTS + // playback. + if (stream->type() != codecs::StreamType::kWav) { + ESP_LOGE(kTag, "stream was unsupported type"); + return; + } + + auto decoder = codecs::CreateCodecForType(stream->type()); + if (!decoder) { + ESP_LOGE(kTag, "creating decoder failed"); + return; + } + + std::unique_ptr codec{*decoder}; + auto open_res = codec->OpenStream(stream, 0); + if (open_res.has_error()) { + ESP_LOGE(kTag, "opening stream failed"); + return; + } + + decodeToSink(*open_res, std::move(codec)); } auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format, - std::unique_ptr codec, - int play_count) -> void { + std::unique_ptr codec) -> void { // Set up buffers to hold samples between the intermediary parts of // processing. We can just use the stack for these, since this method is // called only from background workers, which have enormous stacks. @@ -83,20 +109,18 @@ auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format, } bool double_samples = format.num_channels == 1; + // Start our playback (wait for previous to end?) + events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = true}); + // FIXME: This decode-and-process loop is substantially the same as the audio // processor's filter loop. Ideally we should refactor both of these loops to // reuse code, however I'm holding off on doing this until we've implemented // more advanced audio processing features in the audio processor (EQ, tempo // shifting, etc.) as it's not clear to me yet how much the two codepaths will // be diverging later anyway. - while (codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() || - !stereo_buf.isEmpty()) { - if (play_count != play_count_) { - // FIXME: This is a little unsafe and could maybe take out the first few - // samples of the next file. - output_.clear(); - break; - } + while ((codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() || + !stereo_buf.isEmpty()) && + !stream_cancelled_) { if (codec) { auto decode_res = codec->DecodeTo(decode_buf.writeAcquire()); if (decode_res.has_error()) { @@ -156,6 +180,14 @@ auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format, stereo_buf.readCommit(sent); } } + + while (!output_.isEmpty()) { + if (stream_cancelled_) { + output_.clear(); + } else { + vTaskDelay(pdMS_TO_TICKS(100)); + } + } } } // namespace tts diff --git a/src/tangara/tts/player.hpp b/src/tangara/tts/player.hpp index 0a3ba723..47479007 100644 --- a/src/tangara/tts/player.hpp +++ b/src/tangara/tts/player.hpp @@ -35,11 +35,13 @@ class Player { audio::FatfsStreamFactory& stream_factory_; drivers::PcmBuffer& output_; - std::atomic play_count_; + std::mutex new_stream_mutex_; + std::atomic stream_playing_; + std::atomic stream_cancelled_; + auto openAndDecode(const std::string& path) -> void; auto decodeToSink(const codecs::ICodec::OutputFormat&, - std::unique_ptr, - int play_count) -> void; + std::unique_ptr) -> void; }; } // namespace tts