Pause and unpause the current audio output in response to TTS

custom
jacqueline 7 months ago
parent 542ebc6531
commit c51709f99f
  1. 3
      src/drivers/include/drivers/pcm_buffer.hpp
  2. 14
      src/drivers/pcm_buffer.cpp
  3. 5
      src/tangara/audio/audio_events.hpp
  4. 26
      src/tangara/audio/audio_fsm.cpp
  5. 4
      src/tangara/audio/audio_fsm.hpp
  6. 102
      src/tangara/tts/player.cpp
  7. 8
      src/tangara/tts/player.hpp

@ -49,6 +49,7 @@ class PcmBuffer {
auto clear() -> void;
auto isEmpty() -> bool;
auto suspend(bool) -> void;
/*
* How many samples have been added to this buffer since it was created. This
@ -75,6 +76,8 @@ class PcmBuffer {
std::atomic<uint32_t> sent_;
std::atomic<uint32_t> received_;
std::atomic<bool> suspended_;
RingbufHandle_t ringbuf_;
};

@ -25,7 +25,8 @@ namespace drivers {
[[maybe_unused]] static const char kTag[] = "pcmbuf";
PcmBuffer::PcmBuffer(size_t size_in_samples) : sent_(0), received_(0) {
PcmBuffer::PcmBuffer(size_t size_in_samples)
: sent_(0), received_(0), suspended_(false) {
size_t size_in_bytes = size_in_samples * sizeof(int16_t);
ESP_LOGI(kTag, "allocating pcm buffer of size %u (%uKiB)", size_in_samples,
size_in_bytes / 1024);
@ -51,6 +52,13 @@ auto PcmBuffer::send(std::span<const int16_t> data) -> size_t {
IRAM_ATTR auto PcmBuffer::receive(std::span<int16_t> dest, bool mix, bool isr)
-> BaseType_t {
if (suspended_) {
if (!mix) {
std::fill_n(dest.begin(), dest.size(), 0);
}
return false;
}
size_t first_read = 0, second_read = 0;
BaseType_t ret1 = false, ret2 = false;
std::tie(first_read, ret1) = readSingle(dest, mix, isr);
@ -86,6 +94,10 @@ auto PcmBuffer::isEmpty() -> bool {
xRingbufferGetCurFreeSize(ringbuf_);
}
// Sets or clears the buffer's suspended flag. While the flag is set,
// receive() returns early without reading from the ring buffer (and
// zero-fills the destination when it is not mixing).
auto PcmBuffer::suspend(bool s) -> void {
  suspended_.store(s);
}
// Returns the current value of the atomic `sent_` counter.
auto PcmBuffer::totalSent() -> uint32_t {
  return sent_.load();
}

@ -144,8 +144,11 @@ struct OutputModeChanged : tinyfsm::Event {
std::optional<drivers::NvsStorage::Output> set_to;
};
namespace internal {
// Event telling the audio FSM whether text-to-speech playback is active.
// tts::Player dispatches it with is_playing = true before its decode loop and
// with is_playing = false once a stream finishes without being cancelled;
// AudioState::react(const TtsPlaybackChanged&) stores the flag and
// re-evaluates the output mode.
struct TtsPlaybackChanged : tinyfsm::Event {
// True while a TTS stream is being played, false once it has ended.
bool is_playing;
};
namespace internal {
// Event carrying the TrackInfo of a track.
// NOTE(review): presumably emitted when decoding of that track begins — the
// emitter is not visible here, confirm against the dispatch site.
struct DecodingStarted : tinyfsm::Event {
// Shared handle to the track this event refers to.
std::shared_ptr<TrackInfo> track;
};

@ -76,6 +76,7 @@ std::optional<IAudioOutput::Format> AudioState::sDrainFormat;
// Definitions of AudioState's static (FSM-wide) members.
StreamCues AudioState::sStreamCues;
// Starts out true.
bool AudioState::sIsPaused = true;
// Mirrors the most recent TtsPlaybackChanged event. While true,
// updateOutputMode() selects kOnPlaying even when the FSM is not in the
// Playback state.
bool AudioState::sIsTtsPlaying = false;
auto AudioState::emitPlaybackUpdate(bool paused) -> void {
std::optional<uint32_t> position;
@ -191,6 +192,11 @@ void AudioState::react(const TogglePlayPause& ev) {
}
}
// Handles a change in TTS playback: remembers whether TTS is currently
// playing, then re-evaluates which mode the audio output should be in.
void AudioState::react(const TtsPlaybackChanged& ev) {
  const bool tts_active = ev.is_playing;
  sIsTtsPlaying = tts_active;
  updateOutputMode();
}
void AudioState::react(const internal::DecodingFinished& ev) {
// If we just finished playing whatever's at the front of the queue, then we
// need to advance and start playing the next one ASAP in order to continue
@ -369,8 +375,8 @@ void AudioState::react(const OutputModeChanged& ev) {
sOutput = sI2SOutput;
break;
}
sOutput->mode(IAudioOutput::Modes::kOnPaused);
sSampleProcessor->SetOutput(sOutput);
updateOutputMode();
// Bluetooth volume isn't 'changed' until we've connected to a device.
if (new_mode == drivers::NvsStorage::Output::kHeadphones) {
@ -381,6 +387,14 @@ void AudioState::react(const OutputModeChanged& ev) {
}
}
// Selects the mode of the current audio output. The output is put into
// kOnPlaying when the FSM is in the Playback state or TTS is playing, and into
// kOnPaused otherwise.
auto AudioState::updateOutputMode() -> void {
  const bool audible = is_in_state<states::Playback>() || sIsTtsPlaying;
  sOutput->mode(audible ? IAudioOutput::Modes::kOnPlaying
                        : IAudioOutput::Modes::kOnPaused);
}
auto AudioState::commitVolume() -> void {
auto mode = sServices->nvs().OutputMode();
auto vol = sOutput->GetVolume();
@ -402,6 +416,7 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) {
sDrainBuffers = std::make_unique<drivers::OutputBuffers>(
kTrackDrainLatencySamples, kSystemDrainLatencySamples);
sDrainBuffers->first.suspend(true);
sStreamFactory.reset(
new FatfsStreamFactory(sServices->database(), sServices->tag_parser()));
@ -454,6 +469,10 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) {
static const char kQueueKey[] = "audio:queue";
static const char kCurrentFileKey[] = "audio:current";
// Entry hook for the Standby state: re-evaluates the output mode.
// updateOutputMode() picks kOnPlaying only when the FSM is in Playback or TTS
// is playing, so from Standby the result presumably hinges on sIsTtsPlaying.
auto Standby::entry() -> void {
updateOutputMode();
}
void Standby::react(const system_fsm::KeyLockChanged& ev) {
if (!ev.locking) {
return;
@ -539,7 +558,8 @@ static void heartbeat(TimerHandle_t) {
void Playback::entry() {
ESP_LOGI(kTag, "audio output resumed");
sOutput->mode(IAudioOutput::Modes::kOnPlaying);
sDrainBuffers->first.suspend(false);
updateOutputMode();
emitPlaybackUpdate(false);
if (!sHeartbeatTimer) {
@ -552,7 +572,7 @@ void Playback::entry() {
// Exit hook for the Playback state: logs the pause, stops the heartbeat timer,
// suspends the first drain buffer and publishes a "paused" playback update.
// NOTE(review): this listing looks like a rendered diff with its +/- markers
// stripped, so the mode(kOnPaused) call and the suspend(true) call below may
// be the removed/added pair of a single change rather than two live
// statements — confirm against the repository before relying on both.
void Playback::exit() {
ESP_LOGI(kTag, "audio output paused");
// portMAX_DELAY: wait as long as needed for the stop command to be queued.
xTimerStop(sHeartbeatTimer, portMAX_DELAY);
sOutput->mode(IAudioOutput::Modes::kOnPaused);
// While suspended, the first drain buffer's receive() returns early without
// reading from its ring buffer.
sDrainBuffers->first.suspend(true);
emitPlaybackUpdate(true);
}

@ -48,6 +48,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
void react(const PlaySineWave&);
void react(const SetTrack&);
void react(const TogglePlayPause&);
void react(const TtsPlaybackChanged&);
void react(const internal::DecodingFinished&);
void react(const internal::StreamStarted&);
@ -70,6 +71,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
virtual void react(const system_fsm::HasPhonesChanged&);
protected:
auto updateOutputMode() -> void;
auto emitPlaybackUpdate(bool paused) -> void;
auto commitVolume() -> void;
@ -88,6 +90,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
static std::optional<IAudioOutput::Format> sDrainFormat;
static bool sIsPaused;
static bool sIsTtsPlaying;
};
namespace states {
@ -102,6 +105,7 @@ class Uninitialised : public AudioState {
class Standby : public AudioState {
public:
void entry() override;
void react(const system_fsm::KeyLockChanged&) override;
void react(const system_fsm::SdStateChanged&) override;

@ -5,11 +5,14 @@
*/
#include "tts/player.hpp"
#include <mutex>
#include "audio/audio_events.hpp"
#include "audio/processor.hpp"
#include "audio/resample.hpp"
#include "codec.hpp"
#include "esp_log.h"
#include "events/event_queue.hpp"
#include "freertos/projdefs.h"
#include "portmacro.h"
#include "sample.hpp"
@ -22,47 +25,70 @@ namespace tts {
Player::Player(tasks::WorkerPool& worker,
drivers::PcmBuffer& output,
audio::FatfsStreamFactory& factory)
: bg_(worker), stream_factory_(factory), output_(output), play_count_(0) {}
: bg_(worker),
stream_factory_(factory),
output_(output),
stream_playing_(false),
stream_cancelled_(false) {}
auto Player::playFile(const std::string& path) -> void {
ESP_LOGI(kTag, "playing '%s'", path.c_str());
int this_play = ++play_count_;
bg_.Dispatch<void>([=, this]() {
auto stream = stream_factory_.create(path);
if (!stream) {
ESP_LOGE(kTag, "creating stream failed");
return;
// Interrupt current playback
{
std::scoped_lock<std::mutex> lock{new_stream_mutex_};
if (stream_playing_) {
stream_cancelled_ = true;
stream_playing_.wait(true);
}
stream_cancelled_ = false;
stream_playing_ = true;
}
// FIXME: Rather than hardcoding WAV support only, we should work out a
// proper subset of 'low memory' decoders that can all be used for TTS
// playback.
if (stream->type() != codecs::StreamType::kWav) {
ESP_LOGE(kTag, "stream was unsupported type");
return;
}
openAndDecode(path);
auto decoder = codecs::CreateCodecForType(stream->type());
if (!decoder) {
ESP_LOGE(kTag, "creating decoder failed");
return;
if (!stream_cancelled_) {
events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = false});
}
stream_playing_ = false;
stream_playing_.notify_all();
});
}
std::unique_ptr<codecs::ICodec> codec{*decoder};
auto open_res = codec->OpenStream(stream, 0);
if (open_res.has_error()) {
ESP_LOGE(kTag, "opening stream failed");
return;
}
auto Player::openAndDecode(const std::string& path) -> void {
auto stream = stream_factory_.create(path);
if (!stream) {
ESP_LOGE(kTag, "creating stream failed");
return;
}
decodeToSink(*open_res, std::move(codec), this_play);
});
// FIXME: Rather than hardcoding WAV support only, we should work out a
// proper subset of 'low memory' decoders that can all be used for TTS
// playback.
if (stream->type() != codecs::StreamType::kWav) {
ESP_LOGE(kTag, "stream was unsupported type");
return;
}
auto decoder = codecs::CreateCodecForType(stream->type());
if (!decoder) {
ESP_LOGE(kTag, "creating decoder failed");
return;
}
std::unique_ptr<codecs::ICodec> codec{*decoder};
auto open_res = codec->OpenStream(stream, 0);
if (open_res.has_error()) {
ESP_LOGE(kTag, "opening stream failed");
return;
}
decodeToSink(*open_res, std::move(codec));
}
auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
std::unique_ptr<codecs::ICodec> codec,
int play_count) -> void {
std::unique_ptr<codecs::ICodec> codec) -> void {
// Set up buffers to hold samples between the intermediary parts of
// processing. We can just use the stack for these, since this method is
// called only from background workers, which have enormous stacks.
@ -83,20 +109,18 @@ auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
}
bool double_samples = format.num_channels == 1;
// Start our playback (wait for previous to end?)
events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = true});
// FIXME: This decode-and-process loop is substantially the same as the audio
// processor's filter loop. Ideally we should refactor both of these loops to
// reuse code, however I'm holding off on doing this until we've implemented
// more advanced audio processing features in the audio processor (EQ, tempo
// shifting, etc.) as it's not clear to me yet how much the two codepaths will
// be diverging later anyway.
while (codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() ||
!stereo_buf.isEmpty()) {
if (play_count != play_count_) {
// FIXME: This is a little unsafe and could maybe take out the first few
// samples of the next file.
output_.clear();
break;
}
while ((codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() ||
!stereo_buf.isEmpty()) &&
!stream_cancelled_) {
if (codec) {
auto decode_res = codec->DecodeTo(decode_buf.writeAcquire());
if (decode_res.has_error()) {
@ -156,6 +180,14 @@ auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
stereo_buf.readCommit(sent);
}
}
while (!output_.isEmpty()) {
if (stream_cancelled_) {
output_.clear();
} else {
vTaskDelay(pdMS_TO_TICKS(100));
}
}
}
} // namespace tts

@ -35,11 +35,13 @@ class Player {
audio::FatfsStreamFactory& stream_factory_;
drivers::PcmBuffer& output_;
std::atomic<int> play_count_;
std::mutex new_stream_mutex_;
std::atomic<bool> stream_playing_;
std::atomic<bool> stream_cancelled_;
auto openAndDecode(const std::string& path) -> void;
auto decodeToSink(const codecs::ICodec::OutputFormat&,
std::unique_ptr<codecs::ICodec>,
int play_count) -> void;
std::unique_ptr<codecs::ICodec>) -> void;
};
} // namespace tts

Loading…
Cancel
Save