diff --git a/src/tangara/audio/processor.cpp b/src/tangara/audio/processor.cpp index aa2604b5..2fa7f78e 100644 --- a/src/tangara/audio/processor.cpp +++ b/src/tangara/audio/processor.cpp @@ -347,34 +347,39 @@ auto SampleProcessor::discardCommand(Args& command) -> void { // End of stream commands can just be dropped without further action. } -SampleProcessor::Buffer::Buffer() - : buffer_(reinterpret_cast( - heap_caps_calloc(kSampleBufferLength, - sizeof(sample::Sample), - MALLOC_CAP_DMA)), - kSampleBufferLength), +Buffer::Buffer(std::span storage) + : storage_(nullptr), buffer_(storage), samples_in_buffer_() {} + +Buffer::Buffer() + : storage_(reinterpret_cast( + heap_caps_calloc(kSampleBufferLength, + sizeof(sample::Sample), + MALLOC_CAP_DMA))), + buffer_(storage_, kSampleBufferLength), samples_in_buffer_() {} -SampleProcessor::Buffer::~Buffer() { - heap_caps_free(buffer_.data()); +Buffer::~Buffer() { + if (storage_) { + heap_caps_free(storage_); + } } -auto SampleProcessor::Buffer::writeAcquire() -> std::span { +auto Buffer::writeAcquire() -> std::span { return buffer_.subspan(samples_in_buffer_.size()); } -auto SampleProcessor::Buffer::writeCommit(size_t samples) -> void { +auto Buffer::writeCommit(size_t samples) -> void { if (samples == 0) { return; } samples_in_buffer_ = buffer_.first(samples + samples_in_buffer_.size()); } -auto SampleProcessor::Buffer::readAcquire() -> std::span { +auto Buffer::readAcquire() -> std::span { return samples_in_buffer_; } -auto SampleProcessor::Buffer::readCommit(size_t samples) -> void { +auto Buffer::readCommit(size_t samples) -> void { if (samples == 0) { return; } @@ -389,11 +394,11 @@ auto SampleProcessor::Buffer::readCommit(size_t samples) -> void { } } -auto SampleProcessor::Buffer::isEmpty() -> bool { +auto Buffer::isEmpty() -> bool { return samples_in_buffer_.empty(); } -auto SampleProcessor::Buffer::clear() -> void { +auto Buffer::clear() -> void { samples_in_buffer_ = {}; } diff --git a/src/tangara/audio/processor.hpp b/src/tangara/audio/processor.hpp index 45e05291..52bace95 100644 --- a/src/tangara/audio/processor.hpp +++ b/src/tangara/audio/processor.hpp @@ -22,6 +22,35 @@ namespace audio { +/* Utility for managing buffering samples between digital filters. */ +class Buffer { + public: + Buffer(std::span storage); + Buffer(); + ~Buffer(); + + /* Returns a span of the unused space within the buffer. */ + auto writeAcquire() -> std::span; + /* Signals how many samples were just added to the writeAcquire span. */ + auto writeCommit(size_t) -> void; + + /* Returns a span of the samples stored within the buffer. */ + auto readAcquire() -> std::span; + /* Signals how many samples from the readAcquire span were consumed. */ + auto readCommit(size_t) -> void; + + auto isEmpty() -> bool; + auto clear() -> void; + + Buffer(const Buffer&) = delete; + Buffer& operator=(const Buffer&) = delete; + + private: + sample::Sample* storage_; + std::span buffer_; + std::span samples_in_buffer_; +}; + /* * Handle to a persistent task that converts samples between formats (sample * rate, channels, bits per sample), in order to put samples in the preferred @@ -87,33 +116,6 @@ class SampleProcessor { StreamBufferHandle_t source_; drivers::PcmBuffer& sink_; - /* Internal utility for managing buffering samples between our filters. */ - class Buffer { - public: - Buffer(); - ~Buffer(); - - /* Returns a span of the unused space within the buffer. */ - auto writeAcquire() -> std::span; - /* Signals how many samples were just added to the writeAcquire span. */ - auto writeCommit(size_t) -> void; - - /* Returns a span of the samples stored within the buffer. */ - auto readAcquire() -> std::span; - /* Signals how many samples from the readAcquire span were consumed. */ - auto readCommit(size_t) -> void; - - auto isEmpty() -> bool; - auto clear() -> void; - - Buffer(const Buffer&) = delete; - Buffer& operator=(const Buffer&) = delete; - - private: - std::span buffer_; - std::span samples_in_buffer_; - }; - Buffer input_buffer_; Buffer resampled_buffer_; Buffer output_buffer_; diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp index 3fcd88bc..b5b99b5d 100644 --- a/src/tangara/tts/player.cpp +++ b/src/tangara/tts/player.cpp @@ -6,8 +6,12 @@ #include "tts/player.hpp" +#include "audio/processor.hpp" +#include "audio/resample.hpp" #include "codec.hpp" #include "esp_log.h" +#include "freertos/projdefs.h" +#include "portmacro.h" #include "sample.hpp" #include "types.hpp" @@ -18,57 +22,140 @@ namespace tts { Player::Player(tasks::WorkerPool& worker, drivers::PcmBuffer& output, audio::FatfsStreamFactory& factory) - : bg_(worker), stream_factory_(factory), output_(output) {} + : bg_(worker), stream_factory_(factory), output_(output), play_count_(0) {} auto Player::playFile(const std::string& path) -> void { ESP_LOGI(kTag, "playing '%s'", path.c_str()); - bg_.Dispatch([=]() { + int this_play = ++play_count_; + + bg_.Dispatch([=, this]() { auto stream = stream_factory_.create(path); if (!stream) { ESP_LOGE(kTag, "creating stream failed"); return; } + + // FIXME: Rather than hardcoding WAV support only, we should work out a + // proper subset of 'low memory' decoders that can all be used for TTS + // playback. if (stream->type() != codecs::StreamType::kWav) { ESP_LOGE(kTag, "stream was unsupported type"); return; } + auto decoder = codecs::CreateCodecForType(stream->type()); if (!decoder) { ESP_LOGE(kTag, "creating decoder failed"); return; } + std::unique_ptr codec{*decoder}; auto open_res = codec->OpenStream(stream, 0); if (open_res.has_error()) { ESP_LOGE(kTag, "opening stream failed"); return; } - // if (open_res->sample_rate_hz != 48000 || open_res->num_channels != 2) { - // ESP_LOGE(kTag, "stream format is wrong (was %u channels @ %lu hz)", - // open_res->num_channels, open_res->sample_rate_hz); - // return; - // } - sample::Sample decode_buf[4096]; - for (;;) { - auto decode_res = codec->DecodeTo(decode_buf); + + decodeToSink(*open_res, std::move(codec), this_play); + }); +} + +auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format, + std::unique_ptr codec, + int play_count) -> void { + // Set up buffers to hold samples between the intermediary parts of + // processing. We can just use the stack for these, since this method is + // called only from background workers, which have enormous stacks. + sample::Sample decode_storage[4096]; + audio::Buffer decode_buf(decode_storage); + + sample::Sample resample_storage[4096]; + audio::Buffer resample_buf(resample_storage); + + sample::Sample stereo_storage[4096]; + audio::Buffer stereo_buf(stereo_storage); + + // Work out what processing the codec's output needs. + std::unique_ptr resampler; + if (format.sample_rate_hz != 48000) { + resampler = std::make_unique(format.sample_rate_hz, 48000, + format.num_channels); + } + bool double_samples = format.num_channels == 1; + + // FIXME: This decode-and-process loop is substantially the same as the audio + // processor's filter loop. Ideally we should refactor both of these loops to + // reuse code, however I'm holding off on doing this until we've implemented + // more advanced audio processing features in the audio processor (EQ, tempo + // shifting, etc.) as it's not clear to me yet how much the two codepaths will + // be diverging later anyway. + while (codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() || + !stereo_buf.isEmpty()) { + if (play_count != play_count_) { + // FIXME: This is a little unsafe and could maybe take out the first few + // samples of the next file. + output_.clear(); + break; + } + if (codec) { + auto decode_res = codec->DecodeTo(decode_buf.writeAcquire()); if (decode_res.has_error()) { ESP_LOGE(kTag, "decoding error"); - return; + break; } + decode_buf.writeCommit(decode_res->samples_written); if (decode_res->is_stream_finished) { - break; + codec.reset(); } + } + + if (!decode_buf.isEmpty()) { + auto resample_input = decode_buf.readAcquire(); + auto resample_output = resample_buf.writeAcquire(); - std::span decode_span{decode_buf, - decode_res->samples_written}; - while (!decode_span.empty()) { - size_t sent = output_.send(decode_span); - decode_span = decode_span.subspan(sent); + size_t read, wrote; + if (resampler) { + std::tie(read, wrote) = + resampler->Process(resample_input, resample_output, false); + } else { + read = wrote = std::min(resample_input.size(), resample_output.size()); + std::copy_n(resample_input.begin(), read, resample_output.begin()); } + + decode_buf.readCommit(read); + resample_buf.writeCommit(wrote); } - ESP_LOGI(kTag, "finished playing okay"); - }); + if (!resample_buf.isEmpty()) { + auto channels_input = resample_buf.readAcquire(); + auto channels_output = stereo_buf.writeAcquire(); + size_t read, wrote; + if (double_samples) { + wrote = channels_output.size(); + read = wrote / 2; + if (read > channels_input.size()) { + read = channels_input.size(); + wrote = read * 2; + } + for (size_t i = 0; i < read; i++) { + channels_output[i * 2] = channels_input[i]; + channels_output[(i * 2) + 1] = channels_input[i]; + } + } else { + read = wrote = std::min(channels_input.size(), channels_output.size()); + std::copy_n(channels_input.begin(), read, channels_output.begin()); + } + resample_buf.readCommit(read); + stereo_buf.writeCommit(wrote); + } + + // The mixin PcmBuffer should almost always be draining, so we can force + // samples into it more aggressively than with the main music PcmBuffer. + while (!stereo_buf.isEmpty()) { + size_t sent = output_.send(stereo_buf.readAcquire()); + stereo_buf.readCommit(sent); + } + } } } // namespace tts diff --git a/src/tangara/tts/player.hpp b/src/tangara/tts/player.hpp index a132b9cd..0a3ba723 100644 --- a/src/tangara/tts/player.hpp +++ b/src/tangara/tts/player.hpp @@ -9,6 +9,7 @@ #include #include "audio/fatfs_stream_factory.hpp" +#include "codec.hpp" #include "drivers/pcm_buffer.hpp" #include "tasks.hpp" @@ -33,6 +34,12 @@ class Player { tasks::WorkerPool& bg_; audio::FatfsStreamFactory& stream_factory_; drivers::PcmBuffer& output_; + + std::atomic play_count_; + + auto decodeToSink(const codecs::ICodec::OutputFormat&, + std::unique_ptr, + int play_count) -> void; }; } // namespace tts diff --git a/src/tangara/tts/provider.cpp b/src/tangara/tts/provider.cpp index b7c1e55d..2b1dd4e6 100644 --- a/src/tangara/tts/provider.cpp +++ b/src/tangara/tts/provider.cpp @@ -28,7 +28,7 @@ static const char* kTtsPath = "/.tangara-tts/"; static auto textToFile(const std::string& text) -> std::optional { uint64_t hash = komihash(text.data(), text.size(), 0); std::stringstream stream; - stream << kTtsPath << std::hex << hash << ".wav"; + stream << kTtsPath << std::hex << hash; return stream.str(); }