Play TTS files in response to TTS prompts, but it's legible now

- input files are upsampled and padded to stereo before playback
 - any in-progress playback is cancelled before playing a new file
custom
jacqueline 7 months ago
parent d0b739c66e
commit 542ebc6531
  1. 33
      src/tangara/audio/processor.cpp
  2. 56
      src/tangara/audio/processor.hpp
  3. 125
      src/tangara/tts/player.cpp
  4. 7
      src/tangara/tts/player.hpp
  5. 2
      src/tangara/tts/provider.cpp

@ -347,34 +347,39 @@ auto SampleProcessor::discardCommand(Args& command) -> void {
// End of stream commands can just be dropped without further action. // End of stream commands can just be dropped without further action.
} }
SampleProcessor::Buffer::Buffer() Buffer::Buffer(std::span<sample::Sample> storage)
: buffer_(reinterpret_cast<sample::Sample*>( : storage_(nullptr), buffer_(storage), samples_in_buffer_() {}
heap_caps_calloc(kSampleBufferLength,
sizeof(sample::Sample), Buffer::Buffer()
MALLOC_CAP_DMA)), : storage_(reinterpret_cast<sample::Sample*>(
kSampleBufferLength), heap_caps_calloc(kSampleBufferLength,
sizeof(sample::Sample),
MALLOC_CAP_DMA))),
buffer_(storage_, kSampleBufferLength),
samples_in_buffer_() {} samples_in_buffer_() {}
SampleProcessor::Buffer::~Buffer() { Buffer::~Buffer() {
heap_caps_free(buffer_.data()); if (storage_) {
heap_caps_free(storage_);
}
} }
auto SampleProcessor::Buffer::writeAcquire() -> std::span<sample::Sample> { auto Buffer::writeAcquire() -> std::span<sample::Sample> {
return buffer_.subspan(samples_in_buffer_.size()); return buffer_.subspan(samples_in_buffer_.size());
} }
auto SampleProcessor::Buffer::writeCommit(size_t samples) -> void { auto Buffer::writeCommit(size_t samples) -> void {
if (samples == 0) { if (samples == 0) {
return; return;
} }
samples_in_buffer_ = buffer_.first(samples + samples_in_buffer_.size()); samples_in_buffer_ = buffer_.first(samples + samples_in_buffer_.size());
} }
auto SampleProcessor::Buffer::readAcquire() -> std::span<sample::Sample> { auto Buffer::readAcquire() -> std::span<sample::Sample> {
return samples_in_buffer_; return samples_in_buffer_;
} }
auto SampleProcessor::Buffer::readCommit(size_t samples) -> void { auto Buffer::readCommit(size_t samples) -> void {
if (samples == 0) { if (samples == 0) {
return; return;
} }
@ -389,11 +394,11 @@ auto SampleProcessor::Buffer::readCommit(size_t samples) -> void {
} }
} }
auto SampleProcessor::Buffer::isEmpty() -> bool { auto Buffer::isEmpty() -> bool {
return samples_in_buffer_.empty(); return samples_in_buffer_.empty();
} }
auto SampleProcessor::Buffer::clear() -> void { auto Buffer::clear() -> void {
samples_in_buffer_ = {}; samples_in_buffer_ = {};
} }

@ -22,6 +22,35 @@
namespace audio { namespace audio {
/*
 * Utility for managing buffering samples between digital filters.
 *
 * Samples are appended with writeAcquire/writeCommit, and consumed with
 * readAcquire/readCommit. The backing storage is either supplied by the
 * caller, or allocated by the Buffer itself, depending on which constructor
 * is used.
 */
class Buffer {
public:
/*
 * Creates a buffer on top of caller-provided storage. The Buffer does not
 * take ownership of this storage and will not free it on destruction, so the
 * storage must remain valid for as long as the Buffer is in use.
 */
Buffer(std::span<sample::Sample> storage);
/*
 * Creates a buffer backed by its own allocation of kSampleBufferLength
 * samples, made via heap_caps_calloc with MALLOC_CAP_DMA.
 */
Buffer();
/* Frees the backing storage, but only if this Buffer allocated it. */
~Buffer();
/* Returns a span of the unused space within the buffer. */
auto writeAcquire() -> std::span<sample::Sample>;
/* Signals how many samples were just added to the writeAcquire span. */
auto writeCommit(size_t) -> void;
/* Returns a span of the samples stored within the buffer. */
auto readAcquire() -> std::span<sample::Sample>;
/* Signals how many samples from the readAcquire span were consumed. */
auto readCommit(size_t) -> void;
/* Returns true if there are no stored samples waiting to be read. */
auto isEmpty() -> bool;
/* Discards any stored samples. The backing storage itself is kept. */
auto clear() -> void;
/* Not copyable; a copy would alias (and potentially double free) storage. */
Buffer(const Buffer&) = delete;
Buffer& operator=(const Buffer&) = delete;
private:
/* Allocation owned by this Buffer, or nullptr if storage was provided. */
sample::Sample* storage_;
/* The entire region of memory available for holding samples. */
std::span<sample::Sample> buffer_;
/* The portion of buffer_ that currently holds committed samples. */
std::span<sample::Sample> samples_in_buffer_;
};
/* /*
* Handle to a persistent task that converts samples between formats (sample * Handle to a persistent task that converts samples between formats (sample
* rate, channels, bits per sample), in order to put samples in the preferred * rate, channels, bits per sample), in order to put samples in the preferred
@ -87,33 +116,6 @@ class SampleProcessor {
StreamBufferHandle_t source_; StreamBufferHandle_t source_;
drivers::PcmBuffer& sink_; drivers::PcmBuffer& sink_;
/* Internal utility for managing buffering samples between our filters. */
class Buffer {
public:
Buffer();
~Buffer();
/* Returns a span of the unused space within the buffer. */
auto writeAcquire() -> std::span<sample::Sample>;
/* Signals how many samples were just added to the writeAcquire span. */
auto writeCommit(size_t) -> void;
/* Returns a span of the samples stored within the buffer. */
auto readAcquire() -> std::span<sample::Sample>;
/* Signals how many samples from the readAcquire span were consumed. */
auto readCommit(size_t) -> void;
auto isEmpty() -> bool;
auto clear() -> void;
Buffer(const Buffer&) = delete;
Buffer& operator=(const Buffer&) = delete;
private:
std::span<sample::Sample> buffer_;
std::span<sample::Sample> samples_in_buffer_;
};
Buffer input_buffer_; Buffer input_buffer_;
Buffer resampled_buffer_; Buffer resampled_buffer_;
Buffer output_buffer_; Buffer output_buffer_;

@ -6,8 +6,12 @@
#include "tts/player.hpp" #include "tts/player.hpp"
#include "audio/processor.hpp"
#include "audio/resample.hpp"
#include "codec.hpp" #include "codec.hpp"
#include "esp_log.h" #include "esp_log.h"
#include "freertos/projdefs.h"
#include "portmacro.h"
#include "sample.hpp" #include "sample.hpp"
#include "types.hpp" #include "types.hpp"
@ -18,57 +22,140 @@ namespace tts {
Player::Player(tasks::WorkerPool& worker, Player::Player(tasks::WorkerPool& worker,
drivers::PcmBuffer& output, drivers::PcmBuffer& output,
audio::FatfsStreamFactory& factory) audio::FatfsStreamFactory& factory)
: bg_(worker), stream_factory_(factory), output_(output) {} : bg_(worker), stream_factory_(factory), output_(output), play_count_(0) {}
auto Player::playFile(const std::string& path) -> void { auto Player::playFile(const std::string& path) -> void {
ESP_LOGI(kTag, "playing '%s'", path.c_str()); ESP_LOGI(kTag, "playing '%s'", path.c_str());
bg_.Dispatch<void>([=]() { int this_play = ++play_count_;
bg_.Dispatch<void>([=, this]() {
auto stream = stream_factory_.create(path); auto stream = stream_factory_.create(path);
if (!stream) { if (!stream) {
ESP_LOGE(kTag, "creating stream failed"); ESP_LOGE(kTag, "creating stream failed");
return; return;
} }
// FIXME: Rather than hardcoding WAV support only, we should work out a
// proper subset of 'low memory' decoders that can all be used for TTS
// playback.
if (stream->type() != codecs::StreamType::kWav) { if (stream->type() != codecs::StreamType::kWav) {
ESP_LOGE(kTag, "stream was unsupported type"); ESP_LOGE(kTag, "stream was unsupported type");
return; return;
} }
auto decoder = codecs::CreateCodecForType(stream->type()); auto decoder = codecs::CreateCodecForType(stream->type());
if (!decoder) { if (!decoder) {
ESP_LOGE(kTag, "creating decoder failed"); ESP_LOGE(kTag, "creating decoder failed");
return; return;
} }
std::unique_ptr<codecs::ICodec> codec{*decoder}; std::unique_ptr<codecs::ICodec> codec{*decoder};
auto open_res = codec->OpenStream(stream, 0); auto open_res = codec->OpenStream(stream, 0);
if (open_res.has_error()) { if (open_res.has_error()) {
ESP_LOGE(kTag, "opening stream failed"); ESP_LOGE(kTag, "opening stream failed");
return; return;
} }
// if (open_res->sample_rate_hz != 48000 || open_res->num_channels != 2) {
// ESP_LOGE(kTag, "stream format is wrong (was %u channels @ %lu hz)", decodeToSink(*open_res, std::move(codec), this_play);
// open_res->num_channels, open_res->sample_rate_hz); });
// return; }
// }
sample::Sample decode_buf[4096]; auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
for (;;) { std::unique_ptr<codecs::ICodec> codec,
auto decode_res = codec->DecodeTo(decode_buf); int play_count) -> void {
// Set up buffers to hold samples between the intermediary parts of
// processing. We can just use the stack for these, since this method is
// called only from background workers, which have enormous stacks.
sample::Sample decode_storage[4096];
audio::Buffer decode_buf(decode_storage);
sample::Sample resample_storage[4096];
audio::Buffer resample_buf(resample_storage);
sample::Sample stereo_storage[4096];
audio::Buffer stereo_buf(stereo_storage);
// Work out what processing the codec's output needs.
std::unique_ptr<audio::Resampler> resampler;
if (format.sample_rate_hz != 48000) {
resampler = std::make_unique<audio::Resampler>(format.sample_rate_hz, 48000,
format.num_channels);
}
bool double_samples = format.num_channels == 1;
// FIXME: This decode-and-process loop is substantially the same as the audio
// processor's filter loop. Ideally we should refactor both of these loops to
// reuse code, however I'm holding off on doing this until we've implemented
// more advanced audio processing features in the audio processor (EQ, tempo
// shifting, etc.) as it's not clear to me yet how much the two codepaths will
// be diverging later anyway.
while (codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() ||
!stereo_buf.isEmpty()) {
if (play_count != play_count_) {
// FIXME: This is a little unsafe and could maybe take out the first few
// samples of the next file.
output_.clear();
break;
}
if (codec) {
auto decode_res = codec->DecodeTo(decode_buf.writeAcquire());
if (decode_res.has_error()) { if (decode_res.has_error()) {
ESP_LOGE(kTag, "decoding error"); ESP_LOGE(kTag, "decoding error");
return; break;
} }
decode_buf.writeCommit(decode_res->samples_written);
if (decode_res->is_stream_finished) { if (decode_res->is_stream_finished) {
break; codec.reset();
} }
}
if (!decode_buf.isEmpty()) {
auto resample_input = decode_buf.readAcquire();
auto resample_output = resample_buf.writeAcquire();
std::span<sample::Sample> decode_span{decode_buf, size_t read, wrote;
decode_res->samples_written}; if (resampler) {
while (!decode_span.empty()) { std::tie(read, wrote) =
size_t sent = output_.send(decode_span); resampler->Process(resample_input, resample_output, false);
decode_span = decode_span.subspan(sent); } else {
read = wrote = std::min(resample_input.size(), resample_output.size());
std::copy_n(resample_input.begin(), read, resample_output.begin());
} }
decode_buf.readCommit(read);
resample_buf.writeCommit(wrote);
} }
ESP_LOGI(kTag, "finished playing okay"); if (!resample_buf.isEmpty()) {
}); auto channels_input = resample_buf.readAcquire();
auto channels_output = stereo_buf.writeAcquire();
size_t read, wrote;
if (double_samples) {
wrote = channels_output.size();
read = wrote / 2;
if (read > channels_input.size()) {
read = channels_input.size();
wrote = read * 2;
}
for (size_t i = 0; i < read; i++) {
channels_output[i * 2] = channels_input[i];
channels_output[(i * 2) + 1] = channels_input[i];
}
} else {
read = wrote = std::min(channels_input.size(), channels_output.size());
std::copy_n(channels_input.begin(), read, channels_output.begin());
}
resample_buf.readCommit(read);
stereo_buf.writeCommit(wrote);
}
// The mixin PcmBuffer should almost always be draining, so we can force
// samples into it more aggressively than with the main music PcmBuffer.
while (!stereo_buf.isEmpty()) {
size_t sent = output_.send(stereo_buf.readAcquire());
stereo_buf.readCommit(sent);
}
}
} }
} // namespace tts } // namespace tts

@ -9,6 +9,7 @@
#include <string> #include <string>
#include "audio/fatfs_stream_factory.hpp" #include "audio/fatfs_stream_factory.hpp"
#include "codec.hpp"
#include "drivers/pcm_buffer.hpp" #include "drivers/pcm_buffer.hpp"
#include "tasks.hpp" #include "tasks.hpp"
@ -33,6 +34,12 @@ class Player {
tasks::WorkerPool& bg_; tasks::WorkerPool& bg_;
audio::FatfsStreamFactory& stream_factory_; audio::FatfsStreamFactory& stream_factory_;
drivers::PcmBuffer& output_; drivers::PcmBuffer& output_;
std::atomic<int> play_count_;
auto decodeToSink(const codecs::ICodec::OutputFormat&,
std::unique_ptr<codecs::ICodec>,
int play_count) -> void;
}; };
} // namespace tts } // namespace tts

@ -28,7 +28,7 @@ static const char* kTtsPath = "/.tangara-tts/";
static auto textToFile(const std::string& text) -> std::optional<std::string> { static auto textToFile(const std::string& text) -> std::optional<std::string> {
uint64_t hash = komihash(text.data(), text.size(), 0); uint64_t hash = komihash(text.data(), text.size(), 0);
std::stringstream stream; std::stringstream stream;
stream << kTtsPath << std::hex << hash << ".wav"; stream << kTtsPath << std::hex << hash;
return stream.str(); return stream.str();
} }

Loading…
Cancel
Save