From d0b739c66ef11a6c16f99cad6957a1782236fd8c Mon Sep 17 00:00:00 2001
From: jacqueline
Date: Mon, 9 Sep 2024 16:39:22 +1000
Subject: [PATCH] Play basic wav files in response to tts prompts

It's currently quite limited (no stereo or sample rate conversion,
multiple messages clobber each other, only plays if music is playing),
but we're getting there!
---
Review notes (this area sits after the three-dash line, so it is not part
of the commit message):

* Layout: this copy of the patch had lost its line breaks. The line
  structure below was rebuilt from the hunk headers and the diffstat;
  indentation and blank-line placement are a best-effort reconstruction
  and should be checked against the upstream commit before applying.
* Several spots look like they lost angle-bracketed text in the same
  extraction: the From: header has no address, `std::unique_ptr codec{...}`
  and `-> std::optional` carry no type argument, and `is_in_state()` /
  `transit()` take no state type. They are left exactly as received
  rather than guessed at - TODO restore from upstream.
* Player::playFile, decode loop: the loop breaks on
  `decode_res->is_stream_finished` before forwarding `decode_buf`, so any
  samples reported together with the finished flag are never passed to
  `output_.send`. Confirm whether DecodeTo can set both; if so, send
  first and break afterwards.
* Player::playFile, send loop: the inner `while` retries until the span
  is empty. If `output_.send` can return 0 this spins without yielding -
  confirm the call blocks or always makes progress.
* Player::playFile, lambda: `[=]` copies `path` into the closure; `bg_`,
  `stream_factory_` and `output_` are presumably members reached through
  an implicitly captured `this`, so the Player would need to outlive the
  dispatched job - verify against the owner of Player.
* Player::playFile, buffer: `decode_buf` is a 4096-element local array
  inside the dispatched lambda; check it fits the worker's stack.
* textToFile: the new line no longer prepends `drivers::kStoragePath`, so
  the function now yields `kTtsPath` + hex hash + ".wav".

 src/tangara/audio/audio_fsm.cpp |  2 +-
 src/tangara/tts/player.cpp      | 50 +++++++++++++++++++++++++++++++++
 src/tangara/tts/provider.cpp    |  3 +-
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp
index ee7215cb..dac04f75 100644
--- a/src/tangara/audio/audio_fsm.cpp
+++ b/src/tangara/audio/audio_fsm.cpp
@@ -226,7 +226,7 @@ void AudioState::react(const internal::StreamStarted& ev) {
   }
 
   sStreamCues.addCue(ev.track, ev.cue_at_sample);
-  sStreamCues.update(sDrainBuffer->totalReceived());
+  sStreamCues.update(sDrainBuffers->first.totalReceived());
 
   if (!sIsPaused && !is_in_state()) {
     transit();
diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp
index 70959992..3fcd88bc 100644
--- a/src/tangara/tts/player.cpp
+++ b/src/tangara/tts/player.cpp
@@ -6,7 +6,10 @@
 
 #include "tts/player.hpp"
 
+#include "codec.hpp"
 #include "esp_log.h"
+#include "sample.hpp"
+#include "types.hpp"
 
 namespace tts {
 
@@ -19,6 +22,53 @@ Player::Player(tasks::WorkerPool& worker,
 
 auto Player::playFile(const std::string& path) -> void {
   ESP_LOGI(kTag, "playing '%s'", path.c_str());
+  bg_.Dispatch([=]() {
+    auto stream = stream_factory_.create(path);
+    if (!stream) {
+      ESP_LOGE(kTag, "creating stream failed");
+      return;
+    }
+    if (stream->type() != codecs::StreamType::kWav) {
+      ESP_LOGE(kTag, "stream was unsupported type");
+      return;
+    }
+    auto decoder = codecs::CreateCodecForType(stream->type());
+    if (!decoder) {
+      ESP_LOGE(kTag, "creating decoder failed");
+      return;
+    }
+    std::unique_ptr codec{*decoder};
+    auto open_res = codec->OpenStream(stream, 0);
+    if (open_res.has_error()) {
+      ESP_LOGE(kTag, "opening stream failed");
+      return;
+    }
+    // if (open_res->sample_rate_hz != 48000 || open_res->num_channels != 2) {
+    //   ESP_LOGE(kTag, "stream format is wrong (was %u channels @ %lu hz)",
+    //            open_res->num_channels, open_res->sample_rate_hz);
+    //   return;
+    // }
+    sample::Sample decode_buf[4096];
+    for (;;) {
+      auto decode_res = codec->DecodeTo(decode_buf);
+      if (decode_res.has_error()) {
+        ESP_LOGE(kTag, "decoding error");
+        return;
+      }
+      if (decode_res->is_stream_finished) {
+        break;
+      }
+
+      std::span decode_span{decode_buf,
+                            decode_res->samples_written};
+      while (!decode_span.empty()) {
+        size_t sent = output_.send(decode_span);
+        decode_span = decode_span.subspan(sent);
+      }
+    }
+
+    ESP_LOGI(kTag, "finished playing okay");
+  });
 }
 
 }  // namespace tts
diff --git a/src/tangara/tts/provider.cpp b/src/tangara/tts/provider.cpp
index 24229233..b7c1e55d 100644
--- a/src/tangara/tts/provider.cpp
+++ b/src/tangara/tts/provider.cpp
@@ -28,8 +28,7 @@ static const char* kTtsPath = "/.tangara-tts/";
 static auto textToFile(const std::string& text) -> std::optional {
   uint64_t hash = komihash(text.data(), text.size(), 0);
   std::stringstream stream;
-  stream << drivers::kStoragePath << kTtsPath;
-  stream << std::hex << hash;
+  stream << kTtsPath << std::hex << hash << ".wav";
   return stream.str();
 }
 