/*
 * Copyright 2024 jacqueline
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#include "tts/player.hpp"

#include "audio/processor.hpp"
#include "audio/resample.hpp"
#include "codec.hpp"
#include "esp_log.h"
#include "freertos/projdefs.h"
#include "portmacro.h"
#include "sample.hpp"
#include "types.hpp"

namespace tts {

[[maybe_unused]] static constexpr char kTag[] = "ttsplay";

// Sample rate that everything sent to the output buffer is converted to.
static constexpr int kOutputSampleRateHz = 48000;

// Number of samples held by each of the intermediate processing buffers.
static constexpr size_t kBufferSamples = 4096;

/*
 * Stores references to the worker pool used for background decoding, the PCM
 * buffer that decoded samples are sent to, and the factory used to open
 * streams. No playback is started here.
 */
Player::Player(tasks::WorkerPool& worker,
               drivers::PcmBuffer& output,
               audio::FatfsStreamFactory& factory)
    : bg_(worker),
      stream_factory_(factory),
      output_(output),
      play_count_(0) {}

/*
 * Requests playback of the file at `path`.
 *
 * Each call bumps `play_count_`; a decode that was started by an earlier call
 * notices the change and stops (see decodeToSink). Opening and decoding the
 * stream is handed to `bg_` via Dispatch, with `path` captured by copy.
 *
 * Failures (stream could not be created, unsupported stream type, decoder
 * could not be created or opened) are logged and playback is abandoned.
 */
auto Player::playFile(const std::string& path) -> void {
  ESP_LOGI(kTag, "playing '%s'", path.c_str());

  // NOTE(review): play_count_ is written here and read from the dispatched
  // work in decodeToSink; presumably it is declared atomic in the header --
  // confirm.
  int this_play = ++play_count_;

  bg_.Dispatch([=, this]() {
    auto stream = stream_factory_.create(path);
    if (!stream) {
      ESP_LOGE(kTag, "creating stream failed");
      return;
    }

    // FIXME: Rather than hardcoding WAV support only, we should work out a
    // proper subset of 'low memory' decoders that can all be used for TTS
    // playback.
    if (stream->type() != codecs::StreamType::kWav) {
      ESP_LOGE(kTag, "stream was unsupported type");
      return;
    }

    auto decoder = codecs::CreateCodecForType(stream->type());
    if (!decoder) {
      ESP_LOGE(kTag, "creating decoder failed");
      return;
    }

    // Take ownership of the newly created codec so that it is released on
    // every exit path below.
    std::unique_ptr<codecs::ICodec> codec{*decoder};

    auto open_res = codec->OpenStream(stream, 0);
    if (open_res.has_error()) {
      ESP_LOGE(kTag, "opening stream failed");
      return;
    }

    decodeToSink(*open_res, std::move(codec), this_play);
  });
}

/*
 * Decodes everything `codec` produces and sends it to the output buffer.
 *
 * Samples pass through three stages on the way: decode, resample to
 * kOutputSampleRateHz (skipped when the source is already at that rate), and
 * duplication of each sample when the source has a single channel.
 *
 * `format` describes the codec's output. `codec` is owned by this call and is
 * released once it reports the end of its stream. `play_count` is the value
 * of `play_count_` at the time playback was requested; when the two no longer
 * match, the output buffer is cleared and decoding stops.
 */
auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
                          std::unique_ptr<codecs::ICodec> codec,
                          int play_count) -> void {
  // Set up buffers to hold samples between the intermediary parts of
  // processing. We can just use the stack for these, since this method is
  // called only from background workers, which have enormous stacks.
  sample::Sample decode_storage[kBufferSamples];
  audio::Buffer decode_buf(decode_storage);

  sample::Sample resample_storage[kBufferSamples];
  audio::Buffer resample_buf(resample_storage);

  sample::Sample stereo_storage[kBufferSamples];
  audio::Buffer stereo_buf(stereo_storage);

  // Work out what processing the codec's output needs.
  std::unique_ptr<audio::Resampler> resampler;
  if (format.sample_rate_hz != kOutputSampleRateHz) {
    resampler = std::make_unique<audio::Resampler>(
        format.sample_rate_hz, kOutputSampleRateHz, format.num_channels);
  }
  bool double_samples = format.num_channels == 1;

  // FIXME: This decode-and-process loop is substantially the same as the audio
  // processor's filter loop. Ideally we should refactor both of these loops to
  // reuse code, however I'm holding off on doing this until we've implemented
  // more advanced audio processing features in the audio processor (EQ, tempo
  // shifting, etc.) as it's not clear to me yet how much the two codepaths will
  // be diverging later anyway.
  //
  // Keep going until the codec is finished *and* every intermediate buffer
  // has been flushed through to the output.
  // NOTE(review): this relies on each stage eventually consuming its input;
  // a stage that repeatedly reports zero progress would keep the loop
  // spinning -- confirm the resampler always makes progress.
  while (codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() ||
         !stereo_buf.isEmpty()) {
    // A newer playFile() call supersedes this one.
    if (play_count != play_count_) {
      // FIXME: This is a little unsafe and could maybe take out the first few
      // samples of the next file.
      output_.clear();
      break;
    }

    // Stage 1: pull more samples out of the codec, if it has any left.
    if (codec) {
      auto decode_res = codec->DecodeTo(decode_buf.writeAcquire());
      if (decode_res.has_error()) {
        ESP_LOGE(kTag, "decoding error");
        break;
      }
      decode_buf.writeCommit(decode_res->samples_written);

      if (decode_res->is_stream_finished) {
        // Dropping the codec is what lets the loop terminate once the
        // remaining buffers drain.
        codec.reset();
      }
    }

    // Stage 2: resample, or copy straight through when no resampling is
    // needed.
    if (!decode_buf.isEmpty()) {
      auto resample_input = decode_buf.readAcquire();
      auto resample_output = resample_buf.writeAcquire();

      size_t read, wrote;
      if (resampler) {
        std::tie(read, wrote) =
            resampler->Process(resample_input, resample_output, false);
      } else {
        read = wrote = std::min(resample_input.size(), resample_output.size());
        std::copy_n(resample_input.begin(), read, resample_output.begin());
      }

      decode_buf.readCommit(read);
      resample_buf.writeCommit(wrote);
    }

    // Stage 3: duplicate each sample of single-channel input, or copy
    // straight through otherwise.
    if (!resample_buf.isEmpty()) {
      auto channels_input = resample_buf.readAcquire();
      auto channels_output = stereo_buf.writeAcquire();

      size_t read, wrote;
      if (double_samples) {
        // Every input sample becomes two output samples, so at most half as
        // many inputs as there is output space can be consumed. `wrote` is
        // derived from `read` so that an odd amount of output space never
        // leads to committing a sample that was not actually written.
        read = std::min(channels_input.size(), channels_output.size() / 2);
        wrote = read * 2;
        for (size_t i = 0; i < read; i++) {
          channels_output[i * 2] = channels_input[i];
          channels_output[(i * 2) + 1] = channels_input[i];
        }
      } else {
        read = wrote = std::min(channels_input.size(), channels_output.size());
        std::copy_n(channels_input.begin(), read, channels_output.begin());
      }

      resample_buf.readCommit(read);
      stereo_buf.writeCommit(wrote);
    }

    // The mixin PcmBuffer should almost always be draining, so we can force
    // samples into it more aggressively than with the main music PcmBuffer.
    while (!stereo_buf.isEmpty()) {
      size_t sent = output_.send(stereo_buf.readAcquire());
      stereo_buf.readCommit(sent);
    }
  }
}

}  // namespace tts