From 6c3501dbcbd1095293d8a4d4b83311e94a7df9a8 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Tue, 8 Aug 2023 22:16:31 +1000 Subject: [PATCH] Flesh out opus decoder. it doesn't work! i hate opus. --- src/audio/audio_task.cpp | 2 +- src/audio/fatfs_audio_input.cpp | 4 +- src/audio/i2s_audio_output.cpp | 3 +- src/codecs/CMakeLists.txt | 6 ++- src/codecs/codec.cpp | 3 ++ src/codecs/include/opus.hpp | 13 ++++-- src/codecs/include/types.hpp | 2 +- src/codecs/opus.cpp | 78 ++++++++++++++++++++++++++++++--- src/database/include/track.hpp | 1 + src/database/tag_parser.cpp | 3 ++ 10 files changed, 97 insertions(+), 18 deletions(-) diff --git a/src/audio/audio_task.cpp b/src/audio/audio_task.cpp index 75b44594..046df378 100644 --- a/src/audio/audio_task.cpp +++ b/src/audio/audio_task.cpp @@ -125,7 +125,7 @@ AudioTask::AudioTask(IAudioSource* source, IAudioSink* sink) has_begun_decoding_(false), current_input_format_(), current_output_format_(), - codec_buffer_(new RawStream(kSampleBufferSize)) {} + codec_buffer_(new RawStream(kSampleBufferSize, MALLOC_CAP_8BIT)) {} void AudioTask::Main() { for (;;) { diff --git a/src/audio/fatfs_audio_input.cpp b/src/audio/fatfs_audio_input.cpp index 73586f09..9ca41da1 100644 --- a/src/audio/fatfs_audio_input.cpp +++ b/src/audio/fatfs_audio_input.cpp @@ -313,8 +313,8 @@ auto FatfsAudioInput::ContainerToStreamType(database::Encoding enc) return codecs::StreamType::kPcm; case database::Encoding::kFlac: return codecs::StreamType::kFlac; - case database::Encoding::kOgg: // Misnamed; this is Ogg Vorbis. - return codecs::StreamType::kVorbis; + case database::Encoding::kOpus: + return codecs::StreamType::kOpus; case database::Encoding::kUnsupported: default: return {}; diff --git a/src/audio/i2s_audio_output.cpp b/src/audio/i2s_audio_output.cpp index d60ddfa4..e53dbe2a 100644 --- a/src/audio/i2s_audio_output.cpp +++ b/src/audio/i2s_audio_output.cpp @@ -120,8 +120,7 @@ auto I2SAudioOutput::PrepareFormat(const StreamInfo::Pcm& orig) return StreamInfo::Pcm{ .channels = std::min(orig.channels, 2), .bits_per_sample = std::clamp(orig.bits_per_sample, 16, 32), - .sample_rate = 44100, - //.sample_rate = std::clamp(orig.sample_rate, 8000, 96000), + .sample_rate = std::clamp(orig.sample_rate, 8000, 96000), }; } diff --git a/src/codecs/CMakeLists.txt b/src/codecs/CMakeLists.txt index 866da8c8..f84c46a3 100644 --- a/src/codecs/CMakeLists.txt +++ b/src/codecs/CMakeLists.txt @@ -3,16 +3,18 @@ # SPDX-License-Identifier: GPL-3.0-only idf_component_register( - SRCS "codec.cpp" "mad.cpp" "foxenflac.cpp" + SRCS "codec.cpp" "mad.cpp" "foxenflac.cpp" "opus.cpp" INCLUDE_DIRS "include" REQUIRES "result" "span" "libmad" "libfoxenflac") target_compile_options("${COMPONENT_LIB}" PRIVATE ${EXTRA_WARNINGS}) set(OPUS_FIXED_POINT ON) -set(OPUS_ENABLE_FLOAT_API ON) +set(OPUS_ENABLE_FLOAT_API OFF) set(OPUS_INSTALL_PKG_CONFIG_MODULE OFF) set(OPUS_INSTALL_CMAKE_CONFIG_MODULE OFF) +set(OPUS_BUILD_TESTING OFF) +set(OPUS_BUILD_SHARED_LIBS OFF) set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) diff --git a/src/codecs/codec.cpp b/src/codecs/codec.cpp index 404ea214..5e67c0de 100644 --- a/src/codecs/codec.cpp +++ b/src/codecs/codec.cpp @@ -10,6 +10,7 @@ #include #include "foxenflac.hpp" +#include "opus.hpp" #include "mad.hpp" #include "types.hpp" @@ -21,6 +22,8 @@ auto CreateCodecForType(StreamType type) -> std::optional { return new MadMp3Decoder(); case StreamType::kFlac: return new FoxenFlacDecoder(); + case StreamType::kOpus: + return new XiphOpusDecoder(); default: return {}; } diff --git a/src/codecs/include/opus.hpp b/src/codecs/include/opus.hpp index a5a7d78c..f824a7cb 100644 --- a/src/codecs/include/opus.hpp +++ b/src/codecs/include/opus.hpp @@ -14,6 +14,7 @@ #include #include "opus.h" +#include "sample.hpp" #include "span.hpp" #include "codec.hpp" @@ -36,14 +37,18 @@ class XiphOpusDecoder : public ICodec { * Writes samples for the current frame. */ auto ContinueStream(cpp::span input, - cpp::span output) + cpp::span output) -> Result override; auto SeekStream(cpp::span input, std::size_t target_sample) -> Result override; private: - OpusDecoder *opus_; - float *sample_buffer_; - std::size_t sample_buffer_len_; + OpusDecoder* opus_; + cpp::span sample_buffer_; + int32_t pos_in_buffer_; + int32_t samples_in_buffer_; + +}; + } // namespace codecs diff --git a/src/codecs/include/types.hpp b/src/codecs/include/types.hpp index 3dfc1da9..2f669448 100644 --- a/src/codecs/include/types.hpp +++ b/src/codecs/include/types.hpp @@ -13,8 +13,8 @@ namespace codecs { enum class StreamType { kMp3, kPcm, - kVorbis, kFlac, + kOpus, }; } // namespace codecs diff --git a/src/codecs/opus.cpp b/src/codecs/opus.cpp index 2c4291c2..791c8e74 100644 --- a/src/codecs/opus.cpp +++ b/src/codecs/opus.cpp @@ -5,6 +5,7 @@ */ #include "opus.hpp" + #include #include @@ -12,36 +13,101 @@ #include #include +#include "esp_heap_caps.h" #include "mad.h" #include "codec.hpp" #include "esp_log.h" #include "opus.h" +#include "opus_types.h" #include "result.hpp" +#include "sample.hpp" #include "types.hpp" namespace codecs { - static constexpr std::size_t kSampleBufferSize = 5760 * sizeof(float); +static constexpr char kTag[] = "opus"; + +// "If this is less than the maximum packet duration (120ms; 5760 for 48kHz), +// this function will not be capable of decoding some packets" +static constexpr size_t kSampleBufferSize = 5760; XiphOpusDecoder::XiphOpusDecoder() { int err; opus_ = opus_decoder_create(48000, 2, &err); assert(err == OPUS_OK); + + pos_in_buffer_ = 0; + sample_buffer_ = {reinterpret_cast( + heap_caps_calloc(kSampleBufferSize, sizeof(opus_int16), + MALLOC_CAP_8BIT | MALLOC_CAP_SPIRAM)), + kSampleBufferSize}; } XiphOpusDecoder::~XiphOpusDecoder() { opus_decoder_destroy(opus_); + heap_caps_free(sample_buffer_.data()); } auto XiphOpusDecoder::BeginStream(const cpp::span input) - -> Result {} + -> Result { + return {0, OutputFormat{ + .num_channels = 2, + .sample_rate_hz = 48000, + }}; +} + +auto read_uint32(cpp::span src) -> uint32_t { + return static_cast(src[0] << 24) | + static_cast(src[1] << 16) | + static_cast(src[2] << 8) | + static_cast(src[3] << 0); +} auto XiphOpusDecoder::ContinueStream(cpp::span input, - cpp::span output) + cpp::span output) -> Result { - int samples_decoded = opus_decode_float( - opus_, reinterpret_cast(input.data()), - input.size_bytes(), sample_buffer_, sample_buffer_len_, 0); + size_t bytes_used = 0; + if (pos_in_buffer_ >= samples_in_buffer_) { + ESP_LOGI(kTag, "sample buffer is empty. parsing more."); + if (input.size() < 4) { + return {0, cpp::fail(Error::kOutOfInput)}; + } + uint32_t payload_length = read_uint32(input); + ESP_LOGI(kTag, "payload length is %lu", payload_length); + + if (input.size() - 4 < payload_length) { + ESP_LOGI(kTag, "input too small for payload"); + return {0, cpp::fail(Error::kOutOfInput)}; + } + + // Next 4 bytes are the 'final range'. + // uint32_t enc_final_range = read_uint32(input.subspan(4)); + + bytes_used = payload_length + 8; + + pos_in_buffer_ = 0; + samples_in_buffer_ = opus_decode( + opus_, reinterpret_cast(input.data() + 8), + payload_length, sample_buffer_.data(), sample_buffer_.size(), 0); + + if (samples_in_buffer_ < 0) { + ESP_LOGE(kTag, "error decoding stream"); + return {bytes_used, cpp::fail(Error::kMalformedData)}; + } + } + + size_t samples_written = 0; + while (pos_in_buffer_ < samples_in_buffer_ && + samples_written < output.size()) { + output[samples_written++] = + sample::FromSigned(sample_buffer_[pos_in_buffer_++], 16); + } + + return {bytes_used, + OutputInfo{ + .samples_written = samples_written, + .is_finished_writing = pos_in_buffer_ >= samples_in_buffer_, + }}; } auto XiphOpusDecoder::SeekStream(cpp::span input, diff --git a/src/database/include/track.hpp b/src/database/include/track.hpp index 78f973ac..d4f01b71 100644 --- a/src/database/include/track.hpp +++ b/src/database/include/track.hpp @@ -43,6 +43,7 @@ enum class Encoding { kWav = 2, kOgg = 3, kFlac = 4, + kOpus = 5, }; enum class Tag { diff --git a/src/database/tag_parser.cpp b/src/database/tag_parser.cpp index 2faf3408..2f1fe337 100644 --- a/src/database/tag_parser.cpp +++ b/src/database/tag_parser.cpp @@ -153,6 +153,9 @@ auto TagParserImpl::ReadAndParseTags(const std::string& path, TrackTags* out) case Fwav: out->encoding(Encoding::kWav); break; + case Fopus: + out->encoding(Encoding::kOpus); + break; default: out->encoding(Encoding::kUnsupported); }