From 578c3737f8c07e543b90f964da0e89db1c18bb9a Mon Sep 17 00:00:00 2001 From: jacqueline Date: Wed, 9 Aug 2023 11:22:08 +1000 Subject: [PATCH] Add vorbis support whilst we're here --- src/audio/fatfs_audio_input.cpp | 2 + src/codecs/CMakeLists.txt | 2 +- src/codecs/codec.cpp | 3 + src/codecs/include/ogg.hpp | 15 ++- src/codecs/include/stbvorbis.hpp | 42 -------- src/codecs/include/types.hpp | 1 + src/codecs/include/vorbis.hpp | 58 +++++++++++ src/codecs/ogg.cpp | 109 +++++++++++++++++++++ src/codecs/opus.cpp | 59 ++++++++--- src/codecs/stbvorbis.cpp | 128 ------------------------ src/codecs/vorbis.cpp | 162 +++++++++++++++++++++++++++++++ 11 files changed, 391 insertions(+), 190 deletions(-) delete mode 100644 src/codecs/include/stbvorbis.hpp create mode 100644 src/codecs/include/vorbis.hpp delete mode 100644 src/codecs/stbvorbis.cpp create mode 100644 src/codecs/vorbis.cpp diff --git a/src/audio/fatfs_audio_input.cpp b/src/audio/fatfs_audio_input.cpp index 9ca41da1..d5194821 100644 --- a/src/audio/fatfs_audio_input.cpp +++ b/src/audio/fatfs_audio_input.cpp @@ -311,6 +311,8 @@ auto FatfsAudioInput::ContainerToStreamType(database::Encoding enc) return codecs::StreamType::kMp3; case database::Encoding::kWav: return codecs::StreamType::kPcm; + case database::Encoding::kOgg: + return codecs::StreamType::kVorbis; case database::Encoding::kFlac: return codecs::StreamType::kFlac; case database::Encoding::kOpus: diff --git a/src/codecs/CMakeLists.txt b/src/codecs/CMakeLists.txt index 3ce1ac86..9f0febb9 100644 --- a/src/codecs/CMakeLists.txt +++ b/src/codecs/CMakeLists.txt @@ -3,7 +3,7 @@ # SPDX-License-Identifier: GPL-3.0-only idf_component_register( - SRCS "codec.cpp" "mad.cpp" "foxenflac.cpp" "opus.cpp" + SRCS "codec.cpp" "mad.cpp" "foxenflac.cpp" "opus.cpp" "ogg.cpp" "vorbis.cpp" INCLUDE_DIRS "include" REQUIRES "result" "span" "libmad" "libfoxenflac" "tremor" "ogg") diff --git a/src/codecs/codec.cpp b/src/codecs/codec.cpp index 5e67c0de..9ac20097 100644 --- 
a/src/codecs/codec.cpp +++ b/src/codecs/codec.cpp @@ -12,6 +12,7 @@ #include "foxenflac.hpp" #include "opus.hpp" #include "mad.hpp" +#include "vorbis.hpp" #include "types.hpp" namespace codecs { @@ -20,6 +21,8 @@ auto CreateCodecForType(StreamType type) -> std::optional { switch (type) { case StreamType::kMp3: return new MadMp3Decoder(); + case StreamType::kVorbis: + return new TremorVorbisDecoder(); case StreamType::kFlac: return new FoxenFlacDecoder(); case StreamType::kOpus: diff --git a/src/codecs/include/ogg.hpp b/src/codecs/include/ogg.hpp index 2d6ea8c5..a27e961e 100644 --- a/src/codecs/include/ogg.hpp +++ b/src/codecs/include/ogg.hpp @@ -21,16 +21,23 @@ class OggContainer { OggContainer(); ~OggContainer(); - auto AddBytes(cpp::span) -> void; - auto HasNextPacket() -> bool; - auto NextPacket() -> cpp::span; - auto PeekPacket() -> cpp::span; + auto AddBytes(cpp::span) -> bool; + + auto Next() -> bool; + auto Current() -> cpp::span; + auto HasPacket() -> bool; private: + auto AdvancePage() -> bool; + auto AdvancePacket() -> bool; + ogg_sync_state sync_; ogg_stream_state stream_; ogg_page page_; ogg_packet packet_; + + bool has_stream_; + bool has_packet_; }; } // namespace codecs \ No newline at end of file diff --git a/src/codecs/include/stbvorbis.hpp b/src/codecs/include/stbvorbis.hpp deleted file mode 100644 index 045e264e..00000000 --- a/src/codecs/include/stbvorbis.hpp +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2023 jacqueline - * - * SPDX-License-Identifier: GPL-3.0-only - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "stb_vorbis.h" - -#include "codec.hpp" - -namespace codecs { - -class StbVorbisDecoder : public ICodec { - public: - StbVorbisDecoder(); - ~StbVorbisDecoder(); - - auto BeginStream(cpp::span) -> Result override; - auto ContinueStream(cpp::span, cpp::span) - -> Result override; - auto SeekStream(cpp::span input, std::size_t target_sample) - -> Result override; - - private: - stb_vorbis* 
vorbis_; - - int current_sample_; - int num_channels_; - int num_samples_; - float** samples_array_; -}; - -} // namespace codecs diff --git a/src/codecs/include/types.hpp b/src/codecs/include/types.hpp index 2f669448..e0bba47d 100644 --- a/src/codecs/include/types.hpp +++ b/src/codecs/include/types.hpp @@ -13,6 +13,7 @@ namespace codecs { enum class StreamType { kMp3, kPcm, + kVorbis, kFlac, kOpus, }; diff --git a/src/codecs/include/vorbis.hpp b/src/codecs/include/vorbis.hpp new file mode 100644 index 00000000..2804bb7c --- /dev/null +++ b/src/codecs/include/vorbis.hpp @@ -0,0 +1,58 @@ +/* + * Copyright 2023 jacqueline + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "ivorbisfile.h" +#include "ogg.hpp" +#include "ogg/ogg.h" +#include "opus.h" +#include "sample.hpp" +#include "span.hpp" + +#include "codec.hpp" + +namespace codecs { + +class TremorVorbisDecoder : public ICodec { + public: + TremorVorbisDecoder(); + ~TremorVorbisDecoder(); + + /* + * Opens the Vorbis stream, using the given bytes as its initial data, and + * returns the output format (channel count, sample rate and nominal + * bitrate) reported for that stream. + */ + auto BeginStream(cpp::span) -> Result override; + + /* + * Writes samples for the current frame. 
+ */ + auto ContinueStream(cpp::span input, + cpp::span output) + -> Result override; + + auto SeekStream(cpp::span input, std::size_t target_sample) + -> Result override; + + auto ReadCallback() -> cpp::span; /* the part of the current input not yet read */ + auto AfterReadCallback(size_t bytes_read) -> void; /* advances the read position by bytes_read */ + + private: + OggVorbis_File vorbis_; + cpp::span input_; + size_t pos_in_input_; +}; + +} // namespace codecs diff --git a/src/codecs/ogg.cpp b/src/codecs/ogg.cpp index e69de29b..2b332a12 100644 --- a/src/codecs/ogg.cpp +++ b/src/codecs/ogg.cpp @@ -0,0 +1,109 @@ +/* + * Copyright 2023 jacqueline + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "ogg.hpp" +#include + +#include "esp_log.h" +#include "ogg/ogg.h" + +namespace codecs { + +static constexpr char kTag[] = "ogg"; + +OggContainer::OggContainer() + : sync_(), + stream_(), + page_(), + packet_(), + has_stream_(false), + has_packet_(false) { + ogg_sync_init(&sync_); + ogg_sync_pageout(&sync_, &page_); +} + +OggContainer::~OggContainer() { + ogg_sync_clear(&sync_); + if (has_stream_) { /* stream_ is only initialised once AdvancePage has seen a first page */ + ogg_stream_clear(&stream_); + } +} + +auto OggContainer::AddBytes(cpp::span in) -> bool { /* copies `in` into the sync buffer; true only if a packet is ready afterwards */ + ESP_LOGI(kTag, "adding %u bytes to buffer", in.size()); + char* buf = ogg_sync_buffer(&sync_, in.size()); + if (buf == NULL) { + ESP_LOGE(kTag, "failed to allocate sync buffer"); + return false; + } + std::memcpy(buf, in.data(), in.size()); + if (ogg_sync_wrote(&sync_, in.size()) < 0) { + ESP_LOGE(kTag, "failed to write to sync buffer"); + return false; + } + return AdvancePage() && AdvancePacket(); +} + +auto OggContainer::HasPacket() -> bool { + return has_packet_; +} + +auto OggContainer::Next() -> bool { /* moves to the next packet; asks for another page only when the stream has none left */ + if (AdvancePacket()) { + return true; + } + if (AdvancePage() && AdvancePacket()) { + return true; + } + return false; +} + +auto OggContainer::Current() -> cpp::span { /* view of the packet last read by AdvancePacket */ + if (!has_packet_) { + return {}; /* no packet available */ + } + ESP_LOGI(kTag, "getting packet, location %p size %li", packet_.packet, + packet_.bytes); + return {packet_.packet, static_cast(packet_.bytes)}; +} + 
+auto OggContainer::AdvancePage() -> bool { /* moves one page from the sync buffer into stream_; false unless ogg_sync_pageout yields a page and the stream accepts it */ + int err; + if ((err = ogg_sync_pageout(&sync_, &page_)) != 1) { + ESP_LOGE(kTag, "failed to assemble page, res %i", err); + return false; + } + if (!has_stream_) { /* first page: start the stream with this page's serial number */ + int serialno = ogg_page_serialno(&page_); + ESP_LOGI(kTag, "beginning ogg stream, serial number %i", serialno); + if ((err = ogg_stream_init(&stream_, serialno)) < 0) { /* compare after assigning, so err holds the real return code for the log below */ + ESP_LOGE(kTag, "failed to init stream page, res %i", err); + return false; + } + has_stream_ = true; + } + if (ogg_stream_pagein(&stream_, &page_) < 0) { + ESP_LOGE(kTag, "failed to read in page"); + return false; + } + return true; +} + +auto OggContainer::AdvancePacket() -> bool { /* reads the next packet of stream_ into packet_; the outcome is also stored in has_packet_ */ + has_packet_ = false; + int res; + while ((res = ogg_stream_packetout(&stream_, &packet_)) == -1) { + // Retry until we sync, or run out of data. + ESP_LOGW(kTag, "trying to sync stream..."); + } + has_packet_ = res; + if (!has_packet_) { + ESP_LOGE(kTag, "failed to read out packet"); + } + return has_packet_; +} + +} // namespace codecs \ No newline at end of file diff --git a/src/codecs/opus.cpp b/src/codecs/opus.cpp index 2529d9ec..e0bc0c29 100644 --- a/src/codecs/opus.cpp +++ b/src/codecs/opus.cpp @@ -20,6 +20,7 @@ #include "esp_log.h" #include "ogg/ogg.h" #include "opus.h" +#include "opus_defines.h" #include "opus_types.h" #include "result.hpp" #include "sample.hpp" @@ -49,12 +50,13 @@ XiphOpusDecoder::~XiphOpusDecoder() { auto XiphOpusDecoder::BeginStream(const cpp::span input) -> Result { - ogg_.AddBytes(input); - if (!ogg_.HasNextPacket()) { + if (!ogg_.AddBytes(input)) { + ESP_LOGI(kTag, "need more input to begin"); return {input.size(), cpp::fail(Error::kOutOfInput)}; } - auto packet = ogg_.NextPacket(); + auto packet = ogg_.Current(); int num_channels = opus_packet_get_nb_channels(packet.data()); + ESP_LOGI(kTag, "opus stream has %i channels", num_channels); if (num_channels > 2) { // Too many channels; we can't handle this. 
// TODO: better error @@ -78,24 +80,51 @@ auto XiphOpusDecoder::ContinueStream(cpp::span input, -> Result { size_t bytes_used = 0; if (pos_in_buffer_ >= samples_in_buffer_) { - ESP_LOGI(kTag, "sample buffer is empty. parsing more."); - if (!ogg_.HasNextPacket()) { + if (!ogg_.HasPacket()) { bytes_used = input.size(); - ogg_.AddBytes(input); + if (!ogg_.AddBytes(input)) { + return {bytes_used, cpp::fail(Error::kOutOfInput)}; + } } - if (!ogg_.HasNextPacket()) { - return {bytes_used, cpp::fail(Error::kOutOfInput)}; - } - - auto packet = ogg_.NextPacket(); pos_in_buffer_ = 0; - samples_in_buffer_ = - opus_decode(opus_, packet.data(), packet.size_bytes(), - sample_buffer_.data(), sample_buffer_.size(), 0); + samples_in_buffer_ = 0; + while (samples_in_buffer_ <= 0 && ogg_.HasPacket()) { /* keep going until a packet yields samples or none are left */ + auto packet = ogg_.Current(); /* fetched on every pass: Next() below replaces the current packet */ + samples_in_buffer_ = + opus_decode(opus_, packet.data(), packet.size_bytes(), + sample_buffer_.data(), sample_buffer_.size(), 0); + ogg_.Next(); + } if (samples_in_buffer_ < 0) { - ESP_LOGE(kTag, "error decoding stream"); + std::string err_str; + switch (samples_in_buffer_) { + case OPUS_BAD_ARG: + err_str = "OPUS_BAD_ARG"; + break; + case OPUS_BUFFER_TOO_SMALL: + err_str = "OPUS_BUFFER_TOO_SMALL"; + break; + case OPUS_INTERNAL_ERROR: + err_str = "OPUS_INTERNAL_ERROR"; + break; + case OPUS_INVALID_PACKET: + err_str = "OPUS_INVALID_PACKET"; + break; + case OPUS_UNIMPLEMENTED: + err_str = "OPUS_UNIMPLEMENTED"; + break; + case OPUS_INVALID_STATE: + err_str = "OPUS_INVALID_STATE"; + break; + case OPUS_ALLOC_FAIL: + err_str = "OPUS_ALLOC_FAIL"; + break; + default: + err_str = "unknown"; + } + ESP_LOGE(kTag, "error decoding stream, err %s", err_str.c_str()); return {bytes_used, cpp::fail(Error::kMalformedData)}; } } diff --git a/src/codecs/stbvorbis.cpp b/src/codecs/stbvorbis.cpp deleted file mode 100644 index de315416..00000000 --- a/src/codecs/stbvorbis.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright 2023 jacqueline - * - * SPDX-License-Identifier: 
GPL-3.0-only - */ - -#include "stbvorbis.hpp" -#include - -#include -#include - -#include "stb_vorbis.h" - -namespace codecs { - -StbVorbisDecoder::StbVorbisDecoder() - : vorbis_(nullptr), - current_sample_(-1), - num_channels_(0), - num_samples_(0), - samples_array_(NULL) {} - -StbVorbisDecoder::~StbVorbisDecoder() { - if (vorbis_ != nullptr) { - stb_vorbis_close(vorbis_); - } -} - -static uint32_t scaleToBits(float sample, uint8_t bits) { - // Scale to range. - int32_t max_val = (1 << (bits - 1)); - int32_t fixed_point = sample * max_val; - - // Clamp within bounds. - fixed_point = std::clamp(fixed_point, -max_val, max_val); - - // Remove sign. - return *reinterpret_cast(&fixed_point); -} - -auto StbVorbisDecoder::BeginStream(const cpp::span input) - -> Result { - if (vorbis_ != nullptr) { - stb_vorbis_close(vorbis_); - vorbis_ = nullptr; - } - current_sample_ = -1; - int bytes_read = 0; - int error = 0; - vorbis_ = - stb_vorbis_open_pushdata(reinterpret_cast(input.data()), - input.size_bytes(), &bytes_read, &error, NULL); - if (error != 0) { - return {0, cpp::fail(Error::kMalformedData)}; - } - stb_vorbis_info info = stb_vorbis_get_info(vorbis_); - return {bytes_read, - OutputFormat{.num_channels = static_cast(info.channels), - .bits_per_sample = 24, - .sample_rate_hz = info.sample_rate}}; -} - -auto StbVorbisDecoder::ContinueStream(cpp::span input, - cpp::span output) - -> Result { - std::size_t bytes_used = 0; - if (current_sample_ < 0) { - num_channels_ = 0; - num_samples_ = 0; - samples_array_ = NULL; - - while (true) { - auto cropped = input.subspan(bytes_used); - std::size_t b = stb_vorbis_decode_frame_pushdata( - vorbis_, reinterpret_cast(cropped.data()), - cropped.size_bytes(), &num_channels_, &samples_array_, &num_samples_); - if (b == 0) { - return {bytes_used, cpp::fail(Error::kOutOfInput)}; - } - bytes_used += b; - - if (num_samples_ == 0) { - // Decoder is synchronising. Decode more bytes. 
- continue; - } - if (num_channels_ == 0 || samples_array_ == NULL) { - // The decoder isn't satisfying its contract. - return {bytes_used, cpp::fail(Error::kInternalError)}; - } - current_sample_ = 0; - break; - } - } - - // We successfully decoded a frame. Time to write out the samples. - std::size_t output_byte = 0; - while (current_sample_ < num_samples_) { - if (output_byte + (2 * num_channels_) >= output.size()) { - return {0, OutputInfo{.bytes_written = output_byte, - .is_finished_writing = false}}; - } - - for (int channel = 0; channel < num_channels_; channel++) { - float raw_sample = samples_array_[channel][current_sample_]; - - uint16_t sample_24 = scaleToBits(raw_sample, 24); - output[output_byte++] = static_cast((sample_24 >> 16) & 0xFF); - output[output_byte++] = static_cast((sample_24 >> 8) & 0xFF); - output[output_byte++] = static_cast((sample_24)&0xFF); - // Pad to 32 bits for alignment. - output[output_byte++] = static_cast(0); - } - current_sample_++; - } - - current_sample_ = -1; - return {bytes_used, OutputInfo{.bytes_written = output_byte, - .is_finished_writing = true}}; -} - -auto StbVorbisDecoder::SeekStream(cpp::span input, - std::size_t target_sample) -> Result { - // TODO(jacqueline): Implement me. 
- return {0, {}}; -} - -} // namespace codecs diff --git a/src/codecs/vorbis.cpp b/src/codecs/vorbis.cpp new file mode 100644 index 00000000..88ffbec4 --- /dev/null +++ b/src/codecs/vorbis.cpp @@ -0,0 +1,162 @@ +/* + * Copyright 2023 jacqueline + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "ivorbiscodec.h" +#include "ivorbisfile.h" +#include "ogg/config_types.h" +#include "opus.hpp" + +#include +#include + +#include +#include +#include + +#include "esp_heap_caps.h" +#include "mad.h" + +#include "codec.hpp" +#include "esp_log.h" +#include "ogg/ogg.h" +#include "opus.h" +#include "opus_defines.h" +#include "opus_types.h" +#include "result.hpp" +#include "sample.hpp" +#include "types.hpp" +#include "vorbis.hpp" + +namespace codecs { + +static constexpr char kTag[] = "vorbis"; + +size_t read_cb(void* ptr, size_t size, size_t nmemb, void* instance) { /* read callback: copies unread decoder input into ptr */ + TremorVorbisDecoder* dec = reinterpret_cast(instance); + auto input = dec->ReadCallback(); + size_t amount_to_read = std::min(size * nmemb, input.size_bytes()); /* never more than the input that is left */ + std::memcpy(ptr, input.data(), amount_to_read); + dec->AfterReadCallback(amount_to_read); + return amount_to_read; /* a byte count, not a count of `size`-sized items */ +} + +int seek_cb(void* instance, ogg_int64_t offset, int whence) { + // Seeking is handled separately. 
+ return -1; +} + +int close_cb(void* instance) { + return 0; +} + +static const ov_callbacks kCallbacks{ + .read_func = read_cb, + .seek_func = seek_cb, + .close_func = close_cb, + .tell_func = NULL, // Not seekable +}; + +TremorVorbisDecoder::TremorVorbisDecoder() + : vorbis_(), input_(), pos_in_input_(0) {} + +TremorVorbisDecoder::~TremorVorbisDecoder() { + ov_clear(&vorbis_); +} + +auto TremorVorbisDecoder::BeginStream(const cpp::span input) + -> Result { /* opens the stream with `input` as initial data; the whole span is always reported as consumed */ + int res = ov_open_callbacks(this, &vorbis_, /* `this` is the datasource that read_cb receives as `instance` */ + reinterpret_cast(input.data()), + input.size(), kCallbacks); + if (res < 0) { + std::string err; + switch (res) { + case OV_EREAD: + err = "OV_EREAD"; + break; + case OV_ENOTVORBIS: + err = "OV_ENOTVORBIS"; + break; + case OV_EVERSION: + err = "OV_EVERSION"; + break; + case OV_EBADHEADER: + err = "OV_EBADHEADER"; + break; + case OV_EFAULT: + err = "OV_EFAULT"; + break; + default: + err = "unknown"; + } + ESP_LOGE(kTag, "error beginning stream: %s", err.c_str()); + return {input.size(), cpp::fail(Error::kMalformedData)}; + } + + vorbis_info* info = ov_info(&vorbis_, -1); + if (info == NULL) { + ESP_LOGE(kTag, "failed to get stream info"); + return {input.size(), cpp::fail(Error::kMalformedData)}; + } + + return {input.size(), + OutputFormat{ + .num_channels = static_cast(info->channels), + .sample_rate_hz = static_cast(info->rate), + .bits_per_second = info->bitrate_nominal, + }}; +} + +auto TremorVorbisDecoder::ContinueStream(cpp::span input, + cpp::span output) + -> Result { /* decodes from `input` into `output`; the first result field is the number of input bytes read_cb consumed */ + cpp::span staging_buffer{ /* decode into the upper half of `output`, then convert into its start */ + reinterpret_cast(output.subspan(output.size() / 2).data()), + output.size_bytes() / 2}; + + input_ = input; + pos_in_input_ = 0; /* read_cb consumes from here while ov_read runs */ + + int bitstream; + long bytes_written = + ov_read(&vorbis_, reinterpret_cast(staging_buffer.data()), + staging_buffer.size_bytes(), &bitstream); + if (bytes_written == OV_HOLE) { + ESP_LOGE(kTag, "got OV_HOLE"); + return {pos_in_input_, cpp::fail(Error::kMalformedData)}; + } else if (bytes_written < 0) { /* OV_EBADLINK and every other error code; a negative count must never reach the copy below */ + ESP_LOGE(kTag, 
"ov_read failed, err %li", bytes_written); + return {pos_in_input_, cpp::fail(Error::kMalformedData)}; + } else if (bytes_written == 0) { + return {pos_in_input_, cpp::fail(Error::kOutOfInput)}; + } + + for (int i = 0; i < bytes_written / 2; i++) { + output[i] = sample::FromSigned(staging_buffer[i], 16); + } + + return {pos_in_input_, + OutputInfo{ + .samples_written = static_cast(bytes_written / 2), + .is_finished_writing = bytes_written == 0, /* always false here: the zero case returned above */ + }}; +} + +auto TremorVorbisDecoder::SeekStream(cpp::span input, + std::size_t target_sample) + -> Result { + return {}; /* seeking is not implemented here */ +} + +auto TremorVorbisDecoder::ReadCallback() -> cpp::span { + return input_.subspan(pos_in_input_); +} + +auto TremorVorbisDecoder::AfterReadCallback(size_t bytes_read) -> void { + pos_in_input_ += bytes_read; +} + +} // namespace codecs