diff --git a/src/audio/audio_decoder.cpp b/src/audio/audio_decoder.cpp index eb19b75f..310f5740 100644 --- a/src/audio/audio_decoder.cpp +++ b/src/audio/audio_decoder.cpp @@ -14,6 +14,7 @@ #include #include +#include "codec.hpp" #include "freertos/FreeRTOS.h" #include "esp_heap_caps.h" @@ -50,6 +51,9 @@ auto AudioDecoder::ProcessStreamInfo(const StreamInfo& info) -> bool { // Reuse the existing codec if we can. This will help with gapless playback, // since we can potentially just continue to decode as we were before, // without any setup overhead. + // TODO(jacqueline): Reconsider this. It makes a lot of things harder to smash + // streams together at this layer. + /* if (current_codec_ != nullptr && current_input_format_) { auto cur_encoding = std::get(*current_input_format_); if (cur_encoding.type == encoded.type) { @@ -58,6 +62,7 @@ auto AudioDecoder::ProcessStreamInfo(const StreamInfo& info) -> bool { return true; } } + */ current_input_format_ = info.format; ESP_LOGI(kTag, "creating new decoder"); @@ -80,68 +85,94 @@ auto AudioDecoder::Process(const std::vector& inputs, OutputStream* output) -> void { auto input = inputs.begin(); const StreamInfo& info = input->info(); - if (std::holds_alternative(info.format) || - info.bytes_in_stream == 0) { - // TODO(jacqueline): should we clear the stream format? - // output->prepare({}); - return; - } + // Check the input stream's format has changed (or, by extension, if this is + // the first stream). if (!current_input_format_ || *current_input_format_ != info.format) { - // The input stream has changed! Immediately throw everything away and - // start from scratch. + ESP_LOGI(kTag, "beginning new stream"); has_samples_to_send_ = false; ProcessStreamInfo(info); + auto res = current_codec_->BeginStream(input->data()); + input->consume(res.first); + if (res.second.has_error()) { + // TODO(jacqueline): Handle errors. + return; + } + + // The stream started successfully. Record what format the samples are in. + codecs::ICodec::OutputFormat format = res.second.value(); + current_output_format_ = StreamInfo::Pcm{ + .channels = format.num_channels, + .bits_per_sample = format.bits_per_sample, + .sample_rate = format.sample_rate_hz, + }; + + if (info.seek_to_seconds) { + seek_to_sample_ = *info.seek_to_seconds * format.sample_rate_hz; + } else { + seek_to_sample_.reset(); + } } - current_codec_->SetInput(input->data()); + while (seek_to_sample_) { + ESP_LOGI(kTag, "seeking forwards..."); + auto res = current_codec_->SeekStream(input->data(), *seek_to_sample_); + input->consume(res.first); + if (res.second.has_error()) { + auto err = res.second.error(); + if (err == codecs::ICodec::Error::kOutOfInput) { + return; + } else { + // TODO(jacqueline): Handle errors. + seek_to_sample_.reset(); + } + } else { + seek_to_sample_.reset(); + } + } + has_input_remaining_ = true; while (true) { - if (has_samples_to_send_) { - auto format = current_codec_->GetOutputFormat(); - if (format.has_value()) { - current_output_format_ = StreamInfo::Pcm{ - .channels = format->num_channels, - .bits_per_sample = format->bits_per_sample, - .sample_rate = format->sample_rate_hz, - }; - - if (!output->prepare(*current_output_format_)) { - break; - } - - auto write_res = current_codec_->WriteOutputSamples(output->data()); - output->add(write_res.first); - has_samples_to_send_ = !write_res.second; - - if (has_samples_to_send_) { - // We weren't able to fit all the generated samples into the output - // buffer. Stop trying; we'll finish up during the next pass. - break; - } - } + // TODO(jacqueline): Pass through seek info here? + if (!output->prepare(*current_output_format_)) { + ESP_LOGI(kTag, "waiting for buffer to become free"); + break; } - auto res = current_codec_->ProcessNextFrame(); - if (res.has_error()) { - // TODO(jacqueline): Handle errors. + auto res = current_codec_->ContinueStream(input->data(), output->data()); + input->consume(res.first); + if (res.second.has_error()) { + if (res.second.error() == codecs::ICodec::Error::kOutOfInput) { + ESP_LOGW(kTag, "out of input"); + ESP_LOGW(kTag, "(%u bytes left)", input->data().size_bytes()); + has_input_remaining_ = false; + // We can't be halfway through sending samples if the codec is asking + // for more input. + has_samples_to_send_ = false; + input->mark_incomplete(); + } else { + // TODO(jacqueline): Handle errors. + ESP_LOGE(kTag, "codec return fatal error"); + } return; } - has_input_remaining_ = !res.value(); - if (!has_input_remaining_) { - // We're out of useable data in this buffer. Finish immediately; there's - // nothing to send. - input->mark_incomplete(); - break; - } else { - has_samples_to_send_ = true; + ESP_LOGI(kTag, "enc read: %u", res.first); + + codecs::ICodec::OutputInfo out_info = res.second.value(); + output->add(out_info.bytes_written); + has_samples_to_send_ = !out_info.is_finished_writing; + + ESP_LOGI(kTag, "enc wrote: %u", out_info.bytes_written); + if (out_info.is_finished_writing) { + ESP_LOGI(kTag, "(write finished)"); } - } - std::size_t pos = current_codec_->GetInputPosition(); - if (pos > 0) { - input->consume(pos - 1); + if (has_samples_to_send_) { + // We weren't able to fit all the generated samples into the output + // buffer. Stop trying; we'll finish up during the next pass. + break; + } } } diff --git a/src/audio/audio_task.cpp b/src/audio/audio_task.cpp index 9dd7d994..eea84e45 100644 --- a/src/audio/audio_task.cpp +++ b/src/audio/audio_task.cpp @@ -126,7 +126,7 @@ void AudioTaskMain(std::unique_ptr pipeline, IAudioSink* sink) { if (sink_stream.info().bytes_in_stream == 0) { // No new bytes to sink, so skip sinking completely. - ESP_LOGI(kTag, "no bytes to sink"); + ESP_LOGW(kTag, "no bytes to sink"); continue; } diff --git a/src/audio/fatfs_audio_input.cpp b/src/audio/fatfs_audio_input.cpp index a89858ca..eaa62ee3 100644 --- a/src/audio/fatfs_audio_input.cpp +++ b/src/audio/fatfs_audio_input.cpp @@ -56,11 +56,13 @@ auto FatfsAudioInput::OpenFile(const std::string& path) -> bool { database::SongTags tags; if (!tag_parser.ReadAndParseTags(path, &tags)) { ESP_LOGE(kTag, "failed to read tags"); - return false; + tags.encoding = database::Encoding::kFlac; + // return false; } auto stream_type = ContainerToStreamType(tags.encoding); if (!stream_type.has_value()) { + ESP_LOGE(kTag, "couldn't match container to stream"); return false; } @@ -144,8 +146,8 @@ auto FatfsAudioInput::ContainerToStreamType(database::Encoding enc) return codecs::StreamType::kPcm; case database::Encoding::kFlac: return codecs::StreamType::kFlac; - case database::Encoding::kOgg: - return codecs::StreamType::kOgg; + case database::Encoding::kOgg: // Misnamed; this is Ogg Vorbis. + return codecs::StreamType::kVorbis; case database::Encoding::kUnsupported: default: return {}; diff --git a/src/audio/include/audio_decoder.hpp b/src/audio/include/audio_decoder.hpp index 3cda0305..4e7e127e 100644 --- a/src/audio/include/audio_decoder.hpp +++ b/src/audio/include/audio_decoder.hpp @@ -42,6 +42,7 @@ class AudioDecoder : public IAudioElement { std::unique_ptr current_codec_; std::optional current_input_format_; std::optional current_output_format_; + std::optional seek_to_sample_; bool has_samples_to_send_; bool has_input_remaining_; diff --git a/src/audio/include/stream_info.hpp b/src/audio/include/stream_info.hpp index 91b2f085..54b87003 100644 --- a/src/audio/include/stream_info.hpp +++ b/src/audio/include/stream_info.hpp @@ -6,6 +6,7 @@ #pragma once +#include #include #include #include @@ -30,6 +31,9 @@ struct StreamInfo { // generated audio, etc.) std::optional length_bytes{}; + // + std::optional seek_to_seconds{}; + struct Encoded { // The codec that this stream is associated with. codecs::StreamType type; diff --git a/src/codecs/CMakeLists.txt b/src/codecs/CMakeLists.txt index cdf9c99d..478d4d3f 100644 --- a/src/codecs/CMakeLists.txt +++ b/src/codecs/CMakeLists.txt @@ -3,7 +3,7 @@ # SPDX-License-Identifier: GPL-3.0-only idf_component_register( - SRCS "codec.cpp" "mad.cpp" + SRCS "codec.cpp" "mad.cpp" "foxenflac.cpp" "stbvorbis.cpp" INCLUDE_DIRS "include" REQUIRES "result" "span" "libmad" "libfoxenflac" "stb_vorbis") diff --git a/src/codecs/codec.cpp b/src/codecs/codec.cpp index 73bc9032..e23b8702 100644 --- a/src/codecs/codec.cpp +++ b/src/codecs/codec.cpp @@ -8,7 +8,10 @@ #include #include + +#include "foxenflac.hpp" #include "mad.hpp" +#include "stbvorbis.hpp" #include "types.hpp" namespace codecs { @@ -17,6 +20,10 @@ auto CreateCodecForType(StreamType type) -> std::optional { switch (type) { case StreamType::kMp3: return new MadMp3Decoder(); + case StreamType::kFlac: + return new FoxenFlacDecoder(); + case StreamType::kVorbis: + return new StbVorbisDecoder(); default: return {}; } diff --git a/src/codecs/foxenflac.cpp b/src/codecs/foxenflac.cpp new file mode 100644 index 00000000..a2d6f000 --- /dev/null +++ b/src/codecs/foxenflac.cpp @@ -0,0 +1,80 @@ +/* + * Copyright 2023 jacqueline + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "foxenflac.hpp" +#include + +#include + +#include "esp_log.h" +#include "foxen/flac.h" + +namespace codecs { + +FoxenFlacDecoder::FoxenFlacDecoder() + : flac_(FX_FLAC_ALLOC(FLAC_MAX_BLOCK_SIZE, 2)) {} + +FoxenFlacDecoder::~FoxenFlacDecoder() { + free(flac_); +} + +auto FoxenFlacDecoder::BeginStream(const cpp::span input) + -> Result { + uint32_t bytes_used = input.size_bytes(); + fx_flac_state_t state = + fx_flac_process(flac_, reinterpret_cast(input.data()), + &bytes_used, NULL, NULL); + if (state != FLAC_END_OF_METADATA) { + return {bytes_used, cpp::fail(Error::kMalformedData)}; + } + + int64_t channels = fx_flac_get_streaminfo(flac_, FLAC_KEY_N_CHANNELS); + int64_t fs = fx_flac_get_streaminfo(flac_, FLAC_KEY_SAMPLE_RATE); + if (channels == FLAC_INVALID_METADATA_KEY || + fs == FLAC_INVALID_METADATA_KEY) { + return {bytes_used, cpp::fail(Error::kMalformedData)}; + } + + return {bytes_used, + OutputFormat{ + .num_channels = static_cast(channels), + .bits_per_sample = 32, // libfoxenflac output is fixed-size. + .sample_rate_hz = static_cast(fs), + }}; +} + +auto FoxenFlacDecoder::ContinueStream(cpp::span input, + cpp::span output) + -> Result { + cpp::span output_as_samples{ + reinterpret_cast(output.data()), output.size_bytes() / 4}; + uint32_t bytes_read = input.size_bytes(); + uint32_t samples_written = output_as_samples.size(); + + fx_flac_state_t state = + fx_flac_process(flac_, reinterpret_cast(input.data()), + &bytes_read, output_as_samples.data(), &samples_written); + if (state == FLAC_ERR) { + return {bytes_read, cpp::fail(Error::kMalformedData)}; + } + + if (samples_written > 0) { + return {bytes_read, + OutputInfo{.bytes_written = samples_written * 4, + .is_finished_writing = state == FLAC_END_OF_FRAME}}; + } + + // No error, but no samples written. We must be out of data. + return {bytes_read, cpp::fail(Error::kOutOfInput)}; +} + +auto FoxenFlacDecoder::SeekStream(cpp::span input, + std::size_t target_sample) -> Result { + // TODO(jacqueline): Implement me. + return {0, {}}; +} + +} // namespace codecs diff --git a/src/codecs/include/codec.hpp b/src/codecs/include/codec.hpp index 31c67e13..4b5ab47f 100644 --- a/src/codecs/include/codec.hpp +++ b/src/codecs/include/codec.hpp @@ -21,48 +21,58 @@ namespace codecs { +/* + * Common interface to be implemented by all audio decoders. + */ class ICodec { public: virtual ~ICodec() {} + /* Errors that may be returned by codecs. */ + enum class Error { + // Indicates that more data is required before this codec can finish its + // operation. E.g. the input buffer ends with a truncated frame. + kOutOfInput, + // Indicates that the data within the input buffer is fatally malformed. + kMalformedData, + + kInternalError, + }; + + /* + * Alias for more readable return types. All codec methods, success or + * failure, should also return the number of bytes they consumed. + */ + template + using Result = std::pair>; + struct OutputFormat { uint8_t num_channels; uint8_t bits_per_sample; uint32_t sample_rate_hz; }; - virtual auto GetOutputFormat() -> std::optional = 0; - - enum ProcessingError { MALFORMED_DATA }; - - virtual auto SetInput(cpp::span input) -> void = 0; - /* - * Returns the codec's next read position within the input buffer. If the - * codec is out of usable data, but there is still some data left in the - * stream, that data should be prepended to the next input buffer. + * Decodes metadata or headers from the given input stream, and returns the + * format for the samples that will be decoded from it. */ - virtual auto GetInputPosition() -> std::size_t = 0; + virtual auto BeginStream(cpp::span input) + -> Result = 0; - /* - * Read one frame (or equivalent discrete chunk) from the input, and - * synthesize output samples for it. - * - * Returns true if we are out of usable data from the input stream, or false - * otherwise. - */ - virtual auto ProcessNextFrame() -> cpp::result = 0; + struct OutputInfo { + std::size_t bytes_written; + bool is_finished_writing; + }; /* * Writes PCM samples to the given output buffer. - * - * Returns the number of bytes that were written, and true if all of the - * samples synthesized from the last call to `ProcessNextFrame` have been - * written. If this returns false, then this method should be called again - * after flushing the output buffer. */ - virtual auto WriteOutputSamples(cpp::span output) - -> std::pair = 0; + virtual auto ContinueStream(cpp::span input, + cpp::span output) + -> Result = 0; + + virtual auto SeekStream(cpp::span input, + std::size_t target_sample) -> Result = 0; }; auto CreateCodecForType(StreamType type) -> std::optional; diff --git a/src/codecs/include/foxenflac.hpp b/src/codecs/include/foxenflac.hpp new file mode 100644 index 00000000..cce1b762 --- /dev/null +++ b/src/codecs/include/foxenflac.hpp @@ -0,0 +1,38 @@ +/* + * Copyright 2023 jacqueline + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "foxen/flac.h" +#include "span.hpp" + +#include "codec.hpp" + +namespace codecs { + +class FoxenFlacDecoder : public ICodec { + public: + FoxenFlacDecoder(); + ~FoxenFlacDecoder(); + + auto BeginStream(cpp::span) -> Result override; + auto ContinueStream(cpp::span, cpp::span) + -> Result override; + auto SeekStream(cpp::span input, std::size_t target_sample) + -> Result override; + + private: + fx_flac_t* flac_; +}; + +} // namespace codecs diff --git a/src/codecs/include/mad.hpp b/src/codecs/include/mad.hpp index 5ba4db84..e1c479bf 100644 --- a/src/codecs/include/mad.hpp +++ b/src/codecs/include/mad.hpp @@ -24,12 +24,22 @@ class MadMp3Decoder : public ICodec { MadMp3Decoder(); ~MadMp3Decoder(); - auto GetOutputFormat() -> std::optional override; - auto SetInput(cpp::span input) -> void override; - auto GetInputPosition() -> std::size_t override; - auto ProcessNextFrame() -> cpp::result override; - auto WriteOutputSamples(cpp::span output) - -> std::pair override; + /* + * Returns the output format for the next frame in the stream. MP3 streams + * may represent multiple distinct tracks, with different bitrates, and so we + * handle the stream only on a frame-by-frame basis. + */ + auto BeginStream(cpp::span) -> Result override; + + /* + * Writes samples for the current frame. + */ + auto ContinueStream(cpp::span input, + cpp::span output) + -> Result override; + + auto SeekStream(cpp::span input, std::size_t target_sample) + -> Result override; private: mad_stream stream_; @@ -37,6 +47,8 @@ class MadMp3Decoder : public ICodec { mad_synth synth_; int current_sample_; + + auto GetInputPosition() -> std::size_t; }; } // namespace codecs diff --git a/src/codecs/include/stbvorbis.hpp b/src/codecs/include/stbvorbis.hpp new file mode 100644 index 00000000..045e264e --- /dev/null +++ b/src/codecs/include/stbvorbis.hpp @@ -0,0 +1,42 @@ +/* + * Copyright 2023 jacqueline + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "stb_vorbis.h" + +#include "codec.hpp" + +namespace codecs { + +class StbVorbisDecoder : public ICodec { + public: + StbVorbisDecoder(); + ~StbVorbisDecoder(); + + auto BeginStream(cpp::span) -> Result override; + auto ContinueStream(cpp::span, cpp::span) + -> Result override; + auto SeekStream(cpp::span input, std::size_t target_sample) + -> Result override; + + private: + stb_vorbis* vorbis_; + + int current_sample_; + int num_channels_; + int num_samples_; + float** samples_array_; +}; + +} // namespace codecs diff --git a/src/codecs/include/types.hpp b/src/codecs/include/types.hpp index 61d36a28..3dfc1da9 100644 --- a/src/codecs/include/types.hpp +++ b/src/codecs/include/types.hpp @@ -13,7 +13,7 @@ namespace codecs { enum class StreamType { kMp3, kPcm, - kOgg, + kVorbis, kFlac, }; diff --git a/src/codecs/mad.cpp b/src/codecs/mad.cpp index fbe85213..8b9897eb 100644 --- a/src/codecs/mad.cpp +++ b/src/codecs/mad.cpp @@ -13,11 +13,12 @@ #include "mad.h" #include "codec.hpp" +#include "result.hpp" #include "types.hpp" namespace codecs { -static uint32_t scaleToBits(mad_fixed_t sample, uint8_t bits) { +static uint32_t mad_fixed_to_pcm(mad_fixed_t sample, uint8_t bits) { // Round the bottom bits. sample += (1L << (MAD_F_FRACBITS - bits)); @@ -42,93 +43,167 @@ MadMp3Decoder::~MadMp3Decoder() { mad_synth_finish(&synth_); } -auto MadMp3Decoder::GetOutputFormat() -> std::optional { - if (synth_.pcm.channels == 0 || synth_.pcm.samplerate == 0) { - return {}; - } - return std::optional({ - .num_channels = static_cast(synth_.pcm.channels), - .bits_per_sample = 24, - .sample_rate_hz = synth_.pcm.samplerate, - }); +auto MadMp3Decoder::GetInputPosition() -> std::size_t { + return stream_.next_frame - stream_.buffer; } -auto MadMp3Decoder::SetInput(cpp::span input) -> void { +auto MadMp3Decoder::BeginStream(const cpp::span input) + -> Result { mad_stream_buffer(&stream_, reinterpret_cast(input.data()), input.size()); -} - -auto MadMp3Decoder::GetInputPosition() -> std::size_t { - return stream_.next_frame - stream_.buffer; -} - -auto MadMp3Decoder::ProcessNextFrame() -> cpp::result { // Whatever was last synthesized is now invalid, so ensure we don't try to // send it. current_sample_ = -1; - // Decode the next frame. To signal errors, this returns -1 and - // stashes an error code in the stream structure. - if (mad_frame_decode(&frame_, &stream_) < 0) { + // To get the output format for MP3 streams, we simply need to decode the + // first frame header. + mad_header header; + mad_header_init(&header); + while (mad_header_decode(&header, &stream_) < 0) { if (MAD_RECOVERABLE(stream_.error)) { // Recoverable errors are usually malformed parts of the stream. // We can recover from them by just retrying the decode. - return false; + continue; + } else { + // Don't bother checking for other errors; if the first part of the stream + // doesn't even contain a header then something's gone wrong. + return {GetInputPosition(), cpp::fail(Error::kMalformedData)}; } - - if (stream_.error == MAD_ERROR_BUFLEN) { - // The decoder ran out of bytes before it completed a frame. We - // need to return back to the caller to give us more data. - return true; - } - - // The error is unrecoverable. Give up. - return cpp::fail(MALFORMED_DATA); } - // We've successfully decoded a frame! - // Now we need to synthesize PCM samples based on the frame, and send - // them downstream. - mad_synth_frame(&synth_, &frame_); - current_sample_ = 0; - return false; + uint8_t channels = MAD_NCHANNELS(&header); + return {GetInputPosition(), + OutputFormat{ + .num_channels = channels, + .bits_per_sample = 24, // We always scale to 24 bits + .sample_rate_hz = header.samplerate, + }}; } -auto MadMp3Decoder::WriteOutputSamples(cpp::span output) - -> std::pair { - size_t output_byte = 0; - // First ensure that we actually have some samples to send off. +auto MadMp3Decoder::ContinueStream(cpp::span input, + cpp::span output) + -> Result { if (current_sample_ < 0) { - return std::make_pair(output_byte, true); + mad_stream_buffer(&stream_, + reinterpret_cast(input.data()), + input.size()); + + // Decode the next frame. To signal errors, this returns -1 and + // stashes an error code in the stream structure. + while (mad_frame_decode(&frame_, &stream_) < 0) { + if (MAD_RECOVERABLE(stream_.error)) { + // Recoverable errors are usually malformed parts of the stream. + // We can recover from them by just retrying the decode. + continue; + } + if (stream_.error == MAD_ERROR_BUFLEN) { + // The decoder ran out of bytes before it completed a frame. We + // need to return back to the caller to give us more data. + return {GetInputPosition(), cpp::fail(Error::kOutOfInput)}; + } + // The error is unrecoverable. Give up. + return {GetInputPosition(), cpp::fail(Error::kMalformedData)}; + } + + // We've successfully decoded a frame! Now synthesize samples to write out. + mad_synth_frame(&synth_, &frame_); + current_sample_ = 0; } + size_t output_byte = 0; while (current_sample_ < synth_.pcm.length) { - if (output_byte + (2 * synth_.pcm.channels) >= output.size()) { - return std::make_pair(output_byte, false); + if (output_byte + (4 * synth_.pcm.channels) >= output.size()) { + // We can't fit the next sample into the buffer. Stop now, and also avoid + // writing the sample for only half the channels. + return {GetInputPosition(), OutputInfo{.bytes_written = output_byte, + .is_finished_writing = false}}; } for (int channel = 0; channel < synth_.pcm.channels; channel++) { uint32_t sample_24 = - scaleToBits(synth_.pcm.samples[channel][current_sample_], 24); + mad_fixed_to_pcm(synth_.pcm.samples[channel][current_sample_], 24); output[output_byte++] = static_cast((sample_24 >> 16) & 0xFF); output[output_byte++] = static_cast((sample_24 >> 8) & 0xFF); output[output_byte++] = static_cast((sample_24)&0xFF); // 24 bit samples must still be aligned to 32 bits. The LSB is ignored. output[output_byte++] = static_cast(0); - /* - uint16_t sample_16 = - scaleToBits(synth_.pcm.samples[channel][current_sample_], 16); - output[output_byte++] = static_cast((sample_16 >> 8) & 0xFF); - output[output_byte++] = static_cast((sample_16)&0xFF); - */ } current_sample_++; } // We wrote everything! Reset, ready for the next frame. current_sample_ = -1; - return std::make_pair(output_byte, true); + return {GetInputPosition(), OutputInfo{.bytes_written = output_byte, + .is_finished_writing = true}}; +} + +auto MadMp3Decoder::SeekStream(cpp::span input, + std::size_t target_sample) -> Result { + mad_stream_buffer(&stream_, + reinterpret_cast(input.data()), + input.size()); + std::size_t current_sample = 0; + std::size_t samples_per_frame = 0; + while (true) { + current_sample += samples_per_frame; + + // First, decode the header for this frame. + mad_header header; + mad_header_init(&header); + while (mad_header_decode(&header, &stream_) < 0) { + if (MAD_RECOVERABLE(stream_.error)) { + // Recoverable errors are usually malformed parts of the stream. + // We can recover from them by just retrying the decode. + continue; + } else { + // Don't bother checking for other errors; if the first part of the + // stream doesn't even contain a header then something's gone wrong. + return {GetInputPosition(), cpp::fail(Error::kMalformedData)}; + } + } + + // Calculate samples per frame if we haven't already. + if (samples_per_frame == 0) { + samples_per_frame = 32 * MAD_NSBSAMPLES(&header); + } + + // Work out how close we are to the target. + std::size_t samples_to_go = target_sample - current_sample; + std::size_t frames_to_go = samples_to_go / samples_per_frame; + if (frames_to_go > 3) { + // The target is far in the distance. Keep skipping through headers only. + continue; + } + + // The target is within the next few frames. We should decode these, to give + // the decoder a chance to sync with the stream. + while (mad_frame_decode(&frame_, &stream_) < 0) { + if (MAD_RECOVERABLE(stream_.error)) { + continue; + } + if (stream_.error == MAD_ERROR_BUFLEN) { + return {GetInputPosition(), cpp::fail(Error::kOutOfInput)}; + } + // The error is unrecoverable. Give up. + return {GetInputPosition(), cpp::fail(Error::kMalformedData)}; + } + + if (frames_to_go <= 1) { + // The target is within the next couple of frames. We should start + // synthesizing a frame early because this guy says so: + // https://lists.mars.org/hyperkitty/list/mad-dev@lists.mars.org/message/UZSHXZTIZEF7FZ4KFOR65DUCKAY2OCUT/ + mad_synth_frame(&synth_, &frame_); + } + + if (frames_to_go == 0) { + // The target is actually within this frame! Set up for the ContinueStream + // call. + current_sample_ = + (target_sample > current_sample) ? target_sample - current_sample : 0; + return {GetInputPosition(), {}}; + } + } } } // namespace codecs diff --git a/src/codecs/stbvorbis.cpp b/src/codecs/stbvorbis.cpp new file mode 100644 index 00000000..de315416 --- /dev/null +++ b/src/codecs/stbvorbis.cpp @@ -0,0 +1,128 @@ +/* + * Copyright 2023 jacqueline + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "stbvorbis.hpp" +#include + +#include +#include + +#include "stb_vorbis.h" + +namespace codecs { + +StbVorbisDecoder::StbVorbisDecoder() + : vorbis_(nullptr), + current_sample_(-1), + num_channels_(0), + num_samples_(0), + samples_array_(NULL) {} + +StbVorbisDecoder::~StbVorbisDecoder() { + if (vorbis_ != nullptr) { + stb_vorbis_close(vorbis_); + } +} + +static uint32_t scaleToBits(float sample, uint8_t bits) { + // Scale to range. + int32_t max_val = (1 << (bits - 1)); + int32_t fixed_point = sample * max_val; + + // Clamp within bounds. + fixed_point = std::clamp(fixed_point, -max_val, max_val); + + // Remove sign. + return *reinterpret_cast(&fixed_point); +} + +auto StbVorbisDecoder::BeginStream(const cpp::span input) + -> Result { + if (vorbis_ != nullptr) { + stb_vorbis_close(vorbis_); + vorbis_ = nullptr; + } + current_sample_ = -1; + int bytes_read = 0; + int error = 0; + vorbis_ = + stb_vorbis_open_pushdata(reinterpret_cast(input.data()), + input.size_bytes(), &bytes_read, &error, NULL); + if (error != 0) { + return {0, cpp::fail(Error::kMalformedData)}; + } + stb_vorbis_info info = stb_vorbis_get_info(vorbis_); + return {bytes_read, + OutputFormat{.num_channels = static_cast(info.channels), + .bits_per_sample = 24, + .sample_rate_hz = info.sample_rate}}; +} + +auto StbVorbisDecoder::ContinueStream(cpp::span input, + cpp::span output) + -> Result { + std::size_t bytes_used = 0; + if (current_sample_ < 0) { + num_channels_ = 0; + num_samples_ = 0; + samples_array_ = NULL; + + while (true) { + auto cropped = input.subspan(bytes_used); + std::size_t b = stb_vorbis_decode_frame_pushdata( + vorbis_, reinterpret_cast(cropped.data()), + cropped.size_bytes(), &num_channels_, &samples_array_, &num_samples_); + if (b == 0) { + return {bytes_used, cpp::fail(Error::kOutOfInput)}; + } + bytes_used += b; + + if (num_samples_ == 0) { + // Decoder is synchronising. Decode more bytes. + continue; + } + if (num_channels_ == 0 || samples_array_ == NULL) { + // The decoder isn't satisfying its contract. + return {bytes_used, cpp::fail(Error::kInternalError)}; + } + current_sample_ = 0; + break; + } + } + + // We successfully decoded a frame. Time to write out the samples. + std::size_t output_byte = 0; + while (current_sample_ < num_samples_) { + if (output_byte + (2 * num_channels_) >= output.size()) { + return {0, OutputInfo{.bytes_written = output_byte, + .is_finished_writing = false}}; + } + + for (int channel = 0; channel < num_channels_; channel++) { + float raw_sample = samples_array_[channel][current_sample_]; + + uint16_t sample_24 = scaleToBits(raw_sample, 24); + output[output_byte++] = static_cast((sample_24 >> 16) & 0xFF); + output[output_byte++] = static_cast((sample_24 >> 8) & 0xFF); + output[output_byte++] = static_cast((sample_24)&0xFF); + // Pad to 32 bits for alignment. + output[output_byte++] = static_cast(0); + } + current_sample_++; + } + + current_sample_ = -1; + return {bytes_used, OutputInfo{.bytes_written = output_byte, + .is_finished_writing = true}}; +} + +auto StbVorbisDecoder::SeekStream(cpp::span input, + std::size_t target_sample) -> Result { + // TODO(jacqueline): Implement me. + return {0, {}}; +} + +} // namespace codecs diff --git a/src/database/tag_parser.cpp b/src/database/tag_parser.cpp index 27d4163f..589c988f 100644 --- a/src/database/tag_parser.cpp +++ b/src/database/tag_parser.cpp @@ -96,6 +96,7 @@ auto TagParserImpl::ReadAndParseTags(const std::string& path, SongTags* out) if (res != 0) { // Parsing failed. + ESP_LOGE(kTag, "tag parsing failed, reason %d", res); return false; } @@ -103,6 +104,15 @@ auto TagParserImpl::ReadAndParseTags(const std::string& path, SongTags* out) case Fmp3: out->encoding = Encoding::kMp3; break; + case Fogg: + out->encoding = Encoding::kOgg; + break; + case Fflac: + out->encoding = Encoding::kFlac; + break; + case Fwav: + out->encoding = Encoding::kWav; + break; default: out->encoding = Encoding::kUnsupported; } diff --git a/src/tasks/tasks.cpp b/src/tasks/tasks.cpp index b95d8e16..2477d8b9 100644 --- a/src/tasks/tasks.cpp +++ b/src/tasks/tasks.cpp @@ -39,7 +39,7 @@ auto AllocateStack() -> cpp::span; // amount of stack space. template <> auto AllocateStack() -> cpp::span { - std::size_t size = 32 * 1024; + std::size_t size = 48 * 1024; return {static_cast(heap_caps_malloc(size, MALLOC_CAP_DEFAULT)), size}; }