Add vorbis and flac decoders, flesh out codec interface

Vorbis doesn't quite work yet, and I'm not sure why. I will pick it up again
later.
custom
jacqueline 2 years ago
parent 1238437717
commit a2c1dfbabd
  1. 125
      src/audio/audio_decoder.cpp
  2. 2
      src/audio/audio_task.cpp
  3. 8
      src/audio/fatfs_audio_input.cpp
  4. 1
      src/audio/include/audio_decoder.hpp
  5. 4
      src/audio/include/stream_info.hpp
  6. 2
      src/codecs/CMakeLists.txt
  7. 7
      src/codecs/codec.cpp
  8. 80
      src/codecs/foxenflac.cpp
  9. 60
      src/codecs/include/codec.hpp
  10. 38
      src/codecs/include/foxenflac.hpp
  11. 24
      src/codecs/include/mad.hpp
  12. 42
      src/codecs/include/stbvorbis.hpp
  13. 2
      src/codecs/include/types.hpp
  14. 179
      src/codecs/mad.cpp
  15. 128
      src/codecs/stbvorbis.cpp
  16. 10
      src/database/tag_parser.cpp
  17. 2
      src/tasks/tasks.cpp

@ -14,6 +14,7 @@
#include <memory>
#include <variant>
#include "codec.hpp"
#include "freertos/FreeRTOS.h"
#include "esp_heap_caps.h"
@ -50,6 +51,9 @@ auto AudioDecoder::ProcessStreamInfo(const StreamInfo& info) -> bool {
// Reuse the existing codec if we can. This will help with gapless playback,
// since we can potentially just continue to decode as we were before,
// without any setup overhead.
// TODO(jacqueline): Reconsider this. It makes a lot of things harder to smash
// streams together at this layer.
/*
if (current_codec_ != nullptr && current_input_format_) {
auto cur_encoding = std::get<StreamInfo::Encoded>(*current_input_format_);
if (cur_encoding.type == encoded.type) {
@ -58,6 +62,7 @@ auto AudioDecoder::ProcessStreamInfo(const StreamInfo& info) -> bool {
return true;
}
}
*/
current_input_format_ = info.format;
ESP_LOGI(kTag, "creating new decoder");
@ -80,68 +85,94 @@ auto AudioDecoder::Process(const std::vector<InputStream>& inputs,
OutputStream* output) -> void {
auto input = inputs.begin();
const StreamInfo& info = input->info();
if (std::holds_alternative<std::monostate>(info.format) ||
info.bytes_in_stream == 0) {
// TODO(jacqueline): should we clear the stream format?
// output->prepare({});
return;
}
// Check the input stream's format has changed (or, by extension, if this is
// the first stream).
if (!current_input_format_ || *current_input_format_ != info.format) {
// The input stream has changed! Immediately throw everything away and
// start from scratch.
ESP_LOGI(kTag, "beginning new stream");
has_samples_to_send_ = false;
ProcessStreamInfo(info);
auto res = current_codec_->BeginStream(input->data());
input->consume(res.first);
if (res.second.has_error()) {
// TODO(jacqueline): Handle errors.
return;
}
// The stream started successfully. Record what format the samples are in.
codecs::ICodec::OutputFormat format = res.second.value();
current_output_format_ = StreamInfo::Pcm{
.channels = format.num_channels,
.bits_per_sample = format.bits_per_sample,
.sample_rate = format.sample_rate_hz,
};
if (info.seek_to_seconds) {
seek_to_sample_ = *info.seek_to_seconds * format.sample_rate_hz;
} else {
seek_to_sample_.reset();
}
}
current_codec_->SetInput(input->data());
while (seek_to_sample_) {
ESP_LOGI(kTag, "seeking forwards...");
auto res = current_codec_->SeekStream(input->data(), *seek_to_sample_);
input->consume(res.first);
if (res.second.has_error()) {
auto err = res.second.error();
if (err == codecs::ICodec::Error::kOutOfInput) {
return;
} else {
// TODO(jacqueline): Handle errors.
seek_to_sample_.reset();
}
} else {
seek_to_sample_.reset();
}
}
has_input_remaining_ = true;
while (true) {
if (has_samples_to_send_) {
auto format = current_codec_->GetOutputFormat();
if (format.has_value()) {
current_output_format_ = StreamInfo::Pcm{
.channels = format->num_channels,
.bits_per_sample = format->bits_per_sample,
.sample_rate = format->sample_rate_hz,
};
if (!output->prepare(*current_output_format_)) {
break;
}
auto write_res = current_codec_->WriteOutputSamples(output->data());
output->add(write_res.first);
has_samples_to_send_ = !write_res.second;
if (has_samples_to_send_) {
// We weren't able to fit all the generated samples into the output
// buffer. Stop trying; we'll finish up during the next pass.
break;
}
}
// TODO(jacqueline): Pass through seek info here?
if (!output->prepare(*current_output_format_)) {
ESP_LOGI(kTag, "waiting for buffer to become free");
break;
}
auto res = current_codec_->ProcessNextFrame();
if (res.has_error()) {
// TODO(jacqueline): Handle errors.
auto res = current_codec_->ContinueStream(input->data(), output->data());
input->consume(res.first);
if (res.second.has_error()) {
if (res.second.error() == codecs::ICodec::Error::kOutOfInput) {
ESP_LOGW(kTag, "out of input");
ESP_LOGW(kTag, "(%u bytes left)", input->data().size_bytes());
has_input_remaining_ = false;
// We can't be halfway through sending samples if the codec is asking
// for more input.
has_samples_to_send_ = false;
input->mark_incomplete();
} else {
// TODO(jacqueline): Handle errors.
ESP_LOGE(kTag, "codec return fatal error");
}
return;
}
has_input_remaining_ = !res.value();
if (!has_input_remaining_) {
// We're out of useable data in this buffer. Finish immediately; there's
// nothing to send.
input->mark_incomplete();
break;
} else {
has_samples_to_send_ = true;
ESP_LOGI(kTag, "enc read: %u", res.first);
codecs::ICodec::OutputInfo out_info = res.second.value();
output->add(out_info.bytes_written);
has_samples_to_send_ = !out_info.is_finished_writing;
ESP_LOGI(kTag, "enc wrote: %u", out_info.bytes_written);
if (out_info.is_finished_writing) {
ESP_LOGI(kTag, "(write finished)");
}
}
std::size_t pos = current_codec_->GetInputPosition();
if (pos > 0) {
input->consume(pos - 1);
if (has_samples_to_send_) {
// We weren't able to fit all the generated samples into the output
// buffer. Stop trying; we'll finish up during the next pass.
break;
}
}
}

@ -126,7 +126,7 @@ void AudioTaskMain(std::unique_ptr<Pipeline> pipeline, IAudioSink* sink) {
if (sink_stream.info().bytes_in_stream == 0) {
// No new bytes to sink, so skip sinking completely.
ESP_LOGI(kTag, "no bytes to sink");
ESP_LOGW(kTag, "no bytes to sink");
continue;
}

@ -56,11 +56,13 @@ auto FatfsAudioInput::OpenFile(const std::string& path) -> bool {
database::SongTags tags;
if (!tag_parser.ReadAndParseTags(path, &tags)) {
ESP_LOGE(kTag, "failed to read tags");
return false;
tags.encoding = database::Encoding::kFlac;
// return false;
}
auto stream_type = ContainerToStreamType(tags.encoding);
if (!stream_type.has_value()) {
ESP_LOGE(kTag, "couldn't match container to stream");
return false;
}
@ -144,8 +146,8 @@ auto FatfsAudioInput::ContainerToStreamType(database::Encoding enc)
return codecs::StreamType::kPcm;
case database::Encoding::kFlac:
return codecs::StreamType::kFlac;
case database::Encoding::kOgg:
return codecs::StreamType::kOgg;
case database::Encoding::kOgg: // Misnamed; this is Ogg Vorbis.
return codecs::StreamType::kVorbis;
case database::Encoding::kUnsupported:
default:
return {};

@ -42,6 +42,7 @@ class AudioDecoder : public IAudioElement {
std::unique_ptr<codecs::ICodec> current_codec_;
std::optional<StreamInfo::Format> current_input_format_;
std::optional<StreamInfo::Format> current_output_format_;
std::optional<std::size_t> seek_to_sample_;
bool has_samples_to_send_;
bool has_input_remaining_;

@ -6,6 +6,7 @@
#pragma once
#include <stdint.h>
#include <cstdint>
#include <optional>
#include <string>
@ -30,6 +31,9 @@ struct StreamInfo {
// generated audio, etc.)
std::optional<std::size_t> length_bytes{};
//
std::optional<uint32_t> seek_to_seconds{};
struct Encoded {
// The codec that this stream is associated with.
codecs::StreamType type;

@ -3,7 +3,7 @@
# SPDX-License-Identifier: GPL-3.0-only
idf_component_register(
SRCS "codec.cpp" "mad.cpp"
SRCS "codec.cpp" "mad.cpp" "foxenflac.cpp" "stbvorbis.cpp"
INCLUDE_DIRS "include"
REQUIRES "result" "span" "libmad" "libfoxenflac" "stb_vorbis")

@ -8,7 +8,10 @@
#include <memory>
#include <optional>
#include "foxenflac.hpp"
#include "mad.hpp"
#include "stbvorbis.hpp"
#include "types.hpp"
namespace codecs {
@ -17,6 +20,10 @@ auto CreateCodecForType(StreamType type) -> std::optional<ICodec*> {
switch (type) {
case StreamType::kMp3:
return new MadMp3Decoder();
case StreamType::kFlac:
return new FoxenFlacDecoder();
case StreamType::kVorbis:
return new StbVorbisDecoder();
default:
return {};
}

@ -0,0 +1,80 @@
/*
* Copyright 2023 jacqueline <me@jacqueline.id.au>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#include "foxenflac.hpp"
#include <stdint.h>
#include <cstdlib>
#include "esp_log.h"
#include "foxen/flac.h"
namespace codecs {
// Creates the underlying libfoxenflac decoder instance via FX_FLAC_ALLOC,
// passing FLAC_MAX_BLOCK_SIZE and 2 (presumably the maximum channel count --
// confirm against the libfoxenflac headers).
FoxenFlacDecoder::FoxenFlacDecoder() : flac_(nullptr) {
  flac_ = FX_FLAC_ALLOC(FLAC_MAX_BLOCK_SIZE, 2);
}
// Releases the decoder instance that was allocated in the constructor.
FoxenFlacDecoder::~FoxenFlacDecoder() {
  std::free(flac_);
}
/*
 * Feeds the start of a FLAC stream to the decoder so that it can parse the
 * stream's metadata, then reports the format of the samples that will be
 * decoded from it.
 *
 * Returns the number of input bytes consumed, paired with either the output
 * format or one of:
 *  - kInternalError if the decoder instance was never allocated.
 *  - kMalformedData if the decoder reported an error, or if the parsed
 *    stream info has no channel count or sample rate.
 *  - kOutOfInput if the input ran out before the end of the metadata was
 *    reached.
 */
auto FoxenFlacDecoder::BeginStream(const cpp::span<const std::byte> input)
    -> Result<OutputFormat> {
  if (flac_ == nullptr) {
    // Allocation failed in the constructor; there is nothing to decode with.
    return {0, cpp::fail(Error::kInternalError)};
  }

  // In: number of bytes available. Out: number of bytes actually consumed.
  uint32_t bytes_used = input.size_bytes();
  // No output buffer is given, since we only want the metadata at this point.
  const fx_flac_state_t state =
      fx_flac_process(flac_, reinterpret_cast<const uint8_t*>(input.data()),
                      &bytes_used, nullptr, nullptr);
  if (state == FLAC_ERR) {
    return {bytes_used, cpp::fail(Error::kMalformedData)};
  }
  if (state != FLAC_END_OF_METADATA) {
    // No error, but the metadata isn't complete yet; the decoder needs more
    // bytes than this buffer held.
    return {bytes_used, cpp::fail(Error::kOutOfInput)};
  }

  const int64_t channels = fx_flac_get_streaminfo(flac_, FLAC_KEY_N_CHANNELS);
  const int64_t fs = fx_flac_get_streaminfo(flac_, FLAC_KEY_SAMPLE_RATE);
  if (channels == FLAC_INVALID_METADATA_KEY ||
      fs == FLAC_INVALID_METADATA_KEY) {
    return {bytes_used, cpp::fail(Error::kMalformedData)};
  }

  return {bytes_used,
          OutputFormat{
              .num_channels = static_cast<uint8_t>(channels),
              .bits_per_sample = 32,  // libfoxenflac output is fixed-size.
              .sample_rate_hz = static_cast<uint32_t>(fs),
          }};
}
/*
 * Decodes more of the FLAC stream from `input`, writing the resulting samples
 * into `output` as 32-bit integers.
 *
 * Returns the number of input bytes consumed, paired with either a
 * description of what was written, kMalformedData if the decoder reported an
 * error, or kOutOfInput if no samples could be produced.
 */
auto FoxenFlacDecoder::ContinueStream(cpp::span<const std::byte> input,
                                      cpp::span<std::byte> output)
    -> Result<OutputInfo> {
  // The decoder writes int32_t samples, so treat the output buffer as an
  // array of those.
  auto* const sample_buffer = reinterpret_cast<int32_t*>(output.data());
  const auto* const encoded = reinterpret_cast<const uint8_t*>(input.data());

  // Both counters are in/out parameters: capacity on the way in, amount used
  // on the way out.
  uint32_t samples_written = output.size_bytes() / 4;
  uint32_t bytes_read = input.size_bytes();

  const fx_flac_state_t state = fx_flac_process(
      flac_, encoded, &bytes_read, sample_buffer, &samples_written);

  if (state == FLAC_ERR) {
    return {bytes_read, cpp::fail(Error::kMalformedData)};
  }
  if (samples_written == 0) {
    // No error, but no samples written. We must be out of data.
    return {bytes_read, cpp::fail(Error::kOutOfInput)};
  }

  OutputInfo info{.bytes_written = samples_written * 4,
                  .is_finished_writing = state == FLAC_END_OF_FRAME};
  return {bytes_read, info};
}
/*
 * Seeking is not implemented for FLAC yet. This stub consumes no input and
 * returns a default-constructed result.
 */
auto FoxenFlacDecoder::SeekStream(cpp::span<const std::byte> input,
                                  std::size_t target_sample) -> Result<void> {
  // TODO(jacqueline): Implement me.
  constexpr std::size_t kBytesConsumed = 0;
  return {kBytesConsumed, {}};
}
} // namespace codecs

@ -21,48 +21,58 @@
namespace codecs {
/*
* Common interface to be implemented by all audio decoders.
*/
class ICodec {
public:
virtual ~ICodec() {}
/* Errors that may be returned by codecs. */
enum class Error {
// Indicates that more data is required before this codec can finish its
// operation. E.g. the input buffer ends with a truncated frame.
kOutOfInput,
// Indicates that the data within the input buffer is fatally malformed.
kMalformedData,
kInternalError,
};
/*
* Alias for more readable return types. All codec methods, success or
* failure, should also return the number of bytes they consumed.
*/
template <typename T>
using Result = std::pair<std::size_t, cpp::result<T, Error>>;
struct OutputFormat {
uint8_t num_channels;
uint8_t bits_per_sample;
uint32_t sample_rate_hz;
};
virtual auto GetOutputFormat() -> std::optional<OutputFormat> = 0;
enum ProcessingError { MALFORMED_DATA };
virtual auto SetInput(cpp::span<const std::byte> input) -> void = 0;
/*
* Returns the codec's next read position within the input buffer. If the
* codec is out of usable data, but there is still some data left in the
* stream, that data should be prepended to the next input buffer.
* Decodes metadata or headers from the given input stream, and returns the
* format for the samples that will be decoded from it.
*/
virtual auto GetInputPosition() -> std::size_t = 0;
virtual auto BeginStream(cpp::span<const std::byte> input)
-> Result<OutputFormat> = 0;
/*
* Read one frame (or equivalent discrete chunk) from the input, and
* synthesize output samples for it.
*
* Returns true if we are out of usable data from the input stream, or false
* otherwise.
*/
virtual auto ProcessNextFrame() -> cpp::result<bool, ProcessingError> = 0;
struct OutputInfo {
std::size_t bytes_written;
bool is_finished_writing;
};
/*
* Writes PCM samples to the given output buffer.
*
* Returns the number of bytes that were written, and true if all of the
* samples synthesized from the last call to `ProcessNextFrame` have been
* written. If this returns false, then this method should be called again
* after flushing the output buffer.
*/
virtual auto WriteOutputSamples(cpp::span<std::byte> output)
-> std::pair<std::size_t, bool> = 0;
virtual auto ContinueStream(cpp::span<const std::byte> input,
cpp::span<std::byte> output)
-> Result<OutputInfo> = 0;
virtual auto SeekStream(cpp::span<const std::byte> input,
std::size_t target_sample) -> Result<void> = 0;
};
auto CreateCodecForType(StreamType type) -> std::optional<ICodec*>;

@ -0,0 +1,38 @@
/*
* Copyright 2023 jacqueline <me@jacqueline.id.au>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#pragma once
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include "foxen/flac.h"
#include "span.hpp"
#include "codec.hpp"
namespace codecs {
/*
 * ICodec implementation for FLAC streams, backed by libfoxenflac.
 */
class FoxenFlacDecoder : public ICodec {
 public:
  FoxenFlacDecoder();
  ~FoxenFlacDecoder();

  // This class owns the raw decoder allocation held in flac_ and frees it in
  // its destructor, so a copy would free the same allocation twice.
  FoxenFlacDecoder(const FoxenFlacDecoder&) = delete;
  auto operator=(const FoxenFlacDecoder&) -> FoxenFlacDecoder& = delete;

  /*
   * Parses the stream's metadata, and returns the format of the samples that
   * will be decoded from it.
   */
  auto BeginStream(cpp::span<const std::byte>) -> Result<OutputFormat> override;

  /*
   * Decodes samples from the input buffer into the output buffer.
   */
  auto ContinueStream(cpp::span<const std::byte>, cpp::span<std::byte>)
      -> Result<OutputInfo> override;

  /* Not yet implemented; currently consumes nothing. */
  auto SeekStream(cpp::span<const std::byte> input, std::size_t target_sample)
      -> Result<void> override;

 private:
  // Decoder instance; allocated in the constructor, freed in the destructor.
  fx_flac_t* flac_;
};
} // namespace codecs

@ -24,12 +24,22 @@ class MadMp3Decoder : public ICodec {
MadMp3Decoder();
~MadMp3Decoder();
auto GetOutputFormat() -> std::optional<OutputFormat> override;
auto SetInput(cpp::span<const std::byte> input) -> void override;
auto GetInputPosition() -> std::size_t override;
auto ProcessNextFrame() -> cpp::result<bool, ProcessingError> override;
auto WriteOutputSamples(cpp::span<std::byte> output)
-> std::pair<std::size_t, bool> override;
/*
* Returns the output format for the next frame in the stream. MP3 streams
* may represent multiple distinct tracks, with different bitrates, and so we
* handle the stream only on a frame-by-frame basis.
*/
auto BeginStream(cpp::span<const std::byte>) -> Result<OutputFormat> override;
/*
* Writes samples for the current frame.
*/
auto ContinueStream(cpp::span<const std::byte> input,
cpp::span<std::byte> output)
-> Result<OutputInfo> override;
auto SeekStream(cpp::span<const std::byte> input, std::size_t target_sample)
-> Result<void> override;
private:
mad_stream stream_;
@ -37,6 +47,8 @@ class MadMp3Decoder : public ICodec {
mad_synth synth_;
int current_sample_;
auto GetInputPosition() -> std::size_t;
};
} // namespace codecs

@ -0,0 +1,42 @@
/*
* Copyright 2023 jacqueline <me@jacqueline.id.au>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#pragma once
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include "stb_vorbis.h"
#include "codec.hpp"
namespace codecs {
/*
 * ICodec implementation for Vorbis streams, backed by stb_vorbis' push-data
 * API.
 */
class StbVorbisDecoder : public ICodec {
 public:
  StbVorbisDecoder();
  ~StbVorbisDecoder();

  // This class owns the stb_vorbis handle held in vorbis_ and closes it in
  // its destructor, so a copy would close the same handle twice.
  StbVorbisDecoder(const StbVorbisDecoder&) = delete;
  auto operator=(const StbVorbisDecoder&) -> StbVorbisDecoder& = delete;

  /*
   * Opens a decoder over the start of the stream, and returns the format of
   * the samples that will be decoded from it.
   */
  auto BeginStream(cpp::span<const std::byte>) -> Result<OutputFormat> override;

  /*
   * Decodes the next frame if the previous one has been fully written, then
   * writes as many of the current frame's samples as fit into the output.
   */
  auto ContinueStream(cpp::span<const std::byte>, cpp::span<std::byte>)
      -> Result<OutputInfo> override;

  /* Not yet implemented; currently consumes nothing. */
  auto SeekStream(cpp::span<const std::byte> input, std::size_t target_sample)
      -> Result<void> override;

 private:
  // Decoder handle. Null until BeginStream succeeds.
  stb_vorbis* vorbis_;
  // Index of the next sample of the current frame to write out, or negative
  // if there is no decoded frame waiting to be written.
  int current_sample_;
  // Details of the most recently decoded frame, as reported by stb_vorbis.
  int num_channels_;
  int num_samples_;
  float** samples_array_;
};
} // namespace codecs

@ -13,7 +13,7 @@ namespace codecs {
enum class StreamType {
kMp3,
kPcm,
kOgg,
kVorbis,
kFlac,
};

@ -13,11 +13,12 @@
#include "mad.h"
#include "codec.hpp"
#include "result.hpp"
#include "types.hpp"
namespace codecs {
static uint32_t scaleToBits(mad_fixed_t sample, uint8_t bits) {
static uint32_t mad_fixed_to_pcm(mad_fixed_t sample, uint8_t bits) {
// Round the bottom bits.
sample += (1L << (MAD_F_FRACBITS - bits));
@ -42,93 +43,167 @@ MadMp3Decoder::~MadMp3Decoder() {
mad_synth_finish(&synth_);
}
auto MadMp3Decoder::GetOutputFormat() -> std::optional<OutputFormat> {
if (synth_.pcm.channels == 0 || synth_.pcm.samplerate == 0) {
return {};
}
return std::optional<OutputFormat>({
.num_channels = static_cast<uint8_t>(synth_.pcm.channels),
.bits_per_sample = 24,
.sample_rate_hz = synth_.pcm.samplerate,
});
auto MadMp3Decoder::GetInputPosition() -> std::size_t {
return stream_.next_frame - stream_.buffer;
}
auto MadMp3Decoder::SetInput(cpp::span<const std::byte> input) -> void {
auto MadMp3Decoder::BeginStream(const cpp::span<const std::byte> input)
-> Result<OutputFormat> {
mad_stream_buffer(&stream_,
reinterpret_cast<const unsigned char*>(input.data()),
input.size());
}
auto MadMp3Decoder::GetInputPosition() -> std::size_t {
return stream_.next_frame - stream_.buffer;
}
auto MadMp3Decoder::ProcessNextFrame() -> cpp::result<bool, ProcessingError> {
// Whatever was last synthesized is now invalid, so ensure we don't try to
// send it.
current_sample_ = -1;
// Decode the next frame. To signal errors, this returns -1 and
// stashes an error code in the stream structure.
if (mad_frame_decode(&frame_, &stream_) < 0) {
// To get the output format for MP3 streams, we simply need to decode the
// first frame header.
mad_header header;
mad_header_init(&header);
while (mad_header_decode(&header, &stream_) < 0) {
if (MAD_RECOVERABLE(stream_.error)) {
// Recoverable errors are usually malformed parts of the stream.
// We can recover from them by just retrying the decode.
return false;
continue;
} else {
// Don't bother checking for other errors; if the first part of the stream
// doesn't even contain a header then something's gone wrong.
return {GetInputPosition(), cpp::fail(Error::kMalformedData)};
}
if (stream_.error == MAD_ERROR_BUFLEN) {
// The decoder ran out of bytes before it completed a frame. We
// need to return back to the caller to give us more data.
return true;
}
// The error is unrecoverable. Give up.
return cpp::fail(MALFORMED_DATA);
}
// We've successfully decoded a frame!
// Now we need to synthesize PCM samples based on the frame, and send
// them downstream.
mad_synth_frame(&synth_, &frame_);
current_sample_ = 0;
return false;
uint8_t channels = MAD_NCHANNELS(&header);
return {GetInputPosition(),
OutputFormat{
.num_channels = channels,
.bits_per_sample = 24, // We always scale to 24 bits
.sample_rate_hz = header.samplerate,
}};
}
auto MadMp3Decoder::WriteOutputSamples(cpp::span<std::byte> output)
-> std::pair<std::size_t, bool> {
size_t output_byte = 0;
// First ensure that we actually have some samples to send off.
auto MadMp3Decoder::ContinueStream(cpp::span<const std::byte> input,
cpp::span<std::byte> output)
-> Result<OutputInfo> {
if (current_sample_ < 0) {
return std::make_pair(output_byte, true);
mad_stream_buffer(&stream_,
reinterpret_cast<const unsigned char*>(input.data()),
input.size());
// Decode the next frame. To signal errors, this returns -1 and
// stashes an error code in the stream structure.
while (mad_frame_decode(&frame_, &stream_) < 0) {
if (MAD_RECOVERABLE(stream_.error)) {
// Recoverable errors are usually malformed parts of the stream.
// We can recover from them by just retrying the decode.
continue;
}
if (stream_.error == MAD_ERROR_BUFLEN) {
// The decoder ran out of bytes before it completed a frame. We
// need to return back to the caller to give us more data.
return {GetInputPosition(), cpp::fail(Error::kOutOfInput)};
}
// The error is unrecoverable. Give up.
return {GetInputPosition(), cpp::fail(Error::kMalformedData)};
}
// We've successfully decoded a frame! Now synthesize samples to write out.
mad_synth_frame(&synth_, &frame_);
current_sample_ = 0;
}
size_t output_byte = 0;
while (current_sample_ < synth_.pcm.length) {
if (output_byte + (2 * synth_.pcm.channels) >= output.size()) {
return std::make_pair(output_byte, false);
if (output_byte + (4 * synth_.pcm.channels) >= output.size()) {
// We can't fit the next sample into the buffer. Stop now, and also avoid
// writing the sample for only half the channels.
return {GetInputPosition(), OutputInfo{.bytes_written = output_byte,
.is_finished_writing = false}};
}
for (int channel = 0; channel < synth_.pcm.channels; channel++) {
uint32_t sample_24 =
scaleToBits(synth_.pcm.samples[channel][current_sample_], 24);
mad_fixed_to_pcm(synth_.pcm.samples[channel][current_sample_], 24);
output[output_byte++] = static_cast<std::byte>((sample_24 >> 16) & 0xFF);
output[output_byte++] = static_cast<std::byte>((sample_24 >> 8) & 0xFF);
output[output_byte++] = static_cast<std::byte>((sample_24)&0xFF);
// 24 bit samples must still be aligned to 32 bits. The LSB is ignored.
output[output_byte++] = static_cast<std::byte>(0);
/*
uint16_t sample_16 =
scaleToBits(synth_.pcm.samples[channel][current_sample_], 16);
output[output_byte++] = static_cast<std::byte>((sample_16 >> 8) & 0xFF);
output[output_byte++] = static_cast<std::byte>((sample_16)&0xFF);
*/
}
current_sample_++;
}
// We wrote everything! Reset, ready for the next frame.
current_sample_ = -1;
return std::make_pair(output_byte, true);
return {GetInputPosition(), OutputInfo{.bytes_written = output_byte,
.is_finished_writing = true}};
}
auto MadMp3Decoder::SeekStream(cpp::span<const std::byte> input,
std::size_t target_sample) -> Result<void> {
mad_stream_buffer(&stream_,
reinterpret_cast<const unsigned char*>(input.data()),
input.size());
std::size_t current_sample = 0;
std::size_t samples_per_frame = 0;
while (true) {
current_sample += samples_per_frame;
// First, decode the header for this frame.
mad_header header;
mad_header_init(&header);
while (mad_header_decode(&header, &stream_) < 0) {
if (MAD_RECOVERABLE(stream_.error)) {
// Recoverable errors are usually malformed parts of the stream.
// We can recover from them by just retrying the decode.
continue;
} else {
// Don't bother checking for other errors; if the first part of the
// stream doesn't even contain a header then something's gone wrong.
return {GetInputPosition(), cpp::fail(Error::kMalformedData)};
}
}
// Calculate samples per frame if we haven't already.
if (samples_per_frame == 0) {
samples_per_frame = 32 * MAD_NSBSAMPLES(&header);
}
// Work out how close we are to the target.
std::size_t samples_to_go = target_sample - current_sample;
std::size_t frames_to_go = samples_to_go / samples_per_frame;
if (frames_to_go > 3) {
// The target is far in the distance. Keep skipping through headers only.
continue;
}
// The target is within the next few frames. We should decode these, to give
// the decoder a chance to sync with the stream.
while (mad_frame_decode(&frame_, &stream_) < 0) {
if (MAD_RECOVERABLE(stream_.error)) {
continue;
}
if (stream_.error == MAD_ERROR_BUFLEN) {
return {GetInputPosition(), cpp::fail(Error::kOutOfInput)};
}
// The error is unrecoverable. Give up.
return {GetInputPosition(), cpp::fail(Error::kMalformedData)};
}
if (frames_to_go <= 1) {
// The target is within the next couple of frames. We should start
// synthesizing a frame early because this guy says so:
// https://lists.mars.org/hyperkitty/list/mad-dev@lists.mars.org/message/UZSHXZTIZEF7FZ4KFOR65DUCKAY2OCUT/
mad_synth_frame(&synth_, &frame_);
}
if (frames_to_go == 0) {
// The target is actually within this frame! Set up for the ContinueStream
// call.
current_sample_ =
(target_sample > current_sample) ? target_sample - current_sample : 0;
return {GetInputPosition(), {}};
}
}
}
} // namespace codecs

@ -0,0 +1,128 @@
/*
* Copyright 2023 jacqueline <me@jacqueline.id.au>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#include "stbvorbis.hpp"
#include <stdint.h>
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <optional>
#include "stb_vorbis.h"
namespace codecs {
StbVorbisDecoder::StbVorbisDecoder()
: vorbis_(nullptr),
current_sample_(-1),
num_channels_(0),
num_samples_(0),
samples_array_(NULL) {}
// Closes the decoder handle, if one was ever opened.
StbVorbisDecoder::~StbVorbisDecoder() {
  if (vorbis_ == nullptr) {
    return;
  }
  stb_vorbis_close(vorbis_);
}
/*
 * Converts a floating point sample (nominally within [-1.0, 1.0]) into a
 * signed fixed point sample of the given bit depth.
 *
 * The result is clamped to the representable range
 * [-2^(bits-1), 2^(bits-1) - 1], and returned as the two's complement bit
 * pattern of the (sign-extended) 32 bit value. NaN input, and bit depths
 * outside of 1..32, produce 0.
 */
static uint32_t scaleToBits(float sample, uint8_t bits) {
  if (bits == 0 || bits > 32 || std::isnan(sample)) {
    return 0;
  }

  // Work in double so that every bound up to 32 bits is exactly
  // representable, and so that clamping happens *before* the conversion to
  // an integer; converting an out-of-range float to an int is undefined.
  const double full_scale = static_cast<double>(int64_t{1} << (bits - 1));
  const double scaled = static_cast<double>(sample) * full_scale;

  // The largest positive value is one less than full scale. Clamping to full
  // scale itself would wrap a +1.0 sample around to the most negative value.
  const double clamped = std::clamp(scaled, -full_scale, full_scale - 1.0);

  // Truncate towards zero, then reinterpret the sign bit as data.
  const auto fixed_point = static_cast<int32_t>(clamped);
  return static_cast<uint32_t>(fixed_point);
}
/*
 * Opens a new push-data decoder over the start of a Vorbis stream, closing
 * any decoder left over from a previous stream, and reports the format of
 * the samples that will be decoded.
 *
 * Returns the number of input bytes consumed, paired with either the output
 * format or one of:
 *  - kOutOfInput if stb_vorbis needs a larger block of data to get started.
 *  - kInternalError if the decoder could not be allocated.
 *  - kMalformedData for any other failure to open the stream.
 * No input is consumed when an error is returned.
 */
auto StbVorbisDecoder::BeginStream(const cpp::span<const std::byte> input)
    -> Result<OutputFormat> {
  if (vorbis_ != nullptr) {
    stb_vorbis_close(vorbis_);
    vorbis_ = nullptr;
  }
  current_sample_ = -1;

  int bytes_read = 0;
  int error = 0;
  vorbis_ =
      stb_vorbis_open_pushdata(reinterpret_cast<const uint8_t*>(input.data()),
                               input.size_bytes(), &bytes_read, &error,
                               nullptr);
  if (vorbis_ == nullptr) {
    // Check the handle rather than just the error code; a failed allocation
    // can yield a null handle without an error being reported.
    if (error == VORBIS_need_more_data) {
      return {0, cpp::fail(Error::kOutOfInput)};
    }
    if (error == 0 || error == VORBIS_outofmem) {
      return {0, cpp::fail(Error::kInternalError)};
    }
    return {0, cpp::fail(Error::kMalformedData)};
  }

  stb_vorbis_info info = stb_vorbis_get_info(vorbis_);
  return {bytes_read,
          OutputFormat{.num_channels = static_cast<uint8_t>(info.channels),
                       .bits_per_sample = 24,
                       .sample_rate_hz = info.sample_rate}};
}
/*
 * Decodes the next frame from `input` (unless samples from the previous
 * frame are still waiting to be written), then writes as many samples as
 * will fit into `output`.
 *
 * Samples are written interleaved by channel. Each sample is scaled to 24
 * bits and stored most significant byte first, followed by one padding byte.
 *
 * Returns the number of input bytes consumed, paired with either a
 * description of what was written or one of:
 *  - kOutOfInput if the decoder needs more data to produce a frame.
 *  - kInternalError if there is no open decoder, or the decoder reported
 *    samples without channels or a sample buffer.
 */
auto StbVorbisDecoder::ContinueStream(cpp::span<const std::byte> input,
                                      cpp::span<std::byte> output)
    -> Result<OutputInfo> {
  if (vorbis_ == nullptr) {
    // BeginStream was never called, or it failed.
    return {0, cpp::fail(Error::kInternalError)};
  }

  std::size_t bytes_used = 0;
  if (current_sample_ < 0) {
    // Nothing is pending from an earlier frame, so decode a new one.
    num_channels_ = 0;
    num_samples_ = 0;
    samples_array_ = nullptr;

    while (true) {
      auto cropped = input.subspan(bytes_used);
      const int consumed = stb_vorbis_decode_frame_pushdata(
          vorbis_, reinterpret_cast<const uint8_t*>(cropped.data()),
          cropped.size_bytes(), &num_channels_, &samples_array_,
          &num_samples_);
      if (consumed <= 0) {
        return {bytes_used, cpp::fail(Error::kOutOfInput)};
      }
      bytes_used += consumed;

      if (num_samples_ == 0) {
        // Decoder is synchronising. Decode more bytes.
        continue;
      }
      if (num_channels_ == 0 || samples_array_ == nullptr) {
        // The decoder isn't satisfying its contract.
        return {bytes_used, cpp::fail(Error::kInternalError)};
      }
      current_sample_ = 0;
      break;
    }
  }

  // We have a decoded frame. Time to write out the samples.
  // Every channel's sample occupies four bytes: three of data, one of padding.
  const std::size_t bytes_per_frame =
      4 * static_cast<std::size_t>(num_channels_);
  std::size_t output_byte = 0;
  while (current_sample_ < num_samples_) {
    if (output_byte + bytes_per_frame > output.size()) {
      // We can't fit the next sample for every channel into the buffer. Stop
      // here, but still report the input that decoding this frame consumed;
      // otherwise the same bytes would be fed to the decoder a second time.
      return {bytes_used, OutputInfo{.bytes_written = output_byte,
                                     .is_finished_writing = false}};
    }

    for (int channel = 0; channel < num_channels_; channel++) {
      const float raw_sample = samples_array_[channel][current_sample_];
      // Must be wider than 16 bits, or the top byte of the sample is lost.
      const uint32_t sample_24 = scaleToBits(raw_sample, 24);
      output[output_byte++] = static_cast<std::byte>((sample_24 >> 16) & 0xFF);
      output[output_byte++] = static_cast<std::byte>((sample_24 >> 8) & 0xFF);
      output[output_byte++] = static_cast<std::byte>((sample_24)&0xFF);
      // Pad to 32 bits for alignment.
      output[output_byte++] = static_cast<std::byte>(0);
    }
    current_sample_++;
  }

  // We wrote everything! Reset, ready for the next frame.
  current_sample_ = -1;
  return {bytes_used, OutputInfo{.bytes_written = output_byte,
                                 .is_finished_writing = true}};
}
/*
 * Seeking is not implemented for Vorbis yet. This stub consumes no input and
 * returns a default-constructed result.
 */
auto StbVorbisDecoder::SeekStream(cpp::span<const std::byte> input,
                                  std::size_t target_sample) -> Result<void> {
  // TODO(jacqueline): Implement me.
  constexpr std::size_t kBytesConsumed = 0;
  return {kBytesConsumed, {}};
}
} // namespace codecs

@ -96,6 +96,7 @@ auto TagParserImpl::ReadAndParseTags(const std::string& path, SongTags* out)
if (res != 0) {
// Parsing failed.
ESP_LOGE(kTag, "tag parsing failed, reason %d", res);
return false;
}
@ -103,6 +104,15 @@ auto TagParserImpl::ReadAndParseTags(const std::string& path, SongTags* out)
case Fmp3:
out->encoding = Encoding::kMp3;
break;
case Fogg:
out->encoding = Encoding::kOgg;
break;
case Fflac:
out->encoding = Encoding::kFlac;
break;
case Fwav:
out->encoding = Encoding::kWav;
break;
default:
out->encoding = Encoding::kUnsupported;
}

@ -39,7 +39,7 @@ auto AllocateStack() -> cpp::span<StackType_t>;
// amount of stack space.
template <>
auto AllocateStack<Type::kAudio>() -> cpp::span<StackType_t> {
std::size_t size = 32 * 1024;
std::size_t size = 48 * 1024;
return {static_cast<StackType_t*>(heap_caps_malloc(size, MALLOC_CAP_DEFAULT)),
size};
}

Loading…
Cancel
Save