Fork of Tangara with customizations
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
tangara-fw/src/codecs/mad.cpp

319 lines
9.6 KiB

/*
* Copyright 2023 jacqueline <me@jacqueline.id.au>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#include "mad.hpp"
#include <stdint.h>
#include <sys/_stdint.h>
#include <cstdint>
#include <cstring>
#include <optional>
#include "esp_heap_caps.h"
#include "mad.h"
#include "codec.hpp"
#include "esp_log.h"
#include "result.hpp"
#include "sample.hpp"
#include "types.hpp"
namespace codecs {
// Tag passed to the ESP-IDF logging macros for messages emitted by this codec.
[[maybe_unused]] static constexpr char kTag[] = "mad";
// Heap capabilities requested for the libmad stream and frame structs. The
// synth struct is allocated with different capabilities in the constructor.
static constexpr uint32_t kMallocCaps = MALLOC_CAP_SPIRAM;
// Allocates the three libmad state structs and runs libmad's init routine on
// each of them.
//
// The stream and frame structs are requested with kMallocCaps, whilst the
// synth struct is requested with MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT.
//
// NOTE(review): none of the allocations are checked for failure before the
// init routines are called on them.
MadMp3Decoder::MadMp3Decoder()
    : input_(),
      buffer_(),
      stream_(static_cast<mad_stream*>(
          heap_caps_malloc(sizeof(mad_stream), kMallocCaps))),
      frame_(static_cast<mad_frame*>(
          heap_caps_malloc(sizeof(mad_frame), kMallocCaps))),
      synth_(static_cast<mad_synth*>(heap_caps_malloc(
          sizeof(mad_synth), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT))),
      // A negative sample index means "no decoded frame is pending output".
      current_sample_(-1),
      is_eof_(false),
      is_eos_(false) {
  mad_stream_init(stream_.get());
  mad_frame_init(frame_.get());
  mad_synth_init(synth_.get());
}
// Runs libmad's finish routine on each of the state structs that were
// initialised in the constructor.
//
// This function does not explicitly free the structs' memory; presumably that
// is handled by the owning members' deleters -- confirm against mad.hpp.
MadMp3Decoder::~MadMp3Decoder() {
mad_stream_finish(stream_.get());
mad_frame_finish(frame_.get());
mad_synth_finish(synth_.get());
}
// Returns the number of bytes, counted from the start of the buffer currently
// attached to the libmad stream, that precede the stream's `next_frame`
// pointer. If `next_frame` is null, the distance to the end of the buffer is
// returned instead, i.e. the whole buffer counts as used.
auto MadMp3Decoder::GetBytesUsed() -> std::size_t {
  const auto* used_up_to =
      stream_->next_frame ? stream_->next_frame : stream_->bufend;
  return used_up_to - stream_->buffer;
}
// Prepares the decoder to read MP3 data from `input`.
//
// Any leading ID3 tags are skipped, then the first frame header that libmad
// can decode is used to determine the channel count and sample rate. A total
// length is filled in when it can be derived from a VBR header in that frame,
// or otherwise estimated from the stream size and the first frame's bitrate.
// Finally, frame headers are decoded (without decoding audio) until the
// accumulated frame durations reach `offset`, which is compared in whole
// seconds.
//
// Returns the output format on success, or ICodec::Error::kMalformedData if no
// frame header could be decoded, or if the requested offset could not be
// reached.
auto MadMp3Decoder::OpenStream(std::shared_ptr<IStream> input, uint32_t offset)
-> cpp::result<OutputFormat, ICodec::Error> {
input_ = input;
SkipID3Tags(*input);
// To get the output format for MP3 streams, we simply need to decode the
// first frame header.
mad_header header;
mad_header_init(&header);
bool eof = false;
bool got_header = false;
while (!eof && !got_header) {
// The result of Refill is treated as an "input is exhausted" flag.
eof = buffer_.Refill(input_.get());
buffer_.ConsumeBytes([&](std::span<std::byte> buf) -> size_t {
mad_stream_buffer(stream_.get(),
reinterpret_cast<const unsigned char*>(buf.data()),
buf.size_bytes());
while (mad_header_decode(&header, stream_.get()) < 0) {
if (MAD_RECOVERABLE(stream_->error)) {
// Recoverable errors are usually malformed parts of the stream.
// We can recover from them by just retrying the decode.
continue;
}
if (stream_->error == MAD_ERROR_BUFLEN) {
// libmad ran out of buffered data; report what was used so that the
// outer loop can refill and try again.
return GetBytesUsed();
}
// Any other error is unrecoverable. Stop looking for a header.
eof = true;
return 0;
}
got_header = true;
return GetBytesUsed();
});
}
if (!got_header) {
return cpp::fail(ICodec::Error::kMalformedData);
}
uint8_t channels = MAD_NCHANNELS(&header);
OutputFormat output{
.num_channels = channels,
.sample_rate_hz = header.samplerate,
};
// Work out the total length, counted in samples across all channels. Prefer
// the frame count from a VBR header; otherwise fall back to assuming that the
// whole stream is encoded at the first frame's bitrate.
auto vbr_length = GetVbrLength(header);
if (vbr_length) {
output.total_samples = vbr_length.value() * channels;
} else if (input->Size() && header.bitrate > 0) {
// NOTE(review): `header.bitrate / 8` would be zero for a bitrate below 8,
// which would make this a division by zero -- confirm libmad never reports
// such a value. The estimate also uses the full stream size, which
// presumably includes any ID3 tags that were skipped above.
auto cbr_length = input->Size().value() / (header.bitrate / 8);
output.total_samples = cbr_length * output.sample_rate_hz * channels;
}
// Seek to the requested offset by decoding frame headers only, and summing
// the duration of each frame.
mad_timer_t timer;
mad_timer_reset(&timer);
bool need_refill = false;
bool seek_err = false;
while (mad_timer_count(timer, MAD_UNITS_SECONDS) < offset) {
if (seek_err) {
return cpp::fail(ICodec::Error::kMalformedData);
}
// Running out of input before the offset is reached is reported as an
// error.
// NOTE(review): if Refill can both add data and report end-of-input in the
// same call, then this rejects offsets that fall within the final refill --
// confirm against the buffer implementation.
if (need_refill && buffer_.Refill(input_.get())) {
return cpp::fail(ICodec::Error::kMalformedData);
}
need_refill = false;
buffer_.ConsumeBytes([&](std::span<std::byte> buf) -> size_t {
mad_stream_buffer(stream_.get(),
reinterpret_cast<const unsigned char*>(buf.data()),
buf.size());
while (mad_header_decode(&header, stream_.get()) < 0) {
if (MAD_RECOVERABLE(stream_->error)) {
continue;
}
if (stream_->error == MAD_ERROR_BUFLEN) {
need_refill = true;
return GetBytesUsed();
}
// The error is unrecoverable. Give up.
seek_err = true;
return 0;
}
// One more frame has been skipped over; count its duration.
mad_timer_add(&timer, header.duration);
return GetBytesUsed();
});
}
return output;
}
// Decodes MP3 data from the input stream into `output`.
//
// Each call decodes at most one new MPEG frame, and only if no previously
// decoded frame is still waiting to be written out. The pending frame's PCM
// samples are then copied into `output`, interleaved by channel, for as long
// as a complete set of channel samples still fits. Samples that do not fit
// are emitted by later calls before any further frame is decoded.
//
// Returns the number of samples written (summed over all channels), and
// whether the end of the stream has been reached. A call may write zero
// samples, e.g. when more input must be buffered before a frame can be
// decoded.
auto MadMp3Decoder::DecodeTo(std::span<sample::Sample> output)
    -> cpp::result<OutputInfo, Error> {
  if (current_sample_ < 0 && !is_eos_) {
    if (!is_eof_) {
      is_eof_ = buffer_.Refill(input_.get());
      if (is_eof_) {
        // The input is exhausted. Pad the buffered data with
        // MAD_BUFFER_GUARD zero bytes, which libmad expects to find after
        // the final frame in order to decode it.
        buffer_.AddBytes([&](std::span<std::byte> buf) -> size_t {
          if (buf.size() < MAD_BUFFER_GUARD) {
            // No room for the guard bytes yet. Clear the EOF flag so that
            // padding is attempted again on the next call.
            is_eof_ = false;
            return 0;
          }
          ESP_LOGI(kTag, "adding MAD_BUFFER_GUARD");
          std::fill_n(buf.begin(), MAD_BUFFER_GUARD, std::byte(0));
          // Report exactly the number of bytes that were just written,
          // rather than a hard-coded copy of the constant's value.
          return MAD_BUFFER_GUARD;
        });
      }
    }

    buffer_.ConsumeBytes([&](std::span<std::byte> buf) -> size_t {
      mad_stream_buffer(stream_.get(),
                        reinterpret_cast<const unsigned char*>(buf.data()),
                        buf.size());

      // Decode the next frame. To signal errors, this returns -1 and
      // stashes an error code in the stream structure.
      while (mad_frame_decode(frame_.get(), stream_.get()) < 0) {
        if (MAD_RECOVERABLE(stream_->error)) {
          // Recoverable errors are usually malformed parts of the stream.
          // We can recover from them by just retrying the decode.
          continue;
        }
        if (stream_->error == MAD_ERROR_BUFLEN) {
          // libmad needs more data than is buffered. If the input is
          // already exhausted then there is nothing more to decode.
          if (is_eof_) {
            is_eos_ = true;
          }
          return GetBytesUsed();
        }
        // The error is unrecoverable. Give up.
        is_eof_ = true;
        is_eos_ = true;
        return 0;
      }

      // We've successfully decoded a frame! Now synthesize samples to write
      // out.
      mad_synth_frame(synth_.get(), frame_.get());
      current_sample_ = 0;
      return GetBytesUsed();
    });
  }

  size_t output_sample = 0;
  if (current_sample_ >= 0) {
    while (current_sample_ < synth_->pcm.length) {
      // Writing one sample per channel needs `channels` free slots. The
      // samples fit as long as they end at or before the end of `output`,
      // so only bail out when they would run past it.
      if (output_sample + synth_->pcm.channels > output.size()) {
        // We can't fit the next full set of channel samples into the buffer.
        return OutputInfo{.samples_written = output_sample,
                          .is_stream_finished = false};
      }

      for (int channel = 0; channel < synth_->pcm.channels; channel++) {
        output[output_sample++] =
            sample::FromMad(synth_->pcm.samples[channel][current_sample_]);
      }
      current_sample_++;
    }
  }

  // We wrote everything! Reset, ready for the next frame.
  current_sample_ = -1;
  return OutputInfo{.samples_written = output_sample,
                    .is_stream_finished = is_eos_};
}
// Positions `stream` at the first byte following a leading ID3v2 tag.
//
// If the stream does not begin with the "ID3" magic, or if the tag header
// cannot be read in full, then the stream is rewound to its start so that
// no data is hidden from the decoder.
auto MadMp3Decoder::SkipID3Tags(IStream& stream) -> void {
  // First check that the file actually does start with ID3 tags.
  std::array<std::byte, 3> magic_buf{};
  if (stream.Read(magic_buf) != 3 ||
      std::memcmp(magic_buf.data(), "ID3", 3) != 0) {
    stream.SeekTo(0, IStream::SeekFrom::kStartOfStream);
    return;
  }

  // The size of the tags (*not* including the 10-byte header) is located 6
  // bytes in.
  std::array<std::byte, 4> size_buf{};
  stream.SeekTo(6, IStream::SeekFrom::kStartOfStream);
  if (stream.Read(size_buf) != 4) {
    // Truncated header; don't leave the stream part-way through it.
    stream.SeekTo(0, IStream::SeekFrom::kStartOfStream);
    return;
  }

  // The size is a big-endian 'synchsafe' integer: only the low 7 bits of each
  // byte carry data. Mask off the top bit so that a malformed byte cannot
  // bleed into the bits contributed by its neighbour.
  uint32_t tags_size = 0;
  for (std::byte size_byte : size_buf) {
    tags_size = (tags_size << 7) | (static_cast<uint32_t>(size_byte) & 0x7F);
  }

  stream.SeekTo(10 + tags_size, IStream::SeekFrom::kStartOfStream);
}
/*
 * Implementation taken from SDL_mixer and modified. Original is
 * zlib-licensed, copyright (C) 1997-2022 Sam Lantinga <slouken@libsdl.org>
 */
// Looks for a Xing/Info or VBRI header within the frame that libmad most
// recently parsed, and uses the frame count stored there to work out the
// length of the stream.
//
// `header` must be the decoded header of that same frame.
//
// Returns the length as a number of samples per channel, or an empty optional
// if there is no usable VBR header (none present, no frame count stored, or a
// length that does not fit in 32 bits).
auto MadMp3Decoder::GetVbrLength(const mad_header& header)
    -> std::optional<uint32_t> {
  if (!stream_->this_frame || !stream_->next_frame ||
      stream_->next_frame <= stream_->this_frame ||
      (stream_->next_frame - stream_->this_frame) < 48) {
    return {};
  }
  const unsigned char* const frame = stream_->this_frame;
  const auto frame_len =
      static_cast<std::size_t>(stream_->next_frame - stream_->this_frame);

  const auto read_be32 = [](const unsigned char* p) -> uint32_t {
    return (static_cast<uint32_t>(p[0]) << 24) |
           (static_cast<uint32_t>(p[1]) << 16) |
           (static_cast<uint32_t>(p[2]) << 8) | static_cast<uint32_t>(p[3]);
  };

  // A Xing header sits directly after the frame's side info, the size of
  // which depends on the MPEG version and the channel mode:
  //   MPEG1:      17 bytes mono, 32 bytes otherwise.
  //   MPEG2/2.5:   9 bytes mono, 17 bytes otherwise.
  const bool is_mpeg1 = ((frame[1] >> 3) & 0x03) == 0x03;
  const bool is_mono = header.mode == MAD_MODE_SINGLE_CHANNEL;
  const std::size_t side_info_len =
      is_mpeg1 ? (is_mono ? 17 : 32) : (is_mono ? 9 : 17);
  const std::size_t xing_offset = 4 + side_info_len;

  // A VBRI header is always 32 bytes after the 4-byte frame header,
  // regardless of the MPEG version or channel mode.
  constexpr std::size_t kVbriOffset = 4 + 32;
  // Within the VBRI header, the frame count follows the 4-byte id, three
  // 2-byte fields, and the 4-byte stream size.
  constexpr std::size_t kVbriFramesOffset = 14;
  // Bit in the Xing flags field that indicates a frame count is present.
  constexpr uint32_t kXingHasFrames = 0x01;

  // TODO(jacqueline): we should also look up any toc fields here, to make
  // seeking faster.
  uint32_t frames_count = 0;
  // The minimum frame length checked above (48 bytes) guarantees that the
  // Xing id, flags, and frame count are all within the frame, since
  // xing_offset is at most 36.
  if (std::memcmp(frame + xing_offset, "Xing", 4) == 0 ||
      std::memcmp(frame + xing_offset, "Info", 4) == 0) {
    /* Xing header to get the count of frames for VBR */
    const uint32_t flags = read_be32(frame + xing_offset + 4);
    if (!(flags & kXingHasFrames)) {
      // The frame count field is optional; without it the next field would
      // be misread as a frame count.
      return {};
    }
    frames_count = read_be32(frame + xing_offset + 8);
  } else if (frame_len >= kVbriOffset + kVbriFramesOffset + 4 &&
             std::memcmp(frame + kVbriOffset, "VBRI", 4) == 0) {
    /* VBRI header to get the count of frames for VBR */
    frames_count = read_be32(frame + kVbriOffset + kVbriFramesOffset);
  } else {
    return {};
  }

  if (frames_count == 0) {
    // A zero count tells us nothing; let the caller estimate the length
    // some other way instead of reporting an empty stream.
    return {};
  }

  const uint32_t samples_per_frame = 32 * MAD_NSBSAMPLES(&header);
  // Multiply in 64 bits so that an implausibly large frame count is detected
  // rather than silently wrapping around.
  const uint64_t total_samples =
      static_cast<uint64_t>(frames_count) * samples_per_frame;
  if (total_samples > UINT32_MAX) {
    return {};
  }
  return static_cast<uint32_t>(total_samples);
}
} // namespace codecs