You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
319 lines
9.6 KiB
319 lines
9.6 KiB
/*
|
|
* Copyright 2023 jacqueline <me@jacqueline.id.au>
|
|
*
|
|
* SPDX-License-Identifier: GPL-3.0-only
|
|
*/
|
|
|
|
#include "mad.hpp"
|
|
#include <stdint.h>
|
|
#include <sys/_stdint.h>
|
|
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <optional>
|
|
|
|
#include "esp_heap_caps.h"
|
|
#include "mad.h"
|
|
|
|
#include "codec.hpp"
|
|
#include "esp_log.h"
|
|
#include "result.hpp"
|
|
#include "sample.hpp"
|
|
#include "types.hpp"
|
|
|
|
namespace codecs {
|
|
|
|
[[maybe_unused]] static constexpr char kTag[] = "mad";
|
|
|
|
static constexpr uint32_t kMallocCaps = MALLOC_CAP_SPIRAM;
|
|
|
|
/*
 * Allocates the three libmad state structures and initialises each of them.
 *
 * The stream and frame state are requested with kMallocCaps, whereas the
 * synth state is requested with MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT.
 * The decoder starts with no pending PCM samples (current_sample_ == -1) and
 * with neither the end-of-file nor the end-of-stream flag set.
 *
 * NOTE(review): the results of heap_caps_malloc are not checked before being
 * passed to the mad_*_init functions.
 */
MadMp3Decoder::MadMp3Decoder()
    : input_(),
      buffer_(),
      stream_(static_cast<mad_stream*>(
          heap_caps_malloc(sizeof(mad_stream), kMallocCaps))),
      frame_(static_cast<mad_frame*>(
          heap_caps_malloc(sizeof(mad_frame), kMallocCaps))),
      synth_(static_cast<mad_synth*>(heap_caps_malloc(
          sizeof(mad_synth), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT))),
      current_sample_(-1),
      is_eof_(false),
      is_eos_(false) {
  // The raw allocations above are uninitialised; hand each one to libmad so
  // that it is in a valid starting state.
  mad_stream_init(stream_.get());
  mad_frame_init(frame_.get());
  mad_synth_init(synth_.get());
}
|
|
|
|
/*
 * Lets libmad release whatever it attached to each state structure. The
 * structures are finished in the reverse of the order in which the
 * constructor initialised them.
 */
MadMp3Decoder::~MadMp3Decoder() {
  mad_synth_finish(synth_.get());
  mad_frame_finish(frame_.get());
  mad_stream_finish(stream_.get());
}
|
|
|
|
auto MadMp3Decoder::GetBytesUsed() -> std::size_t {
|
|
if (stream_->next_frame) {
|
|
return stream_->next_frame - stream_->buffer;
|
|
} else {
|
|
return stream_->bufend - stream_->buffer;
|
|
}
|
|
}
|
|
|
|
auto MadMp3Decoder::OpenStream(std::shared_ptr<IStream> input, uint32_t offset)
|
|
-> cpp::result<OutputFormat, ICodec::Error> {
|
|
input_ = input;
|
|
|
|
SkipID3Tags(*input);
|
|
|
|
// To get the output format for MP3 streams, we simply need to decode the
|
|
// first frame header.
|
|
mad_header header;
|
|
mad_header_init(&header);
|
|
bool eof = false;
|
|
bool got_header = false;
|
|
while (!eof && !got_header) {
|
|
eof = buffer_.Refill(input_.get());
|
|
|
|
buffer_.ConsumeBytes([&](std::span<std::byte> buf) -> size_t {
|
|
mad_stream_buffer(stream_.get(),
|
|
reinterpret_cast<const unsigned char*>(buf.data()),
|
|
buf.size_bytes());
|
|
|
|
while (mad_header_decode(&header, stream_.get()) < 0) {
|
|
if (MAD_RECOVERABLE(stream_->error)) {
|
|
// Recoverable errors are usually malformed parts of the stream.
|
|
// We can recover from them by just retrying the decode.
|
|
continue;
|
|
}
|
|
if (stream_->error == MAD_ERROR_BUFLEN) {
|
|
return GetBytesUsed();
|
|
}
|
|
eof = true;
|
|
return 0;
|
|
}
|
|
|
|
got_header = true;
|
|
return GetBytesUsed();
|
|
});
|
|
}
|
|
|
|
if (!got_header) {
|
|
return cpp::fail(ICodec::Error::kMalformedData);
|
|
}
|
|
|
|
uint8_t channels = MAD_NCHANNELS(&header);
|
|
OutputFormat output{
|
|
.num_channels = channels,
|
|
.sample_rate_hz = header.samplerate,
|
|
};
|
|
|
|
auto vbr_length = GetVbrLength(header);
|
|
if (vbr_length) {
|
|
output.total_samples = vbr_length.value() * channels;
|
|
} else if (input->Size() && header.bitrate > 0) {
|
|
auto cbr_length = input->Size().value() / (header.bitrate / 8);
|
|
output.total_samples = cbr_length * output.sample_rate_hz * channels;
|
|
}
|
|
|
|
mad_timer_t timer;
|
|
mad_timer_reset(&timer);
|
|
bool need_refill = false;
|
|
bool seek_err = false;
|
|
|
|
while (mad_timer_count(timer, MAD_UNITS_SECONDS) < offset) {
|
|
if (seek_err) {
|
|
return cpp::fail(ICodec::Error::kMalformedData);
|
|
}
|
|
|
|
if (need_refill && buffer_.Refill(input_.get())) {
|
|
return cpp::fail(ICodec::Error::kMalformedData);
|
|
}
|
|
need_refill = false;
|
|
|
|
buffer_.ConsumeBytes([&](std::span<std::byte> buf) -> size_t {
|
|
mad_stream_buffer(stream_.get(),
|
|
reinterpret_cast<const unsigned char*>(buf.data()),
|
|
buf.size());
|
|
|
|
while (mad_header_decode(&header, stream_.get()) < 0) {
|
|
if (MAD_RECOVERABLE(stream_->error)) {
|
|
continue;
|
|
}
|
|
if (stream_->error == MAD_ERROR_BUFLEN) {
|
|
need_refill = true;
|
|
return GetBytesUsed();
|
|
}
|
|
// The error is unrecoverable. Give up.
|
|
seek_err = true;
|
|
return 0;
|
|
}
|
|
|
|
mad_timer_add(&timer, header.duration);
|
|
return GetBytesUsed();
|
|
});
|
|
}
|
|
|
|
return output;
|
|
}
|
|
|
|
auto MadMp3Decoder::DecodeTo(std::span<sample::Sample> output)
|
|
-> cpp::result<OutputInfo, Error> {
|
|
if (current_sample_ < 0 && !is_eos_) {
|
|
if (!is_eof_) {
|
|
is_eof_ = buffer_.Refill(input_.get());
|
|
if (is_eof_) {
|
|
buffer_.AddBytes([&](std::span<std::byte> buf) -> size_t {
|
|
if (buf.size() < MAD_BUFFER_GUARD) {
|
|
is_eof_ = false;
|
|
return 0;
|
|
}
|
|
ESP_LOGI(kTag, "adding MAD_BUFFER_GUARD");
|
|
std::fill_n(buf.begin(), MAD_BUFFER_GUARD, std::byte(0));
|
|
return 8;
|
|
});
|
|
}
|
|
}
|
|
|
|
buffer_.ConsumeBytes([&](std::span<std::byte> buf) -> size_t {
|
|
mad_stream_buffer(stream_.get(),
|
|
reinterpret_cast<const unsigned char*>(buf.data()),
|
|
buf.size());
|
|
|
|
// Decode the next frame. To signal errors, this returns -1 and
|
|
// stashes an error code in the stream structure.
|
|
while (mad_frame_decode(frame_.get(), stream_.get()) < 0) {
|
|
if (MAD_RECOVERABLE(stream_->error)) {
|
|
// Recoverable errors are usually malformed parts of the stream.
|
|
// We can recover from them by just retrying the decode.
|
|
continue;
|
|
}
|
|
if (stream_->error == MAD_ERROR_BUFLEN) {
|
|
if (is_eof_) {
|
|
is_eos_ = true;
|
|
}
|
|
return GetBytesUsed();
|
|
}
|
|
// The error is unrecoverable. Give up.
|
|
is_eof_ = true;
|
|
is_eos_ = true;
|
|
return 0;
|
|
}
|
|
|
|
// We've successfully decoded a frame! Now synthesize samples to write
|
|
// out.
|
|
mad_synth_frame(synth_.get(), frame_.get());
|
|
current_sample_ = 0;
|
|
return GetBytesUsed();
|
|
});
|
|
}
|
|
|
|
size_t output_sample = 0;
|
|
if (current_sample_ >= 0) {
|
|
while (current_sample_ < synth_->pcm.length) {
|
|
if (output_sample + synth_->pcm.channels >= output.size()) {
|
|
// We can't fit the next full frame into the buffer.
|
|
return OutputInfo{.samples_written = output_sample,
|
|
.is_stream_finished = false};
|
|
}
|
|
|
|
for (int channel = 0; channel < synth_->pcm.channels; channel++) {
|
|
output[output_sample++] =
|
|
sample::FromMad(synth_->pcm.samples[channel][current_sample_]);
|
|
}
|
|
current_sample_++;
|
|
}
|
|
}
|
|
|
|
// We wrote everything! Reset, ready for the next frame.
|
|
current_sample_ = -1;
|
|
return OutputInfo{.samples_written = output_sample,
|
|
.is_stream_finished = is_eos_};
|
|
}
|
|
|
|
/*
 * Positions `stream` just past a leading ID3v2 tag, if there is one.
 *
 * If the stream does not begin with the "ID3" magic bytes, it is rewound to
 * its start. If either the magic bytes or the size field cannot be read in
 * full, the stream is left wherever the failed read left it.
 */
auto MadMp3Decoder::SkipID3Tags(IStream& stream) -> void {
  // First check that the file actually does start with ID3 tags.
  std::array<std::byte, 3> magic_buf{};
  if (stream.Read(magic_buf) != 3) {
    return;
  }
  if (std::memcmp(magic_buf.data(), "ID3", 3) != 0) {
    stream.SeekTo(0, IStream::SeekFrom::kStartOfStream);
    return;
  }

  // The size of the tags (*not* including the 10-byte header) is located 6
  // bytes in.
  std::array<std::byte, 4> size_buf{};
  stream.SeekTo(6, IStream::SeekFrom::kStartOfStream);
  if (stream.Read(size_buf) != 4) {
    return;
  }

  // The size is a 'synchsafe' integer: four bytes, most significant first,
  // each contributing only its low 7 bits. The top bit of every byte is
  // masked off so that a malformed tag cannot produce overlapping bits and
  // an out-of-range seek target.
  uint32_t tags_size = 0;
  for (std::byte size_byte : size_buf) {
    tags_size = (tags_size << 7) | (static_cast<uint32_t>(size_byte) & 0x7F);
  }
  stream.SeekTo(10 + tags_size, IStream::SeekFrom::kStartOfStream);
}
|
|
|
|
/*
|
|
* Implementation taken from SDL_mixer and modified. Original is
|
|
* zlib-licensed, copyright (C) 1997-2022 Sam Lantinga <slouken@libsdl.org>
|
|
*/
|
|
auto MadMp3Decoder::GetVbrLength(const mad_header& header)
|
|
-> std::optional<uint32_t> {
|
|
if (!stream_->this_frame || !stream_->next_frame ||
|
|
stream_->next_frame <= stream_->this_frame ||
|
|
(stream_->next_frame - stream_->this_frame) < 48) {
|
|
return {};
|
|
}
|
|
|
|
int mpeg_version = (stream_->this_frame[1] >> 3) & 0x03;
|
|
|
|
int xing_offset = 0;
|
|
switch (mpeg_version) {
|
|
case 0x03: /* MPEG1 */
|
|
if (header.mode == MAD_MODE_SINGLE_CHANNEL) {
|
|
xing_offset = 4 + 17;
|
|
} else {
|
|
xing_offset = 4 + 32;
|
|
}
|
|
break;
|
|
default: /* MPEG2 and MPEG2.5 */
|
|
if (header.mode == MAD_MODE_SINGLE_CHANNEL) {
|
|
xing_offset = 4 + 17;
|
|
} else {
|
|
xing_offset = 4 + 9;
|
|
}
|
|
break;
|
|
}
|
|
|
|
uint32_t samples_per_frame = 32 * MAD_NSBSAMPLES(&header);
|
|
|
|
unsigned char const* frames_count_raw;
|
|
uint32_t frames_count = 0;
|
|
// TODO(jacqueline): we should also look up any toc fields here, to make
|
|
// seeking faster.
|
|
if (std::memcmp(stream_->this_frame + xing_offset, "Xing", 4) == 0 ||
|
|
std::memcmp(stream_->this_frame + xing_offset, "Info", 4) == 0) {
|
|
/* Xing header to get the count of frames for VBR */
|
|
frames_count_raw = stream_->this_frame + xing_offset + 8;
|
|
frames_count = ((uint32_t)frames_count_raw[0] << 24) +
|
|
((uint32_t)frames_count_raw[1] << 16) +
|
|
((uint32_t)frames_count_raw[2] << 8) +
|
|
((uint32_t)frames_count_raw[3]);
|
|
} else if (std::memcmp(stream_->this_frame + xing_offset, "VBRI", 4) == 0) {
|
|
/* VBRI header to get the count of frames for VBR */
|
|
frames_count_raw = stream_->this_frame + xing_offset + 14;
|
|
frames_count = ((uint32_t)frames_count_raw[0] << 24) +
|
|
((uint32_t)frames_count_raw[1] << 16) +
|
|
((uint32_t)frames_count_raw[2] << 8) +
|
|
((uint32_t)frames_count_raw[3]);
|
|
} else {
|
|
return {};
|
|
}
|
|
|
|
return (double)(frames_count * samples_per_frame);
|
|
}
|
|
|
|
} // namespace codecs
|
|
|