Merge pull request 'MP3 gap... less' (#180) from tjk/tangara-fw:mp3-gapless into main

Reviewed-on: https://codeberg.org/cool-tech-zone/tangara-fw/pulls/180
Reviewed-by: cooljqln <cooljqln@noreply.codeberg.org>
custom
cooljqln 3 months ago
commit dfb4ea5458
  1. 14
      src/codecs/include/mad.hpp
  2. 114
      src/codecs/mad.cpp

@ -38,13 +38,14 @@ class MadMp3Decoder : public ICodec {
private: private:
auto SkipID3Tags(IStream& stream) -> std::optional<uint32_t>; auto SkipID3Tags(IStream& stream) -> std::optional<uint32_t>;
// Stream information parsed from the first frame's Xing/Info/VBRI header
// (and the LAME extension, when present). Renamed from VbrInfo because it
// now also carries gapless-playback data.
struct VbrInfo { struct Mp3Info {
// Encoder delay taken from the upper 12 bits of the LAME delay/padding field;
// the decoder uses it as the number of samples to skip at the start of the
// stream. Stays 0 when no LAME tag is found.
uint16_t starting_sample;
// Frame count times samples-per-frame, minus the encoder delay and padding.
// Callers multiply this by the channel count to get total samples.
uint32_t length; uint32_t length;
// Value of the Xing "bytes" field. Only populated when both the bytes flag
// and the TOC flag are set in the header flags.
std::optional<uint32_t> bytes; std::optional<uint32_t> bytes;
// View over the 100-entry Xing table of contents, used together with `bytes`
// for VBR seeking. Points into the decoder's stream buffer rather than owning
// a copy — NOTE(review): confirm the buffer outlives every use of this span.
std::optional<std::span<const unsigned char, 100>> toc; std::optional<std::span<const unsigned char, 100>> toc;
}; };
auto GetVbrInfo(const mad_header& header) -> std::optional<VbrInfo>; auto GetMp3Info(const mad_header& header) -> std::optional<Mp3Info>;
auto GetBytesUsed() -> std::size_t; auto GetBytesUsed() -> std::size_t;
@ -55,7 +56,14 @@ class MadMp3Decoder : public ICodec {
std::unique_ptr<mad_frame> frame_; std::unique_ptr<mad_frame> frame_;
std::unique_ptr<mad_synth> synth_; std::unique_ptr<mad_synth> synth_;
int current_sample_; // Count of samples processed in the current frame (channels combined)
int current_frame_sample_;
// Count of samples processed in the current stream (channels separate, i.e. usually x2)
int current_stream_sample_;
// How many samples in the current stream (channels separate) with encoder delay/padding removed
int total_samples_;
// Encoder delay, i.e. how many samples to skip at the start of the stream
int skip_samples_;
bool is_eof_; bool is_eof_;
bool is_eos_; bool is_eos_;
}; };

@ -37,7 +37,10 @@ MadMp3Decoder::MadMp3Decoder()
synth_(reinterpret_cast<mad_synth*>( synth_(reinterpret_cast<mad_synth*>(
heap_caps_malloc(sizeof(mad_synth), heap_caps_malloc(sizeof(mad_synth),
MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT))), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT))),
current_sample_(-1), current_frame_sample_(-1),
current_stream_sample_(0),
total_samples_(0),
skip_samples_(0),
is_eof_(false), is_eof_(false),
is_eos_(false) { is_eos_(false) {
mad_stream_init(stream_.get()); mad_stream_init(stream_.get());
@ -63,6 +66,8 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr<IStream> input, uint32_t offset)
-> cpp::result<OutputFormat, ICodec::Error> { -> cpp::result<OutputFormat, ICodec::Error> {
input_ = input; input_ = input;
current_stream_sample_ = 0;
auto id3size = SkipID3Tags(*input); auto id3size = SkipID3Tags(*input);
// To get the output format for MP3 streams, we simply need to decode the // To get the output format for MP3 streams, we simply need to decode the
@ -107,14 +112,15 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr<IStream> input, uint32_t offset)
.sample_rate_hz = header.samplerate, .sample_rate_hz = header.samplerate,
}; };
auto vbr_info = GetVbrInfo(header); auto mp3_info = GetMp3Info(header);
uint64_t cbr_length = 0; uint64_t cbr_length = 0;
if (vbr_info) { if (mp3_info) {
output.total_samples = vbr_info->length * channels; output.total_samples = mp3_info->length * channels;
} else if (input->Size() && header.bitrate > 0) { } else if (input->Size() && header.bitrate > 0) {
cbr_length = (input->Size().value() * 8) / header.bitrate; cbr_length = (input->Size().value() * 8) / header.bitrate;
output.total_samples = cbr_length * output.sample_rate_hz * channels; output.total_samples = cbr_length * output.sample_rate_hz * channels;
} }
total_samples_ = output.total_samples.value();
// header.bitrate is only for CBR, but we've calculated total samples for VBR // header.bitrate is only for CBR, but we've calculated total samples for VBR
// and CBR, so we can use that to calculate sample size and therefore bitrate. // and CBR, so we can use that to calculate sample size and therefore bitrate.
@ -124,28 +130,33 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr<IStream> input, uint32_t offset)
output.bitrate_kbps = static_cast<uint32_t>(output.sample_rate_hz * channels * sample_size / 1024); output.bitrate_kbps = static_cast<uint32_t>(output.sample_rate_hz * channels * sample_size / 1024);
} }
// For gapless MP3s, save samples to skip
if (mp3_info) {
skip_samples_ = mp3_info->starting_sample;
}
if (offset > 1 && cbr_length > 0) { if (offset > 1 && cbr_length > 0) {
// Constant bitrate seeking // Constant bitrate seeking
uint64_t skip_bytes = header.bitrate * (offset - 1) / 8; uint64_t skip_bytes = header.bitrate * (offset - 1) / 8;
input->SeekTo(skip_bytes, IStream::SeekFrom::kCurrentPosition); input->SeekTo(skip_bytes, IStream::SeekFrom::kCurrentPosition);
// Reset the offset so the next part will seek to the next second // Reset the offset so the next part will seek to the next second
offset = 1; offset = 1;
} else if (offset > 1 && vbr_info && vbr_info->toc && vbr_info->bytes) { } else if (offset > 1 && mp3_info && mp3_info->toc && mp3_info->bytes) {
// VBR seeking // VBR seeking
double percent = double percent =
((offset - 1) * output.sample_rate_hz) / (double)vbr_info->length * 100; ((offset - 1) * output.sample_rate_hz) / (double)mp3_info->length * 100;
percent = std::clamp(percent, 0., 100.); percent = std::clamp(percent, 0., 100.);
int index = (int)percent; int index = (int)percent;
if (index > 99) if (index > 99)
index = 99; index = 99;
uint8_t first_val = (*vbr_info->toc)[index]; uint8_t first_val = (*mp3_info->toc)[index];
uint8_t second_val = 255; uint8_t second_val = 255;
if (index < 99) { if (index < 99) {
second_val = (*vbr_info->toc)[index + 1]; second_val = (*mp3_info->toc)[index + 1];
} }
double interp = first_val + (second_val - first_val) * (percent - index); double interp = first_val + (second_val - first_val) * (percent - index);
uint32_t bytes_to_skip = uint32_t bytes_to_skip =
(uint32_t)((1.0 / 255.0) * interp * vbr_info->bytes.value()); (uint32_t)((1.0 / 255.0) * interp * mp3_info->bytes.value());
input->SeekTo(bytes_to_skip, IStream::SeekFrom::kCurrentPosition); input->SeekTo(bytes_to_skip, IStream::SeekFrom::kCurrentPosition);
offset = 1; offset = 1;
} }
@ -199,7 +210,7 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr<IStream> input, uint32_t offset)
auto MadMp3Decoder::DecodeTo(std::span<sample::Sample> output) auto MadMp3Decoder::DecodeTo(std::span<sample::Sample> output)
-> cpp::result<OutputInfo, Error> { -> cpp::result<OutputInfo, Error> {
if (current_sample_ < 0 && !is_eos_) { if (current_frame_sample_ < 0 && !is_eos_) {
if (!is_eof_) { if (!is_eof_) {
is_eof_ = buffer_.Refill(input_.get()); is_eof_ = buffer_.Refill(input_.get());
if (is_eof_) { if (is_eof_) {
@ -243,14 +254,21 @@ auto MadMp3Decoder::DecodeTo(std::span<sample::Sample> output)
// We've successfully decoded a frame! Now synthesize samples to write // We've successfully decoded a frame! Now synthesize samples to write
// out. // out.
mad_synth_frame(synth_.get(), frame_.get()); mad_synth_frame(synth_.get(), frame_.get());
current_sample_ = 0; current_frame_sample_ = 0;
return GetBytesUsed(); return GetBytesUsed();
}); });
} }
size_t output_sample = 0; size_t output_sample = 0;
if (current_sample_ >= 0) { if (current_frame_sample_ >= 0) {
while (current_sample_ < synth_->pcm.length) { // Skip any gap samples indicated by the headers
while (skip_samples_ > 0) {
skip_samples_--;
current_frame_sample_++;
}
// Process samples until we hit the end of the frame or stream
while (current_frame_sample_ < synth_->pcm.length && current_stream_sample_ <= total_samples_) {
if (output_sample + synth_->pcm.channels >= output.size()) { if (output_sample + synth_->pcm.channels >= output.size()) {
// We can't fit the next full frame into the buffer. // We can't fit the next full frame into the buffer.
return OutputInfo{.samples_written = output_sample, return OutputInfo{.samples_written = output_sample,
@ -259,14 +277,18 @@ auto MadMp3Decoder::DecodeTo(std::span<sample::Sample> output)
for (int channel = 0; channel < synth_->pcm.channels; channel++) { for (int channel = 0; channel < synth_->pcm.channels; channel++) {
output[output_sample++] = output[output_sample++] =
sample::FromMad(synth_->pcm.samples[channel][current_sample_]); sample::FromMad(synth_->pcm.samples[channel][current_frame_sample_]);
} }
current_sample_++; current_frame_sample_++;
current_stream_sample_ += synth_->pcm.channels;
}
if (current_stream_sample_ > total_samples_) {
is_eos_ = true;
} }
} }
// We wrote everything! Reset, ready for the next frame. // We wrote everything! Reset, ready for the next frame.
current_sample_ = -1; current_frame_sample_ = -1;
return OutputInfo{.samples_written = output_sample, return OutputInfo{.samples_written = output_sample,
.is_stream_finished = is_eos_}; .is_stream_finished = is_eos_};
} }
@ -304,8 +326,8 @@ auto MadMp3Decoder::SkipID3Tags(IStream& stream) -> std::optional<uint32_t> {
* Implementation taken from SDL_mixer and modified. Original is * Implementation taken from SDL_mixer and modified. Original is
* zlib-licensed, copyright (C) 1997-2022 Sam Lantinga <slouken@libsdl.org> * zlib-licensed, copyright (C) 1997-2022 Sam Lantinga <slouken@libsdl.org>
*/ */
auto MadMp3Decoder::GetVbrInfo(const mad_header& header) auto MadMp3Decoder::GetMp3Info(const mad_header& header)
-> std::optional<VbrInfo> { -> std::optional<Mp3Info> {
if (!stream_->this_frame || !stream_->next_frame || if (!stream_->this_frame || !stream_->next_frame ||
stream_->next_frame <= stream_->this_frame || stream_->next_frame <= stream_->this_frame ||
(stream_->next_frame - stream_->this_frame) < 48) { (stream_->next_frame - stream_->this_frame) < 48) {
@ -336,15 +358,19 @@ auto MadMp3Decoder::GetVbrInfo(const mad_header& header)
unsigned char const* frames_count_raw; unsigned char const* frames_count_raw;
uint32_t frames_count = 0; uint32_t frames_count = 0;
if (std::memcmp(stream_->this_frame + xing_offset, "Xing", 4) == 0 ||
std::memcmp(stream_->this_frame + xing_offset, "Info", 4) == 0) { bool xing_vbr = std::memcmp(stream_->this_frame + xing_offset, "Xing", 4) == 0;
bool xing_cbr = std::memcmp(stream_->this_frame + xing_offset, "Info", 4) == 0;
bool vbri = std::memcmp(stream_->this_frame + xing_offset, "VBRI", 4) == 0;
if ( xing_vbr || xing_cbr) {
/* Xing header to get the count of frames for VBR */ /* Xing header to get the count of frames for VBR */
frames_count_raw = stream_->this_frame + xing_offset + 8; frames_count_raw = stream_->this_frame + xing_offset + 8;
frames_count = ((uint32_t)frames_count_raw[0] << 24) + frames_count = ((uint32_t)frames_count_raw[0] << 24) +
((uint32_t)frames_count_raw[1] << 16) + ((uint32_t)frames_count_raw[1] << 16) +
((uint32_t)frames_count_raw[2] << 8) + ((uint32_t)frames_count_raw[2] << 8) +
((uint32_t)frames_count_raw[3]); ((uint32_t)frames_count_raw[3]);
} else if (std::memcmp(stream_->this_frame + xing_offset, "VBRI", 4) == 0) { } else if (vbri) {
/* VBRI header to get the count of frames for VBR */ /* VBRI header to get the count of frames for VBR */
frames_count_raw = stream_->this_frame + xing_offset + 14; frames_count_raw = stream_->this_frame + xing_offset + 14;
frames_count = ((uint32_t)frames_count_raw[0] << 24) + frames_count = ((uint32_t)frames_count_raw[0] << 24) +
@ -356,35 +382,63 @@ auto MadMp3Decoder::GetVbrInfo(const mad_header& header)
} }
// Check TOC and bytes in the bitstream (used for VBR seeking) // Check TOC and bytes in the bitstream (used for VBR seeking)
// Also get gapless playback info: encoder delay and padding
std::optional<std::span<const unsigned char, 100>> toc; std::optional<std::span<const unsigned char, 100>> toc;
std::optional<uint32_t> bytes; std::optional<uint32_t> bytes;
if (std::memcmp(stream_->this_frame + xing_offset, "Xing", 4) == 0) { auto lame_offset = xing_offset;
uint16_t starting_sample = 0;
uint16_t encoder_padding = 0;
if (xing_vbr || xing_cbr) {
unsigned char const* flags_raw = stream_->this_frame + xing_offset + 4; unsigned char const* flags_raw = stream_->this_frame + xing_offset + 4;
uint32_t flags = ((uint32_t)flags_raw[0] << 24) + uint32_t flags = ((uint32_t)flags_raw[0] << 24) +
((uint32_t)flags_raw[1] << 16) + ((uint32_t)flags_raw[1] << 16) +
((uint32_t)flags_raw[2] << 8) + ((uint32_t)flags_raw[3]); ((uint32_t)flags_raw[2] << 8) + ((uint32_t)flags_raw[3]);
lame_offset += 8;
auto toc_offset = 8;
auto bytes_offset = 8;
if (flags & 1) {
// Frames field is present
lame_offset += 4;
toc_offset += 4;
bytes_offset += 4;
}
if (flags & 2) {
// Bytes field is present
lame_offset += 4;
toc_offset += 4;
}
if (flags & 4) { if (flags & 4) {
// TOC flag is set // TOC flag is set
auto toc_offset = 8; lame_offset += 100;
if (flags & 1) {
toc_offset += 4;
}
if (flags & 2) { if (flags & 2) {
// Bytes field // Bytes field
unsigned char const* bytes_raw = stream_->this_frame + xing_offset + 12; unsigned char const* bytes_raw = stream_->this_frame + xing_offset + bytes_offset;
uint32_t num_bytes = uint32_t num_bytes =
((uint32_t)bytes_raw[0] << 24) + ((uint32_t)bytes_raw[1] << 16) + ((uint32_t)bytes_raw[0] << 24) + ((uint32_t)bytes_raw[1] << 16) +
((uint32_t)bytes_raw[2] << 8) + ((uint32_t)bytes_raw[3]); ((uint32_t)bytes_raw[2] << 8) + ((uint32_t)bytes_raw[3]);
bytes.emplace(num_bytes); bytes.emplace(num_bytes);
toc_offset += 4;
} }
// Read the table of contents in // Read the table of contents in
toc.emplace((stream_->this_frame + xing_offset + toc_offset), 100); toc.emplace((stream_->this_frame + xing_offset + toc_offset), 100);
} }
if (flags & 8) {
lame_offset += 4;
}
if (std::memcmp(stream_->this_frame + lame_offset, "LAME", 4) == 0) {
unsigned char const* delay_addr = stream_->this_frame + lame_offset + 21;
uint32_t delay_raw =
((uint32_t)delay_addr[0] << 16) +
((uint32_t)delay_addr[1] << 8) +
((uint32_t)delay_addr[2]);
starting_sample = (delay_raw >> 12) & 0xFFF;
encoder_padding = delay_raw & 0xFFF;
}
} }
return VbrInfo{ return Mp3Info{
.length = (frames_count * samples_per_frame), .starting_sample = starting_sample,
.length = (frames_count * samples_per_frame - starting_sample - encoder_padding),
.bytes = bytes, .bytes = bytes,
.toc = toc, .toc = toc,
}; };

Loading…
Cancel
Save