From 1f1059843fa9ea7e8b6d9c288626dac9e2df67f2 Mon Sep 17 00:00:00 2001 From: Tom Kirchner Date: Sat, 11 Jan 2025 21:57:26 -0800 Subject: [PATCH 1/5] Rename VbrInfo to Mp3Info It can apply to CBR files too, when the marker is "Info" --- src/codecs/include/mad.hpp | 4 ++-- src/codecs/mad.cpp | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/codecs/include/mad.hpp b/src/codecs/include/mad.hpp index e28e32c8..274862f7 100644 --- a/src/codecs/include/mad.hpp +++ b/src/codecs/include/mad.hpp @@ -38,13 +38,13 @@ class MadMp3Decoder : public ICodec { private: auto SkipID3Tags(IStream& stream) -> std::optional; - struct VbrInfo { + struct Mp3Info { uint32_t length; std::optional bytes; std::optional> toc; }; - auto GetVbrInfo(const mad_header& header) -> std::optional; + auto GetMp3Info(const mad_header& header) -> std::optional; auto GetBytesUsed() -> std::size_t; diff --git a/src/codecs/mad.cpp b/src/codecs/mad.cpp index d823d99d..2aa3766a 100644 --- a/src/codecs/mad.cpp +++ b/src/codecs/mad.cpp @@ -107,10 +107,10 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr input, uint32_t offset) .sample_rate_hz = header.samplerate, }; - auto vbr_info = GetVbrInfo(header); + auto mp3_info = GetMp3Info(header); uint64_t cbr_length = 0; - if (vbr_info) { - output.total_samples = vbr_info->length * channels; + if (mp3_info) { + output.total_samples = mp3_info->length * channels; } else if (input->Size() && header.bitrate > 0) { cbr_length = (input->Size().value() * 8) / header.bitrate; output.total_samples = cbr_length * output.sample_rate_hz * channels; @@ -130,22 +130,22 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr input, uint32_t offset) input->SeekTo(skip_bytes, IStream::SeekFrom::kCurrentPosition); // Reset the offset so the next part will seek to the next second offset = 1; - } else if (offset > 1 && vbr_info && vbr_info->toc && vbr_info->bytes) { + } else if (offset > 1 && mp3_info && mp3_info->toc && mp3_info->bytes) { // VBR seeking double percent = - ((offset - 1) * output.sample_rate_hz) / (double)vbr_info->length * 100; + ((offset - 1) * output.sample_rate_hz) / (double)mp3_info->length * 100; percent = std::clamp(percent, 0., 100.); int index = (int)percent; if (index > 99) index = 99; - uint8_t first_val = (*vbr_info->toc)[index]; + uint8_t first_val = (*mp3_info->toc)[index]; uint8_t second_val = 255; if (index < 99) { - second_val = (*vbr_info->toc)[index + 1]; + second_val = (*mp3_info->toc)[index + 1]; } double interp = first_val + (second_val - first_val) * (percent - index); uint32_t bytes_to_skip = - (uint32_t)((1.0 / 255.0) * interp * vbr_info->bytes.value()); + (uint32_t)((1.0 / 255.0) * interp * mp3_info->bytes.value()); input->SeekTo(bytes_to_skip, IStream::SeekFrom::kCurrentPosition); offset = 1; } @@ -304,8 +304,8 @@ auto MadMp3Decoder::SkipID3Tags(IStream& stream) -> std::optional { * Implementation taken from SDL_mixer and modified. Original is * zlib-licensed, copyright (C) 1997-2022 Sam Lantinga */ -auto MadMp3Decoder::GetVbrInfo(const mad_header& header) - -> std::optional { +auto MadMp3Decoder::GetMp3Info(const mad_header& header) + -> std::optional { if (!stream_->this_frame || !stream_->next_frame || stream_->next_frame <= stream_->this_frame || (stream_->next_frame - stream_->this_frame) < 48) { @@ -383,7 +383,7 @@ auto MadMp3Decoder::GetVbrInfo(const mad_header& header) } } - return VbrInfo{ + return Mp3Info{ .length = (frames_count * samples_per_frame), .bytes = bytes, .toc = toc, From 35f124637943976450150b96bdd0359bf26287f7 Mon Sep 17 00:00:00 2001 From: Tom Kirchner Date: Sat, 11 Jan 2025 22:01:37 -0800 Subject: [PATCH 2/5] Check for frames/bytes/TOC in CBR as well --- src/codecs/mad.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/codecs/mad.cpp b/src/codecs/mad.cpp index 2aa3766a..a0184487 100644 --- a/src/codecs/mad.cpp +++ b/src/codecs/mad.cpp @@ -336,15 +336,19 @@ auto MadMp3Decoder::GetMp3Info(const mad_header& header) unsigned char const* frames_count_raw; uint32_t frames_count = 0; - if (std::memcmp(stream_->this_frame + xing_offset, "Xing", 4) == 0 || - std::memcmp(stream_->this_frame + xing_offset, "Info", 4) == 0) { + + bool xing_vbr = std::memcmp(stream_->this_frame + xing_offset, "Xing", 4) == 0; + bool xing_cbr = std::memcmp(stream_->this_frame + xing_offset, "Info", 4) == 0; + bool vbri = std::memcmp(stream_->this_frame + xing_offset, "VBRI", 4) == 0; + + if ( xing_vbr || xing_cbr) { /* Xing header to get the count of frames for VBR */ frames_count_raw = stream_->this_frame + xing_offset + 8; frames_count = ((uint32_t)frames_count_raw[0] << 24) + ((uint32_t)frames_count_raw[1] << 16) + ((uint32_t)frames_count_raw[2] << 8) + ((uint32_t)frames_count_raw[3]); - } else if (std::memcmp(stream_->this_frame + xing_offset, "VBRI", 4) == 0) { + } else if (vbri) { /* VBRI header to get the count of frames for VBR */ frames_count_raw = stream_->this_frame + xing_offset + 14; frames_count = ((uint32_t)frames_count_raw[0] << 24) + @@ -358,7 +362,7 @@ auto MadMp3Decoder::GetMp3Info(const mad_header& header) // Check TOC and bytes in the bitstream (used for VBR seeking) std::optional> toc; std::optional bytes; - if (std::memcmp(stream_->this_frame + xing_offset, "Xing", 4) == 0) { + if (xing_vbr || xing_cbr) { unsigned char const* flags_raw = stream_->this_frame + xing_offset + 4; uint32_t flags = ((uint32_t)flags_raw[0] << 24) + ((uint32_t)flags_raw[1] << 16) + From f8199bbd6d7b8bc64d8f58dc5638af8bc36cc73f Mon Sep 17 00:00:00 2001 From: Tom Kirchner Date: Sat, 11 Jan 2025 22:04:49 -0800 Subject: [PATCH 3/5] Handle optional frames field in bytes offset of MP3 header --- src/codecs/mad.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/codecs/mad.cpp b/src/codecs/mad.cpp index a0184487..e6e641f5 100644 --- a/src/codecs/mad.cpp +++ b/src/codecs/mad.cpp @@ -367,20 +367,26 @@ auto MadMp3Decoder::GetMp3Info(const mad_header& header) uint32_t flags = ((uint32_t)flags_raw[0] << 24) + ((uint32_t)flags_raw[1] << 16) + ((uint32_t)flags_raw[2] << 8) + ((uint32_t)flags_raw[3]); + auto toc_offset = 8; + auto bytes_offset = 8; + if (flags & 1) { + // Frames field is present + toc_offset += 4; + bytes_offset += 4; + } + if (flags & 2) { + // Bytes field is present + toc_offset += 4; + } if (flags & 4) { // TOC flag is set - auto toc_offset = 8; - if (flags & 1) { - toc_offset += 4; - } if (flags & 2) { // Bytes field - unsigned char const* bytes_raw = stream_->this_frame + xing_offset + 12; + unsigned char const* bytes_raw = stream_->this_frame + xing_offset + bytes_offset; uint32_t num_bytes = ((uint32_t)bytes_raw[0] << 24) + ((uint32_t)bytes_raw[1] << 16) + ((uint32_t)bytes_raw[2] << 8) + ((uint32_t)bytes_raw[3]); bytes.emplace(num_bytes); - toc_offset += 4; } // Read the table of contents in toc.emplace((stream_->this_frame + xing_offset + toc_offset), 100); From 3993835a35b1f7137c9d8665fb047a42e9ae6f94 Mon Sep 17 00:00:00 2001 From: Tom Kirchner Date: Sat, 11 Jan 2025 22:08:47 -0800 Subject: [PATCH 4/5] Extract gapless info from MP3 LAME header --- src/codecs/include/mad.hpp | 1 + src/codecs/mad.cpp | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/codecs/include/mad.hpp b/src/codecs/include/mad.hpp index 274862f7..fba3ef11 100644 --- a/src/codecs/include/mad.hpp +++ b/src/codecs/include/mad.hpp @@ -39,6 +39,7 @@ class MadMp3Decoder : public ICodec { auto SkipID3Tags(IStream& stream) -> std::optional; struct Mp3Info { + uint16_t starting_sample; uint32_t length; std::optional bytes; std::optional> toc; diff --git a/src/codecs/mad.cpp b/src/codecs/mad.cpp index e6e641f5..af74b244 100644 --- a/src/codecs/mad.cpp +++ b/src/codecs/mad.cpp @@ -360,26 +360,34 @@ auto MadMp3Decoder::GetMp3Info(const mad_header& header) } // Check TOC and bytes in the bitstream (used for VBR seeking) + // Also get gapless playback info: encoder delay and padding std::optional> toc; std::optional bytes; + auto lame_offset = xing_offset; + uint16_t starting_sample = 0; + uint16_t encoder_padding = 0; if (xing_vbr || xing_cbr) { unsigned char const* flags_raw = stream_->this_frame + xing_offset + 4; uint32_t flags = ((uint32_t)flags_raw[0] << 24) + ((uint32_t)flags_raw[1] << 16) + ((uint32_t)flags_raw[2] << 8) + ((uint32_t)flags_raw[3]); + lame_offset += 8; auto toc_offset = 8; auto bytes_offset = 8; if (flags & 1) { // Frames field is present + lame_offset += 4; toc_offset += 4; bytes_offset += 4; } if (flags & 2) { // Bytes field is present + lame_offset += 4; toc_offset += 4; } if (flags & 4) { // TOC flag is set + lame_offset += 100; if (flags & 2) { // Bytes field unsigned char const* bytes_raw = stream_->this_frame + xing_offset + bytes_offset; @@ -391,10 +399,24 @@ auto MadMp3Decoder::GetMp3Info(const mad_header& header) // Read the table of contents in toc.emplace((stream_->this_frame + xing_offset + toc_offset), 100); } + if (flags & 8) { + lame_offset += 4; + } + + if (std::memcmp(stream_->this_frame + lame_offset, "LAME", 4) == 0) { + unsigned char const* delay_addr = stream_->this_frame + lame_offset + 21; + uint32_t delay_raw = + ((uint32_t)delay_addr[0] << 16) + + ((uint32_t)delay_addr[1] << 8) + + ((uint32_t)delay_addr[2]); + starting_sample = (delay_raw >> 12) & 0xFFF; + encoder_padding = delay_raw & 0xFFF; + } } return Mp3Info{ - .length = (frames_count * samples_per_frame), + .starting_sample = starting_sample, + .length = (frames_count * samples_per_frame - starting_sample - encoder_padding), .bytes = bytes, .toc = toc, }; From b65713624f57d4b8918a0f27dab5eccb401f5e5a Mon Sep 17 00:00:00 2001 From: Tom Kirchner Date: Sat, 11 Jan 2025 22:18:11 -0800 Subject: [PATCH 5/5] Skip gap samples in MP3 decoding --- src/codecs/include/mad.hpp | 9 ++++++++- src/codecs/mad.cpp | 38 ++++++++++++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/codecs/include/mad.hpp b/src/codecs/include/mad.hpp index fba3ef11..60a0b81c 100644 --- a/src/codecs/include/mad.hpp +++ b/src/codecs/include/mad.hpp @@ -56,7 +56,14 @@ class MadMp3Decoder : public ICodec { std::unique_ptr frame_; std::unique_ptr synth_; - int current_sample_; + // Count of samples processed in the current frame (channels combined) + int current_frame_sample_; + // Count of samples processed in the current stream (channels separate, i.e. usually x2) + int current_stream_sample_; + // How many samples in the current stream (channels separate) with encoder delay/padding removed + int total_samples_; + // Encoder delay, i.e. how many samples to skip at the start of the stream + int skip_samples_; bool is_eof_; bool is_eos_; }; diff --git a/src/codecs/mad.cpp b/src/codecs/mad.cpp index af74b244..538f0715 100644 --- a/src/codecs/mad.cpp +++ b/src/codecs/mad.cpp @@ -37,7 +37,10 @@ MadMp3Decoder::MadMp3Decoder() synth_(reinterpret_cast( heap_caps_malloc(sizeof(mad_synth), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT))), - current_sample_(-1), + current_frame_sample_(-1), + current_stream_sample_(0), + total_samples_(0), + skip_samples_(0), is_eof_(false), is_eos_(false) { mad_stream_init(stream_.get()); @@ -63,6 +66,8 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr input, uint32_t offset) -> cpp::result { input_ = input; + current_stream_sample_ = 0; + auto id3size = SkipID3Tags(*input); // To get the output format for MP3 streams, we simply need to decode the @@ -115,6 +120,7 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr input, uint32_t offset) cbr_length = (input->Size().value() * 8) / header.bitrate; output.total_samples = cbr_length * output.sample_rate_hz * channels; } + total_samples_ = output.total_samples.value(); // header.bitrate is only for CBR, but we've calculated total samples for VBR // and CBR, so we can use that to calculate sample size and therefore bitrate. @@ -124,6 +130,11 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr input, uint32_t offset) output.bitrate_kbps = static_cast(output.sample_rate_hz * channels * sample_size / 1024); } + // For gapless MP3s, save samples to skip + if (mp3_info) { + skip_samples_ = mp3_info->starting_sample; + } + if (offset > 1 && cbr_length > 0) { // Constant bitrate seeking uint64_t skip_bytes = header.bitrate * (offset - 1) / 8; @@ -199,7 +210,7 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr input, uint32_t offset) auto MadMp3Decoder::DecodeTo(std::span output) -> cpp::result { - if (current_sample_ < 0 && !is_eos_) { + if (current_frame_sample_ < 0 && !is_eos_) { if (!is_eof_) { is_eof_ = buffer_.Refill(input_.get()); if (is_eof_) { @@ -243,14 +254,21 @@ auto MadMp3Decoder::DecodeTo(std::span output) // We've successfully decoded a frame! Now synthesize samples to write // out. mad_synth_frame(synth_.get(), frame_.get()); - current_sample_ = 0; + current_frame_sample_ = 0; return GetBytesUsed(); }); } size_t output_sample = 0; - if (current_sample_ >= 0) { - while (current_sample_ < synth_->pcm.length) { + if (current_frame_sample_ >= 0) { + // Skip any gap samples indicated by the headers + while (skip_samples_ > 0) { + skip_samples_--; + current_frame_sample_++; + } + + // Process samples until we hit the end of the frame or stream + while (current_frame_sample_ < synth_->pcm.length && current_stream_sample_ <= total_samples_) { if (output_sample + synth_->pcm.channels >= output.size()) { // We can't fit the next full frame into the buffer. return OutputInfo{.samples_written = output_sample, @@ -259,14 +277,18 @@ auto MadMp3Decoder::DecodeTo(std::span output) for (int channel = 0; channel < synth_->pcm.channels; channel++) { output[output_sample++] = - sample::FromMad(synth_->pcm.samples[channel][current_sample_]); + sample::FromMad(synth_->pcm.samples[channel][current_frame_sample_]); } - current_sample_++; + current_frame_sample_++; + current_stream_sample_ += synth_->pcm.channels; + } + if (current_stream_sample_ > total_samples_) { + is_eos_ = true; } } // We wrote everything! Reset, ready for the next frame. - current_sample_ = -1; + current_frame_sample_ = -1; return OutputInfo{.samples_written = output_sample, .is_stream_finished = is_eos_}; }