Merge pull request 'MP3 gap... less' (#180) from tjk/tangara-fw:mp3-gapless into main

Reviewed-on: https://codeberg.org/cool-tech-zone/tangara-fw/pulls/180
Reviewed-by: cooljqln <cooljqln@noreply.codeberg.org>
1 year ago · dfb4ea5458
parent 1625d5a6b8 b65713624f
commit dfb4ea5458
2 changed files with 95 additions and 33 deletions
--- a/src/codecs/include/mad.hpp
+++ b/src/codecs/include/mad.hpp
@@ -38,13 +38,14 @@ class MadMp3Decoder : public ICodec {
 private:
  auto SkipID3Tags(IStream& stream) -> std::optional<uint32_t>;

-  struct VbrInfo {
+  struct Mp3Info {
+    uint16_t starting_sample;
    uint32_t length;
    std::optional<uint32_t> bytes;
    std::optional<std::span<const unsigned char, 100>> toc;
  };

-  auto GetVbrInfo(const mad_header& header) -> std::optional<VbrInfo>;
+  auto GetMp3Info(const mad_header& header) -> std::optional<Mp3Info>;
  
  auto GetBytesUsed() -> std::size_t;

@@ -55,7 +56,14 @@ class MadMp3Decoder : public ICodec {
  std::unique_ptr<mad_frame> frame_;
  std::unique_ptr<mad_synth> synth_;

-  int current_sample_;
+  // Count of samples processed in the current frame (channels combined)
+  int current_frame_sample_;
+  // Count of samples processed in the current stream (channels separate, i.e. usually x2)
+  int current_stream_sample_;
+  // How many samples in the current stream (channels separate) with encoder delay/padding removed
+  int total_samples_;
+  // Encoder delay, i.e. how many samples to skip at the start of the stream
+  int skip_samples_;
  bool is_eof_;
  bool is_eos_;
 };
--- a/src/codecs/mad.cpp
+++ b/src/codecs/mad.cpp
@@ -37,7 +37,10 @@ MadMp3Decoder::MadMp3Decoder()
      synth_(reinterpret_cast<mad_synth*>(
          heap_caps_malloc(sizeof(mad_synth),
                           MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT))),
-      current_sample_(-1),
+      current_frame_sample_(-1),
+      current_stream_sample_(0),
+      total_samples_(0),
+      skip_samples_(0),
      is_eof_(false),
      is_eos_(false) {
  mad_stream_init(stream_.get());
@@ -63,6 +66,8 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr<IStream> input, uint32_t offset)
    -> cpp::result<OutputFormat, ICodec::Error> {
  input_ = input;

+  current_stream_sample_ = 0;
+
  auto id3size = SkipID3Tags(*input);

  // To get the output format for MP3 streams, we simply need to decode the
@@ -107,14 +112,15 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr<IStream> input, uint32_t offset)
      .sample_rate_hz = header.samplerate,
  };

-  auto vbr_info = GetVbrInfo(header);
+  auto mp3_info = GetMp3Info(header);
  uint64_t cbr_length = 0;
-  if (vbr_info) {
-    output.total_samples = vbr_info->length * channels;
+  if (mp3_info) {
+    output.total_samples = mp3_info->length * channels;
  } else if (input->Size() && header.bitrate > 0) {
    cbr_length = (input->Size().value() * 8) / header.bitrate;
    output.total_samples = cbr_length * output.sample_rate_hz * channels;
  }
+  total_samples_ = output.total_samples.value();

  // header.bitrate is only for CBR, but we've calculated total samples for VBR
  // and CBR, so we can use that to calculate sample size and therefore bitrate.
@@ -124,28 +130,33 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr<IStream> input, uint32_t offset)
    output.bitrate_kbps = static_cast<uint32_t>(output.sample_rate_hz * channels * sample_size / 1024);
  }

+  // For gapless MP3s, save samples to skip
+  if (mp3_info) {
+    skip_samples_ = mp3_info->starting_sample;
+  }
+
  if (offset > 1 && cbr_length > 0) {
    // Constant bitrate seeking
    uint64_t skip_bytes = header.bitrate * (offset - 1) / 8;
    input->SeekTo(skip_bytes, IStream::SeekFrom::kCurrentPosition);
    // Reset the offset so the next part will seek to the next second
    offset = 1;
-  } else if (offset > 1 && vbr_info && vbr_info->toc && vbr_info->bytes) {
+  } else if (offset > 1 && mp3_info && mp3_info->toc && mp3_info->bytes) {
    // VBR seeking
    double percent =
-        ((offset - 1) * output.sample_rate_hz) / (double)vbr_info->length * 100;
+        ((offset - 1) * output.sample_rate_hz) / (double)mp3_info->length * 100;
    percent = std::clamp(percent, 0., 100.);
    int index = (int)percent;
    if (index > 99)
      index = 99;
-    uint8_t first_val = (*vbr_info->toc)[index];
+    uint8_t first_val = (*mp3_info->toc)[index];
    uint8_t second_val = 255;
    if (index < 99) {
-      second_val = (*vbr_info->toc)[index + 1];
+      second_val = (*mp3_info->toc)[index + 1];
    }
    double interp = first_val + (second_val - first_val) * (percent - index);
    uint32_t bytes_to_skip =
-        (uint32_t)((1.0 / 255.0) * interp * vbr_info->bytes.value());
+        (uint32_t)((1.0 / 255.0) * interp * mp3_info->bytes.value());
    input->SeekTo(bytes_to_skip, IStream::SeekFrom::kCurrentPosition);
    offset = 1;
  }
@@ -199,7 +210,7 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr<IStream> input, uint32_t offset)

 auto MadMp3Decoder::DecodeTo(std::span<sample::Sample> output)
    -> cpp::result<OutputInfo, Error> {
-  if (current_sample_ < 0 && !is_eos_) {
+  if (current_frame_sample_ < 0 && !is_eos_) {
    if (!is_eof_) {
      is_eof_ = buffer_.Refill(input_.get());
      if (is_eof_) {
@@ -243,14 +254,21 @@ auto MadMp3Decoder::DecodeTo(std::span<sample::Sample> output)
      // We've successfully decoded a frame! Now synthesize samples to write
      // out.
      mad_synth_frame(synth_.get(), frame_.get());
-      current_sample_ = 0;
+      current_frame_sample_ = 0;
      return GetBytesUsed();
    });
  }

  size_t output_sample = 0;
-  if (current_sample_ >= 0) {
-    while (current_sample_ < synth_->pcm.length) {
+  if (current_frame_sample_ >= 0) {
+    // Skip any gap samples indicated by the headers
+    while (skip_samples_ > 0) {
+      skip_samples_--;
+      current_frame_sample_++;
+    }
+
+    // Process samples until we hit the end of the frame or stream
+    while (current_frame_sample_ < synth_->pcm.length && current_stream_sample_ <= total_samples_) {
      if (output_sample + synth_->pcm.channels >= output.size()) {
        // We can't fit the next full frame into the buffer.
        return OutputInfo{.samples_written = output_sample,
@@ -259,14 +277,18 @@ auto MadMp3Decoder::DecodeTo(std::span<sample::Sample> output)

      for (int channel = 0; channel < synth_->pcm.channels; channel++) {
        output[output_sample++] =
-            sample::FromMad(synth_->pcm.samples[channel][current_sample_]);
+            sample::FromMad(synth_->pcm.samples[channel][current_frame_sample_]);
      }
-      current_sample_++;
+      current_frame_sample_++;
+      current_stream_sample_ += synth_->pcm.channels;
+    }
+    if (current_stream_sample_ > total_samples_) {
+      is_eos_ = true;
    }
  }

  // We wrote everything! Reset, ready for the next frame.
-  current_sample_ = -1;
+  current_frame_sample_ = -1;
  return OutputInfo{.samples_written = output_sample,
                    .is_stream_finished = is_eos_};
 }
@@ -304,8 +326,8 @@ auto MadMp3Decoder::SkipID3Tags(IStream& stream) -> std::optional<uint32_t> {
 * Implementation taken from SDL_mixer and modified. Original is
 * zlib-licensed, copyright (C) 1997-2022 Sam Lantinga <slouken@libsdl.org>
 */
-auto MadMp3Decoder::GetVbrInfo(const mad_header& header)
-    -> std::optional<VbrInfo> {
+auto MadMp3Decoder::GetMp3Info(const mad_header& header)
+    -> std::optional<Mp3Info> {
  if (!stream_->this_frame || !stream_->next_frame ||
      stream_->next_frame <= stream_->this_frame ||
      (stream_->next_frame - stream_->this_frame) < 48) {
@@ -336,15 +358,19 @@ auto MadMp3Decoder::GetVbrInfo(const mad_header& header)

  unsigned char const* frames_count_raw;
  uint32_t frames_count = 0;
-  if (std::memcmp(stream_->this_frame + xing_offset, "Xing", 4) == 0 ||
-      std::memcmp(stream_->this_frame + xing_offset, "Info", 4) == 0) {
+
+  bool xing_vbr = std::memcmp(stream_->this_frame + xing_offset, "Xing", 4) == 0;
+  bool xing_cbr = std::memcmp(stream_->this_frame + xing_offset, "Info", 4) == 0;
+  bool vbri = std::memcmp(stream_->this_frame + xing_offset, "VBRI", 4) == 0;
+
+  if ( xing_vbr || xing_cbr) {
    /* Xing header to get the count of frames for VBR */
    frames_count_raw = stream_->this_frame + xing_offset + 8;
    frames_count = ((uint32_t)frames_count_raw[0] << 24) +
                   ((uint32_t)frames_count_raw[1] << 16) +
                   ((uint32_t)frames_count_raw[2] << 8) +
                   ((uint32_t)frames_count_raw[3]);
-  } else if (std::memcmp(stream_->this_frame + xing_offset, "VBRI", 4) == 0) {
+  } else if (vbri) {
    /* VBRI header to get the count of frames for VBR */
    frames_count_raw = stream_->this_frame + xing_offset + 14;
    frames_count = ((uint32_t)frames_count_raw[0] << 24) +
@@ -356,35 +382,63 @@ auto MadMp3Decoder::GetVbrInfo(const mad_header& header)
  }

  // Check TOC and bytes in the bitstream (used for VBR seeking)
+  // Also get gapless playback info: encoder delay and padding
  std::optional<std::span<const unsigned char, 100>> toc;
  std::optional<uint32_t> bytes;
-  if (std::memcmp(stream_->this_frame + xing_offset, "Xing", 4) == 0) {
+  auto lame_offset = xing_offset;
+  uint16_t starting_sample = 0;
+  uint16_t encoder_padding = 0;
+  if (xing_vbr || xing_cbr) {
    unsigned char const* flags_raw = stream_->this_frame + xing_offset + 4;
    uint32_t flags = ((uint32_t)flags_raw[0] << 24) +
                     ((uint32_t)flags_raw[1] << 16) +
                     ((uint32_t)flags_raw[2] << 8) + ((uint32_t)flags_raw[3]);
+    lame_offset += 8;
+    auto toc_offset = 8;
+    auto bytes_offset = 8;
+    if (flags & 1) {
+      // Frames field is present
+      lame_offset += 4;
+      toc_offset += 4;
+      bytes_offset += 4;
+    }
+    if (flags & 2) {
+      // Bytes field is present
+      lame_offset += 4;
+      toc_offset += 4;
+    }
    if (flags & 4) {
      // TOC flag is set
-      auto toc_offset = 8;
-      if (flags & 1) {
-        toc_offset += 4;
-      }
+      lame_offset += 100;
      if (flags & 2) {
        // Bytes field
-        unsigned char const* bytes_raw = stream_->this_frame + xing_offset + 12;
+        unsigned char const* bytes_raw = stream_->this_frame + xing_offset + bytes_offset;
        uint32_t num_bytes =
            ((uint32_t)bytes_raw[0] << 24) + ((uint32_t)bytes_raw[1] << 16) +
            ((uint32_t)bytes_raw[2] << 8) + ((uint32_t)bytes_raw[3]);
        bytes.emplace(num_bytes);
-        toc_offset += 4;
      }
      // Read the table of contents in
      toc.emplace((stream_->this_frame + xing_offset + toc_offset), 100);
    }
+    if (flags & 8) {
+      lame_offset += 4;
+    }
+
+    if (std::memcmp(stream_->this_frame + lame_offset, "LAME", 4) == 0) {
+        unsigned char const* delay_addr = stream_->this_frame + lame_offset + 21;
+        uint32_t delay_raw =
+            ((uint32_t)delay_addr[0] << 16) +
+            ((uint32_t)delay_addr[1] << 8) +
+            ((uint32_t)delay_addr[2]);
+        starting_sample = (delay_raw >> 12) & 0xFFF;
+        encoder_padding = delay_raw & 0xFFF;
+    }
  }

-  return VbrInfo{
-      .length = (frames_count * samples_per_frame),
+  return Mp3Info{
+      .starting_sample = starting_sample,
+      .length = (frames_count * samples_per_frame - starting_sample - encoder_padding),
      .bytes = bytes,
      .toc = toc,
  };