Merge pull request 'Factor gapless handling out of audio_fsm and into the audio pipeline' (#69) from jqln/audio-pipeline into main

Reviewed-on: https://codeberg.org/cool-tech-zone/tangara-fw/pulls/69
Reviewed-by: ailurux <ailurux@noreply.codeberg.org>
cooljqln 12 months ago
commit b720ba42a0
  1. src/drivers/bluetooth.cpp (16 lines changed)
  2. src/drivers/i2s_dac.cpp (22 lines changed)
  3. src/drivers/include/drivers/bluetooth.hpp (3 lines changed)
  4. src/drivers/include/drivers/i2s_dac.hpp (4 lines changed)
  5. src/tangara/audio/audio_decoder.cpp (153 lines changed)
  6. src/tangara/audio/audio_decoder.hpp (36 lines changed)
  7. src/tangara/audio/audio_events.hpp (33 lines changed)
  8. src/tangara/audio/audio_fsm.cpp (309 lines changed)
  9. src/tangara/audio/audio_fsm.hpp (32 lines changed)
  10. src/tangara/audio/audio_sink.hpp (1 line changed)
  11. src/tangara/audio/bt_audio_output.cpp (4 lines changed)
  12. src/tangara/audio/bt_audio_output.hpp (2 lines changed)
  13. src/tangara/audio/fatfs_audio_input.cpp (163 lines changed)
  14. src/tangara/audio/fatfs_audio_input.hpp (66 lines changed)
  15. src/tangara/audio/fatfs_stream_factory.cpp (104 lines changed)
  16. src/tangara/audio/fatfs_stream_factory.hpp (53 lines changed)
  17. src/tangara/audio/i2s_audio_output.cpp (4 lines changed)
  18. src/tangara/audio/i2s_audio_output.hpp (2 lines changed)
  19. src/tangara/audio/processor.cpp (120 lines changed)
  20. src/tangara/audio/processor.hpp (17 lines changed)
  21. src/tangara/audio/stream_cues.cpp (65 lines changed)
  22. src/tangara/audio/stream_cues.hpp (49 lines changed)

@ -38,6 +38,7 @@ namespace drivers {
DRAM_ATTR static StreamBufferHandle_t sStream = nullptr;
DRAM_ATTR static std::atomic<float> sVolumeFactor = 1.f;
DRAM_ATTR static std::atomic<uint32_t> sSamplesUsed = 0;
static tasks::WorkerPool* sBgWorker;
@ -47,8 +48,8 @@ auto gap_cb(esp_bt_gap_cb_event_t event, esp_bt_gap_cb_param_t* param) -> void {
bluetooth::events::internal::Gap{.type = event, .param = param});
}
auto avrcp_cb(esp_avrc_ct_cb_event_t event,
esp_avrc_ct_cb_param_t* param) -> void {
auto avrcp_cb(esp_avrc_ct_cb_event_t event, esp_avrc_ct_cb_param_t* param)
-> void {
esp_avrc_ct_cb_param_t copy = *param;
sBgWorker->Dispatch<void>([=]() {
auto lock = bluetooth::BluetoothState::lock();
@ -73,6 +74,13 @@ IRAM_ATTR auto a2dp_data_cb(uint8_t* buf, int32_t buf_size) -> int32_t {
}
size_t bytes_received = xStreamBufferReceive(stream, buf, buf_size, 0);
size_t samples_received = bytes_received / 2;
if (UINT32_MAX - sSamplesUsed < samples_received) {
sSamplesUsed = samples_received - (UINT32_MAX - sSamplesUsed);
} else {
sSamplesUsed += samples_received;
}
// Apply software volume scaling.
int16_t* samples = reinterpret_cast<int16_t*>(buf);
float factor = sVolumeFactor.load();
@ -165,6 +173,10 @@ auto Bluetooth::SetVolumeFactor(float f) -> void {
sVolumeFactor = f;
}
auto Bluetooth::SamplesUsed() -> uint32_t {
return sSamplesUsed;
}
auto Bluetooth::SetEventHandler(std::function<void(bluetooth::Event)> cb)
-> void {
auto lock = bluetooth::BluetoothState::lock();

@ -5,6 +5,7 @@
*/
#include "drivers/i2s_dac.hpp"
#include <stdint.h>
#include <cmath>
#include <cstdint>
@ -140,11 +141,10 @@ auto I2SDac::SetPaused(bool paused) -> void {
}
}
static volatile bool sSwapWords = false;
DRAM_ATTR static volatile bool sSwapWords = false;
auto I2SDac::Reconfigure(Channels ch,
BitsPerSample bps,
SampleRate rate) -> void {
auto I2SDac::Reconfigure(Channels ch, BitsPerSample bps, SampleRate rate)
-> void {
std::lock_guard<std::mutex> lock(configure_mutex_);
if (i2s_active_) {
@ -217,6 +217,8 @@ auto I2SDac::WriteData(const std::span<const std::byte>& data) -> void {
}
}
DRAM_ATTR static volatile uint32_t sSamplesRead = 0;
extern "C" IRAM_ATTR auto callback(i2s_chan_handle_t handle,
i2s_event_data_t* event,
void* user_ctx) -> bool {
@ -235,6 +237,14 @@ extern "C" IRAM_ATTR auto callback(i2s_chan_handle_t handle,
size_t bytes_written =
xStreamBufferReceiveFromISR(src, buf, event->size, &ret);
// Assume 16 bit samples.
size_t samples = bytes_written / 2;
if (UINT32_MAX - sSamplesRead < samples) {
sSamplesRead = samples - (UINT32_MAX - sSamplesRead);
} else {
sSamplesRead = sSamplesRead + samples;
}
// The ESP32's I2S peripheral has a different endianness to its processors.
// ESP-IDF handles this difference for stereo channels, but not for mono
// channels. We therefore sometimes need to swap each pair of words as they're
@ -276,6 +286,10 @@ auto I2SDac::SetSource(StreamBufferHandle_t buffer) -> void {
}
}
auto I2SDac::SamplesUsed() -> uint32_t {
return sSamplesRead;
}
auto I2SDac::set_channel(bool enabled) -> void {
if (i2s_active_ == enabled) {
return;
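
Both data callbacks above keep a free-running 32-bit count of samples handed to the hardware, wrapping explicitly instead of relying on unsigned overflow. A condensed sketch of that pattern, with illustrative names rather than code from this change:

#include <cstdint>

static uint32_t sSampleCount = 0;

// Mirrors the wrap handling in a2dp_data_cb and the I2S event callback: once
// the counter would exceed UINT32_MAX, it wraps back through zero.
void accountSamples(uint32_t new_samples) {
  if (UINT32_MAX - sSampleCount < new_samples) {
    sSampleCount = new_samples - (UINT32_MAX - sSampleCount);
  } else {
    sSampleCount += new_samples;
  }
}

The wrap is tolerated downstream: StreamCues::update(), added later in this diff, notices the counter moving backwards and flushes any cues queued before the wrap point.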

@ -13,10 +13,10 @@
#include <freertos/stream_buffer.h>
#include <stdint.h>
#include "drivers/bluetooth_types.hpp"
#include "drivers/nvs.hpp"
#include "esp_a2dp_api.h"
#include "esp_avrc_api.h"
#include "esp_gap_bt_api.h"
#include "drivers/nvs.hpp"
#include "tasks.hpp"
#include "tinyfsm.hpp"
#include "tinyfsm/include/tinyfsm.hpp"
@ -44,6 +44,7 @@ class Bluetooth {
auto SetSource(StreamBufferHandle_t) -> void;
auto SetVolumeFactor(float) -> void;
auto SamplesUsed() -> uint32_t;
auto SetEventHandler(std::function<void(bluetooth::Event)> cb) -> void;
};

@ -11,8 +11,8 @@
#include <functional>
#include <memory>
#include <optional>
#include <utility>
#include <span>
#include <utility>
#include "driver/i2s_std.h"
#include "driver/i2s_types.h"
@ -71,6 +71,8 @@ class I2SDac {
auto WriteData(const std::span<const std::byte>& data) -> void;
auto SetSource(StreamBufferHandle_t buffer) -> void;
auto SamplesUsed() -> uint32_t;
// Not copyable or movable.
I2SDac(const I2SDac&) = delete;
I2SDac& operator=(const I2SDac&) = delete;

@ -6,7 +6,7 @@
#include "audio/audio_decoder.hpp"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
@ -23,14 +23,12 @@
#include "freertos/portmacro.h"
#include "freertos/projdefs.h"
#include "freertos/queue.h"
#include "freertos/ringbuf.h"
#include "audio/audio_converter.hpp"
#include "audio/audio_events.hpp"
#include "audio/audio_fsm.hpp"
#include "audio/audio_sink.hpp"
#include "audio/audio_source.hpp"
#include "audio/fatfs_audio_input.hpp"
#include "audio/processor.hpp"
#include "codec.hpp"
#include "database/track.hpp"
#include "drivers/i2s_dac.hpp"
@ -42,21 +40,33 @@
namespace audio {
[[maybe_unused]] static const char* kTag = "audio_dec";
static const char* kTag = "decoder";
/*
* The size of the buffer used for holding decoded samples. This buffer is
* allocated in internal memory for greater speed, so be careful when
* increasing its size.
*/
static constexpr std::size_t kCodecBufferLength =
drivers::kI2SBufferLengthFrames * sizeof(sample::Sample);
auto Decoder::Start(std::shared_ptr<IAudioSource> source,
std::shared_ptr<SampleConverter> sink) -> Decoder* {
Decoder* task = new Decoder(source, sink);
auto Decoder::Start(std::shared_ptr<SampleProcessor> sink) -> Decoder* {
Decoder* task = new Decoder(sink);
tasks::StartPersistent<tasks::Type::kAudioDecoder>([=]() { task->Main(); });
return task;
}
Decoder::Decoder(std::shared_ptr<IAudioSource> source,
std::shared_ptr<SampleConverter> mixer)
: source_(source), converter_(mixer), codec_(), current_format_() {
auto Decoder::open(std::shared_ptr<TaggedStream> stream) -> void {
NextStream* next = new NextStream();
next->stream = stream;
// The decoder services its queue very quickly, so blocking on this write
// should be fine. If we discover contention here, then adding more space for
// items to next_stream_ should be fine too.
xQueueSend(next_stream_, &next, portMAX_DELAY);
}
Decoder::Decoder(std::shared_ptr<SampleProcessor> processor)
: processor_(processor), next_stream_(xQueueCreate(1, sizeof(void*))) {
ESP_LOGI(kTag, "allocating codec buffer, %u KiB", kCodecBufferLength / 1024);
codec_buffer_ = {
reinterpret_cast<sample::Sample*>(heap_caps_calloc(
@ -64,81 +74,126 @@ Decoder::Decoder(std::shared_ptr<IAudioSource> source,
kCodecBufferLength};
}
/*
* Main decoding loop. Handles watching for new streams, or continuing to nudge
* along the current stream if we have one.
*/
void Decoder::Main() {
for (;;) {
if (source_->HasNewStream() || !stream_) {
std::shared_ptr<TaggedStream> new_stream = source_->NextStream();
if (new_stream && BeginDecoding(new_stream)) {
stream_ = new_stream;
} else {
// Check whether there's a new stream to begin. If we're idle, then we
// simply park and wait forever for a stream to arrive.
TickType_t wait_time = stream_ ? 0 : portMAX_DELAY;
NextStream* next;
if (xQueueReceive(next_stream_, &next, wait_time)) {
// Copy the data out of the queue, then clean up the item.
std::shared_ptr<TaggedStream> new_stream = next->stream;
delete next;
// If we were already decoding, then make sure we finish up the current
// file gracefully.
if (stream_) {
finishDecode(true);
}
// Ensure there's actually stream data; we might have been given nullptr
// as a signal to stop.
if (!new_stream) {
continue;
}
// Start decoding the new stream.
prepareDecode(new_stream);
}
if (ContinueDecoding()) {
stream_.reset();
if (!continueDecode()) {
finishDecode(false);
}
}
}
auto Decoder::BeginDecoding(std::shared_ptr<TaggedStream> stream) -> bool {
// Ensure any previous codec is freed before creating a new one.
codec_.reset();
auto Decoder::prepareDecode(std::shared_ptr<TaggedStream> stream) -> void {
auto stub_track = std::make_shared<TrackInfo>(TrackInfo{
.tags = stream->tags(),
.uri = stream->Filepath(),
.duration = {},
.start_offset = {},
.bitrate_kbps = {},
.encoding = stream->type(),
.format = {},
});
codec_.reset(codecs::CreateCodecForType(stream->type()).value_or(nullptr));
if (!codec_) {
ESP_LOGE(kTag, "no codec found for stream");
return false;
events::Audio().Dispatch(
internal::DecodingFailedToStart{.track = stub_track});
return;
}
auto open_res = codec_->OpenStream(stream, stream->Offset());
if (open_res.has_error()) {
ESP_LOGE(kTag, "codec failed to start: %s",
codecs::ICodec::ErrorString(open_res.error()).c_str());
return false;
}
stream->SetPreambleFinished();
current_sink_format_ = IAudioOutput::Format{
.sample_rate = open_res->sample_rate_hz,
.num_channels = open_res->num_channels,
.bits_per_sample = 16,
};
std::optional<uint32_t> duration;
if (open_res->total_samples) {
duration = open_res->total_samples.value() / open_res->num_channels /
open_res->sample_rate_hz;
events::Audio().Dispatch(
internal::DecodingFailedToStart{.track = stub_track});
return;
}
converter_->beginStream(std::make_shared<TrackInfo>(TrackInfo{
// Decoding started okay! Fill out the rest of the track info for this
// stream.
stream_ = stream;
track_ = std::make_shared<TrackInfo>(TrackInfo{
.tags = stream->tags(),
.uri = stream->Filepath(),
.duration = duration,
.duration = {},
.start_offset = stream->Offset(),
.bitrate_kbps = open_res->sample_rate_hz,
.bitrate_kbps = {},
.encoding = stream->type(),
.format = *current_sink_format_,
}));
.format =
{
.sample_rate = open_res->sample_rate_hz,
.num_channels = open_res->num_channels,
.bits_per_sample = 16,
},
});
return true;
if (open_res->total_samples) {
track_->duration = open_res->total_samples.value() /
open_res->num_channels / open_res->sample_rate_hz;
}
events::Audio().Dispatch(internal::DecodingStarted{.track = track_});
processor_->beginStream(track_);
}
auto Decoder::ContinueDecoding() -> bool {
auto Decoder::continueDecode() -> bool {
auto res = codec_->DecodeTo(codec_buffer_);
if (res.has_error()) {
converter_->endStream();
return true;
return false;
}
if (res->samples_written > 0) {
converter_->continueStream(codec_buffer_.first(res->samples_written));
processor_->continueStream(codec_buffer_.first(res->samples_written));
}
if (res->is_stream_finished) {
converter_->endStream();
codec_.reset();
return !res->is_stream_finished;
}
auto Decoder::finishDecode(bool cancel) -> void {
assert(track_);
// Tell everyone we're finished.
if (cancel) {
events::Audio().Dispatch(internal::DecodingCancelled{.track = track_});
} else {
events::Audio().Dispatch(internal::DecodingFinished{.track = track_});
}
processor_->endStream(cancel);
return res->is_stream_finished;
// Clean up after ourselves.
stream_.reset();
codec_.reset();
track_.reset();
}
} // namespace audio
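
The decoder's new open() hands streams over via a one-slot FreeRTOS queue. Queues copy raw bytes, so the shared_ptr is boxed in a heap-allocated wrapper on the sender side and unboxed and freed on the receiver side. A minimal sketch of that handoff, with illustrative names in place of the real Decoder members:

#include <memory>
#include "freertos/FreeRTOS.h"
#include "freertos/queue.h"

struct Box {
  std::shared_ptr<int> payload;  // stands in for std::shared_ptr<TaggedStream>
};

// Sender (any task): box the shared_ptr, then post the raw pointer.
void send(QueueHandle_t q, std::shared_ptr<int> value) {
  auto* box = new Box{.payload = std::move(value)};
  xQueueSend(q, &box, portMAX_DELAY);
}

// Receiver (decoder task): take ownership of the wrapper, move the payload
// out, then delete the wrapper.
std::shared_ptr<int> receive(QueueHandle_t q, TickType_t wait) {
  Box* box = nullptr;
  if (xQueueReceive(q, &box, wait) != pdTRUE) {
    return nullptr;
  }
  std::shared_ptr<int> value = std::move(box->payload);
  delete box;
  return value;
}

// As in Decoder's constructor, the queue holds a single pointer-sized item:
// QueueHandle_t q = xQueueCreate(1, sizeof(void*));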

@ -9,10 +9,10 @@
#include <cstdint>
#include <memory>
#include "audio/audio_converter.hpp"
#include "audio/audio_events.hpp"
#include "audio/audio_sink.hpp"
#include "audio/audio_source.hpp"
#include "audio/processor.hpp"
#include "codec.hpp"
#include "database/track.hpp"
#include "types.hpp"
@ -20,35 +20,39 @@
namespace audio {
/*
* Handle to a persistent task that takes bytes from the given source, decodes
* them into sample::Sample (normalised to 16 bit signed PCM), and then
* forwards the resulting stream to the given converter.
* Handle to a persistent task that takes encoded bytes from arbitrary sources,
* decodes them into sample::Sample (normalised to 16 bit signed PCM), and then
* streams them onward to the sample processor.
*/
class Decoder {
public:
static auto Start(std::shared_ptr<IAudioSource> source,
std::shared_ptr<SampleConverter> converter) -> Decoder*;
static auto Start(std::shared_ptr<SampleProcessor>) -> Decoder*;
auto Main() -> void;
auto open(std::shared_ptr<TaggedStream>) -> void;
Decoder(const Decoder&) = delete;
Decoder& operator=(const Decoder&) = delete;
private:
Decoder(std::shared_ptr<IAudioSource> source,
std::shared_ptr<SampleConverter> converter);
Decoder(std::shared_ptr<SampleProcessor>);
auto Main() -> void;
auto BeginDecoding(std::shared_ptr<TaggedStream>) -> bool;
auto ContinueDecoding() -> bool;
auto prepareDecode(std::shared_ptr<TaggedStream>) -> void;
auto continueDecode() -> bool;
auto finishDecode(bool cancel) -> void;
std::shared_ptr<IAudioSource> source_;
std::shared_ptr<SampleConverter> converter_;
std::shared_ptr<SampleProcessor> processor_;
// Struct used with the next_stream_ queue.
struct NextStream {
std::shared_ptr<TaggedStream> stream;
};
QueueHandle_t next_stream_;
std::shared_ptr<codecs::IStream> stream_;
std::unique_ptr<codecs::ICodec> codec_;
std::optional<codecs::ICodec::OutputFormat> current_format_;
std::optional<IAudioOutput::Format> current_sink_format_;
std::shared_ptr<TrackInfo> track_;
std::span<sample::Sample> codec_buffer_;
};

@ -84,13 +84,6 @@ struct PlaybackUpdate : tinyfsm::Event {
struct SetTrack : tinyfsm::Event {
std::variant<std::string, database::TrackId, std::monostate> new_track;
std::optional<uint32_t> seek_to_second;
enum Transition {
kHardCut,
kGapless,
// TODO: kCrossFade
};
Transition transition;
};
struct TogglePlayPause : tinyfsm::Event {
@ -138,17 +131,33 @@ struct OutputModeChanged : tinyfsm::Event {};
namespace internal {
struct DecodingStarted : tinyfsm::Event {
std::shared_ptr<TrackInfo> track;
};
struct DecodingFailedToStart : tinyfsm::Event {
std::shared_ptr<TrackInfo> track;
};
struct DecodingCancelled : tinyfsm::Event {
std::shared_ptr<TrackInfo> track;
};
struct DecodingFinished : tinyfsm::Event {
std::shared_ptr<TrackInfo> track;
};
struct StreamStarted : tinyfsm::Event {
std::shared_ptr<TrackInfo> track;
IAudioOutput::Format src_format;
IAudioOutput::Format dst_format;
IAudioOutput::Format sink_format;
uint32_t cue_at_sample;
};
struct StreamUpdate : tinyfsm::Event {
uint32_t samples_sunk;
struct StreamEnded : tinyfsm::Event {
uint32_t cue_at_sample;
};
struct StreamEnded : tinyfsm::Event {};
struct StreamHeartbeat : tinyfsm::Event {};
} // namespace internal

@ -5,41 +5,41 @@
*/
#include "audio/audio_fsm.hpp"
#include <stdint.h>
#include <cstdint>
#include <future>
#include <memory>
#include <variant>
#include "audio/audio_sink.hpp"
#include "cppbor.h"
#include "cppbor_parse.h"
#include "drivers/bluetooth_types.hpp"
#include "drivers/storage.hpp"
#include "esp_heap_caps.h"
#include "esp_log.h"
#include "freertos/FreeRTOS.h"
#include "freertos/portmacro.h"
#include "freertos/projdefs.h"
#include "tinyfsm.hpp"
#include "audio/audio_converter.hpp"
#include "audio/audio_decoder.hpp"
#include "audio/audio_events.hpp"
#include "audio/audio_sink.hpp"
#include "audio/bt_audio_output.hpp"
#include "audio/fatfs_audio_input.hpp"
#include "audio/fatfs_stream_factory.hpp"
#include "audio/i2s_audio_output.hpp"
#include "audio/stream_cues.hpp"
#include "audio/track_queue.hpp"
#include "database/future_fetcher.hpp"
#include "database/track.hpp"
#include "drivers/bluetooth.hpp"
#include "drivers/bluetooth_types.hpp"
#include "drivers/i2s_dac.hpp"
#include "drivers/nvs.hpp"
#include "drivers/storage.hpp"
#include "drivers/wm8523.hpp"
#include "events/event_queue.hpp"
#include "sample.hpp"
#include "system_fsm/service_locator.hpp"
#include "system_fsm/system_events.hpp"
#include "tinyfsm.hpp"
namespace audio {
@ -47,12 +47,14 @@ namespace audio {
std::shared_ptr<system_fsm::ServiceLocator> AudioState::sServices;
std::shared_ptr<FatfsAudioInput> AudioState::sFileSource;
std::shared_ptr<FatfsStreamFactory> AudioState::sStreamFactory;
std::unique_ptr<Decoder> AudioState::sDecoder;
std::shared_ptr<SampleConverter> AudioState::sSampleConverter;
std::shared_ptr<SampleProcessor> AudioState::sSampleProcessor;
std::shared_ptr<IAudioOutput> AudioState::sOutput;
std::shared_ptr<I2SAudioOutput> AudioState::sI2SOutput;
std::shared_ptr<BluetoothAudioOutput> AudioState::sBtOutput;
std::shared_ptr<IAudioOutput> AudioState::sOutput;
// Two seconds of samples for two channels, at a representative sample rate.
constexpr size_t kDrainLatencySamples = 48000 * 2 * 2;
@ -62,30 +64,33 @@ constexpr size_t kDrainBufferSize =
StreamBufferHandle_t AudioState::sDrainBuffer;
std::optional<IAudioOutput::Format> AudioState::sDrainFormat;
std::shared_ptr<TrackInfo> AudioState::sCurrentTrack;
uint64_t AudioState::sCurrentSamples;
bool AudioState::sCurrentTrackIsFromQueue;
StreamCues AudioState::sStreamCues;
std::shared_ptr<TrackInfo> AudioState::sNextTrack;
uint64_t AudioState::sNextTrackCueSamples;
bool AudioState::sNextTrackIsFromQueue;
bool AudioState::sIsResampling;
bool AudioState::sIsPaused = true;
auto AudioState::currentPositionSeconds() -> std::optional<uint32_t> {
if (!sCurrentTrack || !sDrainFormat) {
return {};
auto AudioState::emitPlaybackUpdate(bool paused) -> void {
std::optional<uint32_t> position;
auto current = sStreamCues.current();
if (current.first && sDrainFormat) {
position = (current.second /
(sDrainFormat->num_channels * sDrainFormat->sample_rate)) +
current.first->start_offset.value_or(0);
}
return sCurrentSamples /
(sDrainFormat->num_channels * sDrainFormat->sample_rate);
PlaybackUpdate event{
.current_track = current.first,
.track_position = position,
.paused = paused,
};
events::System().Dispatch(event);
events::Ui().Dispatch(event);
}
void AudioState::react(const QueueUpdate& ev) {
SetTrack cmd{
.new_track = std::monostate{},
.seek_to_second = {},
.transition = SetTrack::Transition::kHardCut,
};
auto current = sServices->track_queue().current();
@ -98,20 +103,13 @@ void AudioState::react(const QueueUpdate& ev) {
if (!ev.current_changed) {
return;
}
sNextTrackIsFromQueue = true;
cmd.transition = SetTrack::Transition::kHardCut;
break;
case QueueUpdate::kRepeatingLastTrack:
sNextTrackIsFromQueue = true;
cmd.transition = SetTrack::Transition::kGapless;
break;
case QueueUpdate::kTrackFinished:
if (!ev.current_changed) {
cmd.new_track = std::monostate{};
} else {
sNextTrackIsFromQueue = true;
}
cmd.transition = SetTrack::Transition::kGapless;
break;
case QueueUpdate::kDeserialised:
default:
@ -124,32 +122,9 @@ void AudioState::react(const QueueUpdate& ev) {
}
void AudioState::react(const SetTrack& ev) {
// Remember the current track if there is one, since we need to preserve some
// of the state if it turns out this SetTrack event corresponds to seeking
// within the current track.
std::string prev_uri;
bool prev_from_queue = false;
if (sCurrentTrack) {
prev_uri = sCurrentTrack->uri;
prev_from_queue = sCurrentTrackIsFromQueue;
}
if (ev.transition == SetTrack::Transition::kHardCut) {
sCurrentTrack.reset();
sCurrentSamples = 0;
sCurrentTrackIsFromQueue = false;
clearDrainBuffer();
}
if (std::holds_alternative<std::monostate>(ev.new_track)) {
ESP_LOGI(kTag, "playback finished, awaiting drain");
sFileSource->SetPath();
awaitEmptyDrainBuffer();
sCurrentTrack.reset();
sDrainFormat.reset();
sCurrentSamples = 0;
sCurrentTrackIsFromQueue = false;
transit<states::Standby>();
sDecoder->open({});
return;
}
@ -158,96 +133,76 @@ void AudioState::react(const SetTrack& ev) {
auto new_track = ev.new_track;
uint32_t seek_to = ev.seek_to_second.value_or(0);
sServices->bg_worker().Dispatch<void>([=]() {
std::optional<std::string> path;
std::shared_ptr<TaggedStream> stream;
if (std::holds_alternative<database::TrackId>(new_track)) {
auto db = sServices->database().lock();
if (db) {
path = db->getTrackPath(std::get<database::TrackId>(new_track));
}
stream = sStreamFactory->create(std::get<database::TrackId>(new_track),
seek_to);
} else if (std::holds_alternative<std::string>(new_track)) {
path = std::get<std::string>(new_track);
stream =
sStreamFactory->create(std::get<std::string>(new_track), seek_to);
}
if (path) {
if (*path == prev_uri) {
// This was a seek or replay within the same track; don't forget where
// the track originally came from.
sNextTrackIsFromQueue = prev_from_queue;
}
sFileSource->SetPath(*path, seek_to);
} else {
sFileSource->SetPath();
}
sDecoder->open(stream);
});
}
void AudioState::react(const TogglePlayPause& ev) {
sIsPaused = !ev.set_to.value_or(sIsPaused);
if (!sIsPaused && is_in_state<states::Standby>() && sCurrentTrack) {
if (!sIsPaused && is_in_state<states::Standby>() &&
sStreamCues.current().first) {
transit<states::Playback>();
} else if (sIsPaused && is_in_state<states::Playback>()) {
transit<states::Standby>();
}
}
void AudioState::react(const internal::StreamStarted& ev) {
sDrainFormat = ev.dst_format;
sIsResampling = ev.src_format != ev.dst_format;
sNextTrack = ev.track;
sNextTrackCueSamples = sCurrentSamples + (kDrainLatencySamples / 2);
ESP_LOGI(kTag, "new stream %s %u ch @ %lu hz (resample=%i)",
ev.track->uri.c_str(), sDrainFormat->num_channels,
sDrainFormat->sample_rate, sIsResampling);
}
void AudioState::react(const internal::StreamEnded&) {
ESP_LOGI(kTag, "stream ended");
if (sCurrentTrackIsFromQueue) {
sServices->track_queue().finish();
} else {
tinyfsm::FsmList<AudioState>::dispatch(SetTrack{
.new_track = std::monostate{},
.seek_to_second = {},
.transition = SetTrack::Transition::kGapless,
});
}
void AudioState::react(const internal::DecodingFinished& ev) {
// If we just finished playing whatever's at the front of the queue, then we
// need to advance and start playing the next one ASAP in order to continue
// gaplessly.
sServices->bg_worker().Dispatch<void>([=]() {
auto& queue = sServices->track_queue();
auto current = queue.current();
if (!current) {
return;
}
auto db = sServices->database().lock();
if (!db) {
return;
}
auto path = db->getTrackPath(*current);
if (!path) {
return;
}
if (*path == ev.track->uri) {
queue.finish();
}
});
}
void AudioState::react(const internal::StreamUpdate& ev) {
sCurrentSamples += ev.samples_sunk;
if (sNextTrack && sCurrentSamples >= sNextTrackCueSamples) {
ESP_LOGI(kTag, "next track is now sinking");
sCurrentTrack = sNextTrack;
sCurrentSamples -= sNextTrackCueSamples;
sCurrentSamples += sNextTrack->start_offset.value_or(0) *
(sDrainFormat->num_channels * sDrainFormat->sample_rate);
sCurrentTrackIsFromQueue = sNextTrackIsFromQueue;
sNextTrack.reset();
sNextTrackCueSamples = 0;
sNextTrackIsFromQueue = false;
void AudioState::react(const internal::StreamStarted& ev) {
if (sDrainFormat != ev.sink_format) {
sDrainFormat = ev.sink_format;
ESP_LOGI(kTag, "sink_format=%u ch @ %lu hz", sDrainFormat->num_channels,
sDrainFormat->sample_rate);
}
if (sCurrentTrack) {
PlaybackUpdate event{
.current_track = sCurrentTrack,
.track_position = currentPositionSeconds(),
.paused = !is_in_state<states::Playback>(),
};
events::System().Dispatch(event);
events::Ui().Dispatch(event);
}
sStreamCues.addCue(ev.track, ev.cue_at_sample);
if (sCurrentTrack && !sIsPaused && !is_in_state<states::Playback>()) {
ESP_LOGI(kTag, "ready to play!");
if (!sIsPaused && !is_in_state<states::Playback>()) {
transit<states::Playback>();
} else {
// Make sure everyone knows we've got a track ready to go, even if we're
// not playing it yet. This mostly matters when restoring the queue from
// disk after booting.
emitPlaybackUpdate(true);
}
}
void AudioState::react(const internal::StreamEnded& ev) {
sStreamCues.addCue({}, ev.cue_at_sample);
}
void AudioState::react(const system_fsm::BluetoothEvent& ev) {
if (ev.event != drivers::bluetooth::Event::kConnectionStateChanged) {
return;
@ -283,14 +238,6 @@ void AudioState::react(const StepDownVolume& ev) {
}
}
void AudioState::react(const system_fsm::HasPhonesChanged& ev) {
if (ev.has_headphones) {
ESP_LOGI(kTag, "headphones in!");
} else {
ESP_LOGI(kTag, "headphones out!");
}
}
void AudioState::react(const SetVolume& ev) {
if (ev.db.has_value()) {
if (sOutput->SetVolumeDb(ev.db.value())) {
@ -350,7 +297,7 @@ void AudioState::react(const OutputModeChanged& ev) {
break;
}
sOutput->mode(IAudioOutput::Modes::kOnPaused);
sSampleConverter->SetOutput(sOutput);
sSampleProcessor->SetOutput(sOutput);
// Bluetooth volume isn't 'changed' until we've connected to a device.
if (new_mode == drivers::NvsStorage::Output::kHeadphones) {
@ -361,43 +308,6 @@ void AudioState::react(const OutputModeChanged& ev) {
}
}
auto AudioState::clearDrainBuffer() -> void {
// Tell the decoder to stop adding new samples. This might not take effect
// immediately, since the decoder might currently be stuck waiting for space
// to become available in the drain buffer.
sFileSource->SetPath();
auto mode = sOutput->mode();
if (mode == IAudioOutput::Modes::kOnPlaying) {
// If we're currently playing, then the drain buffer will be actively
// draining on its own. Just keep trying to reset until it works.
while (xStreamBufferReset(sDrainBuffer) != pdPASS) {
}
} else {
// If we're not currently playing, then we need to actively pull samples
// out of the drain buffer to unblock the decoder.
while (!xStreamBufferIsEmpty(sDrainBuffer)) {
// Read a little to unblock the decoder.
uint8_t drain[2048];
xStreamBufferReceive(sDrainBuffer, drain, sizeof(drain), 0);
// Try to quickly discard the rest.
xStreamBufferReset(sDrainBuffer);
}
}
}
auto AudioState::awaitEmptyDrainBuffer() -> void {
if (is_in_state<states::Playback>()) {
for (int i = 0; i < 10 && !xStreamBufferIsEmpty(sDrainBuffer); i++) {
vTaskDelay(pdMS_TO_TICKS(250));
}
}
if (!xStreamBufferIsEmpty(sDrainBuffer)) {
clearDrainBuffer();
}
}
auto AudioState::commitVolume() -> void {
auto mode = sServices->nvs().OutputMode();
auto vol = sOutput->GetVolume();
@ -428,8 +338,7 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) {
sDrainBuffer = xStreamBufferCreateStatic(
kDrainBufferSize, sizeof(sample::Sample), storage, meta);
sFileSource.reset(
new FatfsAudioInput(sServices->tag_parser(), sServices->bg_worker()));
sStreamFactory.reset(new FatfsStreamFactory(*sServices));
sI2SOutput.reset(new I2SAudioOutput(sDrainBuffer, sServices->gpios()));
sBtOutput.reset(new BluetoothAudioOutput(sDrainBuffer, sServices->bluetooth(),
sServices->bg_worker()));
@ -463,10 +372,10 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) {
.left_bias = nvs.AmpLeftBias(),
});
sSampleConverter.reset(new SampleConverter());
sSampleConverter->SetOutput(sOutput);
sSampleProcessor.reset(new SampleProcessor(sDrainBuffer));
sSampleProcessor->SetOutput(sOutput);
Decoder::Start(sFileSource, sSampleConverter);
sDecoder.reset(Decoder::Start(sSampleProcessor));
transit<Standby>();
}
@ -478,7 +387,8 @@ void Standby::react(const system_fsm::KeyLockChanged& ev) {
if (!ev.locking) {
return;
}
sServices->bg_worker().Dispatch<void>([this]() {
auto current = sStreamCues.current();
sServices->bg_worker().Dispatch<void>([=]() {
auto db = sServices->database().lock();
if (!db) {
return;
@ -491,10 +401,13 @@ void Standby::react(const system_fsm::KeyLockChanged& ev) {
}
db->put(kQueueKey, queue.serialise());
if (sCurrentTrack) {
if (current.first && sDrainFormat) {
uint32_t seconds = (current.second / (sDrainFormat->num_channels *
sDrainFormat->sample_rate)) +
current.first->start_offset.value_or(0);
cppbor::Array current_track{
cppbor::Tstr{sCurrentTrack->uri},
cppbor::Uint{currentPositionSeconds().value_or(0)},
cppbor::Tstr{current.first->uri},
cppbor::Uint{seconds},
};
db->put(kCurrentFileKey, current_track.toString());
}
@ -529,7 +442,6 @@ void Standby::react(const system_fsm::SdStateChanged& ev) {
events::Audio().Dispatch(SetTrack{
.new_track = filename,
.seek_to_second = pos,
.transition = SetTrack::Transition::kHardCut,
});
}
}
@ -545,32 +457,29 @@ void Standby::react(const system_fsm::SdStateChanged& ev) {
});
}
static TimerHandle_t sHeartbeatTimer;
static void heartbeat(TimerHandle_t) {
events::Audio().Dispatch(internal::StreamHeartbeat{});
}
void Playback::entry() {
ESP_LOGI(kTag, "audio output resumed");
sOutput->mode(IAudioOutput::Modes::kOnPlaying);
emitPlaybackUpdate(false);
PlaybackUpdate event{
.current_track = sCurrentTrack,
.track_position = currentPositionSeconds(),
.paused = false,
};
events::System().Dispatch(event);
events::Ui().Dispatch(event);
if (!sHeartbeatTimer) {
sHeartbeatTimer =
xTimerCreate("stream", pdMS_TO_TICKS(250), true, NULL, heartbeat);
}
xTimerStart(sHeartbeatTimer, portMAX_DELAY);
}
void Playback::exit() {
ESP_LOGI(kTag, "audio output paused");
xTimerStop(sHeartbeatTimer, portMAX_DELAY);
sOutput->mode(IAudioOutput::Modes::kOnPaused);
PlaybackUpdate event{
.current_track = sCurrentTrack,
.track_position = currentPositionSeconds(),
.paused = true,
};
events::System().Dispatch(event);
events::Ui().Dispatch(event);
emitPlaybackUpdate(true);
}
void Playback::react(const system_fsm::SdStateChanged& ev) {
@ -579,6 +488,18 @@ void Playback::react(const system_fsm::SdStateChanged& ev) {
}
}
void Playback::react(const internal::StreamHeartbeat& ev) {
sStreamCues.update(sOutput->samplesUsed());
if (sStreamCues.hasStream()) {
emitPlaybackUpdate(false);
} else {
// Finished the current stream, and there's nothing upcoming. We must be
// finished.
transit<Standby>();
}
}
} // namespace states
} // namespace audio
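
The playback position reported by emitPlaybackUpdate (and persisted in Standby::react(KeyLockChanged)) divides the interleaved sample count by both channel count and sample rate, then adds the track's start offset. A small sketch of that arithmetic, using a stand-in for IAudioOutput::Format:

#include <cstdint>

struct Format {  // stand-in for IAudioOutput::Format; real field types may differ
  uint32_t sample_rate;
  uint32_t num_channels;
};

// e.g. 96000 interleaved samples at 48 kHz stereo -> 1 second of playback.
uint32_t positionSeconds(uint32_t samples_played, const Format& fmt,
                         uint32_t start_offset_seconds) {
  return samples_played / (fmt.num_channels * fmt.sample_rate) +
         start_offset_seconds;
}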

@ -11,14 +11,14 @@
#include <memory>
#include <vector>
#include "audio/audio_sink.hpp"
#include "system_fsm/service_locator.hpp"
#include "audio/stream_cues.hpp"
#include "tinyfsm.hpp"
#include "audio/audio_decoder.hpp"
#include "audio/audio_events.hpp"
#include "audio/audio_sink.hpp"
#include "audio/bt_audio_output.hpp"
#include "audio/fatfs_audio_input.hpp"
#include "audio/fatfs_stream_factory.hpp"
#include "audio/i2s_audio_output.hpp"
#include "audio/track_queue.hpp"
#include "database/database.hpp"
@ -28,6 +28,7 @@
#include "drivers/gpios.hpp"
#include "drivers/i2s_dac.hpp"
#include "drivers/storage.hpp"
#include "system_fsm/service_locator.hpp"
#include "system_fsm/system_events.hpp"
namespace audio {
@ -46,13 +47,13 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
void react(const SetTrack&);
void react(const TogglePlayPause&);
void react(const internal::DecodingFinished&);
void react(const internal::StreamStarted&);
void react(const internal::StreamUpdate&);
void react(const internal::StreamEnded&);
virtual void react(const internal::StreamHeartbeat&) {}
void react(const StepUpVolume&);
void react(const StepDownVolume&);
virtual void react(const system_fsm::HasPhonesChanged&);
void react(const SetVolume&);
void react(const SetVolumeLimit&);
@ -66,36 +67,24 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
virtual void react(const system_fsm::BluetoothEvent&);
protected:
auto clearDrainBuffer() -> void;
auto awaitEmptyDrainBuffer() -> void;
auto playTrack(database::TrackId id) -> void;
auto emitPlaybackUpdate(bool paused) -> void;
auto commitVolume() -> void;
static std::shared_ptr<system_fsm::ServiceLocator> sServices;
static std::shared_ptr<FatfsAudioInput> sFileSource;
static std::shared_ptr<FatfsStreamFactory> sStreamFactory;
static std::unique_ptr<Decoder> sDecoder;
static std::shared_ptr<SampleConverter> sSampleConverter;
static std::shared_ptr<SampleProcessor> sSampleProcessor;
static std::shared_ptr<I2SAudioOutput> sI2SOutput;
static std::shared_ptr<BluetoothAudioOutput> sBtOutput;
static std::shared_ptr<IAudioOutput> sOutput;
static StreamBufferHandle_t sDrainBuffer;
static std::shared_ptr<TrackInfo> sCurrentTrack;
static uint64_t sCurrentSamples;
static StreamCues sStreamCues;
static std::optional<IAudioOutput::Format> sDrainFormat;
static bool sCurrentTrackIsFromQueue;
static std::shared_ptr<TrackInfo> sNextTrack;
static uint64_t sNextTrackCueSamples;
static bool sNextTrackIsFromQueue;
static bool sIsResampling;
static bool sIsPaused;
auto currentPositionSeconds() -> std::optional<uint32_t>;
};
namespace states {
@ -122,6 +111,7 @@ class Playback : public AudioState {
void exit() override;
void react(const system_fsm::SdStateChanged&) override;
void react(const internal::StreamHeartbeat&) override;
using AudioState::react;
};

@ -75,6 +75,7 @@ class IAudioOutput {
virtual auto PrepareFormat(const Format&) -> Format = 0;
virtual auto Configure(const Format& format) -> void = 0;
virtual auto samplesUsed() -> uint32_t = 0;
auto stream() -> StreamBufferHandle_t { return stream_; }

@ -121,4 +121,8 @@ auto BluetoothAudioOutput::Configure(const Format& fmt) -> void {
// No configuration necessary; the output format is fixed.
}
auto BluetoothAudioOutput::samplesUsed() -> uint32_t {
return bluetooth_.SamplesUsed();
}
} // namespace audio

@ -45,6 +45,8 @@ class BluetoothAudioOutput : public IAudioOutput {
auto PrepareFormat(const Format&) -> Format override;
auto Configure(const Format& format) -> void override;
auto samplesUsed() -> uint32_t override;
BluetoothAudioOutput(const BluetoothAudioOutput&) = delete;
BluetoothAudioOutput& operator=(const BluetoothAudioOutput&) = delete;

@ -1,163 +0,0 @@
/*
* Copyright 2023 jacqueline <me@jacqueline.id.au>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#include "audio/fatfs_audio_input.hpp"
#include <algorithm>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <span>
#include <string>
#include <variant>
#include "audio/readahead_source.hpp"
#include "esp_heap_caps.h"
#include "esp_log.h"
#include "ff.h"
#include "freertos/portmacro.h"
#include "freertos/projdefs.h"
#include "audio/audio_events.hpp"
#include "audio/audio_fsm.hpp"
#include "audio/audio_source.hpp"
#include "audio/fatfs_source.hpp"
#include "codec.hpp"
#include "database/future_fetcher.hpp"
#include "database/tag_parser.hpp"
#include "database/track.hpp"
#include "drivers/spi.hpp"
#include "events/event_queue.hpp"
#include "tasks.hpp"
#include "types.hpp"
[[maybe_unused]] static const char* kTag = "SRC";
namespace audio {
FatfsAudioInput::FatfsAudioInput(database::ITagParser& tag_parser,
tasks::WorkerPool& bg_worker)
: IAudioSource(),
tag_parser_(tag_parser),
bg_worker_(bg_worker),
new_stream_mutex_(),
new_stream_(),
has_new_stream_(false) {}
FatfsAudioInput::~FatfsAudioInput() {}
auto FatfsAudioInput::SetPath(std::optional<std::string> path) -> void {
if (path) {
SetPath(*path);
} else {
SetPath();
}
}
auto FatfsAudioInput::SetPath(const std::string& path,
uint32_t offset) -> void {
std::lock_guard<std::mutex> guard{new_stream_mutex_};
if (OpenFile(path, offset)) {
has_new_stream_ = true;
has_new_stream_.notify_one();
}
}
auto FatfsAudioInput::SetPath() -> void {
std::lock_guard<std::mutex> guard{new_stream_mutex_};
new_stream_.reset();
has_new_stream_ = true;
has_new_stream_.notify_one();
}
auto FatfsAudioInput::HasNewStream() -> bool {
return has_new_stream_;
}
auto FatfsAudioInput::NextStream() -> std::shared_ptr<TaggedStream> {
while (true) {
has_new_stream_.wait(false);
{
std::lock_guard<std::mutex> guard{new_stream_mutex_};
if (!has_new_stream_.exchange(false)) {
// If the new stream went away, then we need to go back to waiting.
continue;
}
if (new_stream_ == nullptr) {
continue;
}
auto stream = new_stream_;
new_stream_ = nullptr;
return stream;
}
}
}
auto FatfsAudioInput::OpenFile(const std::string& path,
uint32_t offset) -> bool {
ESP_LOGI(kTag, "opening file %s", path.c_str());
auto tags = tag_parser_.ReadAndParseTags(path);
if (!tags) {
ESP_LOGE(kTag, "failed to read tags");
return false;
}
if (!tags->title()) {
tags->title(path);
}
auto stream_type = ContainerToStreamType(tags->encoding());
if (!stream_type.has_value()) {
ESP_LOGE(kTag, "couldn't match container to stream");
return false;
}
std::unique_ptr<FIL> file = std::make_unique<FIL>();
FRESULT res;
{
auto lock = drivers::acquire_spi();
res = f_open(file.get(), path.c_str(), FA_READ);
}
if (res != FR_OK) {
ESP_LOGE(kTag, "failed to open file! res: %i", res);
return false;
}
auto source =
std::make_unique<FatfsSource>(stream_type.value(), std::move(file));
new_stream_.reset(new TaggedStream(tags, std::move(source), path, offset));
return true;
}
auto FatfsAudioInput::ContainerToStreamType(database::Container enc)
-> std::optional<codecs::StreamType> {
switch (enc) {
case database::Container::kMp3:
return codecs::StreamType::kMp3;
case database::Container::kWav:
return codecs::StreamType::kWav;
case database::Container::kOgg:
return codecs::StreamType::kVorbis;
case database::Container::kFlac:
return codecs::StreamType::kFlac;
case database::Container::kOpus:
return codecs::StreamType::kOpus;
case database::Container::kUnsupported:
default:
return {};
}
}
} // namespace audio

@ -1,66 +0,0 @@
/*
* Copyright 2023 jacqueline <me@jacqueline.id.au>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#pragma once
#include <cstddef>
#include <cstdint>
#include <future>
#include <memory>
#include <string>
#include "ff.h"
#include "freertos/portmacro.h"
#include "audio/audio_source.hpp"
#include "codec.hpp"
#include "database/future_fetcher.hpp"
#include "database/tag_parser.hpp"
#include "tasks.hpp"
#include "types.hpp"
namespace audio {
/*
* Audio source that fetches data from a FatFs (or exfat i guess) filesystem.
*
* All public methods are safe to call from any task.
*/
class FatfsAudioInput : public IAudioSource {
public:
explicit FatfsAudioInput(database::ITagParser&, tasks::WorkerPool&);
~FatfsAudioInput();
/*
* Immediately cease reading any current source, and begin reading from the
* given file path.
*/
auto SetPath(std::optional<std::string>) -> void;
auto SetPath(const std::string&, uint32_t offset = 0) -> void;
auto SetPath() -> void;
auto HasNewStream() -> bool override;
auto NextStream() -> std::shared_ptr<TaggedStream> override;
FatfsAudioInput(const FatfsAudioInput&) = delete;
FatfsAudioInput& operator=(const FatfsAudioInput&) = delete;
private:
auto OpenFile(const std::string& path, uint32_t offset) -> bool;
auto ContainerToStreamType(database::Container)
-> std::optional<codecs::StreamType>;
database::ITagParser& tag_parser_;
tasks::WorkerPool& bg_worker_;
std::mutex new_stream_mutex_;
std::shared_ptr<TaggedStream> new_stream_;
std::atomic<bool> has_new_stream_;
};
} // namespace audio

@ -0,0 +1,104 @@
/*
* Copyright 2023 jacqueline <me@jacqueline.id.au>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#include "audio/fatfs_stream_factory.hpp"
#include <cstdint>
#include <memory>
#include <string>
#include "database/database.hpp"
#include "esp_log.h"
#include "ff.h"
#include "freertos/portmacro.h"
#include "freertos/projdefs.h"
#include "audio/audio_source.hpp"
#include "audio/fatfs_source.hpp"
#include "codec.hpp"
#include "database/tag_parser.hpp"
#include "database/track.hpp"
#include "drivers/spi.hpp"
#include "system_fsm/service_locator.hpp"
#include "tasks.hpp"
#include "types.hpp"
[[maybe_unused]] static const char* kTag = "SRC";
namespace audio {
FatfsStreamFactory::FatfsStreamFactory(system_fsm::ServiceLocator& services)
: services_(services) {}
auto FatfsStreamFactory::create(database::TrackId id, uint32_t offset)
-> std::shared_ptr<TaggedStream> {
auto db = services_.database().lock();
if (!db) {
return {};
}
auto path = db->getTrackPath(id);
if (!path) {
return {};
}
return create(*path, offset);
}
auto FatfsStreamFactory::create(std::string path, uint32_t offset)
-> std::shared_ptr<TaggedStream> {
auto tags = services_.tag_parser().ReadAndParseTags(path);
if (!tags) {
ESP_LOGE(kTag, "failed to read tags");
return {};
}
if (!tags->title()) {
tags->title(path);
}
auto stream_type = ContainerToStreamType(tags->encoding());
if (!stream_type.has_value()) {
ESP_LOGE(kTag, "couldn't match container to stream");
return {};
}
std::unique_ptr<FIL> file = std::make_unique<FIL>();
FRESULT res;
{
auto lock = drivers::acquire_spi();
res = f_open(file.get(), path.c_str(), FA_READ);
}
if (res != FR_OK) {
ESP_LOGE(kTag, "failed to open file! res: %i", res);
return {};
}
return std::make_shared<TaggedStream>(
tags, std::make_unique<FatfsSource>(stream_type.value(), std::move(file)),
path, offset);
}
auto FatfsStreamFactory::ContainerToStreamType(database::Container enc)
-> std::optional<codecs::StreamType> {
switch (enc) {
case database::Container::kMp3:
return codecs::StreamType::kMp3;
case database::Container::kWav:
return codecs::StreamType::kWav;
case database::Container::kOgg:
return codecs::StreamType::kVorbis;
case database::Container::kFlac:
return codecs::StreamType::kFlac;
case database::Container::kOpus:
return codecs::StreamType::kOpus;
case database::Container::kUnsupported:
default:
return {};
}
}
} // namespace audio

@ -0,0 +1,53 @@
/*
* Copyright 2023 jacqueline <me@jacqueline.id.au>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#pragma once
#include <stdint.h>
#include <cstddef>
#include <cstdint>
#include <future>
#include <memory>
#include <string>
#include "database/database.hpp"
#include "database/track.hpp"
#include "ff.h"
#include "freertos/portmacro.h"
#include "audio/audio_source.hpp"
#include "codec.hpp"
#include "database/future_fetcher.hpp"
#include "database/tag_parser.hpp"
#include "system_fsm/service_locator.hpp"
#include "tasks.hpp"
#include "types.hpp"
namespace audio {
/*
* Utility to create streams that read from files on the sd card.
*/
class FatfsStreamFactory {
public:
explicit FatfsStreamFactory(system_fsm::ServiceLocator&);
auto create(database::TrackId, uint32_t offset = 0)
-> std::shared_ptr<TaggedStream>;
auto create(std::string, uint32_t offset = 0)
-> std::shared_ptr<TaggedStream>;
FatfsStreamFactory(const FatfsStreamFactory&) = delete;
FatfsStreamFactory& operator=(const FatfsStreamFactory&) = delete;
private:
auto ContainerToStreamType(database::Container)
-> std::optional<codecs::StreamType>;
system_fsm::ServiceLocator& services_;
};
} // namespace audio

@ -230,4 +230,8 @@ auto I2SAudioOutput::Configure(const Format& fmt) -> void {
current_config_ = fmt;
}
auto I2SAudioOutput::samplesUsed() -> uint32_t {
return dac_->SamplesUsed();
}
} // namespace audio

@ -43,6 +43,8 @@ class I2SAudioOutput : public IAudioOutput {
auto PrepareFormat(const Format&) -> Format override;
auto Configure(const Format& format) -> void override;
auto samplesUsed() -> uint32_t override;
I2SAudioOutput(const I2SAudioOutput&) = delete;
I2SAudioOutput& operator=(const I2SAudioOutput&) = delete;

@ -4,12 +4,13 @@
* SPDX-License-Identifier: GPL-3.0-only
*/
#include "audio/audio_converter.hpp"
#include "audio/processor.hpp"
#include <stdint.h>
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include "audio/audio_events.hpp"
#include "audio/audio_sink.hpp"
@ -32,14 +33,15 @@ static constexpr std::size_t kSourceBufferLength = kSampleBufferLength * 2;
namespace audio {
SampleConverter::SampleConverter()
SampleProcessor::SampleProcessor(StreamBufferHandle_t sink)
: commands_(xQueueCreate(1, sizeof(Args))),
resampler_(nullptr),
source_(xStreamBufferCreateWithCaps(kSourceBufferLength,
sizeof(sample::Sample) * 2,
MALLOC_CAP_DMA)),
sink_(sink),
leftover_bytes_(0),
samples_sunk_(0) {
samples_written_(0) {
input_buffer_ = {
reinterpret_cast<sample::Sample*>(heap_caps_calloc(
kSampleBufferLength, sizeof(sample::Sample), MALLOC_CAP_DMA)),
@ -55,47 +57,52 @@ SampleConverter::SampleConverter()
tasks::StartPersistent<tasks::Type::kAudioConverter>([&]() { Main(); });
}
SampleConverter::~SampleConverter() {
SampleProcessor::~SampleProcessor() {
vQueueDelete(commands_);
vStreamBufferDelete(source_);
}
auto SampleConverter::SetOutput(std::shared_ptr<IAudioOutput> output) -> void {
// FIXME: We should add synchronisation here, but we should be careful about
// not impacting performance given that the output will change only very
// rarely (if ever).
sink_ = output;
auto SampleProcessor::SetOutput(std::shared_ptr<IAudioOutput> output) -> void {
assert(xStreamBufferIsEmpty(sink_));
// FIXME: We should add synchronisation here, but we should be careful
// about not impacting performance given that the output will change only
// very rarely (if ever).
output_ = output;
samples_written_ = output_->samplesUsed();
}
auto SampleConverter::beginStream(std::shared_ptr<TrackInfo> track) -> void {
auto SampleProcessor::beginStream(std::shared_ptr<TrackInfo> track) -> void {
Args args{
.track = new std::shared_ptr<TrackInfo>(track),
.samples_available = 0,
.is_end_of_stream = false,
.clear_buffers = false,
};
xQueueSend(commands_, &args, portMAX_DELAY);
}
auto SampleConverter::continueStream(std::span<sample::Sample> input) -> void {
auto SampleProcessor::continueStream(std::span<sample::Sample> input) -> void {
Args args{
.track = nullptr,
.samples_available = input.size(),
.is_end_of_stream = false,
.clear_buffers = false,
};
xQueueSend(commands_, &args, portMAX_DELAY);
xStreamBufferSend(source_, input.data(), input.size_bytes(), portMAX_DELAY);
}
auto SampleConverter::endStream() -> void {
auto SampleProcessor::endStream(bool cancelled) -> void {
Args args{
.track = nullptr,
.samples_available = 0,
.is_end_of_stream = true,
.clear_buffers = cancelled,
};
xQueueSend(commands_, &args, portMAX_DELAY);
}
auto SampleConverter::Main() -> void {
auto SampleProcessor::Main() -> void {
for (;;) {
Args args;
while (!xQueueReceive(commands_, &args, portMAX_DELAY)) {
@ -109,43 +116,44 @@ auto SampleConverter::Main() -> void {
handleContinueStream(args.samples_available);
}
if (args.is_end_of_stream) {
handleEndStream();
handleEndStream(args.clear_buffers);
}
}
}
auto SampleConverter::handleBeginStream(std::shared_ptr<TrackInfo> track)
auto SampleProcessor::handleBeginStream(std::shared_ptr<TrackInfo> track)
-> void {
if (track->format != source_format_) {
resampler_.reset();
source_format_ = track->format;
// The new stream has a different format to the previous stream (or there
// was no previous stream).
// First, clean up our filters.
resampler_.reset();
leftover_bytes_ = 0;
auto new_target = sink_->PrepareFormat(track->format);
if (new_target != target_format_) {
// The new format is different to the old one. Wait for the sink to
// drain before continuing.
while (!xStreamBufferIsEmpty(sink_->stream())) {
ESP_LOGI(kTag, "waiting for sink stream to drain...");
// TODO(jacqueline): Get the sink drain ISR to notify us of this
// via semaphore instead of busy-ish waiting.
vTaskDelay(pdMS_TO_TICKS(10));
}
sink_->Configure(new_target);
// If the output is idle, then we can reconfigure it to the closest format
// to our new source.
// If the output *wasn't* idle, then we can't reconfigure without an
// audible gap in playback. So instead, we simply keep the same target
// format and begin resampling.
if (xStreamBufferIsEmpty(sink_)) {
target_format_ = output_->PrepareFormat(track->format);
output_->Configure(target_format_);
}
target_format_ = new_target;
}
samples_sunk_ = 0;
if (xStreamBufferIsEmpty(sink_)) {
samples_written_ = output_->samplesUsed();
}
events::Audio().Dispatch(internal::StreamStarted{
.track = track,
.src_format = source_format_,
.dst_format = target_format_,
.sink_format = target_format_,
.cue_at_sample = samples_written_,
});
}
auto SampleConverter::handleContinueStream(size_t samples_available) -> void {
auto SampleProcessor::handleContinueStream(size_t samples_available) -> void {
// Loop until we finish reading all the bytes indicated. There might be
// leftovers from each iteration, and from this process as a whole,
// depending on the resampling stage.
@ -182,7 +190,7 @@ auto SampleConverter::handleContinueStream(size_t samples_available) -> void {
}
}
auto SampleConverter::handleSamples(std::span<sample::Sample> input) -> size_t {
auto SampleProcessor::handleSamples(std::span<sample::Sample> input) -> size_t {
if (source_format_ == target_format_) {
// The happiest possible case: the input format matches the output
// format already.
@ -223,8 +231,8 @@ auto SampleConverter::handleSamples(std::span<sample::Sample> input) -> size_t {
return samples_used;
}
auto SampleConverter::handleEndStream() -> void {
if (resampler_) {
auto SampleProcessor::handleEndStream(bool clear_bufs) -> void {
if (resampler_ && !clear_bufs) {
size_t read, written;
std::tie(read, written) = resampler_->Process({}, resampled_buffer_, true);
@ -233,33 +241,31 @@ auto SampleConverter::handleEndStream() -> void {
}
}
// Send a final update to finish off this stream's samples.
if (samples_sunk_ > 0) {
events::Audio().Dispatch(internal::StreamUpdate{
.samples_sunk = samples_sunk_,
});
samples_sunk_ = 0;
if (clear_bufs) {
assert(xStreamBufferReset(sink_));
samples_written_ = output_->samplesUsed();
}
// FIXME: This discards any leftover samples, but there probably shouldn't be
// any leftover samples. Can this be an assert instead?
leftover_bytes_ = 0;
events::Audio().Dispatch(internal::StreamEnded{});
events::Audio().Dispatch(internal::StreamEnded{
.cue_at_sample = samples_written_,
});
}
auto SampleConverter::sendToSink(std::span<sample::Sample> samples) -> void {
// Update the number of samples sunk so far *before* actually sinking them,
// since writing to the stream buffer will block when the buffer gets full.
samples_sunk_ += samples.size();
if (samples_sunk_ >=
target_format_.sample_rate * target_format_.num_channels) {
events::Audio().Dispatch(internal::StreamUpdate{
.samples_sunk = samples_sunk_,
});
samples_sunk_ = 0;
}
auto SampleProcessor::sendToSink(std::span<sample::Sample> samples) -> void {
auto data = std::as_bytes(samples);
xStreamBufferSend(sink_, data.data(), data.size(), portMAX_DELAY);
xStreamBufferSend(sink_->stream(),
reinterpret_cast<std::byte*>(samples.data()),
samples.size_bytes(), portMAX_DELAY);
uint32_t samples_before_overflow =
std::numeric_limits<uint32_t>::max() - samples_written_;
if (samples_before_overflow < samples.size()) {
samples_written_ = samples.size() - samples_before_overflow;
} else {
samples_written_ += samples.size();
}
}
} // namespace audio
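
The processor's command path pairs a one-deep command queue with a stream buffer: beginStream/continueStream/endStream post a small Args record describing what follows, while the bulk samples travel separately through the source_ stream buffer. A simplified producer/consumer sketch of that shape (simplified command struct and made-up sizes, not the real SampleProcessor code):

#include <cstdint>
#include <vector>
#include "freertos/FreeRTOS.h"
#include "freertos/queue.h"
#include "freertos/stream_buffer.h"

struct Command {
  size_t samples_available;
  bool is_end_of_stream;
};

// Producer (decoder side): describe the payload first, then send the bytes.
void produce(QueueHandle_t commands, StreamBufferHandle_t source,
             const int16_t* samples, size_t count) {
  Command cmd{.samples_available = count, .is_end_of_stream = false};
  xQueueSend(commands, &cmd, portMAX_DELAY);
  xStreamBufferSend(source, samples, count * sizeof(int16_t), portMAX_DELAY);
}

// Consumer (processor task): block on the queue, then drain exactly the
// advertised number of bytes before looking for the next command.
void consume(QueueHandle_t commands, StreamBufferHandle_t source) {
  Command cmd;
  if (!xQueueReceive(commands, &cmd, portMAX_DELAY)) {
    return;
  }
  std::vector<int16_t> buf(cmd.samples_available);
  size_t wanted = cmd.samples_available * sizeof(int16_t);
  size_t got = 0;
  while (got < wanted) {
    got += xStreamBufferReceive(
        source, reinterpret_cast<uint8_t*>(buf.data()) + got, wanted - got,
        portMAX_DELAY);
  }
  // ...resample and forward to the output's sink stream here...
}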

@ -25,23 +25,23 @@ namespace audio {
* format of the current output device. The resulting samples are forwarded
* to the output device's sink stream.
*/
class SampleConverter {
class SampleProcessor {
public:
SampleConverter();
~SampleConverter();
SampleProcessor(StreamBufferHandle_t sink);
~SampleProcessor();
auto SetOutput(std::shared_ptr<IAudioOutput>) -> void;
auto beginStream(std::shared_ptr<TrackInfo>) -> void;
auto continueStream(std::span<sample::Sample>) -> void;
auto endStream() -> void;
auto endStream(bool cancelled) -> void;
private:
auto Main() -> void;
auto handleBeginStream(std::shared_ptr<TrackInfo>) -> void;
auto handleContinueStream(size_t samples_available) -> void;
auto handleEndStream() -> void;
auto handleEndStream(bool cancel) -> void;
auto handleSamples(std::span<sample::Sample>) -> size_t;
@ -51,23 +51,26 @@ class SampleConverter {
std::shared_ptr<TrackInfo>* track;
size_t samples_available;
bool is_end_of_stream;
bool clear_buffers;
};
QueueHandle_t commands_;
std::unique_ptr<Resampler> resampler_;
StreamBufferHandle_t source_;
StreamBufferHandle_t sink_;
std::span<sample::Sample> input_buffer_;
std::span<std::byte> input_buffer_as_bytes_;
std::span<sample::Sample> resampled_buffer_;
std::shared_ptr<IAudioOutput> sink_;
std::shared_ptr<IAudioOutput> output_;
IAudioOutput::Format source_format_;
IAudioOutput::Format target_format_;
size_t leftover_bytes_;
uint32_t samples_sunk_;
uint32_t samples_written_;
};
} // namespace audio

@ -0,0 +1,65 @@
/*
* Copyright 2024 jacqueline <me@jacqueline.id.au>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#include "audio/stream_cues.hpp"
#include <cstdint>
#include <memory>
namespace audio {
StreamCues::StreamCues() : now_(0) {}
auto StreamCues::update(uint32_t sample) -> void {
if (sample < now_) {
// The current time must have overflowed. Deal with any cues between now_
// and UINT32_MAX, then proceed as normal.
while (!upcoming_.empty() && upcoming_.front().start_at > now_) {
current_ = upcoming_.front();
upcoming_.pop_front();
}
}
now_ = sample;
while (!upcoming_.empty() && upcoming_.front().start_at <= now_) {
current_ = upcoming_.front();
upcoming_.pop_front();
}
}
auto StreamCues::addCue(std::shared_ptr<TrackInfo> track, uint32_t sample)
-> void {
if (sample == now_) {
current_ = {track, now_};
} else {
upcoming_.push_back(Cue{
.track = track,
.start_at = sample,
});
}
}
auto StreamCues::current() -> std::pair<std::shared_ptr<TrackInfo>, uint32_t> {
if (!current_) {
return {};
}
uint32_t duration;
if (now_ < current_->start_at) {
// now_ overflowed since this track started.
duration = now_ + (UINT32_MAX - current_->start_at);
} else {
duration = now_ - current_->start_at;
}
return {current_->track, duration};
}
auto StreamCues::hasStream() -> bool {
return current_ || !upcoming_.empty();
}
} // namespace audio
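
A short usage sketch of StreamCues, showing how cue points queued by the processor translate into "which track is playing, and for how long". The sample numbers here are made up, and the tracks would normally arrive via internal::StreamStarted events:

#include <memory>
#include "audio/stream_cues.hpp"

void example(std::shared_ptr<audio::TrackInfo> track_a,
             std::shared_ptr<audio::TrackInfo> track_b) {
  audio::StreamCues cues;

  cues.addCue(track_a, 0);      // track A's first sample is sample 0
  cues.addCue(track_b, 96000);  // track B is cued to begin 96000 samples later

  cues.update(48000);           // the output reports 48000 samples played
  auto playing = cues.current();  // -> {track_a, 48000}

  cues.update(100000);          // now past track B's cue point
  playing = cues.current();       // -> {track_b, 4000}
}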

@ -0,0 +1,49 @@
/*
* Copyright 2024 jacqueline <me@jacqueline.id.au>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#pragma once
#include <stdint.h>
#include <cstdint>
#include <deque>
#include <memory>
#include "audio/audio_events.hpp"
namespace audio {
/*
* Utility for tracking which track is currently being played (and how long it
* has been playing for) based on counting samples that are put into and taken
* out of the audio processor's output buffer.
*/
class StreamCues {
public:
StreamCues();
/* Updates the current track given the new most recently played sample. */
auto update(uint32_t sample) -> void;
/* Returns the current track, and how long it has been playing for. */
auto current() -> std::pair<std::shared_ptr<TrackInfo>, uint32_t>;
auto hasStream() -> bool;
auto addCue(std::shared_ptr<TrackInfo>, uint32_t start_at) -> void;
private:
uint32_t now_;
struct Cue {
std::shared_ptr<TrackInfo> track;
uint32_t start_at;
};
std::optional<Cue> current_;
std::deque<Cue> upcoming_;
};
} // namespace audio