diff --git a/src/audio/audio_task.cpp b/src/audio/audio_task.cpp
index 797ab7f9..baa0a52d 100644
--- a/src/audio/audio_task.cpp
+++ b/src/audio/audio_task.cpp
@@ -56,31 +56,18 @@ static const char* kTag = "audio_dec";
 
 static constexpr std::size_t kCodecBufferLength = 240 * 4;
 
-Timer::Timer(const StreamInfo::Pcm& format, const Duration& duration)
-    : format_(format), current_seconds_(0), current_sample_in_second_(0) {
-  switch (duration.src) {
-    case Duration::Source::kLibTags:
-      ESP_LOGI(kTag, "using duration from libtags");
-      total_duration_seconds_ = duration.duration;
-      break;
-    case Duration::Source::kCodec:
-      ESP_LOGI(kTag, "using duration from decoder");
-      total_duration_seconds_ = duration.duration;
-      break;
-    case Duration::Source::kFileSize:
-      ESP_LOGW(kTag, "calculating duration from filesize");
-      total_duration_seconds_ =
-          bytes_to_samples(duration.duration) / format_.sample_rate;
-      break;
-  }
-}
+// Derives the total duration up front; a 0Hz rate yields 0s, not a div-by-0.
+Timer::Timer(uint32_t sample_rate, uint32_t total_samples)
+    : sample_rate_(sample_rate), current_seconds_(0),
+      current_sample_in_second_(0),
+      total_duration_seconds_(sample_rate ? total_samples / sample_rate : 0) {}
 
-auto Timer::AddBytes(std::size_t bytes) -> void {
+auto Timer::AddSamples(std::size_t samples) -> void {
   bool incremented = false;
-  current_sample_in_second_ += bytes_to_samples(bytes);
-  while (current_sample_in_second_ >= format_.sample_rate) {
+  current_sample_in_second_ += samples;
+  while (sample_rate_ > 0 && current_sample_in_second_ >= sample_rate_) {
     current_seconds_++;
-    current_sample_in_second_ -= format_.sample_rate;
+    current_sample_in_second_ -= sample_rate_;
     incremented = true;
   }
 
@@ -96,18 +83,6 @@ auto Timer::AddBytes(std::size_t bytes) -> void {
   }
 }
 
-auto Timer::bytes_to_samples(uint32_t bytes) -> uint32_t {
-  uint32_t samples = bytes;
-  samples /= format_.channels;
-
-  // Samples must be aligned to 16 bits. The number of actual bytes per
-  // sample is therefore the bps divided by 16, rounded up (align to word),
-  // times two (convert to bytes).
-  uint8_t bytes_per_sample = ((format_.bits_per_sample + 16 - 1) / 16) * 2;
-  samples /= bytes_per_sample;
-  return samples;
-}
-
 auto AudioTask::Start(IAudioSource* source, IAudioSink* sink) -> AudioTask* {
   AudioTask* task = new AudioTask(source, sink);
   // Pin to CORE1 because codecs should be fixed point anyway, and being on
@@ -162,6 +137,13 @@ auto AudioTask::BeginDecoding(std::shared_ptr<codecs::IStream> stream) -> bool {
     return false;
   }
 
+  // Only track progress when the length is known and the rate is non-zero.
+  if (open_res->total_samples && open_res->sample_rate_hz > 0) {
+    timer_.reset(new Timer(open_res->sample_rate_hz, *open_res->total_samples));
+  } else {
+    timer_.reset();
+  }
+
   current_sink_format_ = IAudioSink::Format{
       .sample_rate = open_res->sample_rate_hz,
       .num_channels = open_res->num_channels,
@@ -183,6 +165,10 @@ auto AudioTask::ContinueDecoding() -> bool {
                        current_sink_format_.value(), res->is_stream_finished);
   }
 
+  if (timer_) {
+    timer_->AddSamples(res->samples_written);
+  }
+
   return res->is_stream_finished;
 }
 
diff --git a/src/audio/include/audio_task.hpp b/src/audio/include/audio_task.hpp
index 48f5502c..5614fbbd 100644
--- a/src/audio/include/audio_task.hpp
+++ b/src/audio/include/audio_task.hpp
@@ -21,26 +21,14 @@
 
 namespace audio {
 
-struct Duration {
-  enum class Source {
-    kLibTags,
-    kCodec,
-    kFileSize,
-  };
-  Source src;
-  uint32_t duration;
-};
-
 class Timer {
  public:
-  Timer(const StreamInfo::Pcm&, const Duration&);
+  Timer(uint32_t sample_rate, uint32_t total_samples);
 
-  auto AddBytes(std::size_t) -> void;
+  auto AddSamples(std::size_t) -> void;
 
  private:
-  auto bytes_to_samples(uint32_t) -> uint32_t;
-
-  StreamInfo::Pcm format_;
+  uint32_t sample_rate_;
 
   uint32_t current_seconds_;
   uint32_t current_sample_in_second_;
diff --git a/src/codecs/foxenflac.cpp b/src/codecs/foxenflac.cpp
index eef8225a..b1ba348a 100644
--- a/src/codecs/foxenflac.cpp
+++ b/src/codecs/foxenflac.cpp
@@ -64,7 +64,7 @@ auto FoxenFlacDecoder::OpenStream(std::shared_ptr<IStream> input)
 
   uint64_t num_samples = fx_flac_get_streaminfo(flac_, FLAC_KEY_N_SAMPLES);
   if (num_samples > 0) {
-    format.duration_seconds = num_samples / fs;
+    format.total_samples = num_samples;  // Samples, not seconds.
   }
 
   return format;
diff --git a/src/codecs/include/codec.hpp b/src/codecs/include/codec.hpp
index ece3d4fe..2f95389c 100644
--- a/src/codecs/include/codec.hpp
+++ b/src/codecs/include/codec.hpp
@@ -91,7 +91,7 @@ class ICodec {
   struct OutputFormat {
     uint8_t num_channels;
     uint32_t sample_rate_hz;
-    std::optional<uint32_t> duration_seconds;
+    std::optional<uint32_t> total_samples;  // Unset if length is unknown.
 
     bool operator==(const OutputFormat&) const = default;
   };
diff --git a/src/codecs/mad.cpp b/src/codecs/mad.cpp
index ce3a9cac..dfa568d4 100644
--- a/src/codecs/mad.cpp
+++ b/src/codecs/mad.cpp
@@ -92,7 +92,7 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr<IStream> input)
 
   auto vbr_length = GetVbrLength(header);
   if (vbr_length) {
-    output.duration_seconds = vbr_length;
+    output.total_samples = vbr_length;
   }
   return output;
 }
@@ -234,7 +234,7 @@ auto MadMp3Decoder::GetVbrLength(const mad_header& header)
     return {};
   }
 
-  return (double)(frames_count * samples_per_frame) / header.samplerate;
+  return frames_count * samples_per_frame;
 }
 
 }  // namespace codecs
diff --git a/src/codecs/opus.cpp b/src/codecs/opus.cpp
index c0727c6b..f5ff471a 100644
--- a/src/codecs/opus.cpp
+++ b/src/codecs/opus.cpp
@@ -122,9 +122,24 @@ auto XiphOpusDecoder::OpenStream(std::shared_ptr<IStream> input)
     return cpp::fail(Error::kMalformedData);
   }
 
+  auto l = op_pcm_total(opus_, -1);
+  std::optional<uint32_t> length;
+  if (l > 0) {
+    // Output is always downmixed to two channels, but the pcm count does not
+    // reflect this, so rescale it by (2 / channels). Multiply first so the
+    // integer division does not truncate the ratio to zero.
+    // NOTE(review): confirm the units of op_pcm_total against opusfile docs.
+    int channels = op_channel_count(opus_, -1);
+    if (channels > 0 && channels != 2) {
+      l = l * 2 / channels;
+    }
+    length = l;
+  }
+
   return OutputFormat{
       .num_channels = 2,
       .sample_rate_hz = 48000,
+      .total_samples = length,
   };
 }
 
diff --git a/src/codecs/vorbis.cpp b/src/codecs/vorbis.cpp
index aa367e02..cf783978 100644
--- a/src/codecs/vorbis.cpp
+++ b/src/codecs/vorbis.cpp
@@ -118,9 +118,16 @@ auto TremorVorbisDecoder::OpenStream(std::shared_ptr<IStream> input)
     return cpp::fail(Error::kMalformedData);
   }
 
+  auto l = ov_pcm_total(&vorbis_, -1);
+  std::optional<uint32_t> length;
+  if (l > 0) {
+    length = l;
+  }
+
   return OutputFormat{
       .num_channels = static_cast<uint8_t>(info->channels),
       .sample_rate_hz = static_cast<uint32_t>(info->rate),
+      .total_samples = length,
   };
 }