diff --git a/src/audio/audio_task.cpp b/src/audio/audio_task.cpp
index 797ab7f9..baa0a52d 100644
--- a/src/audio/audio_task.cpp
+++ b/src/audio/audio_task.cpp
@@ -56,31 +56,20 @@ static const char* kTag = "audio_dec";
 
 static constexpr std::size_t kCodecBufferLength = 240 * 4;
 
-Timer::Timer(const StreamInfo::Pcm& format, const Duration& duration)
-    : format_(format), current_seconds_(0), current_sample_in_second_(0) {
-  switch (duration.src) {
-    case Duration::Source::kLibTags:
-      ESP_LOGI(kTag, "using duration from libtags");
-      total_duration_seconds_ = duration.duration;
-      break;
-    case Duration::Source::kCodec:
-      ESP_LOGI(kTag, "using duration from decoder");
-      total_duration_seconds_ = duration.duration;
-      break;
-    case Duration::Source::kFileSize:
-      ESP_LOGW(kTag, "calculating duration from filesize");
-      total_duration_seconds_ =
-          bytes_to_samples(duration.duration) / format_.sample_rate;
-      break;
-  }
-}
+// Accumulates decoded samples into whole seconds. The total duration is
+// |total_samples| / |sample_rate|, so |sample_rate| must be non-zero.
+Timer::Timer(uint32_t sample_rate, uint32_t total_samples)
+    : sample_rate_(sample_rate),
+      current_seconds_(0),
+      current_sample_in_second_(0),
+      total_duration_seconds_(total_samples / sample_rate) {}
 
-auto Timer::AddBytes(std::size_t bytes) -> void {
+auto Timer::AddSamples(std::size_t samples) -> void {
   bool incremented = false;
-  current_sample_in_second_ += bytes_to_samples(bytes);
-  while (current_sample_in_second_ >= format_.sample_rate) {
+  current_sample_in_second_ += samples;
+  while (current_sample_in_second_ >= sample_rate_) {
     current_seconds_++;
-    current_sample_in_second_ -= format_.sample_rate;
+    current_sample_in_second_ -= sample_rate_;
     incremented = true;
   }
 
@@ -96,18 +85,6 @@ auto Timer::AddBytes(std::size_t bytes) -> void {
   }
 }
 
-auto Timer::bytes_to_samples(uint32_t bytes) -> uint32_t {
-  uint32_t samples = bytes;
-  samples /= format_.channels;
-
-  // Samples must be aligned to 16 bits. The number of actual bytes per
-  // sample is therefore the bps divided by 16, rounded up (align to word),
-  // times two (convert to bytes).
-  uint8_t bytes_per_sample = ((format_.bits_per_sample + 16 - 1) / 16) * 2;
-  samples /= bytes_per_sample;
-  return samples;
-}
-
 auto AudioTask::Start(IAudioSource* source, IAudioSink* sink) -> AudioTask* {
   AudioTask* task = new AudioTask(source, sink);
   // Pin to CORE1 because codecs should be fixed point anyway, and being on
@@ -162,6 +139,15 @@ auto AudioTask::BeginDecoding(std::shared_ptr stream) -> bool {
     return false;
   }
 
+  // Only track position when the codec reported a total length. A zero sample
+  // rate is rejected because Timer divides by it and AddSamples would spin.
+  if (open_res->total_samples && open_res->sample_rate_hz > 0) {
+    timer_.reset(
+        new Timer(open_res->sample_rate_hz, open_res->total_samples.value()));
+  } else {
+    timer_.reset();
+  }
+
   current_sink_format_ = IAudioSink::Format{
       .sample_rate = open_res->sample_rate_hz,
       .num_channels = open_res->num_channels,
@@ -183,6 +169,10 @@ auto AudioTask::ContinueDecoding() -> bool {
                     current_sink_format_.value(), res->is_stream_finished);
   }
 
+  if (timer_) {
+    timer_->AddSamples(res->samples_written);
+  }
+
   return res->is_stream_finished;
 }
 
diff --git a/src/audio/include/audio_task.hpp b/src/audio/include/audio_task.hpp
index 48f5502c..5614fbbd 100644
--- a/src/audio/include/audio_task.hpp
+++ b/src/audio/include/audio_task.hpp
@@ -21,26 +21,14 @@
 
 namespace audio {
 
-struct Duration {
-  enum class Source {
-    kLibTags,
-    kCodec,
-    kFileSize,
-  };
-  Source src;
-  uint32_t duration;
-};
-
 class Timer {
  public:
-  Timer(const StreamInfo::Pcm&, const Duration&);
+  Timer(uint32_t sample_rate, uint32_t total_samples);
 
-  auto AddBytes(std::size_t) -> void;
+  auto AddSamples(std::size_t) -> void;
 
  private:
-  auto bytes_to_samples(uint32_t) -> uint32_t;
-
-  StreamInfo::Pcm format_;
+  uint32_t sample_rate_;
 
   uint32_t current_seconds_;
   uint32_t current_sample_in_second_;
diff --git a/src/codecs/foxenflac.cpp b/src/codecs/foxenflac.cpp
index eef8225a..b1ba348a 100644
--- a/src/codecs/foxenflac.cpp
+++ b/src/codecs/foxenflac.cpp
@@ -64,7 +64,9 @@ auto FoxenFlacDecoder::OpenStream(std::shared_ptr input)
 
   uint64_t num_samples = fx_flac_get_streaminfo(flac_, FLAC_KEY_N_SAMPLES);
   if (num_samples > 0) {
-    format.duration_seconds = num_samples / fs;
+    // The STREAMINFO value is already a sample count; do not divide by the
+    // sample rate, which would turn it into seconds.
+    format.total_samples = num_samples;
   }
 
   return format;
diff --git a/src/codecs/include/codec.hpp b/src/codecs/include/codec.hpp
index ece3d4fe..2f95389c 100644
--- a/src/codecs/include/codec.hpp
+++ b/src/codecs/include/codec.hpp
@@ -91,7 +91,7 @@ class ICodec {
   struct OutputFormat {
     uint8_t num_channels;
     uint32_t sample_rate_hz;
-    std::optional duration_seconds;
+    std::optional total_samples;
 
     bool operator==(const OutputFormat&) const = default;
   };
diff --git a/src/codecs/mad.cpp b/src/codecs/mad.cpp
index ce3a9cac..dfa568d4 100644
--- a/src/codecs/mad.cpp
+++ b/src/codecs/mad.cpp
@@ -92,7 +92,7 @@ auto MadMp3Decoder::OpenStream(std::shared_ptr input)
 
   auto vbr_length = GetVbrLength(header);
   if (vbr_length) {
-    output.duration_seconds = vbr_length;
+    output.total_samples = vbr_length;
   }
   return output;
 }
@@ -234,7 +234,7 @@ auto MadMp3Decoder::GetVbrLength(const mad_header& header)
     return {};
   }
 
-  return (double)(frames_count * samples_per_frame) / header.samplerate;
+  return (double)(frames_count * samples_per_frame);
 }
 
 }  // namespace codecs
diff --git a/src/codecs/opus.cpp b/src/codecs/opus.cpp
index c0727c6b..f5ff471a 100644
--- a/src/codecs/opus.cpp
+++ b/src/codecs/opus.cpp
@@ -122,9 +122,26 @@ auto XiphOpusDecoder::OpenStream(std::shared_ptr input)
     return cpp::fail(Error::kMalformedData);
   }
 
+  auto l = op_pcm_total(opus_, -1);
+  std::optional length;
+  if (l > 0) {
+    // Output is always downmixed to two channels, but the pcm count does not
+    // reflect this. Rescale from the source channel count to two channels.
+    // NOTE(review): opusfile describes op_pcm_total as the stream length in
+    // samples at 48 kHz; confirm whether any rescaling is needed at all.
+    int channels = op_channel_count(opus_, -1);
+    if (channels == 1) {
+      l *= 2;
+    } else if (channels > 2) {
+      l = l * 2 / channels;
+    }
+    length = l;
+  }
+
   return OutputFormat{
       .num_channels = 2,
       .sample_rate_hz = 48000,
+      .total_samples = length,
   };
 }
 
diff --git a/src/codecs/vorbis.cpp b/src/codecs/vorbis.cpp
index aa367e02..cf783978 100644
--- a/src/codecs/vorbis.cpp
+++ b/src/codecs/vorbis.cpp
@@ -118,9 +118,16 @@ auto TremorVorbisDecoder::OpenStream(std::shared_ptr input)
     return cpp::fail(Error::kMalformedData);
   }
 
+  auto l = ov_pcm_total(&vorbis_, -1);
+  std::optional length;
+  if (l > 0) {
+    length = l;
+  }
+
   return OutputFormat{
       .num_channels = static_cast(info->channels),
       .sample_rate_hz = static_cast(info->rate),
+      .total_samples = length,
   };
 }