here is a wav decoder, enjoy!

Reviewed-on: https://codeberg.org/cool-tech-zone/tangara-fw/pulls/13
Reviewed-by: cooljqln <cooljqln@noreply.codeberg.org>
Co-authored-by: ailurux <ailuruxx@gmail.com>
Co-committed-by: ailurux <ailuruxx@gmail.com>
custom
ailurux 1 year ago committed by cooljqln
parent 55bde70b96
commit 0e04eb918e
  1. 2
      .gitignore
  2. 2
      sdkconfig.common
  3. 2
      src/audio/fatfs_audio_input.cpp
  4. 4
      src/codecs/CMakeLists.txt
  5. 5
      src/codecs/codec.cpp
  6. 4
      src/codecs/include/codec.hpp
  7. 6
      src/codecs/include/sample.hpp
  8. 2
      src/codecs/include/types.hpp
  9. 57
      src/codecs/include/wav.hpp
  10. 7
      src/codecs/sample.cpp
  11. 2
      src/codecs/source_buffer.cpp
  12. 259
      src/codecs/wav.cpp
  13. 2
      src/util/CMakeLists.txt
  14. 47
      src/util/include/debug.hpp

2
.gitignore vendored

@ -18,3 +18,5 @@ doc/
test/build/ test/build/
test/dependencies.lock test/dependencies.lock
test/sdkconfig test/sdkconfig
sdkconfig.bak
*.ignore

@ -40,7 +40,7 @@ CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY=y
CONFIG_RINGBUF_PLACE_FUNCTIONS_INTO_FLASH=y CONFIG_RINGBUF_PLACE_FUNCTIONS_INTO_FLASH=y
CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y
CONFIG_ESP_SYSTEM_PANIC_PRINT_HALT=y CONFIG_ESP_SYSTEM_PANIC_PRINT_HALT=y
CONFIG_ESP_MAIN_TASK_STACK_SIZE=10000 CONFIG_ESP_MAIN_TASK_STACK_SIZE=12000
CONFIG_ESP_INT_WDT_TIMEOUT_MS=5000 CONFIG_ESP_INT_WDT_TIMEOUT_MS=5000
CONFIG_ESP_TASK_WDT_TIMEOUT_S=10 CONFIG_ESP_TASK_WDT_TIMEOUT_S=10
CONFIG_ESP_BROWNOUT_DET_LVL_SEL_7=y CONFIG_ESP_BROWNOUT_DET_LVL_SEL_7=y

@ -143,7 +143,7 @@ auto FatfsAudioInput::ContainerToStreamType(database::Container enc)
case database::Container::kMp3: case database::Container::kMp3:
return codecs::StreamType::kMp3; return codecs::StreamType::kMp3;
case database::Container::kWav: case database::Container::kWav:
return codecs::StreamType::kPcm; return codecs::StreamType::kWav;
case database::Container::kOgg: case database::Container::kOgg:
return codecs::StreamType::kVorbis; return codecs::StreamType::kVorbis;
case database::Container::kFlac: case database::Container::kFlac:

@ -4,9 +4,9 @@
idf_component_register( idf_component_register(
SRCS "codec.cpp" "mad.cpp" "miniflac.cpp" "opus.cpp" "vorbis.cpp" SRCS "codec.cpp" "mad.cpp" "miniflac.cpp" "opus.cpp" "vorbis.cpp"
"source_buffer.cpp" "sample.cpp" "source_buffer.cpp" "sample.cpp" "wav.cpp"
INCLUDE_DIRS "include" INCLUDE_DIRS "include"
REQUIRES "result" "span" "libmad" "miniflac" "tremor" "opusfile" "memory" REQUIRES "result" "span" "libmad" "miniflac" "tremor" "opusfile" "memory" "util"
"komihash") "komihash")
target_compile_options("${COMPONENT_LIB}" PRIVATE ${EXTRA_WARNINGS}) target_compile_options("${COMPONENT_LIB}" PRIVATE ${EXTRA_WARNINGS})

@ -14,6 +14,7 @@
#include "opus.hpp" #include "opus.hpp"
#include "types.hpp" #include "types.hpp"
#include "vorbis.hpp" #include "vorbis.hpp"
#include "wav.hpp"
namespace codecs { namespace codecs {
@ -21,7 +22,7 @@ auto StreamTypeToString(StreamType t) -> std::string {
switch (t) { switch (t) {
case StreamType::kMp3: case StreamType::kMp3:
return "Mp3"; return "Mp3";
case StreamType::kPcm: case StreamType::kWav:
return "Wav"; return "Wav";
case StreamType::kVorbis: case StreamType::kVorbis:
return "Vorbis"; return "Vorbis";
@ -44,6 +45,8 @@ auto CreateCodecForType(StreamType type) -> std::optional<ICodec*> {
return new MiniFlacDecoder(); return new MiniFlacDecoder();
case StreamType::kOpus: case StreamType::kOpus:
return new XiphOpusDecoder(); return new XiphOpusDecoder();
case StreamType::kWav:
return new WavDecoder();
default: default:
return {}; return {};
} }

@ -76,6 +76,8 @@ class ICodec {
kOutOfInput, kOutOfInput,
// Indicates that the data within the input buffer is fatally malformed. // Indicates that the data within the input buffer is fatally malformed.
kMalformedData, kMalformedData,
// Indicates that the format is unsupported.
kUnsupportedFormat,
kInternalError, kInternalError,
}; };
@ -88,6 +90,8 @@ class ICodec {
return "malformed data"; return "malformed data";
case Error::kInternalError: case Error::kInternalError:
return "internal error"; return "internal error";
case Error::kUnsupportedFormat:
return "unsupported format";
} }
return "uhh"; return "uhh";
} }

@ -24,10 +24,6 @@ namespace sample {
// 3. Monty from Xiph.org reckons it's all you need. // 3. Monty from Xiph.org reckons it's all you need.
typedef int16_t Sample; typedef int16_t Sample;
constexpr auto Clip(int64_t v) -> Sample {
return std::clamp<int64_t>(v, INT16_MIN, INT16_MAX);
}
auto shiftWithDither(int64_t src, uint_fast8_t bits) -> Sample; auto shiftWithDither(int64_t src, uint_fast8_t bits) -> Sample;
constexpr auto FromSigned(int32_t src, uint_fast8_t bits) -> Sample { constexpr auto FromSigned(int32_t src, uint_fast8_t bits) -> Sample {
@ -42,7 +38,7 @@ constexpr auto FromSigned(int32_t src, uint_fast8_t bits) -> Sample {
constexpr auto FromUnsigned(uint32_t src, uint_fast8_t bits) -> Sample { constexpr auto FromUnsigned(uint32_t src, uint_fast8_t bits) -> Sample {
// Left-align, then substract the max value / 2 to make the sample centred // Left-align, then substract the max value / 2 to make the sample centred
// around zero. // around zero.
return (src << (sizeof(uint16_t) * 8 - bits)) - (~0UL >> 1); return (src << (sizeof(uint16_t) * 8 - bits)) - (INT16_MAX+1);
} }
constexpr auto FromFloat(float src) -> Sample { constexpr auto FromFloat(float src) -> Sample {

@ -12,10 +12,10 @@ namespace codecs {
enum class StreamType { enum class StreamType {
kMp3, kMp3,
kPcm,
kVorbis, kVorbis,
kFlac, kFlac,
kOpus, kOpus,
kWav,
}; };
auto StreamTypeToString(StreamType t) -> std::string; auto StreamTypeToString(StreamType t) -> std::string;

@ -0,0 +1,57 @@
/*
* Copyright 2023 Daniel <ailuruxx@gmail.com>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#pragma once
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include "sample.hpp"
#include "source_buffer.hpp"
#include "codec.hpp"
namespace codecs {
// WAVE format tags. The decoder compares the format field read from the fmt
// chunk (and the sub-format of an extensible header) against these values.
//
// Declared `inline constexpr` so that every translation unit including this
// header shares one definition instead of getting its own internal-linkage
// copy.
inline constexpr uint16_t kWaveFormatPCM = 0x0001;
inline constexpr uint16_t kWaveFormatIEEEFloat = 0x0003;
inline constexpr uint16_t kWaveFormatAlaw = 0x0006;
inline constexpr uint16_t kWaveFormatMulaw = 0x0007;
inline constexpr uint16_t kWaveFormatExtensible = 0xFFFE;
// ICodec implementation for RIFF/WAVE streams. The decode path handles PCM
// data and 32-bit IEEE float data, including the extensible header variants
// whose sub-format is one of those two.
class WavDecoder : public ICodec {
public:
WavDecoder();
~WavDecoder();
// Reads the WAV header from the start of `input`, records the stream
// parameters, and positions the stream at the first byte of sample data.
// Returns the output format on success, or an Error describing why the
// header was rejected.
auto OpenStream(std::shared_ptr<IStream> input)
-> cpp::result<OutputFormat, Error> override;
// Converts buffered input bytes into 16-bit samples written to
// `destination`, and reports how many samples were written and whether the
// stream is finished.
auto DecodeTo(cpp::span<sample::Sample> destination)
-> cpp::result<OutputInfo, Error> override;
// NOTE(review): the current implementation performs no seek and reports
// success unconditionally.
auto SeekTo(std::size_t target_sample) -> cpp::result<void, Error> override;
// Not copyable.
WavDecoder(const WavDecoder&) = delete;
WavDecoder& operator=(const WavDecoder&) = delete;
private:
// Stream handed to OpenStream; DecodeTo refills `buffer_` from it.
std::shared_ptr<IStream> input_;
// Staging buffer for raw bytes that have not been converted yet.
SourceBuffer buffer_;
// Format tag read from the fmt chunk.
uint16_t wave_format_;
// Sub-format tag read from the header extension; only assigned when the
// extension is parsed.
uint16_t subformat_;
// Format returned by OpenStream; DecodeTo reads its channel count.
OutputFormat output_format_;
// Size in bytes of one sample of one channel (block align / channels).
uint16_t bytes_per_sample_;
// Channel count read from the fmt chunk.
uint16_t num_channels_;
// Returns `subformat_` for extensible streams and `wave_format_` otherwise.
auto GetFormat() const -> uint16_t;
};
} // namespace codecs

@ -21,11 +21,8 @@ auto shiftWithDither(int64_t src, uint_fast8_t bits) -> Sample {
uint64_t mask = 0xFFFFFFFF; uint64_t mask = 0xFFFFFFFF;
mask >>= 32 - bits; mask >>= 32 - bits;
int64_t noise = static_cast<int32_t>(komirand(&sSeed1, &sSeed2) & mask); int64_t noise = static_cast<int32_t>(komirand(&sSeed1, &sSeed2) & mask);
// Centre the noise around 0. // Apply to the sample, then shift to 16 bit.
noise -= (mask >> 1); return (src + noise) >> bits;
// Apply to the sample, then clip and shift to 16 bit.
Sample clipped = Clip((src + noise) >> bits);
return clipped;
} }
} // namespace sample } // namespace sample

@ -62,7 +62,7 @@ auto SourceBuffer::AddBytes(std::function<size_t(cpp::span<std::byte>)> writer)
auto SourceBuffer::ConsumeBytes( auto SourceBuffer::ConsumeBytes(
std::function<size_t(cpp::span<std::byte>)> reader) -> void { std::function<size_t(cpp::span<std::byte>)> reader) -> void {
size_t bytes_consumed = std::invoke( size_t bytes_consumed = std::invoke(
reader, buffer_.subspan(offset_of_bytes_).first(bytes_in_buffer_)); reader, buffer_.subspan(offset_of_bytes_, bytes_in_buffer_));
assert(bytes_consumed <= bytes_in_buffer_); assert(bytes_consumed <= bytes_in_buffer_);
bytes_in_buffer_ -= bytes_consumed; bytes_in_buffer_ -= bytes_consumed;

@ -0,0 +1,259 @@
/*
* Copyright 2023 Daniel <ailuruxx@gmail.com>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#include "wav.hpp"

#include <stdint.h>
#include <sys/_stdint.h>

#include <algorithm>
#include <array>
#include <cstdlib>
#include <cstring>
#include <string>

#include "debug.hpp"
#include "esp_log.h"
#include "sample.hpp"
namespace codecs {
// Log tag passed to the ESP_LOG* macros used in this file.
[[maybe_unused]] static const char kTag[] = "wav";
// Decodes a little-endian unsigned 16-bit value from exactly two bytes.
static inline auto bytes_to_u16(cpp::span<std::byte const, 2> bytes)
    -> uint16_t {
  const auto low = static_cast<uint16_t>(bytes[0]);
  const auto high = static_cast<uint16_t>(bytes[1]);
  return low | (high << 8);
}
// Decodes a little-endian unsigned 32-bit value from exactly four bytes.
static inline auto bytes_to_u32(cpp::span<std::byte const, 4> bytes)
    -> uint32_t {
  uint32_t value = 0;
  // Fold in the bytes from most significant (index 3) to least significant
  // (index 0).
  for (size_t i = 4; i-- > 0;) {
    value = (value << 8) | static_cast<uint32_t>(bytes[i]);
  }
  return value;
}
// Copies the raw bytes of `bytes` into a std::string, one char per byte.
static inline auto bytes_to_str(cpp::span<std::byte const> bytes)
    -> std::string {
  const char* first = reinterpret_cast<const char*>(bytes.data());
  const auto length = bytes.size_bytes();
  return std::string(first, length);
}
// Converts one little-endian IEEE-754 binary32 sample to a 16-bit sample.
//
// The 32-bit pattern is widened by hand into the matching binary64 pattern:
// the sign moves to bit 63, the 8-bit exponent is re-biased from 127 to 1023,
// and the 23-bit mantissa is left-aligned in the 52-bit mantissa field. The
// resulting double is then passed to sample::FromDouble.
//
// Exponent values 0 and 255 are not special-cased, so 0.0f widens to a tiny
// non-zero value and inf/NaN patterns widen to large finite values.
//
// `bytes` must contain at least four bytes; only the first four are read.
static int16_t convert_f32_to_16_bit(cpp::span<const std::byte> bytes) {
  // Assemble the raw bit pattern, most significant byte first.
  uint64_t bits = 0;
  for (int i = 3; i >= 0; --i) {
    bits = (bits << 8) | static_cast<uint8_t>(bytes[i]);
  }

  // Split the binary32 fields.
  const uint64_t sign = bits >> 31;
  const uint64_t exponent = (bits >> 23) & 0xFF;
  const uint64_t mantissa = bits & 0x7FFFFF;

  // Remove the binary32 bias and apply the binary64 bias.
  const uint64_t rebiased = exponent - 127 + 1023;

  // Reassemble the fields in their binary64 positions.
  const uint64_t widened = (sign << 63) + (rebiased << 52) + (mantissa << 29);

  // Copy the bit pattern into a double. Reading the integer through a
  // `double*` would violate the strict aliasing rules; memcpy is the
  // well-defined way to reinterpret the bits.
  static_assert(sizeof(double) == sizeof(uint64_t),
                "double must be 64 bits wide");
  double value = 0.0;
  std::memcpy(&value, &widened, sizeof(value));
  return sample::FromDouble(value);
}
// Converts one little-endian integer PCM sample to a 16-bit sample.
//
// The sample width is taken from `bytes.size()`. A one-byte sample is treated
// as unsigned and goes through sample::FromUnsigned. Widths of two to four
// bytes are assembled into a 32-bit integer and passed to sample::FromSigned
// together with the width in bits. Any other width passes zero to
// sample::FromSigned.
static int16_t convert_to_16_bit(cpp::span<const std::byte> bytes) {
  const int width = bytes.size();

  // 8-bit data is assumed to be unsigned.
  if (width == 1) {
    return sample::FromUnsigned(static_cast<uint8_t>(bytes[0]), 8);
  }

  // Build the integer starting from the most significant (last) byte.
  int32_t assembled = 0;
  if (width >= 2 && width <= 4) {
    for (int idx = width - 1; idx >= 0; --idx) {
      assembled = (assembled << 8) | static_cast<uint8_t>(bytes[idx]);
    }
  }

  return sample::FromSigned(assembled, width * 8);
}
// Creates a decoder with no stream attached. Every member is given a defined
// value so that nothing reads an indeterminate value before OpenStream has
// filled it in (GetFormat() can read subformat_ without OpenStream ever
// having assigned it).
WavDecoder::WavDecoder()
    : input_(),
      buffer_(),
      wave_format_(0),
      subformat_(0),
      output_format_(),
      bytes_per_sample_(0),
      num_channels_(0) {}
// Nothing to release by hand; the members clean up after themselves.
WavDecoder::~WavDecoder() = default;
// Parses the RIFF/WAVE header at the start of `input` and prepares the decoder
// for DecodeTo.
//
// Only the first 255 bytes of the stream are inspected. The fmt chunk must
// directly follow the RIFF header, and the data chunk tag must appear inside
// that 255-byte window.
//
// Returns the output format (channel count, sample rate, total samples) on
// success. Fails with:
//  - kOutOfInput        when fewer than 44 bytes could be read;
//  - kMalformedData     when the RIFF/WAVE/fmt/data structure is not as
//                       expected, or the channel count / block alignment would
//                       give a zero sample size;
//  - kUnsupportedFormat when the format, sub-format, channel count or sample
//                       width is not one the decoder can convert.
//
// On success the stream is left positioned at the first byte of sample data.
auto WavDecoder::OpenStream(std::shared_ptr<IStream> input)
    -> cpp::result<OutputFormat, Error> {
  input_ = input;

  std::array<std::byte, 255> buf{std::byte{0}};
  auto size = input->Read(buf);
  if (size < 44) {
    return cpp::fail(Error::kOutOfInput);
  }
  // Number of bytes in `buf` that actually came from the stream. The rest of
  // the buffer is still zero-filled.
  const size_t bytes_read =
      std::min<size_t>(static_cast<size_t>(size), buf.size());

  // Header layout:
  // - first 4 bytes = 'RIFF'
  // - next 4 bytes = file size
  // - next 4 bytes = 'WAVE'
  // - index of 'fmt\0' (i) marks start of fmt data
  // - i + 4 = size of fmt header (16, 18 or 40)
  // - i + 8 = format (should be 0x01 for pcm, 0xfffe for
  //   wave_format_extensible)
  // - i + 10 = num channels
  // - i + 12 = sample rate
  // - i + 16 = byte rate (sample rate * channels * bits per sample / 8)
  // - i + 20 = sample size (bits per sample * channels / 8)
  // - i + 22 = bits per sample (2 bytes)
  // - end of this part, next header we care about is 'data'
  // - and then the next 4 bytes = 32 bit int = size of data
  auto buffer_span = cpp::span{buf};

  std::string riff = bytes_to_str(buffer_span.subspan(0, 4));
  if (riff != "RIFF") {
    ESP_LOGW(kTag, "file is not RIFF");
    return cpp::fail(Error::kMalformedData);
  }

  std::string wave = bytes_to_str(buffer_span.subspan(8, 4));
  if (wave != "WAVE") {
    ESP_LOGW(kTag, "file is not WAVE");
    return cpp::fail(Error::kMalformedData);
  }

  const bool has_fmt =
      bytes_to_str(buffer_span.subspan(12, 4)).starts_with("fmt");
  ESP_LOGI(kTag, "fmt header found? %s", has_fmt ? "yes" : "no");
  if (!has_fmt) {
    ESP_LOGW(kTag, "Could not find format chunk");
    return cpp::fail(Error::kMalformedData);
  }

  wave_format_ = bytes_to_u16(buffer_span.subspan(20, 2));
  if (wave_format_ == kWaveFormatPCM) {
    ESP_LOGD(kTag, "wave format: PCM");
  } else if (wave_format_ == kWaveFormatExtensible) {
    ESP_LOGD(kTag, "wave format: extensible");
  } else if (wave_format_ == kWaveFormatIEEEFloat) {
    ESP_LOGD(kTag, "wave format: IEEE Float");
  } else {
    ESP_LOGW(kTag, "WAVE format not supported");
    return cpp::fail(Error::kUnsupportedFormat);
  }

  num_channels_ = bytes_to_u16(buffer_span.subspan(22, 2));
  uint32_t samples_per_second = bytes_to_u32(buffer_span.subspan(24, 4));
  uint16_t block_align = bytes_to_u16(buffer_span.subspan(32, 2));

  // Both values are used as divisors (here and in DecodeTo), so reject
  // headers that would make either the channel count or the per-sample size
  // zero.
  if (num_channels_ == 0 || block_align < num_channels_) {
    ESP_LOGW(kTag, "invalid channel count or block alignment");
    return cpp::fail(Error::kMalformedData);
  }
  // The output format stores the channel count in 8 bits; a larger count
  // would be silently truncated (possibly to zero).
  if (num_channels_ > UINT8_MAX) {
    ESP_LOGW(kTag, "too many channels");
    return cpp::fail(Error::kUnsupportedFormat);
  }
  bytes_per_sample_ = block_align / num_channels_;

  // Find the start of the data chunk.
  std::array<std::byte, 4> data_tag = {std::byte{0x64}, std::byte{0x61},
                                       std::byte{0x74}, std::byte{0x61}};
  auto data_loc = std::ranges::search(buffer_span, data_tag);
  if (data_loc.begin() == buffer_span.end()) {
    ESP_LOGW(kTag, "Could not find data chunk!");
    return cpp::fail(Error::kMalformedData);
  }
  int data_chunk_index = std::distance(buffer_span.begin(), data_loc.begin());

  // The tag is followed by a 4-byte size field, which must also be part of
  // what was read. Otherwise the subspan below would run past the buffer.
  if (static_cast<size_t>(data_chunk_index) + 8 > bytes_read) {
    ESP_LOGW(kTag, "data chunk header is truncated");
    return cpp::fail(Error::kMalformedData);
  }
  uint32_t data_chunk_size =
      bytes_to_u32(buffer_span.subspan(data_chunk_index + 4, 4));

  // Calculate number of samples.
  int number_of_samples = data_chunk_size / bytes_per_sample_;

  // An extensible header carries a 22-byte extension whose sub-format field
  // gives the real sample encoding. Without it GetFormat() has nothing valid
  // to return, so treat a missing extension as malformed.
  if (wave_format_ == kWaveFormatExtensible) {
    uint16_t extension_size = bytes_to_u16(buffer_span.subspan(36, 2));
    if (extension_size < 22) {
      ESP_LOGW(kTag, "WAVE extensible header is missing its extension");
      return cpp::fail(Error::kMalformedData);
    }
    // Offsets 38 and 40 hold the valid-bits-per-sample and speaker mask
    // fields, which are not used. The sub-format follows at 44.
    subformat_ = bytes_to_u16(buffer_span.subspan(44, 2));
    if (!(subformat_ == kWaveFormatPCM ||
          subformat_ == kWaveFormatIEEEFloat)) {
      ESP_LOGW(kTag, "WAVE extensible subformat_ not supported");
      return cpp::fail(Error::kUnsupportedFormat);
    }
  }

  // Only let through sample widths that DecodeTo can convert: 4-byte floats
  // and 1 to 4 byte integers.
  const uint16_t format = GetFormat();
  if (format == kWaveFormatIEEEFloat) {
    if (bytes_per_sample_ == 8) {
      ESP_LOGW(kTag, "WAVE 64-Bit Float not supported");
      return cpp::fail(Error::kUnsupportedFormat);
    }
    if (bytes_per_sample_ != 4) {
      ESP_LOGW(kTag, "WAVE float sample size not supported");
      return cpp::fail(Error::kUnsupportedFormat);
    }
  } else if (bytes_per_sample_ > 4) {
    ESP_LOGW(kTag, "WAVE PCM sample size not supported");
    return cpp::fail(Error::kUnsupportedFormat);
  }

  // Seek track to start of data.
  input->SeekTo(data_chunk_index + 8, IStream::SeekFrom::kStartOfStream);

  output_format_ = {.num_channels = static_cast<uint8_t>(num_channels_),
                    .sample_rate_hz = samples_per_second,
                    .total_samples = number_of_samples};
  return output_format_;
}
// Converts as many whole frames of buffered input as fit into `output`.
//
// Refills the source buffer from the input stream, then converts samples one
// by one into 16-bit output samples. Only whole frames (one sample per
// channel) are consumed, and the number of frames is limited by both the
// buffered bytes and the space in `output`.
//
// Returns the number of samples written and whether the stream is finished
// (nothing was written and the input reported end of file). Fails with
// kInternalError when the per-sample size or channel count is zero, which
// would otherwise lead to a division by zero.
auto WavDecoder::DecodeTo(cpp::span<sample::Sample> output)
    -> cpp::result<OutputInfo, Error> {
  const size_t channels = output_format_.num_channels;
  if (bytes_per_sample_ == 0 || channels == 0) {
    return cpp::fail(Error::kInternalError);
  }

  bool is_eof = buffer_.Refill(input_.get());

  // The sample encoding does not change while decoding, so work it out once
  // rather than per sample.
  const uint16_t format = GetFormat();
  const bool is_pcm = format == kWaveFormatPCM;
  const bool is_f32 = format == kWaveFormatIEEEFloat && bytes_per_sample_ == 4;

  size_t samples_written = 0;

  buffer_.ConsumeBytes([&](cpp::span<std::byte> buf) -> size_t {
    size_t bytes_read = buf.size_bytes();
    size_t frames_read = bytes_read / bytes_per_sample_ / channels;

    samples_written =
        std::min<size_t>(frames_read, output.size() / channels) * channels;

    // For each sample that we're going to write
    for (size_t i = 0; i < samples_written; i++) {
      auto data = buf.subspan(i * bytes_per_sample_, bytes_per_sample_);
      if (is_pcm) {
        output[i] = convert_to_16_bit(data);
      } else if (is_f32) {
        output[i] = convert_f32_to_16_bit(data);
      } else {
        // No converter for this encoding. Emit silence rather than leaving
        // the output slot unwritten while still counting it as written.
        output[i] = 0;
      }
    }

    // Tell the buffer how many bytes were used up.
    return samples_written * bytes_per_sample_;
  });

  return OutputInfo{.samples_written = samples_written,
                    .is_stream_finished = samples_written == 0 && is_eof};
}
// Seeking is not implemented: `target` is ignored, the stream position is left
// untouched, and a default-constructed (successful) result is returned.
auto WavDecoder::SeekTo(size_t target) -> cpp::result<void, Error> {
return {};
}
// Returns the format tag that describes the sample encoding: the sub-format
// for extensible streams, the plain format tag for everything else.
auto codecs::WavDecoder::GetFormat() const -> uint16_t {
  const bool is_extensible = (wave_format_ == kWaveFormatExtensible);
  return is_extensible ? subformat_ : wave_format_;
}
} // namespace codecs

@ -2,4 +2,4 @@
# #
# SPDX-License-Identifier: GPL-3.0-only # SPDX-License-Identifier: GPL-3.0-only
idf_component_register(SRCS INCLUDE_DIRS "include" REQUIRES "database") idf_component_register(SRCS INCLUDE_DIRS "include" REQUIRES "database" "span")

@ -0,0 +1,47 @@
/*
* Copyright 2023 jacqueline <me@jacqueline.id.au>
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#pragma once
#include <iomanip>
#include <ostream>
#include <string>
#include "span.hpp"
namespace util {
// Formats `data` as a hex dump, sixteen bytes per row.
//
// Each row starts on a new line with the offset of its first byte
// ("0x" + upper-case hex, zero-padded to two digits) followed by a tab. Each
// byte is printed as "[0xNN]", with an extra space between the eighth and
// ninth byte of a row. Every row ends with a tab and the row's bytes as ASCII
// between '|' characters, where non-printable bytes are shown as '.'.
//
// Returns an empty string for empty input.
inline std::string format_hex_string(cpp::span<const std::byte> data) {
  std::ostringstream oss;
  // ASCII rendering of the row currently being written.
  std::string ascii_values;
  int count = 0;

  for (auto byte : data) {
    if (count % 16 == 0) {
      // Starting a new row: close the previous one with its ASCII column.
      if (!ascii_values.empty()) {
        oss << "\t|" << ascii_values << "|";
        ascii_values.clear();
      }
      oss << '\n';
      oss << "0x" << std::uppercase << std::setfill('0') << std::setw(2)
          << std::hex << count << '\t';
    } else if (count % 8 == 0) {
      oss << " ";
    }

    const int byte_val = static_cast<int>(byte);
    oss << "[0x" << std::uppercase << std::setfill('0') << std::setw(2)
        << std::hex << byte_val << ']';

    const bool printable = byte_val >= 32 && byte_val < 127;
    ascii_values += printable ? static_cast<char>(byte) : '.';
    count++;
  }

  // The loop only writes the ASCII column when the next row begins, so the
  // final row has to be closed here or its ASCII column is lost.
  if (!ascii_values.empty()) {
    oss << "\t|" << ascii_values << "|";
  }
  return oss.str();
}
} // namespace util
Loading…
Cancel
Save