wav-codec (#13)
here is a wav decoder, enjoy! Reviewed-on: https://codeberg.org/cool-tech-zone/tangara-fw/pulls/13 Reviewed-by: cooljqln <cooljqln@noreply.codeberg.org> Co-authored-by: ailurux <ailuruxx@gmail.com> Co-committed-by: ailurux <ailuruxx@gmail.com>custom
parent
55bde70b96
commit
0e04eb918e
@ -0,0 +1,57 @@ |
||||
/*
|
||||
* Copyright 2023 Daniel <ailuruxx@gmail.com> |
||||
* |
||||
* SPDX-License-Identifier: GPL-3.0-only |
||||
*/ |
||||
|
||||
#pragma once |
||||
|
||||
#include <cstddef> |
||||
#include <cstdint> |
||||
#include <memory> |
||||
#include <optional> |
||||
#include <string> |
||||
#include <utility> |
||||
|
||||
#include "sample.hpp" |
||||
#include "source_buffer.hpp" |
||||
|
||||
#include "codec.hpp" |
||||
|
||||
namespace codecs { |
||||
|
||||
// Format tags compared against the 16-bit format field of a WAVE `fmt ` chunk
// (and against the sub-format of an extensible header).
static constexpr uint16_t kWaveFormatPCM{0x0001};
static constexpr uint16_t kWaveFormatIEEEFloat{0x0003};
static constexpr uint16_t kWaveFormatAlaw{0x0006};
static constexpr uint16_t kWaveFormatMulaw{0x0007};
static constexpr uint16_t kWaveFormatExtensible{0xFFFE};
||||
|
||||
class WavDecoder : public ICodec { |
||||
public: |
||||
WavDecoder(); |
||||
~WavDecoder(); |
||||
|
||||
auto OpenStream(std::shared_ptr<IStream> input) |
||||
-> cpp::result<OutputFormat, Error> override; |
||||
|
||||
auto DecodeTo(cpp::span<sample::Sample> destination) |
||||
-> cpp::result<OutputInfo, Error> override; |
||||
|
||||
auto SeekTo(std::size_t target_sample) -> cpp::result<void, Error> override; |
||||
|
||||
WavDecoder(const WavDecoder&) = delete; |
||||
WavDecoder& operator=(const WavDecoder&) = delete; |
||||
|
||||
private: |
||||
std::shared_ptr<IStream> input_; |
||||
SourceBuffer buffer_; |
||||
uint16_t wave_format_; |
||||
uint16_t subformat_; |
||||
OutputFormat output_format_; |
||||
uint16_t bytes_per_sample_; |
||||
uint16_t num_channels_; |
||||
|
||||
auto GetFormat() const -> uint16_t; |
||||
}; |
||||
|
||||
} // namespace codecs
|
@ -0,0 +1,259 @@ |
||||
/*
|
||||
* Copyright 2023 Daniel <ailuruxx@gmail.com> |
||||
* |
||||
* SPDX-License-Identifier: GPL-3.0-only |
||||
*/ |
||||
|
||||
#include "wav.hpp"
#include <stdint.h>
#include <sys/_stdint.h>

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <string>

#include "debug.hpp"
#include "esp_log.h"
#include "sample.hpp"
||||
|
||||
namespace codecs { |
||||
|
||||
[[maybe_unused]] static const char kTag[] = "wav"; |
||||
|
||||
static inline auto bytes_to_u16(cpp::span<std::byte const, 2> bytes) |
||||
-> uint16_t { |
||||
return (uint16_t)bytes[0] | (uint16_t)bytes[1] << 8; |
||||
} |
||||
|
||||
static inline auto bytes_to_u32(cpp::span<std::byte const, 4> bytes) |
||||
-> uint32_t { |
||||
return (uint32_t)bytes[0] | (uint32_t)bytes[1] << 8 | |
||||
(uint32_t)bytes[2] << 16 | (uint32_t)bytes[3] << 24; |
||||
} |
||||
|
||||
static inline auto bytes_to_str(cpp::span<std::byte const> bytes) |
||||
-> std::string { |
||||
return std::string(reinterpret_cast<const char*>(bytes.data()), |
||||
bytes.size_bytes()); |
||||
} |
||||
|
||||
/*
 * Converts one little-endian 32-bit float sample (the first four bytes of
 * `bytes`) into a 16-bit sample via sample::FromDouble.
 *
 * The float's bit fields are widened by hand into a 64-bit double layout. The
 * exponent is re-biased without special-casing 0 or 255, so zero and
 * denormals become a value of magnitude ~2^-127, and Inf/NaN become large
 * finite magnitudes.
 */
static int16_t convert_f32_to_16_bit(cpp::span<const std::byte> bytes) {
  // Assemble the 32-bit pattern, most significant byte (index 3) first.
  uint64_t bits = 0;
  for (std::size_t i = 4; i-- > 0;) {
    bits = (bits << 8) | static_cast<uint8_t>(bytes[i]);
  }

  // Split into sign (1 bit), exponent (8 bits) and mantissa (23 bits).
  const uint64_t sign = bits >> 31;
  const uint64_t exponent = (bits >> 23) & 0xFF;
  const uint64_t mantissa = bits & 0x7FFFFF;

  // Remove the old exponent bias (127) and add the new one (1023), then put
  // each field at its position in the 64-bit layout.
  const uint64_t widened =
      (sign << 63) | ((exponent - 127 + 1023) << 52) | (mantissa << 29);

  // Copy the bits into a double. memcpy is the well-defined way to do this;
  // reading the integer through a double* violates strict aliasing.
  double value = 0.0;
  static_assert(sizeof(value) == sizeof(widened),
                "double must be 64 bits wide");
  std::memcpy(&value, &widened, sizeof(value));
  return sample::FromDouble(value);
}
||||
|
||||
/*
 * Converts one little-endian integer PCM sample into a 16-bit sample. The
 * sample's width is the size of `bytes`: a single byte is treated as unsigned,
 * two to four bytes as a signed integer of that width. Any other width yields
 * sample::FromSigned(0, width_in_bits).
 */
static int16_t convert_to_16_bit(cpp::span<const std::byte> bytes) {
  const int depth = static_cast<int>(bytes.size());

  // If 8-bit, assume unsigned.
  if (depth == 1) {
    return sample::FromUnsigned(static_cast<uint8_t>(bytes[0]), 8);
  }

  // Otherwise, gather the bytes most-significant first. Each case deliberately
  // falls through to pick up the remaining lower bytes.
  uint32_t raw = 0;
  switch (depth) {
    case 4:
      raw = static_cast<uint8_t>(bytes[3]);
      [[fallthrough]];
    case 3:
      raw = (raw << 8) | static_cast<uint8_t>(bytes[2]);
      [[fallthrough]];
    case 2:
      raw = (raw << 8) | static_cast<uint8_t>(bytes[1]);
      raw = (raw << 8) | static_cast<uint8_t>(bytes[0]);
      break;
    default:
      break;
  }

  // Sign-extend from the sample's own width so that negative 16- and 24-bit
  // samples reach FromSigned as negative numbers rather than as large
  // positive ones.
  int32_t val = static_cast<int32_t>(raw);
  if (depth >= 2 && depth <= 4) {
    const uint32_t sign_bit = uint32_t{1} << (depth * 8 - 1);
    val = static_cast<int32_t>((raw ^ sign_bit) - sign_bit);
  }

  // Convert to sample.
  return sample::FromSigned(val, depth * 8);
}
||||
|
||||
WavDecoder::WavDecoder() : input_(), buffer_() {} |
||||
|
||||
WavDecoder::~WavDecoder() {} |
||||
|
||||
auto WavDecoder::OpenStream(std::shared_ptr<IStream> input) |
||||
-> cpp::result<OutputFormat, Error> { |
||||
input_ = input; |
||||
|
||||
std::array<std::byte, 255> buf{std::byte{0}}; |
||||
auto size = input->Read(buf); |
||||
if (size < 44) { |
||||
return cpp::fail(Error::kOutOfInput); |
||||
} |
||||
|
||||
// - check the first 4 bytes = 'RIFF'
|
||||
// - next 4 bytes = file size
|
||||
// - check next 4 bytes = 'WAVE'
|
||||
// - index of 'fmt\0' (i) marks start of fmt data
|
||||
// - i + 4 = size of fmt header (16, 18 or 40)
|
||||
// - i + 8 = format (should be 0x01 for pcm, 0xfffe for
|
||||
// wave_format_exstensible)
|
||||
// - i + 10 = num channels
|
||||
// - i + 12 = sample rate
|
||||
// - i + 16 = byte rate (sample rate * channels * bits per sample / 8)
|
||||
// - i + 20 = sample size (bits per sample * channels / 8)
|
||||
// - i + 22 = bits per sample (2 bytes)
|
||||
// - end of this part, next header we care about is 'data'
|
||||
// - and then the next 4 bytes = 32 bit int = size of data
|
||||
|
||||
auto buffer_span = cpp::span{buf}; |
||||
|
||||
std::string riff = bytes_to_str(buffer_span.subspan(0, 4)); |
||||
if (riff != "RIFF") { |
||||
ESP_LOGW(kTag, "file is not RIFF"); |
||||
return cpp::fail(Error::kMalformedData); |
||||
} |
||||
|
||||
uint32_t file_size = bytes_to_u32(buffer_span.subspan(4, 4)) + 8; |
||||
|
||||
std::string fmt_header = bytes_to_str(buffer_span.subspan(12, 4)); |
||||
ESP_LOGI(kTag, "fmt header found? %s", |
||||
(fmt_header.starts_with("fmt")) ? "yes" : "no"); |
||||
if (!fmt_header.starts_with("fmt")) { |
||||
ESP_LOGW(kTag, "Could not find format chunk"); |
||||
return cpp::fail(Error::kMalformedData); |
||||
} |
||||
|
||||
// Size of the fmt header, should be 16, 18 or 40
|
||||
uint32_t fmt_header_size = bytes_to_u32(buffer_span.subspan(16, 4)); |
||||
|
||||
wave_format_ = bytes_to_u16(buffer_span.subspan(20, 2)); |
||||
if (wave_format_ == kWaveFormatPCM) { |
||||
ESP_LOGD(kTag, "wave format: PCM"); |
||||
} else if (wave_format_ == kWaveFormatExtensible) { |
||||
ESP_LOGD(kTag, "wave format: extensible"); |
||||
} else if (wave_format_ == kWaveFormatIEEEFloat) { |
||||
ESP_LOGD(kTag, "wave format: IEEE Float"); |
||||
} else { |
||||
ESP_LOGW(kTag, "WAVE format not supported"); |
||||
return cpp::fail(Error::kUnsupportedFormat); |
||||
} |
||||
|
||||
num_channels_ = bytes_to_u16(buffer_span.subspan(22, 2)); |
||||
|
||||
uint32_t samples_per_second = bytes_to_u32(buffer_span.subspan(24, 4)); |
||||
|
||||
uint32_t avg_bytes_per_second = bytes_to_u32(buffer_span.subspan(28, 4)); |
||||
|
||||
uint16_t block_align = bytes_to_u16(buffer_span.subspan(32, 2)); |
||||
|
||||
bytes_per_sample_ = block_align / num_channels_; |
||||
|
||||
uint16_t bits_per_sample = bytes_to_u16(buffer_span.subspan(34, 2)); |
||||
|
||||
// find the start of the data chunk
|
||||
std::array<std::byte, 4> data_tag = {std::byte{0x64}, std::byte{0x61}, |
||||
std::byte{0x74}, std::byte{0x61}}; |
||||
auto data_loc = std::ranges::search(buffer_span, data_tag); |
||||
if (data_loc.begin() == buffer_span.end()) { |
||||
ESP_LOGW(kTag, "Could not find data chunk!"); |
||||
return cpp::fail(Error::kMalformedData); |
||||
} |
||||
|
||||
int data_chunk_index = std::distance(buffer_span.begin(), data_loc.begin()); |
||||
|
||||
uint32_t data_chunk_size = |
||||
bytes_to_u32(buffer_span.subspan(data_chunk_index + 4, 4)); |
||||
|
||||
// calculate number of samples
|
||||
int number_of_samples = data_chunk_size / bytes_per_sample_; |
||||
|
||||
// extension to the fmt chunk size (0 or 22)
|
||||
uint16_t extension_size = 0; |
||||
if (wave_format_ == kWaveFormatExtensible) { |
||||
extension_size = bytes_to_u16(buffer_span.subspan(36, 2)); |
||||
} |
||||
|
||||
// Parse extension if applicable
|
||||
if (extension_size == 22) { |
||||
// Valid bits per sample
|
||||
uint16_t valid_bits_per_sample = bytes_to_u16(buffer_span.subspan(38, 2)); |
||||
|
||||
uint32_t speaker_mask = bytes_to_u32(buffer_span.subspan(40, 4)); |
||||
|
||||
// Parse subformat
|
||||
subformat_ = bytes_to_u16(buffer_span.subspan(44, 2)); |
||||
if (!(subformat_ == kWaveFormatPCM || |
||||
subformat_ == kWaveFormatIEEEFloat)) { |
||||
ESP_LOGW(kTag, "WAVE extensible subformat_ not supported"); |
||||
return cpp::fail(Error::kUnsupportedFormat); |
||||
} |
||||
} |
||||
|
||||
// 64 bit float is not implemented yet, make sure we're not letting it through
|
||||
if (GetFormat() == kWaveFormatIEEEFloat && bytes_per_sample_ == 8) { |
||||
ESP_LOGW(kTag, "WAVE 64-Bit Float not supported"); |
||||
return cpp::fail(Error::kUnsupportedFormat); |
||||
} |
||||
|
||||
// Seek track to start of data
|
||||
input->SeekTo(data_chunk_index + 8, IStream::SeekFrom::kStartOfStream); |
||||
|
||||
output_format_ = {.num_channels = (uint8_t)num_channels_, |
||||
.sample_rate_hz = samples_per_second, |
||||
.total_samples = number_of_samples}; |
||||
|
||||
return output_format_; |
||||
} |
||||
|
||||
auto WavDecoder::DecodeTo(cpp::span<sample::Sample> output) |
||||
-> cpp::result<OutputInfo, Error> { |
||||
bool is_eof = buffer_.Refill(input_.get()); |
||||
size_t samples_written = 0; |
||||
|
||||
buffer_.ConsumeBytes([&](cpp::span<std::byte> buf) -> size_t { |
||||
size_t bytes_read = buf.size_bytes(); |
||||
size_t frames_read = |
||||
bytes_read / bytes_per_sample_ / output_format_.num_channels; |
||||
|
||||
samples_written = |
||||
std::min<size_t>(frames_read, |
||||
output.size() / output_format_.num_channels) * |
||||
output_format_.num_channels; |
||||
|
||||
// For each sample that we're going to write
|
||||
for (size_t i = 0; i < samples_written; i++) { |
||||
auto data = buf.subspan(i * bytes_per_sample_, bytes_per_sample_); |
||||
if (GetFormat() == kWaveFormatPCM) { |
||||
// PCM
|
||||
output[i] = convert_to_16_bit(data); |
||||
} else if (GetFormat() == kWaveFormatIEEEFloat) { |
||||
// 32-Bit Float
|
||||
if (bytes_per_sample_ == 4) { |
||||
output[i] = convert_f32_to_16_bit(data); |
||||
} |
||||
} |
||||
} |
||||
|
||||
return samples_written * bytes_per_sample_; |
||||
}); |
||||
|
||||
return OutputInfo{.samples_written = samples_written, |
||||
.is_stream_finished = samples_written == 0 && is_eof}; |
||||
} |
||||
|
||||
auto WavDecoder::SeekTo(size_t target) -> cpp::result<void, Error> { |
||||
return {}; |
||||
} |
||||
|
||||
auto codecs::WavDecoder::GetFormat() const -> uint16_t { |
||||
if (wave_format_ == kWaveFormatExtensible) { |
||||
return subformat_; |
||||
} |
||||
return wave_format_; |
||||
} |
||||
|
||||
} // namespace codecs
|
@ -0,0 +1,47 @@ |
||||
/*
|
||||
* Copyright 2023 jacqueline <me@jacqueline.id.au> |
||||
* |
||||
* SPDX-License-Identifier: GPL-3.0-only |
||||
*/ |
||||
|
||||
#pragma once |
||||
|
||||
#include <iomanip> |
||||
#include <ostream> |
||||
|
||||
#include <string> |
||||
#include "span.hpp" |
||||
|
||||
namespace util { |
||||
|
||||
/*
 * Renders `data` as a multi-line hex dump. Each row starts on a new line with
 * the hex offset of its first byte, followed by up to 16 bytes formatted as
 * "[0xNN]" (with an extra gap after the eighth), then a tab and the same
 * bytes as ASCII between '|' characters. Bytes outside the printable range
 * 32..126 are shown as '.'. Empty input yields an empty string.
 *
 * NOTE(review): std::ostringstream lives in <sstream>, which this header does
 * not include directly -- presumably it arrives transitively; consider
 * including it explicitly.
 */
inline std::string format_hex_string(cpp::span<const std::byte> data) {
  std::ostringstream oss;
  std::string ascii_column;
  int count = 0;
  for (auto byte : data) {
    if (count % 16 == 0) {
      // Starting a new row: finish the previous one with its ASCII column.
      if (!ascii_column.empty()) {
        oss << "\t|" << ascii_column << "|";
        ascii_column.clear();
      }
      oss << '\n';
      oss << "0x" << std::uppercase << std::setfill('0') << std::setw(2)
          << std::hex << count << '\t';
    } else if (count % 8 == 0) {
      // Visual gap between the two halves of a row.
      oss << " ";
    }
    const int byte_val = static_cast<int>(byte);
    oss << "[0x" << std::uppercase << std::setfill('0') << std::setw(2)
        << std::hex << byte_val << ']';
    const bool printable = byte_val >= 32 && byte_val < 127;
    ascii_column += printable ? static_cast<char>(byte_val) : '.';
    count++;
  }
  // The last row never reaches the "new row" branch above, so emit its ASCII
  // column here; otherwise it would be dropped from the dump.
  if (!ascii_column.empty()) {
    oss << "\t|" << ascii_column << "|";
  }
  return oss.str();
}
||||
|
||||
} // namespace util
|
Loading…
Reference in new issue