diff --git a/.gitignore b/.gitignore index 5a30495c..193750f9 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,5 @@ doc/ test/build/ test/dependencies.lock test/sdkconfig +sdkconfig.bak +*.ignore diff --git a/sdkconfig.common b/sdkconfig.common index a15c7c6c..d94018a4 100644 --- a/sdkconfig.common +++ b/sdkconfig.common @@ -40,7 +40,7 @@ CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY=y CONFIG_RINGBUF_PLACE_FUNCTIONS_INTO_FLASH=y CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y CONFIG_ESP_SYSTEM_PANIC_PRINT_HALT=y -CONFIG_ESP_MAIN_TASK_STACK_SIZE=10000 +CONFIG_ESP_MAIN_TASK_STACK_SIZE=12000 CONFIG_ESP_INT_WDT_TIMEOUT_MS=5000 CONFIG_ESP_TASK_WDT_TIMEOUT_S=10 CONFIG_ESP_BROWNOUT_DET_LVL_SEL_7=y diff --git a/src/audio/fatfs_audio_input.cpp b/src/audio/fatfs_audio_input.cpp index 58d5852f..1dcab97a 100644 --- a/src/audio/fatfs_audio_input.cpp +++ b/src/audio/fatfs_audio_input.cpp @@ -143,7 +143,7 @@ auto FatfsAudioInput::ContainerToStreamType(database::Container enc) case database::Container::kMp3: return codecs::StreamType::kMp3; case database::Container::kWav: - return codecs::StreamType::kPcm; + return codecs::StreamType::kWav; case database::Container::kOgg: return codecs::StreamType::kVorbis; case database::Container::kFlac: diff --git a/src/codecs/CMakeLists.txt b/src/codecs/CMakeLists.txt index b8e0bbca..eb1897da 100644 --- a/src/codecs/CMakeLists.txt +++ b/src/codecs/CMakeLists.txt @@ -4,9 +4,9 @@ idf_component_register( SRCS "codec.cpp" "mad.cpp" "miniflac.cpp" "opus.cpp" "vorbis.cpp" - "source_buffer.cpp" "sample.cpp" + "source_buffer.cpp" "sample.cpp" "wav.cpp" INCLUDE_DIRS "include" - REQUIRES "result" "span" "libmad" "miniflac" "tremor" "opusfile" "memory" + REQUIRES "result" "span" "libmad" "miniflac" "tremor" "opusfile" "memory" "util" "komihash") target_compile_options("${COMPONENT_LIB}" PRIVATE ${EXTRA_WARNINGS}) diff --git a/src/codecs/codec.cpp b/src/codecs/codec.cpp index d81d4b05..7bc591aa 100644 --- a/src/codecs/codec.cpp +++ b/src/codecs/codec.cpp @@ -14,6 
+14,7 @@ #include "opus.hpp" #include "types.hpp" #include "vorbis.hpp" +#include "wav.hpp" namespace codecs { @@ -21,7 +22,7 @@ auto StreamTypeToString(StreamType t) -> std::string { switch (t) { case StreamType::kMp3: return "Mp3"; - case StreamType::kPcm: + case StreamType::kWav: return "Wav"; case StreamType::kVorbis: return "Vorbis"; @@ -44,6 +45,8 @@ auto CreateCodecForType(StreamType type) -> std::optional { return new MiniFlacDecoder(); case StreamType::kOpus: return new XiphOpusDecoder(); + case StreamType::kWav: + return new WavDecoder(); default: return {}; } diff --git a/src/codecs/include/codec.hpp b/src/codecs/include/codec.hpp index 87f6637c..36dda8ff 100644 --- a/src/codecs/include/codec.hpp +++ b/src/codecs/include/codec.hpp @@ -76,6 +76,8 @@ class ICodec { kOutOfInput, // Indicates that the data within the input buffer is fatally malformed. kMalformedData, + // Indicated that the format is unsupported + kUnsupportedFormat, kInternalError, }; @@ -88,6 +90,8 @@ class ICodec { return "malformed data"; case Error::kInternalError: return "internal error"; + case Error::kUnsupportedFormat: + return "unsupported format"; } return "uhh"; } diff --git a/src/codecs/include/sample.hpp b/src/codecs/include/sample.hpp index 7e550680..7b3f96a3 100644 --- a/src/codecs/include/sample.hpp +++ b/src/codecs/include/sample.hpp @@ -24,10 +24,6 @@ namespace sample { // 3. Monty from Xiph.org reckons it's all you need. typedef int16_t Sample; -constexpr auto Clip(int64_t v) -> Sample { - return std::clamp(v, INT16_MIN, INT16_MAX); -} - auto shiftWithDither(int64_t src, uint_fast8_t bits) -> Sample; constexpr auto FromSigned(int32_t src, uint_fast8_t bits) -> Sample { @@ -42,7 +38,7 @@ constexpr auto FromSigned(int32_t src, uint_fast8_t bits) -> Sample { constexpr auto FromUnsigned(uint32_t src, uint_fast8_t bits) -> Sample { // Left-align, then substract the max value / 2 to make the sample centred // around zero. 
-  return (src << (sizeof(uint16_t) * 8 - bits)) - (~0UL >> 1);
+  return (src << (sizeof(uint16_t) * 8 - bits)) - (INT16_MAX+1);
 }
 
 constexpr auto FromFloat(float src) -> Sample {
diff --git a/src/codecs/include/types.hpp b/src/codecs/include/types.hpp
index c9eefe45..c6dcb486 100644
--- a/src/codecs/include/types.hpp
+++ b/src/codecs/include/types.hpp
@@ -12,10 +12,10 @@ namespace codecs {
 
 enum class StreamType {
   kMp3,
-  kPcm,
   kVorbis,
   kFlac,
   kOpus,
+  kWav,
 };
 
 auto StreamTypeToString(StreamType t) -> std::string;
diff --git a/src/codecs/include/wav.hpp b/src/codecs/include/wav.hpp
new file mode 100644
index 00000000..896976dd
--- /dev/null
+++ b/src/codecs/include/wav.hpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2023 Daniel
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "sample.hpp"
+#include "source_buffer.hpp"
+
+#include "codec.hpp"
+
+namespace codecs {
+
+// Format tags found in the `fmt ` chunk of a RIFF/WAVE file. Only PCM, IEEE
+// float and extensible (wrapping one of those two) are accepted by
+// WavDecoder::OpenStream; A-law and mu-law are declared but rejected.
+inline constexpr uint16_t kWaveFormatPCM = 0x0001;
+inline constexpr uint16_t kWaveFormatIEEEFloat = 0x0003;
+inline constexpr uint16_t kWaveFormatAlaw = 0x0006;
+inline constexpr uint16_t kWaveFormatMulaw = 0x0007;
+inline constexpr uint16_t kWaveFormatExtensible = 0xFFFE;
+
+// ICodec implementation for RIFF/WAVE streams holding integer PCM or 32 bit
+// IEEE float samples. Output is always 16 bit (sample::Sample).
+class WavDecoder : public ICodec {
+ public:
+  WavDecoder();
+  ~WavDecoder();
+
+  // Parses the WAVE header at the start of `input`, then seeks `input` to the
+  // first byte of sample data. Fails if the header is short, malformed or
+  // describes an unsupported encoding.
+  auto OpenStream(std::shared_ptr<IStream> input)
+      -> cpp::result<OutputFormat, Error> override;
+
+  // Converts buffered source bytes into 16 bit samples written to
+  // `destination`, reporting how many samples were produced.
+  auto DecodeTo(cpp::span<sample::Sample> destination)
+      -> cpp::result<OutputInfo, Error> override;
+
+  // Seeking is not implemented yet; this returns success without moving.
+  auto SeekTo(std::size_t target_sample) -> cpp::result<void, Error> override;
+
+  WavDecoder(const WavDecoder&) = delete;
+  WavDecoder& operator=(const WavDecoder&) = delete;
+
+ private:
+  std::shared_ptr<IStream> input_;
+  SourceBuffer buffer_;
+  // Header state. Zero-initialised so that GetFormat() and DecodeTo() never
+  // read indeterminate values before OpenStream() has filled them in.
+  uint16_t wave_format_ = 0;
+  uint16_t subformat_ = 0;
+  OutputFormat output_format_{};
+  uint16_t bytes_per_sample_ = 0;
+  uint16_t num_channels_ = 0;
+
+  // Effective sample encoding: the sub-format for extensible files, otherwise
+  // the plain format tag.
+  auto GetFormat() const -> uint16_t;
+};
+
+}  // namespace codecs
diff --git a/src/codecs/sample.cpp b/src/codecs/sample.cpp
index d4860b94..c99710f1 100644
--- a/src/codecs/sample.cpp
+++
b/src/codecs/sample.cpp
@@ -21,11 +21,8 @@ auto shiftWithDither(int64_t src, uint_fast8_t bits) -> Sample {
   uint64_t mask = 0xFFFFFFFF;
   mask >>= 32 - bits;
   int64_t noise = static_cast<int64_t>(komirand(&sSeed1, &sSeed2) & mask);
-  // Centre the noise around 0.
-  noise -= (mask >> 1);
-  // Apply to the sample, then clip and shift to 16 bit.
-  Sample clipped = Clip((src + noise) >> bits);
-  return clipped;
+  // Apply to the sample, then shift to 16 bit.
+  return (src + noise) >> bits;
 }
 
 }  // namespace sample
diff --git a/src/codecs/source_buffer.cpp b/src/codecs/source_buffer.cpp
index bf8951f3..1db2e6c2 100644
--- a/src/codecs/source_buffer.cpp
+++ b/src/codecs/source_buffer.cpp
@@ -62,7 +62,7 @@ auto SourceBuffer::AddBytes(std::function<size_t(cpp::span<std::byte>)> writer)
 auto SourceBuffer::ConsumeBytes(
     std::function<size_t(cpp::span<std::byte>)> reader) -> void {
   size_t bytes_consumed = std::invoke(
-      reader, buffer_.subspan(offset_of_bytes_).first(bytes_in_buffer_));
+      reader, buffer_.subspan(offset_of_bytes_, bytes_in_buffer_));
   assert(bytes_consumed <= bytes_in_buffer_);
 
   bytes_in_buffer_ -= bytes_consumed;
diff --git a/src/codecs/wav.cpp b/src/codecs/wav.cpp
new file mode 100644
index 00000000..a67f3ff4
--- /dev/null
+++ b/src/codecs/wav.cpp
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2023 Daniel
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#include "wav.hpp"
+
+#include <algorithm>
+#include <array>
+#include <bit>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <string>
+
+#include "debug.hpp"
+#include "esp_log.h"
+#include "sample.hpp"
+
+namespace codecs {
+
+[[maybe_unused]] static const char kTag[] = "wav";
+
+using ByteSpan = cpp::span<const std::byte>;
+
+// Number of bytes read from the start of the stream when looking for the
+// `fmt ` and `data` chunks.
+// NOTE(review): the buffer length was lost when this patch text was mangled;
+// 255 is an assumption -- confirm against the original commit.
+static constexpr std::size_t kHeaderProbeSize = 255;
+
+// RIFF header (12) + `fmt ` chunk with a 16 byte body (24) + `data` tag and
+// size (8): nothing shorter can be parsed.
+static constexpr int kMinHeaderSize = 44;
+
+// Offset of the first byte after a minimal 16 byte `fmt ` body. The `data`
+// chunk cannot start before this point.
+static constexpr std::size_t kEndOfBasicFmt = 36;
+
+// Reads a little-endian uint16 from the first two bytes of `bytes`.
+static inline auto bytes_to_u16(ByteSpan bytes) -> uint16_t {
+  return static_cast<uint16_t>(std::to_integer<uint16_t>(bytes[0]) |
+                               std::to_integer<uint16_t>(bytes[1]) << 8);
+}
+
+// Reads a little-endian uint32 from the first four bytes of `bytes`.
+static inline auto bytes_to_u32(ByteSpan bytes) -> uint32_t {
+  return std::to_integer<uint32_t>(bytes[0]) |
+         std::to_integer<uint32_t>(bytes[1]) << 8 |
+         std::to_integer<uint32_t>(bytes[2]) << 16 |
+         std::to_integer<uint32_t>(bytes[3]) << 24;
+}
+
+// Copies `bytes` verbatim into a string; used to compare chunk tags.
+static inline auto bytes_to_str(ByteSpan bytes) -> std::string {
+  return std::string(reinterpret_cast<const char*>(bytes.data()),
+                     bytes.size_bytes());
+}
+
+// Converts one little-endian IEEE 754 single precision sample to 16 bit.
+// The bits are reinterpreted with std::bit_cast instead of rebuilding a
+// double by hand, so zero, subnormals and infinities keep their meaning and
+// no aliasing rules are broken. NaN decodes as silence; everything else is
+// clipped to [-1, 1] before being handed to sample::FromDouble.
+static int16_t convert_f32_to_16_bit(ByteSpan bytes) {
+  const float value = std::bit_cast<float>(bytes_to_u32(bytes));
+  if (std::isnan(value)) {
+    return 0;
+  }
+  return sample::FromDouble(std::clamp(static_cast<double>(value), -1.0, 1.0));
+}
+
+// Converts one little-endian integer PCM sample of 1 to 4 bytes to 16 bit.
+// Single byte samples are assumed to be unsigned; wider ones are passed to
+// sample::FromSigned along with their bit depth. Any other width decodes as
+// silence.
+static int16_t convert_to_16_bit(ByteSpan bytes) {
+  const std::size_t depth = bytes.size();
+  if (depth == 0 || depth > sizeof(uint32_t)) {
+    return 0;
+  }
+  if (depth == 1) {
+    return sample::FromUnsigned(std::to_integer<uint8_t>(bytes[0]), 8);
+  }
+  // Start from the most significant (last) byte and shift the rest in.
+  uint32_t raw = 0;
+  for (std::size_t i = depth; i-- > 0;) {
+    raw = (raw << 8) | std::to_integer<uint32_t>(bytes[i]);
+  }
+  return sample::FromSigned(static_cast<int32_t>(raw), depth * 8);
+}
+
+WavDecoder::WavDecoder()
+    : input_(),
+      buffer_(),
+      wave_format_(0),
+      subformat_(0),
+      output_format_(),
+      bytes_per_sample_(0),
+      num_channels_(0) {}
+
+WavDecoder::~WavDecoder() {}
+
+// Parses the RIFF/WAVE header found at the start of `input`.
+//
+// Byte offsets read (all integers little-endian):
+//    0 'RIFF'          8 'WAVE'           12 'fmt'
+//   20 format tag     22 channel count    24 sample rate
+//   32 block align    36 extension size   44 sub-format tag
+// (the last two only for extensible files), then 'data' and its 32 bit size
+// wherever they appear in the bytes that were read.
+//
+// On success the stream is positioned at the first byte of sample data.
+// Fails with kOutOfInput on a short read, kMalformedData when the header
+// cannot be parsed and kUnsupportedFormat for encodings DecodeTo cannot
+// convert.
+auto WavDecoder::OpenStream(std::shared_ptr<IStream> input)
+    -> cpp::result<OutputFormat, Error> {
+  input_ = input;
+  // Drop state from any earlier stream so a failed open cannot leave
+  // GetFormat() or DecodeTo() working from stale or indeterminate fields.
+  wave_format_ = 0;
+  subformat_ = 0;
+  bytes_per_sample_ = 0;
+  num_channels_ = 0;
+
+  std::array<std::byte, kHeaderProbeSize> buf{};
+  auto size = input->Read(buf);
+  if (size < kMinHeaderSize) {
+    return cpp::fail(Error::kOutOfInput);
+  }
+  // Only the bytes that were actually read are valid header data.
+  const ByteSpan header{
+      buf.data(), std::min(static_cast<std::size_t>(size), buf.size())};
+
+  if (bytes_to_str(header.subspan(0, 4)) != "RIFF") {
+    ESP_LOGW(kTag, "file is not RIFF");
+    return cpp::fail(Error::kMalformedData);
+  }
+  if (bytes_to_str(header.subspan(8, 4)) != "WAVE") {
+    ESP_LOGW(kTag, "RIFF file is not WAVE");
+    return cpp::fail(Error::kMalformedData);
+  }
+  if (!bytes_to_str(header.subspan(12, 4)).starts_with("fmt")) {
+    ESP_LOGW(kTag, "Could not find format chunk");
+    return cpp::fail(Error::kMalformedData);
+  }
+
+  wave_format_ = bytes_to_u16(header.subspan(20, 2));
+  if (wave_format_ == kWaveFormatPCM) {
+    ESP_LOGD(kTag, "wave format: PCM");
+  } else if (wave_format_ == kWaveFormatExtensible) {
+    ESP_LOGD(kTag, "wave format: extensible");
+  } else if (wave_format_ == kWaveFormatIEEEFloat) {
+    ESP_LOGD(kTag, "wave format: IEEE Float");
+  } else {
+    ESP_LOGW(kTag, "WAVE format not supported");
+    return cpp::fail(Error::kUnsupportedFormat);
+  }
+
+  num_channels_ = bytes_to_u16(header.subspan(22, 2));
+  const uint32_t samples_per_second = bytes_to_u32(header.subspan(24, 4));
+  const uint16_t block_align = bytes_to_u16(header.subspan(32, 2));
+  // A zero channel count, or a block smaller than one byte per channel, would
+  // turn the divisions below and in DecodeTo() into divisions by zero.
+  if (num_channels_ == 0 || block_align < num_channels_) {
+    ESP_LOGW(kTag, "invalid channel count or block alignment");
+    return cpp::fail(Error::kMalformedData);
+  }
+  // The channel count is narrowed to uint8_t when output_format_ is filled in.
+  if (num_channels_ > UINT8_MAX) {
+    ESP_LOGW(kTag, "too many channels");
+    return cpp::fail(Error::kUnsupportedFormat);
+  }
+  bytes_per_sample_ = block_align / num_channels_;
+
+  if (wave_format_ == kWaveFormatExtensible) {
+    // The sub-format tag only exists after a full 22 byte extension; without
+    // it there is no way to tell how the samples are encoded.
+    if (header.size() < 46 || bytes_to_u16(header.subspan(36, 2)) != 22) {
+      ESP_LOGW(kTag, "WAVE extensible header has no valid extension");
+      return cpp::fail(Error::kMalformedData);
+    }
+    subformat_ = bytes_to_u16(header.subspan(44, 2));
+    if (!(subformat_ == kWaveFormatPCM ||
+          subformat_ == kWaveFormatIEEEFloat)) {
+      ESP_LOGW(kTag, "WAVE extensible subformat_ not supported");
+      return cpp::fail(Error::kUnsupportedFormat);
+    }
+  }
+
+  // Reject sample widths the converters cannot handle: floats must be 32 bit
+  // (64 bit is not implemented) and integers at most 32 bit.
+  if (GetFormat() == kWaveFormatIEEEFloat && bytes_per_sample_ != 4) {
+    ESP_LOGW(kTag, "only 32-Bit Float WAVE is supported");
+    return cpp::fail(Error::kUnsupportedFormat);
+  }
+  if (GetFormat() == kWaveFormatPCM && bytes_per_sample_ > 4) {
+    ESP_LOGW(kTag, "PCM samples wider than 32 bits not supported");
+    return cpp::fail(Error::kUnsupportedFormat);
+  }
+
+  // Find the start of the data chunk, looking only at bytes that were read
+  // and only past the mandatory part of the fmt chunk.
+  static constexpr std::array<std::byte, 4> kDataTag = {
+      std::byte{0x64}, std::byte{0x61}, std::byte{0x74}, std::byte{0x61}};
+  const ByteSpan after_fmt = header.subspan(kEndOfBasicFmt);
+  const auto data_loc = std::ranges::search(after_fmt, kDataTag);
+  if (data_loc.begin() == after_fmt.end()) {
+    ESP_LOGW(kTag, "Could not find data chunk!");
+    return cpp::fail(Error::kMalformedData);
+  }
+  const std::size_t data_chunk_index =
+      kEndOfBasicFmt + static_cast<std::size_t>(std::distance(
+                           after_fmt.begin(), data_loc.begin()));
+  // The tag must be followed by a complete 32 bit chunk size.
+  if (data_chunk_index + 8 > header.size()) {
+    ESP_LOGW(kTag, "data chunk header is truncated");
+    return cpp::fail(Error::kMalformedData);
+  }
+  const uint32_t data_chunk_size =
+      bytes_to_u32(header.subspan(data_chunk_index + 4, 4));
+  const uint32_t number_of_samples = data_chunk_size / bytes_per_sample_;
+
+  // Seek track to start of data
+  input->SeekTo(data_chunk_index + 8, IStream::SeekFrom::kStartOfStream);
+
+  output_format_ = {.num_channels = static_cast<uint8_t>(num_channels_),
+                    .sample_rate_hz = samples_per_second,
+                    .total_samples = number_of_samples};
+
+  return output_format_;
+}
+
+// Converts as many whole frames as both the buffered input and `output` can
+// hold. The stream is reported finished once the input has hit end of file
+// and no further sample could be produced.
+auto WavDecoder::DecodeTo(cpp::span<sample::Sample> output)
+    -> cpp::result<OutputInfo, Error> {
+  const size_t channels = output_format_.num_channels;
+  // Both are zero until OpenStream() has succeeded; bail out rather than
+  // divide by zero below.
+  if (bytes_per_sample_ == 0 || channels == 0) {
+    return cpp::fail(Error::kInternalError);
+  }
+
+  bool is_eof = buffer_.Refill(input_.get());
+  size_t samples_written = 0;
+  const bool is_float = GetFormat() == kWaveFormatIEEEFloat;
+
+  buffer_.ConsumeBytes([&](cpp::span<std::byte> buf) -> size_t {
+    const size_t frames_read = buf.size_bytes() / bytes_per_sample_ / channels;
+    samples_written =
+        std::min(frames_read, output.size() / channels) * channels;
+
+    // For each sample that we're going to write
+    for (size_t i = 0; i < samples_written; i++) {
+      auto data = buf.subspan(i * bytes_per_sample_, bytes_per_sample_);
+      output[i] =
+          is_float ? convert_f32_to_16_bit(data) : convert_to_16_bit(data);
+    }
+
+    return samples_written * bytes_per_sample_;
+  });
+
+  return OutputInfo{.samples_written = samples_written,
+                    .is_stream_finished = samples_written == 0 && is_eof};
+}
+
+// Seeking is not implemented; reports success without moving the stream.
+auto WavDecoder::SeekTo([[maybe_unused]] size_t target)
+    -> cpp::result<void, Error> {
+  return {};
+}
+
+// Returns the tag describing the sample encoding: the sub-format for
+// extensible files, the plain format tag otherwise.
+auto WavDecoder::GetFormat() const -> uint16_t {
+  if (wave_format_ == kWaveFormatExtensible) {
+    return subformat_;
+  }
+  return wave_format_;
+}
+
+}  // namespace codecs
diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt
index e1913920..bb4ce320 100644
--- a/src/util/CMakeLists.txt
+++ b/src/util/CMakeLists.txt
@@ -2,4 +2,4 @@
 #
 # SPDX-License-Identifier: GPL-3.0-only
 
-idf_component_register(SRCS INCLUDE_DIRS "include" REQUIRES "database")
+idf_component_register(SRCS INCLUDE_DIRS "include" REQUIRES "database" "span")
diff --git a/src/util/include/debug.hpp b/src/util/include/debug.hpp
new file mode 100644
index 00000000..620b0974
--- /dev/null
+++ b/src/util/include/debug.hpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2023 jacqueline
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <iomanip>
+#include <sstream>
+#include <string>
+
+#include "span.hpp"
+
+namespace util {
+
+// Renders `data` as a hex dump, 16 bytes per row. Each row starts on a new
+// line with the hex offset of its first byte, shows every byte as `[0xNN]`
+// (with an extra gap after the eighth byte) and ends with the printable
+// ASCII form of the row between `|` characters; non-printable bytes are shown
+// as `.`. The returned string begins with a newline when `data` is not empty.
+inline std::string format_hex_string(cpp::span<const std::byte> data) {
+  std::ostringstream oss;
+  std::string ascii_values;
+  int count = 0;
+  for (auto byte : data) {
+    if (count % 16 == 0) {
+      if (!ascii_values.empty()) {
+        oss << "\t|" << ascii_values << "|";
+        // Reset ascii values
+        ascii_values.clear();
+      }
+      oss << '\n';
+      oss << "0x" << std::uppercase << std::setfill('0') << std::setw(2)
+          << std::hex << count << '\t';
+    } else if (count % 8 == 0) {
+      oss << " ";
+    }
+    const int byte_val = std::to_integer<int>(byte);
+    oss << "[0x" << std::uppercase << std::setfill('0') << std::setw(2)
+        << std::hex << byte_val << ']';
+    if (byte_val >= 32 && byte_val < 127) {
+      ascii_values += static_cast<char>(byte_val);
+    } else {
+      ascii_values += '.';
+    }
+    count++;
+  }
+  // The loop only emits a row's ASCII column when the next row starts, so the
+  // final (possibly partial) row has to be flushed here.
+  if (!ascii_values.empty()) {
+    oss << "\t|" << ascii_values << "|";
+  }
+  return oss.str();
+}
+
+}  // namespace util