From 1238437717a49924cb45a12b934b3108c402e864 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Thu, 8 Jun 2023 08:02:37 +1000
Subject: [PATCH 1/7] Add flac and ogg decoder deps

---
 .reuse/dep5                           |    8 +
 LICENSES/Apache-2.0.txt               |   73 +
 lib/libfoxenflac/CMakeLists.txt       |    8 +
 lib/libfoxenflac/flac.c               | 2022 +++++++++
 lib/libfoxenflac/include/foxen/flac.h |  297 ++
 lib/stb_vorbis/CMakeLists.txt         |    8 +
 lib/stb_vorbis/include/stb_vorbis.h   |  418 ++
 lib/stb_vorbis/stb_vorbis.c           | 5584 +++++++++++++++++++++++++
 src/codecs/CMakeLists.txt             |    2 +-
 tools/cmake/common.cmake              |    2 +
 10 files changed, 8421 insertions(+), 1 deletion(-)
 create mode 100644 LICENSES/Apache-2.0.txt
 create mode 100644 lib/libfoxenflac/CMakeLists.txt
 create mode 100644 lib/libfoxenflac/flac.c
 create mode 100644 lib/libfoxenflac/include/foxen/flac.h
 create mode 100644 lib/stb_vorbis/CMakeLists.txt
 create mode 100644 lib/stb_vorbis/include/stb_vorbis.h
 create mode 100644 lib/stb_vorbis/stb_vorbis.c

diff --git a/.reuse/dep5 b/.reuse/dep5
index 1e95bfca..9bc6b94e 100644
--- a/.reuse/dep5
+++ b/.reuse/dep5
@@ -19,6 +19,10 @@ Files: lib/leveldb/*
 Copyright: 2011 The LevelDB Authors
 License: LicenseRef-LevelDB
 
+Files: lib/libfoxenflac/*
+Copyright: 2018-2022  Andreas Stöckel
+License: GPL-2.0-or-later
+
 Files: lib/libmad/*
 Copyright: 2000-2004 Underbit Technologies, Inc.
 License: GPL-2.0-or-later
@@ -39,6 +43,10 @@ Files: lib/span/include/*
 Copyright: 2018 Tristan Brindle
 License: BSL-1.0
 
+Files: lib/stb_vorbis/*
+Copyright: 2017 Sean Barrett
+License: Unlicense
+
 Files: lib/tinyfsm/*
 Copyright: 2012-2022 Axel Burri
 License: MIT
diff --git a/LICENSES/Apache-2.0.txt b/LICENSES/Apache-2.0.txt
new file mode 100644
index 00000000..137069b8
--- /dev/null
+++ b/LICENSES/Apache-2.0.txt
@@ -0,0 +1,73 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
+
+"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
+
+     (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and
+
+     (b) You must cause any modified files to carry prominent notices stating that You changed the files; and
+
+     (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
+
+     (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
+
+     You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!)  The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/lib/libfoxenflac/CMakeLists.txt b/lib/libfoxenflac/CMakeLists.txt
new file mode 100644
index 00000000..0389236b
--- /dev/null
+++ b/lib/libfoxenflac/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2023 jacqueline <me@jacqueline.id.au>
+#
+# SPDX-License-Identifier: GPL-3.0-only
+
+idf_component_register(
+  SRCS "flac.c"
+  INCLUDE_DIRS "include"
+)
diff --git a/lib/libfoxenflac/flac.c b/lib/libfoxenflac/flac.c
new file mode 100644
index 00000000..d65ada3e
--- /dev/null
+++ b/lib/libfoxenflac/flac.c
@@ -0,0 +1,2022 @@
+/*
+ *  libfoxenflac -- Tiny FLAC Decoder Library
+ *  Copyright (C) 2018-2022  Andreas Stöckel
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <foxen/flac.h>
+
+#if 0
+/* Set FX_FLAC_NO_CRC if you control the input data and already performed other
+   integrity checks. This makes the decoder significantly faster. */
+#define FX_FLAC_NO_CRC
+#endif
+
+/******************************************************************************
+ * CODE MERGED FROM OTHER LIBFOXEN PROJECTS                                   *
+ ******************************************************************************/
+
+/******************************************************************************
+ * Copy of foxen/bitstream.h                                                  *
+ ******************************************************************************/
+
+/**
+ * Structure holding the current state of the bitstream reader.
+ */
+struct fx_bitstream {
+	/**
+	 * 64 bit word from which the data is extracted; new bytes enter at the low end.
+	 */
+	uint64_t buf;
+
+	/**
+	 * Pointer at the next source byte that has not been loaded into buf yet.
+	 */
+	uint8_t const *src;
+
+	/**
+	 * Pointer at the end of the source byte stream (one past the last byte).
+	 */
+	uint8_t const *src_end;
+
+	/**
+	 * Position within buf in bits, i.e. the number of bits of the 64 bit word
+	 * that have been consumed (64 means that buf holds no unread bits).
+	 */
+	uint8_t pos;
+};
+
+/**
+ * Typedef for the fx_bitstream struct.
+ */
+typedef struct fx_bitstream fx_bitstream_t;
+
+/**
+ * Callback called whenever a full byte has been consumed. This is useful for
+ * CRC calculations; `data` is the user pointer handed to the read function.
+ */
+typedef void (*fx_bitstream_byte_callback_t)(uint8_t byte, void *data);
+
+/**
+ * Initializes the bitstream reader instance. Call fx_bitstream_set_source()
+ * to set the byte buffer from which the bitstream reader should read its
+ * data; until then the reader has no source and no readable bits.
+ *
+ * @param reader is the bitstream reader instance that should be
+ * initialized.
+ */
+static inline void fx_bitstream_init(fx_bitstream_t *reader) {
+	reader->buf = 0U;
+	reader->pos = sizeof(reader->buf) * 8U; /* 64: whole word marked consumed */
+	reader->src = NULL;
+	reader->src_end = NULL;
+}
+
+/**
+ * Sets the backing source buffer for the bitstream. This function may only be
+ * called if the given pointer is a direct continuation of the previous data,
+ * i.e. src is essentially equal to the current reader->src.
+ *
+ * @param reader is the bitstream reader instance for which the source byte
+ * buffer should be set.
+ * @param src is a pointer at the source byte buffer.
+ * @param src_len is the length of the source byte buffer in bytes.
+ */
+static inline void fx_bitstream_set_source(fx_bitstream_t *reader,
+                                           const uint8_t *src,
+                                           uint32_t src_len);
+
+/**
+ * Returns true if the corresponding read operation will be successful.
+ *
+ * @param reader is the bitstream reader instance from which the data should be
+ * read.
+ * @param n_bits is the number of bits that should be read from the bitstream
+ * reader. Must be in 1 <= n_bits <= 57.
+ * @return true if at least n_bits unconsumed bits remain in the 64 bit buffer.
+ */
+static inline bool fx_bitstream_can_read(fx_bitstream_t *reader,
+                                         uint8_t n_bits) {
+	return (sizeof(reader->buf) * 8U) >= (n_bits + reader->pos);
+}
+
+/**
+ * Reads up to 57 bits from the input buffer in MSB order. Note that this
+ * function does not check whether the read operation returns valid data, so
+ * make sure to call fx_bitstream_can_read() before reading.
+ *
+ * @param reader is the bitstream reader instance from which the data should be
+ * read.
+ * @param n_bits is the number of bits that should be read. Must be in
+ * 1 <= n_bits <= 57.
+ * @return an integer corresponding to the specified number of bits.
+ */
+static inline uint64_t fx_bitstream_read_msb(fx_bitstream_t *reader,
+                                             uint8_t n_bits);
+
+/**
+ * Reads up to 57 bits from the input buffer in MSB order. Note that this
+ * function does not check whether the read operation returns valid data, so
+ * make sure to call fx_bitstream_can_read() before reading.
+ *
+ * @param reader is the bitstream reader instance from which the data should be
+ * read.
+ * @param n_bits is the number of bits that should be read. Must be in
+ * 1 <= n_bits <= 57.
+ * @param callback is called whenever a full byte is consumed. Note that this
+ * includes a "virtual" set of zeros at the beginning of the bitstream.
+ * @param callback_data is a user-defined pointer passed to the byte callback.
+ * @return an integer corresponding to the specified number of bits.
+ */
+static inline uint64_t fx_bitstream_read_msb_ex(
+    fx_bitstream_t *reader, uint8_t n_bits,
+    fx_bitstream_byte_callback_t callback, void *callback_data);
+
+/**
+ * Reads up to 57 bits from the input buffer in MSB order without advancing the
+ * buffer location. Note that this function does not check whether the read
+ * operation returns valid data, so make sure to call fx_bitstream_can_read()
+ * before reading.
+ *
+ * @param reader is the bitstream reader instance from which the data should be
+ * read.
+ * @param n_bits is the number of bits that should be read. Must be in
+ * 1 <= n_bits <= 57.
+ * @return an integer corresponding to the specified number of bits.
+ */
+static inline uint64_t fx_bitstream_peek_msb(fx_bitstream_t *reader,
+                                             uint8_t n_bits);
+
+/**
+ * Combination of fx_bitstream_can_read and fx_bitstream_read_msb. Returns a
+ * negative value if the desired number of bits cannot be read from the source.
+ * If the given number of bits is available, returns the desired integer.
+ *
+ * @param reader is the bitstream reader instance from which the data should be
+ * read.
+ * @param n_bits is the number of bits that should be read. Must be in
+ * 1 <= n_bits <= 57.
+ * @return -1 if the desired number of bits is not available in the bitstream.
+ * Otherwise the integer corresponding to the specified number of bits is
+ * returned.
+ */
+static inline int64_t fx_bitstream_try_read_msb(fx_bitstream_t *reader,
+                                                uint8_t n_bits) {
+	return fx_bitstream_can_read(reader, n_bits)
+	           ? (int64_t)fx_bitstream_read_msb(reader, n_bits)
+	           : -1;
+}
+
+/**
+ * Combination of fx_bitstream_can_read and fx_bitstream_read_msb_ex. Returns a
+ * negative value if the desired number of bits cannot be read from the source.
+ * If the given number of bits is available, returns the desired integer.
+ *
+ * @param reader is the bitstream reader instance from which the data should be
+ * read.
+ * @param n_bits is the number of bits that should be read. Must be in
+ * 1 <= n_bits <= 57.
+ * @param callback is called whenever a full byte is consumed. Note that this
+ * includes a "virtual" set of zeros at the beginning of the bitstream.
+ * @param callback_data is a user-defined pointer passed to the byte callback.
+ * @return -1 if the desired number of bits is not available in the bitstream.
+ * Otherwise the integer corresponding to the specified number of bits is
+ * returned.
+ */
+static inline int64_t fx_bitstream_try_read_msb_ex(
+    fx_bitstream_t *reader, uint8_t n_bits,
+    fx_bitstream_byte_callback_t callback, void *callback_data) {
+	return fx_bitstream_can_read(reader, n_bits)
+	           ? (int64_t)fx_bitstream_read_msb_ex(reader, n_bits, callback,
+	                                               callback_data)
+	           : -1;
+}
+
+/**
+ * Combination of fx_bitstream_can_read and fx_bitstream_peek_msb. Returns a
+ * negative value if the desired number of bits cannot be read from the source.
+ * If the given number of bits is available, returns the desired integer.
+ * In contrast to fx_bitstream_try_read_msb() this function does not advance
+ * the actual reader pointer.
+ *
+ * @param reader is the bitstream reader instance from which the data should be
+ * read.
+ * @param n_bits is the number of bits that should be read. Must be in
+ * 1 <= n_bits <= 57.
+ * @return -1 if the desired number of bits is not available in the bitstream.
+ * Otherwise the integer corresponding to the specified number of bits is
+ * returned.
+ */
+static inline int64_t fx_bitstream_try_peek_msb(fx_bitstream_t *reader,
+                                                uint8_t n_bits) {
+	return fx_bitstream_can_read(reader, n_bits)
+	           ? (int64_t)fx_bitstream_peek_msb(reader, n_bits)
+	           : -1;
+}
+
+#define BUFSIZE (sizeof(((fx_bitstream_t *)NULL)->buf) * 8U) /* bits in buf */
+/* Shifts source bytes into buf while >= 8 bits are consumed and input is left. */
+static inline void _fx_bitstream_fill_buf(fx_bitstream_t *reader) {
+	while (reader->pos >= 8U && reader->src != reader->src_end) {
+		reader->buf = (reader->buf << 8U) | *(reader->src++);
+		reader->pos -= 8U;
+	}
+}
+
+static inline uint64_t _fx_bitstream_read_msb(
+    fx_bitstream_t *reader, uint8_t n_bits,
+    fx_bitstream_byte_callback_t callback, void *callback_data) {
+	assert((n_bits >= 1U) && (n_bits <= (BUFSIZE - 7U)));
+	/* Shared body of fx_bitstream_read_msb{,_ex}. Left-align the unread bits
+	 * by shifting out the pos bits that have already been consumed. */
+	uint64_t bits = reader->buf << reader->pos;
+
+	/* If a callback is given, report each byte that this read completes */
+	const uint8_t pos_new = reader->pos + n_bits;
+	if (callback) {
+		const uint8_t i0 = reader->pos / 8U, i1 = pos_new / 8U;
+		uint64_t buf = reader->buf << (i0 * 8U);
+		for (uint8_t i = i0; i < i1; i++) {
+			uint8_t byte = buf >> (BUFSIZE - 8U);
+			callback(byte, callback_data);
+			buf = buf << 8U;
+		}
+	}
+
+	/* Advance the position */
+	reader->pos = pos_new;
+
+	/* Pull in new bytes from the byte stream to replace the consumed ones */
+	_fx_bitstream_fill_buf(reader);
+
+	/* Move the requested n_bits from the top of the word down to bit 0 */
+	return bits >> (BUFSIZE - n_bits);
+}
+
+static inline void fx_bitstream_set_source(fx_bitstream_t *reader,
+                                           const uint8_t *src,
+                                           uint32_t src_len) {
+	reader->src = src;
+	reader->src_end = src + src_len; /* one past the last source byte */
+	_fx_bitstream_fill_buf(reader); /* load bytes into buf right away */
+}
+
+static inline uint64_t fx_bitstream_read_msb(fx_bitstream_t *reader,
+                                             uint8_t n_bits) {
+	return _fx_bitstream_read_msb(reader, n_bits, NULL, NULL); /* no callback */
+}
+/* Same read as fx_bitstream_read_msb, but hands the byte callback through. */
+static inline uint64_t fx_bitstream_read_msb_ex(
+    fx_bitstream_t *reader, uint8_t n_bits,
+    fx_bitstream_byte_callback_t callback, void *callback_data) {
+	return _fx_bitstream_read_msb(reader, n_bits, callback, callback_data);
+}
+
+static inline uint64_t fx_bitstream_peek_msb(fx_bitstream_t *reader,
+                                             uint8_t n_bits) {
+	assert((n_bits >= 1U) && (n_bits <= (BUFSIZE - 7U)));
+	return (reader->buf << reader->pos) >> (BUFSIZE - n_bits); /* pos unchanged */
+}
+
+/******************************************************************************
+ * Copy of foxen/mem.h                                                        *
+ ******************************************************************************/
+
+/**
+ * Memory alignment for pointers internally used by Stanchion. Aligning memory
+ * and telling the compiler about it allows the compiler to perform better
+ * optimization. Furthermore, some platforms (WASM) do not allow unaligned
+ * memory access.
+ */
+#define FX_ALIGN 16
+
+/**
+ * Macro telling the compiler that P is aligned with the specified alignment
+ * ALIGN.
+ */
+#define FX_ASSUME_ALIGNED_EX(P, ALIGN) P
+#ifdef __GNUC__
+#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 7) || (__GNUC__ > 4)
+#undef FX_ASSUME_ALIGNED_EX
+#define FX_ASSUME_ALIGNED_EX(P, ALIGN) (__builtin_assume_aligned(P, ALIGN))
+#endif /* (__GNUC__ == 4 && __GNUC_MINOR__ >= 7) || (__GNUC__ > 4) */
+#endif /* __GNUC__ */
+
+/**
+ * Macro telling the compiler that P is aligned with the alignment defined
+ * above.
+ */
+#define FX_ASSUME_ALIGNED(P) FX_ASSUME_ALIGNED_EX(P, FX_ALIGN)
+
+/**
+ * Forces a pointer to have the specified alignment.
+ */
+#define FX_ALIGN_ADDR_EX(P, ALIGN)                                          \
+	(FX_ASSUME_ALIGNED_EX(                                                  \
+	    (void *)(((uintptr_t)(P) + ALIGN - 1) & (~(uintptr_t)(ALIGN - 1))), \
+	    ALIGN))
+
+/**
+ * Forces a pointer to have the alignment defined above.
+ */
+#define FX_ALIGN_ADDR(P) FX_ALIGN_ADDR_EX(P, FX_ALIGN)
+
+/**
+ * Macro that fills the structure pointed at by P with zeros. See
+ * fx_mem_zero_aligned() regarding potential dangers.
+ */
+#define FX_MEM_ZERO_ALIGNED(P)                \
+	do {                                      \
+		fx_mem_zero_aligned(P, sizeof(*(P))); \
+	} while (0)
+
+/**
+ * Call this first in a chain of fx_mem_update_size() calls. It will make sure
+ * that there is enough space to align the datastructure whenever the user
+ * provides a non-aligned target memory pointer.
+ *
+ * @param size is a pointer at a variable that holds the size of the object
+ * that we're describing. This function initializes this value to FX_ALIGN.
+ * @return Always returns true to facilitate chaining with other fx_mem_*_size()
+ * functions.
+ */
+static inline bool fx_mem_init_size(uint32_t *size) {
+	*size = FX_ALIGN;
+	return true;
+}
+
+/**
+ * Function used to compute the total size of a datastructure consisting of
+ * multiple substructures. Adds a substructure of size n_bytes to *size and
+ * rounds the sum up to the next multiple of align.
+ *
+ * @param size is a pointer at the variable holding the size of the
+ * datastructure. This must always be a multiple of FX_ALIGN.
+ * @param n_bytes size of the sub-structure that should be added.
+ * @param align alignment in bytes; the bit mask assumes a power of two.
+ * @return false if there was an overflow (*size is untouched), true otherwise.
+ */
+static inline bool fx_mem_update_size_ex(uint32_t *size, uint32_t n_bytes,
+                                         uint32_t align) {
+	const uint32_t new_size = ((*size + n_bytes + align - 1) & (~(align - 1)));
+	if (new_size < *size) {
+		return false; /* error, there has been an overflow */
+	}
+	*size = new_size;
+	return true; /* success */
+}
+
+/**
+ * Function used to compute the total size of a datastructure consisting of
+ * multiple substructures. Calling this function updates the size of the outer
+ * datastructure by adding a substructure of size n_bytes. Assumes that the
+ * beginning of the substructure must be aligned to the default alignment.
+ *
+ * @param size is a pointer at the variable holding the size of the
+ * datastructure. This must always be a multiple of FX_ALIGN.
+ * @param n_bytes size of the sub-structure that should be added.
+ * @return false if there was an overflow, true otherwise.
+ */
+static inline bool fx_mem_update_size(uint32_t *size, uint32_t n_bytes) {
+	return fx_mem_update_size_ex(size, n_bytes, FX_ALIGN);
+}
+
+/**
+ * Computes the aligned pointer pointing at the substructure of the given size
+ * for the specified alignment.
+ *
+ * @param mem pointer at the variable holding the current memory pointer. It is
+ * set to the returned (aligned) address plus size after the return value is
+ * computed.
+ * @param size is the size of the substructure for which the pointer should be
+ * returned.
+ * @param align is the memory alignment to use.
+ * @return an aligned pointer pointing at the beginning of the substructure.
+ */
+static inline void *fx_mem_align_ex(void **mem, uint32_t size, uint32_t align) {
+	void *res = FX_ALIGN_ADDR_EX(*mem, align); /* round *mem up to align */
+	*mem = (void *)((uintptr_t)res + size); /* next free byte after the block */
+	return res;
+}
+
+/**
+ * Computes the default-aligned pointer pointing at the substructure of the
+ * given size.
+ *
+ * @param mem pointer at the variable holding the current memory pointer. It is
+ * set to the returned (aligned) address plus size after the return value is
+ * computed.
+ * @param size is the size of the substructure for which the pointer should be
+ * returned.
+ * @return an aligned pointer pointing at the beginning of the substructure.
+ */
+static inline void *fx_mem_align(void **mem, uint32_t size) {
+	return fx_mem_align_ex(mem, size, FX_ALIGN);
+}
+
+/**
+ * Fills the given memory region with zeros. In contrast to memset(mem, 0, size)
+ * this assumes that the pointer is at least aligned at the FX_ALIGN boundary
+ * and that whole FX_ALIGN byte chunks (two uint64_t stores per loop pass) may
+ * be written. This is potentially dangerous, so do not use this function if
+ * you don't know exactly what you're doing.
+ *
+ * @param mem is a pointer at the memory region that should be zeroed out. This
+ * pointer is assumed to be aligned.
+ * @param size is the size of the memory region that should be zeroed in bytes.
+ * Effectively rounded up to a multiple of FX_ALIGN (may write past size).
+ */
+static inline void fx_mem_zero_aligned(void *mem, uint32_t size) {
+	assert((((uintptr_t)mem) & (FX_ALIGN - 1)) == 0); /* mem must be aligned */
+	mem = FX_ASSUME_ALIGNED(mem);
+	for (uint32_t i = 0; i < (size + FX_ALIGN - 1) / FX_ALIGN; i++) {
+		((uint64_t *)mem)[2 * i + 0] = 0; /* If we're lucky, this loop is */
+		((uint64_t *)mem)[2 * i + 1] = 0; /* unrolled and vectorised. */
+	}
+}
+
+/******************************************************************************
+ * DATATYPES                                                                  *
+ ******************************************************************************/
+
+/******************************************************************************
+ * Enums and constants defined in the FLAC format specification               *
+ ******************************************************************************/
+
+/**
+ * Possible metadata block types.
+ */
+typedef enum {
+	META_TYPE_STREAMINFO = 0,
+	META_TYPE_PADDING = 1,
+	META_TYPE_APPLICATION = 2,
+	META_TYPE_SEEKTABLE = 3,
+	META_TYPE_VORBIS_COMMENT = 4,
+	META_TYPE_CUESHEET = 5,
+	META_TYPE_PICTURE = 6,
+	META_TYPE_INVALID = 127
+} fx_flac_metadata_type_t;
+
+typedef enum { BLK_FIXED = 0, BLK_VARIABLE = 1 } fx_flac_blocking_strategy_t;
+
+typedef enum {
+	INDEPENDENT_MONO = 0,
+	INDEPENDENT_STEREO = 1,
+	INDEPENDENT_3C = 2,
+	INDEPENDENT_4C = 3,
+	INDEPENDENT_5C = 4,
+	INDEPENDENT_6C = 5,
+	INDEPENDENT_7C = 6,
+	INDEPENDENT_8C = 7,
+	LEFT_SIDE_STEREO = 8,
+	RIGHT_SIDE_STEREO = 9,
+	MID_SIDE_STEREO = 10,
+} fx_flac_channel_assignment_t;
+
+typedef enum {
+	BLK_SIZE_RESERVED = 0, /* rejected as invalid */
+	BLK_SIZE_192 = 1,
+	BLK_SIZE_576 = 2,
+	BLK_SIZE_1152 = 3,
+	BLK_SIZE_2304 = 4,
+	BLK_SIZE_4608 = 5,
+	BLK_SIZE_READ_8BIT = 6, /* (block size - 1) follows as 8 bits */
+	BLK_SIZE_READ_16BIT = 7, /* (block size - 1) follows as 16 bits */
+	BLK_SIZE_256 = 8,
+	BLK_SIZE_512 = 9,
+	BLK_SIZE_1024 = 10,
+	BLK_SIZE_2048 = 11,
+	BLK_SIZE_4096 = 12,
+	BLK_SIZE_8192 = 13,
+	BLK_SIZE_16384 = 14,
+	BLK_SIZE_32768 = 15
+} fx_flac_block_size_t;
+/* Block size per fx_flac_block_size_t; -1: invalid, 0: stored explicitly. */
+static const int32_t fx_flac_block_sizes_[] = {
+    -1,  192, 576,  1152, 2304, 4608, 0,     0,
+    256, 512, 1024, 2048, 4096, 8192, 16384, 32768};
+
+typedef enum {
+	FS_STREAMINFO = 0, /* keep the rate copied from STREAMINFO */
+	FS_88_2KHZ = 1,
+	FS_176_4KHZ = 2,
+	FS_192KHZ = 3,
+	FS_8KHZ = 4,
+	FS_16KHZ = 5,
+	FS_22_05KHZ = 6,
+	FS_24KHZ = 7,
+	FS_32KHZ = 8,
+	FS_44_1KHZ = 9,
+	FS_48KHZ = 10,
+	FS_96KHZ = 11,
+	FS_READ_8BIT_KHZ = 12, /* rate follows as 8 bits, in kHz */
+	FS_READ_16BIT_HZ = 13, /* rate follows as 16 bits, in Hz */
+	FS_READ_16BIT_DHZ = 14, /* rate follows as 16 bits, in units of 10 Hz */
+	FS_INVALID = 15 /* rejected as invalid */
+} fx_flac_sample_rate_t;
+/* Sample rate in Hz per fx_flac_sample_rate_t; -1: invalid, 0: not in table. */
+static const int32_t fx_flac_sample_rates_[] = {
+    0,     88200, 176400, 192000, 8000, 16000, 22050, 24000,
+    32000, 44100, 48000,  96000,  0,    0,     0,     -1};
+
+typedef enum {
+	SS_STREAMINFO = 0, /* keep the size copied from STREAMINFO */
+	SS_8BIT = 1,
+	SS_12BIT = 2,
+	SS_RESERVED_1 = 3, /* rejected as invalid */
+	SS_16BIT = 4,
+	SS_20BIT = 5,
+	SS_24BIT = 6,
+	SS_RESERVED_2 = 7 /* rejected as invalid */
+} fx_flac_sample_size_t;
+/* Bits per sample per enum value; -1: reserved, 0: taken from STREAMINFO. */
+static const int8_t fx_flac_sample_sizes_[] = {0, 8, 12, -1, 16, 20, 24, -1};
+
+typedef enum {
+	SFT_CONSTANT, /* one sample value repeated over the whole block */
+	SFT_VERBATIM, /* every sample of the block is read directly */
+	SFT_FIXED, /* coefficients taken from _fx_flac_fixed_coeffs */
+	SFT_LPC /* coefficients kept in the decoder's qbuf */
+} fx_flac_subframe_type_t;
+
+typedef enum {
+	RES_RICE = 0,
+	RES_RICE2 = 1,
+	RES_RESERVED_1 = 2, /* reserved, not a usable coding method */
+	RES_RESERVED_2 = 3 /* reserved, not a usable coding method */
+} fx_flac_residual_method_t;
+
+/******************************************************************************
+ * Structs defined in the flac format specification                           *
+ ******************************************************************************/
+
+/**
+ * Struct holding all the information stored in an individual block header.
+ */
+typedef struct {
+	/**
+	 * True if this metadata block is the last metadata block in the stream
+	 * (read from a 1-bit flag in the block header).
+	 */
+	bool is_last;
+
+	/**
+	 * Type of the metadata block (7-bit header field).
+	 */
+	fx_flac_metadata_type_t type;
+
+	/**
+	 * Length of the metadata block in bytes (24-bit header field).
+	 */
+	uint32_t length;
+} fx_flac_metadata_t;
+
+/**
+ * Data stored in the 34-byte STREAMINFO metadata block.
+ */
+typedef struct {
+	uint16_t min_block_size; /* 16-bit field */
+	uint16_t max_block_size; /* 16-bit field */
+	uint32_t min_frame_size; /* 24-bit field */
+	uint32_t max_frame_size; /* 24-bit field */
+	uint32_t sample_rate; /* 20-bit field */
+	uint8_t n_channels; /* 3-bit field plus one */
+	uint8_t sample_size; /* 5-bit field plus one */
+	uint64_t n_samples; /* 36-bit field */
+	uint8_t md5_sum[16]; /* 16 bytes, kept in stream order */
+} fx_flac_streaminfo_t;
+
+/**
+ * Frame header prepended to each FLAC audio block.
+ */
+typedef struct {
+	fx_flac_blocking_strategy_t blocking_strategy; /* 1-bit field */
+	fx_flac_block_size_t block_size_enum; /* 4-bit field */
+	fx_flac_sample_rate_t sample_rate_enum; /* 4-bit field */
+	fx_flac_channel_assignment_t channel_assignment; /* 4-bit field */
+	fx_flac_sample_size_t sample_size_enum; /* 3-bit field */
+	uint32_t block_size; /* decoded from block_size_enum or read explicitly */
+	uint32_t sample_rate; /* from STREAMINFO, the enum or read explicitly */
+	uint8_t channel_count; /* derived from channel_assignment */
+	uint8_t sample_size; /* from STREAMINFO or sample_size_enum */
+	uint64_t sync_info; /* UTF-8 style coded number following the fields */
+	uint8_t crc8; /* header checksum byte as stored in the stream */
+} fx_flac_frame_header_t;
+
+/**
+ * Header prepended to the channel-specific data of each FLAC audio block.
+ */
+typedef struct {
+	/**
+	 * Specifies the method used to encode the data.
+	 */
+	fx_flac_subframe_type_t type;
+
+	/**
+	 * Predictor order; this many initial samples are read directly.
+	 */
+	uint8_t order;
+
+	/**
+	 * Number of bits the decoded result has to be shifted to the left.
+	 */
+	uint8_t wasted_bits;
+
+	/**
+	 * Number of bits used to encode the linear predictor coefficients.
+	 */
+	uint8_t lpc_prec;
+
+	/**
+	 * Shift applied to the coefficients; set to zero for SFT_FIXED subframes.
+	 */
+	int8_t lpc_shift;
+
+	/**
+	 * Coefficients (order many); points at qbuf or _fx_flac_fixed_coeffs.
+	 */
+	int32_t *lpc_coeffs;
+
+	/**
+	 * Method used to code the residual. FLAC currently only supports RICE and
+	 * RICE2.
+	 */
+	fx_flac_residual_method_t residual_method;
+
+	/**
+	 * Number of partitions the signal is divided into.
+	 */
+	uint8_t rice_partition_order;
+
+	/**
+	 * RICE parameter, i.e. the logarithm of the divisor.
+	 */
+	uint8_t rice_parameter;
+
+} fx_flac_subframe_header_t;
+
+/**
+ * LPC coefficients of the fixed coding mode; the row index is the order (0-4).
+ */
+static const int32_t _fx_flac_fixed_coeffs[5][4] = {
+    {0,0,0,0}, {1,0,0,0}, {2, -1, 0, 0}, {3, -3, 1, 0}, {4, -6, 4, -1}
+};
+
+/******************************************************************************
+ * Internal state machine enums                                               *
+ ******************************************************************************/
+
+/**
+ * Internal state. Numbering matters: some states advance via priv_state + 1.
+ */
+typedef enum {
+	FLAC_SYNC_INIT = 0,
+	FLAC_SYNC_F = 100,
+	FLAC_SYNC_L = 101,
+	FLAC_SYNC_A = 102,
+	FLAC_METADATA_HEADER = 200,
+	FLAC_METADATA_SKIP = 201,
+	FLAC_METADATA_SINFO = 202,
+	FLAC_FRAME_SYNC = 300,
+	FLAC_FRAME_HEADER = 400,
+	FLAC_FRAME_HEADER_SYNC_INFO = 401,
+	FLAC_FRAME_HEADER_AUX = 402,
+	FLAC_FRAME_HEADER_CRC = 403,
+	FLAC_SUBFRAME_HEADER = 500,
+	FLAC_SUBFRAME_CONSTANT = 502,
+	FLAC_SUBFRAME_FIXED = 503, /* + 1 -> FLAC_SUBFRAME_FIXED_RESIDUAL */
+	FLAC_SUBFRAME_FIXED_RESIDUAL = 504,
+	FLAC_SUBFRAME_LPC = 505, /* + 1 -> FLAC_SUBFRAME_LPC_HEADER */
+	FLAC_SUBFRAME_LPC_HEADER = 506,
+	FLAC_SUBFRAME_LPC_COEFFS = 507,
+	FLAC_SUBFRAME_LPC_RESIDUAL = 508,
+	FLAC_SUBFRAME_RICE_INIT = 509,
+	FLAC_SUBFRAME_RICE = 510,
+	FLAC_SUBFRAME_RICE_UNARY = 511,
+	FLAC_SUBFRAME_RICE_VERBATIM = 512,
+	FLAC_SUBFRAME_RICE_FINALIZE = 513,
+	FLAC_SUBFRAME_VERBATIM = 514, /* + 1 -> FLAC_SUBFRAME_FINALIZE */
+	FLAC_SUBFRAME_FINALIZE = 515
+} fx_flac_private_state_t;
+
+/******************************************************************************
+ * Internal structs                                                           *
+ ******************************************************************************/
+
+/**
+ * Private definition of the fx_flac structure.
+ */
+struct fx_flac {
+	/**
+	 * Bitstream reader used to read individual bits from the input.
+	 */
+	fx_bitstream_t bitstream;
+
+	/**
+	 * Current state of the decoder.
+	 */
+	fx_flac_state_t state;
+
+	/**
+	 * Current private state of the decoder.
+	 */
+	fx_flac_private_state_t priv_state;
+
+	/**
+	 * Number of bytes remaining to read for the current frame/block.
+	 */
+	uint32_t n_bytes_rem;
+
+	/**
+	 * Maximum numbers of samples in a single block, per channel.
+	 */
+	uint16_t max_block_size;
+
+	/**
+	 * Maximum number of channels supported by the decoder.
+	 */
+	uint8_t max_channels;
+
+	/**
+	 * Current coefficient.
+	 */
+	uint8_t coef_cur;
+
+	/**
+	 * Current rice partition.
+	 */
+	uint16_t partition_cur;
+
+	/**
+	 * Current sample index in the current rice partition (decremented to zero).
+	 */
+	uint16_t partition_sample;
+
+	/**
+	 * Current rice partition unary quotient counter.
+	 */
+	uint16_t rice_unary_counter;
+
+	/**
+	 * Current channel. This is reset at frame boundaries.
+	 */
+	uint8_t chan_cur;
+
+	/**
+	 * Write index into the block buffer of the current channel.
+	 */
+	uint16_t blk_cur;
+
+	/**
+	 * Variable holding the checksum computed when reading the frame_header.
+	 */
+	uint8_t crc8;
+
+	/**
+	 * Variable holding the checksum of an entire frame. If this checksum does
+	 * not match after decoding a frame, the entire frame is rejected.
+	 */
+	uint16_t crc16;
+
+	/**
+	 * Header (is_last flag, type and length) of the metadata block that is
+	 * currently being read.
+	 */
+	fx_flac_metadata_t *metadata;
+
+	/**
+	 * Structure holding the current stream metadata.
+	 */
+	fx_flac_streaminfo_t *streaminfo;
+
+	/**
+	 * Structure holding the frame header.
+	 */
+	fx_flac_frame_header_t *frame_header;
+
+	/**
+	 * Structure holding the subframe header.
+	 */
+	fx_flac_subframe_header_t *subframe_header;
+
+	/**
+	 * Buffer used for storing the LPC coefficients.
+	 */
+	int32_t *qbuf;
+
+	/**
+	 * Structure holding the temporary/output buffers for each channel.
+	 */
+	int32_t *blkbuf[FLAC_MAX_CHANNEL_COUNT];
+};
+
+/******************************************************************************
+ * PRIVATE CODE                                                               *
+ ******************************************************************************/
+
+/******************************************************************************
+ * Initialization code utils                                                  *
+ ******************************************************************************/
+
+static bool _fx_flac_check_params(uint16_t max_block_size,
+                                  uint8_t max_channels) {
+	const bool nonzero = (max_block_size != 0U) && (max_channels != 0U);
+	return nonzero && !(max_channels > FLAC_MAX_CHANNEL_COUNT);
+}
+
+/******************************************************************************
+ * FLAC enum decoders                                                         *
+ ******************************************************************************/
+
+static bool _fx_flac_decode_block_size(fx_flac_block_size_t block_size_enum,
+                                       uint32_t *block_size) {
+	/* Table entries: negative = invalid code, zero = leave *block_size as it
+	   is, positive = the block size itself. */
+	const int32_t entry = fx_flac_block_sizes_[(int)block_size_enum];
+	if (entry > 0) {
+		*block_size = entry;
+	}
+	return entry >= 0;
+}
+
+static bool _fx_flac_decode_sample_rate(fx_flac_sample_rate_t sample_rate_enum,
+                                        uint32_t *sample_rate) {
+	/* Table entries: negative = invalid code, zero = leave *sample_rate as it
+	   is, positive = the sample rate itself. */
+	const int32_t entry = fx_flac_sample_rates_[(int)sample_rate_enum];
+	if (entry > 0) {
+		*sample_rate = entry;
+	}
+	return entry >= 0;
+}
+
+static bool _fx_flac_decode_sample_size(fx_flac_sample_size_t sample_size_enum,
+                                        uint8_t *sample_size) {
+	/* Table entries: negative = invalid code, zero = leave *sample_size as it
+	   is, positive = the sample size itself. */
+	const int8_t entry = fx_flac_sample_sizes_[(int)sample_size_enum];
+	if (entry > 0) {
+		*sample_size = entry;
+	}
+	return entry >= 0;
+}
+
+/** Derives the channel count from the channel assignment; never fails. */
+static bool _fx_flac_decode_channel_count(
+    fx_flac_channel_assignment_t channel_assignment, uint8_t *channel_count) {
+	if (channel_assignment < LEFT_SIDE_STEREO) {
+		*channel_count = (uint8_t)channel_assignment + 1U; /* independent */
+	} else {
+		*channel_count = 2U; /* left/side, right/side or mid/side */
+	}
+	return true;
+}
+
+/******************************************************************************
+ * Decoding functions                                                         *
+ ******************************************************************************/
+
+static inline void _fx_flac_post_process_left_side(int32_t *blk1, int32_t *blk2,
+                                                   uint32_t blk_size) {
+	const int32_t *const src = (int32_t *)FX_ASSUME_ALIGNED(blk1);
+	int32_t *const dst = (int32_t *)FX_ASSUME_ALIGNED(blk2);
+	for (uint32_t k = 0U; k != blk_size; ++k) {
+		dst[k] = src[k] - dst[k]; /* blk2 := blk1 - blk2, blk1 is only read */
+	}
+}
+
+static inline void _fx_flac_post_process_right_side(int32_t *blk1,
+                                                    int32_t *blk2,
+                                                    uint32_t blk_size) {
+	int32_t *const dst = (int32_t *)FX_ASSUME_ALIGNED(blk1);
+	const int32_t *const src = (int32_t *)FX_ASSUME_ALIGNED(blk2);
+	for (uint32_t k = 0U; k != blk_size; ++k) {
+		dst[k] += src[k]; /* blk1 := blk1 + blk2, blk2 is only read */
+	}
+}
+
+static inline void _fx_flac_post_process_mid_side(int32_t *blk1, int32_t *blk2,
+                                                  uint32_t blk_size) {
+	int32_t *const ch_a = (int32_t *)FX_ASSUME_ALIGNED(blk1);
+	int32_t *const ch_b = (int32_t *)FX_ASSUME_ALIGNED(blk2);
+	/* Same arithmetic as the code in libflac's stream_decoder.c */
+	for (uint32_t k = 0U; k != blk_size; ++k) {
+		const int32_t side = ch_b[k];
+		/* Doubled mid value, low bit taken from side to round correctly */
+		int32_t mid2 = ((uint32_t)ch_a[k]) << 1;
+		mid2 |= (side & 1);
+		ch_a[k] = (mid2 + side) >> 1;
+		ch_b[k] = (mid2 - side) >> 1;
+	}
+}
+
+static inline void _fx_flac_restore_lpc_signal(int32_t *blk, uint32_t blk_size,
+                                               int32_t *lpc_coeffs,
+                                               uint8_t lpc_order,
+                                               int8_t lpc_shift) {
+	int32_t *const sig = (int32_t *)FX_ASSUME_ALIGNED(blk);
+	const int32_t *const q = (int32_t *)FX_ASSUME_ALIGNED(lpc_coeffs);
+	/* From index lpc_order on: sample += shifted sum over previous samples */
+	for (uint32_t pos = lpc_order; pos < blk_size; pos++) {
+		int64_t pred = 0;
+		for (uint32_t tap = 1U; tap <= lpc_order; tap++) {
+			pred += (int64_t)q[tap - 1U] * (int64_t)sig[pos - tap];
+		}
+		sig[pos] = sig[pos] + (pred >> lpc_shift);
+	}
+}
+
+/******************************************************************************
+ * Stream utility functions and macros                                        *
+ ******************************************************************************/
+
+/* Sign-extends the low b bits of x; see http://graphics.stanford.edu/~seander/bithacks.html#FixedSignExtend */
+#define SIGN_EXTEND(x, b) \
+	((int64_t)((x) ^ (1LU << ((b)-1U))) - (int64_t)(1LU << ((b)-1U)))
+/* These macros use the caller's "inst" and, to hold results, int64_t tmp_. */
+#define ENSURE_BITS(n)                                 \
+	if (!fx_bitstream_can_read(&inst->bitstream, n)) { \
+		return false; /* Need more data */             \
+	}
+/* READ_BITS/PEEK_BITS expand to two statements: use as "x = READ_BITS(n);". */
+#define READ_BITS(n)                                         \
+	(tmp_ = fx_bitstream_try_read_msb(&inst->bitstream, n)); \
+	if (tmp_ < 0) {                                          \
+		return false; /* Need more data */                   \
+	}
+
+#define READ_BITS_FAST(n) (tmp_ = fx_bitstream_read_msb(&inst->bitstream, n));
+
+#define PEEK_BITS(n)                                         \
+	(tmp_ = fx_bitstream_try_peek_msb(&inst->bitstream, n)); \
+	if (tmp_ < 0) {                                          \
+		return false; /* Need more data */                   \
+	}
+
+#define SYNC_BYTESTREAM()                        \
+	{                                            \
+		uint8_t n_ = inst->bitstream.pos & 0x07; \
+		if (n_) {                                \
+			READ_BITS(8U - n_);                  \
+		}                                        \
+	}
+
+#ifdef FX_FLAC_NO_CRC
+
+/* Just alias the CRC versions of the READ macros to the read macros
+   themselves */
+#define READ_BITS_CRC(n) READ_BITS(n)
+#define READ_BITS_FAST_CRC(n) READ_BITS_FAST(n)
+#define READ_BITS_DCRC(n) READ_BITS(n)
+#define READ_BITS_FAST_DCRC(n) READ_BITS_FAST(n)
+#define SYNC_BYTESTREAM_CRC() SYNC_BYTESTREAM()
+
+#else /* FX_FLAC_NO_CRC */
+
+/* Update the frame checksum (CRC-16) while reading data */
+
+#define READ_BITS_CRC(n)                                                       \
+	(tmp_ = fx_bitstream_try_read_msb_ex(&inst->bitstream, n, _fx_flac_crc16_, \
+	                                     inst));                               \
+	if (tmp_ < 0) {                                                            \
+		return false; /* Need more data */                                     \
+	}
+
+#define READ_BITS_FAST_CRC(n)                                              \
+	(tmp_ = fx_bitstream_read_msb_ex(&inst->bitstream, n, _fx_flac_crc16_, \
+	                                 inst));
+
+/* DCRC -> Dual CRC: updates the header CRC-8 and the frame CRC-16 */
+
+#define READ_BITS_DCRC(n)                                              \
+	(tmp_ = fx_bitstream_try_read_msb_ex(&inst->bitstream, n,          \
+	                                     _fx_flac_double_crc_, inst)); \
+	if (tmp_ < 0) {                                                    \
+		return false; /* Need more data */                             \
+	}
+
+#define READ_BITS_FAST_DCRC(n)                            \
+	(tmp_ = fx_bitstream_read_msb_ex(&inst->bitstream, n, \
+	                                 _fx_flac_double_crc_, inst));
+
+#define SYNC_BYTESTREAM_CRC()                    \
+	{                                            \
+		uint8_t n_ = inst->bitstream.pos & 0x07; \
+		if (n_) {                                \
+			READ_BITS_CRC(8U - n_);              \
+		}                                        \
+	}
+
+static const uint8_t fx_flac_crc8_table_[256] = {
+    0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31,
+    0x24, 0x23, 0x2a, 0x2d, 0x70, 0x77, 0x7e, 0x79, 0x6c, 0x6b, 0x62, 0x65,
+    0x48, 0x4f, 0x46, 0x41, 0x54, 0x53, 0x5a, 0x5d, 0xe0, 0xe7, 0xee, 0xe9,
+    0xfc, 0xfb, 0xf2, 0xf5, 0xd8, 0xdf, 0xd6, 0xd1, 0xc4, 0xc3, 0xca, 0xcd,
+    0x90, 0x97, 0x9e, 0x99, 0x8c, 0x8b, 0x82, 0x85, 0xa8, 0xaf, 0xa6, 0xa1,
+    0xb4, 0xb3, 0xba, 0xbd, 0xc7, 0xc0, 0xc9, 0xce, 0xdb, 0xdc, 0xd5, 0xd2,
+    0xff, 0xf8, 0xf1, 0xf6, 0xe3, 0xe4, 0xed, 0xea, 0xb7, 0xb0, 0xb9, 0xbe,
+    0xab, 0xac, 0xa5, 0xa2, 0x8f, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9d, 0x9a,
+    0x27, 0x20, 0x29, 0x2e, 0x3b, 0x3c, 0x35, 0x32, 0x1f, 0x18, 0x11, 0x16,
+    0x03, 0x04, 0x0d, 0x0a, 0x57, 0x50, 0x59, 0x5e, 0x4b, 0x4c, 0x45, 0x42,
+    0x6f, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7d, 0x7a, 0x89, 0x8e, 0x87, 0x80,
+    0x95, 0x92, 0x9b, 0x9c, 0xb1, 0xb6, 0xbf, 0xb8, 0xad, 0xaa, 0xa3, 0xa4,
+    0xf9, 0xfe, 0xf7, 0xf0, 0xe5, 0xe2, 0xeb, 0xec, 0xc1, 0xc6, 0xcf, 0xc8,
+    0xdd, 0xda, 0xd3, 0xd4, 0x69, 0x6e, 0x67, 0x60, 0x75, 0x72, 0x7b, 0x7c,
+    0x51, 0x56, 0x5f, 0x58, 0x4d, 0x4a, 0x43, 0x44, 0x19, 0x1e, 0x17, 0x10,
+    0x05, 0x02, 0x0b, 0x0c, 0x21, 0x26, 0x2f, 0x28, 0x3d, 0x3a, 0x33, 0x34,
+    0x4e, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5c, 0x5b, 0x76, 0x71, 0x78, 0x7f,
+    0x6a, 0x6d, 0x64, 0x63, 0x3e, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2c, 0x2b,
+    0x06, 0x01, 0x08, 0x0f, 0x1a, 0x1d, 0x14, 0x13, 0xae, 0xa9, 0xa0, 0xa7,
+    0xb2, 0xb5, 0xbc, 0xbb, 0x96, 0x91, 0x98, 0x9f, 0x8a, 0x8d, 0x84, 0x83,
+    0xde, 0xd9, 0xd0, 0xd7, 0xc2, 0xc5, 0xcc, 0xcb, 0xe6, 0xe1, 0xe8, 0xef,
+    0xfa, 0xfd, 0xf4, 0xf3}; /* byte-wise CRC-8 steps, polynomial 0x07 */
+
+static const uint16_t fx_flac_crc16_table_[256] = {
+    0x0000, 0x8005, 0x800f, 0x000a, 0x801b, 0x001e, 0x0014, 0x8011, 0x8033,
+    0x0036, 0x003c, 0x8039, 0x0028, 0x802d, 0x8027, 0x0022, 0x8063, 0x0066,
+    0x006c, 0x8069, 0x0078, 0x807d, 0x8077, 0x0072, 0x0050, 0x8055, 0x805f,
+    0x005a, 0x804b, 0x004e, 0x0044, 0x8041, 0x80c3, 0x00c6, 0x00cc, 0x80c9,
+    0x00d8, 0x80dd, 0x80d7, 0x00d2, 0x00f0, 0x80f5, 0x80ff, 0x00fa, 0x80eb,
+    0x00ee, 0x00e4, 0x80e1, 0x00a0, 0x80a5, 0x80af, 0x00aa, 0x80bb, 0x00be,
+    0x00b4, 0x80b1, 0x8093, 0x0096, 0x009c, 0x8099, 0x0088, 0x808d, 0x8087,
+    0x0082, 0x8183, 0x0186, 0x018c, 0x8189, 0x0198, 0x819d, 0x8197, 0x0192,
+    0x01b0, 0x81b5, 0x81bf, 0x01ba, 0x81ab, 0x01ae, 0x01a4, 0x81a1, 0x01e0,
+    0x81e5, 0x81ef, 0x01ea, 0x81fb, 0x01fe, 0x01f4, 0x81f1, 0x81d3, 0x01d6,
+    0x01dc, 0x81d9, 0x01c8, 0x81cd, 0x81c7, 0x01c2, 0x0140, 0x8145, 0x814f,
+    0x014a, 0x815b, 0x015e, 0x0154, 0x8151, 0x8173, 0x0176, 0x017c, 0x8179,
+    0x0168, 0x816d, 0x8167, 0x0162, 0x8123, 0x0126, 0x012c, 0x8129, 0x0138,
+    0x813d, 0x8137, 0x0132, 0x0110, 0x8115, 0x811f, 0x011a, 0x810b, 0x010e,
+    0x0104, 0x8101, 0x8303, 0x0306, 0x030c, 0x8309, 0x0318, 0x831d, 0x8317,
+    0x0312, 0x0330, 0x8335, 0x833f, 0x033a, 0x832b, 0x032e, 0x0324, 0x8321,
+    0x0360, 0x8365, 0x836f, 0x036a, 0x837b, 0x037e, 0x0374, 0x8371, 0x8353,
+    0x0356, 0x035c, 0x8359, 0x0348, 0x834d, 0x8347, 0x0342, 0x03c0, 0x83c5,
+    0x83cf, 0x03ca, 0x83db, 0x03de, 0x03d4, 0x83d1, 0x83f3, 0x03f6, 0x03fc,
+    0x83f9, 0x03e8, 0x83ed, 0x83e7, 0x03e2, 0x83a3, 0x03a6, 0x03ac, 0x83a9,
+    0x03b8, 0x83bd, 0x83b7, 0x03b2, 0x0390, 0x8395, 0x839f, 0x039a, 0x838b,
+    0x038e, 0x0384, 0x8381, 0x0280, 0x8285, 0x828f, 0x028a, 0x829b, 0x029e,
+    0x0294, 0x8291, 0x82b3, 0x02b6, 0x02bc, 0x82b9, 0x02a8, 0x82ad, 0x82a7,
+    0x02a2, 0x82e3, 0x02e6, 0x02ec, 0x82e9, 0x02f8, 0x82fd, 0x82f7, 0x02f2,
+    0x02d0, 0x82d5, 0x82df, 0x02da, 0x82cb, 0x02ce, 0x02c4, 0x82c1, 0x8243,
+    0x0246, 0x024c, 0x8249, 0x0258, 0x825d, 0x8257, 0x0252, 0x0270, 0x8275,
+    0x827f, 0x027a, 0x826b, 0x026e, 0x0264, 0x8261, 0x0220, 0x8225, 0x822f,
+    0x022a, 0x823b, 0x023e, 0x0234, 0x8231, 0x8213, 0x0216, 0x021c, 0x8219,
+    0x0208, 0x820d, 0x8207, 0x0202}; /* byte-wise CRC-16 steps, poly 0x8005 */
+
+static inline void _fx_flac_crc8_(uint8_t byte, void *data) {
+	fx_flac_t *const dec = (fx_flac_t *)FX_ASSUME_ALIGNED(data);
+	dec->crc8 = fx_flac_crc8_table_[(uint8_t)(dec->crc8 ^ byte)];
+}
+
+static inline void _fx_flac_crc16_(uint8_t byte, void *data) {
+	fx_flac_t *const dec = (fx_flac_t *)FX_ASSUME_ALIGNED(data);
+	const uint8_t top = (uint8_t)((dec->crc16 >> 8U) ^ byte);
+	dec->crc16 = (uint16_t)((dec->crc16 << 8U) ^ fx_flac_crc16_table_[top]);
+}
+
+static inline void _fx_flac_double_crc_(uint8_t byte, void *data) {
+	_fx_flac_crc8_(byte, data); /* updates inst->crc8 */
+	_fx_flac_crc16_(byte, data); /* updates inst->crc16 */
+}
+#endif /* FX_FLAC_NO_CRC */
+/* Reads the UTF-8 style coded integer of a frame header into *tar. */
+static bool _fx_flac_reader_utf8_coded_int(fx_flac_t *inst, uint8_t max_n,
+                                           uint64_t *tar) {
+	int64_t tmp_; /* Used by the READ_BITS macro */
+	/* Returns false (nothing consumed) unless max_n bytes are available. An
+	   invalid encoding sets priv_state to FLAC_FRAME_SYNC and returns true. */
+	ENSURE_BITS(max_n * 8U);
+	uint8_t v = READ_BITS_FAST_DCRC(8U); /* Read the first byte */
+
+	/* Count the leading ones in the first byte, shifting them out of v */
+	uint8_t n_ones = 0U;
+	while (v & 0x80U) {
+		v = v << 1U;
+		n_ones++;
+	}
+
+	/* n_ones is the length in bytes (zero: single byte). Exactly one leading
+	   one (10xxxxxx) marks a continuation byte, which cannot come first. */
+	if (n_ones == 1U || n_ones > max_n) {
+		inst->priv_state = FLAC_FRAME_SYNC; /* Invalid header */
+		return true;
+	}
+
+	*tar = (v >> n_ones); /* Shift v back and store in tar */
+
+	/* Read all continuation bytes */
+	for (uint8_t i = 1U; i < n_ones; i++) {
+		v = READ_BITS_FAST_DCRC(8U);
+		/* Abort if continuation byte doesn't start with correct sequence */
+		if ((v & 0xC0U) != 0x80) {
+			inst->priv_state = FLAC_FRAME_SYNC; /* Invalid header */
+			return true;
+		}
+		*tar = ((*tar) << 6U) | (v & 0x3F);
+	}
+	return true;
+}
+
+/******************************************************************************
+ * Private decoder state machine                                              *
+ ******************************************************************************/
+
+/*
+ * Note: the boolean return value of these functions indicates whether they
+ * ran out of data -- true indicates that there is still enough data left,
+ * false indicates that the outer state machine should return to the user code
+ * to read more data. The return value does NOT indicate success/failure. This
+ * is what inst->state == FLAC_ERR is for.
+ */
+
+static bool _fx_flac_handle_err(fx_flac_t *inst) {
+	/* TODO: Add flags to fx_flac_t which control this behaviour */
+
+	/* Once the end of the metadata has been reached, errors are recoverable:
+	   re-synchronise with the stream by searching for the next frame. */
+	const bool recoverable = !(inst->state < FLAC_END_OF_METADATA);
+	if (recoverable) {
+		inst->state = FLAC_SEARCH_FRAME;
+		inst->priv_state = FLAC_FRAME_SYNC;
+	} else {
+		/* An error while still searching for metadata is fatal. */
+		inst->state = FLAC_ERR;
+	}
+	return recoverable;
+}
+
+/**
+ * State machine used to search the beginning of the stream, i.e. the "fLaC"
+ * marker. One byte is consumed per call, which (for example) skips ID3v2 tags
+ * prepended to the file. Returns false if no input byte is available.
+ */
+static bool _fx_flac_process_init(fx_flac_t *inst) {
+	int64_t tmp_; /* Used by the READ_BITS macro */
+	uint8_t byte = READ_BITS(8); /* Next candidate byte of the marker */
+
+	/* State to continue with if "byte" does not extend the current match. An
+	   'f' may itself start the marker (as in "ffLaC"), so it must not be
+	   dropped; no other character of "fLaC" can begin a new match. */
+	const fx_flac_private_state_t restart =
+	    (byte == 'f') ? FLAC_SYNC_F : FLAC_SYNC_INIT;
+
+	switch (inst->priv_state) {
+		case FLAC_SYNC_INIT:
+			/* 'f' starts a match, any other byte is skipped */
+			inst->priv_state = restart;
+			break;
+		case FLAC_SYNC_F:
+			inst->priv_state = (byte == 'L') ? FLAC_SYNC_L : restart;
+			break;
+		case FLAC_SYNC_L:
+			inst->priv_state = (byte == 'a') ? FLAC_SYNC_A : restart;
+			break;
+		case FLAC_SYNC_A:
+			if (byte == 'C') {
+				/* Marker complete, the metadata blocks follow */
+				inst->state = FLAC_IN_METADATA;
+				inst->priv_state = FLAC_METADATA_HEADER;
+			} else {
+				inst->priv_state = restart;
+			}
+			break;
+		default:
+			/* Not one of the sync states (the byte stays consumed) */
+			return _fx_flac_handle_err(inst);
+	}
+	return true;
+}
+/* Reads metadata block headers, decodes STREAMINFO and skips other blocks. */
+static bool _fx_flac_process_in_metadata(fx_flac_t *inst) {
+	int64_t tmp_; /* Used by the READ_BITS macro */
+	switch (inst->priv_state) {
+		case FLAC_METADATA_HEADER:
+			ENSURE_BITS(32U);
+			inst->metadata->is_last = READ_BITS_FAST(1U);
+			inst->metadata->type = (fx_flac_metadata_type_t)READ_BITS_FAST(7U);
+			if (inst->metadata->type == META_TYPE_INVALID) {
+				return _fx_flac_handle_err(inst);
+			}
+			inst->metadata->length = inst->n_bytes_rem = READ_BITS_FAST(24U);
+			if (inst->metadata->type == META_TYPE_STREAMINFO) {
+				inst->priv_state = FLAC_METADATA_SINFO;
+				/* The stream info block must be exactly 34 bytes long */
+				if (inst->metadata->length != 34U) {
+					return _fx_flac_handle_err(inst);
+				}
+			} else {
+				inst->priv_state = FLAC_METADATA_SKIP;
+			}
+			break;
+		case FLAC_METADATA_SINFO:
+			switch (inst->n_bytes_rem) {
+				case 34U:
+					inst->streaminfo->min_block_size = READ_BITS(16U);
+					inst->n_bytes_rem -= 2U;
+					break;
+				case 32U:
+					inst->streaminfo->max_block_size = READ_BITS(16U);
+					inst->n_bytes_rem -= 2U;
+					break;
+				case 30U:
+					inst->streaminfo->min_frame_size = READ_BITS(24U);
+					inst->n_bytes_rem -= 3U;
+					break;
+				case 27U:
+					inst->streaminfo->max_frame_size = READ_BITS(24U);
+					inst->n_bytes_rem -= 3U;
+					break;
+				case 24U:
+					ENSURE_BITS(28U);
+					inst->streaminfo->sample_rate = READ_BITS_FAST(20U);
+					inst->streaminfo->n_channels = 1U + READ_BITS_FAST(3U);
+					inst->streaminfo->sample_size = 1U + READ_BITS_FAST(5U);
+					inst->n_bytes_rem -= 4U; /* only 28 bits read so far */
+					break;
+				case 20U:
+					inst->streaminfo->n_samples = READ_BITS(36U);
+					inst->n_bytes_rem -= 4U; /* 28 + 36 bits = 8 bytes */
+					break;
+				case 1U:
+				case 2U:
+				case 3U:
+				case 4U:
+				case 5U:
+				case 6U:
+				case 7U:
+				case 8U:
+				case 9U:
+				case 10U:
+				case 11U:
+				case 12U:
+				case 13U:
+				case 14U:
+				case 15U:
+				case 16U:
+					inst->streaminfo->md5_sum[16U - inst->n_bytes_rem] =
+					    READ_BITS(8);
+					inst->n_bytes_rem -= 1U;
+					break;
+				case 0U:
+					/* Use the FLAC_METADATA_SKIP state logic below */
+					inst->priv_state = FLAC_METADATA_SKIP;
+					break;
+				default:
+					return _fx_flac_handle_err(inst);
+			}
+			break;
+		case FLAC_METADATA_SKIP: {
+			const uint8_t n_read =
+			    (inst->n_bytes_rem >= 7U) ? 7U : inst->n_bytes_rem;
+			if (n_read == 0U) { /* We read all the data for this block */
+				if (inst->metadata->is_last) {
+					/* Last metadata block, transition to the next state */
+					inst->state = FLAC_END_OF_METADATA;
+				} else {
+					/* End of metadata block, read the next one */
+					inst->priv_state = FLAC_METADATA_HEADER;
+				}
+				break;
+			}
+			READ_BITS(n_read * 8U);
+			inst->n_bytes_rem -= n_read;
+			break;
+		}
+		default:
+			return _fx_flac_handle_err(inst); /* Internal error */
+	}
+	return true;
+}
+/* Finds the next frame sync code, then reads and validates the frame header. */
+static bool _fx_flac_process_search_frame(fx_flac_t *inst) {
+	int64_t tmp_; /* Used by the READ_BITS macro */
+	fx_flac_frame_header_t *fh = inst->frame_header;
+	fx_flac_streaminfo_t *si = inst->streaminfo;
+	switch (inst->priv_state) {
+		case FLAC_FRAME_SYNC:
+			/* Synchronise with the underlying bytestream */
+			SYNC_BYTESTREAM();
+
+			ENSURE_BITS(15U);
+			uint16_t sync_code = PEEK_BITS(15U);
+			if (sync_code != 0x7FFCU) {
+				READ_BITS(8U); /* Next byte (assume frames are byte aligned). */
+				return true;
+			} else {
+				inst->crc8 = 0U; /* Reset the checksums */
+				inst->crc16 = 0U;
+				inst->priv_state = FLAC_FRAME_HEADER;
+				READ_BITS_FAST_DCRC(15U);
+			}
+			break;
+		case FLAC_FRAME_HEADER:
+			ENSURE_BITS(17U);
+
+			/* Read the frame header bits */
+			fh->blocking_strategy =
+			    (fx_flac_blocking_strategy_t)READ_BITS_FAST_DCRC(1U);
+			fh->block_size_enum = (fx_flac_block_size_t)READ_BITS_FAST_DCRC(4U);
+			fh->sample_rate_enum =
+			    (fx_flac_sample_rate_t)READ_BITS_FAST_DCRC(4U);
+			fh->channel_assignment =
+			    (fx_flac_channel_assignment_t)READ_BITS_FAST_DCRC(4U);
+			fh->sample_size_enum =
+			    (fx_flac_sample_size_t)READ_BITS_FAST_DCRC(3U);
+			READ_BITS_FAST_DCRC(1U);
+			if (tmp_ != 0U || fh->channel_assignment > MID_SIDE_STEREO) {
+				return _fx_flac_handle_err(inst); /* Invalid header */
+			}
+			/* Copy sample rate and sample size from the streaminfo */
+			fh->sample_rate = si->sample_rate;
+			fh->sample_size = si->sample_size;
+			/* Decode the individual enums */
+			if (!_fx_flac_decode_block_size(fh->block_size_enum,
+			                                &fh->block_size) ||
+			    !_fx_flac_decode_sample_rate(fh->sample_rate_enum,
+			                                 &fh->sample_rate) ||
+			    !_fx_flac_decode_sample_size(fh->sample_size_enum,
+			                                 &fh->sample_size) ||
+			    !_fx_flac_decode_channel_count(fh->channel_assignment,
+			                                   &fh->channel_count)) {
+				inst->priv_state = FLAC_FRAME_SYNC; /* Got invalid value */
+				break;
+			}
+			inst->priv_state = FLAC_FRAME_HEADER_SYNC_INFO;
+			break;
+		case FLAC_FRAME_HEADER_SYNC_INFO:
+			if (!_fx_flac_reader_utf8_coded_int(
+			        inst, (fh->blocking_strategy == BLK_VARIABLE) ? 7U : 6U,
+			        &fh->sync_info)) {
+				return false;
+			}
+			/* The reader requests a resync by changing priv_state when the
+			   coded number is invalid; only advance if it left it alone. */
+			if (inst->priv_state == FLAC_FRAME_HEADER_SYNC_INFO) {
+				inst->priv_state = FLAC_FRAME_HEADER_AUX;
+			}
+			break;
+		case FLAC_FRAME_HEADER_AUX:
+			ENSURE_BITS(32U);
+			/* Read block size/sample rate if not coded in the header enums */
+			switch (fh->block_size_enum) {
+				case BLK_SIZE_READ_8BIT:
+					fh->block_size = 1U + READ_BITS_FAST_DCRC(8U);
+					break;
+				case BLK_SIZE_READ_16BIT:
+					fh->block_size = 1U + READ_BITS_FAST_DCRC(16U);
+					break;
+				default:
+					break;
+			}
+			switch (fh->sample_rate_enum) {
+				case FS_READ_8BIT_KHZ:
+					fh->sample_rate = 1000UL * READ_BITS_FAST_DCRC(8U);
+					break;
+				case FS_READ_16BIT_HZ:
+					fh->sample_rate = READ_BITS_FAST_DCRC(16U);
+					break;
+				case FS_READ_16BIT_DHZ:
+					fh->sample_rate = 10UL * READ_BITS_FAST_DCRC(16U);
+					break;
+				default:
+					break;
+			}
+			inst->priv_state = FLAC_FRAME_HEADER_CRC;
+			break;
+		case FLAC_FRAME_HEADER_CRC:
+			/* Read the stored CRC8 and compare it with the computed one. On
+			   mismatch this is not a valid header: continue searching. */
+			fh->crc8 = READ_BITS_CRC(8U);
+#ifndef FX_FLAC_NO_CRC
+			if (fh->crc8 != inst->crc8) {
+				return _fx_flac_handle_err(inst);
+			}
+#endif
+
+			/* Make sure the decode has enough space */
+			if ((fh->block_size > inst->max_block_size) ||
+			    (fh->channel_count > inst->max_channels)) {
+				return _fx_flac_handle_err(inst);
+			}
+
+			/* Decode the subframes */
+			inst->state = FLAC_IN_FRAME;
+			inst->priv_state = FLAC_SUBFRAME_HEADER;
+			inst->chan_cur = 0U; /* Start with the first channel */
+			break;
+		default:
+			return _fx_flac_handle_err(inst);
+	}
+	return true;
+}
+
+static bool _fx_flac_process_in_frame(fx_flac_t *inst) {
+	int64_t tmp_ = 0; /* Used by the READ_BITS macro */
+	fx_flac_frame_header_t *fh = inst->frame_header;
+	fx_flac_subframe_header_t *sfh = inst->subframe_header;
+	int32_t *blk = inst->blkbuf[inst->chan_cur % FLAC_MAX_CHANNEL_COUNT];
+	const uint32_t blk_n = fh->block_size;
+
+	/* Figure out the number of bits to read for sample. This depends on the
+	   channel assignment. */
+	uint8_t bps = fh->sample_size - sfh->wasted_bits;
+	if ((fh->channel_assignment == LEFT_SIDE_STEREO && inst->chan_cur == 1) ||
+	    (fh->channel_assignment == RIGHT_SIDE_STEREO && inst->chan_cur == 0) ||
+	    (fh->channel_assignment == MID_SIDE_STEREO && inst->chan_cur == 1)) {
+		bps++;
+	}
+
+	/* Discard frames with invalid bits per sample values */
+	if (bps == 0U || bps > 32U) {
+		return _fx_flac_handle_err(inst);
+	}
+
+	/* Dispatch on priv_state: each call handles one step of the subframe
+	   decoder and stores the follow-up state for the next call. */
+	switch (inst->priv_state) {
+		case FLAC_SUBFRAME_HEADER: {
+			ENSURE_BITS(40U);
+
+			/* Reset the block write cursor, make sure initial blk sample is set
+			   to zero for zero-order fixed LPC */
+			inst->blk_cur = 0U;
+			blk[0U] = 0U;
+
+			/* Read a zero padding bit. This must be zero. */
+			uint8_t padding = READ_BITS_FAST_CRC(1U);
+			bool valid = padding == 0U;
+
+			/* Read the frame type and order */
+			uint8_t type = READ_BITS_FAST_CRC(6U);
+			if (type & 0x20U) {
+				sfh->order = (type & 0x1FU) + 1U;
+				sfh->type = SFT_LPC;
+				sfh->lpc_coeffs = inst->qbuf;
+				inst->priv_state = FLAC_SUBFRAME_LPC;
+			} else if (type & 0x10U) {
+				return _fx_flac_handle_err(inst);
+			} else if (type & 0x08U) {
+				sfh->order = type & 0x07U;
+				sfh->type = SFT_FIXED;
+				sfh->lpc_shift = 0;
+				inst->priv_state = FLAC_SUBFRAME_FIXED;
+				valid = valid && (sfh->order <= 4U);
+				if (valid) {
+					sfh->lpc_coeffs =
+					    (int32_t *)_fx_flac_fixed_coeffs[sfh->order];
+				}
+			} else if ((type & 0x04U) || (type & 0x02U)) {
+				return _fx_flac_handle_err(inst);
+			} else if (type & 0x01U) {
+				sfh->type = SFT_VERBATIM;
+				inst->priv_state = FLAC_SUBFRAME_VERBATIM;
+			} else {
+				sfh->type = SFT_CONSTANT;
+				inst->priv_state = FLAC_SUBFRAME_CONSTANT;
+			}
+
+			/* Read the "wasted_bits" flag */
+			sfh->wasted_bits = READ_BITS_FAST_CRC(1U);
+			if (sfh->wasted_bits) {
+				for (uint8_t i = 1U; i <= 30U; i++) {
+					const uint8_t bit = READ_BITS_FAST_CRC(1U);
+					if (bit == 1U) {
+						sfh->wasted_bits = i;
+						break;
+					}
+				}
+				valid = valid && (sfh->wasted_bits > 0U) &&
+				        (sfh->wasted_bits < fh->sample_size);
+			}
+
+			/* Make sure the block is large enough for the initial samples */
+			valid = valid && (blk_n >= sfh->order);
+			if (!valid) {
+				_fx_flac_handle_err(inst);
+			}
+			break;
+		}
+		case FLAC_SUBFRAME_CONSTANT: {
+			/* Read a single sample value and spread it over the entire block
+			   buffer for this subframe. */
+			blk[0U] = READ_BITS_CRC(bps);
+			blk[0U] = SIGN_EXTEND(blk[0U], bps);
+			for (uint16_t i = 1U; i < blk_n; i++) {
+				blk[i] = blk[0U];
+			}
+			inst->priv_state = FLAC_SUBFRAME_FINALIZE;
+			break;
+		}
+		case FLAC_SUBFRAME_VERBATIM:
+		case FLAC_SUBFRAME_FIXED:
+		case FLAC_SUBFRAME_LPC: {
+			/* Either just read up to "order" samples, or the entire block */
+			const uint32_t n = (sfh->type == SFT_VERBATIM) ? blk_n : sfh->order;
+			while (inst->blk_cur < n) {
+				blk[inst->blk_cur] = READ_BITS_CRC(bps);
+				blk[inst->blk_cur] = SIGN_EXTEND(blk[inst->blk_cur], bps);
+				inst->blk_cur++;
+			}
+			inst->priv_state =
+			    (fx_flac_private_state_t)((int)inst->priv_state + 1U);
+			break;
+		}
+		case FLAC_SUBFRAME_LPC_HEADER: {
+			/* Read the coefficient precision as well as the shift value */
+			ENSURE_BITS(9U);
+			const uint8_t prec = READ_BITS_FAST_CRC(4U);
+			const uint8_t shift = READ_BITS_FAST_CRC(5U);
+			if (prec == 15U) { /* Precision of 15 bits is invalid */
+				return _fx_flac_handle_err(inst);
+			}
+			sfh->lpc_prec = prec + 1U;
+			sfh->lpc_shift = SIGN_EXTEND(shift, 5U);
+			if (sfh->lpc_shift < 0) {
+				return _fx_flac_handle_err(inst);
+			}
+			inst->coef_cur = 0U;
+			inst->priv_state = FLAC_SUBFRAME_LPC_COEFFS;
+			break;
+		}
+		case FLAC_SUBFRAME_LPC_COEFFS:
+			/* Read the individual predictor coefficients */
+			while (inst->coef_cur < sfh->order) {
+				uint32_t coef = READ_BITS_CRC(sfh->lpc_prec);
+				sfh->lpc_coeffs[inst->coef_cur] =
+				    SIGN_EXTEND(coef, sfh->lpc_prec);
+				inst->coef_cur++;
+			}
+			inst->priv_state = FLAC_SUBFRAME_LPC_RESIDUAL;
+			break;
+		case FLAC_SUBFRAME_FIXED_RESIDUAL:
+		case FLAC_SUBFRAME_LPC_RESIDUAL: {
+			ENSURE_BITS(6U);
+
+			/* Read the residual encoding type and the rice partition order */
+			sfh->residual_method =
+			    (fx_flac_residual_method_t)READ_BITS_FAST_CRC(2U);
+			if (sfh->residual_method > RES_RICE2) {
+				return _fx_flac_handle_err(inst);
+			}
+			sfh->rice_partition_order = READ_BITS_FAST_CRC(4U);
+			inst->partition_cur = 0U;
+			inst->priv_state = FLAC_SUBFRAME_RICE_INIT;
+			break;
+		}
+		case FLAC_SUBFRAME_RICE_INIT: {
+			/* Read the Rice parameter */
+			ENSURE_BITS(10U);
+
+			uint8_t n_bits = (sfh->residual_method == RES_RICE) ? 4U : 5U;
+			sfh->rice_parameter = READ_BITS_FAST_CRC(n_bits);
+			if (sfh->rice_parameter == ((1U << n_bits) - 1U)) {
+				sfh->rice_parameter = READ_BITS_FAST_CRC(5U);
+				inst->priv_state = FLAC_SUBFRAME_RICE_VERBATIM;
+			} else {
+				inst->priv_state = FLAC_SUBFRAME_RICE_UNARY;
+				inst->rice_unary_counter = 0U;
+			}
+
+			/* Compute the number of samples to read */
+			inst->partition_sample = blk_n >> sfh->rice_partition_order;
+			if (inst->partition_cur == 0U) {
+				/* First partition already includes verbatim samples */
+				if (inst->partition_sample < sfh->order) {
+					return _fx_flac_handle_err(
+					    inst); /* Number of samples is negative */
+				}
+				inst->partition_sample -= sfh->order;
+			}
+
+			/* Make sure we're never writing beyond the buffer for this
+			   channel */
+			if ((inst->partition_sample + inst->blk_cur) > blk_n) {
+				return _fx_flac_handle_err(inst);
+			}
+			break;
+		}
+		case FLAC_SUBFRAME_RICE:
+		case FLAC_SUBFRAME_RICE_UNARY:
+			/* Read the individual rice samples */
+			while (inst->partition_sample > 0U) {
+				/* Read the unary part of the Rice encoded sample bit-by-bit */
+				if (inst->priv_state == FLAC_SUBFRAME_RICE_UNARY) {
+					while (true) {
+						const uint8_t bit = READ_BITS_CRC(1U);
+						if (bit) {
+							break;
+						}
+						inst->rice_unary_counter++;
+					}
+				}
+
+				/* If there are no more bits left below, make sure we end up
+				   here instead of going through the unary decoder again. */
+				inst->priv_state = FLAC_SUBFRAME_RICE;
+
+				/* Read the remainder; widen q so the shift is unsigned */
+				uint32_t r = 0U;
+				if (sfh->rice_parameter > 0U) {
+					r = READ_BITS_CRC(sfh->rice_parameter);
+				}
+				const uint16_t q = inst->rice_unary_counter;
+				const uint32_t val = ((uint32_t)q << sfh->rice_parameter) | r;
+
+				/* Last bit determines sign */
+				if (val & 1) {
+					blk[inst->blk_cur] = -((int32_t)(val >> 1)) - 1;
+				} else {
+					blk[inst->blk_cur] = (int32_t)(val >> 1);
+				}
+
+				/* Read the next sample */
+				inst->rice_unary_counter = 0U;
+				inst->priv_state = FLAC_SUBFRAME_RICE_UNARY;
+				inst->blk_cur++;
+				inst->partition_sample--;
+			}
+			inst->priv_state = FLAC_SUBFRAME_RICE_FINALIZE;
+			break;
+		case FLAC_SUBFRAME_RICE_VERBATIM: {
+			/* Samples are encoded in verbatim in this partition */
+			const uint8_t bps = sfh->rice_parameter;
+			while (inst->partition_sample > 0U) {
+				blk[inst->blk_cur] = (bps == 0) ? 0U : READ_BITS_CRC(bps);
+				blk[inst->blk_cur] = SIGN_EXTEND(blk[inst->blk_cur], bps);
+				inst->blk_cur++;
+				inst->partition_sample--;
+			}
+			inst->priv_state = FLAC_SUBFRAME_RICE_FINALIZE;
+			break;
+		}
+		case FLAC_SUBFRAME_RICE_FINALIZE:
+			/* Go to the next partition or finalize this subframe */
+			inst->partition_cur++;
+			if (inst->partition_cur == (1U << sfh->rice_partition_order)) {
+				/* Decode the residual */
+				_fx_flac_restore_lpc_signal(blk, blk_n, sfh->lpc_coeffs,
+				                            sfh->order, sfh->lpc_shift);
+				inst->priv_state = FLAC_SUBFRAME_FINALIZE;
+			} else {
+				inst->priv_state = FLAC_SUBFRAME_RICE_INIT;
+			}
+			break;
+		case FLAC_SUBFRAME_FINALIZE: {
+			/* Apply the wasted bits transformation */
+			if (sfh->wasted_bits) {
+				uint8_t shift = sfh->wasted_bits;
+				for (uint16_t i = 0U; i < blk_n; i++) {
+					blk[i] = blk[i] * (1 << shift);
+				}
+			}
+
+			/* There is another subframe to read, continue! */
+			inst->chan_cur++; /* Go to the next channel */
+			if (inst->chan_cur < fh->channel_count) {
+				inst->priv_state = FLAC_SUBFRAME_HEADER;
+				break;
+			}
+
+			/* Synchronise with the underlying byte stream */
+			SYNC_BYTESTREAM_CRC();
+
+			/* Read the CRC16 sum, resync if it doesn't match our own */
+			uint16_t crc16 = READ_BITS(16U);
+#ifndef FX_FLAC_NO_CRC
+			if (crc16 != inst->crc16) {
+				return _fx_flac_handle_err(inst);
+			}
+#else
+			(void)crc16;
+#endif
+
+			/* Post process side-stereo */
+			int32_t *c1 = inst->blkbuf[0], *c2 = inst->blkbuf[1];
+			switch (fh->channel_assignment) {
+				case LEFT_SIDE_STEREO:
+					_fx_flac_post_process_left_side(c1, c2, blk_n);
+					break;
+				case RIGHT_SIDE_STEREO:
+					_fx_flac_post_process_right_side(c1, c2, blk_n);
+					break;
+				case MID_SIDE_STEREO:
+					_fx_flac_post_process_mid_side(c1, c2, blk_n);
+					break;
+				default:
+					break;
+			}
+
+			/* Shift the output such that the resulting int32 stream can be
+			   played back. */
+			uint8_t shift = 32U - fh->sample_size;
+			if (shift) {
+				for (uint8_t c = 0U; c < fh->channel_count; c++) {
+					int32_t *blk = inst->blkbuf[c];
+					for (uint16_t i = 0U; i < blk_n; i++) {
+						blk[i] = blk[i] * (1 << shift);
+					}
+				}
+			}
+
+			/* We're done decoding this frame! Notify the outer loop! */
+			inst->blk_cur = 0U; /* Reset the read cursor */
+			inst->chan_cur = 0U;
+			inst->state = FLAC_DECODED_FRAME;
+			break;
+		}
+		default:
+			inst->state = FLAC_ERR;
+			break;
+	}
+	return true;
+}
+
+static bool _fx_flac_process_decoded_frame(fx_flac_t *inst, int32_t *out,
+                                           uint32_t *out_len) {
+	/* Copies decoded samples from the per-channel block buffers into "out",
+	   interleaved by channel. On entry *out_len is the capacity of "out" in
+	   samples, on exit it holds the number of samples written. Returns true
+	   once the whole block has been handed over (state becomes
+	   FLAC_END_OF_FRAME) and false if more output space is needed. */
+	const fx_flac_frame_header_t *hdr = inst->frame_header;
+	const uint8_t n_chan = hdr->channel_count;
+
+	/* Samples still pending: the remaining channels at the current sample
+	   index plus all complete sample groups after it, capped by capacity. */
+	uint32_t budget = (hdr->block_size - inst->blk_cur - 1U) * n_chan +
+	                  (n_chan - inst->chan_cur);
+	if (*out_len < budget) {
+		budget = *out_len;
+	}
+
+	/* chan_cur/blk_cur form the read cursor and are kept in the instance,
+	   so a later call resumes exactly where this one stopped. */
+	uint32_t written;
+	for (written = 0U; written < budget; written++) {
+		out[written] = inst->blkbuf[inst->chan_cur][inst->blk_cur];
+
+		/* Step to the next channel; wrap to the next sample index after
+		   the last channel. */
+		if (++inst->chan_cur == n_chan) {
+			inst->chan_cur = 0U;
+			inst->blk_cur++;
+		}
+	}
+
+	/* Report how many samples were actually written */
+	*out_len = written;
+
+	/* Not at the end of the block yet: more output space is required */
+	if (inst->blk_cur != hdr->block_size) {
+		return false;
+	}
+	inst->state = FLAC_END_OF_FRAME;
+	return true;
+}
+
+/******************************************************************************
+ * PUBLIC API                                                                 *
+ ******************************************************************************/
+
+uint32_t fx_flac_size(uint32_t max_block_size, uint8_t max_channels) {
+	uint32_t total; /* Running size; any failing step makes the result 0 */
+	if (!_fx_flac_check_params(max_block_size, max_channels) ||
+	    !fx_mem_init_size(&total) ||
+	    !fx_mem_update_size(&total, sizeof(fx_flac_t)) ||
+	    !fx_mem_update_size(&total, sizeof(fx_flac_metadata_t)) ||
+	    !fx_mem_update_size(&total, sizeof(fx_flac_streaminfo_t)) ||
+	    !fx_mem_update_size(&total, sizeof(fx_flac_frame_header_t)) ||
+	    !fx_mem_update_size(&total, sizeof(fx_flac_subframe_header_t)) ||
+	    !fx_mem_update_size(&total, sizeof(int32_t) * 32U)) {
+		return 0;
+	}
+	uint8_t ch = 0; /* One buffer of max_block_size samples per channel */
+	while (ch < max_channels &&
+	       fx_mem_update_size(&total, sizeof(int32_t) * max_block_size)) {
+		ch++;
+	}
+	return (ch == max_channels) ? total : 0;
+}
+
+fx_flac_t *fx_flac_init(void *mem, uint16_t max_block_size,
+                        uint8_t max_channels) {
+	/* Reject invalid parameters, and a NULL region so that the result of
+	   malloc() can be passed in directly without a separate check. */
+	if (!_fx_flac_check_params(max_block_size, max_channels) || !mem) {
+		return NULL;
+	}
+
+	/* The caller gets the untouched "mem" pointer back, which keeps it
+	   valid as an argument to free(). The region itself is split up by
+	   successive fx_mem_align() calls, starting with the instance. */
+	fx_flac_t *const handle = (fx_flac_t *)mem;
+	fx_flac_t *self = (fx_flac_t *)fx_mem_align(&mem, sizeof(fx_flac_t));
+
+	/* Remember the limits the buffers below are sized for */
+	self->max_block_size = max_block_size;
+	self->max_channels = max_channels;
+
+	/* Internal structures and the 32-entry coefficient buffer, requested
+	   in the same order in which fx_flac_size() adds up their sizes. */
+	self->metadata = (fx_flac_metadata_t *)fx_mem_align(
+	    &mem, sizeof(fx_flac_metadata_t));
+	self->streaminfo = (fx_flac_streaminfo_t *)fx_mem_align(
+	    &mem, sizeof(fx_flac_streaminfo_t));
+	self->frame_header = (fx_flac_frame_header_t *)fx_mem_align(
+	    &mem, sizeof(fx_flac_frame_header_t));
+	self->subframe_header = (fx_flac_subframe_header_t *)fx_mem_align(
+	    &mem, sizeof(fx_flac_subframe_header_t));
+	self->qbuf = (int32_t *)fx_mem_align(&mem, sizeof(int32_t) * 32U);
+
+	/* Per-channel block buffers: clear all slots first, then give each of
+	   the first max_channels slots a buffer of max_block_size samples. */
+	uint8_t ch = 0;
+	while (ch < FLAC_MAX_CHANNEL_COUNT) {
+		self->blkbuf[ch++] = NULL;
+	}
+	for (ch = 0; ch < max_channels; ch++) {
+		self->blkbuf[ch] =
+		    (int32_t *)fx_mem_align(&mem, sizeof(int32_t) * max_block_size);
+	}
+
+	/* Bring the remaining fields into their initial state */
+	fx_flac_reset(self);
+	return handle;
+}
+
+void fx_flac_reset(fx_flac_t *inst) {
+	/* Callers may pass the pointer returned by fx_flac_init(), which is
+	   the original memory address, so align it before use. */
+	fx_flac_t *self = (fx_flac_t *)FX_ALIGN_ADDR(inst);
+
+	/* Bitstream reader back to its initial state */
+	fx_bitstream_init(&self->bitstream);
+
+	/* Clear the metadata block header and mark its type as invalid */
+	FX_MEM_ZERO_ALIGNED(self->metadata);
+	self->metadata->type = META_TYPE_INVALID;
+
+	/* Clear the streaminfo, frame header and subframe header structures */
+	FX_MEM_ZERO_ALIGNED(self->streaminfo);
+	FX_MEM_ZERO_ALIGNED(self->frame_header);
+	FX_MEM_ZERO_ALIGNED(self->subframe_header);
+
+	/* Restart both state machines and clear the parser bookkeeping */
+	self->state = FLAC_INIT;
+	self->priv_state = FLAC_SYNC_INIT;
+	self->n_bytes_rem = 0U;
+	self->crc8 = 0U;
+
+	/* Cursors and counters used while decoding frames */
+	self->chan_cur = 0U;
+	self->blk_cur = 0U;
+	self->coef_cur = 0U;
+	self->partition_cur = 0U;
+	self->partition_sample = 0U;
+	self->rice_unary_counter = 0U;
+}
+
+fx_flac_state_t fx_flac_get_state(const fx_flac_t *inst) {
+	return ((const fx_flac_t *)FX_ALIGN_ADDR(inst))->state; /* aligned view */
+}
+
+int64_t fx_flac_get_streaminfo(fx_flac_t const *inst,
+                               fx_flac_streaminfo_key_t key) {
+	/* Looks up a single field of the streaminfo structure by key and
+	   returns it converted to int64_t. Keys that are not listed below
+	   yield FLAC_INVALID_METADATA_KEY. The instance pointer is aligned
+	   before use, so the pointer returned by fx_flac_init() is accepted
+	   as well. This function only reads; it never modifies the decoder
+	   instance. */
+	const fx_flac_t *self = (const fx_flac_t *)FX_ALIGN_ADDR(inst);
+	const fx_flac_streaminfo_t *info = self->streaminfo;
+
+	/* The sixteen MD5 keys are consecutive enum values (SUM_0 to SUM_F),
+	   so the distance to FLAC_KEY_MD5_SUM_0 is directly the index into
+	   the md5_sum array. */
+	if (key >= FLAC_KEY_MD5_SUM_0 && key <= FLAC_KEY_MD5_SUM_F) {
+		return info->md5_sum[key - FLAC_KEY_MD5_SUM_0];
+	}
+
+	/* Every other key maps onto exactly one scalar field of the
+	   streaminfo structure. */
+	switch (key) {
+		case FLAC_KEY_MIN_BLOCK_SIZE:
+			return info->min_block_size;
+		case FLAC_KEY_MAX_BLOCK_SIZE:
+			return info->max_block_size;
+		case FLAC_KEY_MIN_FRAME_SIZE:
+			return info->min_frame_size;
+		case FLAC_KEY_MAX_FRAME_SIZE:
+			return info->max_frame_size;
+		case FLAC_KEY_SAMPLE_RATE:
+			return info->sample_rate;
+		case FLAC_KEY_N_CHANNELS:
+			return info->n_channels;
+		case FLAC_KEY_SAMPLE_SIZE:
+			return info->sample_size;
+		case FLAC_KEY_N_SAMPLES:
+			return info->n_samples;
+		default:
+			return FLAC_INVALID_METADATA_KEY;
+	}
+}
+
+fx_flac_state_t fx_flac_process(fx_flac_t *inst, const uint8_t *in,
+                                uint32_t *in_len, int32_t *out,
+                                uint32_t *out_len) {
+	/* Work on the aligned instance, so callers may pass the pointer that
+	   fx_flac_init() returned. */
+	fx_flac_t *self = (fx_flac_t *)FX_ALIGN_ADDR(inst);
+
+	/* Point the bitstream reader at the caller's input buffer */
+	fx_bitstream_t *stream = &self->bitstream;
+	fx_bitstream_set_source(stream, in, *in_len);
+
+	/* Loop bookkeeping */
+	uint32_t n_written = 0U;            /* Samples stored in "out" */
+	fx_flac_state_t seen = self->state; /* Last public state observed */
+	bool more = true;                   /* Cleared when a handler stalls */
+
+	/* Run the state machine until a handler cannot continue, the decoder
+	   is in the error state (nothing can be done then), or a state worth
+	   reporting to the caller has been reached. */
+	while (more && self->state != FLAC_ERR) {
+		/* A fresh transition into one of the END_OF_* states is a good
+		   point to return to the caller, even with input left over. */
+		if (seen != self->state) {
+			seen = self->state;
+			if (seen == FLAC_END_OF_METADATA || seen == FLAC_END_OF_FRAME) {
+				break;
+			}
+		}
+
+		/* Dispatch to the handler of the current state. A handler returns
+		   false when there is no more data to read or space to write to. */
+		switch (self->state) {
+			case FLAC_INIT:
+				more = _fx_flac_process_init(self);
+				break;
+			case FLAC_IN_METADATA:
+				more = _fx_flac_process_in_metadata(self);
+				break;
+			case FLAC_SEARCH_FRAME:
+				more = _fx_flac_process_search_frame(self);
+				break;
+			case FLAC_IN_FRAME:
+				more = _fx_flac_process_in_frame(self);
+				break;
+			case FLAC_END_OF_METADATA:
+			case FLAC_END_OF_FRAME:
+				/* Only reached when the call started in this state, i.e.
+				   after it was reported: go on searching for a frame. */
+				self->state = FLAC_SEARCH_FRAME;
+				self->priv_state = FLAC_FRAME_SYNC;
+				break;
+			case FLAC_DECODED_FRAME:
+				if (out && out_len) {
+					/* Pass the capacity of "out" in and get the number of
+					   written samples back through the same variable. */
+					n_written = *out_len;
+					more = _fx_flac_process_decoded_frame(self, out,
+					                                      &n_written);
+				} else {
+					/* If no output buffers are given, just discard the
+					   decoded data. */
+					self->state = FLAC_END_OF_FRAME;
+				}
+				break;
+			default:
+				self->state = FLAC_ERR; /* Internal error */
+				break;
+		}
+	}
+
+	/* Write the number of samples stored in the output buffer (if the
+	   caller asked for it) and the number of bytes read from the input;
+	   the caller must not provide these bytes again. */
+	if (out_len) {
+		*out_len = n_written;
+	}
+	*in_len = stream->src - in;
+
+	/* Return the current state */
+	return self->state;
+}
+
diff --git a/lib/libfoxenflac/include/foxen/flac.h b/lib/libfoxenflac/include/foxen/flac.h
new file mode 100644
index 00000000..ec89ea36
--- /dev/null
+++ b/lib/libfoxenflac/include/foxen/flac.h
@@ -0,0 +1,297 @@
+/*
+ *  libfoxenflac -- Tiny FLAC Decoder Library
+ *  Copyright (C) 2018-2022  Andreas Stöckel
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/**
+ * @file flac.h
+ *
+ * Provides a decoder for FLAC (Free Lossless Audio Codec).
+ *
+ * @author Andreas Stöckel
+ */
+
+#ifndef FOXEN_FLAC_H
+#define FOXEN_FLAC_H
+
+#include <stdint.h>
+
+#ifndef FX_EXPORT /* May be predefined by the build to override the default */
+#if __EMSCRIPTEN__
+#include <emscripten.h> /* Standard #include; #import is a non-C extension */
+#define FX_EXPORT EMSCRIPTEN_KEEPALIVE
+#else
+#define FX_EXPORT
+#endif /* __EMSCRIPTEN__ */
+#endif /* FX_EXPORT */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Value returned by the fx_flac_get_streaminfo() method if the given streaminfo
+ * key is invalid.
+ */
+#define FLAC_INVALID_METADATA_KEY 0x7FFFFFFFFFFFFFFFULL
+
+/**
+ * Maximum number of channels that can be encoded in a FLAC stream.
+ */
+#define FLAC_MAX_CHANNEL_COUNT 8U
+
+/**
+ * Maximum block size that can be used if the stream is encoded in the FLAC
+ * Subset format and the sample rate is at most 48 kHz (48000 Hz).
+ */
+#define FLAC_SUBSET_MAX_BLOCK_SIZE_48KHZ 4608U
+
+/**
+ * Maximum block size that can always be safely used if the stream is encoded
+ * in the FLAC Subset format.
+ */
+#define FLAC_SUBSET_MAX_BLOCK_SIZE 16384U
+
+/**
+ * Maximum block size in samples that can be used in a FLAC stream.
+ */
+#define FLAC_MAX_BLOCK_SIZE 65535U
+
+/**
+ * Opaque struct representing a FLAC decoder.
+ */
+struct fx_flac;
+
+/**
+ * Typedef for the fx_flac struct.
+ */
+typedef struct fx_flac fx_flac_t;
+
+/**
+ * Enum representing the state of a FLAC decoder instance.
+ */
+typedef enum {
+	/**
+	 * The decoder is in an error state; the decoder cannot recover from this
+	 * error. This error may for example occur if the data in the stream is
+	 * invalid, or the stream has a format that is outside the maximum specs
+	 * that are supported by the decoder. Call fx_flac_reset() and start anew!
+	 */
+	FLAC_ERR = -1,
+
+	/**
+	 * The decoder is currently in its initial state; this is the state set by
+	 * fx_flac_reset().
+	 */
+	FLAC_INIT = 0,
+
+	/**
+	 * The decoder found the beginning of the metadata packet!
+	 */
+	FLAC_IN_METADATA = 1,
+
+	/**
+	 * The decoder is done reading the current metadata block, this may be
+	 * followed by more metadata blocks, in which case the state is reset to
+	 * FLAC_IN_METADATA. fx_flac_process() returns when entering this state.
+	 */
+	FLAC_END_OF_METADATA = 2,
+
+	/**
+	 * The decoder is currently searching for an audio frame.
+	 */
+	FLAC_SEARCH_FRAME = 3,
+
+	/**
+	 * The decoder is currently inside the stream of audio frames.
+	 */
+	FLAC_IN_FRAME = 4,
+
+	/**
+	 * The decoder successfully decoded an entire frame. Its samples are being
+	 * written to the output buffer, or discarded if no buffer was given.
+	 */
+	FLAC_DECODED_FRAME = 5,
+
+	/**
+	 * The decoder is done with a frame; fx_flac_process() returns here.
+	 */
+	FLAC_END_OF_FRAME = 6
+} fx_flac_state_t;
+
+/**
+ * Enum used in fx_flac_get_streaminfo() to query metadata about the stream.
+ */
+typedef enum {
+	FLAC_KEY_MIN_BLOCK_SIZE = 0, /* streaminfo field min_block_size */
+	FLAC_KEY_MAX_BLOCK_SIZE = 1, /* streaminfo field max_block_size */
+	FLAC_KEY_MIN_FRAME_SIZE = 2, /* streaminfo field min_frame_size */
+	FLAC_KEY_MAX_FRAME_SIZE = 3, /* streaminfo field max_frame_size */
+	FLAC_KEY_SAMPLE_RATE = 4,    /* streaminfo field sample_rate */
+	FLAC_KEY_N_CHANNELS = 5,     /* streaminfo field n_channels */
+	FLAC_KEY_SAMPLE_SIZE = 6,    /* streaminfo field sample_size */
+	FLAC_KEY_N_SAMPLES = 7,      /* streaminfo field n_samples */
+	FLAC_KEY_MD5_SUM_0 = 128,    /* md5_sum[0]; key - 128 is the array index */
+	FLAC_KEY_MD5_SUM_1 = 129,
+	FLAC_KEY_MD5_SUM_2 = 130,
+	FLAC_KEY_MD5_SUM_3 = 131,
+	FLAC_KEY_MD5_SUM_4 = 132,
+	FLAC_KEY_MD5_SUM_5 = 133,
+	FLAC_KEY_MD5_SUM_6 = 134,
+	FLAC_KEY_MD5_SUM_7 = 135,
+	FLAC_KEY_MD5_SUM_8 = 136,
+	FLAC_KEY_MD5_SUM_9 = 137,
+	FLAC_KEY_MD5_SUM_A = 138,
+	FLAC_KEY_MD5_SUM_B = 139,
+	FLAC_KEY_MD5_SUM_C = 140,
+	FLAC_KEY_MD5_SUM_D = 141,
+	FLAC_KEY_MD5_SUM_E = 142,
+	FLAC_KEY_MD5_SUM_F = 143, /* md5_sum[15], last element of the checksum */
+} fx_flac_streaminfo_key_t;
+
+/**
+ * Returns the size of the FLAC decoder instance in bytes. This assumes that the
+ * FLAC audio that is being decoded uses the maximum settings, i.e. the largest
+ * bit depth and block size. See fx_flac_init() regarding parameters.
+ *
+ * @return zero if the given parameters are out of range, the number of bytes
+ * required to hold the FLAC decoder structure otherwise.
+ */
+FX_EXPORT uint32_t fx_flac_size(uint32_t max_block_size, uint8_t max_channels);
+
+/**
+ * Initializes the FLAC decoder at the given memory location. Each decoder can
+ * decode exactly one stream at a time.
+ *
+ * @param mem is a pointer at the memory region at which the FLAC decoder should
+ * store its private data. The memory region must be at least as large as
+ * indicated by fx_flac_size(). May be NULL, in which case NULL is returned.
+ * @param max_block_size is the maximum block size for which the FLAC instance
+ * will provide a buffer. For streams in the Subset format (which is used per
+ * default in most FLAC encoders), max_block_size can be set to 4608 if the
+ * sample rate is <= 48 kHz, otherwise, for larger sample rates,
+ * max_block_size must be set to 16384.
+ * @param max_channels is the maximum number of channels that will be decoded.
+ * @return the original `mem` pointer (never an adjusted address), so that it
+ * can be passed to free(); the other functions align the `inst` pointer they
+ * receive internally. Returns NULL if the input pointer is NULL or the given
+ * parameters are invalid.
+ */
+FX_EXPORT fx_flac_t *fx_flac_init(void *mem, uint16_t max_block_size,
+                                  uint8_t max_channels);
+
+/**
+ * Macro which calls malloc to allocate memory for a new fx_flac instance. The
+ * returned pointer must be freed using free. Returns NULL if the allocation
+ * fails or the given parameters are invalid. <stdlib.h> must be included by
+ * the code expanding the macro; arguments are evaluated more than once.
+ *
+ * Implemented as a macro so that the library itself does not depend on malloc.
+ */
+#define FX_FLAC_ALLOC(max_block_size, max_channels)                            \
+	((fx_flac_size((max_block_size), (max_channels)) == 0U)                    \
+	    ? NULL                                                                 \
+	    : fx_flac_init(malloc(fx_flac_size((max_block_size), (max_channels))), \
+	                   (max_block_size), (max_channels)))
+
+/**
+ * Returns a new fx_flac instance that is sufficient to decode FLAC streams in
+ * the FLAC Subset format with DAT parameters, i.e. up to 48 kHz, and two
+ * channels. This will allocate about 40 kiB of memory.
+ */
+#define FX_FLAC_ALLOC_SUBSET_FORMAT_DAT() \
+	FX_FLAC_ALLOC(FLAC_SUBSET_MAX_BLOCK_SIZE_48KHZ, 2U)
+
+/**
+ * Returns a new fx_flac instance that is sufficient to decode FLAC streams in
+ * the FLAC Subset format. This will allocate slightly more than 512 KiB.
+ */
+#define FX_FLAC_ALLOC_SUBSET_FORMAT_ANY() \
+	FX_FLAC_ALLOC(FLAC_SUBSET_MAX_BLOCK_SIZE, FLAC_MAX_CHANNEL_COUNT)
+
+/**
+ * Returns a new fx_flac instance that is sufficient to decode any valid FLAC
+ * stream. Note that this will allocate between 2-3 MiB of memory.
+ */
+#define FX_FLAC_ALLOC_DEFAULT() \
+	FX_FLAC_ALLOC(FLAC_MAX_BLOCK_SIZE, FLAC_MAX_CHANNEL_COUNT)
+
+/**
+ * Resets the FLAC decoder.
+ *
+ * @param inst is the FLAC decoder that should be reset.
+ */
+FX_EXPORT void fx_flac_reset(fx_flac_t *inst);
+
+/**
+ * Returns the current decoder state.
+ *
+ * @param inst is the FLAC decoder instance for which the state should be
+ * returned.
+ * @return the current state of the decoder.
+ */
+FX_EXPORT fx_flac_state_t fx_flac_get_state(const fx_flac_t *inst);
+
+/**
+ * Returns metadata about the FLAC stream that is currently being parsed. This
+ * function may only be called if the decoder is in the state
+ * FLAC_END_OF_METADATA or greater, otherwise the result may be undefined
+ * (it will likely return zero for most of the metadata keys).
+ *
+ * @param inst is a pointer at the FLAC decoder instance for which the metadata
+ * should be retrieved.
+ * @param key is the metadata that should be retrieved.
+ * @return the requested metadata value or FLAC_INVALID_METADATA_KEY if the
+ * given key is unknown.
+ */
+FX_EXPORT int64_t fx_flac_get_streaminfo(const fx_flac_t *inst,
+                                         fx_flac_streaminfo_key_t key);
+
+/**
+ * Decodes the given raw FLAC data; the given data must be RAW FLAC data as
+ * specified in the FLAC format specification https://xiph.org/flac/format.html
+ * This function will always return right after the decoder transitions to a new
+ * relevant state.
+ *
+ * @param inst is the decoder instance.
+ * @param in is a pointer at the encoded bytestream.
+ * @param in_len is a pointer at an integer containing the number of valid
+ * bytes in "in". After the function returns, *in_len will contain the number
+ * of bytes that were actually read. This number may be zero if the decoder is
+ * in the FLAC_ERR state, or the internal buffers are full and need to be
+ * flushed to the provided output first.
+ * @param out is a pointer at a memory region that will accept the decoded
+ * interleaved audio data. Samples are decoded as 32-bit signed integer; the
+ * minimum and maximum value will depend on the original bit depth of the audio
+ * stored in the bitstream. If this is NULL, the decoder will silently discard
+ * the output.
+ * @param out_len is a pointer at an integer containing the number of available
+ * signed 32-bit integers at the memory address pointed at by out. After the
+ * function returns, this value will contain the number of samples that were
+ * written. If this is NULL, the decoder will silently discard the output.
+ * @return the current state of the decoder. If the state transitions to
+ * FLAC_END_OF_METADATA or FLAC_END_OF_FRAME this function will return
+ * immediately; only the data up to the point causing the transition has been
+ * read.
+ */
+FX_EXPORT fx_flac_state_t fx_flac_process(fx_flac_t *inst, const uint8_t *in,
+                                          uint32_t *in_len, int32_t *out,
+                                          uint32_t *out_len);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* FOXEN_FLAC_H */
diff --git a/lib/stb_vorbis/CMakeLists.txt b/lib/stb_vorbis/CMakeLists.txt
new file mode 100644
index 00000000..325ac4f1
--- /dev/null
+++ b/lib/stb_vorbis/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2023 jacqueline <me@jacqueline.id.au>
+#
+# SPDX-License-Identifier: GPL-3.0-only
+
+idf_component_register(
+  SRCS "stb_vorbis.c"       # the only source file of this component
+  INCLUDE_DIRS "include"    # directory containing stb_vorbis.h
+)
diff --git a/lib/stb_vorbis/include/stb_vorbis.h b/lib/stb_vorbis/include/stb_vorbis.h
new file mode 100644
index 00000000..209d448a
--- /dev/null
+++ b/lib/stb_vorbis/include/stb_vorbis.h
@@ -0,0 +1,418 @@
+// Ogg Vorbis audio decoder - v1.22 - public domain
+// http://nothings.org/stb_vorbis/
+//
+// Original version written by Sean Barrett in 2007.
+//
+// Originally sponsored by RAD Game Tools. Seeking implementation
+// sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
+// Elias Software, Aras Pranckevicius, and Sean Barrett.
+//
+// LICENSE
+//
+//   See end of file for license information.
+//
+// Limitations:
+//
+//   - floor 0 not supported (used in old ogg vorbis files pre-2004)
+//   - lossless sample-truncation at beginning ignored
+//   - cannot concatenate multiple vorbis streams
+//   - sample positions are 32-bit, limiting seekable 192Khz
+//       files to around 6 hours (Ogg supports 64-bit)
+//
+// Feature contributors:
+//    Dougall Johnson (sample-exact seeking)
+//
+// Bugfix/warning contributors:
+//    Terje Mathisen     Niklas Frykholm     Andy Hill
+//    Casey Muratori     John Bolton         Gargaj
+//    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
+//    Bernhard Wodo      Evan Balster        github:alxprd
+//    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
+//    Phillip Bennefall  Rohit               Thiago Goulart
+//    github:manxorist   Saga Musix          github:infatum
+//    Timur Gagiev       Maxwell Koo         Peter Waller
+//    github:audinowho   Dougall Johnson     David Reid
+//    github:Clownacy    Pedro J. Estebanez  Remi Verschelde
+//    AnthoFoxo          github:morlat       Gabriel Ravier
+//
+// Partial history:
+//    1.22    - 2021-07-11 - various small fixes
+//    1.21    - 2021-07-02 - fix bug for files with no comments
+//    1.20    - 2020-07-11 - several small fixes
+//    1.19    - 2020-02-05 - warnings
+//    1.18    - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
+//    1.17    - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
+//    1.16    - 2019-03-04 - fix warnings
+//    1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
+//    1.14    - 2018-02-11 - delete bogus dealloca usage
+//    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
+//    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
+//    1.11    - 2017-07-23 - fix MinGW compilation
+//    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
+//    1.09    - 2016-04-04 - back out 'truncation of last frame' fix from previous version
+//    1.08    - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
+//    1.07    - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
+//    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
+//                           some crash fixes when out of memory or with corrupt files
+//                           fix some inappropriately signed shifts
+//    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
+//    1.04    - 2014-08-27 - fix missing const-correct case in API
+//    1.03    - 2014-08-07 - warning fixes
+//    1.02    - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
+//    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
+//    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
+//                           (API change) report sample rate for decode-full-file funcs
+//
+// See end of file for full version history.
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  HEADER BEGINS HERE
+//
+
+#ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
+#define STB_VORBIS_INCLUDE_STB_VORBIS_H
+
+#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
+#define STB_VORBIS_NO_STDIO 1
+#endif
+
+#ifndef STB_VORBIS_NO_STDIO
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+///////////   THREAD SAFETY
+
+// Individual stb_vorbis* handles are not thread-safe; you cannot decode from
+// them from multiple threads at the same time. However, you can have multiple
+// stb_vorbis* handles and decode from them independently in multiple threads.
+
+
+///////////   MEMORY ALLOCATION
+
+// normally stb_vorbis uses malloc() to allocate memory at startup,
+// and alloca() to allocate temporary memory during a frame on the
+// stack. (Memory consumption will depend on the amount of setup
+// data in the file and how you set the compile flags for speed
+// vs. size. In my test files the maximal-size usage is ~150KB.)
+//
+// You can modify the wrapper functions in the source (setup_malloc,
+// setup_temp_malloc, temp_malloc) to change this behavior, or you
+// can use a simpler allocation model: you pass in a buffer from
+// which stb_vorbis will allocate _all_ its memory (including the
+// temp memory). "open" may fail with a VORBIS_outofmem if you
+// do not pass in enough data; there is no way to determine how
+// much you do need except to succeed (at which point you can
+// query get_info to find the exact amount required. yes I know
+// this is lame).
+//
+// If you pass in a non-NULL buffer of the type below, allocation
+// will occur from it as described above. Otherwise just pass NULL
+// to use malloc()/alloca()
+
+typedef struct // optional fixed-buffer allocator handed to the open functions (pass NULL to use malloc()/alloca())
+{
+   char *alloc_buffer;                 // caller-supplied buffer from which _all_ memory, including temp memory, is allocated
+   int   alloc_buffer_length_in_bytes; // size of alloc_buffer, in bytes
+} stb_vorbis_alloc;
+
+
+///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
+
+typedef struct stb_vorbis stb_vorbis;
+
+typedef struct // value type returned by stb_vorbis_get_info()
+{
+   unsigned int sample_rate; // presumably in Hz -- TODO confirm against the implementation
+   int channels;             // channel count; the decode functions report this same number through *channels
+
+   unsigned int setup_memory_required;      // NOTE(review): these three fields look like the amounts to consult
+   unsigned int setup_temp_memory_required; // when sizing an stb_vorbis_alloc buffer (see MEMORY ALLOCATION);
+   unsigned int temp_memory_required;       // units are presumably bytes -- confirm
+
+   int max_frame_size; // presumably the largest frame, in samples per channel -- confirm before sizing buffers
+} stb_vorbis_info;
+
+typedef struct // value type returned by stb_vorbis_get_comment()
+{
+   char *vendor; // presumably the vendor string of the comment header -- TODO confirm
+
+   int comment_list_length; // presumably the number of entries in comment_list -- TODO confirm
+   char **comment_list;     // NOTE(review): ownership/lifetime is not documented here -- confirm before freeing
+} stb_vorbis_comment;
+
+// get general information about the file
+extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
+
+// get ogg comments
+extern stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f);
+
+// get the last error detected (clears it, too)
+extern int stb_vorbis_get_error(stb_vorbis *f);
+
+// close an ogg vorbis file and free all memory in use
+extern void stb_vorbis_close(stb_vorbis *f);
+
+// this function returns the offset (in samples) from the beginning of the
+// file that will be returned by the next decode, if it is known, or -1
+// otherwise. after a flush_pushdata() call, this may take a while before
+// it becomes valid again.
+// NOT WORKING YET after a seek with PULLDATA API
+extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
+
+// returns the current seek point within the file, or offset from the beginning
+// of the memory buffer. In pushdata mode it returns 0.
+extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
+
+///////////   PUSHDATA API
+
+#ifndef STB_VORBIS_NO_PUSHDATA_API
+
+// this API allows you to get blocks of data from any source and hand
+// them to stb_vorbis. you have to buffer them; stb_vorbis will tell
+// you how much it used, and you have to give it the rest next time;
+// and stb_vorbis may not have enough data to work with and you will
+// need to give it the same data again PLUS more. Note that the Vorbis
+// specification does not bound the size of an individual frame.
+
+extern stb_vorbis *stb_vorbis_open_pushdata(
+         const unsigned char * datablock, int datablock_length_in_bytes,
+         int *datablock_memory_consumed_in_bytes,
+         int *error,
+         const stb_vorbis_alloc *alloc_buffer);
+// create a vorbis decoder by passing in the initial data block containing
+//    the ogg&vorbis headers (you don't need to parse them, just provide
+//    the first N bytes of the file--you're told if it's not enough, see below)
+// on success, returns an stb_vorbis *, does not set error, returns the amount of
+//    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
+// on failure, returns NULL and sets *error; does not change *datablock_memory_consumed_in_bytes
+// if returns NULL and *error is VORBIS_need_more_data, then the input block was
+//       incomplete and you need to pass in a larger block from the start of the file
+
+extern int stb_vorbis_decode_frame_pushdata(
+         stb_vorbis *f,
+         const unsigned char *datablock, int datablock_length_in_bytes,
+         int *channels,             // place to write number of float * buffers
+         float ***output,           // place to write float ** array of float * buffers
+         int *samples               // place to write number of output samples
+     );
+// decode a frame of audio sample data if possible from the passed-in data block
+//
+// return value: number of bytes we used from datablock
+//
+// possible cases:
+//     0 bytes used, 0 samples output (need more data)
+//     N bytes used, 0 samples output (resynching the stream, keep going)
+//     N bytes used, M samples output (one frame of data)
+// note that after opening a file, you will ALWAYS get one N-bytes,0-sample
+// frame, because Vorbis always "discards" the first frame.
+//
+// Note that on resynch, stb_vorbis will rarely consume all of the buffer,
+// instead only datablock_length_in_bytes-3 or less. This is because it wants
+// to avoid missing parts of a page header if they cross a datablock boundary,
+// without writing state-machiney code to record a partial detection.
+//
+// The number of channels returned are stored in *channels (which can be
+// NULL--it is always the same as the number of channels reported by
+// get_info). *output will contain an array of float* buffers, one per
+// channel. In other words, (*output)[0][0] contains the first sample from
+// the first channel, and (*output)[1][0] contains the first sample from
+// the second channel.
+//
+// *output points into stb_vorbis's internal output buffer storage; these
+// buffers are owned by stb_vorbis and application code should not free
+// them or modify their contents. They are transient and will be overwritten
+// once you ask for more data to get decoded, so be sure to grab any data
+// you need before then.
+
+extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
+// inform stb_vorbis that your next datablock will not be contiguous with
+// previous ones (e.g. you've seeked in the data); future attempts to decode
+// frames will cause stb_vorbis to resynchronize (as noted above), and
+// once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
+// will begin decoding the _next_ frame.
+//
+// if you want to seek using pushdata, you need to seek in your file, then
+// call stb_vorbis_flush_pushdata(), then start calling stb_vorbis_decode_frame_pushdata(), then once
+// decoding is returning you data, call stb_vorbis_get_sample_offset, and
+// if you don't like the result, seek your file again and repeat.
+#endif
+
+
+//////////   PULLING INPUT API
+
+#ifndef STB_VORBIS_NO_PULLDATA_API
+// This API assumes stb_vorbis is allowed to pull data from a source--
+// either a block of memory containing the _entire_ vorbis stream, or a
+// FILE * that you or it create, or possibly some other reading mechanism
+// if you go modify the source to replace the FILE * case with some kind
+// of callback to your code. (But if you don't support seeking, you may
+// just want to go ahead and use pushdata.)
+
+#if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
+extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
+#endif
+#if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
+extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
+#endif
+// decode an entire file and output the data interleaved into a malloc()ed
+// buffer stored in *output. The return value is the number of samples
+// decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
+// When you're done with it, just free() the pointer returned in *output.
+
+extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
+                                  int *error, const stb_vorbis_alloc *alloc_buffer);
+// create an ogg vorbis decoder from an ogg vorbis stream in memory (note
+// this must be the entire stream!). on failure, returns NULL and sets *error
+
+#ifndef STB_VORBIS_NO_STDIO
+extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
+                                  int *error, const stb_vorbis_alloc *alloc_buffer);
+// create an ogg vorbis decoder from a filename via fopen(). on failure,
+// returns NULL and sets *error (possibly to VORBIS_file_open_failure).
+
+extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
+                                  int *error, const stb_vorbis_alloc *alloc_buffer);
+// create an ogg vorbis decoder from an open FILE *, looking for a stream at
+// the _current_ seek point (ftell). on failure, returns NULL and sets *error.
+// note that stb_vorbis must "own" this stream; if you seek it in between
+// calls to stb_vorbis, it will become confused. Moreover, if you attempt to
+// perform stb_vorbis_seek_*() operations on this file, it will assume it
+// owns the _entire_ rest of the file after the start point. Use the next
+// function, stb_vorbis_open_file_section(), to limit it.
+
+extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
+                int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
+// create an ogg vorbis decoder from an open FILE *, looking for a stream at
+// the _current_ seek point (ftell); the stream will be of length 'len' bytes.
+// on failure, returns NULL and sets *error. note that stb_vorbis must "own"
+// this stream; if you seek it in between calls to stb_vorbis, it will become
+// confused.
+#endif
+
+extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
+extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
+// these functions seek in the Vorbis file to (approximately) 'sample_number'.
+// after calling seek_frame(), the next call to get_frame_*() will include
+// the specified sample. after calling stb_vorbis_seek(), the next call to
+// stb_vorbis_get_samples_* will start with the specified sample. If you
+// do not need to seek to EXACTLY the target sample when using get_samples_*,
+// you can also use seek_frame().
+
+extern int stb_vorbis_seek_start(stb_vorbis *f);
+// this function is equivalent to stb_vorbis_seek(f,0)
+
+extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
+extern float        stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
+// these functions return the total length of the vorbis stream
+
+extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
+// decode the next frame and return the number of samples. the number of
+// channels returned are stored in *channels (which can be NULL--it is always
+// the same as the number of channels reported by get_info). *output will
+// contain an array of float* buffers, one per channel. These outputs will
+// be overwritten on the next call to stb_vorbis_get_frame_*.
+//
+// You generally should not intermix calls to stb_vorbis_get_frame_*()
+// and stb_vorbis_get_samples_*(), since the latter calls the former.
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
+extern int stb_vorbis_get_frame_short            (stb_vorbis *f, int num_c, short **buffer, int num_samples);
+#endif
+// decode the next frame and return the number of *samples* per channel.
+// Note that for interleaved data, you pass in the number of shorts (the
+// size of your array), but the return value is the number of samples per
+// channel, not the total number of samples.
+//
+// The data is coerced to the number of channels you request according to the
+// channel coercion rules (see below). You must pass in the size of your
+// buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
+// The maximum buffer size needed can be gotten from get_info(); however,
+// the Vorbis I specification implies an absolute maximum of 4096 samples
+// per channel.
+
+// Channel coercion rules:
+//    Let M be the number of channels requested, and N the number of channels present,
+//    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
+//    and stereo R be the sum of all R and center channels (channel assignment from the
+//    vorbis spec).
+//        M    N       output
+//        1    k      sum(Ck) for all k
+//        2    *      stereo L, stereo R
+//        k    l      k > l, the first l channels, then 0s
+//        k    l      k <= l, the first k channels
+//    Note that this is not _good_ surround etc. mixing at all! It's just so
+//    you get something useful.
+
+extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
+extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
+// gets num_samples samples, not necessarily on a frame boundary--this requires
+// buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
+// Returns the number of samples stored per channel; it may be less than requested
+// at the end of the file. If there are no more samples in the file, returns 0.
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
+extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
+#endif
+// gets num_samples samples, not necessarily on a frame boundary--this requires
+// buffering so you have to supply the buffers. Applies the coercion rules above
+// to produce 'channels' channels. Returns the number of samples stored per channel;
+// it may be less than requested at the end of the file. If there are no more
+// samples in the file, returns 0.
+
+#endif
+
+////////   ERROR CODES
+
+enum STBVorbisError // codes written to *error by the open functions; presumably also what stb_vorbis_get_error() returns
+{
+   VORBIS__no_error,                    // first enumerator, so its value is 0
+
+   VORBIS_need_more_data=1,             // not a real error
+
+   VORBIS_invalid_api_mixing,           // can't mix API modes
+   VORBIS_outofmem,                     // not enough memory
+   VORBIS_feature_not_supported,        // uses floor 0
+   VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
+   VORBIS_file_open_failure,            // fopen() failed
+   VORBIS_seek_without_length,          // can't seek in unknown-length file
+
+   VORBIS_unexpected_eof=10,            // file is truncated?
+   VORBIS_seek_invalid,                 // seek past EOF
+
+   // decoding errors (corrupt/invalid stream) -- you probably
+   // don't care about the exact details of these
+
+   // vorbis errors:
+   VORBIS_invalid_setup=20,             // vorbis-level codes are numbered from 20
+   VORBIS_invalid_stream,
+
+   // ogg errors:
+   VORBIS_missing_capture_pattern=30,   // ogg-level codes are numbered from 30
+   VORBIS_invalid_stream_structure_version,
+   VORBIS_continued_packet_flag_invalid,
+   VORBIS_incorrect_stream_serial_number,
+   VORBIS_invalid_first_page,
+   VORBIS_bad_packet_type,
+   VORBIS_cant_find_last_page,
+   VORBIS_seek_failed,
+   VORBIS_ogg_skeleton_not_supported    // Ogg Skeleton data was found (explicit failure added in 1.15)
+};
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
+//
+//  HEADER ENDS HERE
+//
+//////////////////////////////////////////////////////////////////////////////
diff --git a/lib/stb_vorbis/stb_vorbis.c b/lib/stb_vorbis/stb_vorbis.c
new file mode 100644
index 00000000..3e5c2504
--- /dev/null
+++ b/lib/stb_vorbis/stb_vorbis.c
@@ -0,0 +1,5584 @@
+// Ogg Vorbis audio decoder - v1.22 - public domain
+// http://nothings.org/stb_vorbis/
+//
+// Original version written by Sean Barrett in 2007.
+//
+// Originally sponsored by RAD Game Tools. Seeking implementation
+// sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
+// Elias Software, Aras Pranckevicius, and Sean Barrett.
+//
+// LICENSE
+//
+//   See end of file for license information.
+//
+// Limitations:
+//
+//   - floor 0 not supported (used in old ogg vorbis files pre-2004)
+//   - lossless sample-truncation at beginning ignored
+//   - cannot concatenate multiple vorbis streams
+//   - sample positions are 32-bit, limiting seekable 192Khz
+//       files to around 6 hours (Ogg supports 64-bit)
+//
+// Feature contributors:
+//    Dougall Johnson (sample-exact seeking)
+//
+// Bugfix/warning contributors:
+//    Terje Mathisen     Niklas Frykholm     Andy Hill
+//    Casey Muratori     John Bolton         Gargaj
+//    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
+//    Bernhard Wodo      Evan Balster        github:alxprd
+//    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
+//    Phillip Bennefall  Rohit               Thiago Goulart
+//    github:manxorist   Saga Musix          github:infatum
+//    Timur Gagiev       Maxwell Koo         Peter Waller
+//    github:audinowho   Dougall Johnson     David Reid
+//    github:Clownacy    Pedro J. Estebanez  Remi Verschelde
+//    AnthoFoxo          github:morlat       Gabriel Ravier
+//
+// Partial history:
+//    1.22    - 2021-07-11 - various small fixes
+//    1.21    - 2021-07-02 - fix bug for files with no comments
+//    1.20    - 2020-07-11 - several small fixes
+//    1.19    - 2020-02-05 - warnings
+//    1.18    - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
+//    1.17    - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
+//    1.16    - 2019-03-04 - fix warnings
+//    1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
+//    1.14    - 2018-02-11 - delete bogus dealloca usage
+//    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
+//    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
+//    1.11    - 2017-07-23 - fix MinGW compilation
+//    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
+//    1.09    - 2016-04-04 - back out 'truncation of last frame' fix from previous version
+//    1.08    - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
+//    1.07    - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
+//    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
+//                           some crash fixes when out of memory or with corrupt files
+//                           fix some inappropriately signed shifts
+//    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
+//    1.04    - 2014-08-27 - fix missing const-correct case in API
+//    1.03    - 2014-08-07 - warning fixes
+//    1.02    - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
+//    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
+//    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
+//                           (API change) report sample rate for decode-full-file funcs
+//
+// See end of file for full version history.
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  HEADER BEGINS HERE
+//
+
+#ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
+#define STB_VORBIS_INCLUDE_STB_VORBIS_H
+
+#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
+#define STB_VORBIS_NO_STDIO 1
+#endif
+
+#ifndef STB_VORBIS_NO_STDIO
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+///////////   THREAD SAFETY
+
+// Individual stb_vorbis* handles are not thread-safe; you cannot decode from
+// them from multiple threads at the same time. However, you can have multiple
+// stb_vorbis* handles and decode from them independently in multiple threads.
+
+
+///////////   MEMORY ALLOCATION
+
+// normally stb_vorbis uses malloc() to allocate memory at startup,
+// and alloca() to allocate temporary memory during a frame on the
+// stack. (Memory consumption will depend on the amount of setup
+// data in the file and how you set the compile flags for speed
+// vs. size. In my test files the maximal-size usage is ~150KB.)
+//
+// You can modify the wrapper functions in the source (setup_malloc,
+// setup_temp_malloc, temp_malloc) to change this behavior, or you
+// can use a simpler allocation model: you pass in a buffer from
+// which stb_vorbis will allocate _all_ its memory (including the
+// temp memory). "open" may fail with a VORBIS_outofmem if you
+// do not pass in enough data; there is no way to determine how
+// much you do need except to succeed (at which point you can
+// query get_info to find the exact amount required. yes I know
+// this is lame).
+//
+// If you pass in a non-NULL buffer of the type below, allocation
+// will occur from it as described above. Otherwise just pass NULL
+// to use malloc()/alloca()
+
+typedef struct // optional fixed-buffer allocator handed to the open functions (pass NULL to use malloc()/alloca())
+{
+   char *alloc_buffer;                 // caller-supplied buffer from which _all_ memory, including temp memory, is allocated
+   int   alloc_buffer_length_in_bytes; // size of alloc_buffer, in bytes
+} stb_vorbis_alloc;
+
+
+///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
+
+typedef struct stb_vorbis stb_vorbis;
+
+typedef struct // value type returned by stb_vorbis_get_info()
+{
+   unsigned int sample_rate; // presumably in Hz -- TODO confirm against the implementation
+   int channels;             // channel count; the decode functions report this same number through *channels
+
+   unsigned int setup_memory_required;      // NOTE(review): these three fields look like the amounts to consult
+   unsigned int setup_temp_memory_required; // when sizing an stb_vorbis_alloc buffer (see MEMORY ALLOCATION);
+   unsigned int temp_memory_required;       // units are presumably bytes -- confirm
+
+   int max_frame_size; // presumably the largest frame, in samples per channel -- confirm before sizing buffers
+} stb_vorbis_info;
+
+typedef struct // value type returned by stb_vorbis_get_comment()
+{
+   char *vendor; // presumably the vendor string of the comment header -- TODO confirm
+
+   int comment_list_length; // presumably the number of entries in comment_list -- TODO confirm
+   char **comment_list;     // NOTE(review): ownership/lifetime is not documented here -- confirm before freeing
+} stb_vorbis_comment;
+
+// get general information about the file
+extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
+
+// get ogg comments
+extern stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f);
+
+// get the last error detected (clears it, too)
+extern int stb_vorbis_get_error(stb_vorbis *f);
+
+// close an ogg vorbis file and free all memory in use
+extern void stb_vorbis_close(stb_vorbis *f);
+
+// this function returns the offset (in samples) from the beginning of the
+// file that will be returned by the next decode, if it is known, or -1
+// otherwise. after a flush_pushdata() call, this may take a while before
+// it becomes valid again.
+// NOT WORKING YET after a seek with PULLDATA API
+extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
+
+// returns the current seek point within the file, or offset from the beginning
+// of the memory buffer. In pushdata mode it returns 0.
+extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
+
+///////////   PUSHDATA API
+
+#ifndef STB_VORBIS_NO_PUSHDATA_API
+
+// this API allows you to get blocks of data from any source and hand
+// them to stb_vorbis. you have to buffer them; stb_vorbis will tell
+// you how much it used, and you have to give it the rest next time;
+// and stb_vorbis may not have enough data to work with and you will
+// need to give it the same data again PLUS more. Note that the Vorbis
+// specification does not bound the size of an individual frame.
+
+extern stb_vorbis *stb_vorbis_open_pushdata(
+         const unsigned char * datablock, int datablock_length_in_bytes,
+         int *datablock_memory_consumed_in_bytes,
+         int *error,
+         const stb_vorbis_alloc *alloc_buffer);
+// create a vorbis decoder by passing in the initial data block containing
+//    the ogg&vorbis headers (you don't need to parse them, just provide
+//    the first N bytes of the file--you're told if it's not enough, see below)
+// on success, returns an stb_vorbis *, does not set error, returns the amount of
+//    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
+// on failure, returns NULL and sets *error; does not change *datablock_memory_consumed_in_bytes
+// if returns NULL and *error is VORBIS_need_more_data, then the input block was
+//       incomplete and you need to pass in a larger block from the start of the file
+
+extern int stb_vorbis_decode_frame_pushdata(
+         stb_vorbis *f,
+         const unsigned char *datablock, int datablock_length_in_bytes,
+         int *channels,             // place to write number of float * buffers
+         float ***output,           // place to write float ** array of float * buffers
+         int *samples               // place to write number of output samples
+     );
+// decode a frame of audio sample data if possible from the passed-in data block
+//
+// return value: number of bytes we used from datablock
+//
+// possible cases:
+//     0 bytes used, 0 samples output (need more data)
+//     N bytes used, 0 samples output (resynching the stream, keep going)
+//     N bytes used, M samples output (one frame of data)
+// note that after opening a file, you will ALWAYS get one N-bytes,0-sample
+// frame, because Vorbis always "discards" the first frame.
+//
+// Note that on resynch, stb_vorbis will rarely consume all of the buffer,
+// instead only datablock_length_in_bytes-3 or less. This is because it wants
+// to avoid missing parts of a page header if they cross a datablock boundary,
+// without writing state-machiney code to record a partial detection.
+//
+// The number of channels returned are stored in *channels (which can be
+// NULL--it is always the same as the number of channels reported by
+// get_info). *output will contain an array of float* buffers, one per
+// channel. In other words, (*output)[0][0] contains the first sample from
+// the first channel, and (*output)[1][0] contains the first sample from
+// the second channel.
+//
+// *output points into stb_vorbis's internal output buffer storage; these
+// buffers are owned by stb_vorbis and application code should not free
+// them or modify their contents. They are transient and will be overwritten
+// once you ask for more data to get decoded, so be sure to grab any data
+// you need before then.
+
+extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
+// inform stb_vorbis that your next datablock will not be contiguous with
+// previous ones (e.g. you've seeked in the data); future attempts to decode
+// frames will cause stb_vorbis to resynchronize (as noted above), and
+// once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
+// will begin decoding the _next_ frame.
+//
+// if you want to seek using pushdata, you need to seek in your file, then
+// call stb_vorbis_flush_pushdata(), then start calling stb_vorbis_decode_frame_pushdata(), then once
+// decoding is returning you data, call stb_vorbis_get_sample_offset, and
+// if you don't like the result, seek your file again and repeat.
+#endif
+
+
+//////////   PULLING INPUT API
+
+#ifndef STB_VORBIS_NO_PULLDATA_API
+// This API assumes stb_vorbis is allowed to pull data from a source--
+// either a block of memory containing the _entire_ vorbis stream, or a
+// FILE * that you or it create, or possibly some other reading mechanism
+// if you go modify the source to replace the FILE * case with some kind
+// of callback to your code. (But if you don't support seeking, you may
+// just want to go ahead and use pushdata.)
+
+#if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
+extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
+#endif
+#if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
+extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
+#endif
+// decode an entire file and output the data interleaved into a malloc()ed
+// buffer stored in *output. The return value is the number of samples
+// decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
+// When you're done with it, just free() the pointer returned in *output.
+
+extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
+                                  int *error, const stb_vorbis_alloc *alloc_buffer);
+// create an ogg vorbis decoder from an ogg vorbis stream in memory (note
+// this must be the entire stream!). on failure, returns NULL and sets *error
+
+#ifndef STB_VORBIS_NO_STDIO
+extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
+                                  int *error, const stb_vorbis_alloc *alloc_buffer);
+// create an ogg vorbis decoder from a filename via fopen(). on failure,
+// returns NULL and sets *error (possibly to VORBIS_file_open_failure).
+
+extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
+                                  int *error, const stb_vorbis_alloc *alloc_buffer);
+// create an ogg vorbis decoder from an open FILE *, looking for a stream at
+// the _current_ seek point (ftell). on failure, returns NULL and sets *error.
+// note that stb_vorbis must "own" this stream; if you seek it in between
+// calls to stb_vorbis, it will become confused. Moreover, if you attempt to
+// perform stb_vorbis_seek_*() operations on this file, it will assume it
+// owns the _entire_ rest of the file after the start point. Use the next
+// function, stb_vorbis_open_file_section(), to limit it.
+
+extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
+                int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
+// create an ogg vorbis decoder from an open FILE *, looking for a stream at
+// the _current_ seek point (ftell); the stream will be of length 'len' bytes.
+// on failure, returns NULL and sets *error. note that stb_vorbis must "own"
+// this stream; if you seek it in between calls to stb_vorbis, it will become
+// confused.
+#endif
+
+extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
+extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
+// these functions seek in the Vorbis file to (approximately) 'sample_number'.
+// after calling seek_frame(), the next call to get_frame_*() will include
+// the specified sample. after calling stb_vorbis_seek(), the next call to
+// stb_vorbis_get_samples_* will start with the specified sample. If you
+// do not need to seek to EXACTLY the target sample when using get_samples_*,
+// you can also use seek_frame().
+
+extern int stb_vorbis_seek_start(stb_vorbis *f);
+// this function is equivalent to stb_vorbis_seek(f,0)
+
+extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
+extern float        stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
+// these functions return the total length of the vorbis stream
+
+extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
+// decode the next frame and return the number of samples. the number of
+// channels returned is stored in *channels (which can be NULL--it is always
+// the same as the number of channels reported by get_info). *output will
+// contain an array of float* buffers, one per channel. These outputs will
+// be overwritten on the next call to stb_vorbis_get_frame_*.
+//
+// You generally should not intermix calls to stb_vorbis_get_frame_*()
+// and stb_vorbis_get_samples_*(), since the latter calls the former.
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
+extern int stb_vorbis_get_frame_short            (stb_vorbis *f, int num_c, short **buffer, int num_samples);
+#endif
+// decode the next frame and return the number of *samples* per channel.
+// Note that for interleaved data, you pass in the number of shorts (the
+// size of your array), but the return value is the number of samples per
+// channel, not the total number of samples.
+//
+// The data is coerced to the number of channels you request according to the
+// channel coercion rules (see below). You must pass in the size of your
+// buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
+// The maximum buffer size needed can be gotten from get_info(); however,
+// the Vorbis I specification implies an absolute maximum of 4096 samples
+// per channel.
+
+// Channel coercion rules:
+//    Let M be the number of channels requested, and N the number of channels present,
+//    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
+//    and stereo R be the sum of all R and center channels (channel assignment from the
+//    vorbis spec).
+//        M    N       output
+//        1    k      sum(Ck) for all k
+//        2    *      stereo L, stereo R
+//        k    l      k > l, the first l channels, then 0s
+//        k    l      k <= l, the first k channels
+//    Note that this is not _good_ surround etc. mixing at all! It's just so
+//    you get something useful.
+
+extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
+extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
+// gets num_samples samples, not necessarily on a frame boundary--this requires
+// buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
+// Returns the number of samples stored per channel; it may be less than requested
+// at the end of the file. If there are no more samples in the file, returns 0.
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
+extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
+#endif
+// gets num_samples samples, not necessarily on a frame boundary--this requires
+// buffering so you have to supply the buffers. Applies the coercion rules above
+// to produce 'channels' channels. Returns the number of samples stored per channel;
+// it may be less than requested at the end of the file. If there are no more
+// samples in the file, returns 0.
+
+#endif
+
+////////   ERROR CODES
+
+enum STBVorbisError
+{
+   VORBIS__no_error,                    // 0: first enumerator, no error recorded
+
+   VORBIS_need_more_data=1,             // not a real error
+
+   VORBIS_invalid_api_mixing,           // can't mix API modes
+   VORBIS_outofmem,                     // not enough memory
+   VORBIS_feature_not_supported,        // uses floor 0
+   VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
+   VORBIS_file_open_failure,            // fopen() failed
+   VORBIS_seek_without_length,          // can't seek in unknown-length file
+
+   VORBIS_unexpected_eof=10,            // file is truncated?
+   VORBIS_seek_invalid,                 // seek past EOF
+
+   // decoding errors (corrupt/invalid stream) -- you probably
+   // don't care about the exact details of these
+
+   // vorbis errors:
+   VORBIS_invalid_setup=20,
+   VORBIS_invalid_stream,                      // 21
+
+   // ogg errors:
+   VORBIS_missing_capture_pattern=30,
+   VORBIS_invalid_stream_structure_version,    // 31
+   VORBIS_continued_packet_flag_invalid,       // 32
+   VORBIS_incorrect_stream_serial_number,      // 33
+   VORBIS_invalid_first_page,                  // 34
+   VORBIS_bad_packet_type,                     // 35
+   VORBIS_cant_find_last_page,                 // 36
+   VORBIS_seek_failed,                         // 37
+   VORBIS_ogg_skeleton_not_supported           // 38
+};
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
+//
+//  HEADER ENDS HERE
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#ifndef STB_VORBIS_HEADER_ONLY
+
+// global configuration settings (e.g. set these in the project/makefile),
+// or just set them in this file at the top (although ideally the first few
+// should be visible when the header file is compiled too, although it's not
+// crucial)
+
+// STB_VORBIS_NO_PUSHDATA_API
+//     does not compile the code for the various stb_vorbis_*_pushdata()
+//     functions
+// #define STB_VORBIS_NO_PUSHDATA_API
+
+// STB_VORBIS_NO_PULLDATA_API
+//     does not compile the code for the non-pushdata APIs
+// #define STB_VORBIS_NO_PULLDATA_API
+
+// STB_VORBIS_NO_STDIO
+//     does not compile the code for the APIs that use FILE *s internally
+//     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
+// #define STB_VORBIS_NO_STDIO
+
+// STB_VORBIS_NO_INTEGER_CONVERSION
+//     does not compile the code for converting audio sample data from
+//     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
+// #define STB_VORBIS_NO_INTEGER_CONVERSION
+
+// STB_VORBIS_NO_FAST_SCALED_FLOAT
+//      does not use a fast float-to-int trick to accelerate float-to-int on
+//      most platforms which requires endianness be defined correctly.
+//#define STB_VORBIS_NO_FAST_SCALED_FLOAT
+
+
+// STB_VORBIS_MAX_CHANNELS [number]
+//     globally define this to the maximum number of channels you need.
+//     The spec does not put a restriction on channels except that
+//     the count is stored in a byte, so 255 is the hard limit.
+//     Reducing this saves about 16 bytes per value, so using 16 saves
+//     (255-16)*16 or around 4KB. Plus any other memory usage
+//     I forgot to account for. Can probably go as low as 8 (7.1 audio),
+//     6 (5.1 audio), or 2 (stereo only).
+#ifndef STB_VORBIS_MAX_CHANNELS
+#define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
+#endif
+
+// STB_VORBIS_PUSHDATA_CRC_COUNT [number]
+//     after a flush_pushdata(), stb_vorbis begins scanning for the
+//     next valid page, without backtracking. when it finds something
+//     that looks like a page, it streams through it and verifies its
+//     CRC32. Should that validation fail, it keeps scanning. But it's
+//     possible that _while_ streaming through to check the CRC32 of
+//     one candidate page, it sees another candidate page. This #define
+//     determines how many "overlapping" candidate pages it can search
+//     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
+//     garbage pages could be as big as 64KB, but probably average ~16KB.
+//     So don't hose ourselves by scanning an apparent 64KB page and
+//     missing a ton of real ones in the interim; so minimum of 2
+#ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
+#define STB_VORBIS_PUSHDATA_CRC_COUNT  4
+#endif
+
+// STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
+//     sets the log size of the huffman-acceleration table.  Maximum
+//     supported value is 24. with larger numbers, more decodings are O(1),
+//     but the table size is larger so worse cache missing, so you'll have
+//     to probe (and try multiple ogg vorbis files) to find the sweet spot.
+#ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
+#define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
+#endif
+
+// STB_VORBIS_FAST_BINARY_LENGTH [number]
+//     sets the log size of the binary-search acceleration table. this
+//     is used in similar fashion to the fast-huffman size to set initial
+//     parameters for the binary search
+
+// STB_VORBIS_FAST_HUFFMAN_INT
+//     The fast huffman tables are much more efficient if they can be
+//     stored as 16-bit results instead of 32-bit results. This restricts
+//     the codebooks to having only 65535 possible outcomes, though.
+//     (At least, accelerated by the huffman table.)
+#ifndef STB_VORBIS_FAST_HUFFMAN_INT
+#define STB_VORBIS_FAST_HUFFMAN_SHORT
+#endif
+
+// STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
+//     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
+//     back on binary searching for the correct one. This requires storing
+//     extra tables with the huffman codes in sorted order. Defining this
+//     symbol trades off space for speed by forcing a linear search in the
+//     non-fast case, except for "sparse" codebooks.
+// #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
+
+// STB_VORBIS_DIVIDES_IN_RESIDUE
+//     stb_vorbis precomputes the result of the scalar residue decoding
+//     that would otherwise require a divide per chunk. you can trade off
+//     space for time by defining this symbol.
+// #define STB_VORBIS_DIVIDES_IN_RESIDUE
+
+// STB_VORBIS_DIVIDES_IN_CODEBOOK
+//     vorbis VQ codebooks can be encoded two ways: with every case explicitly
+//     stored, or with all elements being chosen from a small range of values,
+//     and all values possible in all elements. By default, stb_vorbis expands
+//     this latter kind out to look like the former kind for ease of decoding,
+//     because otherwise an integer divide-per-vector-element is required to
+//     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
+//     trade off storage for speed.
+//#define STB_VORBIS_DIVIDES_IN_CODEBOOK
+
+#ifdef STB_VORBIS_CODEBOOK_SHORTS
+#error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
+#endif
+
+// STB_VORBIS_DIVIDE_TABLE
+//     this replaces small integer divides in the floor decode loop with
+//     table lookups. made less than 1% difference, so disabled by default.
+
+// STB_VORBIS_NO_INLINE_DECODE
+//     disables the inlining of the scalar codebook fast-huffman decode.
+//     might save a little codespace; useful for debugging
+// #define STB_VORBIS_NO_INLINE_DECODE
+
+// STB_VORBIS_NO_DEFER_FLOOR
+//     Normally we only decode the floor without synthesizing the actual
+//     full curve. We can instead synthesize the curve immediately. This
+//     requires more memory and is very likely slower, so I don't think
+//     you'd ever want to do it except for debugging.
+// #define STB_VORBIS_NO_DEFER_FLOOR
+
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+#ifdef STB_VORBIS_NO_PULLDATA_API
+   #define STB_VORBIS_NO_INTEGER_CONVERSION
+   #define STB_VORBIS_NO_STDIO
+#endif
+
+#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
+   #define STB_VORBIS_NO_STDIO 1
+#endif
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
+
+   // only need endianness for fast-float-to-int, which we don't
+   // use for pushdata
+
+   #ifndef STB_VORBIS_BIG_ENDIAN
+     #define STB_VORBIS_ENDIAN  0
+   #else
+     #define STB_VORBIS_ENDIAN  1
+   #endif
+
+#endif
+#endif
+
+
+#ifndef STB_VORBIS_NO_STDIO
+#include <stdio.h>
+#endif
+
+#ifndef STB_VORBIS_NO_CRT
+   #include <stdlib.h>
+   #include <string.h>
+   #include <assert.h>
+   #include <math.h>
+
+   // find definition of alloca if it's not in stdlib.h:
+   #if defined(_MSC_VER) || defined(__MINGW32__)
+      #include <malloc.h>
+   #endif
+   #if defined(__linux__) || defined(__linux) || defined(__sun__) || defined(__EMSCRIPTEN__) || defined(__NEWLIB__)
+      #include <alloca.h>
+   #endif
+#else // STB_VORBIS_NO_CRT
+   #define NULL 0
+   #define malloc(s)   0
+   #define free(s)     ((void) 0)
+   #define realloc(s)  0
+#endif // STB_VORBIS_NO_CRT
+
+#include <limits.h>
+
+#ifdef __MINGW32__
+   // eff you mingw:
+   //     "fixed":
+   //         http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
+   //     "no that broke the build, reverted, who cares about C":
+   //         http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
+   #ifdef __forceinline
+   #undef __forceinline
+   #endif
+   #define __forceinline
+   #ifndef alloca
+   #define alloca __builtin_alloca
+   #endif
+#elif !defined(_MSC_VER)
+   #if __GNUC__
+      #define __forceinline inline
+   #else
+      #define __forceinline
+   #endif
+#endif
+
+#if STB_VORBIS_MAX_CHANNELS > 256
+#error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
+#endif
+
+#if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
+#error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
+#endif
+
+
+#if 0
+#include <crtdbg.h>
+#define CHECK(f)   _CrtIsValidHeapPointer(f->channel_buffers[1])
+#else
+#define CHECK(f)   ((void) 0)
+#endif
+
+#define MAX_BLOCKSIZE_LOG  13   // from specification
+#define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
+
+
+typedef unsigned char  uint8;
+typedef   signed char   int8;
+typedef unsigned short uint16;
+typedef   signed short  int16;
+typedef unsigned int   uint32;
+typedef   signed int    int32;
+
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
+#endif
+
+typedef float codetype;
+
+#ifdef _MSC_VER
+#define STBV_NOTUSED(v)  (void)(v)
+#else
+#define STBV_NOTUSED(v)  (void)sizeof(v)
+#endif
+
+// @NOTE
+//
+// Some arrays below are tagged "//varies", which means it's actually
+// a variable-sized piece of data, but rather than malloc I assume it's
+// small enough it's better to just allocate it all together with the
+// main thing
+//
+// Most of the variables are specified with the smallest size I could pack
+// them into. It might give better performance to make them all full-sized
+// integers. It should be safe to freely rearrange the structures or change
+// the sizes larger--nothing relies on silently truncating etc., nor the
+// order of variables.
+
+#define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
+#define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
+
+typedef struct
+{
+   int dimensions, entries; // entries: number of symbols; bounds the dense (non-sparse) per-symbol arrays
+   uint8 *codeword_lengths; // huffman code length per slot; NO_CODE marks a symbol with no code
+   float  minimum_value;
+   float  delta_value;
+   uint8  value_bits;
+   uint8  lookup_type;
+   uint8  sequence_p;
+   uint8  sparse;           // nonzero: slots are packed in insertion order instead of indexed by symbol (see add_entry)
+   uint32 lookup_values;
+   codetype *multiplicands;
+   uint32 *codewords;       // huffman code per slot, written by add_entry()
+   #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
+    int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE]; // slot for each STB_VORBIS_FAST_HUFFMAN_LENGTH-bit pattern, -1 if none
+   #else
+    int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE]; // same table with 32-bit entries
+   #endif
+   uint32 *sorted_codewords; // bit-reversed codes in ascending order, followed by a 0xffffffff sentinel
+   int    *sorted_values;    // symbol belonging to each sorted_codewords slot
+   int     sorted_entries;   // number of codes in sorted_codewords (sentinel not counted)
+} Codebook;
+
+typedef struct // floor type 0 configuration; one of the two layouts stored in the Floor union
+{
+   uint8 order;            // NOTE(review): field names presumably mirror the Vorbis I floor0 header -- confirm against the spec
+   uint16 rate;
+   uint16 bark_map_size;
+   uint8 amplitude_bits;
+   uint8 amplitude_offset;
+   uint8 number_of_books;
+   uint8 book_list[16]; // varies (fixed-capacity storage; presumably only number_of_books entries are used -- TODO confirm)
+} Floor0;
+
+typedef struct // floor type 1 configuration; one of the two layouts stored in the Floor union
+{
+   uint8 partitions;
+   uint8 partition_class_list[32]; // varies
+   uint8 class_dimensions[16]; // varies
+   uint8 class_subclasses[16]; // varies
+   uint8 class_masterbooks[16]; // varies
+   int16 subclass_books[16][8]; // varies
+   uint16 Xlist[31*8+2]; // varies (capacity 250 entries)
+   uint8 sorted_order[31*8+2];   // same capacity as Xlist; presumably indexes Xlist in sorted order -- TODO confirm
+   uint8 neighbors[31*8+2][2];   // two entries per Xlist slot; presumably low/high neighbor indices -- TODO confirm
+   uint8 floor1_multiplier;
+   uint8 rangebits;
+   int values;                   // presumably the number of Xlist entries in use -- TODO confirm
+} Floor1;
+
+typedef union // storage for one floor configuration in either layout; the union itself carries no type tag
+{
+   Floor0 floor0;
+   Floor1 floor1; // NOTE(review): which member is valid is presumably given by stb_vorbis.floor_types[] -- confirm
+} Floor;
+
+typedef struct // configuration of one residue (stb_vorbis.residue_config points at an array of these)
+{
+   uint32 begin, end;
+   uint32 part_size;
+   uint8 classifications;
+   uint8 classbook;           // presumably an index into stb_vorbis.codebooks -- TODO confirm
+   uint8 **classdata;
+   int16 (*residue_books)[8]; // pointer to rows of 8 int16 values
+} Residue;
+
+typedef struct // per-channel mapping data; Mapping.chan points at these
+{
+   uint8 magnitude; // NOTE(review): magnitude/angle presumably name the channels of a coupling step -- confirm
+   uint8 angle;
+   uint8 mux;
+} MappingChannel;
+
+typedef struct // configuration of one mapping (stb_vorbis.mapping points at an array of these)
+{
+   uint16 coupling_steps;
+   MappingChannel *chan;
+   uint8  submaps;            // presumably the number of submap_* entries in use -- TODO confirm
+   uint8  submap_floor[15]; // varies
+   uint8  submap_residue[15]; // varies
+} Mapping;
+
+typedef struct // configuration of one mode (stored inline in stb_vorbis.mode_config[])
+{
+   uint8 blockflag;      // presumably selects between blocksize[0] and blocksize[1] -- TODO confirm
+   uint8 mapping;        // presumably an index into stb_vorbis.mapping -- TODO confirm
+   uint16 windowtype;
+   uint16 transformtype;
+} Mode;
+
+typedef struct // state of one candidate page being CRC-checked; stb_vorbis.scan[] holds STB_VORBIS_PUSHDATA_CRC_COUNT of them
+{
+   uint32  goal_crc;    // expected crc if match
+   int     bytes_left;  // bytes left in packet
+   uint32  crc_so_far;  // running crc
+   int     bytes_done;  // bytes processed in _current_ chunk
+   uint32  sample_loc;  // granule pos encoded in page
+} CRCscan;
+
+typedef struct // description of one probed page; stb_vorbis keeps two of these, p_first and p_last
+{
+   uint32 page_start, page_end;  // presumably byte offsets delimiting the page -- TODO confirm
+   uint32 last_decoded_sample;
+} ProbedPage;
+
+struct stb_vorbis
+{
+  // user-accessible info
+   unsigned int sample_rate;
+   int channels;
+
+   unsigned int setup_memory_required;      // running total of the rounded sizes requested through setup_malloc()
+   unsigned int temp_memory_required;
+   unsigned int setup_temp_memory_required;
+
+   char *vendor;
+   int comment_list_length;
+   char **comment_list;
+
+  // input config
+#ifndef STB_VORBIS_NO_STDIO
+   FILE *f;
+   uint32 f_start;
+   int close_on_free;
+#endif
+
+   uint8 *stream;
+   uint8 *stream_start;
+   uint8 *stream_end;
+
+   uint32 stream_len;
+
+   uint8  push_mode;   // read by IS_PUSH_MODE() when both the push and pull APIs are compiled in
+
+   // the page to seek to when seeking to start, may be zero
+   uint32 first_audio_page_offset;
+
+   // p_first is the page on which the first audio packet ends
+   // (but not necessarily the page on which it starts)
+   ProbedPage p_first, p_last;
+
+  // memory management
+   stb_vorbis_alloc alloc; // alloc.alloc_buffer non-NULL: allocate from that buffer instead of malloc()
+   int setup_offset;       // offset of the next setup_malloc() block; advances upward
+   int temp_offset;        // offset of the lowest setup_temp_malloc() block; moves downward, never below setup_offset
+
+  // run-time results
+   int eof;
+   enum STBVorbisError error; // most recent code stored by error()
+
+  // user-useful data
+
+  // header info
+   int blocksize[2];
+   int blocksize_0, blocksize_1;
+   int codebook_count;
+   Codebook *codebooks;
+   int floor_count;
+   uint16 floor_types[64]; // varies
+   Floor *floor_config;
+   int residue_count;
+   uint16 residue_types[64]; // varies
+   Residue *residue_config;
+   int mapping_count;
+   Mapping *mapping;
+   int mode_count;
+   Mode mode_config[64];  // varies
+
+   uint32 total_samples;
+
+  // decode buffer
+   float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
+   float *outputs        [STB_VORBIS_MAX_CHANNELS];
+
+   float *previous_window[STB_VORBIS_MAX_CHANNELS];
+   int previous_length;
+
+   #ifndef STB_VORBIS_NO_DEFER_FLOOR
+   int16 *finalY[STB_VORBIS_MAX_CHANNELS];
+   #else
+   float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
+   #endif
+
+   uint32 current_loc; // sample location of next frame to decode
+   int    current_loc_valid;
+
+  // per-blocksize precomputed data
+
+   // twiddle factors
+   float *A[2],*B[2],*C[2];
+   float *window[2];
+   uint16 *bit_reverse[2];
+
+  // current page/packet/segment streaming info
+   uint32 serial; // stream serial number for verification
+   int last_page;
+   int segment_count;
+   uint8 segments[255];
+   uint8 page_flag;
+   uint8 bytes_in_seg;
+   uint8 first_decode;
+   int next_seg;
+   int last_seg;  // flag that we're on the last segment
+   int last_seg_which; // what was the segment number of the last seg?
+   uint32 acc;
+   int valid_bits;
+   int packet_bytes;
+   int end_seg_with_known_loc;
+   uint32 known_loc_for_packet;
+   int discard_samples_deferred;
+   uint32 samples_output;
+
+  // push mode scanning
+   int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
+#ifndef STB_VORBIS_NO_PUSHDATA_API
+   CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT]; // candidate pages being CRC-checked in parallel
+#endif
+
+  // sample-access
+   int channel_buffer_start;
+   int channel_buffer_end;
+};
+
+#if defined(STB_VORBIS_NO_PUSHDATA_API)
+   #define IS_PUSH_MODE(f)   FALSE
+#elif defined(STB_VORBIS_NO_PULLDATA_API)
+   #define IS_PUSH_MODE(f)   TRUE
+#else
+   #define IS_PUSH_MODE(f)   ((f)->push_mode)
+#endif
+
+typedef struct stb_vorbis vorb;
+
+static int error(vorb *f, enum STBVorbisError e)
+{
+   // records e on the decoder and always returns 0 for the caller to propagate
+   f->error = e;
+   if (e != VORBIS_need_more_data && !f->eof)
+      f->error = e; // repeated store: exists only as a place to set a breakpoint
+   return 0;
+}
+
+
+// these macros allocate temporary memory while decoding. if the caller
+// supplied no buffer (alloc.alloc_buffer is NULL) they use alloca(), so the
+// memory lives on the stack of the function that expands temp_alloc();
+// otherwise they carve space out of the caller's buffer via setup_temp_malloc().
+
+// bytes needed by make_block_array(): `count` pointers plus `count` blocks of `size` bytes
+#define array_size_required(count,size)  ((count)*(sizeof(void *)+(size)))
+#define temp_alloc(f,size)              ((f)->alloc.alloc_buffer ? setup_temp_malloc((f),(size)) : alloca(size))
+#define temp_free(f,p)                  ((void)0) // no per-block release; see temp_alloc_restore()
+#define temp_alloc_save(f)              ((f)->temp_offset)        // remember the current buffer position
+#define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))  // put back a position saved earlier
+// one temp_alloc() sized for, and then laid out as, a pointer table plus `count` subblocks
+#define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
+
+// treat mem as `count` pointers followed by `count` subblocks of `size` bytes; fill in and return the pointer table
+static void *make_block_array(void *mem, int count, int size)
+{
+   void **table = (void **) mem;
+   char *payload = (char *) (table + count); // subblocks start right after the table
+   int slot = 0;
+   while (slot < count) {
+      table[slot++] = payload;
+      payload += size;
+   }
+   return table;
+}
+
+static void *setup_malloc(vorb *f, int sz)
+{
+   char *base = (char *) f->alloc.alloc_buffer;
+   sz = (sz+7) & ~7; // round up to a multiple of 8 so later allocations stay aligned
+   f->setup_memory_required += sz; // tallied whether or not the request succeeds
+   if (!base)
+      return sz ? malloc(sz) : NULL; // no caller buffer: use the heap, NULL for a zero-size request
+   // caller buffer: setup blocks stack upward and must not run into temp_offset
+   if (f->setup_offset + sz > f->temp_offset) return NULL;
+   f->setup_offset += sz;
+   return base + f->setup_offset - sz;
+}
+
+static void setup_free(vorb *f, void *p)
+{
+   if (!f->alloc.alloc_buffer) // with a caller buffer, setup mem is a stack: nothing to release
+      free(p);
+}
+
+static void *setup_temp_malloc(vorb *f, int sz)
+{
+   sz = (sz+7) & ~7; // round up to a multiple of 8 so later allocations stay aligned
+   if (!f->alloc.alloc_buffer)
+      return malloc(sz);
+   // caller buffer: temp blocks are carved downward from temp_offset toward the setup area
+   if (f->temp_offset - sz < f->setup_offset) return NULL;
+   f->temp_offset -= sz;
+   return (char *) f->alloc.alloc_buffer + f->temp_offset;
+}
+
+static void setup_temp_free(vorb *f, void *p, int sz)
+{
+   if (!f->alloc.alloc_buffer) {
+      free(p);
+      return;
+   }
+   f->temp_offset += (sz+7)&~7; // hand back the same rounded size setup_temp_malloc() took
+}
+
+#define CRC32_POLY    0x04c11db7   // from spec
+
+static uint32 crc_table[256]; // filled by crc32_init(), read by crc32_update()
+static void crc32_init(void)
+{
+   int byte, bit;
+   for (byte=0; byte < 256; ++byte) {
+      uint32 reg = (uint32) byte << 24; // the byte starts in the top 8 bits
+      for (bit=0; bit < 8; ++bit)       // shift left 8 times, xoring in the poly whenever the top bit was set
+         reg = (reg >= (1U<<31)) ? (reg << 1) ^ CRC32_POLY : (reg << 1);
+      crc_table[byte] = reg;
+   }
+}
+
+static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
+{
+   return crc_table[(crc >> 24) ^ byte] ^ (crc << 8); // fold one input byte into the running crc
+}
+
+
+// reverses the order of the 32 bits of n; used in setup, and for huffman that doesn't go fast path
+static unsigned int bit_reverse(unsigned int n)
+{
+  n = ((n >>  1) & 0x55555555) | ((n <<  1) & 0xAAAAAAAA); // swap neighbouring bits
+  n = ((n >>  2) & 0x33333333) | ((n <<  2) & 0xCCCCCCCC); // swap bit pairs
+  n = ((n >>  4) & 0x0F0F0F0F) | ((n <<  4) & 0xF0F0F0F0); // swap nibbles
+  n = ((n >>  8) & 0x00FF00FF) | ((n <<  8) & 0xFF00FF00); // swap bytes
+  return (n << 16) | (n >> 16);                            // swap halves
+}
+
+static float square(float x)
+{
+   return x * x; // x squared
+}
+
+// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
+// as required by the specification. fast(?) implementation from stb.h
+// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
+static int ilog(int32 n)
+{
+   static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
+   int base;
+
+   if (n < 0) return 0; // signed n returns 0
+   // pick the multiple-of-5 shift that brings n below 16 (splitting the
+   // range at 1<<14 and then 1<<24 keeps it to 2-3 compares), then let the
+   // 16-entry table supply the remaining bit count
+   if (n < (1 << 14))
+      base = n < (1 <<  4) ?  0 : n < (1 <<  9) ?  5 : 10;
+   else if (n < (1 << 24))
+      base = n < (1 << 19) ? 15 : 20;
+   else
+      base = n < (1 << 29) ? 25 : 30;
+   return base + log2_4[n >> base];
+}
+
+#ifndef M_PI
+  #define M_PI  3.14159265358979323846264f  // from CRC
+#endif
+
+// code length assigned to a value with no huffman encoding
+#define NO_CODE   255
+
+/////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
+//
+// these functions are only called at setup, and only a few times
+// per file
+
+static float float32_unpack(uint32 x)
+{
+   // from the specification: bit 31 is the sign, bits 21..30 the exponent
+   // (offset by 788 here) and bits 0..20 the integer mantissa
+   int    exponent  = (int) ((x & 0x7fe00000) >> 21) - 788;
+   double magnitude = (double) (x & 0x1fffff);
+   double res = (x & 0x80000000) ? -magnitude : magnitude;
+   return (float) ldexp((float)res, exponent);
+}
+
+
+// zlib & jpeg huffman tables assume that the output symbols
+// can either be arbitrarily arranged, or have monotonically
+// increasing frequencies--they rely on the lengths being sorted;
+// this makes for a very simple generation algorithm.
+// vorbis allows a huffman table with non-sorted lengths. This
+// requires a more sophisticated construction, since symbols in
+// order do not map to huffman codes "in order".
+static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
+{
+   if (c->sparse) { // packed layout: slot is the insertion index, symbol is recorded alongside
+      c->codewords[count]        = huff_code;
+      c->codeword_lengths[count] = len;
+      values[count]              = symbol;
+   } else {         // dense layout: slot is the symbol itself, so only the code is stored
+      c->codewords[symbol] = huff_code;
+   }
+}
+
+static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
+{  // gives every entry whose length is not NO_CODE a huffman code via add_entry(); FALSE if a length cannot be placed
+   int i,k,m=0;          // m: number of entries passed to add_entry() so far
+   uint32 available[32]; // available[d]: top-aligned code of the free leaf at depth d, or 0 for none
+
+   memset(available, 0, sizeof(available));
+   // find the first entry
+   for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
+   if (k == n) { assert(c->sorted_entries == 0); return TRUE; } // every entry is NO_CODE: nothing to assign
+   assert(len[k] < 32); // no error return required, code reading lens checks this
+   // add to the list
+   add_entry(c, 0, k, m++, len[k], values);
+   // add all available leaves
+   for (i=1; i <= len[k]; ++i)
+      available[i] = 1U << (32-i);
+   // note that the above code treats the first case specially,
+   // but it's really the same as the following code, so they
+   // could probably be combined (except the initial code is 0,
+   // and I use 0 in available[] to mean 'empty')
+   for (i=k+1; i < n; ++i) {
+      uint32 res;
+      int z = len[i], y;
+      if (z == NO_CODE) continue;
+      assert(z < 32); // no error return required, code reading lens checks this
+      // find lowest available leaf (should always be earliest,
+      // which is what the specification calls for)
+      // note that this property, and the fact we can never have
+      // more than one free leaf at a given level, isn't totally
+      // trivial to prove, but it seems true and the assert never
+      // fires, so!
+      while (z > 0 && !available[z]) --z;
+      if (z == 0) { return FALSE; } // no free leaf at depth len[i] or shallower
+      res = available[z];
+      available[z] = 0;
+      add_entry(c, bit_reverse(res), i, m++, len[i], values); // stored code is the bit-reversed form
+      // the leaf taken was shallower than len[i]: mark a free leaf at each depth from z+1 to len[i]
+      if (z != len[i]) {
+         for (y=len[i]; y > z; --y) {
+            assert(available[y] == 0);
+            available[y] = res + (1 << (32-y));
+         }
+      }
+   }
+   return TRUE;
+}
+
+// accelerated huffman table allows fast O(1) match of all symbols
+// of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
+static void compute_accelerated_huffman(Codebook *c)
+{
+   int entry, limit, slot;
+   // start with every bit pattern unmapped
+   for (slot=0; slot < FAST_HUFFMAN_TABLE_SIZE; ++slot)
+      c->fast_huffman[slot] = -1;
+   limit = c->sparse ? c->sorted_entries : c->entries;
+   #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
+   if (limit > 32767) limit = 32767; // largest possible value we can encode!
+   #endif
+   for (entry=0; entry < limit; ++entry) {
+      uint32 code;
+      int bits = c->codeword_lengths[entry];
+      if (bits > STB_VORBIS_FAST_HUFFMAN_LENGTH) continue; // too long for the table
+      code = c->sparse ? bit_reverse(c->sorted_codewords[entry]) : c->codewords[entry];
+      // the code sits in the low `bits` bits of the index; repeat the entry
+      // for every combination of the bits above it
+      for (; code < FAST_HUFFMAN_TABLE_SIZE; code += 1 << bits)
+         c->fast_huffman[code] = entry;
+   }
+}
+
+#ifdef _MSC_VER
+#define STBV_CDECL __cdecl
+#else
+#define STBV_CDECL
+#endif
+
+static int STBV_CDECL uint32_compare(const void *p, const void *q)
+{
+   const uint32 lhs = * (const uint32 *) p;
+   const uint32 rhs = * (const uint32 *) q;
+   return (lhs > rhs) - (lhs < rhs); // qsort comparator: -1, 0 or 1 for ascending order
+}
+
+static int include_in_sort(Codebook *c, uint8 len)
+{
+   // sparse books sort every entry; dense books sort only the codes that are
+   // too long for the fast table, and never the NO_CODE entries
+   if (c->sparse) { assert(len != NO_CODE); return TRUE; }
+   return (len != NO_CODE && len > STB_VORBIS_FAST_HUFFMAN_LENGTH) ? TRUE : FALSE;
+}
+
+// if the fast table above doesn't work, we want to binary
+// search them... need to reverse the bits
+static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
+{  // fills sorted_codewords (ascending, bit-reversed) and the matching sorted_values for the binary-search path
+   int i, len;
+   // build a list of all the entries
+   // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
+   // this is kind of a frivolous optimization--I don't see any performance improvement,
+   // but it's like 4 extra lines of code, so.
+   if (!c->sparse) {
+      int k = 0;
+      for (i=0; i < c->entries; ++i)
+         if (include_in_sort(c, lengths[i]))
+            c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
+      assert(k == c->sorted_entries);
+   } else {
+      for (i=0; i < c->sorted_entries; ++i)
+         c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
+   }
+
+   qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
+   c->sorted_codewords[c->sorted_entries] = 0xffffffff; // sentinel: the array must have room for sorted_entries+1 values
+
+   len = c->sparse ? c->sorted_entries : c->entries;
+   // now we need to indicate how they correspond; we could either
+   //   #1: sort a different data structure that says who they correspond to
+   //   #2: for each sorted entry, search the original list to find who corresponds
+   //   #3: for each original entry, find the sorted entry
+   // #1 requires extra storage, #2 is slow, #3 can use binary search!
+   for (i=0; i < len; ++i) {
+      int huff_len = c->sparse ? lengths[values[i]] : lengths[i]; // sparse: lengths[] is indexed by symbol, values[i] is slot i's symbol
+      if (include_in_sort(c,huff_len)) {
+         uint32 code = bit_reverse(c->codewords[i]);
+         int x=0, n=c->sorted_entries; // search window is [x, x+n)
+         while (n > 1) {
+            // invariant: sc[x] <= code < sc[x+n]
+            int m = x + (n >> 1);
+            if (c->sorted_codewords[m] <= code) {
+               x = m;
+               n -= (n>>1);
+            } else {
+               n >>= 1;
+            }
+         }
+         assert(c->sorted_codewords[x] == code);
+         if (c->sparse) {
+            c->sorted_values[x] = values[i];
+            c->codeword_lengths[x] = huff_len; // sparse: codeword_lengths is rewritten to follow sorted order
+         } else {
+            c->sorted_values[x] = i;
+         }
+      }
+   }
+}
+
+// Returns nonzero when the six bytes at 'data' spell "vorbis"; only run while parsing the header (3 times).
+static int vorbis_validate(uint8 *data)
+{
+   static uint8 magic[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
+   return !memcmp(data, magic, sizeof(magic));
+}
+
+// Returns the largest r for which pow(r, dim) <= entries, or -1 if the floating-point result fails
+// the checks below (called from setup only, once per code book; formula implied by specification)
+static int lookup1_values(int entries, int dim)
+{
+   int r = (int) floor(exp((float) log((float) entries) / dim)); // first estimate: entries^(1/dim), rounded down
+   if ((int) floor(pow((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
+      ++r;                                              // floor() to avoid _ftol() when non-CRT
+   if (pow((float) r+1, dim) <= entries) // still too small after the single correction step: give up
+      return -1;
+   if ((int) floor(pow((float) r, dim)) > entries) // estimate ended up too large: give up
+      return -1;
+   return r;
+}
+
+// called twice per file; fills A and B with 2*(n/4) floats each and C with 2*(n/8) floats of cos/sin table values
+static void compute_twiddle_factors(int n, float *A, float *B, float *C)
+{
+   int n4 = n >> 2, n8 = n >> 3;
+   int k,k2;
+
+   for (k=k2=0; k < n4; ++k,k2+=2) { // k2 == 2*k: each iteration writes one (even, odd) pair
+      A[k2  ] = (float)  cos(4*k*M_PI/n);
+      A[k2+1] = (float) -sin(4*k*M_PI/n);
+      B[k2  ] = (float)  cos((k2+1)*M_PI/n/2) * 0.5f; // B entries are pre-scaled by one half
+      B[k2+1] = (float)  sin((k2+1)*M_PI/n/2) * 0.5f;
+   }
+   for (k=k2=0; k < n8; ++k,k2+=2) {
+      C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
+      C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
+   }
+}
+
+static void compute_window(int n, float *window) // fills the first n/2 entries of 'window'
+{
+   int n2 = n >> 1, i;
+   for (i=0; i < n2; ++i) // window[i] = sin(pi/2 * square(sin((i + 0.5) / n2 * pi/2))); square() is defined elsewhere in the file
+      window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
+}
+
+static void compute_bitreverse(int n, uint16 *rev)
+{  // fills rev[0 .. n/8) with bit_reverse(i) shifted right by (32-ld+3) and then multiplied by 4
+   const int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
+   const int shift = 32 - ld + 3, count = n >> 3;
+   int idx;
+   for (idx = 0; idx < count; ++idx) rev[idx] = (bit_reverse(idx) >> shift) << 2;
+}
+
+static int init_blocksize(vorb *f, int b, int n) // allocates and fills the per-blocksize tables in slot b for block length n
+{
+   int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
+   f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
+   f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
+   f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
+   if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem); // any failed allocation reports out-of-memory
+   compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
+   f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
+   if (!f->window[b]) return error(f, VORBIS_outofmem);
+   compute_window(n, f->window[b]);
+   f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
+   if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
+   compute_bitreverse(n, f->bit_reverse[b]);
+   return TRUE; // all five tables for this slot are ready
+}
+
+static void neighbors(uint16 *x, int n, int *plow, int *phigh)
+{  // among x[0..n): *plow = index of the largest value below x[n], *phigh = index of the smallest value above it
+   int best_below = -1, best_above = 65536, idx;  // sentinels just outside the range a uint16 can hold
+   for (idx = 0; idx < n; ++idx) {
+      const int v = x[idx];
+      // strict comparisons: on equal values the earliest index wins; an output stays untouched if nothing qualifies
+      if (v > best_below && v < x[n]) { best_below = v; *plow  = idx; }
+      if (v < best_above && v > x[n]) { best_above = v; *phigh = idx; }
+   }
+}
+
+// sort record: 'x' is the sort key; 'id' holds the original index (this struct was repurposed -- the second field used to be y)
+typedef struct
+{
+   uint16 x,id;
+} stbv__floor_ordering;
+
+static int STBV_CDECL point_compare(const void *p, const void *q)
+{  // comparator over stbv__floor_ordering records: ascending by the x field, id is ignored
+   const uint16 ax = ((const stbv__floor_ordering *) p)->x;
+   const uint16 bx = ((const stbv__floor_ordering *) q)->x;
+   return (ax > bx) - (ax < bx);
+}
+
+//
+/////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
+
+
+#if defined(STB_VORBIS_NO_STDIO) // FILE* paths compiled out: readers always take the memory-stream branch
+   #define USE_MEMORY(z)    TRUE
+#else // otherwise a non-NULL z->stream selects the memory branch over the FILE* branch
+   #define USE_MEMORY(z)    ((z)->stream)
+#endif
+
+static uint8 get8(vorb *z) // reads one input byte; at end of input sets z->eof and returns 0
+{
+   if (USE_MEMORY(z)) {
+      if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; } // memory buffer exhausted
+      return *z->stream++;
+   }
+
+   #ifndef STB_VORBIS_NO_STDIO
+   {
+   int c = fgetc(z->f);
+   if (c == EOF) { z->eof = TRUE; return 0; } // EOF (or read error) on the FILE*
+   return c;
+   }
+   #endif
+}
+
+static uint32 get32(vorb *f)
+{  // reads four bytes via get8() and assembles them least-significant byte first
+   uint32 value = 0;
+   int shift;
+   // get8() yields 0 once end of input is hit, so a short read leaves the remaining high bytes zero
+   for (shift = 0; shift < 32; shift += 8)
+      value |= (uint32) get8(f) << shift;
+   return value;
+}
+
+static int getn(vorb *z, uint8 *data, int n) // copies exactly n input bytes into data; returns 1 on success, 0 (with z->eof set) otherwise
+{
+   if (USE_MEMORY(z)) {
+      if (z->stream+n > z->stream_end) { z->eof = 1; return 0; } // not enough bytes left: nothing is copied or consumed
+      memcpy(data, z->stream, n);
+      z->stream += n;
+      return 1;
+   }
+
+   #ifndef STB_VORBIS_NO_STDIO
+   if (fread(data, n, 1, z->f) == 1) // one item of n bytes: all-or-nothing from the caller's point of view
+      return 1;
+   else {
+      z->eof = 1;
+      return 0;
+   }
+   #endif
+}
+
+static void skip(vorb *z, int n) // advances the read position by n bytes without reading them
+{
+   if (USE_MEMORY(z)) {
+      z->stream += n;
+      if (z->stream >= z->stream_end) z->eof = 1; // landing on or past the end counts as end of input
+      return;
+   }
+   #ifndef STB_VORBIS_NO_STDIO
+   {
+      long x = ftell(z->f);
+      fseek(z->f, x+n, SEEK_SET); // absolute seek to current position + n; the result is not checked
+   }
+   #endif
+}
+
+static int set_file_offset(stb_vorbis *f, unsigned int loc) // moves the read position to offset loc; returns 1 on success, 0 on failure
+{
+   #ifndef STB_VORBIS_NO_PUSHDATA_API
+   if (f->push_mode) return 0; // seeking is refused in push mode
+   #endif
+   f->eof = 0;
+   if (USE_MEMORY(f)) {
+      if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) { // past the end, or pointer wrapped
+         f->stream = f->stream_end;
+         f->eof = 1;
+         return 0;
+      } else {
+         f->stream = f->stream_start + loc;
+         return 1;
+      }
+   }
+   #ifndef STB_VORBIS_NO_STDIO
+   if (loc + f->f_start < loc || loc >= 0x80000000) { // unsigned overflow, or offset does not fit in 31 bits
+      loc = 0x7fffffff;
+      f->eof = 1;
+   } else {
+      loc += f->f_start; // loc is relative to f->f_start within the FILE*
+   }
+   if (!fseek(f->f, loc, SEEK_SET))
+      return 1;
+   f->eof = 1; // the seek failed: flag eof and reposition relative to the end of the file
+   fseek(f->f, f->f_start, SEEK_END);
+   return 0;
+   #endif
+}
+
+
+static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 }; // ASCII "OggS", the same four bytes capture_pattern() checks
+
+static int capture_pattern(vorb *f)
+{  // consumes up to four bytes, stopping at the first that deviates from 0x4f 0x67 0x67 0x53 ("OggS")
+   static const uint8 expected[4] = { 0x4f, 0x67, 0x67, 0x53 };
+   int i;
+   for (i = 0; i < 4; ++i)
+      if (expected[i] != get8(f)) return FALSE;
+   return TRUE;
+}
+
+#define PAGEFLAG_continued_packet   1 // bit mask tested against f->page_flag (the page header's flag byte)
+#define PAGEFLAG_first_page         2 // presumably marks the first page of the stream -- not used in this part of the file, confirm
+#define PAGEFLAG_last_page          4 // presumably marks the last page of the stream -- not used in this part of the file, confirm
+
+static int start_page_no_capturepattern(vorb *f) // parses the page header that follows an already-consumed capture pattern
+{
+   uint32 loc0,loc1,n;
+   if (f->first_decode && !IS_PUSH_MODE(f)) {
+      f->p_first.page_start = stb_vorbis_get_file_offset(f) - 4; // -4: step back over the capture pattern
+   }
+   // stream structure version
+   if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
+   // header flag
+   f->page_flag = get8(f);
+   // absolute granule position
+   loc0 = get32(f); // low 32 bits
+   loc1 = get32(f); // high 32 bits
+   // @TODO: validate loc0,loc1 as valid positions?
+   // stream serial number -- vorbis doesn't interleave, so discard
+   get32(f);
+   //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
+   // page sequence number
+   n = get32(f);
+   f->last_page = n;
+   // CRC32
+   get32(f); // read and discarded: the checksum is not verified here
+   // page_segments
+   f->segment_count = get8(f);
+   if (!getn(f, f->segments, f->segment_count))
+      return error(f, VORBIS_unexpected_eof);
+   // assume we _don't_ know the sample position of any segments
+   f->end_seg_with_known_loc = -2;
+   if (loc0 != ~0U || loc1 != ~0U) { // an all-ones granule position is treated as "no position"
+      int i;
+      // determine which packet is the last one that will complete
+      for (i=f->segment_count-1; i >= 0; --i)
+         if (f->segments[i] < 255)
+            break;
+      // 'i' is now the index of the _last_ segment of a packet that ends
+      if (i >= 0) {
+         f->end_seg_with_known_loc = i;
+         f->known_loc_for_packet   = loc0; // only the low 32 bits of the position are kept
+      }
+   }
+   if (f->first_decode) {
+      int i,len;
+      len = 0;
+      for (i=0; i < f->segment_count; ++i)
+         len += f->segments[i];
+      len += 27 + f->segment_count; // 27 fixed header bytes (including the capture pattern) + the segment table
+      f->p_first.page_end = f->p_first.page_start + len;
+      f->p_first.last_decoded_sample = loc0;
+   }
+   f->next_seg = 0; // reading resumes at the first segment of this page
+   return TRUE;
+}
+
+static int start_page(vorb *f)
+{  // checks the 4-byte capture pattern, then parses the rest of the page header
+   return capture_pattern(f) ? start_page_no_capturepattern(f)
+                             : error(f, VORBIS_missing_capture_pattern);
+}
+
+static int start_packet(vorb *f)
+{  // prepares to read a new packet, loading pages for as long as no segment is pending (next_seg == -1)
+   for (; f->next_seg == -1; ) {
+      if (!start_page(f)) return FALSE;
+      if ((f->page_flag & PAGEFLAG_continued_packet) != 0)  // a new packet cannot begin on a continuation page
+         return error(f, VORBIS_continued_packet_flag_invalid);
+   }
+   // f->next_seg is now valid; reset the per-packet reader state
+   f->bytes_in_seg = 0;
+   f->packet_bytes = 0;
+   f->valid_bits = 0;
+   f->last_seg = FALSE;
+   return TRUE;
+}
+
+static int maybe_start_packet(vorb *f) // like start_packet(), but running out of input exactly at a page boundary is not reported as an error
+{
+   if (f->next_seg == -1) { // no segment pending: a new page has to be read first
+      int x = get8(f);
+      if (f->eof) return FALSE; // EOF at page boundary is not an error!
+      if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
+      if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
+      if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
+      if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
+      if (!start_page_no_capturepattern(f)) return FALSE;
+      if (f->page_flag & PAGEFLAG_continued_packet) {
+         // set up enough state that we can read this packet if we want,
+         // e.g. during recovery
+         f->last_seg = FALSE;
+         f->bytes_in_seg = 0;
+         return error(f, VORBIS_continued_packet_flag_invalid);
+      }
+   }
+   return start_packet(f); // next_seg is valid here, so this only resets the per-packet state
+}
+
+static int next_segment(vorb *f) // advances to the next segment of the current packet; returns its length, or 0 if none is available
+{
+   int len;
+   if (f->last_seg) return 0; // the packet's final segment was already handed out
+   if (f->next_seg == -1) { // current page used up: the packet must continue on the next page
+      f->last_seg_which = f->segment_count-1; // in case start_page fails
+      if (!start_page(f)) { f->last_seg = 1; return 0; }
+      if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
+   }
+   len = f->segments[f->next_seg++];
+   if (len < 255) { // a segment shorter than 255 bytes ends the packet
+      f->last_seg = TRUE;
+      f->last_seg_which = f->next_seg-1;
+   }
+   if (f->next_seg >= f->segment_count)
+      f->next_seg = -1; // -1 marks "no more segments on this page"
+   assert(f->bytes_in_seg == 0);
+   f->bytes_in_seg = len;
+   return len;
+}
+
+#define EOP    (-1) // returned by get8_packet_raw() once the current packet has no more bytes
+#define INVALID_BITS  (-1) // stored in f->valid_bits by get_bits() after hitting EOP; later get_bits() calls then return 0
+
+static int get8_packet_raw(vorb *f) // returns the next byte of the current packet, or EOP when the packet is exhausted
+{
+   if (!f->bytes_in_seg) {  // CLANG!
+      if (f->last_seg) return EOP; // current segment drained and it was the packet's last one
+      else if (!next_segment(f)) return EOP; // next_segment() returns 0 for "nothing more" and for zero-length segments
+   }
+   assert(f->bytes_in_seg > 0);
+   --f->bytes_in_seg;
+   ++f->packet_bytes;
+   return get8(f);
+}
+
+static int get8_packet(vorb *f)
+{  // byte-level packet read: fetches the next byte (or EOP) and marks the bit accumulator as empty
+   const int byte = get8_packet_raw(f);
+   f->valid_bits = 0;
+   return byte;
+}
+
+static int get32_packet(vorb *f)
+{  // reads four packet bytes, least-significant first; an EOP (-1) byte is not detected here and is folded in as all-ones
+   uint32 x;
+   x = get8_packet(f);
+   x += (uint32) get8_packet(f) << 8;   // cast before shifting: get8_packet() may return EOP (-1), and
+   x += (uint32) get8_packet(f) << 16;  // left-shifting a negative int is undefined behavior
+   x += (uint32) get8_packet(f) << 24;
+   return x;
+}
+
+static void flush_packet(vorb *f)
+{  // discards whatever remains of the current packet by reading bytes until EOP comes back
+   for (;;) if (get8_packet_raw(f) == EOP) break;
+}
+
+// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
+// as the huffman decoder?
+static uint32 get_bits(vorb *f, int n) // returns the next n bits of the packet (low bits first), or 0 once the packet has run out
+{
+   uint32 z;
+
+   if (f->valid_bits < 0) return 0; // INVALID_BITS: an earlier read already ran past the end of the packet
+   if (f->valid_bits < n) {
+      if (n > 24) {
+         // the accumulator technique below would not work correctly in this case
+         z = get_bits(f, 24);
+         z += get_bits(f, n-24) << 24;
+         return z;
+      }
+      if (f->valid_bits == 0) f->acc = 0;
+      while (f->valid_bits < n) { // refill the accumulator one byte at a time
+         int z = get8_packet_raw(f);
+         if (z == EOP) {
+            f->valid_bits = INVALID_BITS;
+            return 0;
+         }
+         f->acc += z << f->valid_bits; // the new byte goes above the bits already held
+         f->valid_bits += 8;
+      }
+   }
+
+   assert(f->valid_bits >= n);
+   z = f->acc & ((1 << n)-1); // take the low n bits ...
+   f->acc >>= n; // ... and drop them from the accumulator
+   f->valid_bits -= n;
+   return z;
+}
+
+// @OPTIMIZE: primary accumulator for huffman
+// expand the buffer to as many bits as possible without reading off end of packet
+// it might be nice to allow f->valid_bits and f->acc to be stored in registers,
+// e.g. cache them locally and decode locally
+static __forceinline void prep_huffman(vorb *f)
+{
+   if (f->valid_bits <= 24) { // room for at least one more byte in the accumulator
+      if (f->valid_bits == 0) f->acc = 0;
+      do {
+         int z;
+         if (f->last_seg && !f->bytes_in_seg) return; // packet fully consumed: stop without touching the input
+         z = get8_packet_raw(f);
+         if (z == EOP) return;
+         f->acc += (unsigned) z << f->valid_bits;
+         f->valid_bits += 8;
+      } while (f->valid_bits <= 24); // stops once more than 24 bits are held
+   }
+}
+
+enum // packet identifiers; presumably the type byte of the three header packets (not used in this part of the file -- confirm)
+{
+   VORBIS_packet_id = 1,
+   VORBIS_packet_comment = 3,
+   VORBIS_packet_setup = 5
+};
+
+static int codebook_decode_scalar_raw(vorb *f, Codebook *c) // slow-path Huffman decode; returns the decoded index, or -1 on failure
+{
+   int i;
+   prep_huffman(f);
+
+   if (c->codewords == NULL && c->sorted_codewords == NULL) // no lookup data at all for this codebook
+      return -1;
+
+   // cases to use binary search: sorted_codewords && !c->codewords
+   //                             sorted_codewords && c->entries > 8
+   if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
+      // binary search
+      uint32 code = bit_reverse(f->acc); // the sorted table holds bit-reversed codewords
+      int x=0, n=c->sorted_entries, len;
+
+      while (n > 1) {
+         // invariant: sc[x] <= code < sc[x+n]
+         int m = x + (n >> 1);
+         if (c->sorted_codewords[m] <= code) {
+            x = m;
+            n -= (n>>1);
+         } else {
+            n >>= 1;
+         }
+      }
+      // x is now the sorted index
+      if (!c->sparse) x = c->sorted_values[x];
+      // x is now sorted index if sparse, or symbol otherwise
+      len = c->codeword_lengths[x];
+      if (f->valid_bits >= len) { // consume the codeword only if that many bits are actually available
+         f->acc >>= len;
+         f->valid_bits -= len;
+         return x;
+      }
+
+      f->valid_bits = 0; // not enough bits left in the packet for this codeword
+      return -1;
+   }
+
+   // if small, linear search
+   assert(!c->sparse);
+   for (i=0; i < c->entries; ++i) {
+      if (c->codeword_lengths[i] == NO_CODE) continue; // unused entry
+      if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) { // low bits of the accumulator match this codeword
+         if (f->valid_bits >= c->codeword_lengths[i]) {
+            f->acc >>= c->codeword_lengths[i];
+            f->valid_bits -= c->codeword_lengths[i];
+            return i;
+         }
+         f->valid_bits = 0;
+         return -1;
+      }
+   }
+
+   error(f, VORBIS_invalid_stream); // no codeword matched the accumulator
+   f->valid_bits = 0;
+   return -1;
+}
+
+#ifndef STB_VORBIS_NO_INLINE_DECODE // default: the fast-table lookup is expanded inline at each use of DECODE_RAW
+
+#define DECODE_RAW(var, f,c)                                  \
+   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
+      prep_huffman(f);                                        \
+   var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
+   var = c->fast_huffman[var];                                \
+   if (var >= 0) {                                            \
+      int n = c->codeword_lengths[var];                       \
+      f->acc >>= n;                                           \
+      f->valid_bits -= n;                                     \
+      if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
+   } else {                                                   \
+      var = codebook_decode_scalar_raw(f,c);                  \
+   }
+
+#else // STB_VORBIS_NO_INLINE_DECODE: the same steps as the macro above, as a single shared function
+
+static int codebook_decode_scalar(vorb *f, Codebook *c) // returns the decoded index, or -1 if the packet ran out of bits
+{
+   int i;
+   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
+      prep_huffman(f);
+   // fast huffman table lookup
+   i = f->acc & FAST_HUFFMAN_TABLE_MASK;
+   i = c->fast_huffman[i];
+   if (i >= 0) { // table hit: consume the codeword's bits
+      f->acc >>= c->codeword_lengths[i];
+      f->valid_bits -= c->codeword_lengths[i];
+      if (f->valid_bits < 0) { f->valid_bits = 0; return -1; } // consumed more bits than were valid
+      return i;
+   }
+   return codebook_decode_scalar_raw(f,c); // table miss (negative entry): fall back to the slow path
+}
+
+#define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);
+
+#endif
+
+#define DECODE(var,f,c)                                       \
+   DECODE_RAW(var,f,c)                                        \
+   if (c->sparse) var = c->sorted_values[var]; // sparse books: map the sorted index to its stored value
+
+#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK // default: VQ decode uses the raw index, without the sorted_values mapping
+  #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
+#else // STB_VORBIS_DIVIDES_IN_CODEBOOK: VQ decode uses the mapped value
+  #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
+#endif
+
+
+
+
+
+
+// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
+// where we avoid one addition (in this build both element macros expand identically and the base is 0)
+#define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
+#define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
+#define CODEBOOK_ELEMENT_BASE(c)         (0)
+
+static int codebook_decode_start(vorb *f, Codebook *c) // decodes one VQ codeword index; returns it, or a negative value on failure
+{
+   int z = -1;
+
+   // type 0 is only legal in a scalar context
+   if (c->lookup_type == 0)
+      error(f, VORBIS_invalid_stream);
+   else {
+      DECODE_VQ(z,f,c);
+      if (c->sparse) assert(z < c->sorted_entries);
+      if (z < 0) {  // check for EOP
+         if (!f->bytes_in_seg)
+            if (f->last_seg)
+               return z; // clean end of packet: no error is recorded
+         error(f, VORBIS_invalid_stream); // failed although packet data remains
+      }
+   }
+   return z;
+}
+
+static int codebook_decode(vorb *f, Codebook *c, float *output, int len) // decodes one vector and adds up to len of its elements into output[]
+{
+   int i,z = codebook_decode_start(f,c);
+   if (z < 0) return FALSE;
+   if (len > c->dimensions) len = c->dimensions; // never produce more elements than the vector has
+
+#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
+   if (c->lookup_type == 1) { // lookup type 1: each element's table offset is derived from z by division/modulo
+      float last = CODEBOOK_ELEMENT_BASE(c);
+      int div = 1;
+      for (i=0; i < len; ++i) {
+         int off = (z / div) % c->lookup_values;
+         float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
+         output[i] += val;
+         if (c->sequence_p) last = val + c->minimum_value;
+         div *= c->lookup_values;
+      }
+      return TRUE;
+   }
+#endif
+
+   z *= c->dimensions; // z becomes the offset of the vector's first element in multiplicands[]
+   if (c->sequence_p) {
+      float last = CODEBOOK_ELEMENT_BASE(c);
+      for (i=0; i < len; ++i) {
+         float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+         output[i] += val;
+         last = val + c->minimum_value; // sequence_p: each element builds on the previous one
+      }
+   } else {
+      float last = CODEBOOK_ELEMENT_BASE(c);
+      for (i=0; i < len; ++i) {
+         output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+      }
+   }
+
+   return TRUE;
+}
+
+static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step) // like codebook_decode(), but element i is added to output[i*step]
+{
+   int i,z = codebook_decode_start(f,c);
+   float last = CODEBOOK_ELEMENT_BASE(c);
+   if (z < 0) return FALSE;
+   if (len > c->dimensions) len = c->dimensions; // never produce more elements than the vector has
+
+#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
+   if (c->lookup_type == 1) { // lookup type 1: each element's table offset is derived from z by division/modulo
+      int div = 1;
+      for (i=0; i < len; ++i) {
+         int off = (z / div) % c->lookup_values;
+         float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
+         output[i*step] += val;
+         if (c->sequence_p) last = val;
+         div *= c->lookup_values;
+      }
+      return TRUE;
+   }
+#endif
+
+   z *= c->dimensions; // z becomes the offset of the vector's first element in multiplicands[]
+   for (i=0; i < len; ++i) {
+      float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+      output[i*step] += val;
+      if (c->sequence_p) last = val; // sequence_p: each element builds on the previous one
+   }
+
+   return TRUE;
+}
+
+static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
+{  // decodes vectors until total_decode elements are produced, adding them round-robin into the ch channel buffers
+   int c_inter = *c_inter_p;   // channel that receives the next element
+   int p_inter = *p_inter_p;   // sample position within each channel buffer
+   int i,z, effective = c->dimensions;
+   // returns TRUE on success (updating *c_inter_p / *p_inter_p), FALSE at end of packet or on an invalid stream
+   // type 0 is only legal in a scalar context
+   if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
+
+   while (total_decode > 0) {
+      float last = CODEBOOK_ELEMENT_BASE(c);
+      DECODE_VQ(z,f,c);
+      #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
+      assert(!c->sparse || z < c->sorted_entries);
+      #endif
+      if (z < 0) {
+         if (!f->bytes_in_seg)
+            if (f->last_seg) return FALSE; // clean end of packet: no error is recorded
+         return error(f, VORBIS_invalid_stream);
+      }
+
+      // if this will take us off the end of the buffers, stop short!
+      // we check by computing the length of the virtual interleaved
+      // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
+      // and the length we'll be using (effective); what remains is length minus offset
+      if (c_inter + p_inter*ch + effective > len * ch) {
+         effective = len*ch - (p_inter*ch + c_inter); // was "- c_inter", which overshot the buffers by 2*c_inter elements
+      }
+
+   #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
+      if (c->lookup_type == 1) {
+         int div = 1;
+         for (i=0; i < effective; ++i) {
+            int off = (z / div) % c->lookup_values;
+            float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
+            if (outputs[c_inter])
+               outputs[c_inter][p_inter] += val;
+            if (++c_inter == ch) { c_inter = 0; ++p_inter; }
+            if (c->sequence_p) last = val;
+            div *= c->lookup_values;
+         }
+      } else
+   #endif
+      {
+         z *= c->dimensions; // z becomes the offset of the vector's first element in multiplicands[]
+         if (c->sequence_p) {
+            for (i=0; i < effective; ++i) {
+               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+               if (outputs[c_inter]) // a NULL channel buffer is skipped, but still advances the position
+                  outputs[c_inter][p_inter] += val;
+               if (++c_inter == ch) { c_inter = 0; ++p_inter; }
+               last = val;
+            }
+         } else {
+            for (i=0; i < effective; ++i) {
+               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+               if (outputs[c_inter])
+                  outputs[c_inter][p_inter] += val;
+               if (++c_inter == ch) { c_inter = 0; ++p_inter; }
+            }
+         }
+      }
+
+      total_decode -= effective;
+   }
+   *c_inter_p = c_inter;
+   *p_inter_p = p_inter;
+   return TRUE;
+}
+
+static int predict_point(int x, int x0, int x1, int y0, int y1) // y value at x on the line from (x0,y0) to (x1,y1), in integer arithmetic
+{
+   int dy = y1 - y0;
+   int adx = x1 - x0; // must be nonzero: it is used as the divisor below
+   // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
+   int err = abs(dy) * (x - x0);
+   int off = err / adx; // computed on the magnitude of dy; the sign is applied on the next line
+   return dy < 0 ? y0 - off : y0 + off;
+}
+
+// the following table is block-copied from the specification; draw_line() indexes it with (y & 255)
+static float inverse_db_table[256] =
+{
+  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
+  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
+  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
+  2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
+  2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
+  3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
+  4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
+  6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
+  7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
+  1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
+  1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
+  1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
+  2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
+  2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
+  3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
+  4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
+  5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
+  7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
+  9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
+  1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
+  1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
+  2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
+  2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
+  3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
+  4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
+  5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
+  7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
+  9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
+  0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
+  0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
+  0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
+  0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
+  0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
+  0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
+  0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
+  0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
+  0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
+  0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
+  0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
+  0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
+  0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
+  0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
+  0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
+  0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
+  0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
+  0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
+  0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
+  0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
+  0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
+  0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
+  0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
+  0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
+  0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
+  0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
+  0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
+  0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
+  0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
+  0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
+  0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
+  0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
+  0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
+  0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
+  0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
+  0.82788260f,    0.88168307f,    0.9389798f,     1.0f
+};
+
+
+// @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
+// note that you must produce bit-identical output to decode correctly;
+// this specific sequence of operations is specified in the spec (it's
+// drawing integer-quantized frequency-space lines that the encoder
+// expects to be exactly the same)
+//     ... also, isn't the whole point of Bresenham's algorithm to NOT
+// have to divide in the setup? sigh.
+#ifndef STB_VORBIS_NO_DEFER_FLOOR // default: draw_line() multiplies the table value into the existing output
+#define LINE_OP(a,b)   a *= b
+#else // STB_VORBIS_NO_DEFER_FLOOR: draw_line() overwrites the output with the table value
+#define LINE_OP(a,b)   a = b
+#endif
+
+#ifdef STB_VORBIS_DIVIDE_TABLE // optional table draw_line() reads instead of dividing when ady and adx are small
+#define DIVTAB_NUMER   32
+#define DIVTAB_DENOM   64
+int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
+#endif
+
+static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n) // applies the line (x0,y0)-(x1,y1) to output[x0 .. min(x1,n))
+{
+   int dy = y1 - y0;
+   int adx = x1 - x0; // used as a divisor below, so x1 must differ from x0
+   int ady = abs(dy);
+   int base; // whole-number y step per x step (dy / adx)
+   int x=x0,y=y0;
+   int err = 0;
+   int sy; // y step used when the error term overflows: base moved one further in the direction of dy
+
+#ifdef STB_VORBIS_DIVIDE_TABLE
+   if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) { // small operands: read the quotient from the table
+      if (dy < 0) {
+         base = -integer_divide_table[ady][adx];
+         sy = base-1;
+      } else {
+         base =  integer_divide_table[ady][adx];
+         sy = base+1;
+      }
+   } else {
+      base = dy / adx;
+      if (dy < 0)
+         sy = base - 1;
+      else
+         sy = base+1;
+   }
+#else
+   base = dy / adx;
+   if (dy < 0)
+      sy = base - 1;
+   else
+      sy = base+1;
+#endif
+   ady -= abs(base) * adx; // ady becomes the remainder accumulated in err at every step
+   if (x1 > n) x1 = n; // never write at or beyond index n
+   if (x < x1) {
+      LINE_OP(output[x], inverse_db_table[y&255]); // & 255 keeps the table index inside its 256 entries
+      for (++x; x < x1; ++x) {
+         err += ady;
+         if (err >= adx) {
+            err -= adx;
+            y += sy;
+         } else
+            y += base;
+         LINE_OP(output[x], inverse_db_table[y&255]);
+      }
+   }
+}
+
+static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype) // decodes n residue values into target from 'offset' on; FALSE if a vector fails to decode
+{
+   int k;
+   if (rtype == 0) { // type 0: vector k contributes to target[offset+k], target[offset+k+step], ...
+      int step = n / book->dimensions;
+      for (k=0; k < step; ++k)
+         if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
+            return FALSE;
+   } else { // other types: vectors are laid out back to back
+      for (k=0; k < n; ) {
+         if (!codebook_decode(f, book, target+offset, n-k))
+            return FALSE;
+         k += book->dimensions;
+         offset += book->dimensions;
+      }
+   }
+   return TRUE;
+}
+
+// n is 1/2 of the blocksize --
+// specification: "Correct per-vector decode length is [n]/2"
+static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
+{
+   int i,j,pass;
+   Residue *r = f->residue_config + rn;
+   int rtype = f->residue_types[rn];
+   int c = r->classbook;
+   int classwords = f->codebooks[c].dimensions;
+   unsigned int actual_size = rtype == 2 ? n*2 : n;
+   unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
+   unsigned int limit_r_end   = (r->end   < actual_size ? r->end   : actual_size);
+   int n_read = limit_r_end - limit_r_begin;
+   int part_read = n_read / r->part_size;
+   int temp_alloc_point = temp_alloc_save(f);
+   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+   uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
+   #else
+   int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
+   #endif
+
+   CHECK(f);
+
+   for (i=0; i < ch; ++i)
+      if (!do_not_decode[i])
+         memset(residue_buffers[i], 0, sizeof(float) * n);
+
+   if (rtype == 2 && ch != 1) {
+      for (j=0; j < ch; ++j)
+         if (!do_not_decode[j])
+            break;
+      if (j == ch)
+         goto done;
+
+      for (pass=0; pass < 8; ++pass) {
+         int pcount = 0, class_set = 0;
+         if (ch == 2) {
+            while (pcount < part_read) {
+               int z = r->begin + pcount*r->part_size;
+               int c_inter = (z & 1), p_inter = z>>1;
+               if (pass == 0) {
+                  Codebook *c = f->codebooks+r->classbook;
+                  int q;
+                  DECODE(q,f,c);
+                  if (q == EOP) goto done;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  part_classdata[0][class_set] = r->classdata[q];
+                  #else
+                  for (i=classwords-1; i >= 0; --i) {
+                     classifications[0][i+pcount] = q % r->classifications;
+                     q /= r->classifications;
+                  }
+                  #endif
+               }
+               for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
+                  int z = r->begin + pcount*r->part_size;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  int c = part_classdata[0][class_set][i];
+                  #else
+                  int c = classifications[0][pcount];
+                  #endif
+                  int b = r->residue_books[c][pass];
+                  if (b >= 0) {
+                     Codebook *book = f->codebooks + b;
+                     #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
+                     if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
+                        goto done;
+                     #else
+                     // saves 1%
+                     if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
+                        goto done;
+                     #endif
+                  } else {
+                     z += r->part_size;
+                     c_inter = z & 1;
+                     p_inter = z >> 1;
+                  }
+               }
+               #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+               ++class_set;
+               #endif
+            }
+         } else if (ch > 2) {
+            while (pcount < part_read) {
+               int z = r->begin + pcount*r->part_size;
+               int c_inter = z % ch, p_inter = z/ch;
+               if (pass == 0) {
+                  Codebook *c = f->codebooks+r->classbook;
+                  int q;
+                  DECODE(q,f,c);
+                  if (q == EOP) goto done;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  part_classdata[0][class_set] = r->classdata[q];
+                  #else
+                  for (i=classwords-1; i >= 0; --i) {
+                     classifications[0][i+pcount] = q % r->classifications;
+                     q /= r->classifications;
+                  }
+                  #endif
+               }
+               for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
+                  int z = r->begin + pcount*r->part_size;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  int c = part_classdata[0][class_set][i];
+                  #else
+                  int c = classifications[0][pcount];
+                  #endif
+                  int b = r->residue_books[c][pass];
+                  if (b >= 0) {
+                     Codebook *book = f->codebooks + b;
+                     if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
+                        goto done;
+                  } else {
+                     z += r->part_size;
+                     c_inter = z % ch;
+                     p_inter = z / ch;
+                  }
+               }
+               #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+               ++class_set;
+               #endif
+            }
+         }
+      }
+      goto done;
+   }
+   CHECK(f);
+
+   for (pass=0; pass < 8; ++pass) {
+      int pcount = 0, class_set=0;
+      while (pcount < part_read) {
+         if (pass == 0) {
+            for (j=0; j < ch; ++j) {
+               if (!do_not_decode[j]) {
+                  Codebook *c = f->codebooks+r->classbook;
+                  int temp;
+                  DECODE(temp,f,c);
+                  if (temp == EOP) goto done;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  part_classdata[j][class_set] = r->classdata[temp];
+                  #else
+                  for (i=classwords-1; i >= 0; --i) {
+                     classifications[j][i+pcount] = temp % r->classifications;
+                     temp /= r->classifications;
+                  }
+                  #endif
+               }
+            }
+         }
+         for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
+            for (j=0; j < ch; ++j) {
+               if (!do_not_decode[j]) {
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  int c = part_classdata[j][class_set][i];
+                  #else
+                  int c = classifications[j][pcount];
+                  #endif
+                  int b = r->residue_books[c][pass];
+                  if (b >= 0) {
+                     float *target = residue_buffers[j];
+                     int offset = r->begin + pcount * r->part_size;
+                     int n = r->part_size;
+                     Codebook *book = f->codebooks + b;
+                     if (!residue_decode(f, book, target, offset, n, rtype))
+                        goto done;
+                  }
+               }
+            }
+         }
+         #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+         ++class_set;
+         #endif
+      }
+   }
+  done:
+   CHECK(f);
+   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+   temp_free(f,part_classdata);
+   #else
+   temp_free(f,classifications);
+   #endif
+   temp_alloc_restore(f,temp_alloc_point);
+}
+
+
+#if 0
+// slow way for debugging: evaluates the cosine sum directly for every output sample (compiled out)
+void inverse_mdct_slow(float *buffer, int n)
+{
+   int i,j;
+   int n2 = n >> 1;
+   float *x = (float *) malloc(sizeof(*x) * n2); // NOTE(review): allocation result is not checked
+   memcpy(x, buffer, sizeof(*x) * n2);           // keep the n2 inputs; buffer is overwritten below
+   for (i=0; i < n; ++i) {
+      float acc = 0;
+      for (j=0; j < n2; ++j)
+         // formula from paper:
+         //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
+         // formula from wikipedia
+         //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
+         // these are equivalent, except the formula from the paper inverts the multiplier!
+         // however, what actually works is NO MULTIPLIER!?!
+         //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
+         acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
+      buffer[i] = acc;
+   }
+   free(x);
+}
+#elif 0
+// same as above, but just barely able to run in real time on modern machines
+void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
+{
+   float mcos[16384];  // cosine table; NOTE(review): filled for i < 4*n below, so it only fits n <= 4096
+   int i,j;
+   int n2 = n >> 1, nmask = (n << 2) -1;  // nmask reduces a table index modulo 4*n (valid when n is a power of two)
+   float *x = (float *) malloc(sizeof(*x) * n2);  // NOTE(review): allocation result is not checked
+   memcpy(x, buffer, sizeof(*x) * n2);
+   for (i=0; i < 4*n; ++i)
+      mcos[i] = (float) cos(M_PI / 2 * i / n);
+
+   for (i=0; i < n; ++i) {
+      float acc = 0;
+      for (j=0; j < n2; ++j)
+         acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
+      buffer[i] = acc;
+   }
+   free(x);
+}
+#elif 0
+// transform to use a slow dct-iv; this is STILL basically trivial,
+// but only requires half as many ops
+void dct_iv_slow(float *buffer, int n)
+{
+   float mcos[16384];  // cosine table; NOTE(review): filled for i < 8*n below, so it only fits n <= 2048
+   float x[2048];      // copy of the n inputs; NOTE(review): the memcpy below also requires n <= 2048
+   int i,j;
+   int n2 = n >> 1, nmask = (n << 3) - 1;  // n2 is not used in this function; nmask wraps indices modulo 8*n
+   memcpy(x, buffer, sizeof(*x) * n);
+   for (i=0; i < 8*n; ++i)
+      mcos[i] = (float) cos(M_PI / 4 * i / n);
+   for (i=0; i < n; ++i) {
+      float acc = 0;
+      for (j=0; j < n; ++j)
+         acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
+      buffer[i] = acc;
+   }
+}
+
+void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
+{
+   int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
+   float temp[4096];  // receives the n2 input coefficients, then the dct-iv result
+
+   memcpy(temp, buffer, n2 * sizeof(float));
+   dct_iv_slow(temp, n2);  // returns -c'-d, a-b'
+
+   for (i=0; i < n4  ; ++i) buffer[i] = temp[i+n4];            // a-b'
+   for (   ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1];   // b-a', c+d'
+   for (   ; i < n   ; ++i) buffer[i] = -temp[i - n3_4];       // c'+d
+}
+#endif
+
+#ifndef LIBVORBIS_MDCT
+#define LIBVORBIS_MDCT 0   // default: the externally linked mdct_* wrapper below is compiled out
+#endif
+
+#if LIBVORBIS_MDCT
+// directly call the vorbis MDCT using an interface documented
+// by Jeff Roberts... useful for performance comparison
+typedef struct
+{
+  int n;
+  int log2n;
+
+  float *trig;
+  int   *bitrev;
+
+  float scale;
+} mdct_lookup;
+
+extern void mdct_init(mdct_lookup *lookup, int n);
+extern void mdct_clear(mdct_lookup *l);   // declared but not called in the code visible here
+extern void mdct_backward(mdct_lookup *init, float *in, float *out);
+
+mdct_lookup M1,M2;   // two cached lookups, chosen by matching their n field in inverse_mdct below
+
+void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)   // NOTE(review): the static inverse_mdct later in this file is not excluded when LIBVORBIS_MDCT is nonzero -- confirm this still builds
+{
+   mdct_lookup *M;   // f and blocktype are not used by this variant
+   if (M1.n == n) M = &M1;
+   else if (M2.n == n) M = &M2;
+   else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }   // first slot still unused (n == 0)
+   else {
+      if (M2.n) __asm int 3;   // both slots already hold other sizes: breakpoint instruction (MSVC-style inline asm)
+      mdct_init(&M2, n);       // NOTE(review): re-initialises M2 without a prior mdct_clear
+      M = &M2;
+   }
+
+   mdct_backward(M, buffer, buffer);   // input and output are the same buffer
+}
+#endif
+
+
+// the following were split out into separate functions while optimizing;
+// they could be pushed back up but eh. __forceinline showed no change;
+// they're probably already being inlined.
+static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
+{  // butterfly pass over e: n>>2 iterations of four butterflies each; twiddle pairs are read from A at a stride of 8 floats
+   float *ee0 = e + i_off;     // starts at e[i_off] and walks downward, 8 floats per iteration
+   float *ee2 = ee0 + k_off;   // partner of ee0, k_off floats away
+   int i;
+
+   assert((n & 3) == 0);
+   for (i=(n>>2); i > 0; --i) {
+      float k00_20, k01_21;    // differences ee0 - ee2 for one pair, saved before ee0 is overwritten
+      k00_20  = ee0[ 0] - ee2[ 0];
+      k01_21  = ee0[-1] - ee2[-1];
+      ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
+      ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
+      ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];   // difference times A[0] + i*A[1], real part (imaginary part is at [-1])
+      ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
+      A += 8;
+
+      k00_20  = ee0[-2] - ee2[-2];
+      k01_21  = ee0[-3] - ee2[-3];
+      ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
+      ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
+      ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
+      ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
+      A += 8;
+
+      k00_20  = ee0[-4] - ee2[-4];
+      k01_21  = ee0[-5] - ee2[-5];
+      ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
+      ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
+      ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
+      ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
+      A += 8;
+
+      k00_20  = ee0[-6] - ee2[-6];
+      k01_21  = ee0[-7] - ee2[-7];
+      ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
+      ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
+      ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
+      ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
+      A += 8;
+      ee0 -= 8;
+      ee2 -= 8;
+   }
+}
+// Butterfly pass over e: lim>>2 iterations of four butterflies; e0 walks down from e[d0], e2 = e0 + k_off, and A advances by k1 floats per butterfly.
+static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
+{
+   int i;
+   float k00_20, k01_21;   // differences e0 - e2 for one pair, saved before e0 is overwritten
+
+   float *e0 = e + d0;
+   float *e2 = e0 + k_off;
+
+   for (i=lim >> 2; i > 0; --i) {
+      k00_20 = e0[-0] - e2[-0];
+      k01_21 = e0[-1] - e2[-1];
+      e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
+      e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
+      e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];   // difference times A[0] + i*A[1], real part (imaginary part is at [-1])
+      e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
+
+      A += k1;
+
+      k00_20 = e0[-2] - e2[-2];
+      k01_21 = e0[-3] - e2[-3];
+      e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
+      e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
+      e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
+      e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
+
+      A += k1;
+
+      k00_20 = e0[-4] - e2[-4];
+      k01_21 = e0[-5] - e2[-5];
+      e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
+      e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
+      e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
+      e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
+
+      A += k1;
+
+      k00_20 = e0[-6] - e2[-6];
+      k01_21 = e0[-7] - e2[-7];
+      e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
+      e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
+      e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
+      e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
+
+      e0 -= 8;
+      e2 -= 8;
+
+      A += k1;
+   }
+}
+// Butterfly pass over e with four fixed twiddle pairs (at A, A+a_off, A+2*a_off, A+3*a_off): n iterations, both pointers stepping back k0 floats each time.
+static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
+{
+   int i;
+   float A0 = A[0];            // the eight twiddle values are loaded once, before the loop
+   float A1 = A[0+1];
+   float A2 = A[0+a_off];
+   float A3 = A[0+a_off+1];
+   float A4 = A[0+a_off*2+0];
+   float A5 = A[0+a_off*2+1];
+   float A6 = A[0+a_off*3+0];
+   float A7 = A[0+a_off*3+1];
+
+   float k00,k11;              // differences ee0 - ee2 for one pair, saved before ee0 is overwritten
+
+   float *ee0 = e  +i_off;
+   float *ee2 = ee0+k_off;
+
+   for (i=n; i > 0; --i) {
+      k00     = ee0[ 0] - ee2[ 0];
+      k11     = ee0[-1] - ee2[-1];
+      ee0[ 0] =  ee0[ 0] + ee2[ 0];
+      ee0[-1] =  ee0[-1] + ee2[-1];
+      ee2[ 0] = (k00) * A0 - (k11) * A1;   // difference times A0 + i*A1, real part (imaginary part is at [-1])
+      ee2[-1] = (k11) * A0 + (k00) * A1;
+
+      k00     = ee0[-2] - ee2[-2];
+      k11     = ee0[-3] - ee2[-3];
+      ee0[-2] =  ee0[-2] + ee2[-2];
+      ee0[-3] =  ee0[-3] + ee2[-3];
+      ee2[-2] = (k00) * A2 - (k11) * A3;
+      ee2[-3] = (k11) * A2 + (k00) * A3;
+
+      k00     = ee0[-4] - ee2[-4];
+      k11     = ee0[-5] - ee2[-5];
+      ee0[-4] =  ee0[-4] + ee2[-4];
+      ee0[-5] =  ee0[-5] + ee2[-5];
+      ee2[-4] = (k00) * A4 - (k11) * A5;
+      ee2[-5] = (k11) * A4 + (k00) * A5;
+
+      k00     = ee0[-6] - ee2[-6];
+      k11     = ee0[-7] - ee2[-7];
+      ee0[-6] =  ee0[-6] + ee2[-6];
+      ee0[-7] =  ee0[-7] + ee2[-7];
+      ee2[-6] = (k00) * A6 - (k11) * A7;
+      ee2[-7] = (k11) * A6 + (k00) * A7;
+
+      ee0 -= k0;
+      ee2 -= k0;
+   }
+}
+// In-place add/subtract stage over the eight floats z[0], z[-1], ..., z[-7]; no multiplications are involved.
+static __forceinline void iter_54(float *z)
+{
+   float k00,k11,k22,k33;
+   float y0,y1,y2,y3;
+
+   k00  = z[ 0] - z[-4];
+   y0   = z[ 0] + z[-4];
+   y2   = z[-2] + z[-6];
+   k22  = z[-2] - z[-6];   // kept until the end; z[-2] and z[-6] are overwritten before it is used
+
+   z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
+   z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
+
+   // done with y0,y2
+
+   k33  = z[-3] - z[-7];
+
+   z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
+   z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
+
+   // done with k33
+
+   k11  = z[-1] - z[-5];
+   y1   = z[-1] + z[-5];
+   y3   = z[-3] + z[-7];
+
+   z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
+   z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
+   z[-5] = k11 - k22;    // z1 - z5 - z2 + z6
+   z[-7] = k11 + k22;    // z1 - z5 + z2 - z6
+}
+// For each of n groups of 16 floats, walking down from e[i_off]: one butterfly between the two 8-float halves, then iter_54 on each half.
+static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
+{
+   int a_off = base_n >> 3;
+   float A2 = A[0+a_off];       // the only twiddle value read from A in this function
+   float *z = e + i_off;
+   float *base = z - 16 * n;    // loop ends once z has moved down 16*n floats
+
+   while (z > base) {
+      float k00,k11;
+      float l00,l11;
+
+      k00    = z[-0] - z[ -8];
+      k11    = z[-1] - z[ -9];
+      l00    = z[-2] - z[-10];
+      l11    = z[-3] - z[-11];
+      z[ -0] = z[-0] + z[ -8];
+      z[ -1] = z[-1] + z[ -9];
+      z[ -2] = z[-2] + z[-10];
+      z[ -3] = z[-3] + z[-11];
+      z[ -8] = k00;               // this pair's difference is stored without any multiplication
+      z[ -9] = k11;
+      z[-10] = (l00+l11) * A2;
+      z[-11] = (l11-l00) * A2;
+
+      k00    = z[ -4] - z[-12];
+      k11    = z[ -5] - z[-13];
+      l00    = z[ -6] - z[-14];
+      l11    = z[ -7] - z[-15];
+      z[ -4] = z[ -4] + z[-12];
+      z[ -5] = z[ -5] + z[-13];
+      z[ -6] = z[ -6] + z[-14];
+      z[ -7] = z[ -7] + z[-15];
+      z[-12] = k11;               // this pair's difference is stored swapped, with one sign flipped
+      z[-13] = -k00;
+      z[-14] = (l11-l00) * A2;
+      z[-15] = (l00+l11) * -A2;
+
+      iter_54(z);
+      iter_54(z-8);
+      z -= 16;
+   }
+}
+// In-place inverse MDCT: consumes buffer[0..n/2) and fills buffer[0..n), using n/2 floats of temp space and the f->A/B/C/bit_reverse tables for blocktype.
+static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
+{
+   int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
+   int ld;
+   // @OPTIMIZE: reduce register pressure by using fewer variables?
+   int save_point = temp_alloc_save(f);
+   float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));   // scratch for n2 floats, released at the end of this function
+   float *u=NULL,*v=NULL;
+   // twiddle factors
+   float *A = f->A[blocktype];
+
+   // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
+   // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_naive' after this function.
+
+   // kernel from paper
+
+
+   // merged:
+   //   copy and reflect spectral data
+   //   step 0
+
+   // note that it turns out that the items added together during
+   // this step are, in fact, being added to themselves (as reflected
+   // by step 0). inexplicable inefficiency! this became obvious
+   // once I combined the passes.
+
+   // so there's a missing 'times 2' here (for adding X to itself).
+   // this propagates through linearly to the end, where the numbers
+   // are 1/2 too small, and need to be compensated for.
+
+   {
+      float *d,*e, *AA, *e_stop;
+      d = &buf2[n2-2];            // buf2 is filled from its end towards its start
+      AA = A;
+      e = &buffer[0];
+      e_stop = &buffer[n2];
+      while (e != e_stop) {       // first half of buf2: reads buffer upward, 4 floats per step
+         d[1] = (e[0] * AA[0] - e[2]*AA[1]);
+         d[0] = (e[0] * AA[1] + e[2]*AA[0]);
+         d -= 2;
+         AA += 2;
+         e += 4;
+      }
+
+      e = &buffer[n2-3];
+      while (d >= buf2) {         // second half of buf2: reads buffer downward, negated
+         d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
+         d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
+         d -= 2;
+         AA += 2;
+         e -= 4;
+      }
+   }
+
+   // now we use symbolic names for these, so that we can
+   // possibly swap their meaning as we change which operations
+   // are in place
+
+   u = buffer;
+   v = buf2;
+
+   // step 2    (paper output is w, now u)
+   // this could be in place, but the data ends up in the wrong
+   // place... _somebody_'s got to swap it, so this is nominated
+   {
+      float *AA = &A[n2-8];       // twiddles are consumed from the end of A backwards
+      float *d0,*d1, *e0, *e1;
+
+      e0 = &v[n4];
+      e1 = &v[0];
+
+      d0 = &u[n4];
+      d1 = &u[0];
+
+      while (AA >= A) {
+         float v40_20, v41_21;
+
+         v41_21 = e0[1] - e1[1];
+         v40_20 = e0[0] - e1[0];
+         d0[1]  = e0[1] + e1[1];
+         d0[0]  = e0[0] + e1[0];
+         d1[1]  = v41_21*AA[4] - v40_20*AA[5];
+         d1[0]  = v40_20*AA[4] + v41_21*AA[5];
+
+         v41_21 = e0[3] - e1[3];
+         v40_20 = e0[2] - e1[2];
+         d0[3]  = e0[3] + e1[3];
+         d0[2]  = e0[2] + e1[2];
+         d1[3]  = v41_21*AA[0] - v40_20*AA[1];
+         d1[2]  = v40_20*AA[0] + v41_21*AA[1];
+
+         AA -= 8;
+
+         d0 += 4;
+         d1 += 4;
+         e0 += 4;
+         e1 += 4;
+      }
+   }
+
+   // step 3
+   ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
+
+   // optimized step 3:
+
+   // the original step3 loop can be nested r inside s or s inside r;
+   // it's written originally as s inside r, but this is dumb when r
+   // iterates many times, and s few. So I have two copies of it and
+   // switch between them halfway.
+
+   // this is iteration 0 of step 3
+   imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
+   imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
+
+   // this is iteration 1 of step 3
+   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
+   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
+   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
+   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
+
+   l=2;
+   for (; l < (ld-3)>>1; ++l) {   // earlier iterations: one r-loop call per sub-block
+      int k0 = n >> (l+2), k0_2 = k0>>1;
+      int lim = 1 << (l+1);
+      int i;
+      for (i=0; i < lim; ++i)
+         imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
+   }
+
+   for (; l < ld-6; ++l) {        // later iterations: s-loop form, continuing from the same l
+      int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
+      int rlim = n >> (l+6), r;
+      int lim = 1 << (l+1);
+      int i_off;
+      float *A0 = A;
+      i_off = n2-1;
+      for (r=rlim; r > 0; --r) {
+         imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
+         A0 += k1*4;
+         i_off -= 8;
+      }
+   }
+
+   // iterations with count:
+   //   ld-6,-5,-4 all interleaved together
+   //       the big win comes from getting rid of needless flops
+   //         due to the constants on pass 5 & 4 being all 1 and 0;
+   //       combining them to be simultaneous to improve cache made little difference
+   imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
+
+   // output is u
+
+   // step 4, 5, and 6
+   // cannot be in-place because of step 5
+   {
+      uint16 *bitrev = f->bit_reverse[blocktype];   // each entry is used as an offset into u; two entries per loop step
+      // weirdly, I'd have thought reading sequentially and writing
+      // erratically would have been better than vice-versa, but in
+      // fact that's not what my testing showed. (That is, with
+      // j = bitreverse(i), do you read i and write j, or read j and write i.)
+
+      float *d0 = &v[n4-4];
+      float *d1 = &v[n2-4];
+      while (d0 >= v) {
+         int k4;
+
+         k4 = bitrev[0];
+         d1[3] = u[k4+0];
+         d1[2] = u[k4+1];
+         d0[3] = u[k4+2];
+         d0[2] = u[k4+3];
+
+         k4 = bitrev[1];
+         d1[1] = u[k4+0];
+         d1[0] = u[k4+1];
+         d0[1] = u[k4+2];
+         d0[0] = u[k4+3];
+
+         d0 -= 4;
+         d1 -= 4;
+         bitrev += 2;
+      }
+   }
+   // (paper output is u, now v)
+
+
+   // data must be in buf2
+   assert(v == buf2);
+
+   // step 7   (paper output is v, now v)
+   // this is now in place
+   {
+      float *C = f->C[blocktype];
+      float *d, *e;
+
+      d = v;                      // d walks up from the start of v, e walks down from its end, until they meet
+      e = v + n2 - 4;
+
+      while (d < e) {
+         float a02,a11,b0,b1,b2,b3;
+
+         a02 = d[0] - e[2];
+         a11 = d[1] + e[3];
+
+         b0 = C[1]*a02 + C[0]*a11;
+         b1 = C[1]*a11 - C[0]*a02;
+
+         b2 = d[0] + e[ 2];
+         b3 = d[1] - e[ 3];
+
+         d[0] = b2 + b0;
+         d[1] = b3 + b1;
+         e[2] = b2 - b0;
+         e[3] = b1 - b3;
+
+         a02 = d[2] - e[0];
+         a11 = d[3] + e[1];
+
+         b0 = C[3]*a02 + C[2]*a11;
+         b1 = C[3]*a11 - C[2]*a02;
+
+         b2 = d[2] + e[ 0];
+         b3 = d[3] - e[ 1];
+
+         d[2] = b2 + b0;
+         d[3] = b3 + b1;
+         e[0] = b2 - b0;
+         e[1] = b1 - b3;
+
+         C += 4;
+         d += 4;
+         e -= 4;
+      }
+   }
+
+   // data must be in buf2
+
+
+   // step 8+decode   (paper output is X, now buffer)
+   // this generates pairs of data a la 8 and pushes them directly through
+   // the decode kernel (pushing rather than pulling) to avoid having
+   // to make another pass later
+
+   // this cannot POSSIBLY be in place, so we refer to the buffers directly
+
+   {
+      float *d0,*d1,*d2,*d3;
+
+      float *B = f->B[blocktype] + n2 - 8;
+      float *e = buf2 + n2 - 8;   // buf2 is read from its end backwards, 8 floats per step
+      d0 = &buffer[0];            // d0 and d2 are written upward, d1 and d3 downward: four quarters of buffer
+      d1 = &buffer[n2-4];
+      d2 = &buffer[n2];
+      d3 = &buffer[n-4];
+      while (e >= v) {
+         float p0,p1,p2,p3;
+
+         p3 =  e[6]*B[7] - e[7]*B[6];
+         p2 = -e[6]*B[6] - e[7]*B[7];
+
+         d0[0] =   p3;
+         d1[3] = - p3;
+         d2[0] =   p2;
+         d3[3] =   p2;
+
+         p1 =  e[4]*B[5] - e[5]*B[4];
+         p0 = -e[4]*B[4] - e[5]*B[5];
+
+         d0[1] =   p1;
+         d1[2] = - p1;
+         d2[1] =   p0;
+         d3[2] =   p0;
+
+         p3 =  e[2]*B[3] - e[3]*B[2];
+         p2 = -e[2]*B[2] - e[3]*B[3];
+
+         d0[2] =   p3;
+         d1[1] = - p3;
+         d2[2] =   p2;
+         d3[1] =   p2;
+
+         p1 =  e[0]*B[1] - e[1]*B[0];
+         p0 = -e[0]*B[0] - e[1]*B[1];
+
+         d0[3] =   p1;
+         d1[0] = - p1;
+         d2[3] =   p0;
+         d3[0] =   p0;
+
+         B -= 8;
+         e -= 8;
+         d0 += 4;
+         d2 += 4;
+         d1 -= 4;
+         d3 -= 4;
+      }
+   }
+
+   temp_free(f,buf2);
+   temp_alloc_restore(f,save_point);
+}
+
+#if 0
+// this is the original version of the above code, if you want to optimize it from scratch (compiled out; in-place on buffer)
+void inverse_mdct_naive(float *buffer, int n)
+{
+   float s;
+   float A[1 << 12], B[1 << 12], C[1 << 11];   // twiddle tables, rebuilt on every call by the loops below
+   int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
+   int n3_4 = n - n4, ld;
+   // how can they claim this only uses N words?!
+   // oh, because they're only used sparsely, whoops
+   float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];   // indexed up to n-1, so n may be at most 8192
+   // set up twiddle factors
+
+   for (k=k2=0; k < n4; ++k,k2+=2) {
+      A[k2  ] = (float)  cos(4*k*M_PI/n);
+      A[k2+1] = (float) -sin(4*k*M_PI/n);
+      B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
+      B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
+   }
+   for (k=k2=0; k < n8; ++k,k2+=2) {
+      C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
+      C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
+   }
+
+   // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
+   // Note there are bugs in that pseudocode, presumably due to them attempting
+   // to rename the arrays nicely rather than representing the way their actual
+   // implementation bounces buffers back and forth. As a result, even in the
+   // "some formulars corrected" version, a direct implementation fails. These
+   // are noted below as "paper bug".
+
+   // copy and reflect spectral data
+   for (k=0; k < n2; ++k) u[k] = buffer[k];
+   for (   ; k < n ; ++k) u[k] = -buffer[n - k - 1];
+   // kernel from paper
+   // step 1
+   for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
+      v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2]   - (u[k4+2] - u[n-k4-3])*A[k2+1];
+      v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
+   }
+   // step 2
+   for (k=k4=0; k < n8; k+=1, k4+=4) {
+      w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
+      w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
+      w[k4+3]    = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
+      w[k4+1]    = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
+   }
+   // step 3
+   ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
+   for (l=0; l < ld-3; ++l) {
+      int k0 = n >> (l+2), k1 = 1 << (l+3);
+      int rlim = n >> (l+4), r4, r;
+      int s2lim = 1 << (l+2), s2;
+      for (r=r4=0; r < rlim; r4+=4,++r) {
+         for (s2=0; s2 < s2lim; s2+=2) {
+            u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
+            u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
+            u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
+                                - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
+            u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
+                                + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
+         }
+      }
+      if (l+1 < ld-3) {
+         // paper bug: ping-ponging of u&w here is omitted
+         memcpy(w, u, sizeof(u));   // copies the whole u array, not just the first n entries
+      }
+   }
+
+   // step 4
+   for (i=0; i < n8; ++i) {
+      int j = bit_reverse(i) >> (32-ld+3);
+      assert(j < n8);
+      if (i == j) {
+         // paper bug: original code probably swapped in place; if copying,
+         //            need to directly copy in this case
+         int i8 = i << 3;
+         v[i8+1] = u[i8+1];
+         v[i8+3] = u[i8+3];
+         v[i8+5] = u[i8+5];
+         v[i8+7] = u[i8+7];
+      } else if (i < j) {
+         int i8 = i << 3, j8 = j << 3;
+         v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
+         v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
+         v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
+         v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
+      }
+   }
+   // step 5
+   for (k=0; k < n2; ++k) {
+      w[k] = v[k*2+1];   // keep only the odd-indexed entries of v
+   }
+   // step 6
+   for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
+      u[n-1-k2] = w[k4];
+      u[n-2-k2] = w[k4+1];
+      u[n3_4 - 1 - k2] = w[k4+2];
+      u[n3_4 - 2 - k2] = w[k4+3];
+   }
+   // step 7
+   for (k=k2=0; k < n8; ++k, k2 += 2) {
+      v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
+      v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
+      v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
+      v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
+   }
+   // step 8
+   for (k=k2=0; k < n4; ++k,k2 += 2) {
+      X[k]      = v[k2+n2]*B[k2  ] + v[k2+1+n2]*B[k2+1];
+      X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2  ];
+   }
+
+   // decode kernel to output
+   // determined the following value experimentally
+   // (by first figuring out what made inverse_mdct_slow work); then matching that here
+   // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
+   s = 0.5; // theoretically would be n4
+
+   // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
+   //     so it needs to use the "old" B values to behave correctly, or else
+   //     set s to 1.0 ]]]
+   for (i=0; i < n4  ; ++i) buffer[i] = s * X[i+n4];
+   for (   ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
+   for (   ; i < n   ; ++i) buffer[i] = -s * X[i - n3_4];
+}
+#endif
+
+// Returns f->window[0] or f->window[1] according to which block size equals 2*len; NULL if neither does.
+static float *get_window(vorb *f, int len)
+{
+   const int block_len = len << 1;
+   const int slot = (block_len == f->blocksize_0) ? 0 : (block_len == f->blocksize_1) ? 1 : -1;
+   return (slot < 0) ? NULL : f->window[slot];
+}
+
+#ifndef STB_VORBIS_NO_DEFER_FLOOR
+typedef int16 YTYPE;   // element type of the finalY array taken by do_floor; negative entries are skipped there
+#else
+typedef int YTYPE;     // element type of the finalY array taken by do_floor; step2_flag selects the points instead
+#endif
+// Applies channel i's floor-1 curve to target[0 .. n/2): draw_line between consecutive used points, then a constant tail.
+static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
+{
+   const int half = n >> 1;
+   const int floor = map->submap_floor[map->chan[i].mux];
+   Floor1 *cfg;
+   int idx, point, prev_x, prev_y;
+   if (f->floor_types[floor] == 0)   // floor type 0 is rejected; anything else is treated as floor1
+      return error(f, VORBIS_invalid_stream);
+   cfg = &f->floor_config[floor].floor1;
+   prev_x = 0;
+   prev_y = finalY[0] * cfg->floor1_multiplier;
+   for (idx = 1; idx < cfg->values; ++idx) {
+      int cur_x, cur_y;
+      point = cfg->sorted_order[idx];
+      #ifndef STB_VORBIS_NO_DEFER_FLOOR
+      STBV_NOTUSED(step2_flag);
+      if (finalY[point] < 0) continue;   // a negative Y means this point is not drawn
+      #else
+      if (!step2_flag[point]) continue;
+      #endif
+      cur_y = finalY[point] * cfg->floor1_multiplier;
+      cur_x = cfg->Xlist[point];
+      if (prev_x != cur_x)
+         draw_line(target, prev_x,prev_y, cur_x,cur_y, half);
+      CHECK(f);
+      prev_x = cur_x;
+      prev_y = cur_y;
+   }
+   // constant tail; optimization of: draw_line(target, prev_x,prev_y, n,prev_y, half);
+   if (prev_x < half) {
+      for (point = prev_x; point < half; ++point)
+         LINE_OP(target[point], inverse_db_table[prev_y]);
+      CHECK(f);
+   }
+   return TRUE;
+}
+
+// The meaning of "left" and "right"
+//
+// For a given frame:
+//     we compute samples from 0..n
+//     window_center is n/2
+//     we'll window and mix the samples from left_start to left_end with data from the previous frame
+//     all of the samples from left_end to right_start can be output without mixing; however,
+//        this interval is 0-length except when transitioning between short and long frames
+//     all of the samples from right_start to right_end need to be mixed with the next frame,
+//        which we don't have, so those get saved in a buffer
+//     frame N's right_end-right_start, the number of samples to mix with the next frame,
+//        has to be the same as frame N+1's left_end-left_start (which they are by
+//        construction)
+
+static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
+{
+   Mode *m;
+   int i, n, prev, next, window_center;
+   f->channel_buffer_start = f->channel_buffer_end = 0;
+
+  retry:
+   if (f->eof) return FALSE;
+   if (!maybe_start_packet(f))
+      return FALSE;
+   // check packet type
+   if (get_bits(f,1) != 0) {
+      if (IS_PUSH_MODE(f))
+         return error(f,VORBIS_bad_packet_type);
+      while (EOP != get8_packet(f));
+      goto retry;
+   }
+
+   if (f->alloc.alloc_buffer)
+      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
+
+   i = get_bits(f, ilog(f->mode_count-1));
+   if (i == EOP) return FALSE;
+   if (i >= f->mode_count) return FALSE;
+   *mode = i;
+   m = f->mode_config + i;
+   if (m->blockflag) {
+      n = f->blocksize_1;
+      prev = get_bits(f,1);
+      next = get_bits(f,1);
+   } else {
+      prev = next = 0;
+      n = f->blocksize_0;
+   }
+
+// WINDOWING
+
+   window_center = n >> 1;
+   if (m->blockflag && !prev) {
+      *p_left_start = (n - f->blocksize_0) >> 2;
+      *p_left_end   = (n + f->blocksize_0) >> 2;
+   } else {
+      *p_left_start = 0;
+      *p_left_end   = window_center;
+   }
+   if (m->blockflag && !next) {
+      *p_right_start = (n*3 - f->blocksize_0) >> 2;
+      *p_right_end   = (n*3 + f->blocksize_0) >> 2;
+   } else {
+      *p_right_start = window_center;
+      *p_right_end   = n;
+   }
+
+   return TRUE;
+}
+
+static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
+{
+   Mapping *map;
+   int i,j,k,n,n2;
+   int zero_channel[256];        // per channel: TRUE when residue decode is skipped (coupling below may clear it again)
+   int really_zero_channel[256]; // per channel: copy of zero_channel taken before the coupling adjustment
+   // Decodes one packet body into f->channel_buffers (floor, residue, inverse coupling, inverse MDCT), stores the sample count in *len and may advance *p_left.
+// WINDOWING
+   // left_end is not needed by this function; n is the block size selected by this mode's blockflag
+   STBV_NOTUSED(left_end);
+   n = f->blocksize[m->blockflag];
+   map = &f->mapping[m->mapping];
+
+// FLOORS
+   n2 = n >> 1;
+
+   CHECK(f);
+   // read each channel's floor; a channel whose floor is absent or fails to decode is flagged in zero_channel
+   for (i=0; i < f->channels; ++i) {
+      int s = map->chan[i].mux, floor;
+      zero_channel[i] = FALSE;
+      floor = map->submap_floor[s];
+      if (f->floor_types[floor] == 0) { // floor type 0 is rejected here as an invalid stream
+         return error(f, VORBIS_invalid_stream);
+      } else {
+         Floor1 *g = &f->floor_config[floor].floor1;
+         if (get_bits(f, 1)) { // one flag bit: nonzero means floor data follows for this channel
+            short *finalY;
+            uint8 step2_flag[256];
+            static int range_list[4] = { 256, 128, 86, 64 };
+            int range = range_list[g->floor1_multiplier-1];
+            int offset = 2;
+            finalY = f->finalY[i];
+            finalY[0] = get_bits(f, ilog(range)-1); // the first two Y values are read directly, ilog(range)-1 bits each
+            finalY[1] = get_bits(f, ilog(range)-1);
+            for (j=0; j < g->partitions; ++j) { // remaining Y values are codebook-coded, partition by partition
+               int pclass = g->partition_class_list[j];
+               int cdim = g->class_dimensions[pclass];
+               int cbits = g->class_subclasses[pclass];
+               int csub = (1 << cbits)-1;
+               int cval = 0;
+               if (cbits) {
+                  Codebook *c = f->codebooks + g->class_masterbooks[pclass];
+                  DECODE(cval,f,c);
+               }
+               for (k=0; k < cdim; ++k) {
+                  int book = g->subclass_books[pclass][cval & csub];
+                  cval = cval >> cbits;
+                  if (book >= 0) {
+                     int temp;
+                     Codebook *c = f->codebooks + book;
+                     DECODE(temp,f,c);
+                     finalY[offset++] = temp;
+                  } else
+                     finalY[offset++] = 0;
+               }
+            }
+            if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
+            step2_flag[0] = step2_flag[1] = 1; // the two directly-read points are always marked
+            for (j=2; j < g->values; ++j) { // turn each coded value into an absolute Y relative to the prediction from its two neighbors
+               int low, high, pred, highroom, lowroom, room, val;
+               low = g->neighbors[j][0];
+               high = g->neighbors[j][1];
+               //neighbors(g->Xlist, j, &low, &high);
+               pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
+               val = finalY[j];
+               highroom = range - pred;
+               lowroom = pred;
+               if (highroom < lowroom)
+                  room = highroom * 2;
+               else
+                  room = lowroom * 2;
+               if (val) { // nonzero: point j and both of its neighbors are marked in step2_flag
+                  step2_flag[low] = step2_flag[high] = 1;
+                  step2_flag[j] = 1;
+                  if (val >= room)
+                     if (highroom > lowroom)
+                        finalY[j] = val - lowroom + pred;
+                     else
+                        finalY[j] = pred - val + highroom - 1;
+                  else
+                     if (val & 1)
+                        finalY[j] = pred - ((val+1)>>1);
+                     else
+                        finalY[j] = pred + (val>>1);
+               } else { // zero: point j is unmarked and takes the predicted value
+                  step2_flag[j] = 0;
+                  finalY[j] = pred;
+               }
+            }
+
+#ifdef STB_VORBIS_NO_DEFER_FLOOR
+            do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
+#else
+            // defer final floor computation until _after_ residue
+            for (j=0; j < g->values; ++j) {
+               if (!step2_flag[j])
+                  finalY[j] = -1; // -1 replaces the Y value of every point not marked in step2_flag
+            }
+#endif
+         } else {
+           error:
+            zero_channel[i] = TRUE;
+         }
+         // So we just defer everything else to later
+
+         // at this point we've decoded the floor into buffer
+      }
+   }
+   CHECK(f);
+   // at this point we've decoded all floors
+
+   if (f->alloc.alloc_buffer)
+      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
+
+   // re-enable coupled channels if necessary
+   memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels); // keep the pre-coupling flags for the floor stage below
+   for (i=0; i < map->coupling_steps; ++i)
+      if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) { // if either channel of a coupled pair is live, residue is decoded for both
+         zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
+      }
+
+   CHECK(f);
+// RESIDUE DECODE
+   for (i=0; i < map->submaps; ++i) { // gather the channels muxed to submap i and decode their residue in one call
+      float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
+      int r;
+      uint8 do_not_decode[256];
+      int ch = 0;
+      for (j=0; j < f->channels; ++j) {
+         if (map->chan[j].mux == i) {
+            if (zero_channel[j]) {
+               do_not_decode[ch] = TRUE;
+               residue_buffers[ch] = NULL;
+            } else {
+               do_not_decode[ch] = FALSE;
+               residue_buffers[ch] = f->channel_buffers[j];
+            }
+            ++ch;
+         }
+      }
+      r = map->submap_residue[i];
+      decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
+   }
+
+   if (f->alloc.alloc_buffer)
+      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
+   CHECK(f);
+
+// INVERSE COUPLING
+   for (i = map->coupling_steps-1; i >= 0; --i) { // coupling steps are undone in reverse order
+      int n2 = n >> 1; // shadows the outer n2 with the same value
+      float *m = f->channel_buffers[map->chan[i].magnitude];
+      float *a = f->channel_buffers[map->chan[i].angle    ];
+      for (j=0; j < n2; ++j) {
+         float a2,m2;
+         if (m[j] > 0)
+            if (a[j] > 0)
+               m2 = m[j], a2 = m[j] - a[j];
+            else
+               a2 = m[j], m2 = m[j] + a[j];
+         else
+            if (a[j] > 0)
+               m2 = m[j], a2 = m[j] + a[j];
+            else
+               a2 = m[j], m2 = m[j] - a[j];
+         m[j] = m2;
+         a[j] = a2;
+      }
+   }
+   CHECK(f);
+
+   // finish decoding the floors
+#ifndef STB_VORBIS_NO_DEFER_FLOOR
+   for (i=0; i < f->channels; ++i) {
+      if (really_zero_channel[i]) { // channel had no usable floor: zero its first n2 values
+         memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
+      } else {
+         do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
+      }
+   }
+#else
+   for (i=0; i < f->channels; ++i) {
+      if (really_zero_channel[i]) {
+         memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
+      } else {
+         for (j=0; j < n2; ++j)
+            f->channel_buffers[i][j] *= f->floor_buffers[i][j]; // scale the residue by the floor computed earlier
+      }
+   }
+#endif
+
+// INVERSE MDCT
+   CHECK(f);
+   for (i=0; i < f->channels; ++i)
+      inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
+   CHECK(f);
+
+   // this shouldn't be necessary, unless we exited on an error
+   // and want to flush to get to the next packet
+   flush_packet(f);
+
+   if (f->first_decode) {
+      // assume we start so first non-discarded sample is sample 0
+      // this isn't to spec, but spec would require us to read ahead
+      // and decode the size of all current frames--could be done,
+      // but presumably it's not a commonly used feature
+      f->current_loc = 0u - n2; // start of first frame is positioned for discard (NB this is an intentional unsigned overflow/wrap-around)
+      // we might have to discard samples "from" the next frame too,
+      // if we're lapping a large block then a small at the start?
+      f->discard_samples_deferred = n - right_end;
+      f->current_loc_valid = TRUE;
+      f->first_decode = FALSE;
+   } else if (f->discard_samples_deferred) { // consume deferred discards by moving left_start forward, at most up to right_start
+      if (f->discard_samples_deferred >= right_start - left_start) {
+         f->discard_samples_deferred -= (right_start - left_start);
+         left_start = right_start;
+         *p_left = left_start;
+      } else {
+         left_start += f->discard_samples_deferred;
+         *p_left = left_start;
+         f->discard_samples_deferred = 0;
+      }
+   } else if (f->previous_length == 0 && f->current_loc_valid) {
+      // we're recovering from a seek... that means we're going to discard
+      // the samples from this packet even though we know our position from
+      // the last page header, so we need to update the position based on
+      // the discarded samples here
+      // but wait, the code below is going to add this in itself even
+      // on a discard, so we don't need to do it here...
+   }
+
+   // check if we have ogg information about the sample # for this packet
+   if (f->last_seg_which == f->end_seg_with_known_loc) {
+      // if we have a valid current loc, and this is final:
+      if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
+         uint32 current_end = f->known_loc_for_packet;
+         // then let's infer the size of the (probably) short final frame
+         if (current_end < f->current_loc + (right_end-left_start)) {
+            if (current_end < f->current_loc) {
+               // negative truncation, that's impossible!
+               *len = 0;
+            } else {
+               *len = current_end - f->current_loc;
+            }
+            *len += left_start; // this doesn't seem right, but has no ill effect on my test files
+            if (*len > right_end) *len = right_end; // this should never happen
+            f->current_loc += *len;
+            return TRUE; // short final frame: *len was set above, the rest of the bookkeeping is skipped
+         }
+      }
+      // otherwise, just set our sample loc
+      // guess that the ogg granule pos refers to the _middle_ of the
+      // last frame?
+      // set f->current_loc to the position of left_start
+      f->current_loc = f->known_loc_for_packet - (n2-left_start);
+      f->current_loc_valid = TRUE;
+   }
+   if (f->current_loc_valid)
+      f->current_loc += (right_start - left_start);
+
+   if (f->alloc.alloc_buffer)
+      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
+   *len = right_end;  // ignore samples after the window goes to 0
+   CHECK(f);
+
+   return TRUE;
+}
+
+static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
+{
+   int mode, left_end, right_end; // decoding runs in two stages; these values are only carried from the first stage to the second
+   if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0; // stage 1 fills in the mode index and window bounds; give up with 0 if it fails
+   return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left); // stage 2 decodes the body, sets *len and may move *p_left forward
+}
+
+static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
+{
+   int prev,i,j;
+   // we use right&left (the start of the right- and left-window sin()-regions)
+   // to determine how much to return, rather than inferring from the rules
+   // (same result, clearer code); 'left' indicates where our sin() window
+   // starts, therefore where the previous window's right edge starts, and
+   // therefore where to start mixing from the previous buffer. 'right'
+   // indicates where our sin() ending-window starts, therefore that's where
+   // we start saving, and where our returned-data ends.
+   // returns right - left (after truncating right to len), or 0 when there was no previous window or get_window() returns NULL
+   // mixin from previous window
+   if (f->previous_length) {
+      int i,j, n = f->previous_length; // i,j shadow the outer loop counters
+      float *w = get_window(f, n);
+      if (w == NULL) return 0;
+      for (i=0; i < f->channels; ++i) {
+         for (j=0; j < n; ++j) // weighted sum: current data times w[j] plus saved previous data times w[n-1-j]
+            f->channel_buffers[i][left+j] =
+               f->channel_buffers[i][left+j]*w[    j] +
+               f->previous_window[i][     j]*w[n-1-j];
+      }
+   }
+
+   prev = f->previous_length; // remember the old length before it is overwritten just below
+
+   // last half of this data becomes previous window
+   f->previous_length = len - right;
+
+   // @OPTIMIZE: could avoid this copy by double-buffering the
+   // output (flipping previous_window with channel_buffers), but
+   // then previous_window would have to be 2x as large, and
+   // channel_buffers couldn't be temp mem (although they're NOT
+   // currently temp mem, they could be (unless we want to level
+   // performance by spreading out the computation))
+   for (i=0; i < f->channels; ++i)
+      for (j=0; right+j < len; ++j) // copies nothing when len <= right
+         f->previous_window[i][j] = f->channel_buffers[i][right+j];
+
+   if (!prev)
+      // there was no previous packet, so this data isn't valid...
+      // this isn't entirely true, only the would-have-overlapped data
+      // isn't valid, but this seems to be what the spec requires
+      return 0;
+
+   // truncate a short frame
+   if (len < right) right = len;
+
+   f->samples_output += right-left;
+
+   return right - left;
+}
+
+static int vorbis_pump_first_frame(stb_vorbis *f)
+{
+   int len, right, left, res;
+   res = vorbis_decode_packet(f, &len, &left, &right); // decode one packet; res is 0 on failure
+   if (res)
+      vorbis_finish_frame(f, len, left, right); // sample count is ignored; the call is made for its update of f->previous_window / f->previous_length
+   return res; // the decode result, not the finish_frame result
+}
+
+#ifndef STB_VORBIS_NO_PUSHDATA_API
+static int is_whole_packet_present(stb_vorbis *f)
+{
+   // make sure that we have the packet available before continuing...
+   // this requires a full ogg parse, but we know we can fetch from f->stream
+   // returns TRUE when the packet ends at or before f->stream_end; otherwise returns error() with VORBIS_need_more_data or VORBIS_invalid_stream
+   // instead of coding this out explicitly, we could save the current read state,
+   // read the next packet with get8() until end-of-packet, check f->eof, then
+   // reset the state? but that would be slower, esp. since we'd have over 256 bytes
+   // of state to restore (primarily the page segment table)
+
+   int s = f->next_seg, first = TRUE; // s == -1 is used below to mean the packet carries on into a following page
+   uint8 *p = f->stream;              // p walks ahead over the buffered bytes; f->stream itself is not modified here
+
+   if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
+      for (; s < f->segment_count; ++s) {
+         p += f->segments[s];
+         if (f->segments[s] < 255)               // stop at first short segment
+            break;
+      }
+      // either this continues, or it ends it...
+      if (s == f->segment_count)
+         s = -1; // set 'crosses page' flag
+      if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
+      first = FALSE;
+   }
+   for (; s == -1;) { // one iteration per additional page the packet spills onto
+      uint8 *q;
+      int n;
+
+      // check that we have the page header ready
+      if (p + 26 >= f->stream_end)               return error(f, VORBIS_need_more_data);
+      // validate the page
+      if (memcmp(p, ogg_page_header, 4))         return error(f, VORBIS_invalid_stream);
+      if (p[4] != 0)                             return error(f, VORBIS_invalid_stream); // NOTE(review): presumably the Ogg stream-structure version byte; confirm against the Ogg spec
+      if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
+         if (f->previous_length)
+            if ((p[5] & PAGEFLAG_continued_packet))  return error(f, VORBIS_invalid_stream);
+         // if no previous length, we're resynching, so we can come in on a continued-packet,
+         // which we'll just drop
+      } else {
+         if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
+      }
+      n = p[26]; // segment counts
+      q = p+27;  // q points to segment table
+      p = q + n; // advance past header
+      // make sure we've read the segment table
+      if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
+      for (s=0; s < n; ++s) {
+         p += q[s];
+         if (q[s] < 255)
+            break;
+      }
+      if (s == n)
+         s = -1; // set 'crosses page' flag
+      if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
+      first = FALSE;
+   }
+   return TRUE;
+}
+#endif // !STB_VORBIS_NO_PUSHDATA_API
+
+static int start_decoder(vorb *f)
+{
+   uint8 header[6], x,y;
+   int len,i,j,k, max_submaps = 0;
+   int longest_floorlist=0;
+
+   // first page, first packet
+   f->first_decode = TRUE;
+
+   if (!start_page(f))                              return FALSE;
+   // validate page flag
+   if (!(f->page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
+   if (f->page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
+   if (f->page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
+   // check for expected packet length
+   if (f->segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
+   if (f->segments[0] != 30) {
+      // check for the Ogg skeleton fishead identifying header to refine our error
+      if (f->segments[0] == 64 &&
+          getn(f, header, 6) &&
+          header[0] == 'f' &&
+          header[1] == 'i' &&
+          header[2] == 's' &&
+          header[3] == 'h' &&
+          header[4] == 'e' &&
+          header[5] == 'a' &&
+          get8(f)   == 'd' &&
+          get8(f)   == '\0')                        return error(f, VORBIS_ogg_skeleton_not_supported);
+      else
+                                                    return error(f, VORBIS_invalid_first_page);
+   }
+
+   // read packet
+   // check packet header
+   if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
+   if (!getn(f, header, 6))                         return error(f, VORBIS_unexpected_eof);
+   if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_first_page);
+   // vorbis_version
+   if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
+   f->channels = get8(f); if (!f->channels)         return error(f, VORBIS_invalid_first_page);
+   if (f->channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
+   f->sample_rate = get32(f); if (!f->sample_rate)  return error(f, VORBIS_invalid_first_page);
+   get32(f); // bitrate_maximum
+   get32(f); // bitrate_nominal
+   get32(f); // bitrate_minimum
+   x = get8(f);
+   {
+      int log0,log1;
+      log0 = x & 15;
+      log1 = x >> 4;
+      f->blocksize_0 = 1 << log0;
+      f->blocksize_1 = 1 << log1;
+      if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
+      if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
+      if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
+   }
+
+   // framing_flag
+   x = get8(f);
+   if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
+
+   // second packet!
+   if (!start_page(f))                              return FALSE;
+
+   if (!start_packet(f))                            return FALSE;
+
+   if (!next_segment(f))                            return FALSE;
+
+   if (get8_packet(f) != VORBIS_packet_comment)            return error(f, VORBIS_invalid_setup);
+   for (i=0; i < 6; ++i) header[i] = get8_packet(f);
+   if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
+   //file vendor
+   len = get32_packet(f);
+   f->vendor = (char*)setup_malloc(f, sizeof(char) * (len+1));
+   if (f->vendor == NULL)                           return error(f, VORBIS_outofmem);
+   for(i=0; i < len; ++i) {
+      f->vendor[i] = get8_packet(f);
+   }
+   f->vendor[len] = (char)'\0';
+   //user comments
+   f->comment_list_length = get32_packet(f);
+   f->comment_list = NULL;
+   if (f->comment_list_length > 0)
+   {
+      f->comment_list = (char**) setup_malloc(f, sizeof(char*) * (f->comment_list_length));
+      if (f->comment_list == NULL)                  return error(f, VORBIS_outofmem);
+   }
+
+   for(i=0; i < f->comment_list_length; ++i) {
+      len = get32_packet(f);
+      f->comment_list[i] = (char*)setup_malloc(f, sizeof(char) * (len+1));
+      if (f->comment_list[i] == NULL)               return error(f, VORBIS_outofmem);
+
+      for(j=0; j < len; ++j) {
+         f->comment_list[i][j] = get8_packet(f);
+      }
+      f->comment_list[i][len] = (char)'\0';
+   }
+
+   // framing_flag
+   x = get8_packet(f);
+   if (!(x & 1))                                    return error(f, VORBIS_invalid_setup);
+
+
+   skip(f, f->bytes_in_seg);
+   f->bytes_in_seg = 0;
+
+   do {
+      len = next_segment(f);
+      skip(f, len);
+      f->bytes_in_seg = 0;
+   } while (len);
+
+   // third packet!
+   if (!start_packet(f))                            return FALSE;
+
+   #ifndef STB_VORBIS_NO_PUSHDATA_API
+   if (IS_PUSH_MODE(f)) {
+      if (!is_whole_packet_present(f)) {
+         // convert error in ogg header to write type
+         if (f->error == VORBIS_invalid_stream)
+            f->error = VORBIS_invalid_setup;
+         return FALSE;
+      }
+   }
+   #endif
+
+   crc32_init(); // always init it, to avoid multithread race conditions
+
+   if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
+   for (i=0; i < 6; ++i) header[i] = get8_packet(f);
+   if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
+
+   // codebooks
+
+   f->codebook_count = get_bits(f,8) + 1;
+   f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
+   if (f->codebooks == NULL)                        return error(f, VORBIS_outofmem);
+   memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
+   for (i=0; i < f->codebook_count; ++i) {
+      uint32 *values;
+      int ordered, sorted_count;
+      int total=0;
+      uint8 *lengths;
+      Codebook *c = f->codebooks+i;
+      CHECK(f);
+      x = get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
+      x = get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
+      x = get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
+      x = get_bits(f, 8);
+      c->dimensions = (get_bits(f, 8)<<8) + x;
+      x = get_bits(f, 8);
+      y = get_bits(f, 8);
+      c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
+      ordered = get_bits(f,1);
+      c->sparse = ordered ? 0 : get_bits(f,1);
+
+      if (c->dimensions == 0 && c->entries != 0)    return error(f, VORBIS_invalid_setup);
+
+      if (c->sparse)
+         lengths = (uint8 *) setup_temp_malloc(f, c->entries);
+      else
+         lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
+
+      if (!lengths) return error(f, VORBIS_outofmem);
+
+      if (ordered) {
+         int current_entry = 0;
+         int current_length = get_bits(f,5) + 1;
+         while (current_entry < c->entries) {
+            int limit = c->entries - current_entry;
+            int n = get_bits(f, ilog(limit));
+            if (current_length >= 32) return error(f, VORBIS_invalid_setup);
+            if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
+            memset(lengths + current_entry, current_length, n);
+            current_entry += n;
+            ++current_length;
+         }
+      } else {
+         for (j=0; j < c->entries; ++j) {
+            int present = c->sparse ? get_bits(f,1) : 1;
+            if (present) {
+               lengths[j] = get_bits(f, 5) + 1;
+               ++total;
+               if (lengths[j] == 32)
+                  return error(f, VORBIS_invalid_setup);
+            } else {
+               lengths[j] = NO_CODE;
+            }
+         }
+      }
+
+      if (c->sparse && total >= c->entries >> 2) {
+         // convert sparse items to non-sparse!
+         if (c->entries > (int) f->setup_temp_memory_required)
+            f->setup_temp_memory_required = c->entries;
+
+         c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
+         if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
+         memcpy(c->codeword_lengths, lengths, c->entries);
+         setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
+         lengths = c->codeword_lengths;
+         c->sparse = 0;
+      }
+
+      // compute the size of the sorted tables
+      if (c->sparse) {
+         sorted_count = total;
+      } else {
+         sorted_count = 0;
+         #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
+         for (j=0; j < c->entries; ++j)
+            if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
+               ++sorted_count;
+         #endif
+      }
+
+      c->sorted_entries = sorted_count;
+      values = NULL;
+
+      CHECK(f);
+      if (!c->sparse) {
+         c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
+         if (!c->codewords)                  return error(f, VORBIS_outofmem);
+      } else {
+         unsigned int size;
+         if (c->sorted_entries) {
+            c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
+            if (!c->codeword_lengths)           return error(f, VORBIS_outofmem);
+            c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
+            if (!c->codewords)                  return error(f, VORBIS_outofmem);
+            values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
+            if (!values)                        return error(f, VORBIS_outofmem);
+         }
+         size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
+         if (size > f->setup_temp_memory_required)
+            f->setup_temp_memory_required = size;
+      }
+
+      if (!compute_codewords(c, lengths, c->entries, values)) {
+         if (c->sparse) setup_temp_free(f, values, 0);
+         return error(f, VORBIS_invalid_setup);
+      }
+
+      if (c->sorted_entries) {
+         // allocate an extra slot for sentinels
+         c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
+         if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
+         // allocate an extra slot at the front so that c->sorted_values[-1] is defined
+         // so that we can catch that case without an extra if
+         c->sorted_values    = ( int   *) setup_malloc(f, sizeof(*c->sorted_values   ) * (c->sorted_entries+1));
+         if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
+         ++c->sorted_values;
+         c->sorted_values[-1] = -1;
+         compute_sorted_huffman(c, lengths, values);
+      }
+
+      if (c->sparse) {
+         setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
+         setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
+         setup_temp_free(f, lengths, c->entries);
+         c->codewords = NULL;
+      }
+
+      compute_accelerated_huffman(c);
+
+      CHECK(f);
+      c->lookup_type = get_bits(f, 4);
+      if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
+      if (c->lookup_type > 0) {
+         uint16 *mults;
+         c->minimum_value = float32_unpack(get_bits(f, 32));
+         c->delta_value = float32_unpack(get_bits(f, 32));
+         c->value_bits = get_bits(f, 4)+1;
+         c->sequence_p = get_bits(f,1);
+         if (c->lookup_type == 1) {
+            int values = lookup1_values(c->entries, c->dimensions);
+            if (values < 0) return error(f, VORBIS_invalid_setup);
+            c->lookup_values = (uint32) values;
+         } else {
+            c->lookup_values = c->entries * c->dimensions;
+         }
+         if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
+         mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
+         if (mults == NULL) return error(f, VORBIS_outofmem);
+         for (j=0; j < (int) c->lookup_values; ++j) {
+            int q = get_bits(f, c->value_bits);
+            if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
+            mults[j] = q;
+         }
+
+#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
+         if (c->lookup_type == 1) {
+            int len, sparse = c->sparse;
+            float last=0;
+            // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
+            if (sparse) {
+               if (c->sorted_entries == 0) goto skip;
+               c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
+            } else
+               c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries        * c->dimensions);
+            if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
+            len = sparse ? c->sorted_entries : c->entries;
+            for (j=0; j < len; ++j) {
+               unsigned int z = sparse ? c->sorted_values[j] : j;
+               unsigned int div=1;
+               for (k=0; k < c->dimensions; ++k) {
+                  int off = (z / div) % c->lookup_values;
+                  float val = mults[off]*c->delta_value + c->minimum_value + last;
+                  c->multiplicands[j*c->dimensions + k] = val;
+                  if (c->sequence_p)
+                     last = val;
+                  if (k+1 < c->dimensions) {
+                     if (div > UINT_MAX / (unsigned int) c->lookup_values) {
+                        setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
+                        return error(f, VORBIS_invalid_setup);
+                     }
+                     div *= c->lookup_values;
+                  }
+               }
+            }
+            c->lookup_type = 2;
+         }
+         else
+#endif
+         {
+            float last=0;
+            CHECK(f);
+            c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
+            if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
+            for (j=0; j < (int) c->lookup_values; ++j) {
+               float val = mults[j] * c->delta_value + c->minimum_value + last;
+               c->multiplicands[j] = val;
+               if (c->sequence_p)
+                  last = val;
+            }
+         }
+#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
+        skip:;
+#endif
+         setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
+
+         CHECK(f);
+      }
+      CHECK(f);
+   }
+
+   // time domain transfers (notused)
+
+   x = get_bits(f, 6) + 1;
+   for (i=0; i < x; ++i) {
+      uint32 z = get_bits(f, 16);
+      if (z != 0) return error(f, VORBIS_invalid_setup);
+   }
+
+   // Floors
+   f->floor_count = get_bits(f, 6)+1;
+   f->floor_config = (Floor *)  setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
+   if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
+   for (i=0; i < f->floor_count; ++i) {
+      f->floor_types[i] = get_bits(f, 16);
+      if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
+      if (f->floor_types[i] == 0) {
+         Floor0 *g = &f->floor_config[i].floor0;
+         g->order = get_bits(f,8);
+         g->rate = get_bits(f,16);
+         g->bark_map_size = get_bits(f,16);
+         g->amplitude_bits = get_bits(f,6);
+         g->amplitude_offset = get_bits(f,8);
+         g->number_of_books = get_bits(f,4) + 1;
+         for (j=0; j < g->number_of_books; ++j)
+            g->book_list[j] = get_bits(f,8);
+         return error(f, VORBIS_feature_not_supported);
+      } else {
+         stbv__floor_ordering p[31*8+2];
+         Floor1 *g = &f->floor_config[i].floor1;
+         int max_class = -1;
+         g->partitions = get_bits(f, 5);
+         for (j=0; j < g->partitions; ++j) {
+            g->partition_class_list[j] = get_bits(f, 4);
+            if (g->partition_class_list[j] > max_class)
+               max_class = g->partition_class_list[j];
+         }
+         for (j=0; j <= max_class; ++j) {
+            g->class_dimensions[j] = get_bits(f, 3)+1;
+            g->class_subclasses[j] = get_bits(f, 2);
+            if (g->class_subclasses[j]) {
+               g->class_masterbooks[j] = get_bits(f, 8);
+               if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
+            }
+            for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
+               g->subclass_books[j][k] = (int16)get_bits(f,8)-1;
+               if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
+            }
+         }
+         g->floor1_multiplier = get_bits(f,2)+1;
+         g->rangebits = get_bits(f,4);
+         g->Xlist[0] = 0;
+         g->Xlist[1] = 1 << g->rangebits;
+         g->values = 2;
+         for (j=0; j < g->partitions; ++j) {
+            int c = g->partition_class_list[j];
+            for (k=0; k < g->class_dimensions[c]; ++k) {
+               g->Xlist[g->values] = get_bits(f, g->rangebits);
+               ++g->values;
+            }
+         }
+         // precompute the sorting
+         for (j=0; j < g->values; ++j) {
+            p[j].x = g->Xlist[j];
+            p[j].id = j;
+         }
+         qsort(p, g->values, sizeof(p[0]), point_compare);
+         for (j=0; j < g->values-1; ++j)
+            if (p[j].x == p[j+1].x)
+               return error(f, VORBIS_invalid_setup);
+         for (j=0; j < g->values; ++j)
+            g->sorted_order[j] = (uint8) p[j].id;
+         // precompute the neighbors
+         for (j=2; j < g->values; ++j) {
+            int low = 0,hi = 0;
+            neighbors(g->Xlist, j, &low,&hi);
+            g->neighbors[j][0] = low;
+            g->neighbors[j][1] = hi;
+         }
+
+         if (g->values > longest_floorlist)
+            longest_floorlist = g->values;
+      }
+   }
+
+   // Residue
+   f->residue_count = get_bits(f, 6)+1;
+   f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
+   if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
+   memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
+   for (i=0; i < f->residue_count; ++i) {
+      uint8 residue_cascade[64];
+      Residue *r = f->residue_config+i;
+      f->residue_types[i] = get_bits(f, 16);
+      if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
+      r->begin = get_bits(f, 24);
+      r->end = get_bits(f, 24);
+      if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
+      r->part_size = get_bits(f,24)+1;
+      r->classifications = get_bits(f,6)+1;
+      r->classbook = get_bits(f,8);
+      if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
+      for (j=0; j < r->classifications; ++j) {
+         uint8 high_bits=0;
+         uint8 low_bits=get_bits(f,3);
+         if (get_bits(f,1))
+            high_bits = get_bits(f,5);
+         residue_cascade[j] = high_bits*8 + low_bits;
+      }
+      r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
+      if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
+      for (j=0; j < r->classifications; ++j) {
+         for (k=0; k < 8; ++k) {
+            if (residue_cascade[j] & (1 << k)) {
+               r->residue_books[j][k] = get_bits(f, 8);
+               if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
+            } else {
+               r->residue_books[j][k] = -1;
+            }
+         }
+      }
+      // precompute the classifications[] array to avoid inner-loop mod/divide
+      // call it 'classdata' since we already have r->classifications
+      r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
+      if (!r->classdata) return error(f, VORBIS_outofmem);
+      memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
+      for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
+         int classwords = f->codebooks[r->classbook].dimensions;
+         int temp = j;
+         r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
+         if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
+         for (k=classwords-1; k >= 0; --k) {
+            r->classdata[j][k] = temp % r->classifications;
+            temp /= r->classifications;
+         }
+      }
+   }
+
+   f->mapping_count = get_bits(f,6)+1;
+   f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
+   if (f->mapping == NULL) return error(f, VORBIS_outofmem);
+   memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
+   for (i=0; i < f->mapping_count; ++i) {
+      Mapping *m = f->mapping + i;
+      int mapping_type = get_bits(f,16);
+      if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
+      m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
+      if (m->chan == NULL) return error(f, VORBIS_outofmem);
+      if (get_bits(f,1))
+         m->submaps = get_bits(f,4)+1;
+      else
+         m->submaps = 1;
+      if (m->submaps > max_submaps)
+         max_submaps = m->submaps;
+      if (get_bits(f,1)) {
+         m->coupling_steps = get_bits(f,8)+1;
+         if (m->coupling_steps > f->channels) return error(f, VORBIS_invalid_setup);
+         for (k=0; k < m->coupling_steps; ++k) {
+            m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
+            m->chan[k].angle = get_bits(f, ilog(f->channels-1));
+            if (m->chan[k].magnitude >= f->channels)        return error(f, VORBIS_invalid_setup);
+            if (m->chan[k].angle     >= f->channels)        return error(f, VORBIS_invalid_setup);
+            if (m->chan[k].magnitude == m->chan[k].angle)   return error(f, VORBIS_invalid_setup);
+         }
+      } else
+         m->coupling_steps = 0;
+
+      // reserved field
+      if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
+      if (m->submaps > 1) {
+         for (j=0; j < f->channels; ++j) {
+            m->chan[j].mux = get_bits(f, 4);
+            if (m->chan[j].mux >= m->submaps)                return error(f, VORBIS_invalid_setup);
+         }
+      } else
+         // @SPECIFICATION: this case is missing from the spec
+         for (j=0; j < f->channels; ++j)
+            m->chan[j].mux = 0;
+
+      for (j=0; j < m->submaps; ++j) {
+         get_bits(f,8); // discard
+         m->submap_floor[j] = get_bits(f,8);
+         m->submap_residue[j] = get_bits(f,8);
+         if (m->submap_floor[j] >= f->floor_count)      return error(f, VORBIS_invalid_setup);
+         if (m->submap_residue[j] >= f->residue_count)  return error(f, VORBIS_invalid_setup);
+      }
+   }
+
+   // Modes
+   f->mode_count = get_bits(f, 6)+1;
+   for (i=0; i < f->mode_count; ++i) {
+      Mode *m = f->mode_config+i;
+      m->blockflag = get_bits(f,1);
+      m->windowtype = get_bits(f,16);
+      m->transformtype = get_bits(f,16);
+      m->mapping = get_bits(f,8);
+      if (m->windowtype != 0)                 return error(f, VORBIS_invalid_setup);
+      if (m->transformtype != 0)              return error(f, VORBIS_invalid_setup);
+      if (m->mapping >= f->mapping_count)     return error(f, VORBIS_invalid_setup);
+   }
+
+   flush_packet(f);
+
+   f->previous_length = 0;
+
+   for (i=0; i < f->channels; ++i) {
+      f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
+      f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
+      f->finalY[i]          = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
+      if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
+      memset(f->channel_buffers[i], 0, sizeof(float) * f->blocksize_1);
+      #ifdef STB_VORBIS_NO_DEFER_FLOOR
+      f->floor_buffers[i]   = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
+      if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
+      #endif
+   }
+
+   if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
+   if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
+   f->blocksize[0] = f->blocksize_0;
+   f->blocksize[1] = f->blocksize_1;
+
+#ifdef STB_VORBIS_DIVIDE_TABLE
+   if (integer_divide_table[1][1]==0)
+      for (i=0; i < DIVTAB_NUMER; ++i)
+         for (j=1; j < DIVTAB_DENOM; ++j)
+            integer_divide_table[i][j] = i / j;
+#endif
+
+   // compute how much temporary memory is needed
+
+   // 1.
+   {
+      uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
+      uint32 classify_mem;
+      int i,max_part_read=0;
+      for (i=0; i < f->residue_count; ++i) {
+         Residue *r = f->residue_config + i;
+         unsigned int actual_size = f->blocksize_1 / 2;
+         unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size;
+         unsigned int limit_r_end   = r->end   < actual_size ? r->end   : actual_size;
+         int n_read = limit_r_end - limit_r_begin;
+         int part_read = n_read / r->part_size;
+         if (part_read > max_part_read)
+            max_part_read = part_read;
+      }
+      #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+      classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
+      #else
+      classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
+      #endif
+
+      // maximum reasonable partition size is f->blocksize_1
+
+      f->temp_memory_required = classify_mem;
+      if (imdct_mem > f->temp_memory_required)
+         f->temp_memory_required = imdct_mem;
+   }
+
+
+   if (f->alloc.alloc_buffer) {
+      assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
+      // check if there's enough temp memory so we don't error later
+      if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
+         return error(f, VORBIS_outofmem);
+   }
+
+   // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page
+   // without PAGEFLAG_continued_packet, so this either points to the first page, or
+   // the page after the end of the headers. It might be cleaner to point to a page
+   // in the middle of the headers, when that's the page where the first audio packet
+   // starts, but we'd have to also correctly skip the end of any continued packet in
+   // stb_vorbis_seek_start.
+   if (f->next_seg == -1) {
+      f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
+   } else {
+      f->first_audio_page_offset = 0;
+   }
+
+   return TRUE;
+}
+
+static void vorbis_deinit(stb_vorbis *p)
+{
+   int i,j;
+   // frees what p owns (not p itself); also runs after a failed start_decoder, so tables may still be NULL
+   setup_free(p, p->vendor);
+   if (p->comment_list) { // guarded like the tables below: never index a NULL list, whatever the length says
+      for (i=0; i < p->comment_list_length; ++i) setup_free(p, p->comment_list[i]);
+      setup_free(p, p->comment_list);
+   }
+
+   if (p->residue_config) {
+      for (i=0; i < p->residue_count; ++i) {
+         Residue *r = p->residue_config+i;
+         if (r->classdata) {
+            // classdata holds one row per entry of this residue's classbook
+            for (j=0; j < p->codebooks[r->classbook].entries; ++j)
+               setup_free(p, r->classdata[j]);
+            setup_free(p, r->classdata);
+         }
+         setup_free(p, r->residue_books);
+      }
+   }
+
+   if (p->codebooks) {
+      CHECK(p);
+      for (i=0; i < p->codebook_count; ++i) {
+         Codebook *c = p->codebooks + i;
+         setup_free(p, c->codeword_lengths);
+         setup_free(p, c->multiplicands);
+         setup_free(p, c->codewords);
+         setup_free(p, c->sorted_codewords);
+         // c->sorted_values[-1] is the first entry in the array
+         setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
+      }
+      setup_free(p, p->codebooks);
+   }
+   setup_free(p, p->floor_config);
+   setup_free(p, p->residue_config); // the per-residue members were released above
+   if (p->mapping) {
+      for (i=0; i < p->mapping_count; ++i)
+         setup_free(p, p->mapping[i].chan);
+      setup_free(p, p->mapping);
+   }
+   CHECK(p);
+   // per-channel buffers; the loop never runs past STB_VORBIS_MAX_CHANNELS
+   for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
+      setup_free(p, p->channel_buffers[i]);
+      setup_free(p, p->previous_window[i]);
+      #ifdef STB_VORBIS_NO_DEFER_FLOOR
+      setup_free(p, p->floor_buffers[i]);
+      #endif
+      setup_free(p, p->finalY[i]);
+   }
+   // tables kept in two slots each (index 0 and 1)
+   for (i=0; i < 2; ++i) {
+      setup_free(p, p->A[i]);
+      setup_free(p, p->B[i]);
+      setup_free(p, p->C[i]);
+      setup_free(p, p->window[i]);
+      setup_free(p, p->bit_reverse[i]);
+   }
+   #ifndef STB_VORBIS_NO_STDIO
+   if (p->close_on_free) fclose(p->f); // the FILE is closed only when close_on_free is set
+   #endif
+}
+
+void stb_vorbis_close(stb_vorbis *p)
+{
+   if (!p) return;      // closing a NULL handle is a no-op
+   vorbis_deinit(p);    // release everything the decoder owns...
+   setup_free(p, p);    // ...then the decoder struct itself
+}
+
+static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
+{
+   memset(p, 0, sizeof(*p)); // all-zero start, so every malloc'd pointer begins as NULL
+   p->page_crc_tests = -1;
+   p->codebooks = NULL;
+   p->stream = NULL;
+   p->error = VORBIS__no_error;
+   p->eof = 0;
+   #ifndef STB_VORBIS_NO_STDIO
+   p->f = NULL;
+   p->close_on_free = FALSE;
+   #endif
+   if (z != NULL) {
+      p->alloc = *z;
+      p->alloc.alloc_buffer_length_in_bytes &= ~7; // clear the low three bits of the buffer length
+      p->temp_offset = p->alloc.alloc_buffer_length_in_bytes; // temp_offset starts at the (trimmed) end of the buffer
+   }
+}
+
+int stb_vorbis_get_sample_offset(stb_vorbis *f)
+{
+   // -1 is returned while the current sample position is not known
+   if (!f->current_loc_valid)
+      return -1;
+   return f->current_loc;
+}
+
+stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
+{
+   stb_vorbis_info info; // snapshot copied out of the decoder state and returned by value
+   info.sample_rate = f->sample_rate;
+   info.channels = f->channels;
+   info.max_frame_size = f->blocksize_1 >> 1; // half of blocksize_1
+   info.temp_memory_required = f->temp_memory_required;
+   info.setup_temp_memory_required = f->setup_temp_memory_required;
+   info.setup_memory_required = f->setup_memory_required;
+   return info;
+}
+
+stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f)
+{
+   stb_vorbis_comment result; // the pointers are copied, not the data they point at
+   result.comment_list = f->comment_list;
+   result.comment_list_length = f->comment_list_length;
+   result.vendor = f->vendor;
+   return result;
+}
+
+int stb_vorbis_get_error(stb_vorbis *f)
+{
+   int pending = f->error;         // remember whatever error is currently recorded
+   f->error = VORBIS__no_error;    // reading the error also clears it
+   return pending;
+}
+
+static stb_vorbis * vorbis_alloc(stb_vorbis *f)
+{
+   // room for one decoder struct, requested through setup_malloc with f as context; callers check for NULL
+   return (stb_vorbis *) setup_malloc(f, sizeof(stb_vorbis));
+}
+
+#ifndef STB_VORBIS_NO_PUSHDATA_API
+
+void stb_vorbis_flush_pushdata(stb_vorbis *f)
+{
+   f->page_crc_tests  = 0;          // >= 0 makes the next pushdata decode call search for a page first
+   f->current_loc_valid = FALSE;    // sample position is unknown until a page is found again
+   f->samples_output = 0;
+   f->discard_samples_deferred = 0;
+   f->first_decode = FALSE;
+   f->previous_length = 0;
+   f->channel_buffer_start = 0;
+   f->channel_buffer_end = 0;
+}
+
+// push-mode resync: scan data for Ogg pages and CRC-check the candidates (state persists in f->scan across calls); returns bytes consumed
+static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
+{
+   int i,n;
+   for (i=0; i < f->page_crc_tests; ++i) f->scan[i].bytes_done = 0; // carried-over candidates resume at the start of this chunk
+
+   // if we have room for more scans, search for them first, because
+   // they may cause us to stop early if their header is incomplete
+   if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
+      if (data_len < 4) return 0;
+      data_len -= 3; // need to look for 4-byte sequence, so don't miss
+                     // one that straddles a boundary
+      for (i=0; i < data_len; ++i) {
+         if (data[i] == 0x4f) {
+            if (0==memcmp(data+i, ogg_page_header, 4)) {
+               int j,len;
+               uint32 crc;
+               // make sure we have the whole page header
+               if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
+                  // only read up to this page start, so hopefully we'll
+                  // have the whole page header start next time
+                  data_len = i;
+                  break;
+               }
+               // ok, we have it all; compute the length of the page
+               len = 27 + data[i+26];
+               for (j=0; j < data[i+26]; ++j)
+                  len += data[i+27+j];
+               // scan everything up to the embedded crc (which we must 0)
+               crc = 0;
+               for (j=0; j < 22; ++j)
+                  crc = crc32_update(crc, data[i+j]);
+               // now process 4 0-bytes
+               for (   ; j < 26; ++j)
+                  crc = crc32_update(crc, 0);
+               // len is the total number of bytes we need to scan
+               n = f->page_crc_tests++;
+               f->scan[n].bytes_left = len-j;
+               f->scan[n].crc_so_far = crc;
+               // top byte is widened to uint32 before shifting: shifting the promoted int into the sign bit is undefined
+               f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + ((uint32)data[i+25]<<24);
+               // if the last frame on a page is continued to the next, then
+               // we can't recover the sample_loc immediately
+               if (data[i+27+data[i+26]-1] == 255)
+                  f->scan[n].sample_loc = ~0;
+               else
+                  f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + ((uint32)data[i+ 9]<<24);
+               f->scan[n].bytes_done = i+j;
+               if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
+                  break;
+               // keep going if we still have room for more
+            }
+         }
+      }
+   }
+
+   // feed this chunk to every candidate; one whose byte count runs out is either accepted (crc match) or dropped
+   for (i=0; i < f->page_crc_tests;) {
+      uint32 crc;
+      int j;
+      int n = f->scan[i].bytes_done;
+      int m = f->scan[i].bytes_left;
+      if (m > data_len - n) m = data_len - n; // NOTE(review): goes negative if data_len was cut back below bytes_done above - confirm that is harmless
+      // m is the bytes to scan in the current chunk
+      crc = f->scan[i].crc_so_far;
+      for (j=0; j < m; ++j)
+         crc = crc32_update(crc, data[n+j]);
+      f->scan[i].bytes_left -= m;
+      f->scan[i].crc_so_far = crc;
+      if (f->scan[i].bytes_left == 0) {
+         // does it match?
+         if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
+            // Houston, we have page
+            data_len = n+m; // consumption amount is wherever that scan ended
+            f->page_crc_tests = -1; // drop out of page scan mode
+            f->previous_length = 0; // decode-but-don't-output one frame
+            f->next_seg = -1;       // start a new page
+            f->current_loc = f->scan[i].sample_loc; // set the current sample location
+                                    // to the amount we'd have decoded had we decoded this page
+            f->current_loc_valid = f->current_loc != ~0U;
+            return data_len;
+         }
+         // delete entry
+         f->scan[i] = f->scan[--f->page_crc_tests];
+      } else {
+         ++i;
+      }
+   }
+
+   return data_len;
+}
+
+// return value: number of bytes of data we used (0 if nothing could be consumed yet); *samples is 0 when no audio was produced
+int stb_vorbis_decode_frame_pushdata(
+         stb_vorbis *f,                   // the file we're decoding
+         const uint8 *data, int data_len, // the memory available for decoding
+         int *channels,                   // place to write number of float * buffers
+         float ***output,                 // place to write float ** array of float * buffers
+         int *samples                     // place to write number of output samples
+     )
+{
+   int i;
+   int len,right,left;
+
+   if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
+
+   // page_crc_tests >= 0 means we are resynchronizing: spend this call looking for a page
+   if (f->page_crc_tests >= 0) {
+      *samples = 0;
+      return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
+   }
+
+   f->stream     = (uint8 *) data;
+   f->stream_end = (uint8 *) data + data_len;
+   f->error      = VORBIS__no_error;
+
+   // check that we have the entire packet in memory
+   if (!is_whole_packet_present(f)) {
+      *samples = 0;
+      return 0;
+   }
+
+   if (!vorbis_decode_packet(f, &len, &left, &right)) {
+      // save the actual error we encountered
+      enum STBVorbisError error = f->error;
+      if (error == VORBIS_bad_packet_type) {
+         // flush and resynch
+         f->error = VORBIS__no_error;
+         while (get8_packet(f) != EOP)
+            if (f->eof) break;
+         *samples = 0;
+         return (int) (f->stream - data); // everything read while skipping the packet counts as used
+      }
+      if (error == VORBIS_continued_packet_flag_invalid) {
+         if (f->previous_length == 0) {
+            // we may be resynching, in which case it's ok to hit one
+            // of these; just discard the packet
+            f->error = VORBIS__no_error;
+            while (get8_packet(f) != EOP)
+               if (f->eof) break;
+            *samples = 0;
+            return (int) (f->stream - data);
+         }
+      }
+      // if we get an error while parsing, what to do?
+      // well, it DEFINITELY won't work to continue from where we are!
+      stb_vorbis_flush_pushdata(f); // sets page_crc_tests to 0, so the next call starts a page search
+      // restore the error that actually made us bail
+      f->error = error;
+      *samples = 0;
+      return 1; // reports a single byte as used on this path
+   }
+
+   // success!
+   len = vorbis_finish_frame(f, len, left, right);
+   for (i=0; i < f->channels; ++i)
+      f->outputs[i] = f->channel_buffers[i] + left; // each output pointer starts 'left' samples into its channel buffer
+
+   if (channels) *channels = f->channels; // channels may be NULL; samples and output are written unconditionally
+   *samples = len;
+   *output = f->outputs;
+   return (int) (f->stream - data);
+}
+
+stb_vorbis *stb_vorbis_open_pushdata(
+         const unsigned char *data, int data_len, // the memory available for decoding
+         int *data_used,              // only defined if result is not NULL
+         int *error, const stb_vorbis_alloc *alloc)
+{
+   stb_vorbis *f, p;
+   vorbis_init(&p, alloc);
+   p.stream     = (uint8 *) data;
+   p.stream_end = (uint8 *) data + data_len;
+   p.push_mode  = TRUE;
+   if (!start_decoder(&p)) {
+      if (p.eof)
+         *error = VORBIS_need_more_data;
+      else
+         *error = p.error;
+      vorbis_deinit(&p);
+      return NULL;
+   }
+   f = vorbis_alloc(&p);
+   if (f) {
+      *f = p;
+      *data_used = (int) (f->stream - data);
+      *error = 0;
+      return f;
+   } else {
+      vorbis_deinit(&p);
+      return NULL;
+   }
+}
+#endif // STB_VORBIS_NO_PUSHDATA_API
+
+unsigned int stb_vorbis_get_file_offset(stb_vorbis *f) // current read offset of the decoder's input (see per-mode notes)
+{
+   #ifndef STB_VORBIS_NO_PUSHDATA_API
+   if (f->push_mode) return 0; // push mode always reports 0
+   #endif
+   if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start); // memory input: distance from stream_start
+   #ifndef STB_VORBIS_NO_STDIO
+   return (unsigned int) (ftell(f->f) - f->f_start); // FILE input: ftell position relative to f_start
+   #endif
+} // NOTE(review): with STB_VORBIS_NO_STDIO there is no return after the memory check - presumably USE_MEMORY is always true then; confirm
+
+#ifndef STB_VORBIS_NO_PULLDATA_API
+//
+// DATA-PULLING API
+//
+
+static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last) // scan forward for a CRC-valid Ogg page; 1 if found, 0 at eof
+{
+   for(;;) {
+      int n;
+      if (f->eof) return 0;
+      n = get8(f);
+      if (n == 0x4f) { // page header candidate
+         unsigned int retry_loc = stb_vorbis_get_file_offset(f); // offset of the byte after the 0x4f just read
+         int i;
+         // check if we're off the end of a file_section stream
+         if (retry_loc - 25 > f->stream_len)
+            return 0;
+         // check the rest of the header
+         for (i=1; i < 4; ++i)
+            if (get8(f) != ogg_page_header[i])
+               break;
+         if (f->eof) return 0;
+         if (i == 4) {
+            uint8 header[27];
+            uint32 i, crc, goal, len;
+            for (i=0; i < 4; ++i)
+               header[i] = ogg_page_header[i];
+            for (; i < 27; ++i)
+               header[i] = get8(f);
+            if (f->eof) return 0;
+            if (header[4] != 0) goto invalid; // byte 4 of a candidate header must be 0
+            goal = header[22] + (header[23] << 8) + (header[24]<<16) + ((uint32)header[25]<<24);
+            for (i=22; i < 26; ++i)
+               header[i] = 0; // the crc is computed with its own four bytes zeroed
+            crc = 0;
+            for (i=0; i < 27; ++i)
+               crc = crc32_update(crc, header[i]);
+            len = 0;
+            for (i=0; i < header[26]; ++i) { // header[26] segment-size bytes follow; their sum is the body length
+               int s = get8(f);
+               crc = crc32_update(crc, s);
+               len += s;
+            }
+            if (len && f->eof) return 0;
+            for (i=0; i < len; ++i)
+               crc = crc32_update(crc, get8(f));
+            // finished parsing probable page
+            if (crc == goal) {
+               // we could now check that it's either got the last
+               // page flag set, OR it's followed by the capture
+               // pattern, but I guess TECHNICALLY you could have
+               // a file with garbage between each ogg page and recover
+               // from it automatically? So even though that paranoia
+               // might decrease the chance of an invalid decode by
+               // another 2^32, not worth it since it would hose those
+               // invalid-but-useful files?
+               if (end)
+                  *end = stb_vorbis_get_file_offset(f); // offset just past the page body
+               if (last) {
+                  if (header[5] & 0x04)
+                     *last = 1;
+                  else
+                     *last = 0;
+               }
+               set_file_offset(f, retry_loc-1); // leave the stream positioned on the page's first byte
+               return 1;
+            }
+         }
+        invalid:
+         // not a valid page, so rewind and look for next one
+         set_file_offset(f, retry_loc);
+      }
+   }
+}
+
+
+#define SAMPLE_unknown  0xffffffff
+
+// seeking is implemented with a binary search, which narrows down the range to
+// 64K, before using a linear search (because finding the synchronization
+// pattern can be expensive, and the chance we'd find the end page again is
+// relatively high for small ranges)
+//
+// two initial interpolation-style probes are used at the start of the search
+// to try to bound either side of the binary search sensibly, while still
+// working in O(log n) time if they fail.
+
+static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
+{
+   uint8 header[27], lacing[255];
+   int i,len;
+   // fills *z with the extent and sample position of the page at the current offset, then seeks back; 0 if no 'OggS' there
+   // record where the page starts
+   z->page_start = stb_vorbis_get_file_offset(f);
+
+   // parse the header
+   getn(f, header, 27); // NOTE(review): a failed read is not detected here - confirm header cannot be used unfilled
+   if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
+      return 0;
+   getn(f, lacing, header[26]); // header[26] is the number of lacing (segment size) bytes
+
+   // determine the length of the payload
+   len = 0;
+   for (i=0; i < header[26]; ++i)
+      len += lacing[i];
+
+   // this implies where the page ends
+   z->page_end = z->page_start + 27 + header[26] + len;
+
+   // read the last-decoded sample out of the data (top byte widened first: shifting an int into the sign bit is undefined)
+   z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + ((uint32) header[9] << 24);
+
+   // restore file state to where we were
+   set_file_offset(f, z->page_start);
+   return 1;
+}
+
+// rarely used helper for packet-start hunting: positions the file at the start of
+// a page that begins before limit_offset and ends at or after it (1), or reports 0
+static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
+{
+   unsigned int scan_from, page_end;
+
+   // begin the scan 64K before the limit, but never ahead of the first audio page
+   scan_from = f->first_audio_page_offset;
+   if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
+      scan_from = limit_offset - 65536;
+
+   set_file_offset(f, scan_from);
+
+   // step from page to page until one straddles the limit or no further page is found
+   for (;;) {
+      if (!vorbis_find_page(f, &page_end, NULL))
+         return 0;
+      if (page_end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
+         return 1;
+      set_file_offset(f, page_end);
+   }
+}
+
+// implements the page-level search for a seek and starts decoding there. if
+// the function succeeds (returns 1), current_loc_valid will be true and
+// current_loc will be less than or equal to the provided sample number (the
+// closer the better). failure paths return 0 or the result of error().
+static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
+{
+   ProbedPage left, right, mid;
+   int i, start_seg_with_known_loc, end_pos, page_start;
+   uint32 delta, stream_length, padding, last_sample_limit;
+   double offset = 0.0, bytes_per_sample = 0.0;
+   int probe = 0; // number of search iterations so far; the first two use interpolation
+
+   // find the last page and validate the target sample
+   stream_length = stb_vorbis_stream_length_in_samples(f);
+   if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
+   if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
+
+   // this is the maximum difference between the window-center (which is the
+   // actual granule position value), and the right-start (which the spec
+   // indicates should be the granule position (give or take one)).
+   padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
+   if (sample_number < padding)
+      last_sample_limit = 0;
+   else
+      last_sample_limit = sample_number - padding;
+
+   left = f->p_first;
+   while (left.last_decoded_sample == ~0U) {
+      // (untested) the first page does not have a 'last_decoded_sample'
+      set_file_offset(f, left.page_end);
+      if (!get_seek_page_info(f, &left)) goto error;
+   }
+
+   right = f->p_last;
+   assert(right.last_decoded_sample != ~0U);
+
+   // starting from the start is handled differently
+   if (last_sample_limit <= left.last_decoded_sample) {
+      if (stb_vorbis_seek_start(f)) {
+         if (f->current_loc > sample_number)
+            return error(f, VORBIS_seek_failed);
+         return 1;
+      }
+      return 0;
+   }
+
+   // narrow [left, right] until the two pages are adjacent
+   while (left.page_end != right.page_start) {
+      assert(left.page_end < right.page_start);
+      // search range in bytes
+      delta = right.page_start - left.page_end;
+      if (delta <= 65536) {
+         // there's only 64K left to search - handle it linearly
+         set_file_offset(f, left.page_end);
+      } else {
+         if (probe < 2) {
+            if (probe == 0) {
+               // first probe (interpolate)
+               double data_bytes = right.page_end - left.page_start;
+               bytes_per_sample = data_bytes / right.last_decoded_sample;
+               offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample);
+            } else {
+               // second probe (try to bound the other side)
+               double error = ((double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample;
+               if (error >= 0 && error <  8000) error =  8000;
+               if (error <  0 && error > -8000) error = -8000;
+               offset += error * 2;
+            }
+
+            // ensure the offset is valid
+            if (offset < left.page_end)
+               offset = left.page_end;
+            if (offset > right.page_start - 65536)
+               offset = right.page_start - 65536;
+
+            set_file_offset(f, (unsigned int) offset);
+         } else {
+            // binary search for large ranges (offset by 32K to ensure
+            // we don't hit the right page)
+            set_file_offset(f, left.page_end + (delta / 2) - 32768);
+         }
+
+         if (!vorbis_find_page(f, NULL, NULL)) goto error;
+      }
+
+      // read page info, skipping forward over pages that carry no sample position
+      for (;;) {
+         if (!get_seek_page_info(f, &mid)) goto error;
+         if (mid.last_decoded_sample != ~0U) break;
+         // (untested) no frames end on this page
+         set_file_offset(f, mid.page_end);
+         assert(mid.page_start < right.page_start);
+      }
+
+      // if we've just found the last page again then we're in a tricky file,
+      // and we're close enough (if it wasn't an interpolation probe).
+      if (mid.page_start == right.page_start) {
+         if (probe >= 2 || delta <= 65536)
+            break;
+      } else {
+         if (last_sample_limit < mid.last_decoded_sample)
+            right = mid;
+         else
+            left = mid;
+      }
+
+      ++probe;
+   }
+
+   // seek back to start of the last packet
+   page_start = left.page_start;
+   set_file_offset(f, page_start);
+   if (!start_page(f)) return error(f, VORBIS_seek_failed);
+   end_pos = f->end_seg_with_known_loc;
+   assert(end_pos >= 0);
+
+   for (;;) {
+      // walk back over 255-sized segments: a segment of 255 means the packet continues
+      for (i = end_pos; i > 0; --i)
+         if (f->segments[i-1] != 255)
+            break;
+
+      start_seg_with_known_loc = i;
+
+      if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
+         break;
+
+      // (untested) the final packet begins on an earlier page
+      if (!go_to_page_before(f, page_start))
+         goto error;
+
+      page_start = stb_vorbis_get_file_offset(f);
+      if (!start_page(f)) goto error;
+      end_pos = f->segment_count - 1;
+   }
+
+   // prepare to start decoding
+   f->current_loc_valid = FALSE;
+   f->last_seg = FALSE;
+   f->valid_bits = 0;
+   f->packet_bytes = 0;
+   f->bytes_in_seg = 0;
+   f->previous_length = 0;
+   f->next_seg = start_seg_with_known_loc;
+
+   // skip the bytes of the segments that precede the chosen packet
+   for (i = 0; i < start_seg_with_known_loc; i++)
+      skip(f, f->segments[i]);
+
+   // start decoding (optimizable - this frame is generally discarded)
+   if (!vorbis_pump_first_frame(f))
+      return 0;
+   if (f->current_loc > sample_number)
+      return error(f, VORBIS_seek_failed);
+   return 1;
+
+error:
+   // try to restore the file to a valid state
+   stb_vorbis_seek_start(f);
+   return error(f, VORBIS_seek_failed);
+}
+
+// the same as vorbis_decode_initial, but rewinds afterwards instead of advancing; returns 0 if vorbis_decode_initial fails
+static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
+{
+   int bits_read, bytes_read;
+
+   if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
+      return 0;
+
+   // either 1 or 2 bytes were read, figure out which so we can rewind
+   bits_read = 1 + ilog(f->mode_count-1);
+   if (f->mode_config[*mode].blockflag)
+      bits_read += 2; // two more bits are counted when the mode's blockflag is set
+   bytes_read = (bits_read + 7) / 8; // round up to whole bytes
+
+   // undo the byte accounting and step the stream back by the same amount
+   f->bytes_in_seg += bytes_read;
+   f->packet_bytes -= bytes_read;
+   skip(f, -bytes_read);
+   // step next_seg back by one; -1 is mapped to the page's last segment
+   if (f->next_seg == -1)
+      f->next_seg = f->segment_count - 1;
+   else
+      f->next_seg--;
+   f->valid_bits = 0; // discard any bits still buffered
+
+   return 1;
+}
+
+int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number) // seek so the next frame decoded contains sample_number; 1 on success
+{
+   uint32 max_frame_samples;
+
+   if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
+
+   // fast page-level search
+   if (!seek_to_sample_coarse(f, sample_number))
+      return 0;
+
+   assert(f->current_loc_valid);
+   assert(f->current_loc <= sample_number);
+
+   // linear search for the relevant packet
+   max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
+   while (f->current_loc < sample_number) {
+      int left_start, left_end, right_start, right_end, mode, frame_samples;
+      // peek at the next packet's window bounds without consuming it
+      if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
+         return error(f, VORBIS_seek_failed);
+      // calculate the number of samples returned by the next frame
+      frame_samples = right_start - left_start;
+      if (f->current_loc + frame_samples > sample_number) {
+         return 1; // the next frame will contain the sample
+      } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
+         // there's a chance the frame after this could contain the sample
+         vorbis_pump_first_frame(f); // NOTE(review): result is not checked here
+      } else {
+         // this frame is too early to be relevant
+         f->current_loc += frame_samples; // account for its samples without decoding it
+         f->previous_length = 0;
+         maybe_start_packet(f);
+         flush_packet(f);
+      }
+   }
+   // the next frame should start with the sample
+   if (f->current_loc != sample_number) return error(f, VORBIS_seek_failed);
+   return 1;
+}
+
+int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
+{
+   uint32 frame_begin, lead_in;
+   int channel_count;
+   // first land on the frame that will contain the requested sample
+   if (!stb_vorbis_seek_frame(f, sample_number)) return 0;
+   if (f->current_loc == sample_number) return 1; // already exactly at the target
+   // otherwise decode that frame and skip the samples that precede the target
+   frame_begin = f->current_loc;
+   stb_vorbis_get_frame_float(f, &channel_count, NULL);
+   assert(sample_number > frame_begin);
+   lead_in = sample_number - frame_begin;
+   assert(f->channel_buffer_start + (int) lead_in <= f->channel_buffer_end);
+   f->channel_buffer_start += lead_in;
+   return 1;
+}
+
+int stb_vorbis_seek_start(stb_vorbis *f)
+{ // Rewind to the first audio page and pump the first frame; returns the result of vorbis_pump_first_frame.
+   if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); } // not available in push mode
+   set_file_offset(f, f->first_audio_page_offset);
+   f->previous_length = 0; // reset the previous-frame length
+   f->first_decode = TRUE;
+   f->next_seg = -1;
+   return vorbis_pump_first_frame(f);
+}
+
+unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
+{ // Returns the stream length in samples (computed on first use, then cached in f->total_samples); 0 when the length is unknown.
+   unsigned int restore_offset, previous_safe;
+   unsigned int end, last_page_loc;
+
+   if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
+   if (!f->total_samples) {
+      unsigned int last;
+      uint32 lo,hi;
+      char header[6];
+
+      // first, store the current decode position so we can restore it
+      restore_offset = stb_vorbis_get_file_offset(f);
+
+      // now we want to seek back 64K from the end (the last page must
+      // be at most a little less than 64K, but let's allow a little slop)
+      if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
+         previous_safe = f->stream_len - 65536;
+      else
+         previous_safe = f->first_audio_page_offset;
+
+      set_file_offset(f, previous_safe);
+      // previous_safe is now our candidate 'earliest known place that seeking
+      // to will lead to the final page'
+
+      if (!vorbis_find_page(f, &end, &last)) {
+         // if we can't find a page, we're hosed!
+         f->error = VORBIS_cant_find_last_page;
+         f->total_samples = SAMPLE_unknown; // same sentinel the other failure path and the final return use
+         goto done;
+      }
+
+      // check if there are more pages
+      last_page_loc = stb_vorbis_get_file_offset(f);
+
+      // stop when the last_page flag is set, not when we reach eof;
+      // this allows us to stop short of a 'file_section' end without
+      // explicitly checking the length of the section
+      while (!last) {
+         set_file_offset(f, end);
+         if (!vorbis_find_page(f, &end, &last)) {
+            // the last page we found didn't have the 'last page' flag
+            // set. whoops!
+            break;
+         }
+         //previous_safe = last_page_loc+1; // NOTE: not used after this point, but note for debugging
+         last_page_loc = stb_vorbis_get_file_offset(f);
+      }
+
+      set_file_offset(f, last_page_loc);
+
+      // parse the header
+      getn(f, (unsigned char *)header, 6); // the 6 bytes read here are not inspected; the read result is not checked
+      // extract the absolute granule position
+      lo = get32(f);
+      hi = get32(f);
+      if (lo == 0xffffffff && hi == 0xffffffff) {
+         f->error = VORBIS_cant_find_last_page;
+         f->total_samples = SAMPLE_unknown;
+         goto done;
+      }
+      if (hi)
+         lo = 0xfffffffe; // saturate
+      f->total_samples = lo;
+
+      f->p_last.page_start = last_page_loc; // remember the last page found for later use
+      f->p_last.page_end   = end;
+      f->p_last.last_decoded_sample = lo;
+
+     done:
+      set_file_offset(f, restore_offset); // always put the read position back where the caller left it
+   }
+   return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
+}
+
+float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
+{ // Stream length in seconds: the sample count from stb_vorbis_stream_length_in_samples divided by f->sample_rate.
+   return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate; // the sample count is 0 when the length is unknown
+}
+
+
+
+int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
+{
+   int packet_len, win_left, win_right, ch, produced;
+   if (IS_PUSH_MODE(f))
+      return error(f, VORBIS_invalid_api_mixing);
+   // decode failed: leave an empty sample window behind and report no samples
+   if (!vorbis_decode_packet(f, &packet_len, &win_left, &win_right)) {
+      f->channel_buffer_end = 0;
+      f->channel_buffer_start = 0;
+      return 0;
+   }
+   produced = vorbis_finish_frame(f, packet_len, win_left, win_right);
+   // expose each channel's samples starting at the left edge of the window
+   for (ch = 0; ch < f->channels; ch++)
+      f->outputs[ch] = &f->channel_buffers[ch][win_left];
+   f->channel_buffer_start = win_left;
+   f->channel_buffer_end   = win_left + produced;
+   if (channels != NULL) *channels = f->channels;
+   if (output != NULL)   *output = f->outputs;
+   return produced;
+}
+
+#ifndef STB_VORBIS_NO_STDIO
+stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
+{ // Open a decoder on `length` bytes of `file` starting at its current position; returns NULL and stores the reason in *error (if given) on failure.
+   stb_vorbis *f, p;
+   vorbis_init(&p, alloc);
+   p.f = file;
+   p.f_start = (uint32) ftell(file); // the section begins wherever the file is positioned now
+   p.stream_len   = length;
+   p.close_on_free = close_on_free;
+   if (start_decoder(&p)) {
+      f = vorbis_alloc(&p);
+      if (f) {
+         *f = p; // set-up happened in the stack copy; move it to the allocated handle
+         vorbis_pump_first_frame(f);
+         if (error) *error = VORBIS__no_error; // clear stale caller state on success, as stb_vorbis_open_memory does
+         return f;
+      }
+   }
+   if (error) *error = p.error;
+   vorbis_deinit(&p);
+   return NULL;
+}
+
+stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
+{
+   // the section to decode runs from the current position to the end of the file
+   unsigned int remaining, origin = (unsigned int) ftell(file);
+   fseek(file, 0, SEEK_END);
+   remaining = (unsigned int) (ftell(file) - origin);
+   fseek(file, origin, SEEK_SET); // go back to where the caller had the file positioned
+   return stb_vorbis_open_file_section(file, close_on_free, error, alloc, remaining);
+}
+
+stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
+{
+   FILE *fp;
+#if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
+   if (fopen_s(&fp, filename, "rb") != 0) fp = NULL;
+#else
+   fp = fopen(filename, "rb");
+#endif
+   if (fp == NULL) {
+      if (error != NULL) *error = VORBIS_file_open_failure;
+      return NULL;
+   }
+   return stb_vorbis_open_file(fp, TRUE, error, alloc); // TRUE is passed as close_on_free
+}
+#endif // STB_VORBIS_NO_STDIO
+
+stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
+{
+   stb_vorbis *handle, scratch;
+   // a null buffer is reported as an unexpected end of file
+   if (data == NULL) {
+      if (error != NULL) *error = VORBIS_unexpected_eof;
+      return NULL;
+   }
+   // set the decoder up in a stack copy first; it is copied into the allocated handle only once start-up succeeds
+   vorbis_init(&scratch, alloc);
+   scratch.stream       = (uint8 *) data;
+   scratch.stream_start = (uint8 *) data;
+   scratch.stream_end   = (uint8 *) data + len;
+   scratch.stream_len   = len;
+   scratch.push_mode    = FALSE;
+   if (start_decoder(&scratch) && (handle = vorbis_alloc(&scratch)) != NULL) {
+      *handle = scratch;
+      vorbis_pump_first_frame(handle);
+      if (error != NULL) *error = VORBIS__no_error;
+      return handle;
+   }
+   // start-up or allocation failed: report why and release whatever was set up
+   if (error != NULL) *error = scratch.error;
+   vorbis_deinit(&scratch);
+   return NULL;
+}
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+#define PLAYBACK_MONO     1 // bit flag: channel contributes to a mono mix
+#define PLAYBACK_LEFT     2 // bit flag: channel contributes to the left output
+#define PLAYBACK_RIGHT    4 // bit flag: channel contributes to the right output
+
+#define L  (PLAYBACK_LEFT  | PLAYBACK_MONO) // left + mono
+#define C  (PLAYBACK_LEFT  | PLAYBACK_RIGHT | PLAYBACK_MONO) // left + right + mono
+#define R  (PLAYBACK_RIGHT | PLAYBACK_MONO) // right + mono
+
+static int8 channel_position[7][6] = // routing masks, indexed as [number of source channels][source channel index]
+{
+   { 0 },
+   { C },
+   { L, R },
+   { L, C, R },
+   { L, R, L, R },
+   { L, C, R, L, R },
+   { L, C, R, L, R, C },
+};
+
+
+#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
+   typedef union { // lets the same storage be written as a float and read back as an int
+      float f;
+      int i;
+   } float_conv;
+   typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4]; // array length is 1 only if float and int are both 4 bytes, 0 otherwise
+   #define FASTDEF(x) float_conv x // declares the scratch union passed to FAST_SCALED_FLOAT_TO_INT
+   // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
+   #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
+   #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
+   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s)) // stores x+MAGIC(s) as a float, reads the bits back as an int, subtracts ADDEND(s)
+   #define check_endianness() // expands to nothing
+#else
+   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s)))) // plain version: scale by 2^s and convert to int; temp is unused
+   #define check_endianness() // expands to nothing
+   #define FASTDEF(x) // expands to nothing: no scratch variable in this configuration
+#endif
+
+static void copy_samples(short *dest, float *src, int len)
+{
+   int idx = 0;
+   check_endianness();
+   while (idx < len) {
+      FASTDEF(conv);
+      int sample = FAST_SCALED_FLOAT_TO_INT(conv, src[idx], 15);
+      if ((unsigned int) (sample + 32768) > 65535) // outside the 16-bit range: saturate
+         sample = (sample >= 0) ? 32767 : -32768;
+      dest[idx++] = sample;
+   }
+}
+
+static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
+{
+   #define STB_BUFFER_SIZE  32
+   float mix[STB_BUFFER_SIZE];
+   int base, ch, k, chunk = STB_BUFFER_SIZE;
+   check_endianness();
+   for (base = 0; base < len; base += STB_BUFFER_SIZE) { // work through the input STB_BUFFER_SIZE samples at a time
+      if (base + chunk > len) chunk = len - base; // final, shorter chunk
+      memset(mix, 0, sizeof(mix));
+      for (ch = 0; ch < num_c; ch++) {
+         const float *in;
+         if (!(channel_position[num_c][ch] & mask)) continue; // this source channel is not routed to the requested output
+         in = data[ch] + d_offset + base;
+         for (k = 0; k < chunk; k++) mix[k] += in[k];
+      }
+      for (k = 0; k < chunk; k++) {
+         FASTDEF(conv);
+         int s = FAST_SCALED_FLOAT_TO_INT(conv, mix[k], 15);
+         if ((unsigned int) (s + 32768) > 65535)
+            s = (s >= 0) ? 32767 : -32768;
+         output[base + k] = s;
+      }
+   }
+   #undef STB_BUFFER_SIZE
+}
+
+static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
+{
+   #define STB_BUFFER_SIZE  32
+   float mix[STB_BUFFER_SIZE];
+   int src, ch, k, frames = STB_BUFFER_SIZE >> 1;
+   check_endianness();
+   // src counts source frames; every pass mixes up to STB_BUFFER_SIZE/2 of them,
+   // because each frame occupies two slots (left, right) in the interleaved mix buffer
+   for (src = 0; src < len; src += STB_BUFFER_SIZE >> 1) {
+      short *out = output + (src << 1); // matching position in the interleaved output
+      if (src + frames > len) frames = len - src; // final, shorter pass
+      memset(mix, 0, sizeof(mix));
+      for (ch = 0; ch < num_c; ch++) {
+         int route = channel_position[num_c][ch] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
+         switch (route) {
+            case PLAYBACK_LEFT | PLAYBACK_RIGHT: // feeds both sides
+               for (k = 0; k < frames; k++) {
+                  mix[k*2+0] += data[ch][d_offset+src+k];
+                  mix[k*2+1] += data[ch][d_offset+src+k];
+               }
+               break;
+            case PLAYBACK_LEFT:
+               for (k = 0; k < frames; k++) mix[k*2+0] += data[ch][d_offset+src+k];
+               break;
+            case PLAYBACK_RIGHT:
+               for (k = 0; k < frames; k++) mix[k*2+1] += data[ch][d_offset+src+k];
+               break;
+         }
+      }
+      for (k = 0; k < (frames << 1); k++) {
+         FASTDEF(conv);
+         int s = FAST_SCALED_FLOAT_TO_INT(conv, mix[k], 15);
+         if ((unsigned int) (s + 32768) > 65535)
+            s = (s >= 0) ? 32767 : -32768;
+         out[k] = s;
+      }
+   }
+   #undef STB_BUFFER_SIZE
+}
+
+static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
+{
+   int ch;
+   if (buf_c == data_c || buf_c > 2 || data_c > 6) {
+      // no mixing in this case: copy channel for channel and zero-fill output channels that have no source
+      for (ch = 0; ch < buf_c; ch++) {
+         if (ch < data_c) copy_samples(&buffer[ch][b_offset], &data[ch][d_offset], samples);
+         else             memset(&buffer[ch][b_offset], 0, sizeof(short) * samples);
+      }
+   } else { // one or two outputs with a different source layout: mix each output from the channels its mask selects
+      static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
+      for (ch = 0; ch < buf_c; ch++)
+         compute_samples(channel_selector[buf_c][ch], &buffer[ch][b_offset], data_c, data, d_offset, samples);
+   }
+}
+
+int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
+{
+   float **decoded = NULL;
+   int count = stb_vorbis_get_frame_float(f, NULL, &decoded);
+   if (count > num_samples) count = num_samples; // never convert more than the caller asked for
+   if (count == 0) return 0;
+   convert_samples_short(num_c, buffer, 0, f->channels, decoded, 0, count);
+   return count;
+}
+
+static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
+{ // Convert len frames of planar float data (data_c channels, read from d_offset) into buf_c-channel interleaved 16-bit samples in buffer.
+   int i;
+   check_endianness();
+   if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
+      assert(buf_c == 1 || buf_c == 2);
+      if (buf_c == 2) compute_stereo_samples(buffer, data_c, data, d_offset, len); // one call fills both interleaved channels; repeating it per output channel only redid the work
+      else if (buf_c == 1) compute_samples(PLAYBACK_MONO, buffer, data_c, data, d_offset, len); // mono output: mix with the mono mask rather than writing stereo data past a mono-sized buffer
+   } else {
+      int limit = buf_c < data_c ? buf_c : data_c;
+      int j;
+      for (j=0; j < len; ++j) {
+         for (i=0; i < limit; ++i) {
+            FASTDEF(temp);
+            float f = data[i][d_offset+j];
+            int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
+            if ((unsigned int) (v + 32768) > 65535)
+               v = v < 0 ? -32768 : 32767; // clamp to the 16-bit range
+            *buffer++ = v;
+         }
+         for (   ; i < buf_c; ++i)
+            *buffer++ = 0; // output channels with no source channel are written as silence
+      }
+   }
+}
+
+int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
+{
+   float **decoded;
+   int frames;
+   // a single output channel needs no interleaving, so the planar routine handles it
+   if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
+   frames = stb_vorbis_get_frame_float(f, NULL, &decoded);
+   if (frames == 0) return 0;
+   if (frames*num_c > num_shorts) frames = num_shorts / num_c; // keep only the whole frames that fit in the buffer
+   convert_channels_short_interleaved(num_c, buffer, f->channels, decoded, 0, frames);
+   return frames;
+}
+
+int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
+{
+   float **scratch;
+   int wanted = num_shorts / channels; // whole frames that fit in the caller's buffer
+   int done = 0;
+   while (done < wanted) {
+      int avail = f->channel_buffer_end - f->channel_buffer_start; // samples still buffered from the current frame
+      if (done + avail >= wanted) avail = wanted - done;
+      if (avail != 0)
+         convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, avail);
+      buffer += avail*channels;
+      done += avail;
+      f->channel_buffer_start += avail;
+      // stop when the request is filled, or when decoding another frame yields nothing
+      if (done == wanted || !stb_vorbis_get_frame_float(f, NULL, &scratch)) break;
+   }
+   return done;
+}
+
+int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
+{
+   float **scratch;
+   int done = 0;
+   while (done < len) {
+      int avail = f->channel_buffer_end - f->channel_buffer_start; // samples still buffered from the current frame
+      if (done + avail >= len) avail = len - done;
+      if (avail != 0)
+         convert_samples_short(channels, buffer, done, f->channels, f->channel_buffers, f->channel_buffer_start, avail);
+      done += avail;
+      f->channel_buffer_start += avail;
+      // stop when the request is filled, or when decoding another frame yields nothing
+      if (done == len || !stb_vorbis_get_frame_float(f, NULL, &scratch)) break;
+   }
+   return done;
+}
+
+#ifndef STB_VORBIS_NO_STDIO
+int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
+{ // Decode a whole file into one heap buffer of interleaved 16-bit samples; returns samples per channel, -1 if opening fails, -2 if allocation fails.
+   int data_len, offset, total, limit, error;
+   short *data;
+   stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
+   if (v == NULL) return -1;
+   limit = v->channels * 4096; // free space, in shorts, kept available before every decode call
+   *channels = v->channels; // channels is written unconditionally; sample_rate is optional
+   if (sample_rate)
+      *sample_rate = v->sample_rate;
+   offset = data_len = 0;
+   total = limit;
+   data = (short *) malloc(total * sizeof(*data));
+   if (data == NULL) {
+      stb_vorbis_close(v);
+      return -2;
+   }
+   for (;;) {
+      int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
+      if (n == 0) break; // no more frames
+      data_len += n; // counted in samples per channel
+      offset += n * v->channels; // counted in shorts
+      if (offset + limit > total) { // fewer than `limit` shorts left: double the buffer
+         short *data2;
+         total *= 2;
+         data2 = (short *) realloc(data, total * sizeof(*data));
+         if (data2 == NULL) {
+            free(data); // realloc failed, so the old block is still ours to release
+            stb_vorbis_close(v);
+            return -2;
+         }
+         data = data2;
+      }
+   }
+   *output = data; // the buffer comes from malloc/realloc and is handed to the caller
+   stb_vorbis_close(v);
+   return data_len;
+}
+#endif // NO_STDIO
+
+int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
+{ // Decode a whole in-memory stream into one heap buffer of interleaved 16-bit samples; returns samples per channel, -1 if opening fails, -2 if allocation fails.
+   int data_len, offset, total, limit, error;
+   short *data;
+   stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
+   if (v == NULL) return -1;
+   limit = v->channels * 4096; // free space, in shorts, kept available before every decode call
+   *channels = v->channels; // channels is written unconditionally; sample_rate is optional
+   if (sample_rate)
+      *sample_rate = v->sample_rate;
+   offset = data_len = 0;
+   total = limit;
+   data = (short *) malloc(total * sizeof(*data));
+   if (data == NULL) {
+      stb_vorbis_close(v);
+      return -2;
+   }
+   for (;;) {
+      int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
+      if (n == 0) break; // no more frames
+      data_len += n; // counted in samples per channel
+      offset += n * v->channels; // counted in shorts
+      if (offset + limit > total) { // fewer than `limit` shorts left: double the buffer
+         short *data2;
+         total *= 2;
+         data2 = (short *) realloc(data, total * sizeof(*data));
+         if (data2 == NULL) {
+            free(data); // realloc failed, so the old block is still ours to release
+            stb_vorbis_close(v);
+            return -2;
+         }
+         data = data2;
+      }
+   }
+   *output = data; // the buffer comes from malloc/realloc and is handed to the caller
+   stb_vorbis_close(v);
+   return data_len;
+}
+#endif // STB_VORBIS_NO_INTEGER_CONVERSION
+
+int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
+{
+   float **scratch;
+   int wanted = num_floats / channels; // whole frames that fit in the caller's buffer
+   int done = 0;
+   int copied = (f->channels < channels) ? f->channels : channels; // output channels that have a source channel
+   // alternate between draining the current frame and decoding the next one
+   while (done < wanted) {
+      int ch, s;
+      int avail = f->channel_buffer_end - f->channel_buffer_start;
+      if (done + avail >= wanted) avail = wanted - done;
+      for (s = 0; s < avail; s++) {
+         int at = f->channel_buffer_start + s;
+         // real channels first, then zeros for any output channels the stream lacks
+         for (ch = 0; ch < channels; ch++)
+            *buffer++ = (ch < copied) ? f->channel_buffers[ch][at] : 0;
+      }
+      done += avail;
+      f->channel_buffer_start += avail;
+      if (done == wanted) break;
+      // request not yet filled: decode another frame, or stop if that yields nothing
+      if (!stb_vorbis_get_frame_float(f, NULL, &scratch)) break;
+   }
+   return done;
+}
+
+int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
+{
+   float **scratch;
+   int done = 0;
+   int copied = (f->channels < channels) ? f->channels : channels; // output channels that have a source channel
+   while (done < num_samples) {
+      int ch;
+      int avail = f->channel_buffer_end - f->channel_buffer_start; // samples still buffered from the current frame
+      if (done + avail >= num_samples) avail = num_samples - done;
+      for (ch = 0; avail != 0 && ch < channels; ch++) {
+         if (ch < copied)
+            memcpy(buffer[ch]+done, f->channel_buffers[ch]+f->channel_buffer_start, sizeof(float)*avail);
+         else
+            memset(buffer[ch]+done, 0, sizeof(float) * avail);
+      }
+      done += avail;
+      f->channel_buffer_start += avail;
+      if (done == num_samples)
+         break;
+      // request not yet filled: decode another frame, or stop if that yields nothing
+      if (!stb_vorbis_get_frame_float(f, NULL, &scratch))
+         break;
+   }
+   return done;
+}
+#endif // STB_VORBIS_NO_PULLDATA_API
+
+/* Version history
+    1.17    - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
+                           found with Mayhem by ForAllSecure
+    1.16    - 2019-03-04 - fix warnings
+    1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
+    1.14    - 2018-02-11 - delete bogus dealloca usage
+    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
+    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
+    1.11    - 2017-07-23 - fix MinGW compilation
+    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
+    1.09    - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
+    1.08    - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
+                           avoid discarding last frame of audio data
+    1.07    - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
+                           some more crash fixes when out of memory or with corrupt files
+    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
+                           some crash fixes when out of memory or with corrupt files
+    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
+    1.04    - 2014-08-27 - fix missing const-correct case in API
+    1.03    - 2014-08-07 - Warning fixes
+    1.02    - 2014-07-09 - Declare qsort compare function _cdecl on windows
+    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float
+    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
+                           (API change) report sample rate for decode-full-file funcs
+    0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
+    0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
+    0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
+    0.99993 - remove assert that fired on legal files with empty tables
+    0.99992 - rewind-to-start
+    0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
+    0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
+    0.9998 - add a full-decode function with a memory source
+    0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
+    0.9996 - query length of vorbis stream in samples/seconds
+    0.9995 - bugfix to another optimization that only happened in certain files
+    0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
+    0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
+    0.9992 - performance improvement of IMDCT; now performs close to reference implementation
+    0.9991 - performance improvement of IMDCT
+    0.999 - (should have been 0.9990) performance improvement of IMDCT
+    0.998 - no-CRT support from Casey Muratori
+    0.997 - bugfixes for bugs found by Terje Mathisen
+    0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
+    0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
+    0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
+    0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
+    0.992 - fixes for MinGW warning
+    0.991 - turn fast-float-conversion on by default
+    0.990 - fix push-mode seek recovery if you seek into the headers
+    0.98b - fix to bad release of 0.98
+    0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
+    0.97 - builds under c++ (typecasting, don't use 'class' keyword)
+    0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
+    0.95 - clamping code for 16-bit functions
+    0.94 - not publicly released
+    0.93 - fixed all-zero-floor case (was decoding garbage)
+    0.92 - fixed a memory leak
+    0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
+    0.90 - first public release
+*/
+
+#endif // STB_VORBIS_HEADER_ONLY
+
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/src/codecs/CMakeLists.txt b/src/codecs/CMakeLists.txt
index 0bf79c78..cdf9c99d 100644
--- a/src/codecs/CMakeLists.txt
+++ b/src/codecs/CMakeLists.txt
@@ -5,6 +5,6 @@
 idf_component_register(
   SRCS "codec.cpp" "mad.cpp"
   INCLUDE_DIRS "include"
-  REQUIRES "result" "span" "libmad")
+  REQUIRES "result" "span" "libmad" "libfoxenflac" "stb_vorbis")
 
 target_compile_options("${COMPONENT_LIB}" PRIVATE ${EXTRA_WARNINGS})
diff --git a/tools/cmake/common.cmake b/tools/cmake/common.cmake
index 6e3d539f..b2e6de48 100644
--- a/tools/cmake/common.cmake
+++ b/tools/cmake/common.cmake
@@ -12,11 +12,13 @@ set(COMPONENTS "")
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/catch2")
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/cbor")
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/komihash")
+list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/libfoxenflac")
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/libmad")
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/libtags")
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/lvgl")
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/result")
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/span")
+list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/stb_vorbis")
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/tinyfsm")
 
 include($ENV{IDF_PATH}/tools/cmake/project.cmake)

From a2c1dfbabddc2b4abaf8bf27c9ed9d1b99594859 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Thu, 15 Jun 2023 10:33:46 +1000
Subject: [PATCH 2/7] Add vorbis and flac decoders, flesh out codec interface

vorbis doesn't quite work yet, not sure why. will pick it up again
later.
---
 src/audio/audio_decoder.cpp         | 125 +++++++++++--------
 src/audio/audio_task.cpp            |   2 +-
 src/audio/fatfs_audio_input.cpp     |   8 +-
 src/audio/include/audio_decoder.hpp |   1 +
 src/audio/include/stream_info.hpp   |   4 +
 src/codecs/CMakeLists.txt           |   2 +-
 src/codecs/codec.cpp                |   7 ++
 src/codecs/foxenflac.cpp            |  80 +++++++++++++
 src/codecs/include/codec.hpp        |  60 ++++++----
 src/codecs/include/foxenflac.hpp    |  38 ++++++
 src/codecs/include/mad.hpp          |  24 +++-
 src/codecs/include/stbvorbis.hpp    |  42 +++++++
 src/codecs/include/types.hpp        |   2 +-
 src/codecs/mad.cpp                  | 179 ++++++++++++++++++++--------
 src/codecs/stbvorbis.cpp            | 128 ++++++++++++++++++++
 src/database/tag_parser.cpp         |  10 ++
 src/tasks/tasks.cpp                 |   2 +-
 17 files changed, 577 insertions(+), 137 deletions(-)
 create mode 100644 src/codecs/foxenflac.cpp
 create mode 100644 src/codecs/include/foxenflac.hpp
 create mode 100644 src/codecs/include/stbvorbis.hpp
 create mode 100644 src/codecs/stbvorbis.cpp

diff --git a/src/audio/audio_decoder.cpp b/src/audio/audio_decoder.cpp
index eb19b75f..310f5740 100644
--- a/src/audio/audio_decoder.cpp
+++ b/src/audio/audio_decoder.cpp
@@ -14,6 +14,7 @@
 #include <memory>
 #include <variant>
 
+#include "codec.hpp"
 #include "freertos/FreeRTOS.h"
 
 #include "esp_heap_caps.h"
@@ -50,6 +51,9 @@ auto AudioDecoder::ProcessStreamInfo(const StreamInfo& info) -> bool {
   // Reuse the existing codec if we can. This will help with gapless playback,
   // since we can potentially just continue to decode as we were before,
   // without any setup overhead.
+  // TODO(jacqueline): Reconsider this. It makes a lot of things harder to smash
+  // streams together at this layer.
+  /*
   if (current_codec_ != nullptr && current_input_format_) {
     auto cur_encoding = std::get<StreamInfo::Encoded>(*current_input_format_);
     if (cur_encoding.type == encoded.type) {
@@ -58,6 +62,7 @@ auto AudioDecoder::ProcessStreamInfo(const StreamInfo& info) -> bool {
       return true;
     }
   }
+  */
   current_input_format_ = info.format;
 
   ESP_LOGI(kTag, "creating new decoder");
@@ -80,68 +85,94 @@ auto AudioDecoder::Process(const std::vector<InputStream>& inputs,
                            OutputStream* output) -> void {
   auto input = inputs.begin();
   const StreamInfo& info = input->info();
-  if (std::holds_alternative<std::monostate>(info.format) ||
-      info.bytes_in_stream == 0) {
-    // TODO(jacqueline): should we clear the stream format?
-    // output->prepare({});
-    return;
-  }
 
+  // Check the input stream's format has changed (or, by extension, if this is
+  // the first stream).
   if (!current_input_format_ || *current_input_format_ != info.format) {
-    // The input stream has changed! Immediately throw everything away and
-    // start from scratch.
+    ESP_LOGI(kTag, "beginning new stream");
     has_samples_to_send_ = false;
     ProcessStreamInfo(info);
+    auto res = current_codec_->BeginStream(input->data());
+    input->consume(res.first);
+    if (res.second.has_error()) {
+      // TODO(jacqueline): Handle errors.
+      return;
+    }
+
+    // The stream started successfully. Record what format the samples are in.
+    codecs::ICodec::OutputFormat format = res.second.value();
+    current_output_format_ = StreamInfo::Pcm{
+        .channels = format.num_channels,
+        .bits_per_sample = format.bits_per_sample,
+        .sample_rate = format.sample_rate_hz,
+    };
+
+    if (info.seek_to_seconds) {
+      seek_to_sample_ = *info.seek_to_seconds * format.sample_rate_hz;
+    } else {
+      seek_to_sample_.reset();
+    }
   }
 
-  current_codec_->SetInput(input->data());
+  while (seek_to_sample_) {
+    ESP_LOGI(kTag, "seeking forwards...");
+    auto res = current_codec_->SeekStream(input->data(), *seek_to_sample_);
+    input->consume(res.first);
+    if (res.second.has_error()) {
+      auto err = res.second.error();
+      if (err == codecs::ICodec::Error::kOutOfInput) {
+        return;
+      } else {
+        // TODO(jacqueline): Handle errors.
+        seek_to_sample_.reset();
+      }
+    } else {
+      seek_to_sample_.reset();
+    }
+  }
 
+  has_input_remaining_ = true;
   while (true) {
-    if (has_samples_to_send_) {
-      auto format = current_codec_->GetOutputFormat();
-      if (format.has_value()) {
-        current_output_format_ = StreamInfo::Pcm{
-            .channels = format->num_channels,
-            .bits_per_sample = format->bits_per_sample,
-            .sample_rate = format->sample_rate_hz,
-        };
-
-        if (!output->prepare(*current_output_format_)) {
-          break;
-        }
-
-        auto write_res = current_codec_->WriteOutputSamples(output->data());
-        output->add(write_res.first);
-        has_samples_to_send_ = !write_res.second;
-
-        if (has_samples_to_send_) {
-          // We weren't able to fit all the generated samples into the output
-          // buffer. Stop trying; we'll finish up during the next pass.
-          break;
-        }
-      }
+    // TODO(jacqueline): Pass through seek info here?
+    if (!output->prepare(*current_output_format_)) {
+      ESP_LOGI(kTag, "waiting for buffer to become free");
+      break;
     }
 
-    auto res = current_codec_->ProcessNextFrame();
-    if (res.has_error()) {
-      // TODO(jacqueline): Handle errors.
+    auto res = current_codec_->ContinueStream(input->data(), output->data());
+    input->consume(res.first);
+    if (res.second.has_error()) {
+      if (res.second.error() == codecs::ICodec::Error::kOutOfInput) {
+        ESP_LOGW(kTag, "out of input");
+        ESP_LOGW(kTag, "(%u bytes left)", input->data().size_bytes());
+        has_input_remaining_ = false;
+        // We can't be halfway through sending samples if the codec is asking
+        // for more input.
+        has_samples_to_send_ = false;
+        input->mark_incomplete();
+      } else {
+        // TODO(jacqueline): Handle errors.
+        ESP_LOGE(kTag, "codec return fatal error");
+      }
       return;
     }
 
-    has_input_remaining_ = !res.value();
-    if (!has_input_remaining_) {
-      // We're out of useable data in this buffer. Finish immediately; there's
-      // nothing to send.
-      input->mark_incomplete();
-      break;
-    } else {
-      has_samples_to_send_ = true;
+    ESP_LOGI(kTag, "enc read: %u", res.first);
+
+    codecs::ICodec::OutputInfo out_info = res.second.value();
+    output->add(out_info.bytes_written);
+    has_samples_to_send_ = !out_info.is_finished_writing;
+
+    ESP_LOGI(kTag, "enc wrote: %u", out_info.bytes_written);
+    if (out_info.is_finished_writing) {
+      ESP_LOGI(kTag, "(write finished)");
     }
-  }
 
-  std::size_t pos = current_codec_->GetInputPosition();
-  if (pos > 0) {
-    input->consume(pos - 1);
+    if (has_samples_to_send_) {
+      // We weren't able to fit all the generated samples into the output
+      // buffer. Stop trying; we'll finish up during the next pass.
+      break;
+    }
   }
 }
 
diff --git a/src/audio/audio_task.cpp b/src/audio/audio_task.cpp
index 9dd7d994..eea84e45 100644
--- a/src/audio/audio_task.cpp
+++ b/src/audio/audio_task.cpp
@@ -126,7 +126,7 @@ void AudioTaskMain(std::unique_ptr<Pipeline> pipeline, IAudioSink* sink) {
 
     if (sink_stream.info().bytes_in_stream == 0) {
       // No new bytes to sink, so skip sinking completely.
-      ESP_LOGI(kTag, "no bytes to sink");
+      ESP_LOGW(kTag, "no bytes to sink");
       continue;
     }
 
diff --git a/src/audio/fatfs_audio_input.cpp b/src/audio/fatfs_audio_input.cpp
index a89858ca..eaa62ee3 100644
--- a/src/audio/fatfs_audio_input.cpp
+++ b/src/audio/fatfs_audio_input.cpp
@@ -56,11 +56,13 @@ auto FatfsAudioInput::OpenFile(const std::string& path) -> bool {
   database::SongTags tags;
   if (!tag_parser.ReadAndParseTags(path, &tags)) {
     ESP_LOGE(kTag, "failed to read tags");
-    return false;
+    tags.encoding = database::Encoding::kFlac;
+    // return false;
   }
 
   auto stream_type = ContainerToStreamType(tags.encoding);
   if (!stream_type.has_value()) {
+    ESP_LOGE(kTag, "couldn't match container to stream");
     return false;
   }
 
@@ -144,8 +146,8 @@ auto FatfsAudioInput::ContainerToStreamType(database::Encoding enc)
       return codecs::StreamType::kPcm;
     case database::Encoding::kFlac:
       return codecs::StreamType::kFlac;
-    case database::Encoding::kOgg:
-      return codecs::StreamType::kOgg;
+    case database::Encoding::kOgg:  // Misnamed; this is Ogg Vorbis.
+      return codecs::StreamType::kVorbis;
     case database::Encoding::kUnsupported:
     default:
       return {};
diff --git a/src/audio/include/audio_decoder.hpp b/src/audio/include/audio_decoder.hpp
index 3cda0305..4e7e127e 100644
--- a/src/audio/include/audio_decoder.hpp
+++ b/src/audio/include/audio_decoder.hpp
@@ -42,6 +42,7 @@ class AudioDecoder : public IAudioElement {
   std::unique_ptr<codecs::ICodec> current_codec_;
   std::optional<StreamInfo::Format> current_input_format_;
   std::optional<StreamInfo::Format> current_output_format_;
+  std::optional<std::size_t> seek_to_sample_;
   bool has_samples_to_send_;
   bool has_input_remaining_;
 
diff --git a/src/audio/include/stream_info.hpp b/src/audio/include/stream_info.hpp
index 91b2f085..54b87003 100644
--- a/src/audio/include/stream_info.hpp
+++ b/src/audio/include/stream_info.hpp
@@ -6,6 +6,7 @@
 
 #pragma once
 
+#include <stdint.h>
 #include <cstdint>
 #include <optional>
 #include <string>
@@ -30,6 +31,9 @@ struct StreamInfo {
   // generated audio, etc.)
   std::optional<std::size_t> length_bytes{};
 
+  // Position, in seconds, to seek to before decoding starts, if any.
+  std::optional<uint32_t> seek_to_seconds{};
+
   struct Encoded {
     // The codec that this stream is associated with.
     codecs::StreamType type;
diff --git a/src/codecs/CMakeLists.txt b/src/codecs/CMakeLists.txt
index cdf9c99d..478d4d3f 100644
--- a/src/codecs/CMakeLists.txt
+++ b/src/codecs/CMakeLists.txt
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: GPL-3.0-only
 
 idf_component_register(
-  SRCS "codec.cpp" "mad.cpp"
+  SRCS "codec.cpp" "mad.cpp" "foxenflac.cpp" "stbvorbis.cpp"
   INCLUDE_DIRS "include"
   REQUIRES "result" "span" "libmad" "libfoxenflac" "stb_vorbis")
 
diff --git a/src/codecs/codec.cpp b/src/codecs/codec.cpp
index 73bc9032..e23b8702 100644
--- a/src/codecs/codec.cpp
+++ b/src/codecs/codec.cpp
@@ -8,7 +8,10 @@
 
 #include <memory>
 #include <optional>
+
+#include "foxenflac.hpp"
 #include "mad.hpp"
+#include "stbvorbis.hpp"
 #include "types.hpp"
 
 namespace codecs {
@@ -17,6 +20,10 @@ auto CreateCodecForType(StreamType type) -> std::optional<ICodec*> {
   switch (type) {
     case StreamType::kMp3:
       return new MadMp3Decoder();
+    case StreamType::kFlac:
+      return new FoxenFlacDecoder();
+    case StreamType::kVorbis:
+      return new StbVorbisDecoder();
     default:
       return {};
   }
diff --git a/src/codecs/foxenflac.cpp b/src/codecs/foxenflac.cpp
new file mode 100644
index 00000000..a2d6f000
--- /dev/null
+++ b/src/codecs/foxenflac.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#include "foxenflac.hpp"
+#include <stdint.h>
+
+#include <cstdlib>
+
+#include "esp_log.h"
+#include "foxen/flac.h"
+
+namespace codecs {
+
+// Allocates libfoxenflac's decoder state up front. NOTE(review): the `2` is
+// presumably the maximum channel count that can be decoded — confirm.
+FoxenFlacDecoder::FoxenFlacDecoder()
+    : flac_(FX_FLAC_ALLOC(FLAC_MAX_BLOCK_SIZE, 2)) {}
+// Releases the decoder state with free().
+FoxenFlacDecoder::~FoxenFlacDecoder() { free(flac_); }
+
+// Runs libfoxenflac over the start of the stream until its metadata has been
+// read, then reports the channel count and sample rate found there.
+auto FoxenFlacDecoder::BeginStream(const cpp::span<const std::byte> input)
+    -> Result<OutputFormat> {
+  // In/out: holds the input size on entry and the bytes consumed on return.
+  uint32_t bytes_used = input.size_bytes();
+  auto* in_bytes = reinterpret_cast<const uint8_t*>(input.data());
+  // No sample buffer is passed; only the metadata is wanted at this stage.
+  if (fx_flac_process(flac_, in_bytes, &bytes_used, NULL, NULL) !=
+      FLAC_END_OF_METADATA) {
+    return {bytes_used, cpp::fail(Error::kMalformedData)};
+  }
+  const int64_t channels = fx_flac_get_streaminfo(flac_, FLAC_KEY_N_CHANNELS);
+  const int64_t fs = fx_flac_get_streaminfo(flac_, FLAC_KEY_SAMPLE_RATE);
+  if (fs == FLAC_INVALID_METADATA_KEY ||
+      channels == FLAC_INVALID_METADATA_KEY) {
+    return {bytes_used, cpp::fail(Error::kMalformedData)};
+  }
+  return {bytes_used, OutputFormat{
+                          .num_channels = static_cast<uint8_t>(channels),
+                          .bits_per_sample = 32,  // Output size is fixed.
+                          .sample_rate_hz = static_cast<uint32_t>(fs),
+                      }};
+}
+
+// Feeds `input` to libfoxenflac and collects decoded samples in `output`,
+// which is treated as an array of 32-bit samples.
+auto FoxenFlacDecoder::ContinueStream(cpp::span<const std::byte> input,
+                                      cpp::span<std::byte> output)
+    -> Result<OutputInfo> {
+  auto* in_bytes = reinterpret_cast<const uint8_t*>(input.data());
+  auto* out_samples = reinterpret_cast<int32_t*>(output.data());
+  // In/out parameters: capacity on entry, amount actually used on return.
+  uint32_t bytes_read = input.size_bytes();
+  uint32_t samples_written = output.size_bytes() / 4;
+
+  fx_flac_state_t state = fx_flac_process(flac_, in_bytes, &bytes_read,
+                                          out_samples, &samples_written);
+  if (state == FLAC_ERR) {
+    return {bytes_read, cpp::fail(Error::kMalformedData)};
+  }
+  if (samples_written == 0) {
+    // No error, but no samples written. We must be out of data.
+    return {bytes_read, cpp::fail(Error::kOutOfInput)};
+  }
+  return {bytes_read,
+          OutputInfo{.bytes_written = samples_written * 4,
+                     .is_finished_writing = state == FLAC_END_OF_FRAME}};
+}
+
+auto FoxenFlacDecoder::SeekStream(cpp::span<const std::byte> input,
+                                  std::size_t target_sample) -> Result<void> {
+  // TODO(jacqueline): Implement me.
+  return {0, {}};  // Placeholder: consumes no input and performs no seek.
+}
+
+}  // namespace codecs
diff --git a/src/codecs/include/codec.hpp b/src/codecs/include/codec.hpp
index 31c67e13..4b5ab47f 100644
--- a/src/codecs/include/codec.hpp
+++ b/src/codecs/include/codec.hpp
@@ -21,48 +21,58 @@
 
 namespace codecs {
 
+/*
+ * Common interface to be implemented by all audio decoders.
+ */
 class ICodec {
  public:
   virtual ~ICodec() {}
 
+  /* Errors that may be returned by codecs. */
+  enum class Error {
+    // Indicates that more data is required before this codec can finish its
+    // operation. E.g. the input buffer ends with a truncated frame.
+    kOutOfInput,
+    // Indicates that the data within the input buffer is fatally malformed.
+    kMalformedData,
+
+    kInternalError,
+  };
+
+  /*
+   * Alias for more readable return types. All codec methods, success or
+   * failure, should also return the number of bytes they consumed.
+   */
+  template <typename T>
+  using Result = std::pair<std::size_t, cpp::result<T, Error>>;
+
   struct OutputFormat {
     uint8_t num_channels;
     uint8_t bits_per_sample;
     uint32_t sample_rate_hz;
   };
 
-  virtual auto GetOutputFormat() -> std::optional<OutputFormat> = 0;
-
-  enum ProcessingError { MALFORMED_DATA };
-
-  virtual auto SetInput(cpp::span<const std::byte> input) -> void = 0;
-
   /*
-   * Returns the codec's next read position within the input buffer. If the
-   * codec is out of usable data, but there is still some data left in the
-   * stream, that data should be prepended to the next input buffer.
+   * Decodes metadata or headers from the given input stream, and returns the
+   * format for the samples that will be decoded from it.
    */
-  virtual auto GetInputPosition() -> std::size_t = 0;
+  virtual auto BeginStream(cpp::span<const std::byte> input)
+      -> Result<OutputFormat> = 0;
 
-  /*
-   * Read one frame (or equivalent discrete chunk) from the input, and
-   * synthesize output samples for it.
-   *
-   * Returns true if we are out of usable data from the input stream, or false
-   * otherwise.
-   */
-  virtual auto ProcessNextFrame() -> cpp::result<bool, ProcessingError> = 0;
+  struct OutputInfo {
+    std::size_t bytes_written;
+    bool is_finished_writing;
+  };
 
   /*
    * Writes PCM samples to the given output buffer.
-   *
-   * Returns the number of bytes that were written, and true if all of the
-   * samples synthesized from the last call to `ProcessNextFrame` have been
-   * written. If this returns false, then this method should be called again
-   * after flushing the output buffer.
    */
-  virtual auto WriteOutputSamples(cpp::span<std::byte> output)
-      -> std::pair<std::size_t, bool> = 0;
+  virtual auto ContinueStream(cpp::span<const std::byte> input,
+                              cpp::span<std::byte> output)
+      -> Result<OutputInfo> = 0;
+
+  virtual auto SeekStream(cpp::span<const std::byte> input,
+                          std::size_t target_sample) -> Result<void> = 0;
 };
 
 auto CreateCodecForType(StreamType type) -> std::optional<ICodec*>;
diff --git a/src/codecs/include/foxenflac.hpp b/src/codecs/include/foxenflac.hpp
new file mode 100644
index 00000000..cce1b762
--- /dev/null
+++ b/src/codecs/include/foxenflac.hpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "foxen/flac.h"
+#include "span.hpp"
+
+#include "codec.hpp"
+
+namespace codecs {
+
+// ICodec implementation that decodes FLAC streams using libfoxenflac.
+class FoxenFlacDecoder : public ICodec {
+ public:
+  FoxenFlacDecoder();
+  ~FoxenFlacDecoder();
+  auto BeginStream(cpp::span<const std::byte>) -> Result<OutputFormat> override;
+  auto ContinueStream(cpp::span<const std::byte>, cpp::span<std::byte>)
+      -> Result<OutputInfo> override;
+  auto SeekStream(cpp::span<const std::byte> input, std::size_t target_sample)
+      -> Result<void> override;
+
+ private:
+  fx_flac_t* flac_;  // Allocated by the constructor; freed by the destructor.
+};
+
+}  // namespace codecs
diff --git a/src/codecs/include/mad.hpp b/src/codecs/include/mad.hpp
index 5ba4db84..e1c479bf 100644
--- a/src/codecs/include/mad.hpp
+++ b/src/codecs/include/mad.hpp
@@ -24,12 +24,22 @@ class MadMp3Decoder : public ICodec {
   MadMp3Decoder();
   ~MadMp3Decoder();
 
-  auto GetOutputFormat() -> std::optional<OutputFormat> override;
-  auto SetInput(cpp::span<const std::byte> input) -> void override;
-  auto GetInputPosition() -> std::size_t override;
-  auto ProcessNextFrame() -> cpp::result<bool, ProcessingError> override;
-  auto WriteOutputSamples(cpp::span<std::byte> output)
-      -> std::pair<std::size_t, bool> override;
+  /*
+   * Returns the output format for the next frame in the stream. MP3 streams
+   * may represent multiple distinct tracks, with different bitrates, and so we
+   * handle the stream only on a frame-by-frame basis.
+   */
+  auto BeginStream(cpp::span<const std::byte>) -> Result<OutputFormat> override;
+
+  /*
+   * Writes samples for the current frame.
+   */
+  auto ContinueStream(cpp::span<const std::byte> input,
+                      cpp::span<std::byte> output)
+      -> Result<OutputInfo> override;
+
+  auto SeekStream(cpp::span<const std::byte> input, std::size_t target_sample)
+      -> Result<void> override;
 
  private:
   mad_stream stream_;
@@ -37,6 +47,8 @@ class MadMp3Decoder : public ICodec {
   mad_synth synth_;
 
   int current_sample_;
+
+  auto GetInputPosition() -> std::size_t;
 };
 
 }  // namespace codecs
diff --git a/src/codecs/include/stbvorbis.hpp b/src/codecs/include/stbvorbis.hpp
new file mode 100644
index 00000000..045e264e
--- /dev/null
+++ b/src/codecs/include/stbvorbis.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "stb_vorbis.h"
+
+#include "codec.hpp"
+
+namespace codecs {
+
+// ICodec implementation that decodes Vorbis streams using stb_vorbis.
+class StbVorbisDecoder : public ICodec {
+ public:
+  StbVorbisDecoder();
+  ~StbVorbisDecoder();
+  auto BeginStream(cpp::span<const std::byte>) -> Result<OutputFormat> override;
+  auto ContinueStream(cpp::span<const std::byte>, cpp::span<std::byte>)
+      -> Result<OutputInfo> override;
+  auto SeekStream(cpp::span<const std::byte> input, std::size_t target_sample)
+      -> Result<void> override;
+
+ private:
+  stb_vorbis* vorbis_;  // Null until BeginStream successfully opens a stream.
+  // State of the most recently decoded frame, filled in by ContinueStream.
+  int current_sample_;  // Next sample to write out; negative if none pending.
+  int num_channels_;
+  int num_samples_;
+  float** samples_array_;
+};
+
+}  // namespace codecs
diff --git a/src/codecs/include/types.hpp b/src/codecs/include/types.hpp
index 61d36a28..3dfc1da9 100644
--- a/src/codecs/include/types.hpp
+++ b/src/codecs/include/types.hpp
@@ -13,7 +13,7 @@ namespace codecs {
 enum class StreamType {
   kMp3,
   kPcm,
-  kOgg,
+  kVorbis,
   kFlac,
 };
 
diff --git a/src/codecs/mad.cpp b/src/codecs/mad.cpp
index fbe85213..8b9897eb 100644
--- a/src/codecs/mad.cpp
+++ b/src/codecs/mad.cpp
@@ -13,11 +13,12 @@
 #include "mad.h"
 
 #include "codec.hpp"
+#include "result.hpp"
 #include "types.hpp"
 
 namespace codecs {
 
-static uint32_t scaleToBits(mad_fixed_t sample, uint8_t bits) {
+static uint32_t mad_fixed_to_pcm(mad_fixed_t sample, uint8_t bits) {
   // Round the bottom bits.
   sample += (1L << (MAD_F_FRACBITS - bits));
 
@@ -42,93 +43,167 @@ MadMp3Decoder::~MadMp3Decoder() {
   mad_synth_finish(&synth_);
 }
 
-auto MadMp3Decoder::GetOutputFormat() -> std::optional<OutputFormat> {
-  if (synth_.pcm.channels == 0 || synth_.pcm.samplerate == 0) {
-    return {};
-  }
-  return std::optional<OutputFormat>({
-      .num_channels = static_cast<uint8_t>(synth_.pcm.channels),
-      .bits_per_sample = 24,
-      .sample_rate_hz = synth_.pcm.samplerate,
-  });
+auto MadMp3Decoder::GetInputPosition() -> std::size_t {
+  return stream_.next_frame - stream_.buffer;
 }
 
-auto MadMp3Decoder::SetInput(cpp::span<const std::byte> input) -> void {
+auto MadMp3Decoder::BeginStream(const cpp::span<const std::byte> input)
+    -> Result<OutputFormat> {
   mad_stream_buffer(&stream_,
                     reinterpret_cast<const unsigned char*>(input.data()),
                     input.size());
-}
-
-auto MadMp3Decoder::GetInputPosition() -> std::size_t {
-  return stream_.next_frame - stream_.buffer;
-}
-
-auto MadMp3Decoder::ProcessNextFrame() -> cpp::result<bool, ProcessingError> {
   // Whatever was last synthesized is now invalid, so ensure we don't try to
   // send it.
   current_sample_ = -1;
 
-  // Decode the next frame. To signal errors, this returns -1 and
-  // stashes an error code in the stream structure.
-  if (mad_frame_decode(&frame_, &stream_) < 0) {
+  // To get the output format for MP3 streams, we simply need to decode the
+  // first frame header.
+  mad_header header;
+  mad_header_init(&header);
+  while (mad_header_decode(&header, &stream_) < 0) {
     if (MAD_RECOVERABLE(stream_.error)) {
       // Recoverable errors are usually malformed parts of the stream.
       // We can recover from them by just retrying the decode.
-      return false;
+      continue;
+    } else {
+      // Don't bother checking for other errors; if the first part of the stream
+      // doesn't even contain a header then something's gone wrong.
+      return {GetInputPosition(), cpp::fail(Error::kMalformedData)};
     }
-
-    if (stream_.error == MAD_ERROR_BUFLEN) {
-      // The decoder ran out of bytes before it completed a frame. We
-      // need to return back to the caller to give us more data.
-      return true;
-    }
-
-    // The error is unrecoverable. Give up.
-    return cpp::fail(MALFORMED_DATA);
   }
 
-  // We've successfully decoded a frame!
-  // Now we need to synthesize PCM samples based on the frame, and send
-  // them downstream.
-  mad_synth_frame(&synth_, &frame_);
-  current_sample_ = 0;
-  return false;
+  uint8_t channels = MAD_NCHANNELS(&header);
+  return {GetInputPosition(),
+          OutputFormat{
+              .num_channels = channels,
+              .bits_per_sample = 24,  // We always scale to 24 bits
+              .sample_rate_hz = header.samplerate,
+          }};
 }
 
-auto MadMp3Decoder::WriteOutputSamples(cpp::span<std::byte> output)
-    -> std::pair<std::size_t, bool> {
-  size_t output_byte = 0;
-  // First ensure that we actually have some samples to send off.
+auto MadMp3Decoder::ContinueStream(cpp::span<const std::byte> input,
+                                   cpp::span<std::byte> output)
+    -> Result<OutputInfo> {
   if (current_sample_ < 0) {
-    return std::make_pair(output_byte, true);
+    mad_stream_buffer(&stream_,
+                      reinterpret_cast<const unsigned char*>(input.data()),
+                      input.size());
+
+    // Decode the next frame. To signal errors, this returns -1 and
+    // stashes an error code in the stream structure.
+    while (mad_frame_decode(&frame_, &stream_) < 0) {
+      if (MAD_RECOVERABLE(stream_.error)) {
+        // Recoverable errors are usually malformed parts of the stream.
+        // We can recover from them by just retrying the decode.
+        continue;
+      }
+      if (stream_.error == MAD_ERROR_BUFLEN) {
+        // The decoder ran out of bytes before it completed a frame. We
+        // need to return back to the caller to give us more data.
+        return {GetInputPosition(), cpp::fail(Error::kOutOfInput)};
+      }
+      // The error is unrecoverable. Give up.
+      return {GetInputPosition(), cpp::fail(Error::kMalformedData)};
+    }
+
+    // We've successfully decoded a frame! Now synthesize samples to write out.
+    mad_synth_frame(&synth_, &frame_);
+    current_sample_ = 0;
   }
 
+  size_t output_byte = 0;
   while (current_sample_ < synth_.pcm.length) {
-    if (output_byte + (2 * synth_.pcm.channels) >= output.size()) {
-      return std::make_pair(output_byte, false);
+    if (output_byte + (4 * synth_.pcm.channels) >= output.size()) {
+      // We can't fit the next sample into the buffer. Stop now, and also avoid
+      // writing the sample for only half the channels.
+      return {GetInputPosition(), OutputInfo{.bytes_written = output_byte,
+                                             .is_finished_writing = false}};
     }
 
     for (int channel = 0; channel < synth_.pcm.channels; channel++) {
       uint32_t sample_24 =
-          scaleToBits(synth_.pcm.samples[channel][current_sample_], 24);
+          mad_fixed_to_pcm(synth_.pcm.samples[channel][current_sample_], 24);
       output[output_byte++] = static_cast<std::byte>((sample_24 >> 16) & 0xFF);
       output[output_byte++] = static_cast<std::byte>((sample_24 >> 8) & 0xFF);
       output[output_byte++] = static_cast<std::byte>((sample_24)&0xFF);
       // 24 bit samples must still be aligned to 32 bits. The LSB is ignored.
       output[output_byte++] = static_cast<std::byte>(0);
-      /*
-      uint16_t sample_16 =
-          scaleToBits(synth_.pcm.samples[channel][current_sample_], 16);
-      output[output_byte++] = static_cast<std::byte>((sample_16 >> 8) & 0xFF);
-      output[output_byte++] = static_cast<std::byte>((sample_16)&0xFF);
-      */
     }
     current_sample_++;
   }
 
   // We wrote everything! Reset, ready for the next frame.
   current_sample_ = -1;
-  return std::make_pair(output_byte, true);
+  return {GetInputPosition(), OutputInfo{.bytes_written = output_byte,
+                                         .is_finished_writing = true}};
+}
+
+// Skips forward through `input` to the frame containing `target_sample`,
+// counting samples from the start of `input`. Distant frames only have their
+// headers parsed; the few frames before the target are decoded in full.
+auto MadMp3Decoder::SeekStream(cpp::span<const std::byte> input,
+                               std::size_t target_sample) -> Result<void> {
+  mad_stream_buffer(&stream_,
+                    reinterpret_cast<const unsigned char*>(input.data()),
+                    input.size());
+  std::size_t current_sample = 0;
+  std::size_t samples_per_frame = 0;
+  while (true) {
+    current_sample += samples_per_frame;
+    // First, decode the header for this frame.
+    mad_header header;
+    mad_header_init(&header);
+    while (mad_header_decode(&header, &stream_) < 0) {
+      if (MAD_RECOVERABLE(stream_.error)) {
+        // Recoverable errors are usually malformed parts of the stream.
+        // We can recover from them by just retrying the decode.
+        continue;
+      } else {
+        // NOTE(review): this branch also catches MAD_ERROR_BUFLEN, so running
+        // out of input while skipping headers is reported as malformed data.
+        return {GetInputPosition(), cpp::fail(Error::kMalformedData)};
+      }
+    }
+    // Calculate samples per frame if we haven't already.
+    if (samples_per_frame == 0) {
+      samples_per_frame = 32 * MAD_NSBSAMPLES(&header);
+    }
+    // Work out how close we are to the target.
+    std::size_t samples_to_go = target_sample - current_sample;
+    std::size_t frames_to_go = samples_to_go / samples_per_frame;
+    if (frames_to_go > 3) {
+      // The target is far in the distance. Keep skipping through headers only.
+      continue;
+    }
+
+    // The target is within the next few frames. We should decode these, to give
+    // the decoder a chance to sync with the stream.
+    while (mad_frame_decode(&frame_, &stream_) < 0) {
+      if (MAD_RECOVERABLE(stream_.error)) {
+        continue;
+      }
+      if (stream_.error == MAD_ERROR_BUFLEN) {
+        return {GetInputPosition(), cpp::fail(Error::kOutOfInput)};
+      }
+      // The error is unrecoverable. Give up.
+      return {GetInputPosition(), cpp::fail(Error::kMalformedData)};
+    }
+
+    if (frames_to_go <= 1) {
+      // The target is within the next couple of frames. We should start
+      // synthesizing a frame early because this guy says so:
+      // https://lists.mars.org/hyperkitty/list/mad-dev@lists.mars.org/message/UZSHXZTIZEF7FZ4KFOR65DUCKAY2OCUT/
+      mad_synth_frame(&synth_, &frame_);
+    }
+
+    if (frames_to_go == 0) {
+      // The target is actually within this frame! Set up for the ContinueStream
+      // call.
+      current_sample_ =
+          (target_sample > current_sample) ? target_sample - current_sample : 0;
+      return {GetInputPosition(), {}};
+    }
+  }
+}
 
 }  // namespace codecs
diff --git a/src/codecs/stbvorbis.cpp b/src/codecs/stbvorbis.cpp
new file mode 100644
index 00000000..de315416
--- /dev/null
+++ b/src/codecs/stbvorbis.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#include "stbvorbis.hpp"
+#include <stdint.h>
+
+#include <cstdint>
+#include <optional>
+
+#include "stb_vorbis.h"
+
+namespace codecs {
+
+// Starts with no stream open; BeginStream creates the stb_vorbis instance.
+StbVorbisDecoder::StbVorbisDecoder()
+    : vorbis_(nullptr),
+      current_sample_(-1),  // No decoded frame is waiting to be written.
+      num_channels_(0),
+      num_samples_(0),
+      samples_array_(nullptr) {}
+
+// Closes the stream, if BeginStream ever opened one.
+StbVorbisDecoder::~StbVorbisDecoder() {
+  if (vorbis_) stb_vorbis_close(vorbis_);
+}
+
+// Converts a float sample (nominally within [-1.0, 1.0]) to signed PCM of the
+// given bit depth, saturating at its limits. Returns the two's-complement bits.
+static uint32_t scaleToBits(float sample, uint8_t bits) {
+  const int32_t max_val = (1 << (bits - 1));
+  const float lo = static_cast<float>(-max_val);
+  const float hi = static_cast<float>(max_val - 1);  // 24-bit: 0x7FFFFF.
+  // Clamp before converting: an out-of-range float-to-int conversion is UB.
+  float scaled = sample * static_cast<float>(max_val);
+  scaled = scaled > hi ? hi : (scaled >= lo ? scaled : lo);
+  return static_cast<uint32_t>(static_cast<int32_t>(scaled));
+}
+
+// Opens a decoder from the stream's headers and reports its sample format.
+auto StbVorbisDecoder::BeginStream(const cpp::span<const std::byte> input)
+    -> Result<OutputFormat> {
+  if (vorbis_ != nullptr) stb_vorbis_close(vorbis_);
+  current_sample_ = -1;
+  int bytes_read = 0;
+  int error = 0;
+  vorbis_ =
+      stb_vorbis_open_pushdata(reinterpret_cast<const uint8_t*>(input.data()),
+                               input.size_bytes(), &bytes_read, &error, NULL);
+  if (vorbis_ == nullptr) {
+    // Only a truncated header block is worth retrying with more input.
+    bool more = (error == VORBIS_need_more_data);
+    return {0, cpp::fail(more ? Error::kOutOfInput : Error::kMalformedData)};
+  }
+  stb_vorbis_info info = stb_vorbis_get_info(vorbis_);
+  return {bytes_read,
+          OutputFormat{.num_channels = static_cast<uint8_t>(info.channels),
+                       .bits_per_sample = 24,
+                       .sample_rate_hz = info.sample_rate}};
+}
+
+// Decodes the next frame (unless one is still being flushed) and writes it
+// as 24-bit samples padded to 32 bits. Always reports the input bytes this
+// call consumed, even when `output` fills part-way through a frame.
+auto StbVorbisDecoder::ContinueStream(cpp::span<const std::byte> input,
+                                      cpp::span<std::byte> output)
+    -> Result<OutputInfo> {
+  std::size_t bytes_used = 0;
+  if (current_sample_ < 0) {
+    num_channels_ = 0;
+    num_samples_ = 0;
+    samples_array_ = NULL;
+    while (true) {
+      auto cropped = input.subspan(bytes_used);
+      std::size_t b = stb_vorbis_decode_frame_pushdata(
+          vorbis_, reinterpret_cast<const uint8_t*>(cropped.data()),
+          cropped.size_bytes(), &num_channels_, &samples_array_, &num_samples_);
+      if (b == 0) {
+        return {bytes_used, cpp::fail(Error::kOutOfInput)};
+      }
+      bytes_used += b;
+      if (num_samples_ == 0) {
+        // Decoder is synchronising. Decode more bytes.
+        continue;
+      }
+      if (num_channels_ == 0 || samples_array_ == NULL) {
+        // The decoder isn't satisfying its contract.
+        return {bytes_used, cpp::fail(Error::kInternalError)};
+      }
+      current_sample_ = 0;
+      break;
+    }
+  }
+
+  // A frame is available. Each sample occupies 4 output bytes per channel.
+  std::size_t output_byte = 0;
+  while (current_sample_ < num_samples_) {
+    if (output_byte + (4 * num_channels_) > output.size()) {
+      // No room for another full set of channels; resume on the next call.
+      return {bytes_used, OutputInfo{.bytes_written = output_byte,
+                                     .is_finished_writing = false}};
+    }
+    for (int channel = 0; channel < num_channels_; channel++) {
+      float raw_sample = samples_array_[channel][current_sample_];
+      // Must be wider than 16 bits, or the top byte is always lost.
+      uint32_t sample_24 = scaleToBits(raw_sample, 24);
+      output[output_byte++] = static_cast<std::byte>((sample_24 >> 16) & 0xFF);
+      output[output_byte++] = static_cast<std::byte>((sample_24 >> 8) & 0xFF);
+      output[output_byte++] = static_cast<std::byte>((sample_24)&0xFF);
+      // Pad to 32 bits for alignment.
+      output[output_byte++] = static_cast<std::byte>(0);
+    }
+    current_sample_++;
+  }
+  current_sample_ = -1;
+  return {bytes_used, OutputInfo{.bytes_written = output_byte,
+                                 .is_finished_writing = true}};
+}
+
+auto StbVorbisDecoder::SeekStream(cpp::span<const std::byte> input,
+                                  std::size_t target_sample) -> Result<void> {
+  // TODO(jacqueline): Implement me.
+  return {0, {}};  // Placeholder: consumes no input and performs no seek.
+}
+
+}  // namespace codecs
diff --git a/src/database/tag_parser.cpp b/src/database/tag_parser.cpp
index 27d4163f..589c988f 100644
--- a/src/database/tag_parser.cpp
+++ b/src/database/tag_parser.cpp
@@ -96,6 +96,7 @@ auto TagParserImpl::ReadAndParseTags(const std::string& path, SongTags* out)
 
   if (res != 0) {
     // Parsing failed.
+    ESP_LOGE(kTag, "tag parsing failed, reason %d", res);
     return false;
   }
 
@@ -103,6 +104,15 @@ auto TagParserImpl::ReadAndParseTags(const std::string& path, SongTags* out)
     case Fmp3:
       out->encoding = Encoding::kMp3;
       break;
+    case Fogg:
+      out->encoding = Encoding::kOgg;
+      break;
+    case Fflac:
+      out->encoding = Encoding::kFlac;
+      break;
+    case Fwav:
+      out->encoding = Encoding::kWav;
+      break;
     default:
       out->encoding = Encoding::kUnsupported;
   }
diff --git a/src/tasks/tasks.cpp b/src/tasks/tasks.cpp
index b95d8e16..2477d8b9 100644
--- a/src/tasks/tasks.cpp
+++ b/src/tasks/tasks.cpp
@@ -39,7 +39,7 @@ auto AllocateStack() -> cpp::span<StackType_t>;
 // amount of stack space.
 template <>
 auto AllocateStack<Type::kAudio>() -> cpp::span<StackType_t> {
-  std::size_t size = 32 * 1024;
+  std::size_t size = 48 * 1024;
   return {static_cast<StackType_t*>(heap_caps_malloc(size, MALLOC_CAP_DEFAULT)),
           size};
 }

From 0024bb1dbe0df319bc7bf022f0c4614cc9c8e0ed Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Thu, 15 Jun 2023 10:34:41 +1000
Subject: [PATCH 3/7] Remove chatty logging

---
 src/audio/audio_decoder.cpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/audio/audio_decoder.cpp b/src/audio/audio_decoder.cpp
index 310f5740..abb94e85 100644
--- a/src/audio/audio_decoder.cpp
+++ b/src/audio/audio_decoder.cpp
@@ -157,17 +157,10 @@ auto AudioDecoder::Process(const std::vector<InputStream>& inputs,
       return;
     }
 
-    ESP_LOGI(kTag, "enc read: %u", res.first);
-
     codecs::ICodec::OutputInfo out_info = res.second.value();
     output->add(out_info.bytes_written);
     has_samples_to_send_ = !out_info.is_finished_writing;
 
-    ESP_LOGI(kTag, "enc wrote: %u", out_info.bytes_written);
-    if (out_info.is_finished_writing) {
-      ESP_LOGI(kTag, "(write finished)");
-    }
-
     if (has_samples_to_send_) {
       // We weren't able to fit all the generated samples into the output
       // buffer. Stop trying; we'll finish up during the next pass.

From c6bb42cdd21b63accd20012373a8a0e41d8566f5 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Thu, 15 Jun 2023 10:42:28 +1000
Subject: [PATCH 4/7] song -> track

---
 src/app_console/app_console.cpp         |  19 +--
 src/audio/audio_fsm.cpp                 |   8 +-
 src/audio/audio_task.cpp                |   4 +-
 src/audio/fatfs_audio_input.cpp         |   4 +-
 src/audio/include/audio_events.hpp      |  10 +-
 src/audio/include/audio_fsm.hpp         |  10 +-
 src/audio/include/fatfs_audio_input.hpp |   2 +-
 src/database/CMakeLists.txt             |   2 +-
 src/database/database.cpp               | 130 +++++++++---------
 src/database/include/database.hpp       |  24 ++--
 src/database/include/records.hpp        |  36 ++---
 src/database/include/song.hpp           | 166 -----------------------
 src/database/include/tag_parser.hpp     |   6 +-
 src/database/include/track.hpp          | 169 ++++++++++++++++++++++++
 src/database/records.cpp                |  38 +++---
 src/database/tag_parser.cpp             |   4 +-
 src/database/test/test_database.cpp     |  80 +++++------
 src/database/test/test_records.cpp      |  22 +--
 src/database/{song.cpp => track.cpp}    |  22 +--
 19 files changed, 380 insertions(+), 376 deletions(-)
 delete mode 100644 src/database/include/song.hpp
 create mode 100644 src/database/include/track.hpp
 rename src/database/{song.cpp => track.cpp} (62%)

diff --git a/src/app_console/app_console.cpp b/src/app_console/app_console.cpp
index 0483bde9..457d66f6 100644
--- a/src/app_console/app_console.cpp
+++ b/src/app_console/app_console.cpp
@@ -121,8 +121,8 @@ void RegisterDbInit() {
   esp_console_cmd_register(&cmd);
 }
 
-int CmdDbSongs(int argc, char** argv) {
-  static const std::string usage = "usage: db_songs";
+int CmdDbTracks(int argc, char** argv) {
+  static const std::string usage = "usage: db_tracks";
   if (argc != 1) {
     std::cout << usage << std::endl;
     return 1;
@@ -133,9 +133,10 @@ int CmdDbSongs(int argc, char** argv) {
     std::cout << "no database open" << std::endl;
     return 1;
   }
-  std::unique_ptr<database::Result<database::Song>> res(db->GetSongs(5).get());
+  std::unique_ptr<database::Result<database::Track>> res(
+      db->GetTracks(5).get());
   while (true) {
-    for (database::Song s : res->values()) {
+    for (database::Track s : res->values()) {
       std::cout << s.tags().title.value_or("[BLANK]") << std::endl;
     }
     if (res->next_page()) {
@@ -149,11 +150,11 @@ int CmdDbSongs(int argc, char** argv) {
   return 0;
 }
 
-void RegisterDbSongs() {
-  esp_console_cmd_t cmd{.command = "db_songs",
-                        .help = "lists titles of ALL songs in the database",
+void RegisterDbTracks() {
+  esp_console_cmd_t cmd{.command = "db_tracks",
+                        .help = "lists titles of ALL tracks in the database",
                         .hint = NULL,
-                        .func = &CmdDbSongs,
+                        .func = &CmdDbTracks,
                         .argtable = NULL};
   esp_console_cmd_register(&cmd);
 }
@@ -217,7 +218,7 @@ auto AppConsole::RegisterExtraComponents() -> void {
   RegisterAudioStatus();
   */
   RegisterDbInit();
-  RegisterDbSongs();
+  RegisterDbTracks();
   RegisterDbDump();
 }
 
diff --git a/src/audio/audio_fsm.cpp b/src/audio/audio_fsm.cpp
index 6c974905..0be28250 100644
--- a/src/audio/audio_fsm.cpp
+++ b/src/audio/audio_fsm.cpp
@@ -28,7 +28,7 @@ std::unique_ptr<FatfsAudioInput> AudioState::sFileSource;
 std::unique_ptr<I2SAudioOutput> AudioState::sI2SOutput;
 std::vector<std::unique_ptr<IAudioElement>> AudioState::sPipeline;
 
-std::deque<AudioState::EnqueuedItem> AudioState::sSongQueue;
+std::deque<AudioState::EnqueuedItem> AudioState::sTrackQueue;
 
 auto AudioState::Init(drivers::GpioExpander* gpio_expander,
                       std::weak_ptr<database::Database> database) -> bool {
@@ -83,11 +83,11 @@ void Playback::exit() {
 
 void Playback::react(const InputFileFinished& ev) {
   ESP_LOGI(kTag, "finished file");
-  if (sSongQueue.empty()) {
+  if (sTrackQueue.empty()) {
     return;
   }
-  EnqueuedItem next_item = sSongQueue.front();
-  sSongQueue.pop_front();
+  EnqueuedItem next_item = sTrackQueue.front();
+  sTrackQueue.pop_front();
 
   if (std::holds_alternative<std::string>(next_item)) {
     sFileSource->OpenFile(std::get<std::string>(next_item));
diff --git a/src/audio/audio_task.cpp b/src/audio/audio_task.cpp
index eea84e45..394a55b6 100644
--- a/src/audio/audio_task.cpp
+++ b/src/audio/audio_task.cpp
@@ -45,7 +45,7 @@ namespace task {
 static const char* kTag = "task";
 
 // The default amount of time to wait between pipeline iterations for a single
-// song.
+// track.
 static constexpr uint_fast16_t kDefaultDelayTicks = pdMS_TO_TICKS(5);
 static constexpr uint_fast16_t kMaxDelayTicks = pdMS_TO_TICKS(10);
 static constexpr uint_fast16_t kMinDelayTicks = pdMS_TO_TICKS(1);
@@ -54,7 +54,7 @@ void AudioTaskMain(std::unique_ptr<Pipeline> pipeline, IAudioSink* sink) {
   // The stream format for bytes currently in the sink buffer.
   std::optional<StreamInfo::Format> output_format;
 
-  // How long to wait between pipeline iterations. This is reset for each song,
+  // How long to wait between pipeline iterations. This is reset for each track,
   // and readjusted on the fly to maintain a reasonable amount playback buffer.
   // Buffering too much will mean we process samples inefficiently, wasting CPU
   // time, whilst buffering too little will affect the quality of the output.
diff --git a/src/audio/fatfs_audio_input.cpp b/src/audio/fatfs_audio_input.cpp
index eaa62ee3..c26ff0ad 100644
--- a/src/audio/fatfs_audio_input.cpp
+++ b/src/audio/fatfs_audio_input.cpp
@@ -24,12 +24,12 @@
 
 #include "audio_element.hpp"
 #include "chunk.hpp"
-#include "song.hpp"
 #include "stream_buffer.hpp"
 #include "stream_event.hpp"
 #include "stream_info.hpp"
 #include "stream_message.hpp"
 #include "tag_parser.hpp"
+#include "track.hpp"
 #include "types.hpp"
 
 static const char* kTag = "SRC";
@@ -53,7 +53,7 @@ auto FatfsAudioInput::OpenFile(const std::string& path) -> bool {
   ESP_LOGI(kTag, "opening file %s", path.c_str());
 
   database::TagParserImpl tag_parser;
-  database::SongTags tags;
+  database::TrackTags tags;
   if (!tag_parser.ReadAndParseTags(path, &tags)) {
     ESP_LOGE(kTag, "failed to read tags");
     tags.encoding = database::Encoding::kFlac;
diff --git a/src/audio/include/audio_events.hpp b/src/audio/include/audio_events.hpp
index 920b134e..eebf5efe 100644
--- a/src/audio/include/audio_events.hpp
+++ b/src/audio/include/audio_events.hpp
@@ -10,7 +10,7 @@
 
 #include "tinyfsm.hpp"
 
-#include "song.hpp"
+#include "track.hpp"
 
 namespace audio {
 
@@ -18,10 +18,10 @@ struct PlayFile : tinyfsm::Event {
   std::string filename;
 };
 
-struct PlaySong : tinyfsm::Event {
-  database::SongId id;
-  std::optional<database::SongData> data;
-  std::optional<database::SongTags> tags;
+struct PlayTrack : tinyfsm::Event {
+  database::TrackId id;
+  std::optional<database::TrackData> data;
+  std::optional<database::TrackTags> tags;
 };
 
 struct InputFileFinished : tinyfsm::Event {};
diff --git a/src/audio/include/audio_fsm.hpp b/src/audio/include/audio_fsm.hpp
index 5dad87c0..72654ab5 100644
--- a/src/audio/include/audio_fsm.hpp
+++ b/src/audio/include/audio_fsm.hpp
@@ -17,9 +17,9 @@
 #include "gpio_expander.hpp"
 #include "i2s_audio_output.hpp"
 #include "i2s_dac.hpp"
-#include "song.hpp"
 #include "storage.hpp"
 #include "tinyfsm.hpp"
+#include "track.hpp"
 
 #include "system_events.hpp"
 
@@ -39,7 +39,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
   void react(const tinyfsm::Event& ev) {}
 
   virtual void react(const system_fsm::BootComplete&) {}
-  virtual void react(const PlaySong&) {}
+  virtual void react(const PlayTrack&) {}
   virtual void react(const PlayFile&) {}
 
   virtual void react(const InputFileFinished&) {}
@@ -55,8 +55,8 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
   static std::unique_ptr<I2SAudioOutput> sI2SOutput;
   static std::vector<std::unique_ptr<IAudioElement>> sPipeline;
 
-  typedef std::variant<database::SongId, std::string> EnqueuedItem;
-  static std::deque<EnqueuedItem> sSongQueue;
+  typedef std::variant<database::TrackId, std::string> EnqueuedItem;
+  static std::deque<EnqueuedItem> sTrackQueue;
 };
 
 namespace states {
@@ -69,7 +69,7 @@ class Uninitialised : public AudioState {
 
 class Standby : public AudioState {
  public:
-  void react(const PlaySong&) override {}
+  void react(const PlayTrack&) override {}
   void react(const PlayFile&) override;
   using AudioState::react;
 };
diff --git a/src/audio/include/fatfs_audio_input.hpp b/src/audio/include/fatfs_audio_input.hpp
index 1f9f36a1..f5a65d0d 100644
--- a/src/audio/include/fatfs_audio_input.hpp
+++ b/src/audio/include/fatfs_audio_input.hpp
@@ -18,8 +18,8 @@
 #include "ff.h"
 #include "freertos/message_buffer.h"
 #include "freertos/queue.h"
-#include "song.hpp"
 #include "span.hpp"
+#include "track.hpp"
 
 #include "audio_element.hpp"
 #include "stream_buffer.hpp"
diff --git a/src/database/CMakeLists.txt b/src/database/CMakeLists.txt
index 211a63cd..e7b1f62c 100644
--- a/src/database/CMakeLists.txt
+++ b/src/database/CMakeLists.txt
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: GPL-3.0-only
 
 idf_component_register(
-  SRCS "env_esp.cpp" "database.cpp" "song.cpp" "records.cpp" "file_gatherer.cpp" "tag_parser.cpp"
+  SRCS "env_esp.cpp" "database.cpp" "track.cpp" "records.cpp" "file_gatherer.cpp" "tag_parser.cpp"
   INCLUDE_DIRS "include"
   REQUIRES "result" "span" "esp_psram" "fatfs" "libtags" "komihash" "cbor" "tasks")
 
diff --git a/src/database/database.cpp b/src/database/database.cpp
index 71954bbb..9206256f 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -28,16 +28,16 @@
 #include "file_gatherer.hpp"
 #include "records.hpp"
 #include "result.hpp"
-#include "song.hpp"
 #include "tag_parser.hpp"
 #include "tasks.hpp"
+#include "track.hpp"
 
 namespace database {
 
 static SingletonEnv<leveldb::EspEnv> sEnv;
 static const char* kTag = "DB";
 
-static const char kSongIdKey[] = "next_song_id";
+static const char kTrackIdKey[] = "next_track_id";
 
 static std::atomic<bool> sIsDbOpen(false);
 
@@ -128,8 +128,8 @@ Database::~Database() {
 
 auto Database::Update() -> std::future<void> {
   return worker_task_->Dispatch<void>([&]() -> void {
-    // Stage 1: verify all existing songs are still valid.
-    ESP_LOGI(kTag, "verifying existing songs");
+    // Stage 1: verify all existing tracks are still valid.
+    ESP_LOGI(kTag, "verifying existing tracks");
     const leveldb::Snapshot* snapshot = db_->GetSnapshot();
     leveldb::ReadOptions read_options;
     read_options.fill_cache = false;
@@ -138,8 +138,8 @@ auto Database::Update() -> std::future<void> {
     OwningSlice prefix = CreateDataPrefix();
     it->Seek(prefix.slice);
     while (it->Valid() && it->key().starts_with(prefix.slice)) {
-      std::optional<SongData> song = ParseDataValue(it->value());
-      if (!song) {
+      std::optional<TrackData> track = ParseDataValue(it->value());
+      if (!track) {
         // The value was malformed. Drop this record.
         ESP_LOGW(kTag, "dropping malformed metadata");
         db_->Delete(leveldb::WriteOptions(), it->key());
@@ -147,33 +147,33 @@ auto Database::Update() -> std::future<void> {
         continue;
       }
 
-      if (song->is_tombstoned()) {
-        ESP_LOGW(kTag, "skipping tombstoned %lx", song->id());
+      if (track->is_tombstoned()) {
+        ESP_LOGW(kTag, "skipping tombstoned %lx", track->id());
         it->Next();
         continue;
       }
 
-      SongTags tags;
-      if (!tag_parser_->ReadAndParseTags(song->filepath(), &tags) ||
+      TrackTags tags;
+      if (!tag_parser_->ReadAndParseTags(track->filepath(), &tags) ||
           tags.encoding == Encoding::kUnsupported) {
-        // We couldn't read the tags for this song. Either they were
+        // We couldn't read the tags for this track. Either they were
         // malformed, or perhaps the file is missing. Either way, tombstone
         // this record.
-        ESP_LOGW(kTag, "entombing missing #%lx", song->id());
-        dbPutSongData(song->Entomb());
+        ESP_LOGW(kTag, "entombing missing #%lx", track->id());
+        dbPutTrackData(track->Entomb());
         it->Next();
         continue;
       }
 
       uint64_t new_hash = tags.Hash();
-      if (new_hash != song->tags_hash()) {
-        // This song's tags have changed. Since the filepath is exactly the
+      if (new_hash != track->tags_hash()) {
+        // This track's tags have changed. Since the filepath is exactly the
         // same, we assume this is a legitimate correction. Update the
         // database.
-        ESP_LOGI(kTag, "updating hash (%llx -> %llx)", song->tags_hash(),
+        ESP_LOGI(kTag, "updating hash (%llx -> %llx)", track->tags_hash(),
                  new_hash);
-        dbPutSongData(song->UpdateHash(new_hash));
-        dbPutHash(new_hash, song->id());
+        dbPutTrackData(track->UpdateHash(new_hash));
+        dbPutHash(new_hash, track->id());
       }
 
       it->Next();
@@ -182,9 +182,9 @@ auto Database::Update() -> std::future<void> {
     db_->ReleaseSnapshot(snapshot);
 
     // Stage 2: search for newly added files.
-    ESP_LOGI(kTag, "scanning for new songs");
+    ESP_LOGI(kTag, "scanning for new tracks");
     file_gatherer_->FindFiles("", [&](const std::string& path) {
-      SongTags tags;
+      TrackTags tags;
       if (!tag_parser_->ReadAndParseTags(path, &tags) ||
           tags.encoding == Encoding::kUnsupported) {
         // No parseable tags; skip this fiile.
@@ -194,32 +194,32 @@ auto Database::Update() -> std::future<void> {
       // Check for any existing record with the same hash.
       uint64_t hash = tags.Hash();
       OwningSlice key = CreateHashKey(hash);
-      std::optional<SongId> existing_hash;
+      std::optional<TrackId> existing_hash;
       std::string raw_entry;
       if (db_->Get(leveldb::ReadOptions(), key.slice, &raw_entry).ok()) {
         existing_hash = ParseHashValue(raw_entry);
       }
 
       if (!existing_hash) {
-        // We've never met this song before! Or we have, but the entry is
-        // malformed. Either way, record this as a new song.
-        SongId id = dbMintNewSongId();
+        // We've never met this track before! Or we have, but the entry is
+        // malformed. Either way, record this as a new track.
+        TrackId id = dbMintNewTrackId();
         ESP_LOGI(kTag, "recording new 0x%lx", id);
-        dbPutSong(id, path, hash);
+        dbPutTrack(id, path, hash);
         return;
       }
 
-      std::optional<SongData> existing_data = dbGetSongData(*existing_hash);
+      std::optional<TrackData> existing_data = dbGetTrackData(*existing_hash);
       if (!existing_data) {
         // We found a hash that matches, but there's no data record? Weird.
-        SongData new_data(*existing_hash, path, hash);
-        dbPutSongData(new_data);
+        TrackData new_data(*existing_hash, path, hash);
+        dbPutTrackData(new_data);
         return;
       }
 
       if (existing_data->is_tombstoned()) {
-        ESP_LOGI(kTag, "exhuming song %lu", existing_data->id());
-        dbPutSongData(existing_data->Exhume(path));
+        ESP_LOGI(kTag, "exhuming track %lu", existing_data->id());
+        dbPutTrackData(existing_data->Exhume(path));
       } else if (existing_data->filepath() != path) {
         ESP_LOGW(kTag, "tag hash collision");
       }
@@ -227,14 +227,14 @@ auto Database::Update() -> std::future<void> {
   });
 }
 
-auto Database::GetSongs(std::size_t page_size) -> std::future<Result<Song>*> {
-  return worker_task_->Dispatch<Result<Song>*>([=, this]() -> Result<Song>* {
-    Continuation<Song> c{.iterator = nullptr,
-                         .prefix = CreateDataPrefix().data,
-                         .start_key = CreateDataPrefix().data,
-                         .forward = true,
-                         .was_prev_forward = true,
-                         .page_size = page_size};
+auto Database::GetTracks(std::size_t page_size) -> std::future<Result<Track>*> {
+  return worker_task_->Dispatch<Result<Track>*>([=, this]() -> Result<Track>* {
+    Continuation<Track> c{.iterator = nullptr,
+                          .prefix = CreateDataPrefix().data,
+                          .start_key = CreateDataPrefix().data,
+                          .forward = true,
+                          .was_prev_forward = true,
+                          .page_size = page_size};
     return dbGetPage(c);
   });
 }
@@ -260,32 +260,32 @@ auto Database::GetPage(Continuation<T>* c) -> std::future<Result<T>*> {
       [=, this]() -> Result<T>* { return dbGetPage(copy); });
 }
 
-template auto Database::GetPage<Song>(Continuation<Song>* c)
-    -> std::future<Result<Song>*>;
+template auto Database::GetPage<Track>(Continuation<Track>* c)
+    -> std::future<Result<Track>*>;
 template auto Database::GetPage<std::string>(Continuation<std::string>* c)
     -> std::future<Result<std::string>*>;
 
-auto Database::dbMintNewSongId() -> SongId {
-  SongId next_id = 1;
+auto Database::dbMintNewTrackId() -> TrackId {
+  TrackId next_id = 1;
   std::string val;
-  auto status = db_->Get(leveldb::ReadOptions(), kSongIdKey, &val);
+  auto status = db_->Get(leveldb::ReadOptions(), kTrackIdKey, &val);
   if (status.ok()) {
-    next_id = BytesToSongId(val).value_or(next_id);
+    next_id = BytesToTrackId(val).value_or(next_id);
   } else if (!status.IsNotFound()) {
     // TODO(jacqueline): Handle this more.
-    ESP_LOGE(kTag, "failed to get next song id");
+    ESP_LOGE(kTag, "failed to get next track id");
   }
 
-  if (!db_->Put(leveldb::WriteOptions(), kSongIdKey,
-                SongIdToBytes(next_id + 1).slice)
+  if (!db_->Put(leveldb::WriteOptions(), kTrackIdKey,
+                TrackIdToBytes(next_id + 1).slice)
            .ok()) {
-    ESP_LOGE(kTag, "failed to write next song id");
+    ESP_LOGE(kTag, "failed to write next track id");
   }
 
   return next_id;
 }
 
-auto Database::dbEntomb(SongId id, uint64_t hash) -> void {
+auto Database::dbEntomb(TrackId id, uint64_t hash) -> void {
   OwningSlice key = CreateHashKey(hash);
   OwningSlice val = CreateHashValue(id);
   if (!db_->Put(leveldb::WriteOptions(), key.slice, val.slice).ok()) {
@@ -293,7 +293,7 @@ auto Database::dbEntomb(SongId id, uint64_t hash) -> void {
   }
 }
 
-auto Database::dbPutSongData(const SongData& s) -> void {
+auto Database::dbPutTrackData(const TrackData& s) -> void {
   OwningSlice key = CreateDataKey(s.id());
   OwningSlice val = CreateDataValue(s);
   if (!db_->Put(leveldb::WriteOptions(), key.slice, val.slice).ok()) {
@@ -301,7 +301,7 @@ auto Database::dbPutSongData(const SongData& s) -> void {
   }
 }
 
-auto Database::dbGetSongData(SongId id) -> std::optional<SongData> {
+auto Database::dbGetTrackData(TrackId id) -> std::optional<TrackData> {
   OwningSlice key = CreateDataKey(id);
   std::string raw_val;
   if (!db_->Get(leveldb::ReadOptions(), key.slice, &raw_val).ok()) {
@@ -311,7 +311,7 @@ auto Database::dbGetSongData(SongId id) -> std::optional<SongData> {
   return ParseDataValue(raw_val);
 }
 
-auto Database::dbPutHash(const uint64_t& hash, SongId i) -> void {
+auto Database::dbPutHash(const uint64_t& hash, TrackId i) -> void {
   OwningSlice key = CreateHashKey(hash);
   OwningSlice val = CreateHashValue(i);
   if (!db_->Put(leveldb::WriteOptions(), key.slice, val.slice).ok()) {
@@ -319,7 +319,7 @@ auto Database::dbPutHash(const uint64_t& hash, SongId i) -> void {
   }
 }
 
-auto Database::dbGetHash(const uint64_t& hash) -> std::optional<SongId> {
+auto Database::dbGetHash(const uint64_t& hash) -> std::optional<TrackId> {
   OwningSlice key = CreateHashKey(hash);
   std::string raw_val;
   if (!db_->Get(leveldb::ReadOptions(), key.slice, &raw_val).ok()) {
@@ -329,10 +329,10 @@ auto Database::dbGetHash(const uint64_t& hash) -> std::optional<SongId> {
   return ParseHashValue(raw_val);
 }
 
-auto Database::dbPutSong(SongId id,
-                         const std::string& path,
-                         const uint64_t& hash) -> void {
-  dbPutSongData(SongData(id, path, hash));
+auto Database::dbPutTrack(TrackId id,
+                          const std::string& path,
+                          const uint64_t& hash) -> void {
+  dbPutTrackData(TrackData(id, path, hash));
   dbPutHash(hash, id);
 }
 
@@ -455,24 +455,24 @@ auto Database::dbGetPage(const Continuation<T>& c) -> Result<T>* {
   return new Result<T>(std::move(records), next_page, prev_page);
 }
 
-template auto Database::dbGetPage<Song>(const Continuation<Song>& c)
-    -> Result<Song>*;
+template auto Database::dbGetPage<Track>(const Continuation<Track>& c)
+    -> Result<Track>*;
 template auto Database::dbGetPage<std::string>(
     const Continuation<std::string>& c) -> Result<std::string>*;
 
 template <>
-auto Database::ParseRecord<Song>(const leveldb::Slice& key,
-                                 const leveldb::Slice& val)
-    -> std::optional<Song> {
-  std::optional<SongData> data = ParseDataValue(val);
+auto Database::ParseRecord<Track>(const leveldb::Slice& key,
+                                  const leveldb::Slice& val)
+    -> std::optional<Track> {
+  std::optional<TrackData> data = ParseDataValue(val);
   if (!data || data->is_tombstoned()) {
     return {};
   }
-  SongTags tags;
+  TrackTags tags;
   if (!tag_parser_->ReadAndParseTags(data->filepath(), &tags)) {
     return {};
   }
-  return Song(*data, tags);
+  return Track(*data, tags);
 }
 
 template <>
diff --git a/src/database/include/database.hpp b/src/database/include/database.hpp
index 5214b8df..1a8388e8 100644
--- a/src/database/include/database.hpp
+++ b/src/database/include/database.hpp
@@ -23,9 +23,9 @@
 #include "leveldb/slice.h"
 #include "records.hpp"
 #include "result.hpp"
-#include "song.hpp"
 #include "tag_parser.hpp"
 #include "tasks.hpp"
+#include "track.hpp"
 
 namespace database {
 
@@ -82,7 +82,7 @@ class Database {
 
   auto Update() -> std::future<void>;
 
-  auto GetSongs(std::size_t page_size) -> std::future<Result<Song>*>;
+  auto GetTracks(std::size_t page_size) -> std::future<Result<Track>*>;
   auto GetDump(std::size_t page_size) -> std::future<Result<std::string>*>;
 
   template <typename T>
@@ -109,14 +109,14 @@ class Database {
            ITagParser* tag_parser,
            std::shared_ptr<tasks::Worker> worker);
 
-  auto dbMintNewSongId() -> SongId;
-  auto dbEntomb(SongId song, uint64_t hash) -> void;
+  auto dbMintNewTrackId() -> TrackId;
+  auto dbEntomb(TrackId track, uint64_t hash) -> void;
 
-  auto dbPutSongData(const SongData& s) -> void;
-  auto dbGetSongData(SongId id) -> std::optional<SongData>;
-  auto dbPutHash(const uint64_t& hash, SongId i) -> void;
-  auto dbGetHash(const uint64_t& hash) -> std::optional<SongId>;
-  auto dbPutSong(SongId id, const std::string& path, const uint64_t& hash)
+  auto dbPutTrackData(const TrackData& s) -> void;
+  auto dbGetTrackData(TrackId id) -> std::optional<TrackData>;
+  auto dbPutHash(const uint64_t& hash, TrackId i) -> void;
+  auto dbGetHash(const uint64_t& hash) -> std::optional<TrackId>;
+  auto dbPutTrack(TrackId id, const std::string& path, const uint64_t& hash)
       -> void;
 
   template <typename T>
@@ -128,9 +128,9 @@ class Database {
 };
 
 template <>
-auto Database::ParseRecord<Song>(const leveldb::Slice& key,
-                                 const leveldb::Slice& val)
-    -> std::optional<Song>;
+auto Database::ParseRecord<Track>(const leveldb::Slice& key,
+                                  const leveldb::Slice& val)
+    -> std::optional<Track>;
 template <>
 auto Database::ParseRecord<std::string>(const leveldb::Slice& key,
                                         const leveldb::Slice& val)
diff --git a/src/database/include/records.hpp b/src/database/include/records.hpp
index 1b66ad42..95a1a1e8 100644
--- a/src/database/include/records.hpp
+++ b/src/database/include/records.hpp
@@ -13,7 +13,7 @@
 #include "leveldb/db.h"
 #include "leveldb/slice.h"
 
-#include "song.hpp"
+#include "track.hpp"
 
 namespace database {
 
@@ -31,49 +31,49 @@ class OwningSlice {
 };
 
 /*
- * Returns the prefix added to every SongData key. This can be used to iterate
+ * Returns the prefix added to every TrackData key. This can be used to iterate
  * over every data record in the database.
  */
 auto CreateDataPrefix() -> OwningSlice;
 
-/* Creates a data key for a song with the specified id. */
-auto CreateDataKey(const SongId& id) -> OwningSlice;
+/* Creates a data key for a track with the specified id. */
+auto CreateDataKey(const TrackId& id) -> OwningSlice;
 
 /*
- * Encodes a SongData instance into bytes, in preparation for storing it within
+ * Encodes a TrackData instance into bytes, in preparation for storing it within
  * the database. This encoding is consistent, and will remain stable over time.
  */
-auto CreateDataValue(const SongData& song) -> OwningSlice;
+auto CreateDataValue(const TrackData& track) -> OwningSlice;
 
 /*
- * Parses bytes previously encoded via CreateDataValue back into a SongData. May
- * return nullopt if parsing fails.
+ * Parses bytes previously encoded via CreateDataValue back into a TrackData.
+ * May return nullopt if parsing fails.
  */
-auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<SongData>;
+auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<TrackData>;
 
 /* Creates a hash key for the specified hash. */
 auto CreateHashKey(const uint64_t& hash) -> OwningSlice;
 
 /*
- * Encodes a hash value (at this point just a song id) into bytes, in
+ * Encodes a hash value (at this point just a track id) into bytes, in
  * preparation for storing within the database. This encoding is consistent, and
  * will remain stable over time.
  */
-auto CreateHashValue(SongId id) -> OwningSlice;
+auto CreateHashValue(TrackId id) -> OwningSlice;
 
 /*
- * Parses bytes previously encoded via CreateHashValue back into a song id. May
+ * Parses bytes previously encoded via CreateHashValue back into a track id. May
  * return nullopt if parsing fails.
  */
-auto ParseHashValue(const leveldb::Slice&) -> std::optional<SongId>;
+auto ParseHashValue(const leveldb::Slice&) -> std::optional<TrackId>;
 
-/* Encodes a SongId as bytes. */
-auto SongIdToBytes(SongId id) -> OwningSlice;
+/* Encodes a TrackId as bytes. */
+auto TrackIdToBytes(TrackId id) -> OwningSlice;
 
 /*
- * Converts a song id encoded via SongIdToBytes back into a SongId. May return
- * nullopt if parsing fails.
+ * Converts a track id encoded via TrackIdToBytes back into a TrackId. May
+ * return nullopt if parsing fails.
  */
-auto BytesToSongId(const std::string& bytes) -> std::optional<SongId>;
+auto BytesToTrackId(const std::string& bytes) -> std::optional<TrackId>;
 
 }  // namespace database
diff --git a/src/database/include/song.hpp b/src/database/include/song.hpp
deleted file mode 100644
index d03660dc..00000000
--- a/src/database/include/song.hpp
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright 2023 jacqueline <me@jacqueline.id.au>
- *
- * SPDX-License-Identifier: GPL-3.0-only
- */
-
-#pragma once
-
-#include <stdint.h>
-
-#include <optional>
-#include <string>
-#include <utility>
-
-#include "leveldb/db.h"
-#include "span.hpp"
-
-namespace database {
-
-/*
- * Uniquely describes a single song within the database. This value will be
- * consistent across database updates, and should ideally (but is not guaranteed
- * to) endure even across a song being removed and re-added.
- *
- * Four billion songs should be enough for anybody.
- */
-typedef uint32_t SongId;
-
-/*
- * Audio file encodings that we are aware of. Used to select an appropriate
- * decoder at play time.
- *
- * Values of this enum are persisted in this database, so it is probably never a
- * good idea to change the int representation of an existing value.
- */
-enum class Encoding {
-  kUnsupported = 0,
-  kMp3 = 1,
-  kWav = 2,
-  kOgg = 3,
-  kFlac = 4,
-};
-
-/*
- * Owning container for tag-related song metadata that was extracted from a
- * file.
- */
-struct SongTags {
-  Encoding encoding;
-  std::optional<std::string> title;
-
-  // TODO(jacqueline): It would be nice to use shared_ptr's for the artist and
-  // album, since there's likely a fair number of duplicates for each
-  // (especially the former).
-
-  std::optional<std::string> artist;
-  std::optional<std::string> album;
-
-  std::optional<int> channels;
-  std::optional<int> sample_rate;
-  std::optional<int> bits_per_sample;
-
-  /*
-   * Returns a hash of the 'identifying' tags of this song. That is, a hash that
-   * can be used to determine if one song is likely the same as another, across
-   * things like re-encoding, re-mastering, or moving the underlying file.
-   */
-  auto Hash() const -> uint64_t;
-
-  bool operator==(const SongTags&) const = default;
-};
-
-/*
- * Immutable owning container for all of the metadata we store for a particular
- * song. This includes two main kinds of metadata:
- *  1. static(ish) attributes, such as the id, path on disk, hash of the tags
- *  2. dynamic attributes, such as the number of times this song has been
- *  played.
- *
- * Because a SongData is immutable, it is thread safe but will not reflect any
- * changes to the dynamic attributes that may happen after it was obtained.
- *
- * Songs may be 'tombstoned'; this indicates that the song is no longer present
- * at its previous location on disk, and we do not have any existing files with
- * a matching tags_hash. When this is the case, we ignore this SongData for most
- * purposes. We keep the entry in our database so that we can properly restore
- * dynamic attributes (such as play count) if the song later re-appears on disk.
- */
-class SongData {
- private:
-  const SongId id_;
-  const std::string filepath_;
-  const uint64_t tags_hash_;
-  const uint32_t play_count_;
-  const bool is_tombstoned_;
-
- public:
-  /* Constructor used when adding new songs to the database. */
-  SongData(SongId id, const std::string& path, uint64_t hash)
-      : id_(id),
-        filepath_(path),
-        tags_hash_(hash),
-        play_count_(0),
-        is_tombstoned_(false) {}
-
-  SongData(SongId id,
-           const std::string& path,
-           uint64_t hash,
-           uint32_t play_count,
-           bool is_tombstoned)
-      : id_(id),
-        filepath_(path),
-        tags_hash_(hash),
-        play_count_(play_count),
-        is_tombstoned_(is_tombstoned) {}
-
-  auto id() const -> SongId { return id_; }
-  auto filepath() const -> std::string { return filepath_; }
-  auto play_count() const -> uint32_t { return play_count_; }
-  auto tags_hash() const -> uint64_t { return tags_hash_; }
-  auto is_tombstoned() const -> bool { return is_tombstoned_; }
-
-  auto UpdateHash(uint64_t new_hash) const -> SongData;
-
-  /*
-   * Marks this song data as a 'tombstone'. Tombstoned songs are not playable,
-   * and should not generally be shown to users.
-   */
-  auto Entomb() const -> SongData;
-
-  /*
-   * Clears the tombstone bit of this song, and updates the path to reflect its
-   * new location.
-   */
-  auto Exhume(const std::string& new_path) const -> SongData;
-
-  bool operator==(const SongData&) const = default;
-};
-
-/*
- * Immutable and owning combination of a song's tags and metadata.
- *
- * Note that instances of this class may have a fairly large memory impact, due
- * to the large number of strings they own. Prefer to query the database again
- * (which has its own caching layer), rather than retaining Song instances for a
- * long time.
- */
-class Song {
- public:
-  Song(const SongData& data, const SongTags& tags) : data_(data), tags_(tags) {}
-  Song(const Song& other) = default;
-
-  auto data() const -> const SongData& { return data_; }
-  auto tags() const -> const SongTags& { return tags_; }
-
-  bool operator==(const Song&) const = default;
-  Song operator=(const Song& other) const { return Song(other); }
-
- private:
-  const SongData data_;
-  const SongTags tags_;
-};
-
-void swap(Song& first, Song& second);
-
-}  // namespace database
diff --git a/src/database/include/tag_parser.hpp b/src/database/include/tag_parser.hpp
index 7dab93a1..4be5ad16 100644
--- a/src/database/include/tag_parser.hpp
+++ b/src/database/include/tag_parser.hpp
@@ -8,20 +8,20 @@
 
 #include <string>
 
-#include "song.hpp"
+#include "track.hpp"
 
 namespace database {
 
 class ITagParser {
  public:
   virtual ~ITagParser() {}
-  virtual auto ReadAndParseTags(const std::string& path, SongTags* out)
+  virtual auto ReadAndParseTags(const std::string& path, TrackTags* out)
       -> bool = 0;
 };
 
 class TagParserImpl : public ITagParser {
  public:
-  virtual auto ReadAndParseTags(const std::string& path, SongTags* out)
+  virtual auto ReadAndParseTags(const std::string& path, TrackTags* out)
       -> bool override;
 };
 
diff --git a/src/database/include/track.hpp b/src/database/include/track.hpp
new file mode 100644
index 00000000..5a0c0ca8
--- /dev/null
+++ b/src/database/include/track.hpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "leveldb/db.h"
+#include "span.hpp"
+
+namespace database {
+
+/*
+ * Uniquely describes a single track within the database. This value will be
+ * consistent across database updates, and should ideally (but is not guaranteed
+ * to) endure even across a track being removed and re-added.
+ *
+ * Four billion tracks should be enough for anybody.
+ */
+typedef uint32_t TrackId;
+
+/*
+ * Audio file encodings that we are aware of. Used to select an appropriate
+ * decoder at play time.
+ *
+ * Values of this enum are persisted in the database, so it is probably never a
+ * good idea to change the int representation of an existing value.
+ */
+enum class Encoding {
+  kUnsupported = 0,
+  kMp3 = 1,
+  kWav = 2,
+  kOgg = 3,
+  kFlac = 4,
+};
+
+/*
+ * Owning container for tag-related track metadata that was extracted from a
+ * file.
+ */
+struct TrackTags {
+  Encoding encoding;
+  std::optional<std::string> title;
+
+  // TODO(jacqueline): It would be nice to use shared_ptrs for the artist and
+  // album, since there's likely a fair number of duplicates for each
+  // (especially the former).
+
+  std::optional<std::string> artist;
+  std::optional<std::string> album;
+
+  std::optional<int> channels;
+  std::optional<int> sample_rate;
+  std::optional<int> bits_per_sample;
+
+  /*
+   * Returns a hash of the 'identifying' tags of this track. That is, a hash
+   * that can be used to determine if one track is likely the same as another,
+   * across things like re-encoding, re-mastering, or moving the underlying
+   * file.
+   */
+  auto Hash() const -> uint64_t;
+
+  bool operator==(const TrackTags&) const = default;
+};
+
+/*
+ * Immutable owning container for all of the metadata we store for a particular
+ * track. This includes two main kinds of metadata:
+ *  1. static(ish) attributes, such as the id, path on disk, hash of the tags
+ *  2. dynamic attributes, such as the number of times this track has been
+ *  played.
+ *
+ * Because a TrackData is immutable, it is thread safe but will not reflect any
+ * changes to the dynamic attributes that may happen after it was obtained.
+ *
+ * Tracks may be 'tombstoned'; this indicates that the track is no longer
+ * present at its previous location on disk, and we do not have any existing
+ * files with a matching tags_hash. When this is the case, we ignore this
+ * TrackData for most purposes. We keep the entry in our database so that we can
+ * properly restore dynamic attributes (such as play count) if the track later
+ * re-appears on disk.
+ */
+class TrackData {
+ private:
+  const TrackId id_;
+  const std::string filepath_;
+  const uint64_t tags_hash_;
+  const uint32_t play_count_;
+  const bool is_tombstoned_;
+
+ public:
+  /* Constructor used when adding new tracks to the database. */
+  TrackData(TrackId id, const std::string& path, uint64_t hash)
+      : id_(id),
+        filepath_(path),
+        tags_hash_(hash),
+        play_count_(0),
+        is_tombstoned_(false) {}
+
+  TrackData(TrackId id,
+            const std::string& path,
+            uint64_t hash,
+            uint32_t play_count,
+            bool is_tombstoned)
+      : id_(id),
+        filepath_(path),
+        tags_hash_(hash),
+        play_count_(play_count),
+        is_tombstoned_(is_tombstoned) {}
+
+  auto id() const -> TrackId { return id_; }
+  auto filepath() const -> std::string { return filepath_; }
+  auto play_count() const -> uint32_t { return play_count_; }
+  auto tags_hash() const -> uint64_t { return tags_hash_; }
+  auto is_tombstoned() const -> bool { return is_tombstoned_; }
+
+  auto UpdateHash(uint64_t new_hash) const -> TrackData;
+
+  /*
+   * Marks this track data as a 'tombstone'. Tombstoned tracks are not playable,
+   * and should not generally be shown to users.
+   */
+  auto Entomb() const -> TrackData;
+
+  /*
+   * Clears the tombstone bit of this track, and updates the path to reflect its
+   * new location.
+   */
+  auto Exhume(const std::string& new_path) const -> TrackData;
+
+  bool operator==(const TrackData&) const = default;
+};
+
+/*
+ * Immutable and owning combination of a track's tags and metadata.
+ *
+ * Note that instances of this class may have a fairly large memory impact, due
+ * to the large number of strings they own. Prefer to query the database again
+ * (which has its own caching layer), rather than retaining Track instances for
+ * a long time.
+ */
+class Track {
+ public:
+  Track(const TrackData& data, const TrackTags& tags)
+      : data_(data), tags_(tags) {}
+  Track(const Track& other) = default;
+
+  auto data() const -> const TrackData& { return data_; }
+  auto tags() const -> const TrackTags& { return tags_; }
+
+  bool operator==(const Track&) const = default;
+  Track operator=(const Track& other) const { return Track(other); }
+
+ private:
+  const TrackData data_;
+  const TrackTags tags_;
+};
+
+void swap(Track& first, Track& second);
+
+}  // namespace database
diff --git a/src/database/records.cpp b/src/database/records.cpp
index f04e5da7..49e5db0b 100644
--- a/src/database/records.cpp
+++ b/src/database/records.cpp
@@ -14,7 +14,7 @@
 #include "cbor.h"
 #include "esp_log.h"
 
-#include "song.hpp"
+#include "track.hpp"
 
 namespace database {
 
@@ -60,14 +60,14 @@ auto CreateDataPrefix() -> OwningSlice {
   return OwningSlice({data, 2});
 }
 
-auto CreateDataKey(const SongId& id) -> OwningSlice {
+auto CreateDataKey(const TrackId& id) -> OwningSlice {
   std::ostringstream output;
   output.put(kDataPrefix).put(kFieldSeparator);
-  output << SongIdToBytes(id).data;
+  output << TrackIdToBytes(id).data;
   return OwningSlice(output.str());
 }
 
-auto CreateDataValue(const SongData& song) -> OwningSlice {
+auto CreateDataValue(const TrackData& track) -> OwningSlice {
   uint8_t* buf;
   std::size_t buf_len = cbor_encode(&buf, [&](CborEncoder* enc) {
     CborEncoder array_encoder;
@@ -77,28 +77,28 @@ auto CreateDataValue(const SongData& song) -> OwningSlice {
       ESP_LOGE(kTag, "encoding err %u", err);
       return;
     }
-    err = cbor_encode_int(&array_encoder, song.id());
+    err = cbor_encode_int(&array_encoder, track.id());
     if (err != CborNoError && err != CborErrorOutOfMemory) {
       ESP_LOGE(kTag, "encoding err %u", err);
       return;
     }
-    err = cbor_encode_text_string(&array_encoder, song.filepath().c_str(),
-                                  song.filepath().size());
+    err = cbor_encode_text_string(&array_encoder, track.filepath().c_str(),
+                                  track.filepath().size());
     if (err != CborNoError && err != CborErrorOutOfMemory) {
       ESP_LOGE(kTag, "encoding err %u", err);
       return;
     }
-    err = cbor_encode_uint(&array_encoder, song.tags_hash());
+    err = cbor_encode_uint(&array_encoder, track.tags_hash());
     if (err != CborNoError && err != CborErrorOutOfMemory) {
       ESP_LOGE(kTag, "encoding err %u", err);
       return;
     }
-    err = cbor_encode_int(&array_encoder, song.play_count());
+    err = cbor_encode_int(&array_encoder, track.play_count());
     if (err != CborNoError && err != CborErrorOutOfMemory) {
       ESP_LOGE(kTag, "encoding err %u", err);
       return;
     }
-    err = cbor_encode_boolean(&array_encoder, song.is_tombstoned());
+    err = cbor_encode_boolean(&array_encoder, track.is_tombstoned());
     if (err != CborNoError && err != CborErrorOutOfMemory) {
       ESP_LOGE(kTag, "encoding err %u", err);
       return;
@@ -114,7 +114,7 @@ auto CreateDataValue(const SongData& song) -> OwningSlice {
   return OwningSlice(as_str);
 }
 
-auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<SongData> {
+auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<TrackData> {
   CborParser parser;
   CborValue container;
   CborError err;
@@ -135,7 +135,7 @@ auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<SongData> {
   if (err != CborNoError) {
     return {};
   }
-  SongId id = raw_int;
+  TrackId id = raw_int;
   err = cbor_value_advance(&val);
   if (err != CborNoError || !cbor_value_is_text_string(&val)) {
     return {};
@@ -176,7 +176,7 @@ auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<SongData> {
     return {};
   }
 
-  return SongData(id, path, hash, play_count, is_tombstoned);
+  return TrackData(id, path, hash, play_count, is_tombstoned);
 }
 
 auto CreateHashKey(const uint64_t& hash) -> OwningSlice {
@@ -193,15 +193,15 @@ auto CreateHashKey(const uint64_t& hash) -> OwningSlice {
   return OwningSlice(output.str());
 }
 
-auto ParseHashValue(const leveldb::Slice& slice) -> std::optional<SongId> {
-  return BytesToSongId(slice.ToString());
+auto ParseHashValue(const leveldb::Slice& slice) -> std::optional<TrackId> {
+  return BytesToTrackId(slice.ToString());
 }
 
-auto CreateHashValue(SongId id) -> OwningSlice {
-  return SongIdToBytes(id);
+auto CreateHashValue(TrackId id) -> OwningSlice {
+  return TrackIdToBytes(id);
 }
 
-auto SongIdToBytes(SongId id) -> OwningSlice {
+auto TrackIdToBytes(TrackId id) -> OwningSlice {
   uint8_t buf[8];
   CborEncoder enc;
   cbor_encoder_init(&enc, buf, sizeof(buf), 0);
@@ -211,7 +211,7 @@ auto SongIdToBytes(SongId id) -> OwningSlice {
   return OwningSlice(as_str);
 }
 
-auto BytesToSongId(const std::string& bytes) -> std::optional<SongId> {
+auto BytesToTrackId(const std::string& bytes) -> std::optional<TrackId> {
   CborParser parser;
   CborValue val;
   cbor_parser_init(reinterpret_cast<const uint8_t*>(bytes.data()), bytes.size(),
diff --git a/src/database/tag_parser.cpp b/src/database/tag_parser.cpp
index 589c988f..5bca0b58 100644
--- a/src/database/tag_parser.cpp
+++ b/src/database/tag_parser.cpp
@@ -17,7 +17,7 @@ namespace libtags {
 struct Aux {
   FIL file;
   FILINFO info;
-  SongTags* tags;
+  TrackTags* tags;
 };
 
 static int read(Tagctx* ctx, void* buf, int cnt) {
@@ -71,7 +71,7 @@ static void toc(Tagctx* ctx, int ms, int offset) {}
 static const std::size_t kBufSize = 1024;
 static const char* kTag = "TAGS";
 
-auto TagParserImpl::ReadAndParseTags(const std::string& path, SongTags* out)
+auto TagParserImpl::ReadAndParseTags(const std::string& path, TrackTags* out)
     -> bool {
   libtags::Aux aux;
   aux.tags = out;
diff --git a/src/database/test/test_database.cpp b/src/database/test/test_database.cpp
index ebaa6307..1ce364d9 100644
--- a/src/database/test/test_database.cpp
+++ b/src/database/test/test_database.cpp
@@ -18,41 +18,41 @@
 #include "file_gatherer.hpp"
 #include "i2c_fixture.hpp"
 #include "leveldb/db.h"
-#include "song.hpp"
 #include "spi_fixture.hpp"
 #include "tag_parser.hpp"
+#include "track.hpp"
 
 namespace database {
 
 class TestBackends : public IFileGatherer, public ITagParser {
  public:
-  std::map<std::string, SongTags> songs;
+  std::map<std::string, TrackTags> tracks;
 
-  auto MakeSong(const std::string& path, const std::string& title) -> void {
-    SongTags tags;
+  auto MakeTrack(const std::string& path, const std::string& title) -> void {
+    TrackTags tags;
     tags.encoding = Encoding::kMp3;
     tags.title = title;
-    songs[path] = tags;
+    tracks[path] = tags;
   }
 
   auto FindFiles(const std::string& root,
                  std::function<void(const std::string&)> cb) -> void override {
-    for (auto keyval : songs) {
+    for (auto keyval : tracks) {
       std::invoke(cb, keyval.first);
     }
   }
 
-  auto ReadAndParseTags(const std::string& path, SongTags* out)
+  auto ReadAndParseTags(const std::string& path, TrackTags* out)
       -> bool override {
-    if (songs.contains(path)) {
-      *out = songs.at(path);
+    if (tracks.contains(path)) {
+      *out = tracks.at(path);
       return true;
     }
     return false;
   }
 };
 
-TEST_CASE("song database", "[integration]") {
+TEST_CASE("track database", "[integration]") {
   I2CFixture i2c;
   SpiFixture spi;
   drivers::DriverCache drivers;
@@ -60,104 +60,104 @@ TEST_CASE("song database", "[integration]") {
 
   Database::Destroy();
 
-  TestBackends songs;
-  auto open_res = Database::Open(&songs, &songs);
+  TestBackends tracks;
+  auto open_res = Database::Open(&tracks, &tracks);
   REQUIRE(open_res.has_value());
   std::unique_ptr<Database> db(open_res.value());
 
   SECTION("empty database") {
-    std::unique_ptr<Result<Song>> res(db->GetSongs(10).get());
+    std::unique_ptr<Result<Track>> res(db->GetTracks(10).get());
     REQUIRE(res->values().size() == 0);
   }
 
-  SECTION("add new songs") {
-    songs.MakeSong("song1.mp3", "Song 1");
-    songs.MakeSong("song2.wav", "Song 2");
-    songs.MakeSong("song3.exe", "Song 3");
+  SECTION("add new tracks") {
+    tracks.MakeTrack("track1.mp3", "Track 1");
+    tracks.MakeTrack("track2.wav", "Track 2");
+    tracks.MakeTrack("track3.exe", "Track 3");
 
     db->Update();
 
-    std::unique_ptr<Result<Song>> res(db->GetSongs(10).get());
+    std::unique_ptr<Result<Track>> res(db->GetTracks(10).get());
     REQUIRE(res->values().size() == 3);
-    CHECK(*res->values().at(0).tags().title == "Song 1");
+    CHECK(*res->values().at(0).tags().title == "Track 1");
     CHECK(res->values().at(0).data().id() == 1);
-    CHECK(*res->values().at(1).tags().title == "Song 2");
+    CHECK(*res->values().at(1).tags().title == "Track 2");
     CHECK(res->values().at(1).data().id() == 2);
-    CHECK(*res->values().at(2).tags().title == "Song 3");
+    CHECK(*res->values().at(2).tags().title == "Track 3");
     CHECK(res->values().at(2).data().id() == 3);
 
     SECTION("update with no filesystem changes") {
       db->Update();
 
-      std::unique_ptr<Result<Song>> new_res(db->GetSongs(10).get());
+      std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get());
       REQUIRE(new_res->values().size() == 3);
       CHECK(res->values().at(0) == new_res->values().at(0));
       CHECK(res->values().at(1) == new_res->values().at(1));
       CHECK(res->values().at(2) == new_res->values().at(2));
     }
 
-    SECTION("update with all songs gone") {
-      songs.songs.clear();
+    SECTION("update with all tracks gone") {
+      tracks.tracks.clear();
 
       db->Update();
 
-      std::unique_ptr<Result<Song>> new_res(db->GetSongs(10).get());
+      std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get());
       CHECK(new_res->values().size() == 0);
 
-      SECTION("update with one song returned") {
-        songs.MakeSong("song2.wav", "Song 2");
+      SECTION("update with one track returned") {
+        tracks.MakeTrack("track2.wav", "Track 2");
 
         db->Update();
 
-        std::unique_ptr<Result<Song>> new_res(db->GetSongs(10).get());
+        std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get());
         REQUIRE(new_res->values().size() == 1);
         CHECK(res->values().at(1) == new_res->values().at(0));
       }
     }
 
-    SECTION("update with one song gone") {
-      songs.songs.erase("song2.wav");
+    SECTION("update with one track gone") {
+      tracks.tracks.erase("track2.wav");
 
       db->Update();
 
-      std::unique_ptr<Result<Song>> new_res(db->GetSongs(10).get());
+      std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get());
       REQUIRE(new_res->values().size() == 2);
       CHECK(res->values().at(0) == new_res->values().at(0));
       CHECK(res->values().at(2) == new_res->values().at(1));
     }
 
     SECTION("update with tags changed") {
-      songs.MakeSong("song3.exe", "The Song 3");
+      tracks.MakeTrack("track3.exe", "The Track 3");
 
       db->Update();
 
-      std::unique_ptr<Result<Song>> new_res(db->GetSongs(10).get());
+      std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get());
       REQUIRE(new_res->values().size() == 3);
       CHECK(res->values().at(0) == new_res->values().at(0));
       CHECK(res->values().at(1) == new_res->values().at(1));
-      CHECK(*new_res->values().at(2).tags().title == "The Song 3");
+      CHECK(*new_res->values().at(2).tags().title == "The Track 3");
       // The id should not have changed, since this was just a tag update.
       CHECK(res->values().at(2).data().id() ==
             new_res->values().at(2).data().id());
     }
 
-    SECTION("update with one new song") {
-      songs.MakeSong("my song.midi", "Song 1 (nightcore remix)");
+    SECTION("update with one new track") {
+      tracks.MakeTrack("my track.midi", "Track 1 (nightcore remix)");
 
       db->Update();
 
-      std::unique_ptr<Result<Song>> new_res(db->GetSongs(10).get());
+      std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get());
       REQUIRE(new_res->values().size() == 4);
       CHECK(res->values().at(0) == new_res->values().at(0));
       CHECK(res->values().at(1) == new_res->values().at(1));
       CHECK(res->values().at(2) == new_res->values().at(2));
       CHECK(*new_res->values().at(3).tags().title ==
-            "Song 1 (nightcore remix)");
+            "Track 1 (nightcore remix)");
       CHECK(new_res->values().at(3).data().id() == 4);
     }
 
-    SECTION("get songs with pagination") {
-      std::unique_ptr<Result<Song>> res(db->GetSongs(1).get());
+    SECTION("get tracks with pagination") {
+      std::unique_ptr<Result<Track>> res(db->GetTracks(1).get());
 
       REQUIRE(res->values().size() == 1);
       CHECK(res->values().at(0).data().id() == 1);
diff --git a/src/database/test/test_records.cpp b/src/database/test/test_records.cpp
index ca518458..5729003e 100644
--- a/src/database/test/test_records.cpp
+++ b/src/database/test/test_records.cpp
@@ -25,9 +25,9 @@ std::string ToHex(const std::string& s) {
 namespace database {
 
 TEST_CASE("database record encoding", "[unit]") {
-  SECTION("song id to bytes") {
-    SongId id = 1234678;
-    OwningSlice as_bytes = SongIdToBytes(id);
+  SECTION("track id to bytes") {
+    TrackId id = 1234678;
+    OwningSlice as_bytes = TrackIdToBytes(id);
 
     SECTION("encodes correctly") {
       // Purposefully a brittle test, since we need to be very careful about
@@ -44,18 +44,18 @@ TEST_CASE("database record encoding", "[unit]") {
     }
 
     SECTION("round-trips") {
-      CHECK(*BytesToSongId(as_bytes.data) == id);
+      CHECK(*BytesToTrackId(as_bytes.data) == id);
     }
 
     SECTION("encodes compactly") {
-      OwningSlice small_id = SongIdToBytes(1);
-      OwningSlice large_id = SongIdToBytes(999999);
+      OwningSlice small_id = TrackIdToBytes(1);
+      OwningSlice large_id = TrackIdToBytes(999999);
 
       CHECK(small_id.data.size() < large_id.data.size());
     }
 
     SECTION("decoding rejects garbage") {
-      std::optional<SongId> res = BytesToSongId("i'm gay");
+      std::optional<TrackId> res = BytesToTrackId("i'm gay");
 
       CHECK(res.has_value() == false);
     }
@@ -73,7 +73,7 @@ TEST_CASE("database record encoding", "[unit]") {
   }
 
   SECTION("data values") {
-    SongData data(123, "/some/path.mp3", 0xACAB, 69, true);
+    TrackData data(123, "/some/path.mp3", 0xACAB, 69, true);
 
     OwningSlice enc = CreateDataValue(data);
 
@@ -109,7 +109,7 @@ TEST_CASE("database record encoding", "[unit]") {
     }
 
     SECTION("decoding rejects garbage") {
-      std::optional<SongData> res = ParseDataValue("hi!");
+      std::optional<TrackData> res = ParseDataValue("hi!");
 
       CHECK(res.has_value() == false);
     }
@@ -129,14 +129,14 @@ TEST_CASE("database record encoding", "[unit]") {
   SECTION("hash values") {
     OwningSlice val = CreateHashValue(123456);
 
-    CHECK(val.data == SongIdToBytes(123456).data);
+    CHECK(val.data == TrackIdToBytes(123456).data);
 
     SECTION("round-trips") {
       CHECK(ParseHashValue(val.slice) == 123456);
     }
 
     SECTION("decoding rejects garbage") {
-      std::optional<SongId> res = ParseHashValue("the first song :)");
+      std::optional<TrackId> res = ParseHashValue("the first track :)");
 
       CHECK(res.has_value() == false);
     }
diff --git a/src/database/song.cpp b/src/database/track.cpp
similarity index 62%
rename from src/database/song.cpp
rename to src/database/track.cpp
index c717e55e..00acc1f6 100644
--- a/src/database/song.cpp
+++ b/src/database/track.cpp
@@ -4,7 +4,7 @@
  * SPDX-License-Identifier: GPL-3.0-only
  */
 
-#include "song.hpp"
+#include "track.hpp"
 
 #include <komihash.h>
 
@@ -19,8 +19,8 @@ auto HashString(komihash_stream_t* stream, std::string str) -> void {
  * Uses a komihash stream to incrementally hash tags. This lowers the function's
  * memory footprint a little so that it's safe to call from any stack.
  */
-auto SongTags::Hash() const -> uint64_t {
-  // TODO(jacqueline): this function doesn't work very well for songs with no
+auto TrackTags::Hash() const -> uint64_t {
+  // TODO(jacqueline): this function doesn't work very well for tracks with no
   // tags at all.
   komihash_stream_t stream;
   komihash_stream_init(&stream, 0);
@@ -30,20 +30,20 @@ auto SongTags::Hash() const -> uint64_t {
   return komihash_stream_final(&stream);
 }
 
-auto SongData::UpdateHash(uint64_t new_hash) const -> SongData {
-  return SongData(id_, filepath_, new_hash, play_count_, is_tombstoned_);
+auto TrackData::UpdateHash(uint64_t new_hash) const -> TrackData {
+  return TrackData(id_, filepath_, new_hash, play_count_, is_tombstoned_);
 }
 
-auto SongData::Entomb() const -> SongData {
-  return SongData(id_, filepath_, tags_hash_, play_count_, true);
+auto TrackData::Entomb() const -> TrackData {
+  return TrackData(id_, filepath_, tags_hash_, play_count_, true);
 }
 
-auto SongData::Exhume(const std::string& new_path) const -> SongData {
-  return SongData(id_, new_path, tags_hash_, play_count_, false);
+auto TrackData::Exhume(const std::string& new_path) const -> TrackData {
+  return TrackData(id_, new_path, tags_hash_, play_count_, false);
 }
 
-void swap(Song& first, Song& second) {
-  Song temp = first;
+void swap(Track& first, Track& second) {
+  Track temp = first;
   first = second;
   second = temp;
 }

From 5b7b88420b169d1dfdeea6d082762fccefbc7d49 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Fri, 16 Jun 2023 13:14:45 +1000
Subject: [PATCH 5/7] Fix issues with importing my entire library

---
 src/app_console/app_console.cpp         | 61 ++++++++++++++-----------
 src/app_console/include/app_console.hpp |  5 +-
 src/database/database.cpp               |  8 ++--
 src/database/env_esp.cpp                |  5 +-
 src/database/include/env_esp.hpp        |  2 +-
 src/database/tag_parser.cpp             |  6 +++
 src/drivers/samd.cpp                    |  6 +--
 src/drivers/touchwheel.cpp              |  4 +-
 src/system_fsm/booting.cpp              |  4 +-
 src/system_fsm/include/system_fsm.hpp   |  5 +-
 src/system_fsm/running.cpp              |  4 ++
 src/system_fsm/system_fsm.cpp           |  2 +
 src/tasks/tasks.cpp                     | 22 ++++++++-
 src/tasks/tasks.hpp                     |  5 ++
 14 files changed, 91 insertions(+), 48 deletions(-)

diff --git a/src/app_console/app_console.cpp b/src/app_console/app_console.cpp
index 457d66f6..81a49e99 100644
--- a/src/app_console/app_console.cpp
+++ b/src/app_console/app_console.cpp
@@ -20,14 +20,11 @@
 #include "esp_console.h"
 #include "esp_log.h"
 #include "event_queue.hpp"
+#include "ff.h"
 
 namespace console {
 
-static AppConsole* sInstance = nullptr;
-
-std::string toSdPath(const std::string& filepath) {
-  return std::string("/") + filepath;
-}
+std::weak_ptr<database::Database> AppConsole::sDatabase;
 
 int CmdListDir(int argc, char** argv) {
   static const std::string usage = "usage: ls [directory]";
@@ -36,7 +33,7 @@ int CmdListDir(int argc, char** argv) {
     return 1;
   }
 
-  auto lock = sInstance->database_.lock();
+  auto lock = AppConsole::sDatabase.lock();
   if (lock == nullptr) {
     std::cout << "storage is not available" << std::endl;
     return 1;
@@ -44,18 +41,38 @@ int CmdListDir(int argc, char** argv) {
 
   std::string path;
   if (argc == 2) {
-    path = toSdPath(argv[1]);
+    path = argv[1];
   } else {
-    path = toSdPath("");
+    path = "";
+  }
+
+  FF_DIR dir;
+  FRESULT res = f_opendir(&dir, path.c_str());
+  if (res != FR_OK) {
+    std::cout << "failed to open directory. does it exist?" << std::endl;
+    return 1;
   }
 
-  DIR* dir;
-  struct dirent* ent;
-  dir = opendir(path.c_str());
-  while ((ent = readdir(dir))) {
-    std::cout << ent->d_name << std::endl;
+  for (;;) {
+    FILINFO info;
+    res = f_readdir(&dir, &info);
+    if (res != FR_OK || info.fname[0] == 0) {
+      // No more files in the directory.
+      break;
+    } else {
+      std::cout << path;
+      if (!path.ends_with('/') && !path.empty()) {
+        std::cout << '/';
+      }
+      std::cout << info.fname;
+      if (info.fattrib & AM_DIR) {
+        std::cout << '/';
+      }
+      std::cout << std::endl;
+    }
   }
-  closedir(dir);
+
+  f_closedir(&dir);
 
   return 0;
 }
@@ -101,7 +118,7 @@ int CmdDbInit(int argc, char** argv) {
     return 1;
   }
 
-  auto db = sInstance->database_.lock();
+  auto db = AppConsole::sDatabase.lock();
   if (!db) {
     std::cout << "no database open" << std::endl;
     return 1;
@@ -128,13 +145,13 @@ int CmdDbTracks(int argc, char** argv) {
     return 1;
   }
 
-  auto db = sInstance->database_.lock();
+  auto db = AppConsole::sDatabase.lock();
   if (!db) {
     std::cout << "no database open" << std::endl;
     return 1;
   }
   std::unique_ptr<database::Result<database::Track>> res(
-      db->GetTracks(5).get());
+      db->GetTracks(20).get());
   while (true) {
     for (database::Track s : res->values()) {
       std::cout << s.tags().title.value_or("[BLANK]") << std::endl;
@@ -166,7 +183,7 @@ int CmdDbDump(int argc, char** argv) {
     return 1;
   }
 
-  auto db = sInstance->database_.lock();
+  auto db = AppConsole::sDatabase.lock();
   if (!db) {
     std::cout << "no database open" << std::endl;
     return 1;
@@ -201,14 +218,6 @@ void RegisterDbDump() {
   esp_console_cmd_register(&cmd);
 }
 
-AppConsole::AppConsole(const std::weak_ptr<database::Database>& database)
-    : database_(database) {
-  sInstance = this;
-}
-AppConsole::~AppConsole() {
-  sInstance = nullptr;
-}
-
 auto AppConsole::RegisterExtraComponents() -> void {
   RegisterListDir();
   RegisterPlayFile();
diff --git a/src/app_console/include/app_console.hpp b/src/app_console/include/app_console.hpp
index 32242d16..48ce0d38 100644
--- a/src/app_console/include/app_console.hpp
+++ b/src/app_console/include/app_console.hpp
@@ -15,10 +15,7 @@ namespace console {
 
 class AppConsole : public Console {
  public:
-  explicit AppConsole(const std::weak_ptr<database::Database>& database);
-  virtual ~AppConsole();
-
-  const std::weak_ptr<database::Database>& database_;
+  static std::weak_ptr<database::Database> sDatabase;
 
  protected:
   virtual auto RegisterExtraComponents() -> void;
diff --git a/src/database/database.cpp b/src/database/database.cpp
index 9206256f..a96b3eab 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -17,6 +17,7 @@
 
 #include "esp_log.h"
 #include "ff.h"
+#include "freertos/projdefs.h"
 #include "leveldb/cache.h"
 #include "leveldb/db.h"
 #include "leveldb/iterator.h"
@@ -68,12 +69,13 @@ auto Database::Open(IFileGatherer* gatherer, ITagParser* parser)
     return cpp::fail(DatabaseError::ALREADY_OPEN);
   }
 
+  leveldb::sBackgroundThread.reset(
+      tasks::Worker::Start<tasks::Type::kDatabaseBackground>());
   std::shared_ptr<tasks::Worker> worker(
       tasks::Worker::Start<tasks::Type::kDatabase>());
-  leveldb::sBackgroundThread = std::weak_ptr<tasks::Worker>(worker);
   return worker
       ->Dispatch<cpp::result<Database*, DatabaseError>>(
-          [&]() -> cpp::result<Database*, DatabaseError> {
+          [=]() -> cpp::result<Database*, DatabaseError> {
             leveldb::DB* db;
             leveldb::Cache* cache = leveldb::NewLRUCache(24 * 1024);
             leveldb::Options options;
@@ -121,7 +123,7 @@ Database::~Database() {
   delete db_;
   delete cache_;
 
-  leveldb::sBackgroundThread = std::weak_ptr<tasks::Worker>();
+  leveldb::sBackgroundThread.reset();
 
   sIsDbOpen.store(false);
 }
diff --git a/src/database/env_esp.cpp b/src/database/env_esp.cpp
index 704e0a54..ad1f2221 100644
--- a/src/database/env_esp.cpp
+++ b/src/database/env_esp.cpp
@@ -15,6 +15,7 @@
 #include <cstring>
 #include <functional>
 #include <limits>
+#include <memory>
 #include <mutex>
 #include <queue>
 #include <set>
@@ -39,7 +40,7 @@
 
 namespace leveldb {
 
-std::weak_ptr<tasks::Worker> sBackgroundThread;
+std::shared_ptr<tasks::Worker> sBackgroundThread;
 
 std::string ErrToStr(FRESULT err) {
   switch (err) {
@@ -463,7 +464,7 @@ EspEnv::EspEnv() {}
 void EspEnv::Schedule(
     void (*background_work_function)(void* background_work_arg),
     void* background_work_arg) {
-  auto worker = sBackgroundThread.lock();
+  auto worker = sBackgroundThread;
   if (worker) {
     worker->Dispatch<void>(
         [=]() { std::invoke(background_work_function, background_work_arg); });
diff --git a/src/database/include/env_esp.hpp b/src/database/include/env_esp.hpp
index c7da6d91..eba6e8a9 100644
--- a/src/database/include/env_esp.hpp
+++ b/src/database/include/env_esp.hpp
@@ -18,7 +18,7 @@
 
 namespace leveldb {
 
-extern std::weak_ptr<tasks::Worker> sBackgroundThread;
+extern std::shared_ptr<tasks::Worker> sBackgroundThread;
 
 // Tracks the files locked by EspEnv::LockFile().
 //
diff --git a/src/database/tag_parser.cpp b/src/database/tag_parser.cpp
index 5bca0b58..83b0a796 100644
--- a/src/database/tag_parser.cpp
+++ b/src/database/tag_parser.cpp
@@ -73,6 +73,12 @@ static const char* kTag = "TAGS";
 
 auto TagParserImpl::ReadAndParseTags(const std::string& path, TrackTags* out)
     -> bool {
+  if (path.ends_with(".m4a")) {
+    // TODO(jacqueline): Re-enable once libtags is fixed.
+    ESP_LOGW(kTag, "skipping m4a %s", path.c_str());
+    return false;
+  }
+
   libtags::Aux aux;
   aux.tags = out;
   if (f_stat(path.c_str(), &aux.info) != FR_OK ||
diff --git a/src/drivers/samd.cpp b/src/drivers/samd.cpp
index 5ed99666..e87fc9d8 100644
--- a/src/drivers/samd.cpp
+++ b/src/drivers/samd.cpp
@@ -38,7 +38,7 @@ Samd::Samd() {
       .read(&raw_res, I2C_MASTER_NACK)
       .stop();
   ESP_LOGI(kTag, "checking samd firmware rev");
-  ESP_ERROR_CHECK(transaction.Execute());
+  transaction.Execute();
   ESP_LOGI(kTag, "samd firmware: %u", raw_res);
 }
 Samd::~Samd() {}
@@ -53,7 +53,7 @@ auto Samd::ReadChargeStatus() -> std::optional<ChargeStatus> {
       .read(&raw_res, I2C_MASTER_NACK)
       .stop();
   ESP_LOGI(kTag, "checking charge status");
-  ESP_ERROR_CHECK(transaction.Execute());
+  transaction.Execute();
   ESP_LOGI(kTag, "raw charge status: %x", raw_res);
 
   uint8_t usb_state = raw_res & 0b11;
@@ -83,7 +83,7 @@ auto Samd::WriteAllowUsbMsc(bool is_allowed) -> void {
       .write_addr(kAddress, I2C_MASTER_WRITE)
       .write_ack(kRegisterUsbMsc, is_allowed)
       .stop();
-  ESP_ERROR_CHECK(transaction.Execute());
+  transaction.Execute();
 }
 
 auto Samd::ReadUsbMscStatus() -> UsbMscStatus {
diff --git a/src/drivers/touchwheel.cpp b/src/drivers/touchwheel.cpp
index cd189388..576b6dad 100644
--- a/src/drivers/touchwheel.cpp
+++ b/src/drivers/touchwheel.cpp
@@ -65,7 +65,7 @@ void TouchWheel::WriteRegister(uint8_t reg, uint8_t val) {
       .write_addr(kTouchWheelAddress, I2C_MASTER_WRITE)
       .write_ack(reg, val)
       .stop();
-  ESP_ERROR_CHECK(transaction.Execute());
+  transaction.Execute();
 }
 
 uint8_t TouchWheel::ReadRegister(uint8_t reg) {
@@ -78,7 +78,7 @@ uint8_t TouchWheel::ReadRegister(uint8_t reg) {
       .write_addr(kTouchWheelAddress, I2C_MASTER_READ)
       .read(&res, I2C_MASTER_NACK)
       .stop();
-  ESP_ERROR_CHECK(transaction.Execute());
+  transaction.Execute();
   return res;
 }
 
diff --git a/src/system_fsm/booting.cpp b/src/system_fsm/booting.cpp
index 1ad8c02d..1e1b2959 100644
--- a/src/system_fsm/booting.cpp
+++ b/src/system_fsm/booting.cpp
@@ -27,8 +27,6 @@ namespace states {
 
 static const char kTag[] = "BOOT";
 
-console::AppConsole* Booting::sAppConsole;
-
 auto Booting::entry() -> void {
   ESP_LOGI(kTag, "beginning tangara boot");
   ESP_LOGI(kTag, "installing early drivers");
@@ -78,7 +76,7 @@ auto Booting::entry() -> void {
 
 auto Booting::exit() -> void {
   // TODO(jacqueline): Gate this on something. Debug flag? Flashing mode?
-  sAppConsole = new console::AppConsole(sDatabase);
+  sAppConsole = new console::AppConsole();
   sAppConsole->Launch();
 }
 
diff --git a/src/system_fsm/include/system_fsm.hpp b/src/system_fsm/include/system_fsm.hpp
index 89cdcf47..037c0a0e 100644
--- a/src/system_fsm/include/system_fsm.hpp
+++ b/src/system_fsm/include/system_fsm.hpp
@@ -56,6 +56,8 @@ class SystemState : public tinyfsm::Fsm<SystemState> {
   static std::shared_ptr<drivers::SdStorage> sStorage;
   static std::shared_ptr<drivers::Display> sDisplay;
   static std::shared_ptr<database::Database> sDatabase;
+
+  static console::AppConsole* sAppConsole;
 };
 
 namespace states {
@@ -65,9 +67,6 @@ namespace states {
  * looks good.
  */
 class Booting : public SystemState {
- private:
-  static console::AppConsole* sAppConsole;
-
  public:
   void entry() override;
   void exit() override;
diff --git a/src/system_fsm/running.cpp b/src/system_fsm/running.cpp
index 9116ec9d..f9ff6140 100644
--- a/src/system_fsm/running.cpp
+++ b/src/system_fsm/running.cpp
@@ -4,6 +4,7 @@
  * SPDX-License-Identifier: GPL-3.0-only
  */
 
+#include "app_console.hpp"
 #include "freertos/projdefs.h"
 #include "result.hpp"
 
@@ -28,6 +29,7 @@ void Running::entry() {
   vTaskDelay(pdMS_TO_TICKS(250));
   auto storage_res = drivers::SdStorage::Create(sGpioExpander.get());
   if (storage_res.has_error()) {
+    ESP_LOGW(kTag, "failed to mount!");
     events::Dispatch<StorageError, SystemState, audio::AudioState, ui::UiState>(
         StorageError());
     return;
@@ -38,11 +40,13 @@ void Running::entry() {
   ESP_LOGI(kTag, "opening database");
   auto database_res = database::Database::Open();
   if (database_res.has_error()) {
+    ESP_LOGW(kTag, "failed to open!");
     events::Dispatch<StorageError, SystemState, audio::AudioState, ui::UiState>(
         StorageError());
     return;
   }
   sDatabase.reset(database_res.value());
+  console::AppConsole::sDatabase = sDatabase;
 
   ESP_LOGI(kTag, "storage loaded okay");
   events::Dispatch<StorageMounted, SystemState, audio::AudioState, ui::UiState>(
diff --git a/src/system_fsm/system_fsm.cpp b/src/system_fsm/system_fsm.cpp
index bcab298c..c59c0908 100644
--- a/src/system_fsm/system_fsm.cpp
+++ b/src/system_fsm/system_fsm.cpp
@@ -20,6 +20,8 @@ std::shared_ptr<drivers::SdStorage> SystemState::sStorage;
 std::shared_ptr<drivers::Display> SystemState::sDisplay;
 std::shared_ptr<database::Database> SystemState::sDatabase;
 
+console::AppConsole* SystemState::sAppConsole;
+
 void SystemState::react(const FatalError& err) {
   if (!is_in_state<states::Error>()) {
     transit<states::Error>();
diff --git a/src/tasks/tasks.cpp b/src/tasks/tasks.cpp
index 2477d8b9..c28f463c 100644
--- a/src/tasks/tasks.cpp
+++ b/src/tasks/tasks.cpp
@@ -5,7 +5,9 @@
  */
 
 #include "tasks.hpp"
+
 #include <functional>
+
 #include "esp_heap_caps.h"
 #include "freertos/FreeRTOS.h"
 #include "freertos/portmacro.h"
@@ -31,6 +33,10 @@ template <>
 auto Name<Type::kDatabase>() -> std::string {
   return "DB";
 }
+template <>
+auto Name<Type::kDatabaseBackground>() -> std::string {
+  return "DB_BG";
+}
 
 template <Type t>
 auto AllocateStack() -> cpp::span<StackType_t>;
@@ -39,7 +45,7 @@ auto AllocateStack() -> cpp::span<StackType_t>;
 // amount of stack space.
 template <>
 auto AllocateStack<Type::kAudio>() -> cpp::span<StackType_t> {
-  std::size_t size = 48 * 1024;
+  std::size_t size = 32 * 1024;
   return {static_cast<StackType_t*>(heap_caps_malloc(size, MALLOC_CAP_DEFAULT)),
           size};
 }
@@ -67,6 +73,12 @@ auto AllocateStack<Type::kDatabase>() -> cpp::span<StackType_t> {
   return {static_cast<StackType_t*>(heap_caps_malloc(size, MALLOC_CAP_SPIRAM)),
           size};
 }
+template <>
+auto AllocateStack<Type::kDatabaseBackground>() -> cpp::span<StackType_t> {
+  std::size_t size = 256 * 1024;
+  return {static_cast<StackType_t*>(heap_caps_malloc(size, MALLOC_CAP_SPIRAM)),
+          size};
+}
 
 // 2048 bytes in internal ram
 // 302 KiB in external ram.
@@ -106,6 +118,10 @@ template <>
 auto Priority<Type::kDatabase>() -> UBaseType_t {
   return 8;
 }
+template <>
+auto Priority<Type::kDatabaseBackground>() -> UBaseType_t {
+  return 7;
+}
 
 template <Type t>
 auto WorkerQueueSize() -> std::size_t;
@@ -114,6 +130,10 @@ template <>
 auto WorkerQueueSize<Type::kDatabase>() -> std::size_t {
   return 8;
 }
+template <>
+auto WorkerQueueSize<Type::kDatabaseBackground>() -> std::size_t {
+  return 8;
+}
 
 template <>
 auto WorkerQueueSize<Type::kUiFlush>() -> std::size_t {
diff --git a/src/tasks/tasks.hpp b/src/tasks/tasks.hpp
index 955acd9f..4e5dfd17 100644
--- a/src/tasks/tasks.hpp
+++ b/src/tasks/tasks.hpp
@@ -36,6 +36,8 @@ enum class Type {
   kAudio,
   // Task for running database queries.
   kDatabase,
+  // Task for internal database operations.
+  kDatabaseBackground,
 };
 
 template <Type t>
@@ -102,6 +104,9 @@ class Worker {
   }
 
   ~Worker();
+
+  Worker(const Worker&) = delete;
+  Worker& operator=(const Worker&) = delete;
 };
 
 /* Specialisation of Evaluate for functions that return nothing. */

From 4e5dba158367f9b8b440e6c5051c95d9c22b76d7 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Mon, 19 Jun 2023 08:49:48 +1000
Subject: [PATCH 6/7] Fix leveldb's alignment assumptions (we have 4-byte
 alignment)

---
 lib/leveldb/util/arena.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/leveldb/util/arena.cc b/lib/leveldb/util/arena.cc
index 46e3b2eb..99224447 100644
--- a/lib/leveldb/util/arena.cc
+++ b/lib/leveldb/util/arena.cc
@@ -36,7 +36,7 @@ char* Arena::AllocateFallback(size_t bytes) {
 }
 
 char* Arena::AllocateAligned(size_t bytes) {
-  const int align = (sizeof(void*) > 8) ? sizeof(void*) : 8;
+  const int align = 4;
   static_assert((align & (align - 1)) == 0,
                 "Pointer size should be a power of 2");
   size_t current_mod = reinterpret_cast<uintptr_t>(alloc_ptr_) & (align - 1);

From 6ff8b5886ef91ed46dba08686900d519f6c9c62d Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Mon, 19 Jun 2023 08:51:34 +1000
Subject: [PATCH 7/7] Support playing tracks by track id

---
 src/audio/audio_decoder.cpp              |  6 ++--
 src/audio/audio_fsm.cpp                  | 46 ++++++++++++++++++++++--
 src/audio/fatfs_audio_input.cpp          | 30 +++++++++++++++-
 src/audio/include/audio_events.hpp       |  2 +-
 src/audio/include/audio_fsm.hpp          | 11 +++++-
 src/audio/include/fatfs_audio_input.hpp  |  3 ++
 src/database/database.cpp                | 12 +++++++
 src/database/include/database.hpp        |  2 ++
 src/system_fsm/include/system_events.hpp |  7 +++-
 src/system_fsm/running.cpp               |  2 +-
 src/tasks/tasks.cpp                      |  2 +-
 11 files changed, 112 insertions(+), 11 deletions(-)

diff --git a/src/audio/audio_decoder.cpp b/src/audio/audio_decoder.cpp
index abb94e85..b4af65fb 100644
--- a/src/audio/audio_decoder.cpp
+++ b/src/audio/audio_decoder.cpp
@@ -89,9 +89,11 @@ auto AudioDecoder::Process(const std::vector<InputStream>& inputs,
   // Check the input stream's format has changed (or, by extension, if this is
   // the first stream).
   if (!current_input_format_ || *current_input_format_ != info.format) {
-    ESP_LOGI(kTag, "beginning new stream");
     has_samples_to_send_ = false;
-    ProcessStreamInfo(info);
+    if (!ProcessStreamInfo(info)) {
+      return;
+    }
+    ESP_LOGI(kTag, "beginning new stream");
     auto res = current_codec_->BeginStream(input->data());
     input->consume(res.first);
     if (res.second.has_error()) {
diff --git a/src/audio/audio_fsm.cpp b/src/audio/audio_fsm.cpp
index 0be28250..a65b9f3b 100644
--- a/src/audio/audio_fsm.cpp
+++ b/src/audio/audio_fsm.cpp
@@ -5,6 +5,7 @@
  */
 
 #include "audio_fsm.hpp"
+#include <future>
 #include <memory>
 #include <variant>
 #include "audio_decoder.hpp"
@@ -14,6 +15,7 @@
 #include "i2s_audio_output.hpp"
 #include "i2s_dac.hpp"
 #include "pipeline.hpp"
+#include "track.hpp"
 
 namespace audio {
 
@@ -59,18 +61,38 @@ auto AudioState::Init(drivers::GpioExpander* gpio_expander,
   return true;
 }
 
+void AudioState::react(const system_fsm::StorageMounted& ev) {
+  sDatabase = ev.db;
+}
+
 namespace states {
 
 void Uninitialised::react(const system_fsm::BootComplete&) {
   transit<Standby>();
 }
 
-void Standby::react(const PlayFile& ev) {
-  if (sFileSource->OpenFile(ev.filename)) {
-    transit<Playback>();
+void Standby::react(const InputFileOpened& ev) {
+  transit<Playback>();
+}
+
+void Standby::react(const PlayTrack& ev) {
+  auto db = sDatabase.lock();
+  if (!db) {
+    ESP_LOGW(kTag, "database not open; ignoring play request");
+    return;
+  }
+
+  if (ev.data) {
+    sFileSource->OpenFile(ev.data->filepath());
+  } else {
+    sFileSource->OpenFile(db->GetTrackPath(ev.id));
   }
 }
 
+void Standby::react(const PlayFile& ev) {
+  sFileSource->OpenFile(ev.filename);
+}
+
 void Playback::entry() {
   ESP_LOGI(kTag, "beginning playback");
   sI2SOutput->SetInUse(true);
@@ -81,6 +103,16 @@ void Playback::exit() {
   sI2SOutput->SetInUse(false);
 }
 
+void Playback::react(const PlayTrack& ev) {
+  sTrackQueue.push_back(EnqueuedItem(ev.id));
+}
+
+void Playback::react(const PlayFile& ev) {
+  sTrackQueue.push_back(EnqueuedItem(ev.filename));
+}
+
+void Playback::react(const InputFileOpened& ev) {}
+
 void Playback::react(const InputFileFinished& ev) {
   ESP_LOGI(kTag, "finished file");
   if (sTrackQueue.empty()) {
@@ -91,6 +123,14 @@ void Playback::react(const InputFileFinished& ev) {
 
   if (std::holds_alternative<std::string>(next_item)) {
     sFileSource->OpenFile(std::get<std::string>(next_item));
+  } else if (std::holds_alternative<database::TrackId>(next_item)) {
+    auto db = sDatabase.lock();
+    if (!db) {
+      ESP_LOGW(kTag, "database not open; ignoring play request");
+      return;
+    }
+    sFileSource->OpenFile(
+        db->GetTrackPath(std::get<database::TrackId>(next_item)));
   }
 }
 
diff --git a/src/audio/fatfs_audio_input.cpp b/src/audio/fatfs_audio_input.cpp
index c26ff0ad..77b104d3 100644
--- a/src/audio/fatfs_audio_input.cpp
+++ b/src/audio/fatfs_audio_input.cpp
@@ -8,7 +8,9 @@
 #include <stdint.h>
 
 #include <algorithm>
+#include <chrono>
 #include <cstdint>
+#include <future>
 #include <memory>
 #include <string>
 #include <variant>
@@ -38,6 +40,7 @@ namespace audio {
 
 FatfsAudioInput::FatfsAudioInput()
     : IAudioElement(),
+      pending_path_(),
       current_file_(),
       is_file_open_(false),
       current_container_(),
@@ -45,11 +48,19 @@ FatfsAudioInput::FatfsAudioInput()
 
 FatfsAudioInput::~FatfsAudioInput() {}
 
+auto FatfsAudioInput::OpenFile(std::future<std::optional<std::string>>&& path)
+    -> void {
+  pending_path_ = std::move(path);
+}
+
 auto FatfsAudioInput::OpenFile(const std::string& path) -> bool {
   if (is_file_open_) {
     f_close(&current_file_);
     is_file_open_ = false;
   }
+  if (pending_path_) {
+    pending_path_ = {};
+  }
   ESP_LOGI(kTag, "opening file %s", path.c_str());
 
   database::TagParserImpl tag_parser;
@@ -89,16 +100,33 @@ auto FatfsAudioInput::OpenFile(const std::string& path) -> bool {
     return false;
   }
 
+  events::Dispatch<InputFileOpened, AudioState>({});
   is_file_open_ = true;
   return true;
 }
 
 auto FatfsAudioInput::NeedsToProcess() const -> bool {
-  return is_file_open_;
+  return is_file_open_ || pending_path_;
 }
 
 auto FatfsAudioInput::Process(const std::vector<InputStream>& inputs,
                               OutputStream* output) -> void {
+  if (pending_path_) {
+    ESP_LOGI(kTag, "waiting for path");
+    if (!pending_path_->valid()) {
+      pending_path_ = {};
+    } else {
+      if (pending_path_->wait_for(std::chrono::seconds(0)) ==
+          std::future_status::ready) {
+        ESP_LOGI(kTag, "path ready!");
+        auto result = pending_path_->get();
+        if (result) {
+          OpenFile(*result);
+        }
+      }
+    }
+  }
+
   if (!is_file_open_) {
     return;
   }
diff --git a/src/audio/include/audio_events.hpp b/src/audio/include/audio_events.hpp
index eebf5efe..60a0740c 100644
--- a/src/audio/include/audio_events.hpp
+++ b/src/audio/include/audio_events.hpp
@@ -21,9 +21,9 @@ struct PlayFile : tinyfsm::Event {
 struct PlayTrack : tinyfsm::Event {
   database::TrackId id;
   std::optional<database::TrackData> data;
-  std::optional<database::TrackTags> tags;
 };
 
+struct InputFileOpened : tinyfsm::Event {};
 struct InputFileFinished : tinyfsm::Event {};
 struct AudioPipelineIdle : tinyfsm::Event {};
 
diff --git a/src/audio/include/audio_fsm.hpp b/src/audio/include/audio_fsm.hpp
index 72654ab5..bd902706 100644
--- a/src/audio/include/audio_fsm.hpp
+++ b/src/audio/include/audio_fsm.hpp
@@ -38,10 +38,13 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
   /* Fallback event handler. Does nothing. */
   void react(const tinyfsm::Event& ev) {}
 
+  void react(const system_fsm::StorageMounted&);
+
   virtual void react(const system_fsm::BootComplete&) {}
   virtual void react(const PlayTrack&) {}
   virtual void react(const PlayFile&) {}
 
+  virtual void react(const InputFileOpened&) {}
   virtual void react(const InputFileFinished&) {}
   virtual void react(const AudioPipelineIdle&) {}
 
@@ -69,8 +72,10 @@ class Uninitialised : public AudioState {
 
 class Standby : public AudioState {
  public:
-  void react(const PlayTrack&) override {}
+  void react(const InputFileOpened&) override;
+  void react(const PlayTrack&) override;
   void react(const PlayFile&) override;
+
   using AudioState::react;
 };
 
@@ -79,6 +84,10 @@ class Playback : public AudioState {
   void entry() override;
   void exit() override;
 
+  void react(const PlayTrack&) override;
+  void react(const PlayFile&) override;
+
+  void react(const InputFileOpened&) override;
   void react(const InputFileFinished&) override;
   void react(const AudioPipelineIdle&) override;
 
diff --git a/src/audio/include/fatfs_audio_input.hpp b/src/audio/include/fatfs_audio_input.hpp
index f5a65d0d..ab392f54 100644
--- a/src/audio/include/fatfs_audio_input.hpp
+++ b/src/audio/include/fatfs_audio_input.hpp
@@ -7,6 +7,7 @@
 #pragma once
 
 #include <cstdint>
+#include <future>
 #include <memory>
 #include <string>
 #include <vector>
@@ -33,6 +34,7 @@ class FatfsAudioInput : public IAudioElement {
   FatfsAudioInput();
   ~FatfsAudioInput();
 
+  auto OpenFile(std::future<std::optional<std::string>>&& path) -> void;
   auto OpenFile(const std::string& path) -> bool;
 
   auto NeedsToProcess() const -> bool override;
@@ -47,6 +49,7 @@ class FatfsAudioInput : public IAudioElement {
   auto ContainerToStreamType(database::Encoding)
       -> std::optional<codecs::StreamType>;
 
+  std::optional<std::future<std::optional<std::string>>> pending_path_;
   FIL current_file_;
   bool is_file_open_;
 
diff --git a/src/database/database.cpp b/src/database/database.cpp
index a96b3eab..ac5e4873 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -229,6 +229,18 @@ auto Database::Update() -> std::future<void> {
   });
 }
 
+auto Database::GetTrackPath(TrackId id)
+    -> std::future<std::optional<std::string>> {
+  return worker_task_->Dispatch<std::optional<std::string>>(
+      [=, this]() -> std::optional<std::string> {
+        auto track_data = dbGetTrackData(id);
+        if (track_data) {
+          return track_data->filepath();
+        }
+        return {};
+      });
+}
+
 auto Database::GetTracks(std::size_t page_size) -> std::future<Result<Track>*> {
   return worker_task_->Dispatch<Result<Track>*>([=, this]() -> Result<Track>* {
     Continuation<Track> c{.iterator = nullptr,
diff --git a/src/database/include/database.hpp b/src/database/include/database.hpp
index 1a8388e8..8fecc5f6 100644
--- a/src/database/include/database.hpp
+++ b/src/database/include/database.hpp
@@ -82,6 +82,8 @@ class Database {
 
   auto Update() -> std::future<void>;
 
+  auto GetTrackPath(TrackId id) -> std::future<std::optional<std::string>>;
+
   auto GetTracks(std::size_t page_size) -> std::future<Result<Track>*>;
   auto GetDump(std::size_t page_size) -> std::future<Result<std::string>*>;
 
diff --git a/src/system_fsm/include/system_events.hpp b/src/system_fsm/include/system_events.hpp
index 38929616..ec202c69 100644
--- a/src/system_fsm/include/system_events.hpp
+++ b/src/system_fsm/include/system_events.hpp
@@ -6,6 +6,9 @@
 
 #pragma once
 
+#include <memory>
+
+#include "database.hpp"
 #include "tinyfsm.hpp"
 
 namespace system_fsm {
@@ -38,7 +41,9 @@ struct StorageUnmountRequested : tinyfsm::Event {};
 /*
  * Sent by SysState when the system storage has been successfully mounted.
  */
-struct StorageMounted : tinyfsm::Event {};
+struct StorageMounted : tinyfsm::Event {
+  std::weak_ptr<database::Database> db;
+};
 
 struct StorageError : tinyfsm::Event {};
 
diff --git a/src/system_fsm/running.cpp b/src/system_fsm/running.cpp
index f9ff6140..87c25440 100644
--- a/src/system_fsm/running.cpp
+++ b/src/system_fsm/running.cpp
@@ -50,7 +50,7 @@ void Running::entry() {
 
   ESP_LOGI(kTag, "storage loaded okay");
   events::Dispatch<StorageMounted, SystemState, audio::AudioState, ui::UiState>(
-      StorageMounted());
+      StorageMounted{.db = sDatabase});
 }
 
 void Running::exit() {
diff --git a/src/tasks/tasks.cpp b/src/tasks/tasks.cpp
index c28f463c..7365813e 100644
--- a/src/tasks/tasks.cpp
+++ b/src/tasks/tasks.cpp
@@ -45,7 +45,7 @@ auto AllocateStack() -> cpp::span<StackType_t>;
 // amount of stack space.
 template <>
 auto AllocateStack<Type::kAudio>() -> cpp::span<StackType_t> {
-  std::size_t size = 32 * 1024;
+  std::size_t size = 48 * 1024;
   return {static_cast<StackType_t*>(heap_caps_malloc(size, MALLOC_CAP_DEFAULT)),
           size};
 }