add komihash: the fast and fun hashing header!

2 years ago · 86c44199b0
parent ee559cd722
commit 86c44199b0
7 changed files with 1432 additions and 0 deletions
--- a/lib/komihash/CMakeLists.txt
+++ b/lib/komihash/CMakeLists.txt
@ -0,0 +1,3 @@
+idf_component_register(
+  INCLUDE_DIRS .
+)
--- a/lib/komihash/LICENSE
+++ b/lib/komihash/LICENSE
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021-2022 Aleksey Vaneev
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/lib/komihash/README.md
+++ b/lib/komihash/README.md
@ -0,0 +1,477 @@
+# KOMIHASH - Very Fast Hash Function (in C) #
+
+## Introduction ##
+
+The `komihash()` function available in the `komihash.h` file implements a very
+fast 64-bit hash function, mainly designed for hash-table and hash-map uses;
+produces identical hashes on both big- and little-endian systems. Function's
+code is portable, scalar, header-only inlineable C (C++).
+
+This function features both a high large-block hashing performance (26 GB/s
+on Ryzen 3700X) and a high hashing throughput for small strings/messages
+(about 10 cycles/hash for 0-15-byte strings). Performance on 32-bit systems
+is, however, quite low. Also, large-block hashing performance on big-endian
+systems may be 20% lower due to the need of byte-swapping (can be switched off
+with a define).
+
+Technically, `komihash` is close to the class of hash functions like `wyhash`
+and `CircleHash`, which are, in turn, close to the `lehmer64` PRNG. However,
+`komihash` is structurally different to them in that it accumulates the full
+128-bit multiplication result, without "compression" into a single 64-bit
+state variable. Thus `komihash` does not lose differentiation between
+consecutive states while others may. Another important difference in
+`komihash` is that it parses the input message without overlaps. While
+overlaps allow a function to have fewer code branches, they are considered
+"non-ideal", potentially causing collisions and seed value flaws. Beside that,
+`komihash` features superior seed value handling and Perlin Noise hashing.
+
+Note that this function is not cryptographically-secure: in open systems it
+should only be used with a secret seed, to minimize the chance of a collision
+attack. However, when the default seed is used (0), this further reduces
+function's overhead by 1-2 cycles/hash (compiler-dependent).
+
+This function passes all [SMHasher](https://github.com/rurban/smhasher) tests.
+
+An aspect worth noting, important to some users, is that `komihash` at its
+base uses a very simple mathematical construct, and uses no author-intended
+nor author-fabricated information. The base state of the function is equal to
+the first mantissa bits of PI, and can be changed to any uniformly-random
+values.
+
+## Discrete-Incremental Hashing ##
+
+A correct way to hash an array of independent values, and which does not
+require pre-buffering, is to pass previous hash value as a seed value. This
+method may be as fast or faster than pre-buffering, especially if lengths of
+values in the array are not small. An additional 1-2 cycles/hash advantage is
+obtained if fixed-size values are being hashed incrementally (due to
+compiler's branching optimization). In most cases, incremental hashing of even
+a few 2-8-byte values may be faster than using pre-buffering if the overall
+input length is not known in advance.
+
+```
+	uint64_t HashVal = komihash( &val1, sizeof( val1 ), Seed );
+	HashVal = komihash( &val2, sizeof( val2 ), HashVal );
+	...
+	HashVal = komihash( &valN, sizeof( valN ), HashVal );
+```
+
+Note that this approach is not the same as "streamed" hashing since this
+approach implicitly encodes the length of each independent value. Such kind of
+hashing can be beneficial when a database record is being hashed, when it is
+necessary to separate fields by means of encoding their lengths.
+
+Discrete-incremental hashing of nested structures requires a "hash value
+stack" where the current hash value is pushed into it upon each nesting, the
+nested level starts at hash value 0, and the resulting value is hashed with a
+popped previous hash value upon exiting the nesting level.
+
+## Streamed Hashing ##
+
+The `komihash.h` file also features a fast continuously streamed
+implementation of the `komihash` hash function. Streamed hashing expects any
+number of `update` calls inbetween the `init` and `final` calls:
+
+```
+	komihash_stream_t ctx;
+	komihash_stream_init( &ctx, UseSeed );
+
+	komihash_stream_update( &ctx, &val1, sizeof( val1 ));
+	komihash_stream_update( &ctx, &val2, sizeof( val2 ));
+	...
+	komihash_stream_update( &ctx, &valN, sizeof( valN ));
+
+	uint64_t Hash = komihash_stream_final( &ctx );
+```
+
+Since the `final` function is non-destructive for the context structure, the
+function can be used to obtain intermediate "incremental" hashes of the data
+stream being hashed, and the hashing can then be resumed.
+
+The hash value produced via streamed hashing can be used in the
+discrete-incremental hashing outlined above (e.g., for files and blobs).
+
+You may also consider using [PRVHASH64S](https://github.com/avaneev/prvhash)
+which provides 8.4 GB/s hashing throughput on Ryzen 3700X, and is able to
+produce a hash value of any required bit-size.
+
+## Ports ##
+
+* [Java, by Dynatrace](https://github.com/dynatrace-oss/hash4j)
+* [LUA, by rangercyh](https://github.com/rangercyh/lua-komihash)
+* [.NET](https://www.nuget.org/packages/FastHashes/)
+* [Rust](https://crates.io/crates/komihash)
+
+## Comparisons ##
+
+These are the performance comparisons made and used by the author during the
+development of `komihash`.
+
+### LLVM clang-cl 8.0.1 64-bit, Windows 10, Ryzen 3700X (Zen2), 4.2 GHz ###
+
+Compiler options: `/Ox /arch:sse2`; overhead: `1.8` cycles/h.
+
+|Hash function   |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s     |
+|----            |----           |----           |----           |
+|**komihash 4.5**|11.0           |12.7           |26.2           |
+|komihash 4.3    |11.2           |13.0           |26.0           |
+|komihash 3.6    |11.1           |16.9           |27.5           |
+|komihash 2.8    |11.3           |17.4           |27.7           |
+|wyhash_final3   |13.4           |17.8           |29.7           |
+|XXH3_64 0.8.0   |17.5           |21.1           |29.0           |
+|XXH64 0.8.0     |12.7           |17.3           |17.3           |
+|prvhash64m 4.1  |19.9           |26.1           |4.1            |
+
+Compiler options: `/Ox -mavx2`; overhead: `1.8` cycles/h.
+
+|Hash function   |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s     |
+|----            |----           |----           |----           |
+|**komihash 4.5**|11.1           |12.7           |26.3           |
+|komihash 4.3    |11.2           |13.0           |25.9           |
+|komihash 3.6    |11.0           |16.3           |27.5           |
+|komihash 2.8    |11.1           |17.7           |27.8           |
+|wyhash_final3   |13.4           |17.7           |29.8           |
+|XXH3_64 0.8.0   |17.7           |21.3           |61.0           |
+|XXH64 0.8.0     |12.8           |17.4           |17.1           |
+|prvhash64m 4.1  |20.0           |26.2           |4.1            |
+
+### ICC 19.0 64-bit, Windows 10, Ryzen 3700X (Zen2), 4.2 GHz ###
+
+Compiler options: `/O3 /QxSSE2`; overhead: `2.0` cycles/h.
+
+|Hash function   |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s     |
+|----            |----           |----           |----           |
+|**komihash 4.5**|18.1           |21.9           |16.4           |
+|komihash 4.3    |17.9           |21.6           |16.3           |
+|komihash 3.6    |20.1           |24.0           |16.3           |
+|komihash 2.8    |21.3           |25.6           |16.2           |
+|wyhash_final3   |24.1           |32.0           |12.6           |
+|XXH3_64 0.8.0   |21.8           |27.2           |29.6           |
+|XXH64 0.8.0     |24.3           |36.6           |8.9            |
+|prvhash64m 4.1  |29.9           |39.1           |3.2            |
+
+(this is likely a worst-case scenario, when a compiler was not cross-tuned
+to a competing processor architecture; also, ICC for Windows does not support
+the `__builtin_expect` and `__builtin_prefetch` intrinsics)
+
+### LLVM clang 12.0.1 64-bit, CentOS 8, Xeon E-2176G (CoffeeLake), 4.5 GHz ###
+
+Compiler options: `-O3 -mavx2`; overhead: `5.3` cycles/h.
+
+|Hash function   |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s     |
+|----            |----           |----           |----           |
+|**komihash 4.5**|12.8           |14.4           |22.4           |
+|komihash 4.3    |15.3           |16.3           |22.8           |
+|komihash 3.6    |16.0           |19.0           |22.3           |
+|komihash 2.8    |18.1           |22.3           |23.5           |
+|wyhash_final3   |14.0           |18.7           |28.4           |
+|XXH3_64 0.8.0   |18.0           |29.3           |51.0           |
+|XXH64 0.8.0     |12.5           |16.4           |18.2           |
+|prvhash64m 4.1  |27.0           |29.9           |4.3            |
+
+### GCC 8.5.0 64-bit, CentOS 8, Xeon E-2176G (CoffeeLake), 4.5 GHz ###
+
+Compiler options: `-O3 -msse2`; overhead: `5.8` cycles/h.
+
+|Hash function   |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s     |
+|----            |----           |----           |----           |
+|**komihash 4.5**|13.2           |15.1           |24.7           |
+|komihash 4.3    |15.4           |16.2           |24.4           |
+|komihash 3.6    |16.4           |20.3           |24.7           |
+|komihash 2.8    |18.5           |22.4           |24.7           |
+|wyhash_final3   |14.9           |19.5           |29.8           |
+|XXH3_64 0.8.0   |16.9           |22.3           |26.6           |
+|XXH64 0.8.0     |13.7           |17.7           |18.0           |
+|prvhash64m 4.1  |23.2           |27.8           |4.3            |
+
+Compiler options: `-O3 -mavx2`; overhead: `5.8` cycles/h.
+
+|Hash function   |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s     |
+|----            |----           |----           |----           |
+|**komihash 4.5**|13.8           |15.2           |24.7           |
+|komihash 4.3    |15.3           |16.4           |24.4           |
+|komihash 3.6    |15.8           |20.1           |24.7           |
+|komihash 2.8    |16.6           |21.2           |24.7           |
+|wyhash_final3   |15.4           |19.0           |30.1           |
+|XXH3_64 0.8.0   |18.8           |23.4           |38.0           |
+|XXH64 0.8.0     |15.3           |17.9           |18.1           |
+|prvhash64m 4.1  |21.7           |27.1           |4.4            |
+
+### LLVM clang 8.0.0 64-bit, Windows 10, Core i7-7700K (KabyLake), 4.5 GHz ###
+
+Compiler options: `/Ox -mavx2`; overhead: `5.5` cycles/h.
+
+|Hash function   |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s     |
+|----            |----           |----           |----           |
+|**komihash 4.5**|12.6           |14.5           |22.2           |
+|komihash 4.3    |14.1           |16.0           |22.0           |
+|komihash 3.6    |14.0           |22.0           |22.9           |
+|komihash 2.8    |13.4           |22.7           |23.7           |
+|wyhash_final3   |14.5           |20.1           |30.0           |
+|XXH3_64 0.8.0   |18.4           |23.0           |48.3           |
+|XXH64 0.8.0     |13.2           |17.3           |17.7           |
+|prvhash64m 4.1  |23.2           |29.6           |4.1            |
+
+### ICC 19.0 64-bit, Windows 10, Core i7-7700K (KabyLake), 4.5 GHz ###
+
+Compiler options: `/O3 /QxSSE2`; overhead: `5.9` cycles/h.
+
+|Hash function   |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s     |
+|----            |----           |----           |----           |
+|**komihash 4.5**|18.1           |21.1           |17.2           |
+|komihash 4.3    |18.7           |21.5           |18.5           |
+|komihash 3.6    |19.5           |23.1           |18.1           |
+|komihash 2.8    |20.1           |23.6           |18.4           |
+|wyhash_final3   |19.2           |24.5           |20.0           |
+|XXH3_64 0.8.0   |19.9           |25.8           |28.0           |
+|XXH64 0.8.0     |18.8           |24.7           |16.0           |
+|prvhash64m 4.1  |25.5           |32.4           |3.2            |
+
+### Apple clang 12.0.0 64-bit, macOS 12.0.1, Apple M1, 3.5 GHz ###
+
+Compiler options: `-O3`; overhead: `unestimatable`.
+
+|Hash function   |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s     |
+|----            |----           |----           |----           |
+|**komihash 4.5**|8.3            |8.7            |23.6           |
+|komihash 4.3    |8.6            |9.0            |23.6           |
+|komihash 3.6    |8.5            |10.7           |23.6           |
+|komihash 2.8    |10.1           |11.4           |23.5           |
+|wyhash_final3   |7.9            |8.0            |26.1           |
+|XXH3_64 0.8.0   |8.2            |8.2            |30.5           |
+|XXH64 0.8.0     |8.8            |10.4           |14.5           |
+|prvhash64m 4.1  |12.9           |16.8           |3.5            |
+
+Notes: `XXH3_64` is unseeded (seeded variant is 1 cycle/h higher). `bulk` is
+256000 bytes: this means it is mainly a cache-bound performance, not
+reflective of high-load situations. `GB/s` should not be misinterpreted as
+`GiB/s`. `cycles/h` means `processor clock ticks per hash value`, including
+overhead. Measurement error is approximately 3%.
+
+### Averages over all measurements (overhead excluded) ###
+
+|Hash function   |0-15b, cycles/h|8-28b, cycles/h|
+|----            |----           |----           |
+|**komihash 4.5**|**9.5**        |**11.4**       |
+|komihash 4.3    |10.4           |12.1           |
+|komihash 3.6    |10.9           |15.4           |
+|komihash 2.8    |11.8           |16.7           |
+|wyhash_final3   |11.4           |15.9           |
+|XXH3_64 0.8.0   |13.7           |18.6           |
+|XXH64 0.8.0     |10.9           |15.8           |
+|prvhash64m 4.1  |18.8           |24.6           |
+
+This is the throughput comparison of hash functions on Ryzen 3700X. The used
+measurement method actually measures hash function's "latencied throughput",
+or sequential hashing, due to the use of the "volatile" variable specifiers
+and result accumulation.
+
+![TP plot](https://github.com/avaneev/komihash/blob/main/hash_comparison.png)
+
+The following method was used to obtain the `cycles/h` values. Note that this
+method measures a "raw" throughput, when processor's branch predictor tunes to
+a specific message length and a specific memory address. Practical performance
+depends on actual statistics of strings (messages) being hashed, including
+memory access patterns. Note that particular hash functions may "over-favor"
+specific message lengths. In this respect, `komihash` does not "favor" any
+specific length, thus it may be more universal. Throughput aside, hashing
+quality is also an important factor as it drives a hash-map's creation and
+subsequent accesses. This, and many other synthetic hash function tests should
+be taken with a grain of salt. Only an actual use-case can reveal which hash
+function is preferrable.
+
+```
+	const uint64_t rc = 1ULL << 26;
+	const int minl = 8; const int maxl = 28;
+	volatile uint64_t msg[ 8 ] = { 0 };
+	uint64_t v = 0;
+
+	const TClock t1( CSystem :: getClock() );
+
+	for( int k = minl; k <= maxl; k++ )
+	{
+		volatile size_t msgl = k;
+		volatile uint64_t sd = k + 1;
+
+		for( uint64_t i = 0; i < rc; i++ )
+		{
+			v ^= komihash( (uint8_t*) &msg, msgl, sd );
+//			v ^= wyhash( (uint8_t*) &msg, msgl, sd, _wyp );
+//			v ^= XXH3_64bits( (uint8_t*) &msg, msgl );
+//			v ^= msg[ 0 ]; // Used to estimate the overhead.
+			msg[ 0 ]++;
+		}
+	}
+
+	printf( "%016llx\n", v );
+	printf( "%.1f\n", CSystem :: getClockDiffSec( t1 ) * 4.2e9 /
+		( rc * ( maxl - minl + 1 ))); // 4.5 on Xeon, 4.5 on i7700K, 3.5 on M1
+```
+
+## Discussion ##
+
+You may wonder, why `komihash` does not include a quite common `^MsgLen` XOR
+instruction at some place in the code? The main reason is that due to the way
+`komihash` parses the input message such instruction is not necessary. Another
+reason is that for a non-cryptographic hash function such instruction provides
+no additional security: while it may seem that such instruction protects from
+simple "state XORing" collision attacks, in practice it offers no protection,
+if one considers how powerful [SAT solvers](https://github.com/pysathq/pysat)
+are: in less than a second they can "forge" a preimage which produces a
+required hash value. It is also important to note that in such "fast" hash
+functions like `komihash` the input message has complete control over the
+state variables.
+
+Is 128-bit version of this hash function planned? Most probably, no, it is
+not. While such version may be reasonable for data structure compatibility
+reasons, there is no much practical sense to use 128-bit hashes at a local
+level: a reliable 64-bit hash allows one to have 2.1 billion diverse binary
+objects (e.g. files in a file system, or entries in a hash-map) without
+collisions, on average. On the other hand, on a worldwide scale, having
+128-bit hashes is clearly not enough considering the number of existing
+digital devices and the number of diverse binary objects (e.g. files, records
+in databases) on each of them.
+
+An opinion on the "bulk" performance of "fast" hash functions: in most
+practical situations, when processor's total memory bandwidth is limited to
+e.g. 41 GB/s, a "bulk" single-threaded hashing performance on the order of
+30 GB/s is excessive considering memory bandwidth has to be spread over
+multiple cores. So, practically, such "fast" hash function, working on a
+high-load 8-core server, rarely receives more than 8 GB/s of bandwidth.
+Another factor worth mentioning is that a server rarely has more than 10 Gb/s
+network connectivity, thus further reducing practical hashing performance of
+incoming data. The same applies to disk system's throughput, if on-disk data
+is not yet in memory.
+
+## KOMIRAND ##
+
+The `komirand()` function available in the `komihash.h` file implements a
+simple, but reliable, self-starting, and fast (`0.62` cycles/byte) 64-bit
+pseudo-random number generator (PRNG) with `2^64` period. It is based on the
+same mathematical construct as the `komihash` hash function. `komirand`
+passes `PractRand` tests.
+
+## Other ##
+
+This function is named the way it is named is to honor
+the [Komi Republic](https://en.wikipedia.org/wiki/Komi_Republic) (located in
+Russia), native to the author.
+
+## Test Vectors ##
+
+Test vectors for the current version of `komihash`, string-hash pairs (note
+that the parentheses are not included in the calculation). The `bulk` is a
+buffer with increasing 8-bit values; `bulk` hashes are calculated from this
+buffer using various lengths. See the `testvec.c` file for details.
+
+```
+	komihash UseSeed = 0x0000000000000000:
+	"This is a 32-byte tester string." = 0x8e92e061278366d2
+	"The cat is out of the bag" = 0xd15723521d3c37b1
+	"A 16-byte string" = 0x467caa28ea3da7a6
+	"The new string" = 0xf18e67bc90c43233
+	"7 bytes" = 0xe72e558f5eaf2554
+	bulk(6) = 0xa56469564c2ea0ff
+	bulk(12) = 0x64c2ad96013f70fe
+	bulk(20) = 0x7a3888bc95545364
+	bulk(31) = 0xc77e02ed4b201b9a
+	bulk(32) = 0x256d74350303a1ba
+	bulk(40) = 0x59609c71697bb9df
+	bulk(47) = 0x36eb9e6a4c2c5e4b
+	bulk(48) = 0x8dd56c332850baa6
+	bulk(56) = 0xcbb722192b353999
+	bulk(64) = 0x5cf87bcba93e6a5b
+	bulk(72) = 0x6c79a1d9474f003f
+	bulk(80) = 0x88684fa67b351c33
+	bulk(112) = 0xdc481a2af36a87dd
+	bulk(132) = 0xe172275e13a1c938
+	bulk(256) = 0xa9d9cde10342d965
+
+	komihash UseSeed = 0x0123456789abcdef:
+	"This is a 32-byte tester string." = 0x6455c9cfdd577ebd
+	"The cat is out of the bag" = 0x5b1da0b43545d196
+	"A 16-byte string" = 0x26af914213d0c915
+	"The new string" = 0x62d9ca1b73250cb5
+	"7 bytes" = 0x2bf17dbb71d92897
+	bulk(6) = 0xaceebc32a3c0d9e4
+	bulk(12) = 0xec8eb3ef4af380b4
+	bulk(20) = 0x07045bd31abba34c
+	bulk(31) = 0xd5f619fb2e62c4ae
+	bulk(32) = 0x5a336fd2c4c39abe
+	bulk(40) = 0x0e870b4623eea8ec
+	bulk(47) = 0xe552edd6bf419d1d
+	bulk(48) = 0x37d170ddcb1223e6
+	bulk(56) = 0x1cd89e708e5098b6
+	bulk(64) = 0x4da1005904c8d804
+	bulk(72) = 0xc8b03f196b2551ee
+	bulk(80) = 0x2d4d58743755344d
+	bulk(112) = 0x0e77e5c92f929bdd
+	bulk(132) = 0x0b3b216a1fc3234e
+	bulk(256) = 0xeb726377f8d072e8
+
+	komihash UseSeed = 0x0000000000000100:
+	"This is a 32-byte tester string." = 0x60ed46218532462a
+	"The cat is out of the bag" = 0xa761280322bb7698
+	"A 16-byte string" = 0x11c31ccabaa524f1
+	"The new string" = 0x3a43b7f58281c229
+	"7 bytes" = 0x3c8a980831b70dc8
+	bulk(6) = 0xea606e43d1976ccf
+	bulk(12) = 0xacbec1886cd23275
+	bulk(20) = 0x57c3affd1b71fcdb
+	bulk(31) = 0x7ef6ba49a3b068c3
+	bulk(32) = 0x49dbca62ed5a1ddf
+	bulk(40) = 0x192848484481e8c0
+	bulk(47) = 0x420b43a5edba1bd7
+	bulk(48) = 0xd6e8400a9de24ce3
+	bulk(56) = 0xbea291b225ff384d
+	bulk(64) = 0xf237bc1d85f12b52
+	bulk(72) = 0x577a4d993f26cd52
+	bulk(80) = 0xace499103def982d
+	bulk(112) = 0x200c46677408d850
+	bulk(132) = 0x6b003f62eba47761
+	bulk(256) = 0xa8a3bd0ecf908b92
+```
+
+```
+	komirand Seed1/Seed2 = 0x0000000000000000:
+	0xaaaaaaaaaaaaaaaa
+	0xfffffffffffffffe
+	0x4924924924924910
+	0xbaebaebaebaeba00
+	0x400c62cc4727496b
+	0x35a969173e8f925b
+	0xdb47f6bae9a247ad
+	0x98e0f6cece6711fe
+	0x97ffa2397fda534b
+	0x11834262360df918
+	0x34e53df5399f2252
+	0xecaeb74a81d648ed
+
+	komirand Seed1/Seed2 = 0x0123456789abcdef:
+	0x776ad9718078ca64
+	0x737aa5d5221633d0
+	0x685046cca30f6f44
+	0xfb725cb01b30c1ba
+	0xc501cc999ede619f
+	0x8427298e525db507
+	0xd9baf3c54781f75e
+	0x7f5a4e5b97b37c7b
+	0xde8a0afe8e03b8c1
+	0xb6ed3e72b69fc3d6
+	0xa68727902f7628d0
+	0x44162b63af484587
+
+	komirand Seed1/Seed2 = 0x0000000000000100:
+	0xaaaaaaaaaaababaa
+	0xfffffffff8fcf8fe
+	0xdb6dba1e4dbb1134
+	0xf5b7d3aec37f4cb1
+	0x66a571da7ded7051
+	0x2d59ec9245bf03d9
+	0x5c06a41bd510aed8
+	0xea5e7ea9d2bd07a2
+	0xe395015ddce7756f
+	0xc07981aaeaae3b38
+	0x2e120ebfee59a5a2
+	0x9001eee495244dba
+```
--- a/lib/komihash/hash_comparison.png
+++ b/lib/komihash/hash_comparison.png
--- a/lib/komihash/komihash.h
+++ b/lib/komihash/komihash.h
@ -0,0 +1,831 @@
+/**
+ * komihash.h version 4.7
+ *
+ * The inclusion file for the "komihash" hash function, "komirand" 64-bit
+ * PRNG, and streamed "komihash" implementation.
+ *
+ * Description is available at https://github.com/avaneev/komihash
+ *
+ * License
+ *
+ * Copyright (c) 2021-2022 Aleksey Vaneev
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KOMIHASH_INCLUDED
+#define KOMIHASH_INCLUDED
+
+#include <stdint.h>
+#include <string.h>
+
+// Macros that apply byte-swapping.
+
+#if defined( __GNUC__ ) || defined( __clang__ )
+
+	#define KOMIHASH_BYTESW32( v ) __builtin_bswap32( v )
+	#define KOMIHASH_BYTESW64( v ) __builtin_bswap64( v )
+
+#elif defined( _MSC_VER )
+
+	#define KOMIHASH_BYTESW32( v ) _byteswap_ulong( v )
+	#define KOMIHASH_BYTESW64( v ) _byteswap_uint64( v )
+
+#else // defined( _MSC_VER )
+
+	#define KOMIHASH_BYTESW32( v ) ( \
+		( v & 0xFF000000 ) >> 24 | \
+		( v & 0x00FF0000 ) >> 8 | \
+		( v & 0x0000FF00 ) << 8 | \
+		( v & 0x000000FF ) << 24 )
+
+	#define KOMIHASH_BYTESW64( v ) ( \
+		( v & 0xFF00000000000000 ) >> 56 | \
+		( v & 0x00FF000000000000 ) >> 40 | \
+		( v & 0x0000FF0000000000 ) >> 24 | \
+		( v & 0x000000FF00000000 ) >> 8 | \
+		( v & 0x00000000FF000000 ) << 8 | \
+		( v & 0x0000000000FF0000 ) << 24 | \
+		( v & 0x000000000000FF00 ) << 40 | \
+		( v & 0x00000000000000FF ) << 56 )
+
+#endif // defined( _MSC_VER )
+
+// Endianness-definition macro, can be defined externally (e.g. =1, if
+// endianness-correction is unnecessary in any case, to reduce its associated
+// overhead).
+
+#if !defined( KOMIHASH_LITTLE_ENDIAN )
+	#if defined( _WIN32 ) || defined( __LITTLE_ENDIAN__ ) || \
+		( defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ )
+
+		#define KOMIHASH_LITTLE_ENDIAN 1
+
+	#elif defined( __BIG_ENDIAN__ ) || \
+		( defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ )
+
+		#define KOMIHASH_LITTLE_ENDIAN 0
+
+	#else // defined( __BIG_ENDIAN__ )
+
+		#warning KOMIHASH: cannot determine endianness, assuming little-endian.
+
+		#define KOMIHASH_LITTLE_ENDIAN 1
+
+	#endif // defined( __BIG_ENDIAN__ )
+#endif // !defined( KOMIHASH_LITTLE_ENDIAN )
+
+// Macros that apply byte-swapping, used for endianness-correction.
+
+#if KOMIHASH_LITTLE_ENDIAN
+
+	#define KOMIHASH_EC32( v ) ( v )
+	#define KOMIHASH_EC64( v ) ( v )
+
+#else // KOMIHASH_LITTLE_ENDIAN
+
+	#define KOMIHASH_EC32( v ) KOMIHASH_BYTESW32( v )
+	#define KOMIHASH_EC64( v ) KOMIHASH_BYTESW64( v )
+
+#endif // KOMIHASH_LITTLE_ENDIAN
+
+// Likelihood macros that are used for manually-guided micro-optimization.
+
+#if defined( __GNUC__ ) || defined( __clang__ )
+
+	#define KOMIHASH_LIKELY( x )  __builtin_expect( x, 1 )
+	#define KOMIHASH_UNLIKELY( x )  __builtin_expect( x, 0 )
+
+#else // likelihood macros
+
+	#define KOMIHASH_LIKELY( x ) ( x )
+	#define KOMIHASH_UNLIKELY( x ) ( x )
+
+#endif // likelihood macros
+
+// Memory address prefetch macro (temporal locality=1, in case a collision
+// resolution would be necessary).
+
+#if defined( __GNUC__ ) || defined( __clang__ )
+
+	#define KOMIHASH_PREFETCH( addr ) __builtin_prefetch( addr, 0, 1 )
+
+#else // prefetch macro
+
+	#define KOMIHASH_PREFETCH( addr )
+
+#endif // prefetch macro
+
+/**
+ * An auxiliary function that returns an unsigned 32-bit value created out of
+ * a sequence of bytes in memory. This function is used to convert endianness
+ * of in-memory 32-bit unsigned values, and to avoid unaligned memory
+ * accesses.
+ *
+ * @param p Pointer to 4 bytes in memory. Alignment is unimportant.
+ * @return Endianness-corrected 32-bit value from memory.
+ */
+
+static inline uint32_t kh_lu32ec( const uint8_t* const p )
+{
+	uint32_t v;
+	memcpy( &v, p, 4 );
+
+	return( KOMIHASH_EC32( v ));
+}
+
+/**
+ * An auxiliary function that returns an unsigned 64-bit value created out of
+ * a sequence of bytes in memory. This function is used to convert endianness
+ * of in-memory 64-bit unsigned values, and to avoid unaligned memory
+ * accesses.
+ *
+ * @param p Pointer to 8 bytes in memory. Alignment is unimportant.
+ * @return Endianness-corrected 64-bit value from memory.
+ */
+
+static inline uint64_t kh_lu64ec( const uint8_t* const p )
+{
+	uint64_t v;
+	memcpy( &v, p, 8 );
+
+	return( KOMIHASH_EC64( v ));
+}
+
+/**
+ * Function builds an unsigned 64-bit value out of remaining bytes in a
+ * message, and pads it with the "final byte". This function can only be
+ * called if less than 8 bytes are left to read. The message should be "long",
+ * permitting Msg[ -3 ] reads.
+ *
+ * @param Msg Message pointer, alignment is unimportant.
+ * @param MsgLen Message's remaining length, in bytes; can be 0.
+ * @return Final byte-padded value from the message.
+ */
+
+static inline uint64_t kh_lpu64ec_l3( const uint8_t* const Msg,
+	const size_t MsgLen )
+{
+	const int ml8 = -(int) ( MsgLen * 8 );
+
+	if( MsgLen < 4 )
+	{
+		const uint8_t* const Msg3 = Msg + MsgLen - 3;
+		const uint64_t m = (uint64_t) Msg3[ 0 ] | (uint64_t) Msg3[ 1 ] << 8 |
+			(uint64_t) Msg3[ 2 ] << 16;
+
+		return( 1ULL << (( Msg3[ 2 ] >> 7 ) - ml8 ) | m >> ( 24 + ml8 ));
+	}
+
+	const uint64_t mh = kh_lu32ec( Msg + MsgLen - 4 );
+	const uint64_t ml = kh_lu32ec( Msg );
+
+	return( 1ULL << ( (int) ( mh >> 31 ) - ml8 ) |
+		ml | ( mh >> ( 64 + ml8 )) << 32 );
+}
+
+/**
+ * Function builds an unsigned 64-bit value out of remaining bytes in a
+ * message, and pads it with the "final byte". This function can only be
+ * called if less than 8 bytes are left to read. Can be used on "short"
+ * messages, but MsgLen should be greater than 0.
+ *
+ * @param Msg Message pointer, alignment is unimportant.
+ * @param MsgLen Message's remaining length, in bytes; cannot be 0.
+ * @return Final byte-padded value from the message.
+ */
+
+static inline uint64_t kh_lpu64ec_nz( const uint8_t* const Msg,
+	const size_t MsgLen )
+{
+	const int ml8 = -(int) ( MsgLen * 8 );
+
+	if( MsgLen < 4 )
+	{
+		uint64_t m = Msg[ 0 ];
+		const uint8_t mf = Msg[ MsgLen - 1 ];
+
+		if( MsgLen > 1 )
+		{
+			m |= (uint64_t) Msg[ 1 ] << 8;
+
+			if( MsgLen > 2 )
+			{
+				m |= (uint64_t) mf << 16;
+			}
+		}
+
+		return( 1ULL << (( mf >> 7 ) - ml8 ) | m );
+	}
+
+	const uint64_t mh = kh_lu32ec( Msg + MsgLen - 4 );
+	const uint64_t ml = kh_lu32ec( Msg );
+
+	return( 1ULL << ( (int) ( mh >> 31 ) - ml8 ) |
+		ml | ( mh >> ( 64 + ml8 )) << 32 );
+}
+
+/**
+ * Function builds an unsigned 64-bit value out of remaining bytes in a
+ * message, and pads it with the "final byte". This function can only be
+ * called if less than 8 bytes are left to read. The message should be "long",
+ * permitting Msg[ -4 ] reads.
+ *
+ * @param Msg Message pointer, alignment is unimportant.
+ * @param MsgLen Message's remaining length, in bytes; can be 0.
+ * @return Final byte-padded value from the message.
+ */
+
+static inline uint64_t kh_lpu64ec_l4( const uint8_t* const Msg,
+	const size_t MsgLen )
+{
+	const int ml8 = -(int) ( MsgLen * 8 );
+
+	if( MsgLen < 5 )
+	{
+		const uint64_t m = kh_lu32ec( Msg + MsgLen - 4 );
+
+		return( 1ULL << ( (int) ( m >> 31 ) - ml8 ) | m >> ( 32 + ml8 ));
+	}
+
+	const uint64_t m = kh_lu64ec( Msg + MsgLen - 8 );
+
+	return( 1ULL << ( (int) ( m >> 63 ) - ml8 ) | m >> ( 64 + ml8 ));
+}
+
+#if defined( __SIZEOF_INT128__ )
+
+	/**
+	 * 64-bit by 64-bit unsigned multiplication.
+	 *
+	 * @param m1 Multiplier 1.
+	 * @param m2 Multiplier 2.
+	 * @param[out] rl The lower half of the 128-bit result.
+	 * @param[out] rh The higher half of the 128-bit result.
+	 */
+
+	static inline void kh_m128( const uint64_t m1, const uint64_t m2,
+		uint64_t* const rl, uint64_t* const rh )
+	{
+		const __uint128_t r = (__uint128_t) m1 * m2;
+
+		*rl = (uint64_t) r;
+		*rh = (uint64_t) ( r >> 64 );
+	}
+
+#elif defined( _MSC_VER ) && defined( _M_X64 )
+
+	#include <intrin.h>
+
+	static inline void kh_m128( const uint64_t m1, const uint64_t m2,
+		uint64_t* const rl, uint64_t* const rh )
+	{
+		*rl = _umul128( m1, m2, rh );
+	}
+
+#else // defined( _MSC_VER )
+
+	// _umul128() code for 32-bit systems, adapted from mullu(),
+	// from https://go.dev/src/runtime/softfloat64.go
+	// Licensed under BSD-style license.
+
+	static inline uint64_t kh__emulu( const uint32_t x, const uint32_t y )
+	{
+		return( x * (uint64_t) y );
+	}
+
+	static inline void kh_m128( const uint64_t u, const uint64_t v,
+		uint64_t* const rl, uint64_t* const rh )
+	{
+		*rl = u * v;
+
+		const uint32_t u0 = (uint32_t) u;
+		const uint32_t v0 = (uint32_t) v;
+		const uint64_t w0 = kh__emulu( u0, v0 );
+		const uint32_t u1 = (uint32_t) ( u >> 32 );
+		const uint32_t v1 = (uint32_t) ( v >> 32 );
+		const uint64_t t = kh__emulu( u1, v0 ) + ( w0 >> 32 );
+		const uint64_t w1 = (uint32_t) t + kh__emulu( u0, v1 );
+
+		*rh = kh__emulu( u1, v1 ) + ( w1 >> 32 ) + ( t >> 32 );
+	}
+
+#endif // defined( _MSC_VER )
+
+// Macro for common hashing round with 16-byte input, using the "r1h"
+// temporary variable.
+
+#define KOMIHASH_HASH16( m ) \
+	kh_m128( Seed1 ^ kh_lu64ec( m ), \
+		Seed5 ^ kh_lu64ec( m + 8 ), &Seed1, &r1h ); \
+	Seed5 += r1h; \
+	Seed1 ^= Seed5;
+
+// Macro for common hashing round without input, using the "r2h" temporary
+// variable.
+
+#define KOMIHASH_HASHROUND() \
+	kh_m128( Seed1, Seed5, &Seed1, &r2h ); \
+	Seed5 += r2h; \
+	Seed1 ^= Seed5;
+
+// Macro for common hashing finalization round, with the final hashing input
+// expected in the "r1h" and "r2h" temporary variables. The macro inserts the
+// function return instruction.
+
+#define KOMIHASH_HASHFIN() \
+	kh_m128( r1h, r2h, &Seed1, &r1h ); \
+	Seed5 += r1h; \
+	Seed1 ^= Seed5; \
+	KOMIHASH_HASHROUND(); \
+	return( Seed1 );
+
+// Macro for a common 64-byte full-performance hashing loop. Expects Msg and
+// MsgLen values (greater than 63), requires initialized Seed1-8 values, uses
+// r1h-r4h temporary variables.
+//
+// The "shifting" arrangement of Seed1-4 (below) does not increase individual
+// SeedN's PRNG period beyond 2^64, but reduces a chance of any occassional
+// synchronization between PRNG lanes happening. Practically, Seed1-4 together
+// become a single "fused" 256-bit PRNG value, having a summary PRNG period.
+
+#define KOMIHASH_HASHLOOP64() \
+	do \
+	{ \
+		KOMIHASH_PREFETCH( Msg ); \
+	\
+		kh_m128( Seed1 ^ kh_lu64ec( Msg ), \
+			Seed5 ^ kh_lu64ec( Msg + 8 ), &Seed1, &r1h ); \
+	\
+		kh_m128( Seed2 ^ kh_lu64ec( Msg + 16 ), \
+			Seed6 ^ kh_lu64ec( Msg + 24 ), &Seed2, &r2h ); \
+	\
+		kh_m128( Seed3 ^ kh_lu64ec( Msg + 32 ), \
+			Seed7 ^ kh_lu64ec( Msg + 40 ), &Seed3, &r3h ); \
+	\
+		kh_m128( Seed4 ^ kh_lu64ec( Msg + 48 ), \
+			Seed8 ^ kh_lu64ec( Msg + 56 ), &Seed4, &r4h ); \
+	\
+		Msg += 64; \
+		MsgLen -= 64; \
+	\
+		Seed5 += r1h; \
+		Seed6 += r2h; \
+		Seed7 += r3h; \
+		Seed8 += r4h; \
+		Seed2 ^= Seed5; \
+		Seed3 ^= Seed6; \
+		Seed4 ^= Seed7; \
+		Seed1 ^= Seed8; \
+	\
+	} while( KOMIHASH_LIKELY( MsgLen > 63 ));
+
+/**
+ * The hashing epilogue function (for internal use).
+ *
+ * @param Msg Pointer to the remaining part of the message.
+ * @param MsgLen Remaining part's length, can be zero.
+ * @param Seed1 Latest Seed1 value.
+ * @param Seed5 Latest Seed5 value.
+ * @return 64-bit hash value.
+ */
+
+static inline uint64_t komihash_epi( const uint8_t* Msg, size_t MsgLen,
+	uint64_t Seed1, uint64_t Seed5 )
+{
+	uint64_t r1h, r2h;
+
+	KOMIHASH_PREFETCH( Msg );
+
+	if( KOMIHASH_LIKELY( MsgLen > 31 ))
+	{
+		KOMIHASH_HASH16( Msg );
+		KOMIHASH_HASH16( Msg + 16 );
+
+		Msg += 32;
+		MsgLen -= 32;
+	}
+
+	if( MsgLen > 15 )
+	{
+		KOMIHASH_HASH16( Msg );
+
+		Msg += 16;
+		MsgLen -= 16;
+	}
+
+	if( MsgLen > 7 )
+	{
+		r2h = Seed5 ^ kh_lpu64ec_l4( Msg + 8, MsgLen - 8 );
+		r1h = Seed1 ^ kh_lu64ec( Msg );
+	}
+	else
+	{
+		r1h = Seed1 ^ kh_lpu64ec_l4( Msg, MsgLen );
+		r2h = Seed5;
+	}
+
+	KOMIHASH_HASHFIN();
+}
+
+/**
+ * KOMIHASH hash function. Produces and returns a 64-bit hash value of the
+ * specified message, string, or binary data block. Designed for 64-bit
+ * hash-table and hash-map uses. Produces identical hashes on both big- and
+ * little-endian systems.
+ *
+ * @param Msg0 The message to produce a hash from. The alignment of this
+ * pointer is unimportant. It is valid to pass 0 when MsgLen==0 (assuming that
+ * compiler's implementation of the address prefetch is non-failing for any
+ * address).
+ * @param MsgLen Message's length, in bytes, can be zero.
+ * @param UseSeed Optional value, to use instead of the default seed. To use
+ * the default seed, set to 0. The UseSeed value can have any bit length and
+ * statistical quality, and is used only as an additional entropy source. May
+ * need endianness-correction if this value is shared between big- and
+ * little-endian systems.
+ * @return 64-bit hash of the input data.
+ */
+
+static inline uint64_t komihash( const void* const Msg0, size_t MsgLen,
+	const uint64_t UseSeed )
+{
+	const uint8_t* Msg = (const uint8_t*) Msg0;
+
+	// The seeds are initialized to the first mantissa bits of PI.
+
+	uint64_t Seed1 = 0x243F6A8885A308D3 ^ ( UseSeed & 0x5555555555555555 );
+	uint64_t Seed5 = 0x452821E638D01377 ^ ( UseSeed & 0xAAAAAAAAAAAAAAAA );
+	uint64_t r1h, r2h;
+
+	// The three instructions in the "KOMIHASH_HASHROUND" macro represent the
+	// simplest constant-less PRNG, scalable to any even-sized state
+	// variables, with the `Seed1` being the PRNG output (2^64 PRNG period).
+	// It passes `PractRand` tests with rare non-systematic "unusual"
+	// evaluations.
+	//
+	// To make this PRNG reliable, self-starting, and eliminate a risk of
+	// stopping, the following variant can be used, which is a "register
+	// checker-board", a source of raw entropy. The PRNG is available as the
+	// komirand() function. Not required for hashing (but works for it) since
+	// the input entropy is usually available in abundance during hashing.
+	//
+	// Seed5 += r2h + 0xAAAAAAAAAAAAAAAA;
+	//
+	// (the `0xAAAA...` constant should match register's size; essentially,
+	// it is a replication of the `10` bit-pair; it is not an arbitrary
+	// constant).
+
+	KOMIHASH_HASHROUND(); // Required for PerlinNoise.
+
+	if( KOMIHASH_LIKELY( MsgLen < 16 ))
+	{
+		KOMIHASH_PREFETCH( Msg );
+
+		r1h = Seed1;
+		r2h = Seed5;
+
+		if( MsgLen > 7 )
+		{
+			// The following two XOR instructions are equivalent to mixing a
+			// message with a cryptographic one-time-pad (bitwise modulo 2
+			// addition). Message's statistics and distribution are thus
+			// unimportant.
+
+			r2h ^= kh_lpu64ec_l3( Msg + 8, MsgLen - 8 );
+			r1h ^= kh_lu64ec( Msg );
+		}
+		else
+		if( KOMIHASH_LIKELY( MsgLen != 0 ))
+		{
+			r1h ^= kh_lpu64ec_nz( Msg, MsgLen );
+		}
+
+		KOMIHASH_HASHFIN();
+	}
+
+	if( KOMIHASH_LIKELY( MsgLen < 32 ))
+	{
+		KOMIHASH_PREFETCH( Msg );
+
+		KOMIHASH_HASH16( Msg );
+
+		if( MsgLen > 23 )
+		{
+			r2h = Seed5 ^ kh_lpu64ec_l4( Msg + 24, MsgLen - 24 );
+			r1h = Seed1 ^ kh_lu64ec( Msg + 16 );
+		}
+		else
+		{
+			r1h = Seed1 ^ kh_lpu64ec_l4( Msg + 16, MsgLen - 16 );
+			r2h = Seed5;
+		}
+
+		KOMIHASH_HASHFIN();
+	}
+
+	if( MsgLen > 63 )
+	{
+		uint64_t Seed2 = 0x13198A2E03707344 ^ Seed1;
+		uint64_t Seed3 = 0xA4093822299F31D0 ^ Seed1;
+		uint64_t Seed4 = 0x082EFA98EC4E6C89 ^ Seed1;
+		uint64_t Seed6 = 0xBE5466CF34E90C6C ^ Seed5;
+		uint64_t Seed7 = 0xC0AC29B7C97C50DD ^ Seed5;
+		uint64_t Seed8 = 0x3F84D5B5B5470917 ^ Seed5;
+		uint64_t r3h, r4h;
+
+		KOMIHASH_HASHLOOP64();
+
+		Seed5 ^= Seed6 ^ Seed7 ^ Seed8;
+		Seed1 ^= Seed2 ^ Seed3 ^ Seed4;
+	}
+
+	return( komihash_epi( Msg, MsgLen, Seed1, Seed5 ));
+}
+
+/**
+ * Simple, reliable, self-starting yet efficient PRNG, with 2^64 period.
+ * 0.62 cycles/byte performance. Self-starts in 4 iterations, which is a
+ * suggested "warming up" initialization before using its output.
+ *
+ * @param[in,out] Seed1 Seed value 1. Can be initialized to any value
+ * (even 0). This is the usual "PRNG seed" value.
+ * @param[in,out] Seed2 Seed value 2, a supporting variable. Best initialized
+ * to the same value as Seed1.
+ * @return The next uniformly-random 64-bit value.
+ */
+
+static inline uint64_t komirand( uint64_t* const Seed1, uint64_t* const Seed2 )
+{
+	uint64_t rh;
+
+	kh_m128( *Seed1, *Seed2, Seed1, &rh );
+	*Seed2 += rh + 0xAAAAAAAAAAAAAAAA;
+	*Seed1 ^= *Seed2;
+
+	return( *Seed1 );
+}
+
+#if !defined( KOMIHASH_BUFSIZE )
+
+	#define KOMIHASH_BUFSIZE 768 // Streamed hashing's buffer size in bytes,
+		// must be a multiple of 64, and not less than 128.
+
+#endif // !defined( KOMIHASH_BUFSIZE )
+
+/**
+ * Context structure that holds streamed hashing state. The komihash_init()
+ * function should be called to initalize the structure before hashing. Note
+ * that the default buffer size is modest, permitting placement of this
+ * structure on stack. Seed[ 0 ] is used as initial UseSeed storage.
+ */
+
+typedef struct {
+	uint8_t fb[ 8 ]; ///< Stream's final byte (at [7]), array to avoid OOB.
+	uint8_t Buf[ KOMIHASH_BUFSIZE ]; ///< Buffer.
+	uint64_t Seed[ 8 ]; ///< Hashing state variables.
+	size_t BufFill; ///< Buffer-fill length (position).
+	size_t IsHashing; ///< 0 or 1, equals 1 if the actual hashing was started.
+} komihash_stream_t;
+
+/**
+ * Function initializes the streamed "komihash" session.
+ *
+ * @param[out] ctx Pointer to the context structure.
+ * @param UseSeed Optional value, to use instead of the default seed. To use
+ * the default seed, set to 0. The UseSeed value can have any bit length and
+ * statistical quality, and is used only as an additional entropy source. May
+ * need endianness-correction if this value is shared between big- and
+ * little-endian systems.
+ */
+
+static inline void komihash_stream_init( komihash_stream_t* const ctx,
+	const uint64_t UseSeed )
+{
+	ctx -> Seed[ 0 ] = UseSeed;
+	ctx -> BufFill = 0;
+	ctx -> IsHashing = 0;
+}
+
+/**
+ * Function updates the streamed hashing state with a new input data.
+ *
+ * @param[in,out] ctx Pointer to the context structure. The structure must be
+ * initialized via the komihash_stream_init() function.
+ * @param Msg0 The next part of the whole message being hashed. The alignment
+ * of this pointer is unimportant. It is valid to pass 0 when MsgLen==0.
+ * @param MsgLen Message's length, in bytes, can be zero.
+ */
+
+static inline void komihash_stream_update( komihash_stream_t* const ctx,
+	const void* const Msg0, size_t MsgLen )
+{
+	const uint8_t* Msg = (const uint8_t*) Msg0;
+
+	const uint8_t* SwMsg = 0;
+	size_t SwMsgLen = 0;
+	size_t BufFill = ctx -> BufFill;
+
+	if( BufFill + MsgLen >= KOMIHASH_BUFSIZE && BufFill != 0 )
+	{
+		const size_t CopyLen = KOMIHASH_BUFSIZE - BufFill;
+		memcpy( ctx -> Buf + BufFill, Msg, CopyLen );
+		BufFill = 0;
+
+		SwMsg = Msg + CopyLen;
+		SwMsgLen = MsgLen - CopyLen;
+
+		Msg = ctx -> Buf;
+		MsgLen = KOMIHASH_BUFSIZE;
+	}
+	else
+	if( MsgLen < 9 )
+	{
+		// For buffering speed-up.
+
+		uint8_t* op = ctx -> Buf + BufFill;
+
+		if( MsgLen == 4 )
+		{
+			memcpy( op, Msg, 4 );
+			ctx -> BufFill = BufFill + 4;
+			return;
+		}
+
+		if( MsgLen == 8 )
+		{
+			memcpy( op, Msg, 8 );
+			ctx -> BufFill = BufFill + 8;
+			return;
+		}
+
+		ctx -> BufFill = BufFill + MsgLen;
+		uint8_t* const ope = op + MsgLen;
+
+		while( op != ope )
+		{
+			*op = *Msg;
+			Msg++;
+			op++;
+		}
+
+		return;
+	}
+
+	if( BufFill == 0 )
+	{
+		while( MsgLen > 127 )
+		{
+			uint64_t Seed1, Seed2, Seed3, Seed4;
+			uint64_t Seed5, Seed6, Seed7, Seed8;
+			uint64_t r1h, r2h, r3h, r4h;
+
+			if( ctx -> IsHashing )
+			{
+				Seed1 = ctx -> Seed[ 0 ];
+				Seed2 = ctx -> Seed[ 1 ];
+				Seed3 = ctx -> Seed[ 2 ];
+				Seed4 = ctx -> Seed[ 3 ];
+				Seed5 = ctx -> Seed[ 4 ];
+				Seed6 = ctx -> Seed[ 5 ];
+				Seed7 = ctx -> Seed[ 6 ];
+				Seed8 = ctx -> Seed[ 7 ];
+			}
+			else
+			{
+				ctx -> IsHashing = 1;
+
+				const uint64_t UseSeed = ctx -> Seed[ 0 ];
+				Seed1 = 0x243F6A8885A308D3 ^ ( UseSeed & 0x5555555555555555 );
+				Seed5 = 0x452821E638D01377 ^ ( UseSeed & 0xAAAAAAAAAAAAAAAA );
+
+				KOMIHASH_HASHROUND();
+
+				Seed2 = 0x13198A2E03707344 ^ Seed1;
+				Seed3 = 0xA4093822299F31D0 ^ Seed1;
+				Seed4 = 0x082EFA98EC4E6C89 ^ Seed1;
+				Seed6 = 0xBE5466CF34E90C6C ^ Seed5;
+				Seed7 = 0xC0AC29B7C97C50DD ^ Seed5;
+				Seed8 = 0x3F84D5B5B5470917 ^ Seed5;
+			}
+
+			KOMIHASH_HASHLOOP64();
+
+			ctx -> Seed[ 0 ] = Seed1;
+			ctx -> Seed[ 1 ] = Seed2;
+			ctx -> Seed[ 2 ] = Seed3;
+			ctx -> Seed[ 3 ] = Seed4;
+			ctx -> Seed[ 4 ] = Seed5;
+			ctx -> Seed[ 5 ] = Seed6;
+			ctx -> Seed[ 6 ] = Seed7;
+			ctx -> Seed[ 7 ] = Seed8;
+
+			if( SwMsgLen == 0 )
+			{
+				if( MsgLen == 0 )
+				{
+					ctx -> fb[ 7 ] = Msg[ -1 ];
+					ctx -> BufFill = 0;
+					return;
+				}
+
+				break;
+			}
+
+			Msg = SwMsg;
+			MsgLen = SwMsgLen;
+			SwMsgLen = 0;
+		}
+	}
+
+	memcpy( ctx -> Buf + BufFill, Msg, MsgLen );
+	ctx -> BufFill = BufFill + MsgLen;
+}
+
+/**
+ * Function finalizes the streamed hashing session, and returns the resulting
+ * hash value of the previously hashed data. This value is equal to the value
+ * returned by the komihash() function for the same provided data.
+ *
+ * Note that since this function is non-destructive for the context structure,
+ * the function can be used to obtain intermediate hashes of the data stream
+ * being hashed, and the hashing can then be resumed.
+ *
+ * @param[in] ctx Pointer to the context structure. The structure must be
+ * initialized via the komihash_stream_init() function.
+ * @return 64-bit hash value.
+ */
+
+static inline uint64_t komihash_stream_final( komihash_stream_t* const ctx )
+{
+	const uint8_t* Msg = ctx -> Buf;
+	size_t MsgLen = ctx -> BufFill;
+
+	if( ctx -> IsHashing == 0 )
+	{
+		return( komihash( Msg, MsgLen, ctx -> Seed[ 0 ]));
+	}
+
+	ctx -> fb[ 4 ] = 0;
+	ctx -> fb[ 5 ] = 0;
+	ctx -> fb[ 6 ] = 0;
+
+	uint64_t Seed1 = ctx -> Seed[ 0 ];
+	uint64_t Seed2 = ctx -> Seed[ 1 ];
+	uint64_t Seed3 = ctx -> Seed[ 2 ];
+	uint64_t Seed4 = ctx -> Seed[ 3 ];
+	uint64_t Seed5 = ctx -> Seed[ 4 ];
+	uint64_t Seed6 = ctx -> Seed[ 5 ];
+	uint64_t Seed7 = ctx -> Seed[ 6 ];
+	uint64_t Seed8 = ctx -> Seed[ 7 ];
+
+	if( MsgLen > 63 )
+	{
+		uint64_t r1h, r2h, r3h, r4h;
+
+		KOMIHASH_HASHLOOP64();
+	}
+
+	Seed5 ^= Seed6 ^ Seed7 ^ Seed8;
+	Seed1 ^= Seed2 ^ Seed3 ^ Seed4;
+
+	return( komihash_epi( Msg, MsgLen, Seed1, Seed5 ));
+}
+
+/**
+ * FOR TESTING PURPOSES ONLY - use the komihash() function instead.
+ *
+ * @param Msg The message to produce a hash from.
+ * @param MsgLen Message's length, in bytes.
+ * @param UseSeed Seed to use.
+ * @return 64-bit hash value.
+ */
+
+static inline uint64_t komihash_stream_oneshot( const void* const Msg,
+	const size_t MsgLen, const uint64_t UseSeed )
+{
+	komihash_stream_t ctx;
+
+	komihash_stream_init( &ctx, UseSeed );
+	komihash_stream_update( &ctx, Msg, MsgLen );
+
+	return( komihash_stream_final( &ctx ));
+}
+
+#endif // KOMIHASH_INCLUDED
--- a/lib/komihash/testvec.c
+++ b/lib/komihash/testvec.c
@ -0,0 +1,99 @@
+/**
+ * testvec.c version 4.3.1
+ *
+ * The program that lists test vectors and their hash values, for the current
+ * version of komihash. Also prints initial outputs of `komirand` PRNG.
+ *
+ * Description is available at https://github.com/avaneev/komihash
+ *
+ * License
+ *
+ * Copyright (c) 2021-2022 Aleksey Vaneev
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include "komihash.h"
+
+int main()
+{
+	const int seedc = 3;
+	const uint64_t seeds[ seedc ] = { 0, 0x0123456789ABCDEF, 256 };
+
+	const int strc = 5;
+	const char* const strs[ strc ] = {
+		"This is a 32-byte tester string.",
+		"The cat is out of the bag",
+		"A 16-byte string",
+		"The new string",
+		"7 bytes"
+	};
+
+	const int bulkc = 15;
+	const int bulks[ bulkc ] = { 6, 12, 20, 31, 32, 40, 47, 48, 56, 64,
+		72, 80, 112, 132, 256 };
+
+	uint8_t bulkbuf[ 256 ];
+	int i;
+
+	for( i = 0; i < 256; i++ )
+	{
+		bulkbuf[ i ] = (uint8_t) i;
+	}
+
+	int j;
+
+	for( j = 0; j < seedc; j++ )
+	{
+		printf( "\tkomihash UseSeed = 0x%016llx:\n", seeds[ j ]);
+
+		for( i = 0; i < strc; i++ )
+		{
+			const char* const s = strs[ i ];
+			const size_t sl = strlen( s );
+
+			printf( "\t\"%s\" = 0x%016llx\n", s,
+				komihash( s, sl, seeds[ j ]));
+		}
+
+		for( i = 0; i < bulkc; i++ )
+		{
+			printf( "\tbulk(%i) = 0x%016llx\n", bulks[ i ],
+				komihash( bulkbuf, bulks[ i ], seeds[ j ]));
+		}
+
+		printf( "\n" );
+	}
+
+	for( j = 0; j < seedc; j++ )
+	{
+		printf( "\tkomirand Seed1/Seed2 = 0x%016llx:\n", seeds[ j ]);
+
+		uint64_t Seed1 = seeds[ j ];
+		uint64_t Seed2 = seeds[ j ];
+
+		for( i = 0; i < 12; i++ )
+		{
+			printf( "\t0x%016llx\n", komirand( &Seed1, &Seed2 ));
+		}
+
+		printf( "\n" );
+	}
+}
--- a/tools/cmake/common.cmake
+++ b/tools/cmake/common.cmake
@ -7,6 +7,7 @@ set(COMPONENTS "")
 # External dependencies
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/catch2")
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/cbor")
+list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/komihash")
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/libmad")
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/libtags")
 list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/lvgl")