You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
247 lines
7.5 KiB
247 lines
7.5 KiB
/*
|
|
* Copyright 2023 jacqueline <me@jacqueline.id.au>
|
|
*
|
|
* SPDX-License-Identifier: GPL-3.0-only
|
|
*/
|
|
|
|
#include "records.hpp"
|
|
|
|
#include <stdint.h>
|
|
#include <sys/_stdint.h>
|
|
|
|
#include <functional>
|
|
#include <iomanip>
|
|
#include <iostream>
|
|
#include <memory_resource>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "cppbor.h"
|
|
#include "cppbor_parse.h"
|
|
#include "esp_log.h"
|
|
|
|
#include "index.hpp"
|
|
#include "komihash.h"
|
|
#include "memory_resource.hpp"
|
|
#include "track.hpp"
|
|
|
|
// As LevelDB is a key-value store, each record in the database consists of a
|
|
// key and an optional value.
|
|
//
|
|
// Values, when present, are always cbor-encoded. This is fast, compact, and
|
|
// very easy to evolve over time due to its inclusion of type information.
|
|
//
|
|
// Keys have a more complicated scheme, as for performance we rely heavily on
|
|
// LevelDB's sorted storage format. We must therefore worry about clustering of
|
|
// similar records, and the sortability of our encoding format.
|
|
// Each kind of key consists of a single-byte prefix, then one or more
|
|
// fields separated by null (0) bytes. Each field may be cbor-encoded, or may
|
|
// use some bespoke encoding; it depends on whether we want to be able to sort
|
|
// by that field.
|
|
// For debugging and discussion purposes, we represent field separators
|
|
// textually as '/', and write each field as its hex encoding. e.g. a data key
|
|
// for the track with id 17 would be written as 'D / 0x11'.
|
|
|
|
namespace database {
|
|
|
|
// Tag string for log output from this file.
[[maybe_unused]] static constexpr const char* kTag = "RECORDS";

// Single-byte prefixes that identify which kind of record a key belongs to.
static constexpr char kDataPrefix = 'D';
static constexpr char kHashPrefix = 'H';
[[maybe_unused]] static constexpr char kTagHashPrefix = 'T';
static constexpr char kIndexPrefix = 'I';

// Byte that separates the fields within a key; written as '/' in comments.
static constexpr char kFieldSeparator = '\0';
|
|
|
|
/* 'D/' */
|
|
auto EncodeDataPrefix() -> std::string {
|
|
return {kDataPrefix, kFieldSeparator};
|
|
}
|
|
|
|
/* 'D/ 0xACAB' */
|
|
auto EncodeDataKey(const TrackId& id) -> std::string {
|
|
return EncodeDataPrefix() + TrackIdToBytes(id);
|
|
}
|
|
|
|
auto EncodeDataValue(const TrackData& track) -> std::string {
|
|
auto* tag_hashes = new cppbor::Map{}; // Free'd by Array's dtor.
|
|
for (const auto& entry : track.individual_tag_hashes) {
|
|
tag_hashes->add(cppbor::Uint{static_cast<uint32_t>(entry.first)},
|
|
cppbor::Uint{entry.second});
|
|
}
|
|
cppbor::Array val{
|
|
cppbor::Uint{track.id},
|
|
cppbor::Tstr{track.filepath},
|
|
cppbor::Uint{track.tags_hash},
|
|
cppbor::Bool{track.is_tombstoned},
|
|
cppbor::Uint{track.modified_at.first},
|
|
cppbor::Uint{track.modified_at.second},
|
|
tag_hashes,
|
|
};
|
|
return val.toString();
|
|
}
|
|
|
|
/*
 * Decodes a value previously written by EncodeDataValue.
 *
 * The value must be a cbor array of exactly seven elements: id (uint),
 * filepath (text), tags hash (uint), tombstone flag (bool), two uints making
 * up the modification time, and a map of uint tag -> uint hash.
 *
 * Returns nullptr if the bytes are not valid cbor, or if any element does not
 * have the expected type. Stored records may be corrupt or written by a
 * different version, so every item is checked before it is dereferenced.
 */
auto ParseDataValue(const leveldb::Slice& slice) -> std::shared_ptr<TrackData> {
  auto [item, unused, err] = cppbor::parseWithViews(
      reinterpret_cast<const uint8_t*>(slice.data()), slice.size());
  if (!item || item->type() != cppbor::ARRAY) {
    return nullptr;
  }

  auto vals = item->asArray();
  if (vals->size() != 7 || vals->get(0)->type() != cppbor::UINT ||
      vals->get(1)->type() != cppbor::TSTR ||
      vals->get(2)->type() != cppbor::UINT ||
      vals->get(3)->type() != cppbor::SIMPLE ||
      vals->get(4)->type() != cppbor::UINT ||
      vals->get(5)->type() != cppbor::UINT ||
      vals->get(6)->type() != cppbor::MAP) {
    return nullptr;
  }

  // SIMPLE covers more than booleans (e.g. null), so the downcast can fail
  // even though the type check above passed.
  auto tombstoned = vals->get(3)->asBool();
  if (tombstoned == nullptr) {
    return nullptr;
  }

  auto res = std::make_shared<TrackData>();
  res->id = vals->get(0)->asUint()->unsignedValue();
  res->filepath = vals->get(1)->asViewTstr()->view();
  res->tags_hash = vals->get(2)->asUint()->unsignedValue();
  res->is_tombstoned = tombstoned->value();
  res->modified_at = std::make_pair<uint16_t, uint16_t>(
      vals->get(4)->asUint()->unsignedValue(),
      vals->get(5)->asUint()->unsignedValue());

  auto tag_hashes = vals->get(6)->asMap();
  for (const auto& entry : *tag_hashes) {
    // Map keys and values are not covered by the checks above; reject the
    // record rather than dereference a failed downcast.
    auto tag_item = entry.first->asUint();
    auto hash_item = entry.second->asUint();
    if (tag_item == nullptr || hash_item == nullptr) {
      return nullptr;
    }
    auto tag = static_cast<Tag>(tag_item->unsignedValue());
    res->individual_tag_hashes[tag] = hash_item->unsignedValue();
  }
  return res;
}
|
|
|
|
/* 'H/ 0xBEEF' */
|
|
auto EncodeHashKey(const uint64_t& hash) -> std::string {
|
|
return std::string{kHashPrefix, kFieldSeparator} +
|
|
cppbor::Uint{hash}.toString();
|
|
}
|
|
|
|
/*
 * Decodes the value stored under a hash key into a track id. Returns an empty
 * optional if the bytes cannot be decoded by BytesToTrackId.
 */
auto ParseHashValue(const leveldb::Slice& slice) -> std::optional<TrackId> {
  cpp::span<const char> raw_bytes{slice.data(), slice.size()};
  return BytesToTrackId(raw_bytes);
}
|
|
|
|
/*
 * Encodes the value stored under a hash key, which is the byte encoding of
 * the given track id.
 */
auto EncodeHashValue(TrackId id) -> std::string {
  std::string value = TrackIdToBytes(id);
  return value;
}
|
|
|
|
/* 'T/ 0xBEEF' */
|
|
auto EncodeTagHashKey(const uint64_t& hash) -> std::string {
|
|
return std::string{kTagHashPrefix, kFieldSeparator} +
|
|
cppbor::Uint{hash}.toString();
|
|
}
|
|
|
|
/* 'I/' */
|
|
auto EncodeAllIndexesPrefix() -> std::string {
|
|
return {kIndexPrefix, kFieldSeparator};
|
|
}
|
|
|
|
/*
 * Returns the leading portion of an index key for the given header: the index
 * prefix and a separator, then the header (id, depth, components hash) as a
 * cbor array, then another separator.
 */
auto EncodeIndexPrefix(const IndexKey::Header& header) -> std::string {
  cppbor::Array encoded_header{
      cppbor::Uint{header.id},
      cppbor::Uint{header.depth},
      cppbor::Uint{header.components_hash},
  };

  std::string prefix{kIndexPrefix, kFieldSeparator};
  prefix += encoded_header.toString();
  prefix.push_back(kFieldSeparator);
  return prefix;
}
|
|
|
|
/*
|
|
* 'I/0xa2/0x686921/0xb9'
|
|
* ^ --- trailer
|
|
* ^ --- component ("hi!")
|
|
* ^ -------- header
|
|
*
|
|
* The components *must* be encoded in a way that is easy to sort
|
|
* lexicographically. The header and footer do not have this restriction, so
|
|
* cbor is fine.
|
|
*
|
|
* We store grouping information within the header; which index, filtered
|
|
* components. We store disambiguation information in the trailer; just a track
|
|
* id for now, but could reasonably be something like 'release year' as well.
|
|
*/
|
|
auto EncodeIndexKey(const IndexKey& key) -> std::string {
|
|
std::ostringstream out{};
|
|
|
|
out << EncodeIndexPrefix(key.header);
|
|
|
|
// The component should already be UTF-8 encoded, so just write it.
|
|
if (key.item) {
|
|
out << *key.item << kFieldSeparator;
|
|
}
|
|
|
|
if (key.track) {
|
|
out << TrackIdToBytes(*key.track);
|
|
}
|
|
|
|
return out.str();
|
|
}
|
|
|
|
/*
 * Parses a key in the format written by EncodeIndexKey.
 *
 * Returns an empty optional if the key does not start with the index prefix,
 * if its header is not a cbor array of exactly three unsigned integers, or if
 * nothing follows the header. Otherwise returns the decoded header, plus the
 * item and track fields when they are present and non-empty.
 *
 * The fields after the header are split directly on the raw bytes. Stream
 * extraction is avoided because istream::get() on a streambuf stops at '\n'
 * by default and fails when it extracts nothing. The byte 0x0A is legal inside
 * a cbor-encoded track id (the id 10 encodes to exactly that byte), and an
 * empty item field is followed by a track id that still needs to be read.
 */
auto ParseIndexKey(const leveldb::Slice& slice) -> std::optional<IndexKey> {
  const std::string prefix = EncodeAllIndexesPrefix();
  if (!slice.starts_with(prefix)) {
    return {};
  }

  // Everything after 'I/': the cbor header, a separator, then the fields.
  const std::string key_data(slice.data() + prefix.size(),
                             slice.size() - prefix.size());
  auto [key, end_of_key, err] = cppbor::parseWithViews(
      reinterpret_cast<const uint8_t*>(key_data.data()), key_data.size());
  if (!key || key->type() != cppbor::ARRAY) {
    return {};
  }
  auto as_array = key->asArray();
  if (as_array->size() != 3 || as_array->get(0)->type() != cppbor::UINT ||
      as_array->get(1)->type() != cppbor::UINT ||
      as_array->get(2)->type() != cppbor::UINT) {
    return {};
  }

  IndexKey result{};
  result.header.id = as_array->get(0)->asUint()->unsignedValue();
  result.header.depth = as_array->get(1)->asUint()->unsignedValue();
  result.header.components_hash = as_array->get(2)->asUint()->unsignedValue();

  // The parser reports where the header ended; the byte at that offset is the
  // separator between the header and the remaining fields.
  const size_t header_length = static_cast<size_t>(
      reinterpret_cast<const char*>(end_of_key) - key_data.data());
  if (header_length == 0 || header_length >= key_data.size()) {
    return {};
  }

  const std::string fields = key_data.substr(header_length + 1);

  // The item runs up to the next separator, or to the end of the key if there
  // is no separator.
  const size_t item_end = fields.find(kFieldSeparator);
  std::string item = fields.substr(0, item_end);
  if (!item.empty()) {
    result.item = item;
  }

  // Anything after that separator is the track id. It is the final field, so
  // all remaining bytes belong to it, including any 0x00 or 0x0A bytes.
  if (item_end != std::string::npos && item_end + 1 < fields.size()) {
    result.track = BytesToTrackId(fields.substr(item_end + 1));
  }

  return result;
}
|
|
|
|
/*
 * Encodes a track id as the bytes of a cbor unsigned integer.
 */
auto TrackIdToBytes(TrackId id) -> std::string {
  cppbor::Uint encoded{id};
  return encoded.toString();
}
|
|
|
|
/*
 * Decodes a track id from the bytes of a cbor unsigned integer. Returns an
 * empty optional if the bytes do not parse, or if they parse to something
 * other than an unsigned integer.
 */
auto BytesToTrackId(cpp::span<const char> bytes) -> std::optional<TrackId> {
  const auto* begin = reinterpret_cast<const uint8_t*>(bytes.data());
  auto [parsed, unused, err] = cppbor::parse(begin, bytes.size());
  if (parsed && parsed->type() == cppbor::UINT) {
    return parsed->asUint()->unsignedValue();
  }
  return {};
}
|
|
|
|
} // namespace database
|
|
|