/*
 * Copyright 2023 jacqueline
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#include "records.hpp"

// NOTE(review): the names of the system headers were not legible in the copy
// of this file under review. The list below is what this file uses directly;
// reconcile it with the original include list before merging.
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <utility>

#include "cppbor.h"
#include "cppbor_parse.h"
#include "esp_log.h"

#include "index.hpp"
#include "komihash.h"
#include "memory_resource.hpp"
#include "track.hpp"

// As LevelDB is a key-value store, each record in the database consists of a
// key and an optional value.
//
// Values, when present, are always cbor-encoded. This is fast, compact, and
// very easy to evolve over time due to its inclusion of type information.
//
// Keys have a more complicated scheme, as for performance we rely heavily on
// LevelDB's sorted storage format. We must therefore worry about clustering of
// similar records, and the sortability of our encoding format.
//
// Each kind of key consists of a single-byte prefix, then one or more fields
// separated by null (0) bytes. Each field may be cbor-encoded, or may use some
// bespoke encoding; it depends on whether we want to be able to sort by that
// field.
//
// For debugging and discussion purposes, we represent field separators
// textually as '/', and write each field as its hex encoding. e.g. a data key
// for the track with id 17 would be written as 'D / 0x11'.

namespace database {

[[maybe_unused]] static const char* kTag = "RECORDS";

static const char kDataPrefix = 'D';
static const char kHashPrefix = 'H';
[[maybe_unused]] static const char kTagHashPrefix = 'T';
static const char kIndexPrefix = 'I';
static const char kFieldSeparator = '\0';

/*
 * 'D/'
 *
 * Returns the prefix shared by every data key: the data prefix byte followed
 * by a field separator.
 */
auto EncodeDataPrefix() -> std::string {
  return {kDataPrefix, kFieldSeparator};
}

/*
 * 'D/ 0xACAB'
 *
 * Returns the data key for the given track: the data prefix followed by the
 * encoded track id.
 */
auto EncodeDataKey(const TrackId& id) -> std::string {
  return EncodeDataPrefix() + TrackIdToBytes(id);
}

/*
 * Serialises a track's data as a seven element cbor array:
 *   [id, filepath, tags_hash, is_tombstoned, modified_at.first,
 *    modified_at.second, {tag: hash, ...}]
 *
 * ParseDataValue is the inverse of this function, and relies on this exact
 * element order.
 */
auto EncodeDataValue(const TrackData& track) -> std::string {
  auto* tag_hashes = new cppbor::Map{};  // Free'd by Array's dtor.
  for (const auto& entry : track.individual_tag_hashes) {
    tag_hashes->add(cppbor::Uint{static_cast<uint64_t>(entry.first)},
                    cppbor::Uint{entry.second});
  }
  cppbor::Array val{
      cppbor::Uint{track.id},
      cppbor::Tstr{track.filepath},
      cppbor::Uint{track.tags_hash},
      cppbor::Bool{track.is_tombstoned},
      cppbor::Uint{track.modified_at.first},
      cppbor::Uint{track.modified_at.second},
      tag_hashes,
  };
  return val.toString();
}

/*
 * Parses a value previously written by EncodeDataValue.
 *
 * Returns nullptr if the bytes are not valid cbor, or if the decoded item does
 * not have exactly the shape that EncodeDataValue produces. Stored bytes are
 * never trusted; every accessor result is checked before it is dereferenced.
 */
auto ParseDataValue(const leveldb::Slice& slice) -> std::shared_ptr<TrackData> {
  auto [item, unused, err] = cppbor::parseWithViews(
      reinterpret_cast<const uint8_t*>(slice.data()), slice.size());
  if (!item || item->type() != cppbor::ARRAY) {
    return nullptr;
  }
  auto vals = item->asArray();
  if (vals->size() != 7 || vals->get(0)->type() != cppbor::UINT ||
      vals->get(1)->type() != cppbor::TSTR ||
      vals->get(2)->type() != cppbor::UINT ||
      vals->get(3)->type() != cppbor::SIMPLE ||
      vals->get(4)->type() != cppbor::UINT ||
      vals->get(5)->type() != cppbor::UINT ||
      vals->get(6)->type() != cppbor::MAP) {
    return nullptr;
  }

  // The major type alone does not prove that these accessors will succeed
  // (e.g. a SIMPLE item need not be a boolean), so check their results too.
  const auto* filepath = vals->get(1)->asViewTstr();
  const auto* is_tombstoned = vals->get(3)->asBool();
  if (filepath == nullptr || is_tombstoned == nullptr) {
    return nullptr;
  }

  // Whatever type the track uses to identify an individual tag.
  using TagKey = decltype(TrackData::individual_tag_hashes)::key_type;

  auto res = std::make_shared<TrackData>();
  res->id = vals->get(0)->asUint()->unsignedValue();
  res->filepath = filepath->view();
  res->tags_hash = vals->get(2)->asUint()->unsignedValue();
  res->is_tombstoned = is_tombstoned->value();
  res->modified_at =
      std::make_pair(vals->get(4)->asUint()->unsignedValue(),
                     vals->get(5)->asUint()->unsignedValue());

  auto tag_hashes = vals->get(6)->asMap();
  for (const auto& entry : *tag_hashes) {
    if (!entry.first || !entry.second) {
      return nullptr;
    }
    const auto* tag = entry.first->asUint();
    const auto* hash = entry.second->asUint();
    if (tag == nullptr || hash == nullptr) {
      // Map keys and values are always written as unsigned ints; anything
      // else means the record is corrupt.
      return nullptr;
    }
    res->individual_tag_hashes[static_cast<TagKey>(tag->unsignedValue())] =
        hash->unsignedValue();
  }
  return res;
}

/*
 * 'H/ 0xBEEF'
 *
 * Returns the key under which the track with the given content hash is
 * recorded.
 */
auto EncodeHashKey(const uint64_t& hash) -> std::string {
  return std::string{kHashPrefix, kFieldSeparator} +
         cppbor::Uint{hash}.toString();
}

/*
 * Parses the value of a hash record, which is an encoded track id. Returns an
 * empty optional if the bytes are not a valid track id.
 */
auto ParseHashValue(const leveldb::Slice& slice) -> std::optional<TrackId> {
  return BytesToTrackId({slice.data(), slice.size()});
}

/* Encodes the value of a hash record; this is just the encoded track id. */
auto EncodeHashValue(TrackId id) -> std::string {
  return TrackIdToBytes(id);
}

/*
 * 'T/ 0xBEEF'
 *
 * Returns the key for the record associated with the given tag hash.
 */
auto EncodeTagHashKey(const uint64_t& hash) -> std::string {
  return std::string{kTagHashPrefix, kFieldSeparator} +
         cppbor::Uint{hash}.toString();
}

/*
 * 'I/'
 *
 * Returns the prefix shared by the keys of every index.
 */
auto EncodeAllIndexesPrefix() -> std::string {
  return {kIndexPrefix, kFieldSeparator};
}

/*
 * Returns the prefix shared by every index key with the given header: the
 * index prefix, a separator, the cbor-encoded header ([id, depth,
 * components_hash]), and a trailing separator.
 */
auto EncodeIndexPrefix(const IndexKey::Header& header) -> std::string {
  std::ostringstream out;
  out.put(kIndexPrefix).put(kFieldSeparator);
  cppbor::Array val{
      cppbor::Uint{header.id},
      cppbor::Uint{header.depth},
      cppbor::Uint{header.components_hash},
  };
  out << val.toString() << kFieldSeparator;
  return out.str();
}

/*
 * 'I/0xa2/0x686921/0xb9'
 *                  ^ --- trailer
 *         ^ --- component ("hi!")
 *    ^ -------- header
 *
 * The components *must* be encoded in a way that is easy to sort
 * lexicographically. The header and trailer do not have this restriction, so
 * cbor is fine.
 *
 * We store grouping information within the header; which index, filtered
 * components. We store disambiguation information in the trailer; just a track
 * id for now, but could reasonably be something like 'release year' as well.
 */
auto EncodeIndexKey(const IndexKey& key) -> std::string {
  std::ostringstream out{};
  out << EncodeIndexPrefix(key.header);

  // The component should already be UTF-8 encoded, so just write it.
  if (key.item) {
    out << *key.item << kFieldSeparator;
  }

  if (key.track) {
    out << TrackIdToBytes(*key.track);
  }

  return out.str();
}

/*
 * Parses a key previously written by EncodeIndexKey.
 *
 * Returns an empty optional if the key does not begin with the index prefix,
 * or if its header is not a cbor array of three unsigned ints. The 'item' and
 * 'track' fields of the result are only set if the corresponding field is
 * present and non-empty in the key.
 */
auto ParseIndexKey(const leveldb::Slice& slice) -> std::optional<IndexKey> {
  IndexKey result{};

  auto prefix = EncodeAllIndexesPrefix();
  if (!slice.starts_with(prefix)) {
    return {};
  }

  std::string key_data = slice.ToString().substr(prefix.size());
  auto [key, end_of_key, err] = cppbor::parseWithViews(
      reinterpret_cast<const uint8_t*>(key_data.data()), key_data.size());
  if (!key || key->type() != cppbor::ARRAY) {
    return {};
  }
  auto as_array = key->asArray();
  if (as_array->size() != 3 || as_array->get(0)->type() != cppbor::UINT ||
      as_array->get(1)->type() != cppbor::UINT ||
      as_array->get(2)->type() != cppbor::UINT) {
    return {};
  }
  result.header.id = as_array->get(0)->asUint()->unsignedValue();
  result.header.depth = as_array->get(1)->asUint()->unsignedValue();
  result.header.components_hash = as_array->get(2)->asUint()->unsignedValue();

  size_t header_length =
      reinterpret_cast<const char*>(end_of_key) - key_data.data();
  if (header_length == 0 || header_length >= key_data.size()) {
    return {};
  }

  // Everything after the header and the separator that follows it.
  const std::string rest = key_data.substr(header_length + 1);

  // Split on the first separator by hand. The trailer is binary, so it must
  // not be read with a line-oriented stream call: the cbor encoding of a track
  // id may legitimately contain a '\n' (0x0A) byte, e.g. track id 10.
  const size_t item_end = rest.find(kFieldSeparator);
  if (item_end != 0 && !rest.empty()) {
    result.item = rest.substr(0, item_end);
  }

  if (item_end != std::string::npos && item_end + 1 < rest.size()) {
    const std::string track_bytes = rest.substr(item_end + 1);
    result.track = BytesToTrackId({track_bytes.data(), track_bytes.size()});
  }

  return result;
}

/* Encodes a track id as a cbor unsigned int. */
auto TrackIdToBytes(TrackId id) -> std::string {
  return cppbor::Uint{id}.toString();
}

/*
 * Decodes a track id written by TrackIdToBytes. Returns an empty optional if
 * the bytes do not parse as a cbor unsigned int.
 */
auto BytesToTrackId(cpp::span<const char> bytes) -> std::optional<TrackId> {
  auto [res, unused, err] = cppbor::parse(
      reinterpret_cast<const uint8_t*>(bytes.data()), bytes.size());
  if (!res || res->type() != cppbor::UINT) {
    return {};
  }
  return res->asUint()->unsignedValue();
}

}  // namespace database