Implement incremental updates of database indexes

This makes rescanning the library *so* much faster. Yay!
custom
jacqueline 2 years ago
parent 4f8c127da9
commit 2086ab09b8
  1. 2
      dependencies.lock
  2. 105
      src/database/database.cpp
  3. 6
      src/database/include/database.hpp
  4. 6
      src/database/include/index.hpp
  5. 3
      src/database/include/records.hpp
  6. 6
      src/database/include/track.hpp
  7. 10
      src/database/index.cpp
  8. 24
      src/database/records.cpp

@ -4,6 +4,6 @@ dependencies:
source:
type: idf
version: 5.1.1
manifest_hash: 7e6103d8e34e5eabd5a6a51c49836c58f1686c3aa287f2e288b1ad76243aa61a
manifest_hash: b9761e0028130d307b778c710e5dd39fb3c942d8084ed429d448d938957fb0e6
target: esp32
version: 1.0.0

@ -21,6 +21,7 @@
#include "ff.h"
#include "freertos/projdefs.h"
#include "index.hpp"
#include "komihash.h"
#include "leveldb/cache.h"
#include "leveldb/db.h"
#include "leveldb/iterator.h"
@ -48,7 +49,7 @@ static const char* kTag = "DB";
static const char kDbPath[] = "/.tangara-db";
static const char kKeyDbVersion[] = "schema_version";
static const uint8_t kCurrentDbVersion = 1;
static const uint8_t kCurrentDbVersion = 2;
static const char kKeyTrackId[] = "next_track_id";
@ -163,20 +164,6 @@ auto Database::Update() -> std::future<void> {
leveldb::ReadOptions read_options;
read_options.fill_cache = false;
// Stage 0: discard indexes
// TODO(jacqueline): I think it should be possible to incrementally update
// indexes, but my brain hurts.
ESP_LOGI(kTag, "dropping stale indexes");
{
std::unique_ptr<leveldb::Iterator> it{db_->NewIterator(read_options)};
std::string prefix = EncodeAllIndexesPrefix();
it->Seek(prefix);
while (it->Valid() && it->key().starts_with(prefix)) {
db_->Delete(leveldb::WriteOptions(), it->key());
it->Next();
}
}
std::pair<uint16_t, uint16_t> newest_track{0, 0};
// Stage 1: verify all existing tracks are still valid.
@ -185,8 +172,8 @@ auto Database::Update() -> std::future<void> {
uint64_t num_processed = 0;
std::unique_ptr<leveldb::Iterator> it{db_->NewIterator(read_options)};
std::string prefix = EncodeDataPrefix();
it->Seek(prefix);
while (it->Valid() && it->key().starts_with(prefix)) {
for (it->Seek(prefix); it->Valid() && it->key().starts_with(prefix);
it->Next()) {
num_processed++;
events::Ui().Dispatch(event::UpdateProgress{
.stage = event::UpdateProgress::Stage::kVerifyingExistingTracks,
@ -198,13 +185,11 @@ auto Database::Update() -> std::future<void> {
// The value was malformed. Drop this record.
ESP_LOGW(kTag, "dropping malformed metadata");
db_->Delete(leveldb::WriteOptions(), it->key());
it->Next();
continue;
}
if (track->is_tombstoned) {
ESP_LOGW(kTag, "skipping tombstoned %lx", track->id);
it->Next();
continue;
}
@ -221,6 +206,7 @@ auto Database::Update() -> std::future<void> {
}
if (modified_at == track->modified_at) {
newest_track = std::max(modified_at, newest_track);
continue;
} else {
track->modified_at = modified_at;
}
@ -232,9 +218,9 @@ auto Database::Update() -> std::future<void> {
// malformed, or perhaps the file is missing. Either way, tombstone
// this record.
ESP_LOGW(kTag, "entombing missing #%lx", track->id);
dbRemoveIndexes(track);
track->is_tombstoned = true;
dbPutTrackData(*track);
it->Next();
continue;
}
@ -248,16 +234,13 @@ auto Database::Update() -> std::future<void> {
// database.
ESP_LOGI(kTag, "updating hash (%llx -> %llx)", track->tags_hash,
new_hash);
dbRemoveIndexes(track);
track->tags_hash = new_hash;
dbIngestTagHashes(*tags, track->individual_tag_hashes);
dbPutTrackData(*track);
dbPutHash(new_hash, track->id);
}
Track t{track, tags};
dbCreateIndexesForTrack(t);
it->Next();
}
}
@ -306,6 +289,7 @@ auto Database::Update() -> std::future<void> {
data->filepath = path;
data->tags_hash = hash;
data->modified_at = modified;
dbIngestTagHashes(*tags, data->individual_tag_hashes);
dbPutTrackData(*data);
dbPutHash(hash, id);
@ -322,6 +306,7 @@ auto Database::Update() -> std::future<void> {
new_data->filepath = path;
new_data->tags_hash = hash;
new_data->modified_at = modified;
dbIngestTagHashes(*tags, new_data->individual_tag_hashes);
dbPutTrackData(*new_data);
auto t = std::make_shared<Track>(new_data, tags);
dbCreateIndexesForTrack(*t);
@ -554,10 +539,76 @@ auto Database::dbGetHash(const uint64_t& hash) -> std::optional<TrackId> {
auto Database::dbCreateIndexesForTrack(const Track& track) -> void {
for (const IndexInfo& index : GetIndexes()) {
leveldb::WriteBatch writes;
if (Index(index, track, &writes)) {
auto entries = Index(index, track);
for (const auto& it : entries) {
writes.Put(EncodeIndexKey(it.first),
{it.second.data(), it.second.size()});
}
db_->Write(leveldb::WriteOptions(), &writes);
}
}
auto Database::dbRemoveIndexes(std::shared_ptr<TrackData> data) -> void {
  // Removes the given track from every index, deleting parent index entries
  // as well when the track was the last remaining member of that branch.
  //
  // First reconstruct the track's tags from their stored hashes, so that we
  // can recompute exactly which index keys this track contributed.
  auto tags = dbRecoverTagsFromHashes(data->individual_tag_hashes);
  if (!tags) {
    return;
  }
  Track track{data, tags};
  for (const IndexInfo& index : GetIndexes()) {
    auto entries = Index(index, track);
    // Walk the entries from most-specific to least-specific. After deleting
    // an entry, if a neighbouring entry still shares the same header, other
    // tracks still live under this branch and its parents must be kept.
    for (auto it = entries.rbegin(); it != entries.rend(); it++) {
      auto key = EncodeIndexKey(it->first);
      auto status = db_->Delete(leveldb::WriteOptions{}, key);
      if (!status.ok()) {
        return;
      }
      // The iterator is created after the delete, so Seek() positions at the
      // first entry *after* the deleted key (if any).
      std::unique_ptr<leveldb::Iterator> cursor{db_->NewIterator({})};
      cursor->Seek(key);
      bool has_siblings = false;
      if (cursor->Valid()) {
        auto next_key = ParseIndexKey(cursor->key());
        if (next_key && next_key->header == it->first.header) {
          has_siblings = true;
        }
        cursor->Prev();
      } else {
        // Seek() ran off the end of the db; the candidate predecessor is the
        // final entry (if the db is non-empty).
        cursor->SeekToLast();
      }
      // NB: LevelDB requires Valid() before key()/Next()/Prev(); these guards
      // avoid undefined behaviour when the deleted key was the first or last
      // entry in the database.
      if (!has_siblings && cursor->Valid()) {
        auto prev_key = ParseIndexKey(cursor->key());
        if (prev_key && prev_key->header == it->first.header) {
          has_siblings = true;
        }
      }
      if (has_siblings) {
        break;
      }
    }
  }
}
auto Database::dbIngestTagHashes(const TrackTags& tags,
                                 std::pmr::unordered_map<Tag, uint64_t>& out)
    -> void {
  // Persists the text of each tag keyed by its komihash, and records the
  // tag -> hash mapping into `out` (stored in the track's TrackData so the
  // tag text can later be recovered without re-reading the file).
  leveldb::WriteBatch batch{};
  for (const auto& entry : tags.tags()) {
    auto hash =
        komihash_stream_oneshot(entry.second.data(), entry.second.size(), 0);
    // Size the Slice explicitly rather than using c_str(): a C-string Slice
    // would truncate values with embedded NULs, and every other Put/set call
    // site here uses the {data, size} form.
    batch.Put(EncodeTagHashKey(hash),
              {entry.second.data(), entry.second.size()});
    out[entry.first] = hash;
  }
  db_->Write(leveldb::WriteOptions{}, &batch);
}
auto Database::dbRecoverTagsFromHashes(
    const std::pmr::unordered_map<Tag, uint64_t>& hashes)
    -> std::shared_ptr<TrackTags> {
  // Rebuilds a TrackTags instance by looking up each tag's stored text via
  // its hash key. Tags whose text is missing from the db are skipped.
  auto tags = std::make_shared<TrackTags>();
  for (const auto& [tag, hash] : hashes) {
    std::string text;
    auto status =
        db_->Get(leveldb::ReadOptions{}, EncodeTagHashKey(hash), &text);
    if (!status.ok()) {
      ESP_LOGI(kTag, "failed to retrieve tag!");
      continue;
    }
    tags->set(tag, {text.data(), text.size()});
  }
  return tags;
}
template <typename T>

@ -7,6 +7,7 @@
#pragma once
#include <stdint.h>
#include <sys/_stdint.h>
#include <cstdint>
#include <future>
#include <memory>
@ -150,6 +151,11 @@ class Database {
auto dbPutHash(const uint64_t& hash, TrackId i) -> void;
auto dbGetHash(const uint64_t& hash) -> std::optional<TrackId>;
auto dbCreateIndexesForTrack(const Track& track) -> void;
auto dbRemoveIndexes(std::shared_ptr<TrackData>) -> void;
auto dbIngestTagHashes(const TrackTags&,
std::pmr::unordered_map<Tag, uint64_t>&) -> void;
auto dbRecoverTagsFromHashes(const std::pmr::unordered_map<Tag, uint64_t>&)
-> std::shared_ptr<TrackTags>;
template <typename T>
auto dbGetPage(const Continuation& c) -> Result<T>*;

@ -46,6 +46,8 @@ struct IndexKey {
// an index consists of { kArtist, kAlbum, kTitle }, and we are at depth = 2
// then this may contain hash(hash("Jacqueline"), "My Cool Album").
std::uint64_t components_hash;
bool operator==(const Header&) const = default;
};
Header header;
@ -58,7 +60,9 @@ struct IndexKey {
std::optional<TrackId> track;
};
auto Index(const IndexInfo&, const Track&, leveldb::WriteBatch*) -> bool;
auto Index(const IndexInfo&, const Track&)
-> std::vector<std::pair<IndexKey, std::pmr::string>>;
auto ExpandHeader(const IndexKey::Header&,
const std::optional<std::pmr::string>&) -> IndexKey::Header;

@ -52,6 +52,9 @@ auto EncodeHashKey(const uint64_t& hash) -> std::string;
*/
auto EncodeHashValue(TrackId id) -> std::string;
/* Encodes a hash key for the specified hash. */
auto EncodeTagHashKey(const uint64_t& hash) -> std::string;
/*
* Parses bytes previously encoded via EncodeHashValue back into a track id. May
* return nullopt if parsing fails.

@ -83,6 +83,10 @@ class TrackTags {
auto at(const Tag& key) const -> std::optional<std::pmr::string>;
auto operator[](const Tag& key) const -> std::optional<std::pmr::string>;
auto tags() const -> const std::pmr::unordered_map<Tag, std::pmr::string>& {
return tags_;
}
/*
* Returns a hash of the 'identifying' tags of this track. That is, a hash
* that can be used to determine if one track is likely the same as another,
@ -119,12 +123,14 @@ struct TrackData {
: id(0),
filepath(&memory::kSpiRamResource),
tags_hash(0),
individual_tag_hashes(&memory::kSpiRamResource),
is_tombstoned(false),
modified_at() {}
TrackId id;
std::pmr::string filepath;
uint64_t tags_hash;
std::pmr::unordered_map<Tag, uint64_t> individual_tag_hashes;
bool is_tombstoned;
std::pair<uint16_t, uint16_t> modified_at;

@ -59,8 +59,9 @@ static auto missing_component_text(const Track& track, Tag tag)
}
}
auto Index(const IndexInfo& info, const Track& t, leveldb::WriteBatch* batch)
-> bool {
auto Index(const IndexInfo& info, const Track& t)
-> std::vector<std::pair<IndexKey, std::pmr::string>> {
std::vector<std::pair<IndexKey, std::pmr::string>> out;
IndexKey key{
.header{
.id = info.id,
@ -93,8 +94,7 @@ auto Index(const IndexInfo& info, const Track& t, leveldb::WriteBatch* batch)
value = t.TitleOrFilename();
}
auto encoded = EncodeIndexKey(key);
batch->Put(encoded, {value.data(), value.size()});
out.push_back(std::make_pair(key, value));
// If there are more components after this, then we need to finish by
// narrowing the header with the current title.
@ -102,7 +102,7 @@ auto Index(const IndexInfo& info, const Track& t, leveldb::WriteBatch* batch)
key.header = ExpandHeader(key.header, key.item);
}
}
return true;
return out;
}
auto ExpandHeader(const IndexKey::Header& header,

@ -48,6 +48,7 @@ static const char* kTag = "RECORDS";
static const char kDataPrefix = 'D';
static const char kHashPrefix = 'H';
static const char kTagHashPrefix = 'T';
static const char kIndexPrefix = 'I';
static const char kFieldSeparator = '\0';
@ -62,6 +63,11 @@ auto EncodeDataKey(const TrackId& id) -> std::string {
}
auto EncodeDataValue(const TrackData& track) -> std::string {
auto* tag_hashes = new cppbor::Map{}; // Free'd by Array's dtor.
for (const auto& entry : track.individual_tag_hashes) {
tag_hashes->add(cppbor::Uint{static_cast<uint32_t>(entry.first)},
cppbor::Uint{entry.second});
}
cppbor::Array val{
cppbor::Uint{track.id},
cppbor::Tstr{track.filepath},
@ -69,6 +75,7 @@ auto EncodeDataValue(const TrackData& track) -> std::string {
cppbor::Bool{track.is_tombstoned},
cppbor::Uint{track.modified_at.first},
cppbor::Uint{track.modified_at.second},
tag_hashes,
};
return val.toString();
}
@ -80,12 +87,13 @@ auto ParseDataValue(const leveldb::Slice& slice) -> std::shared_ptr<TrackData> {
return nullptr;
}
auto vals = item->asArray();
if (vals->size() != 6 || vals->get(0)->type() != cppbor::UINT ||
if (vals->size() != 7 || vals->get(0)->type() != cppbor::UINT ||
vals->get(1)->type() != cppbor::TSTR ||
vals->get(2)->type() != cppbor::UINT ||
vals->get(3)->type() != cppbor::SIMPLE ||
vals->get(4)->type() != cppbor::UINT ||
vals->get(5)->type() != cppbor::UINT) {
vals->get(5)->type() != cppbor::UINT ||
vals->get(6)->type() != cppbor::MAP) {
return {};
}
auto res = std::make_shared<TrackData>();
@ -96,6 +104,12 @@ auto ParseDataValue(const leveldb::Slice& slice) -> std::shared_ptr<TrackData> {
res->modified_at = std::make_pair<uint16_t, uint16_t>(
vals->get(4)->asUint()->unsignedValue(),
vals->get(5)->asUint()->unsignedValue());
auto tag_hashes = vals->get(6)->asMap();
for (const auto& entry : *tag_hashes) {
auto tag = static_cast<Tag>(entry.first->asUint()->unsignedValue());
res->individual_tag_hashes[tag] = entry.second->asUint()->unsignedValue();
}
return res;
}
@ -113,6 +127,12 @@ auto EncodeHashValue(TrackId id) -> std::string {
return TrackIdToBytes(id);
}
/* 'T/ 0xBEEF' */
// Builds the db key under which a tag's text is stored: the tag-hash prefix,
// a field separator, then the CBOR-encoded hash.
auto EncodeTagHashKey(const uint64_t& hash) -> std::string {
  std::string key{kTagHashPrefix, kFieldSeparator};
  key += cppbor::Uint{hash}.toString();
  return key;
}
/* 'I/' */
auto EncodeAllIndexesPrefix() -> std::string {
return {kIndexPrefix, kFieldSeparator};

Loading…
Cancel
Save