Implement incremental updates of database indexes

This makes rescanning the library *so* much faster. Yay!
custom
jacqueline 2 years ago
parent 4f8c127da9
commit 2086ab09b8
  1. 2
      dependencies.lock
  2. 105
      src/database/database.cpp
  3. 6
      src/database/include/database.hpp
  4. 6
      src/database/include/index.hpp
  5. 3
      src/database/include/records.hpp
  6. 6
      src/database/include/track.hpp
  7. 10
      src/database/index.cpp
  8. 24
      src/database/records.cpp

@ -4,6 +4,6 @@ dependencies:
source:
type: idf
version: 5.1.1
manifest_hash: 7e6103d8e34e5eabd5a6a51c49836c58f1686c3aa287f2e288b1ad76243aa61a
manifest_hash: b9761e0028130d307b778c710e5dd39fb3c942d8084ed429d448d938957fb0e6
target: esp32
version: 1.0.0

@ -21,6 +21,7 @@
#include "ff.h"
#include "freertos/projdefs.h"
#include "index.hpp"
#include "komihash.h"
#include "leveldb/cache.h"
#include "leveldb/db.h"
#include "leveldb/iterator.h"
@ -48,7 +49,7 @@ static const char* kTag = "DB";
static const char kDbPath[] = "/.tangara-db";
static const char kKeyDbVersion[] = "schema_version";
static const uint8_t kCurrentDbVersion = 1;
static const uint8_t kCurrentDbVersion = 2;
static const char kKeyTrackId[] = "next_track_id";
@ -163,20 +164,6 @@ auto Database::Update() -> std::future<void> {
leveldb::ReadOptions read_options;
read_options.fill_cache = false;
// Stage 0: discard indexes
// TODO(jacqueline): I think it should be possible to incrementally update
// indexes, but my brain hurts.
ESP_LOGI(kTag, "dropping stale indexes");
{
std::unique_ptr<leveldb::Iterator> it{db_->NewIterator(read_options)};
std::string prefix = EncodeAllIndexesPrefix();
it->Seek(prefix);
while (it->Valid() && it->key().starts_with(prefix)) {
db_->Delete(leveldb::WriteOptions(), it->key());
it->Next();
}
}
std::pair<uint16_t, uint16_t> newest_track{0, 0};
// Stage 1: verify all existing tracks are still valid.
@ -185,8 +172,8 @@ auto Database::Update() -> std::future<void> {
uint64_t num_processed = 0;
std::unique_ptr<leveldb::Iterator> it{db_->NewIterator(read_options)};
std::string prefix = EncodeDataPrefix();
it->Seek(prefix);
while (it->Valid() && it->key().starts_with(prefix)) {
for (it->Seek(prefix); it->Valid() && it->key().starts_with(prefix);
it->Next()) {
num_processed++;
events::Ui().Dispatch(event::UpdateProgress{
.stage = event::UpdateProgress::Stage::kVerifyingExistingTracks,
@ -198,13 +185,11 @@ auto Database::Update() -> std::future<void> {
// The value was malformed. Drop this record.
ESP_LOGW(kTag, "dropping malformed metadata");
db_->Delete(leveldb::WriteOptions(), it->key());
it->Next();
continue;
}
if (track->is_tombstoned) {
ESP_LOGW(kTag, "skipping tombstoned %lx", track->id);
it->Next();
continue;
}
@ -221,6 +206,7 @@ auto Database::Update() -> std::future<void> {
}
if (modified_at == track->modified_at) {
newest_track = std::max(modified_at, newest_track);
continue;
} else {
track->modified_at = modified_at;
}
@ -232,9 +218,9 @@ auto Database::Update() -> std::future<void> {
// malformed, or perhaps the file is missing. Either way, tombstone
// this record.
ESP_LOGW(kTag, "entombing missing #%lx", track->id);
dbRemoveIndexes(track);
track->is_tombstoned = true;
dbPutTrackData(*track);
it->Next();
continue;
}
@ -248,16 +234,13 @@ auto Database::Update() -> std::future<void> {
// database.
ESP_LOGI(kTag, "updating hash (%llx -> %llx)", track->tags_hash,
new_hash);
dbRemoveIndexes(track);
track->tags_hash = new_hash;
dbIngestTagHashes(*tags, track->individual_tag_hashes);
dbPutTrackData(*track);
dbPutHash(new_hash, track->id);
}
Track t{track, tags};
dbCreateIndexesForTrack(t);
it->Next();
}
}
@ -306,6 +289,7 @@ auto Database::Update() -> std::future<void> {
data->filepath = path;
data->tags_hash = hash;
data->modified_at = modified;
dbIngestTagHashes(*tags, data->individual_tag_hashes);
dbPutTrackData(*data);
dbPutHash(hash, id);
@ -322,6 +306,7 @@ auto Database::Update() -> std::future<void> {
new_data->filepath = path;
new_data->tags_hash = hash;
new_data->modified_at = modified;
dbIngestTagHashes(*tags, new_data->individual_tag_hashes);
dbPutTrackData(*new_data);
auto t = std::make_shared<Track>(new_data, tags);
dbCreateIndexesForTrack(*t);
@ -554,10 +539,76 @@ auto Database::dbGetHash(const uint64_t& hash) -> std::optional<TrackId> {
auto Database::dbCreateIndexesForTrack(const Track& track) -> void {
for (const IndexInfo& index : GetIndexes()) {
leveldb::WriteBatch writes;
if (Index(index, track, &writes)) {
auto entries = Index(index, track);
for (const auto& it : entries) {
writes.Put(EncodeIndexKey(it.first),
{it.second.data(), it.second.size()});
}
db_->Write(leveldb::WriteOptions(), &writes);
}
}
auto Database::dbRemoveIndexes(std::shared_ptr<TrackData> data) -> void {
  // Removes the given track from every index, deleting parent index entries
  // as well when the track was the last remaining member of that branch.
  //
  // First reconstruct the track's tags from their stored hashes, so that we
  // can recompute exactly which index keys this track contributed.
  auto tags = dbRecoverTagsFromHashes(data->individual_tag_hashes);
  if (!tags) {
    return;
  }
  Track track{data, tags};
  for (const IndexInfo& index : GetIndexes()) {
    auto entries = Index(index, track);
    // Walk the entries from most-specific to least-specific. After deleting
    // an entry, if a neighbouring entry still shares the same header, other
    // tracks still live under this branch and its parents must be kept.
    for (auto it = entries.rbegin(); it != entries.rend(); it++) {
      auto key = EncodeIndexKey(it->first);
      auto status = db_->Delete(leveldb::WriteOptions{}, key);
      if (!status.ok()) {
        return;
      }
      // The iterator is created after the delete, so Seek() positions at the
      // first entry *after* the deleted key (if any).
      std::unique_ptr<leveldb::Iterator> cursor{db_->NewIterator({})};
      cursor->Seek(key);
      bool has_siblings = false;
      if (cursor->Valid()) {
        auto next_key = ParseIndexKey(cursor->key());
        if (next_key && next_key->header == it->first.header) {
          has_siblings = true;
        }
        cursor->Prev();
      } else {
        // Seek() ran off the end of the db; the candidate predecessor is the
        // final entry (if the db is non-empty).
        cursor->SeekToLast();
      }
      // NB: LevelDB requires Valid() before key()/Next()/Prev(); these guards
      // avoid undefined behaviour when the deleted key was the first or last
      // entry in the database.
      if (!has_siblings && cursor->Valid()) {
        auto prev_key = ParseIndexKey(cursor->key());
        if (prev_key && prev_key->header == it->first.header) {
          has_siblings = true;
        }
      }
      if (has_siblings) {
        break;
      }
    }
  }
}
auto Database::dbIngestTagHashes(const TrackTags& tags,
                                 std::pmr::unordered_map<Tag, uint64_t>& out)
    -> void {
  // Persists the text of each tag keyed by its komihash, and records the
  // tag -> hash mapping into `out` (stored in the track's TrackData so the
  // tag text can later be recovered without re-reading the file).
  leveldb::WriteBatch batch{};
  for (const auto& entry : tags.tags()) {
    auto hash =
        komihash_stream_oneshot(entry.second.data(), entry.second.size(), 0);
    // Size the Slice explicitly rather than using c_str(): a C-string Slice
    // would truncate values with embedded NULs, and every other Put/set call
    // site here uses the {data, size} form.
    batch.Put(EncodeTagHashKey(hash),
              {entry.second.data(), entry.second.size()});
    out[entry.first] = hash;
  }
  db_->Write(leveldb::WriteOptions{}, &batch);
}
auto Database::dbRecoverTagsFromHashes(
    const std::pmr::unordered_map<Tag, uint64_t>& hashes)
    -> std::shared_ptr<TrackTags> {
  // Rebuilds a TrackTags instance by looking up each tag's stored text via
  // its hash key. Tags whose text is missing from the db are skipped.
  auto tags = std::make_shared<TrackTags>();
  for (const auto& [tag, hash] : hashes) {
    std::string text;
    auto status =
        db_->Get(leveldb::ReadOptions{}, EncodeTagHashKey(hash), &text);
    if (!status.ok()) {
      ESP_LOGI(kTag, "failed to retrieve tag!");
      continue;
    }
    tags->set(tag, {text.data(), text.size()});
  }
  return tags;
}
template <typename T>

@ -7,6 +7,7 @@
#pragma once
#include <stdint.h>
#include <sys/_stdint.h>
#include <cstdint>
#include <future>
#include <memory>
@ -150,6 +151,11 @@ class Database {
auto dbPutHash(const uint64_t& hash, TrackId i) -> void;
auto dbGetHash(const uint64_t& hash) -> std::optional<TrackId>;
auto dbCreateIndexesForTrack(const Track& track) -> void;
auto dbRemoveIndexes(std::shared_ptr<TrackData>) -> void;
auto dbIngestTagHashes(const TrackTags&,
std::pmr::unordered_map<Tag, uint64_t>&) -> void;
auto dbRecoverTagsFromHashes(const std::pmr::unordered_map<Tag, uint64_t>&)
-> std::shared_ptr<TrackTags>;
template <typename T>
auto dbGetPage(const Continuation& c) -> Result<T>*;

@ -46,6 +46,8 @@ struct IndexKey {
// an index consists of { kArtist, kAlbum, kTitle }, and we are at depth = 2
// then this may contain hash(hash("Jacqueline"), "My Cool Album").
std::uint64_t components_hash;
bool operator==(const Header&) const = default;
};
Header header;
@ -58,7 +60,9 @@ struct IndexKey {
std::optional<TrackId> track;
};
auto Index(const IndexInfo&, const Track&, leveldb::WriteBatch*) -> bool;
auto Index(const IndexInfo&, const Track&)
-> std::vector<std::pair<IndexKey, std::pmr::string>>;
auto ExpandHeader(const IndexKey::Header&,
const std::optional<std::pmr::string>&) -> IndexKey::Header;

@ -52,6 +52,9 @@ auto EncodeHashKey(const uint64_t& hash) -> std::string;
*/
auto EncodeHashValue(TrackId id) -> std::string;
/* Encodes a hash key for the specified hash. */
auto EncodeTagHashKey(const uint64_t& hash) -> std::string;
/*
* Parses bytes previously encoded via EncodeHashValue back into a track id. May
* return nullopt if parsing fails.

@ -83,6 +83,10 @@ class TrackTags {
auto at(const Tag& key) const -> std::optional<std::pmr::string>;
auto operator[](const Tag& key) const -> std::optional<std::pmr::string>;
auto tags() const -> const std::pmr::unordered_map<Tag, std::pmr::string>& {
return tags_;
}
/*
* Returns a hash of the 'identifying' tags of this track. That is, a hash
* that can be used to determine if one track is likely the same as another,
@ -119,12 +123,14 @@ struct TrackData {
: id(0),
filepath(&memory::kSpiRamResource),
tags_hash(0),
individual_tag_hashes(&memory::kSpiRamResource),
is_tombstoned(false),
modified_at() {}
TrackId id;
std::pmr::string filepath;
uint64_t tags_hash;
std::pmr::unordered_map<Tag, uint64_t> individual_tag_hashes;
bool is_tombstoned;
std::pair<uint16_t, uint16_t> modified_at;

@ -59,8 +59,9 @@ static auto missing_component_text(const Track& track, Tag tag)
}
}
auto Index(const IndexInfo& info, const Track& t, leveldb::WriteBatch* batch)
-> bool {
auto Index(const IndexInfo& info, const Track& t)
-> std::vector<std::pair<IndexKey, std::pmr::string>> {
std::vector<std::pair<IndexKey, std::pmr::string>> out;
IndexKey key{
.header{
.id = info.id,
@ -93,8 +94,7 @@ auto Index(const IndexInfo& info, const Track& t, leveldb::WriteBatch* batch)
value = t.TitleOrFilename();
}
auto encoded = EncodeIndexKey(key);
batch->Put(encoded, {value.data(), value.size()});
out.push_back(std::make_pair(key, value));
// If there are more components after this, then we need to finish by
// narrowing the header with the current title.
@ -102,7 +102,7 @@ auto Index(const IndexInfo& info, const Track& t, leveldb::WriteBatch* batch)
key.header = ExpandHeader(key.header, key.item);
}
}
return true;
return out;
}
auto ExpandHeader(const IndexKey::Header& header,

@ -48,6 +48,7 @@ static const char* kTag = "RECORDS";
static const char kDataPrefix = 'D';
static const char kHashPrefix = 'H';
static const char kTagHashPrefix = 'T';
static const char kIndexPrefix = 'I';
static const char kFieldSeparator = '\0';
@ -62,6 +63,11 @@ auto EncodeDataKey(const TrackId& id) -> std::string {
}
auto EncodeDataValue(const TrackData& track) -> std::string {
auto* tag_hashes = new cppbor::Map{}; // Free'd by Array's dtor.
for (const auto& entry : track.individual_tag_hashes) {
tag_hashes->add(cppbor::Uint{static_cast<uint32_t>(entry.first)},
cppbor::Uint{entry.second});
}
cppbor::Array val{
cppbor::Uint{track.id},
cppbor::Tstr{track.filepath},
@ -69,6 +75,7 @@ auto EncodeDataValue(const TrackData& track) -> std::string {
cppbor::Bool{track.is_tombstoned},
cppbor::Uint{track.modified_at.first},
cppbor::Uint{track.modified_at.second},
tag_hashes,
};
return val.toString();
}
@ -80,12 +87,13 @@ auto ParseDataValue(const leveldb::Slice& slice) -> std::shared_ptr<TrackData> {
return nullptr;
}
auto vals = item->asArray();
if (vals->size() != 6 || vals->get(0)->type() != cppbor::UINT ||
if (vals->size() != 7 || vals->get(0)->type() != cppbor::UINT ||
vals->get(1)->type() != cppbor::TSTR ||
vals->get(2)->type() != cppbor::UINT ||
vals->get(3)->type() != cppbor::SIMPLE ||
vals->get(4)->type() != cppbor::UINT ||
vals->get(5)->type() != cppbor::UINT) {
vals->get(5)->type() != cppbor::UINT ||
vals->get(6)->type() != cppbor::MAP) {
return {};
}
auto res = std::make_shared<TrackData>();
@ -96,6 +104,12 @@ auto ParseDataValue(const leveldb::Slice& slice) -> std::shared_ptr<TrackData> {
res->modified_at = std::make_pair<uint16_t, uint16_t>(
vals->get(4)->asUint()->unsignedValue(),
vals->get(5)->asUint()->unsignedValue());
auto tag_hashes = vals->get(6)->asMap();
for (const auto& entry : *tag_hashes) {
auto tag = static_cast<Tag>(entry.first->asUint()->unsignedValue());
res->individual_tag_hashes[tag] = entry.second->asUint()->unsignedValue();
}
return res;
}
@ -113,6 +127,12 @@ auto EncodeHashValue(TrackId id) -> std::string {
return TrackIdToBytes(id);
}
/* 'T/ 0xBEEF' */
// Builds the db key under which a tag's text is stored: the tag-hash prefix,
// a field separator, then the CBOR-encoded hash.
auto EncodeTagHashKey(const uint64_t& hash) -> std::string {
  std::string key{kTagHashPrefix, kFieldSeparator};
  key += cppbor::Uint{hash}.toString();
  return key;
}
/* 'I/' */
auto EncodeAllIndexesPrefix() -> std::string {
return {kIndexPrefix, kFieldSeparator};

Loading…
Cancel
Save