diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp
index 67893b6e..9854f693 100644
--- a/src/tangara/database/database.cpp
+++ b/src/tangara/database/database.cpp
@@ -7,6 +7,7 @@
 #include "database/database.hpp"
 
 #include
+#include
 #include
 #include
 #include
@@ -21,6 +22,7 @@
 #include "cppbor.h"
 #include "cppbor_parse.h"
+#include "debug.hpp"
 #include "esp_log.h"
 #include "esp_timer.h"
 #include "ff.h"
@@ -66,8 +68,8 @@ static std::atomic sIsDbOpen(false);
 using std::placeholders::_1;
 using std::placeholders::_2;
 
-static auto CreateNewDatabase(leveldb::Options& options, locale::ICollator& col)
-    -> leveldb::DB* {
+static auto CreateNewDatabase(leveldb::Options& options,
+                              locale::ICollator& col) -> leveldb::DB* {
   Database::Destroy();
   leveldb::DB* db;
   options.create_if_missing = true;
@@ -348,6 +350,8 @@ auto Database::updateIndexes() -> void {
   }
   update_tracker_ = std::make_unique();
 
+  tag_parser_.ClearCaches();
+
   leveldb::ReadOptions read_options;
   read_options.fill_cache = false;
   read_options.verify_checksums = true;
@@ -373,21 +377,24 @@ auto Database::updateIndexes() -> void {
         continue;
       }
 
+      std::shared_ptr tags;
       FILINFO info;
       FRESULT res = f_stat(track->filepath.c_str(), &info);
-
-      std::pair modified_at{0, 0};
       if (res == FR_OK) {
-        modified_at = {info.fdate, info.ftime};
-      }
-      if (modified_at == track->modified_at) {
-        continue;
-      } else {
-        track->modified_at = modified_at;
+        std::pair modified_at{info.fdate, info.ftime};
+        if (modified_at == track->modified_at) {
+          continue;
+        } else {
+          // Will be written out later; we make sure we update the track's
+          // modification time and tags in the same batch so that interrupted
+          // reindexes don't cause a big mess.
+          track->modified_at = modified_at;
+        }
+
+        tags = tag_parser_.ReadAndParseTags(
+            {track->filepath.data(), track->filepath.size()});
       }
-      std::shared_ptr tags = tag_parser_.ReadAndParseTags(
-          {track->filepath.data(), track->filepath.size()});
       if (!tags || tags->encoding() == Container::kUnsupported) {
         // We couldn't read the tags for this track. Either they were
         // malformed, or perhaps the file is missing. Either way, tombstone
@@ -411,30 +418,24 @@ auto Database::updateIndexes() -> void {
 
       // At this point, we know that the track still exists in its original
       // location. All that's left to do is update any metadata about it.
-      auto new_type = calculateMediaType(*tags, track->filepath);
-      uint64_t new_hash = tags->Hash();
-      if (new_hash != track->tags_hash || new_type != track->type) {
-        // This track's tags have changed. Since the filepath is exactly the
-        // same, we assume this is a legitimate correction. Update the
-        // database.
-        ESP_LOGI(kTag, "updating hash (%llx -> %llx)", track->tags_hash,
-                 new_hash);
-
-        // Again, we remove the old index records first so has to avoid
-        // dangling references.
-        dbRemoveIndexes(track);
+      // Remove the old index records first so as to avoid dangling records.
+      dbRemoveIndexes(track);
 
-        // Atomically correct the hash + create the new index records.
-        leveldb::WriteBatch batch;
-        track->tags_hash = new_hash;
-        dbIngestTagHashes(*tags, track->individual_tag_hashes, batch);
+      // Atomically correct the hash + create the new index records.
+      leveldb::WriteBatch batch;
 
-        track->type = new_type;
-        dbCreateIndexesForTrack(*track, *tags, batch);
-        batch.Put(EncodeDataKey(track->id), EncodeDataValue(*track));
+      uint64_t new_hash = tags->Hash();
+      if (track->tags_hash != new_hash) {
+        track->tags_hash = new_hash;
         batch.Put(EncodeHashKey(new_hash), EncodeHashValue(track->id));
-        db_->Write(leveldb::WriteOptions(), &batch);
       }
+
+      track->type = calculateMediaType(*tags, track->filepath);
+      batch.Put(EncodeDataKey(track->id), EncodeDataValue(*track));
+
+      dbIngestTagHashes(*tags, track->individual_tag_hashes, batch);
+      dbCreateIndexesForTrack(*track, *tags, batch);
+      db_->Write(leveldb::WriteOptions(), &batch);
     }
   }
@@ -445,8 +446,8 @@
   track_finder_.launch("");
 };
 
-auto Database::processCandidateCallback(FILINFO& info, std::string_view path)
-    -> void {
+auto Database::processCandidateCallback(FILINFO& info,
+                                        std::string_view path) -> void {
   leveldb::ReadOptions read_options;
   read_options.fill_cache = true;
   read_options.verify_checksums = false;
@@ -530,9 +531,8 @@ static constexpr char kMusicMediaPath[] = "/Music/";
 static constexpr char kPodcastMediaPath[] = "/Podcasts/";
 static constexpr char kAudiobookMediaPath[] = "/Audiobooks/";
 
-auto Database::calculateMediaType(TrackTags& tags, std::string_view path)
-    -> MediaType {
-
+auto Database::calculateMediaType(TrackTags& tags,
+                                  std::string_view path) -> MediaType {
   auto equalsIgnoreCase = [&](char lhs, char rhs) {
     return std::tolower(lhs) == std::tolower(rhs);
   };
@@ -540,7 +540,8 @@ auto Database::calculateMediaType(TrackTags& tags, std::string_view path)
   // Use the filepath first, since it's the most explicit way for the user to
   // tell us what this track is.
   auto checkPathPrefix = [&](std::string_view path, std::string prefix) {
-    auto res = std::mismatch(prefix.begin(), prefix.end(), path.begin(), path.end(), equalsIgnoreCase);
+    auto res = std::mismatch(prefix.begin(), prefix.end(), path.begin(),
+                             path.end(), equalsIgnoreCase);
     return res.first == prefix.end();
   };
@@ -634,8 +635,8 @@ auto Database::dbMintNewTrackId() -> TrackId {
   return next_track_id_++;
 }
 
-auto Database::dbGetTrackData(leveldb::ReadOptions options, TrackId id)
-    -> std::shared_ptr {
+auto Database::dbGetTrackData(leveldb::ReadOptions options,
+                              TrackId id) -> std::shared_ptr {
   std::string key = EncodeDataKey(id);
   std::string raw_val;
   if (!db_->Get(options, key, &raw_val).ok()) {
@@ -668,26 +669,55 @@ auto Database::dbRemoveIndexes(std::shared_ptr data) -> void {
   }
   for (const IndexInfo& index : getIndexes()) {
     auto entries = Index(collator_, index, *data, *tags);
+    std::optional preserve_depth{};
+
+    // Iterate through the index records backwards, so that we start deleting
+    // records from the highest depth. This allows us to work out what depth to
+    // stop at as we go.
+    // e.g. if the last track of an album is being deleted, we need to delete
+    // the album index record at n-1 depth. But if there are still other tracks
+    // in the album, then we should only clear records at depth n.
     for (auto it = entries.rbegin(); it != entries.rend(); it++) {
+      if (preserve_depth) {
+        if (it->first.header.components_hash.size() < *preserve_depth) {
+          // Some records deeper than this were not removed, so don't try to
+          // remove this one.
+          continue;
+        }
+        // A record wasn't removed, but the current record is either a
+        // sibling of that record, or a leaf belonging to a sibling of that
+        // record. Either way, we can start deleting records again.
+        // preserve_depth will be set to a new value if we start encountering
+        // siblings again.
+        preserve_depth.reset();
+      }
+
       auto key = EncodeIndexKey(it->first);
       auto status = db_->Delete(leveldb::WriteOptions{}, key);
       if (!status.ok()) {
         return;
       }
 
+      // Are any sibling records left at this depth? If two index records have
+      // matching headers, they are siblings (same depth, same selections at
+      // lower depths)
      std::unique_ptr cursor{db_->NewIterator({})};
+
+      // Check the record before the one we just deleted.
       cursor->Seek(key);
       cursor->Prev();
-
       auto prev_key = ParseIndexKey(cursor->key());
       if (prev_key && prev_key->header == it->first.header) {
-        break;
+        preserve_depth = it->first.header.components_hash.size();
+        continue;
       }
+
+      // Check the record after.
       cursor->Next();
       auto next_key = ParseIndexKey(cursor->key());
       if (next_key && next_key->header == it->first.header) {
-        break;
+        preserve_depth = it->first.header.components_hash.size();
+        continue;
       }
     }
   }
@@ -809,8 +839,7 @@ Iterator::Iterator(std::shared_ptr db, IndexId idx)
     : Iterator(db,
                IndexKey::Header{
                    .id = idx,
-                   .depth = 0,
-                   .components_hash = 0,
+                   .components_hash = {},
                }) {}
 
 Iterator::Iterator(std::shared_ptr db, const IndexKey::Header& header)
@@ -883,8 +912,8 @@ auto TrackIterator::next() -> void {
     auto& cur = levels_.back().value();
 
     if (!cur) {
-      // The current top iterator is out of tracks. Pop it, and move the parent
-      // to the next item.
+      // The current top iterator is out of tracks. Pop it, and move the
+      // parent to the next item.
       levels_.pop_back();
     } else if (std::holds_alternative(cur->contents())) {
       // This record is a branch. Push a new iterator.
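
Note on the dbRemoveIndexes() change above: records are visited leaf-first, and preserve_depth marks the depth above which ancestor records must survive because a sibling was found next to a deleted record. Below is a minimal, self-contained sketch of that idea. Everything in it is a stand-in: a std::map plays the role of leveldb's sorted key space, a key's depth is its count of '/' separators, and siblings are keys sharing a parent prefix, whereas the real code compares CBOR-encoded IndexKey::Headers.

#include <algorithm>
#include <map>
#include <optional>
#include <string>
#include <vector>

static size_t depthOf(const std::string& key) {
  return static_cast<size_t>(std::count(key.begin(), key.end(), '/'));
}

static std::string parentOf(const std::string& key) {
  auto pos = key.rfind('/');
  return pos == std::string::npos ? std::string{} : key.substr(0, pos);
}

// `entries` holds the record keys for a single track, root-to-leaf order.
// (A track can contribute several records at the same depth, e.g. one per
// artist, which is why the reset branch below can trigger.)
void removeTrackEntries(std::map<std::string, int>& db,
                        const std::vector<std::string>& entries) {
  std::optional<size_t> preserve_depth{};
  for (auto it = entries.rbegin(); it != entries.rend(); it++) {
    if (preserve_depth) {
      if (depthOf(*it) < *preserve_depth) {
        // An ancestor of a record that still has other children; keep it.
        continue;
      }
      // Back in a different subtree; resume deleting.
      preserve_depth.reset();
    }
    db.erase(*it);
    // If a sibling survives at this depth, every shallower ancestor must stay.
    for (const auto& entry : db) {
      if (parentOf(entry.first) == parentOf(*it)) {
        preserve_depth = depthOf(*it);
        break;
      }
    }
  }
}

The sketch scans the whole map for a surviving sibling; the real implementation only needs to look at the records immediately before and after the deleted key, since records that share a header are contiguous in leveldb's ordering.
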
diff --git a/src/tangara/database/database.hpp b/src/tangara/database/database.hpp
index ef96f859..6d91b04c 100644
--- a/src/tangara/database/database.hpp
+++ b/src/tangara/database/database.hpp
@@ -38,7 +38,7 @@ namespace database {
 
-const uint8_t kCurrentDbVersion = 9;
+const uint8_t kCurrentDbVersion = 10;
 
 struct SearchKey;
 class Record;
diff --git a/src/tangara/database/index.cpp b/src/tangara/database/index.cpp
index e60f56d5..7f594700 100644
--- a/src/tangara/database/index.cpp
+++ b/src/tangara/database/index.cpp
@@ -77,8 +77,8 @@ const IndexInfo kAudiobooks{
     .components = {Tag::kAlbum, Tag::kAlbumOrder},
 };
 
-static auto titleOrFilename(const TrackData& data, const TrackTags& tags)
-    -> std::pmr::string {
+static auto titleOrFilename(const TrackData& data,
+                            const TrackTags& tags) -> std::pmr::string {
   auto title = tags.title();
   if (title) {
     return *title;
@@ -147,8 +147,7 @@ auto Indexer::index() -> std::vector> {
   IndexKey::Header root_header{
       .id = index_.id,
-      .depth = 0,
-      .components_hash = 0,
+      .components_hash = {},
   };
   handleLevel(root_header, index_.components);
 
@@ -237,16 +236,10 @@ auto Index(locale::ICollator& collator,
 }
 
 auto ExpandHeader(const IndexKey::Header& header,
-                  const std::optional& component)
-    -> IndexKey::Header {
+                  const std::pmr::string& component) -> IndexKey::Header {
   IndexKey::Header ret{header};
-  ret.depth++;
-  if (component) {
-    ret.components_hash =
-        komihash(component->data(), component->size(), ret.components_hash);
-  } else {
-    ret.components_hash = komihash(NULL, 0, ret.components_hash);
-  }
+  ret.components_hash.push_back(
+      komihash(component.data(), component.size(), 0));
  return ret;
 }
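
For reference, a sketch of what the reworked ExpandHeader() does with the per-level hashes. The Header struct here is a pared-down, hypothetical stand-in for database::IndexKey::Header (the real definition is in index.hpp below), and komihash is assumed to be available via komihash.h with its usual (pointer, length, seed) signature, as used in the code above.

#include <cstdint>
#include <string>
#include <vector>

#include "komihash.h"

struct Header {
  uint32_t id;
  std::vector<uint64_t> components_hash;  // one hash per selected component
};

// Depth used to be a separate field; now it falls out of the vector's size.
static Header expand(const Header& parent, const std::string& component) {
  Header child{parent};
  child.components_hash.push_back(
      komihash(component.data(), component.size(), 0));
  return child;
}

int main() {
  Header root{.id = 1, .components_hash = {}};      // depth 0
  Header artist = expand(root, "Jacqueline");       // depth 1
  Header album = expand(artist, "My Cool Album");   // depth 2

  // Because each level keeps its own hash, a parent's components_hash is a
  // strict prefix of its children's, which is what lets dbRemoveIndexes()
  // compare depths while walking back up the tree.
  return static_cast<int>(album.components_hash.size());  // 2
}
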
diff --git a/src/tangara/database/index.hpp b/src/tangara/database/index.hpp
index 98e57d54..c4dd5701 100644
--- a/src/tangara/database/index.hpp
+++ b/src/tangara/database/index.hpp
@@ -41,14 +41,8 @@ struct IndexKey {
   struct Header {
     // The index that this key was created for.
     IndexId id;
-    // The number of components of IndexInfo that have already been filtered.
-    // For example, if an index consists of { kGenre, kArtist }, and this key
-    // represents an artist, then depth = 1.
-    std::uint8_t depth;
-    // The cumulative hash of all filtered components, in order. For example, if
-    // an index consists of { kArtist, kAlbum, kTitle }, and we are at depth = 2
-    // then this may contain hash(hash("Jacqueline"), "My Cool Album").
-    std::uint64_t components_hash;
+    // The hashes of all filtered components, in order, up to the current depth
+    std::vector components_hash;
 
     bool operator==(const Header&) const = default;
   };
@@ -56,7 +50,7 @@
 
   // The filterable / selectable item that this key represents. "Jacqueline" for
   // kArtist, "My Cool Album" for kAlbum, etc.
-  std::optional item;
+  std::pmr::string item;
   // If this is a leaf component, the track id for this record.
   // This could reasonably be the value for a record, but we keep it as a part
   // of the key to help with disambiguation.
@@ -69,7 +63,7 @@ auto Index(locale::ICollator&,
           const TrackTags&) -> std::vector>;
 
 auto ExpandHeader(const IndexKey::Header&,
-                  const std::optional&) -> IndexKey::Header;
+                  const std::pmr::string&) -> IndexKey::Header;
 
 // Predefined indexes
 // TODO(jacqueline): Make these defined at runtime! :)
diff --git a/src/tangara/database/records.cpp b/src/tangara/database/records.cpp
index addcc13d..852eb131 100644
--- a/src/tangara/database/records.cpp
+++ b/src/tangara/database/records.cpp
@@ -181,11 +181,13 @@ auto EncodeAllIndexesPrefix() -> std::string {
 auto EncodeIndexPrefix(const IndexKey::Header& header) -> std::string {
   std::ostringstream out;
   out << makePrefix(kIndexPrefix);
-  cppbor::Array val{
-      cppbor::Uint{header.id},
-      cppbor::Uint{header.depth},
-      cppbor::Uint{header.components_hash},
-  };
+
+  cppbor::Array components{};
+  for (auto hash : header.components_hash) {
+    components.add(cppbor::Uint{hash});
+  }
+
+  cppbor::Array val{cppbor::Uint{header.id}, std::move(components)};
   out << val.toString() << kFieldSeparator;
   return out.str();
 }
@@ -210,8 +212,8 @@ auto EncodeIndexKey(const IndexKey& key) -> std::string {
   out << EncodeIndexPrefix(key.header);
 
   // The component should already be UTF-8 encoded, so just write it.
-  if (key.item) {
-    out << *key.item << kFieldSeparator;
+  if (!key.item.empty()) {
+    out << key.item << kFieldSeparator;
   }
 
   if (key.track) {
@@ -236,14 +238,16 @@ auto ParseIndexKey(const leveldb::Slice& slice) -> std::optional {
     return {};
   }
   auto as_array = key->asArray();
-  if (as_array->size() != 3 || as_array->get(0)->type() != cppbor::UINT ||
-      as_array->get(1)->type() != cppbor::UINT ||
-      as_array->get(2)->type() != cppbor::UINT) {
+  if (as_array->size() != 2 || as_array->get(0)->type() != cppbor::UINT ||
+      as_array->get(1)->type() != cppbor::ARRAY) {
     return {};
   }
   result.header.id = as_array->get(0)->asUint()->unsignedValue();
-  result.header.depth = as_array->get(1)->asUint()->unsignedValue();
-  result.header.components_hash = as_array->get(2)->asUint()->unsignedValue();
+  auto components_array = as_array->get(1)->asArray();
+  for (int i = 0; i < components_array->size(); i++) {
+    result.header.components_hash.push_back(
+        components_array->get(i)->asUint()->unsignedValue());
+  }
 
   size_t header_length =
       reinterpret_cast(end_of_key) - key_data.data();
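The encoding change in EncodeIndexPrefix() boils down to a different CBOR shape for the key prefix. A rough sketch, using the same cppbor calls as the code above and omitting the kIndexPrefix marker and kFieldSeparator handling:

#include <cstdint>
#include <string>
#include <vector>

#include "cppbor.h"

std::string encodePrefix(uint32_t id, const std::vector<uint64_t>& hashes) {
  cppbor::Array components{};
  for (auto hash : hashes) {
    components.add(cppbor::Uint{hash});
  }
  // Old layout: [id, depth, cumulative_hash]. New layout: [id, [h0, h1, ...]],
  // so the depth is implicit and each level's hash stays recoverable.
  cppbor::Array val{cppbor::Uint{id}, std::move(components)};
  return val.toString();
}

Since ParseIndexKey() now rejects anything that isn't a two-element [uint, array] structure, keys written under the old layout no longer parse, which is presumably why kCurrentDbVersion is bumped to 10 in database.hpp.
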
diff --git a/src/tangara/database/tag_parser.cpp b/src/tangara/database/tag_parser.cpp
index 6cdf6175..59bd8f13 100644
--- a/src/tangara/database/tag_parser.cpp
+++ b/src/tangara/database/tag_parser.cpp
@@ -190,6 +190,12 @@ auto TagParserImpl::ReadAndParseTags(std::string_view path)
     }
   }
 
+  // There wasn't a fine-grained 'Artists' tag in the source. Synthesize a tag
+  // based on the singlet 'Artist' field.
+  if (tags->allArtists().empty() && tags->artist()) {
+    tags->singleAllArtists(tags->artist().value());
+  }
+
   // Store the result in the cache for later.
   {
     std::lock_guard lock{cache_mutex_};
@@ -199,6 +205,11 @@ auto TagParserImpl::ReadAndParseTags(std::string_view path)
   return tags;
 }
 
+auto TagParserImpl::ClearCaches() -> void {
+  std::lock_guard lock{cache_mutex_};
+  cache_.Clear();
+}
+
 OggTagParser::OggTagParser() {}
 
 auto OggTagParser::ReadAndParseTags(std::string_view p)
diff --git a/src/tangara/database/tag_parser.hpp b/src/tangara/database/tag_parser.hpp
index 69b71940..220339c0 100644
--- a/src/tangara/database/tag_parser.hpp
+++ b/src/tangara/database/tag_parser.hpp
@@ -19,6 +19,8 @@ class ITagParser {
   virtual ~ITagParser() {}
   virtual auto ReadAndParseTags(std::string_view path)
       -> std::shared_ptr = 0;
+
+  virtual auto ClearCaches() -> void {}
 };
 
 class TagParserImpl : public ITagParser {
@@ -27,6 +29,8 @@ class TagParserImpl : public ITagParser {
   auto ReadAndParseTags(std::string_view path)
       -> std::shared_ptr override;
 
+  auto ClearCaches() -> void override;
+
  private:
  std::vector> parsers_;
 
@@ -58,14 +62,8 @@ class GenericTagParser : public ITagParser {
   // Supported file extensions for parsing tags, derived from the list of
   // supported audio formats here:
   // https://cooltech.zone/tangara/docs/music-library/
-  static constexpr std::string supported_exts[] = {
-      "flac",
-      "mp3",
-      "ogg",
-      "ogx",
-      "opus",
-      "wav"
-  };
+  static constexpr std::string supported_exts[] = {"flac", "mp3", "ogg",
+                                                   "ogx", "opus", "wav"};
 };
 
 }  // namespace database
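A sketch of how a caching ITagParser implementation is expected to behave with the new ClearCaches() hook. The class name, mutex, and map below are illustrative only (the real TagParserImpl guards an internal cache with cache_mutex_), and ReadAndParseTags is assumed to return std::shared_ptr<TrackTags>.

#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <string_view>

struct TrackTags {};  // stand-in for database::TrackTags

class CachingTagParser {
 public:
  auto ReadAndParseTags(std::string_view path) -> std::shared_ptr<TrackTags> {
    std::lock_guard<std::mutex> lock{mutex_};
    auto it = cache_.find(std::string{path});
    if (it != cache_.end()) {
      return it->second;
    }
    auto tags = std::make_shared<TrackTags>();  // real parsing happens here
    cache_[std::string{path}] = tags;
    return tags;
  }

  // Called by Database::updateIndexes() before a rescan, so that files edited
  // since the last scan aren't masked by stale cached tags.
  auto ClearCaches() -> void {
    std::lock_guard<std::mutex> lock{mutex_};
    cache_.clear();
  }

 private:
  std::mutex mutex_;
  std::map<std::string, std::shared_ptr<TrackTags>> cache_;
};
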
diff --git a/src/tangara/database/track.cpp b/src/tangara/database/track.cpp
index ad9db1ba..dc66c9c9 100644
--- a/src/tangara/database/track.cpp
+++ b/src/tangara/database/track.cpp
@@ -83,11 +83,17 @@ auto tagToString(const TagValue& val) -> std::string {
           return std::to_string(arg);
         } else if constexpr (std::is_same_v<
                                  T, std::span>) {
-          std::ostringstream builder{};
+          std::stringstream stream{};
+          bool first = true;
           for (const auto& str : arg) {
-            builder << std::string{str.data(), str.size()} << ",";
+            if (first) {
+              first = false;
+            } else {
+              stream << ";";
+            }
+            stream << std::string{str.data(), str.size()};
           }
-          return builder.str();
+          return stream.str();
         }
       },
       val);
@@ -101,7 +107,6 @@ auto tagToString(const TagValue& val) -> std::string {
 auto parseDelimitedTags(const std::string_view s,
                         const char* delimiters,
                         std::pmr::vector& out) -> void {
-  out.clear();
   std::string src = {s.data(), s.size()};
 
   char* token = std::strtok(src.data(), delimiters);
@@ -240,7 +245,6 @@ auto TrackTags::artist() const -> const std::optional& {
 
 auto TrackTags::artist(std::string_view s) -> void {
   artist_ = s;
-  maybeSynthesizeAllArtists();
 }
 
 auto TrackTags::allArtists() const -> std::span {
@@ -249,7 +253,10 @@ auto TrackTags::allArtists() const -> std::span {
 
 auto TrackTags::allArtists(const std::string_view s) -> void {
   parseDelimitedTags(s, kAllArtistDelimiters, allArtists_);
-  maybeSynthesizeAllArtists();
+}
+
+auto TrackTags::singleAllArtists(const std::string_view s) -> void {
+  allArtists_.emplace_back(s);
 }
 
 auto TrackTags::album() const -> const std::optional& {
@@ -325,17 +332,6 @@ auto TrackTags::Hash() const -> uint64_t {
   return komihash_stream_final(&stream);
 }
 
-/*
- * Adds the current 'artist' tag to 'allArtists' if needed. Many tracks lack a
- * fine-grained 'ARTISTS=' tag (or equivalent), but pushing down this nuance to
- * consumers of TrackTags adds a lot of complexity.
- */
-auto TrackTags::maybeSynthesizeAllArtists() -> void {
-  if (allArtists_.empty() && artist_) {
-    allArtists_.push_back(*artist_);
-  }
-}
-
 auto database::TrackData::clone() const -> std::shared_ptr {
   auto data = std::make_shared();
   data->id = id;
diff --git a/src/tangara/database/track.hpp b/src/tangara/database/track.hpp
index 71eb44ce..c7dff425 100644
--- a/src/tangara/database/track.hpp
+++ b/src/tangara/database/track.hpp
@@ -117,6 +117,7 @@ class TrackTags {
   auto allArtists() const -> std::span;
   auto allArtists(const std::string_view) -> void;
+  auto singleAllArtists(const std::string_view) -> void;
 
   auto album() const -> const std::optional&;
   auto album(std::string_view) -> void;
@@ -144,8 +145,6 @@ class TrackTags {
   auto Hash() const -> uint64_t;
 
  private:
-  auto maybeSynthesizeAllArtists() -> void;
-
   Container encoding_;
 
   std::optional title_;
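The track.cpp changes touch both directions of multi-valued tags: parseDelimitedTags() splits with strtok (and no longer clears the output vector first), while tagToString() now joins values with ';' and without a trailing separator. A standalone sketch of that round trip; the delimiter string used here is illustrative, since the contents of kAllArtistDelimiters aren't part of this diff.

#include <cstring>
#include <sstream>
#include <string>
#include <vector>

static std::vector<std::string> split(const std::string& s,
                                      const char* delims) {
  std::vector<std::string> out;
  std::string src = s;  // strtok mutates its input, so work on a copy
  for (char* tok = std::strtok(src.data(), delims); tok != nullptr;
       tok = std::strtok(nullptr, delims)) {
    out.emplace_back(tok);
  }
  return out;
}

// Mirrors the new tagToString() behaviour: values joined with ";" and no
// trailing separator (the old code appended "," after every value).
static std::string join(const std::vector<std::string>& values) {
  std::stringstream stream;
  bool first = true;
  for (const auto& v : values) {
    if (first) {
      first = false;
    } else {
      stream << ";";
    }
    stream << v;
  }
  return stream.str();
}

int main() {
  auto artists = split("Jacqueline; Some Band", ";");
  return join(artists) == "Jacqueline; Some Band" ? 0 : 1;
}
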
diff --git a/src/tangara/lua/lua_database.cpp b/src/tangara/lua/lua_database.cpp
index e79d6141..928dbc39 100644
--- a/src/tangara/lua/lua_database.cpp
+++ b/src/tangara/lua/lua_database.cpp
@@ -190,35 +190,24 @@ static const struct luaL_Reg kDatabaseFuncs[] = {
     {"update", update},
     {"track_by_id", track_by_id},
     {NULL, NULL}};
 
-/*
- * Struct to be used as userdata for the Lua representation of database records.
- * In order to push these large values into PSRAM as much as possible, memory
- * for these is allocated and managed by Lua. This struct must therefore be
- * trivially copyable.
- */
-struct LuaRecord {
-  std::variant contents;
-  size_t text_size;
-  char text[];
-};
-
-static_assert(std::is_trivially_destructible());
-static_assert(std::is_trivially_copy_assignable());
-
-static auto push_lua_record(lua_State* L, const database::Record& r) -> void {
-  // Create and init the userdata.
-  LuaRecord* record = reinterpret_cast(
-      lua_newuserdata(L, sizeof(LuaRecord) + r.text().size()));
-  luaL_setmetatable(L, kDbRecordMetatable);
+static auto push_lua_record(lua_State* state,
+                            const database::Record& r) -> void {
+  database::Record** data = reinterpret_cast(
+      lua_newuserdata(state, sizeof(uintptr_t)));
+  *data = new database::Record(r);
+  luaL_setmetatable(state, kDbRecordMetatable);
+}
 
-  // Init all the fields
-  *record = {
-      .contents = r.contents(),
-      .text_size = r.text().size(),
-  };
+auto db_check_record(lua_State* L, int stack_pos) -> database::Record* {
+  database::Record* rec = *reinterpret_cast(
+      luaL_checkudata(L, stack_pos, kDbRecordMetatable));
+  return rec;
+}
 
-  // Copy the string data across.
-  std::memcpy(record->text, r.text().data(), r.text().size());
+static auto db_record_gc(lua_State* state) -> int {
+  database::Record* rec = db_check_record(state, 1);
+  delete rec;
+  return 0;
 }
 
 auto db_check_iterator(lua_State* L, int stack_pos) -> database::Iterator* {
@@ -227,8 +216,8 @@ auto db_check_iterator(lua_State* L, int stack_pos) -> database::Iterator* {
   return it;
 }
 
-static auto push_iterator(lua_State* state, const database::Iterator& it)
-    -> void {
+static auto push_iterator(lua_State* state,
+                          const database::Iterator& it) -> void {
   database::Iterator** data = reinterpret_cast(
       lua_newuserdata(state, sizeof(uintptr_t)));
   *data = new database::Iterator(it);
@@ -279,15 +268,13 @@ static const struct luaL_Reg kDbIteratorFuncs[] = {
     {"__gc", db_iterator_gc},
     {NULL, NULL}};
 
 static auto record_text(lua_State* state) -> int {
-  LuaRecord* data = reinterpret_cast(
-      luaL_checkudata(state, 1, kDbRecordMetatable));
-  lua_pushlstring(state, data->text, data->text_size);
+  database::Record* rec = db_check_record(state, 1);
+  lua_pushlstring(state, rec->text().data(), rec->text().size());
   return 1;
 }
 
 static auto record_contents(lua_State* state) -> int {
-  LuaRecord* data = reinterpret_cast(
-      luaL_checkudata(state, 1, kDbRecordMetatable));
+  database::Record* rec = db_check_record(state, 1);
 
   std::visit(
       [&](auto&& arg) {
@@ -304,7 +291,7 @@ static auto record_contents(lua_State* state) -> int {
           }
         }
       },
-      data->contents);
+      rec->contents());
 
   return 1;
 }
@@ -312,6 +299,7 @@ static auto record_contents(lua_State* state) -> int {
 static const struct luaL_Reg kDbRecordFuncs[] = {{"title", record_text},
                                                  {"contents", record_contents},
                                                  {"__tostring", record_text},
+                                                 {"__gc", db_record_gc},
                                                  {NULL, NULL}};
 
 static auto index_name(lua_State* state) -> int {
@@ -405,7 +393,6 @@ static auto lua_database(lua_State* state) -> int {
   lua_rawset(state, -3);
   lua_rawset(state, -3);
 
-
   lua_pushliteral(state, "IndexTypes");
   lua_newtable(state);
   lua_pushliteral(state, "ALBUMS_BY_ARTIST");
diff --git a/src/tangara/lua/lua_database.hpp b/src/tangara/lua/lua_database.hpp
index 51e71758..fa2c26a3 100644
--- a/src/tangara/lua/lua_database.hpp
+++ b/src/tangara/lua/lua_database.hpp
@@ -13,8 +13,9 @@ namespace lua {
 
 auto db_check_iterator(lua_State*, int stack_pos) -> database::Iterator*;
+auto db_check_record(lua_State*, int stack_pos) -> database::Record*;
 
-auto pushTagValue(lua_State* L, const database::TagValue& val) -> void;
+auto pushTagValue(lua_State* L, const database::TagValue& val) -> void;
 
 auto RegisterDatabaseModule(lua_State*) -> void;
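
Finally, a generic sketch of the userdata pattern the record bindings now share with the iterator bindings: the Lua userdata block holds only a pointer, the C++ object is heap-allocated, and a __gc metamethod deletes it when Lua collects the userdata. All names below (Thing, kMetatable) are illustrative; only the API calls mirror the ones used in lua_database.cpp.

#include "lua.hpp"

struct Thing {
  int value;
};

static constexpr char kMetatable[] = "example.Thing";

static int thing_gc(lua_State* L) {
  // The userdata holds a Thing*, so free the heap object it points at.
  Thing* t = *static_cast<Thing**>(luaL_checkudata(L, 1, kMetatable));
  delete t;
  return 0;
}

static void push_thing(lua_State* L, int value) {
  // The userdata only needs room for one pointer.
  Thing** slot = static_cast<Thing**>(lua_newuserdata(L, sizeof(Thing*)));
  *slot = new Thing{value};
  luaL_setmetatable(L, kMetatable);
}

void register_thing(lua_State* L) {
  luaL_newmetatable(L, kMetatable);
  lua_pushcfunction(L, thing_gc);
  lua_setfield(L, -2, "__gc");
  lua_pop(L, 1);
}

The trade-off versus the removed LuaRecord approach is that database::Record no longer has to be trivially copyable (it can own strings and variants normally, with __gc covering cleanup), at the cost of the record living on the C++ heap rather than in Lua-managed memory.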