Resolve some issues with dangling index records (#193)

- The tag parser's cache is now cleared between indexing runs, preventing stale data from being used
 - Multi-value tag fields (genres, all artists) are now properly ingested in the tag value cache
 - Cleaning up removed index records now properly handles the case where only a subset of the records for multi-value tags needs to be deleted.
 - Synthesizing missing tag values is now done in the tag parser instead of TrackTags, which resolves some issues with multi-value tag callbacks from libtags not being handled properly

These fixes seem to address all the issues with stale index records we were able to repro (including the issues in https://codeberg.org/cool-tech-zone/tangara-fw/issues/191), but if you've got any more cases with consistent repros then lmk!

Co-authored-by: ailurux <ailuruxx@gmail.com>
Reviewed-on: https://codeberg.org/cool-tech-zone/tangara-fw/pulls/193
custom
cooljqln 3 months ago
parent eeead03747
commit 580712acd1
  1. 123
      src/tangara/database/database.cpp
  2. 2
      src/tangara/database/database.hpp
  3. 19
      src/tangara/database/index.cpp
  4. 14
      src/tangara/database/index.hpp
  5. 28
      src/tangara/database/records.cpp
  6. 11
      src/tangara/database/tag_parser.cpp
  7. 14
      src/tangara/database/tag_parser.hpp
  8. 30
      src/tangara/database/track.cpp
  9. 3
      src/tangara/database/track.hpp
  10. 59
      src/tangara/lua/lua_database.cpp
  11. 3
      src/tangara/lua/lua_database.hpp

@ -7,6 +7,7 @@
#include "database/database.hpp"
#include <bits/ranges_algo.h>
#include <stdint.h>
#include <algorithm>
#include <cctype>
#include <cstdint>
@ -21,6 +22,7 @@
#include "cppbor.h"
#include "cppbor_parse.h"
#include "debug.hpp"
#include "esp_log.h"
#include "esp_timer.h"
#include "ff.h"
@ -66,8 +68,8 @@ static std::atomic<bool> sIsDbOpen(false);
using std::placeholders::_1;
using std::placeholders::_2;
static auto CreateNewDatabase(leveldb::Options& options, locale::ICollator& col)
-> leveldb::DB* {
static auto CreateNewDatabase(leveldb::Options& options,
locale::ICollator& col) -> leveldb::DB* {
Database::Destroy();
leveldb::DB* db;
options.create_if_missing = true;
@ -348,6 +350,8 @@ auto Database::updateIndexes() -> void {
}
update_tracker_ = std::make_unique<UpdateTracker>();
tag_parser_.ClearCaches();
leveldb::ReadOptions read_options;
read_options.fill_cache = false;
read_options.verify_checksums = true;
@ -373,21 +377,24 @@ auto Database::updateIndexes() -> void {
continue;
}
std::shared_ptr<TrackTags> tags;
FILINFO info;
FRESULT res = f_stat(track->filepath.c_str(), &info);
std::pair<uint16_t, uint16_t> modified_at{0, 0};
if (res == FR_OK) {
modified_at = {info.fdate, info.ftime};
}
if (modified_at == track->modified_at) {
continue;
} else {
track->modified_at = modified_at;
std::pair<uint16_t, uint16_t> modified_at{info.fdate, info.ftime};
if (modified_at == track->modified_at) {
continue;
} else {
// Will be written out later; we make sure we update the track's
// modification time and tags in the same batch so that interrupted
// reindexes don't cause a big mess.
track->modified_at = modified_at;
}
tags = tag_parser_.ReadAndParseTags(
{track->filepath.data(), track->filepath.size()});
}
std::shared_ptr<TrackTags> tags = tag_parser_.ReadAndParseTags(
{track->filepath.data(), track->filepath.size()});
if (!tags || tags->encoding() == Container::kUnsupported) {
// We couldn't read the tags for this track. Either they were
// malformed, or perhaps the file is missing. Either way, tombstone
@ -411,30 +418,24 @@ auto Database::updateIndexes() -> void {
// At this point, we know that the track still exists in its original
// location. All that's left to do is update any metadata about it.
auto new_type = calculateMediaType(*tags, track->filepath);
uint64_t new_hash = tags->Hash();
if (new_hash != track->tags_hash || new_type != track->type) {
// This track's tags have changed. Since the filepath is exactly the
// same, we assume this is a legitimate correction. Update the
// database.
ESP_LOGI(kTag, "updating hash (%llx -> %llx)", track->tags_hash,
new_hash);
// Again, we remove the old index records first so as to avoid
// dangling references.
dbRemoveIndexes(track);
// Remove the old index records first so as to avoid dangling records.
dbRemoveIndexes(track);
// Atomically correct the hash + create the new index records.
leveldb::WriteBatch batch;
track->tags_hash = new_hash;
dbIngestTagHashes(*tags, track->individual_tag_hashes, batch);
// Atomically correct the hash + create the new index records.
leveldb::WriteBatch batch;
track->type = new_type;
dbCreateIndexesForTrack(*track, *tags, batch);
batch.Put(EncodeDataKey(track->id), EncodeDataValue(*track));
uint64_t new_hash = tags->Hash();
if (track->tags_hash != new_hash) {
track->tags_hash = new_hash;
batch.Put(EncodeHashKey(new_hash), EncodeHashValue(track->id));
db_->Write(leveldb::WriteOptions(), &batch);
}
track->type = calculateMediaType(*tags, track->filepath);
batch.Put(EncodeDataKey(track->id), EncodeDataValue(*track));
dbIngestTagHashes(*tags, track->individual_tag_hashes, batch);
dbCreateIndexesForTrack(*track, *tags, batch);
db_->Write(leveldb::WriteOptions(), &batch);
}
}
@ -445,8 +446,8 @@ auto Database::updateIndexes() -> void {
track_finder_.launch("");
};
auto Database::processCandidateCallback(FILINFO& info, std::string_view path)
-> void {
auto Database::processCandidateCallback(FILINFO& info,
std::string_view path) -> void {
leveldb::ReadOptions read_options;
read_options.fill_cache = true;
read_options.verify_checksums = false;
@ -530,9 +531,8 @@ static constexpr char kMusicMediaPath[] = "/Music/";
static constexpr char kPodcastMediaPath[] = "/Podcasts/";
static constexpr char kAudiobookMediaPath[] = "/Audiobooks/";
auto Database::calculateMediaType(TrackTags& tags, std::string_view path)
-> MediaType {
auto Database::calculateMediaType(TrackTags& tags,
std::string_view path) -> MediaType {
auto equalsIgnoreCase = [&](char lhs, char rhs) {
return std::tolower(lhs) == std::tolower(rhs);
};
@ -540,7 +540,8 @@ auto Database::calculateMediaType(TrackTags& tags, std::string_view path)
// Use the filepath first, since it's the most explicit way for the user to
// tell us what this track is.
auto checkPathPrefix = [&](std::string_view path, std::string prefix) {
auto res = std::mismatch(prefix.begin(), prefix.end(), path.begin(), path.end(), equalsIgnoreCase);
auto res = std::mismatch(prefix.begin(), prefix.end(), path.begin(),
path.end(), equalsIgnoreCase);
return res.first == prefix.end();
};
@ -634,8 +635,8 @@ auto Database::dbMintNewTrackId() -> TrackId {
return next_track_id_++;
}
auto Database::dbGetTrackData(leveldb::ReadOptions options, TrackId id)
-> std::shared_ptr<TrackData> {
auto Database::dbGetTrackData(leveldb::ReadOptions options,
TrackId id) -> std::shared_ptr<TrackData> {
std::string key = EncodeDataKey(id);
std::string raw_val;
if (!db_->Get(options, key, &raw_val).ok()) {
@ -668,26 +669,55 @@ auto Database::dbRemoveIndexes(std::shared_ptr<TrackData> data) -> void {
}
for (const IndexInfo& index : getIndexes()) {
auto entries = Index(collator_, index, *data, *tags);
std::optional<uint8_t> preserve_depth{};
// Iterate through the index records backwards, so that we start deleting
// records from the highest depth. This allows us to work out what depth to
// stop at as we go.
// e.g. if the last track of an album is being deleted, we need to delete
// the album index record at n-1 depth. But if there are still other tracks
// in the album, then we should only clear records at depth n.
for (auto it = entries.rbegin(); it != entries.rend(); it++) {
if (preserve_depth) {
if (it->first.header.components_hash.size() < *preserve_depth) {
// Some records deeper than this were not removed, so don't try to
// remove this one.
continue;
}
// A record wasn't removed, but the current record is either a
// sibling of that record, or a leaf belonging to a sibling of that
// record. Either way, we can start deleting records again.
// preserve_depth will be set to a new value if we start encountering
// siblings again.
preserve_depth.reset();
}
auto key = EncodeIndexKey(it->first);
auto status = db_->Delete(leveldb::WriteOptions{}, key);
if (!status.ok()) {
return;
}
// Are any sibling records left at this depth? If two index records have
// matching headers, they are siblings (same depth, same selections at
// lower depths)
std::unique_ptr<leveldb::Iterator> cursor{db_->NewIterator({})};
// Check the record before the one we just deleted.
cursor->Seek(key);
cursor->Prev();
auto prev_key = ParseIndexKey(cursor->key());
if (prev_key && prev_key->header == it->first.header) {
break;
preserve_depth = it->first.header.components_hash.size();
continue;
}
// Check the record after.
cursor->Next();
auto next_key = ParseIndexKey(cursor->key());
if (next_key && next_key->header == it->first.header) {
break;
preserve_depth = it->first.header.components_hash.size();
continue;
}
}
}
@ -809,8 +839,7 @@ Iterator::Iterator(std::shared_ptr<Database> db, IndexId idx)
: Iterator(db,
IndexKey::Header{
.id = idx,
.depth = 0,
.components_hash = 0,
.components_hash = {},
}) {}
Iterator::Iterator(std::shared_ptr<Database> db, const IndexKey::Header& header)
@ -883,8 +912,8 @@ auto TrackIterator::next() -> void {
auto& cur = levels_.back().value();
if (!cur) {
// The current top iterator is out of tracks. Pop it, and move the parent
// to the next item.
// The current top iterator is out of tracks. Pop it, and move the
// parent to the next item.
levels_.pop_back();
} else if (std::holds_alternative<IndexKey::Header>(cur->contents())) {
// This record is a branch. Push a new iterator.

@ -38,7 +38,7 @@
namespace database {
const uint8_t kCurrentDbVersion = 9;
const uint8_t kCurrentDbVersion = 10;
struct SearchKey;
class Record;

@ -77,8 +77,8 @@ const IndexInfo kAudiobooks{
.components = {Tag::kAlbum, Tag::kAlbumOrder},
};
static auto titleOrFilename(const TrackData& data, const TrackTags& tags)
-> std::pmr::string {
static auto titleOrFilename(const TrackData& data,
const TrackTags& tags) -> std::pmr::string {
auto title = tags.title();
if (title) {
return *title;
@ -147,8 +147,7 @@ auto Indexer::index() -> std::vector<std::pair<IndexKey, std::string>> {
IndexKey::Header root_header{
.id = index_.id,
.depth = 0,
.components_hash = 0,
.components_hash = {},
};
handleLevel(root_header, index_.components);
@ -237,16 +236,10 @@ auto Index(locale::ICollator& collator,
}
auto ExpandHeader(const IndexKey::Header& header,
const std::optional<std::pmr::string>& component)
-> IndexKey::Header {
const std::pmr::string& component) -> IndexKey::Header {
IndexKey::Header ret{header};
ret.depth++;
if (component) {
ret.components_hash =
komihash(component->data(), component->size(), ret.components_hash);
} else {
ret.components_hash = komihash(NULL, 0, ret.components_hash);
}
ret.components_hash.push_back(
komihash(component.data(), component.size(), 0));
return ret;
}

@ -41,14 +41,8 @@ struct IndexKey {
struct Header {
// The index that this key was created for.
IndexId id;
// The number of components of IndexInfo that have already been filtered.
// For example, if an index consists of { kGenre, kArtist }, and this key
// represents an artist, then depth = 1.
std::uint8_t depth;
// The cumulative hash of all filtered components, in order. For example, if
// an index consists of { kArtist, kAlbum, kTitle }, and we are at depth = 2
// then this may contain hash(hash("Jacqueline"), "My Cool Album").
std::uint64_t components_hash;
// The hashes of all filtered components, in order, up to the current depth
std::vector<std::uint64_t> components_hash;
bool operator==(const Header&) const = default;
};
@ -56,7 +50,7 @@ struct IndexKey {
// The filterable / selectable item that this key represents. "Jacqueline" for
// kArtist, "My Cool Album" for kAlbum, etc.
std::optional<std::pmr::string> item;
std::pmr::string item;
// If this is a leaf component, the track id for this record.
// This could reasonably be the value for a record, but we keep it as a part
// of the key to help with disambiguation.
@ -69,7 +63,7 @@ auto Index(locale::ICollator&,
const TrackTags&) -> std::vector<std::pair<IndexKey, std::string>>;
auto ExpandHeader(const IndexKey::Header&,
const std::optional<std::pmr::string>&) -> IndexKey::Header;
const std::pmr::string&) -> IndexKey::Header;
// Predefined indexes
// TODO(jacqueline): Make these defined at runtime! :)

@ -181,11 +181,13 @@ auto EncodeAllIndexesPrefix() -> std::string {
auto EncodeIndexPrefix(const IndexKey::Header& header) -> std::string {
std::ostringstream out;
out << makePrefix(kIndexPrefix);
cppbor::Array val{
cppbor::Uint{header.id},
cppbor::Uint{header.depth},
cppbor::Uint{header.components_hash},
};
cppbor::Array components{};
for (auto hash : header.components_hash) {
components.add(cppbor::Uint{hash});
}
cppbor::Array val{cppbor::Uint{header.id}, std::move(components)};
out << val.toString() << kFieldSeparator;
return out.str();
}
@ -210,8 +212,8 @@ auto EncodeIndexKey(const IndexKey& key) -> std::string {
out << EncodeIndexPrefix(key.header);
// The component should already be UTF-8 encoded, so just write it.
if (key.item) {
out << *key.item << kFieldSeparator;
if (!key.item.empty()) {
out << key.item << kFieldSeparator;
}
if (key.track) {
@ -236,14 +238,16 @@ auto ParseIndexKey(const leveldb::Slice& slice) -> std::optional<IndexKey> {
return {};
}
auto as_array = key->asArray();
if (as_array->size() != 3 || as_array->get(0)->type() != cppbor::UINT ||
as_array->get(1)->type() != cppbor::UINT ||
as_array->get(2)->type() != cppbor::UINT) {
if (as_array->size() != 2 || as_array->get(0)->type() != cppbor::UINT ||
as_array->get(1)->type() != cppbor::ARRAY) {
return {};
}
result.header.id = as_array->get(0)->asUint()->unsignedValue();
result.header.depth = as_array->get(1)->asUint()->unsignedValue();
result.header.components_hash = as_array->get(2)->asUint()->unsignedValue();
auto components_array = as_array->get(1)->asArray();
for (int i = 0; i < components_array->size(); i++) {
result.header.components_hash.push_back(
components_array->get(i)->asUint()->unsignedValue());
}
size_t header_length =
reinterpret_cast<const char*>(end_of_key) - key_data.data();

@ -190,6 +190,12 @@ auto TagParserImpl::ReadAndParseTags(std::string_view path)
}
}
// There wasn't a fine-grained 'Artists' tag in the source. Synthesize a tag
// based on the singlet 'Artist' field.
if (tags->allArtists().empty() && tags->artist()) {
tags->singleAllArtists(tags->artist().value());
}
// Store the result in the cache for later.
{
std::lock_guard<std::mutex> lock{cache_mutex_};
@ -199,6 +205,11 @@ auto TagParserImpl::ReadAndParseTags(std::string_view path)
return tags;
}
// Empties the parsed-tag cache. Called between indexing runs so that a
// rescan re-reads tag data from disk instead of reusing entries cached from
// a previous run (which may be stale if files changed on disk).
auto TagParserImpl::ClearCaches() -> void {
// Hold cache_mutex_ while mutating cache_, matching the locking done when
// the cache is read/written in ReadAndParseTags.
std::lock_guard<std::mutex> lock{cache_mutex_};
cache_.Clear();
}
OggTagParser::OggTagParser() {}
auto OggTagParser::ReadAndParseTags(std::string_view p)

@ -19,6 +19,8 @@ class ITagParser {
virtual ~ITagParser() {}
virtual auto ReadAndParseTags(std::string_view path)
-> std::shared_ptr<TrackTags> = 0;
virtual auto ClearCaches() -> void {}
};
class TagParserImpl : public ITagParser {
@ -27,6 +29,8 @@ class TagParserImpl : public ITagParser {
auto ReadAndParseTags(std::string_view path)
-> std::shared_ptr<TrackTags> override;
auto ClearCaches() -> void override;
private:
std::vector<std::unique_ptr<ITagParser>> parsers_;
@ -58,14 +62,8 @@ class GenericTagParser : public ITagParser {
// Supported file extensions for parsing tags, derived from the list of
// supported audio formats here:
// https://cooltech.zone/tangara/docs/music-library/
static constexpr std::string supported_exts[] = {
"flac",
"mp3",
"ogg",
"ogx",
"opus",
"wav"
};
static constexpr std::string supported_exts[] = {"flac", "mp3", "ogg",
"ogx", "opus", "wav"};
};
} // namespace database

@ -83,11 +83,17 @@ auto tagToString(const TagValue& val) -> std::string {
return std::to_string(arg);
} else if constexpr (std::is_same_v<
T, std::span<const std::pmr::string>>) {
std::ostringstream builder{};
std::stringstream stream{};
bool first = true;
for (const auto& str : arg) {
builder << std::string{str.data(), str.size()} << ",";
if (first) {
first = false;
} else {
stream << ";";
}
stream << std::string{str.data(), str.size()};
}
return builder.str();
return stream.str();
}
},
val);
@ -101,7 +107,6 @@ auto tagToString(const TagValue& val) -> std::string {
auto parseDelimitedTags(const std::string_view s,
const char* delimiters,
std::pmr::vector<std::pmr::string>& out) -> void {
out.clear();
std::string src = {s.data(), s.size()};
char* token = std::strtok(src.data(), delimiters);
@ -240,7 +245,6 @@ auto TrackTags::artist() const -> const std::optional<std::pmr::string>& {
auto TrackTags::artist(std::string_view s) -> void {
artist_ = s;
maybeSynthesizeAllArtists();
}
auto TrackTags::allArtists() const -> std::span<const std::pmr::string> {
@ -249,7 +253,10 @@ auto TrackTags::allArtists() const -> std::span<const std::pmr::string> {
auto TrackTags::allArtists(const std::string_view s) -> void {
parseDelimitedTags(s, kAllArtistDelimiters, allArtists_);
maybeSynthesizeAllArtists();
}
// Appends a single artist value to the multi-value 'all artists' list
// verbatim, without any delimiter splitting (unlike allArtists(), which
// parses its argument with kAllArtistDelimiters). Used by the tag parser to
// synthesize an 'Artists' entry from the singlet 'Artist' field.
auto TrackTags::singleAllArtists(const std::string_view s) -> void {
allArtists_.emplace_back(s);
}
auto TrackTags::album() const -> const std::optional<std::pmr::string>& {
@ -325,17 +332,6 @@ auto TrackTags::Hash() const -> uint64_t {
return komihash_stream_final(&stream);
}
/*
 * Backfills 'allArtists' from the singlet 'artist' tag when no fine-grained
 * 'ARTISTS=' (or equivalent) tag was present in the source file. Keeping
 * this here spares consumers of TrackTags from handling that nuance
 * themselves.
 */
auto TrackTags::maybeSynthesizeAllArtists() -> void {
  // Nothing to do if a multi-value artist list already exists, or if there
  // is no singlet artist to copy from.
  if (!allArtists_.empty() || !artist_) {
    return;
  }
  allArtists_.push_back(*artist_);
}
auto database::TrackData::clone() const -> std::shared_ptr<TrackData> {
auto data = std::make_shared<TrackData>();
data->id = id;

@ -117,6 +117,7 @@ class TrackTags {
auto allArtists() const -> std::span<const std::pmr::string>;
auto allArtists(const std::string_view) -> void;
auto singleAllArtists(const std::string_view) -> void;
auto album() const -> const std::optional<std::pmr::string>&;
auto album(std::string_view) -> void;
@ -144,8 +145,6 @@ class TrackTags {
auto Hash() const -> uint64_t;
private:
auto maybeSynthesizeAllArtists() -> void;
Container encoding_;
std::optional<std::pmr::string> title_;

@ -190,35 +190,24 @@ static const struct luaL_Reg kDatabaseFuncs[] = {
{"update", update}, {"track_by_id", track_by_id},
{NULL, NULL}};
/*
* Struct to be used as userdata for the Lua representation of database records.
* In order to push these large values into PSRAM as much as possible, memory
* for these is allocated and managed by Lua. This struct must therefore be
* trivially copyable.
*/
struct LuaRecord {
std::variant<database::TrackId, database::IndexKey::Header> contents;
size_t text_size;
char text[];
};
static_assert(std::is_trivially_destructible<LuaRecord>());
static_assert(std::is_trivially_copy_assignable<LuaRecord>());
static auto push_lua_record(lua_State* L, const database::Record& r) -> void {
// Create and init the userdata.
LuaRecord* record = reinterpret_cast<LuaRecord*>(
lua_newuserdata(L, sizeof(LuaRecord) + r.text().size()));
luaL_setmetatable(L, kDbRecordMetatable);
static auto push_lua_record(lua_State* state,
const database::Record& r) -> void {
database::Record** data = reinterpret_cast<database::Record**>(
lua_newuserdata(state, sizeof(uintptr_t)));
*data = new database::Record(r);
luaL_setmetatable(state, kDbRecordMetatable);
}
// Init all the fields
*record = {
.contents = r.contents(),
.text_size = r.text().size(),
};
// Fetches the database::Record* stored in the userdata at stack_pos.
// luaL_checkudata raises a Lua error if the value there does not carry the
// record metatable, so on return the pointer is always valid. The userdata
// slot holds a single owning pointer to a heap-allocated Record.
auto db_check_record(lua_State* L, int stack_pos) -> database::Record* {
  void* slot = luaL_checkudata(L, stack_pos, kDbRecordMetatable);
  return *static_cast<database::Record**>(slot);
}
// Copy the string data across.
std::memcpy(record->text, r.text().data(), r.text().size());
// __gc metamethod for record userdata: frees the heap-allocated Record that
// the userdata owns. Lua reclaims the userdata block itself afterwards.
static auto db_record_gc(lua_State* state) -> int {
  delete db_check_record(state, 1);
  return 0;
}
auto db_check_iterator(lua_State* L, int stack_pos) -> database::Iterator* {
@ -227,8 +216,8 @@ auto db_check_iterator(lua_State* L, int stack_pos) -> database::Iterator* {
return it;
}
static auto push_iterator(lua_State* state, const database::Iterator& it)
-> void {
static auto push_iterator(lua_State* state,
const database::Iterator& it) -> void {
database::Iterator** data = reinterpret_cast<database::Iterator**>(
lua_newuserdata(state, sizeof(uintptr_t)));
*data = new database::Iterator(it);
@ -279,15 +268,13 @@ static const struct luaL_Reg kDbIteratorFuncs[] = {
{"__gc", db_iterator_gc}, {NULL, NULL}};
static auto record_text(lua_State* state) -> int {
LuaRecord* data = reinterpret_cast<LuaRecord*>(
luaL_checkudata(state, 1, kDbRecordMetatable));
lua_pushlstring(state, data->text, data->text_size);
database::Record* rec = db_check_record(state, 1);
lua_pushlstring(state, rec->text().data(), rec->text().size());
return 1;
}
static auto record_contents(lua_State* state) -> int {
LuaRecord* data = reinterpret_cast<LuaRecord*>(
luaL_checkudata(state, 1, kDbRecordMetatable));
database::Record* rec = db_check_record(state, 1);
std::visit(
[&](auto&& arg) {
@ -304,7 +291,7 @@ static auto record_contents(lua_State* state) -> int {
}
}
},
data->contents);
rec->contents());
return 1;
}
@ -312,6 +299,7 @@ static auto record_contents(lua_State* state) -> int {
static const struct luaL_Reg kDbRecordFuncs[] = {{"title", record_text},
{"contents", record_contents},
{"__tostring", record_text},
{"__gc", db_record_gc},
{NULL, NULL}};
static auto index_name(lua_State* state) -> int {
@ -405,7 +393,6 @@ static auto lua_database(lua_State* state) -> int {
lua_rawset(state, -3);
lua_rawset(state, -3);
lua_pushliteral(state, "IndexTypes");
lua_newtable(state);
lua_pushliteral(state, "ALBUMS_BY_ARTIST");

@ -13,8 +13,9 @@
namespace lua {
auto db_check_iterator(lua_State*, int stack_pos) -> database::Iterator*;
auto db_check_record(lua_State*, int stack_pos) -> database::Record*;
auto pushTagValue(lua_State* L, const database::TagValue& val) -> void;
auto pushTagValue(lua_State* L, const database::TagValue& val) -> void;
auto RegisterDatabaseModule(lua_State*) -> void;

Loading…
Cancel
Save