From bdadc159c7538e88eab5efcfeb2ecf971a510c60 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Fri, 28 Apr 2023 13:26:47 +1000 Subject: [PATCH] Add libtags for extracting info from files --- lib/libtags/437.c | 9 + lib/libtags/8859.c | 29 ++ lib/libtags/CMakeLists.txt | 5 + lib/libtags/LICENSE | 18 + lib/libtags/README.md | 47 +++ lib/libtags/examples/mkfile | 13 + lib/libtags/examples/readtags.c | 114 ++++++ lib/libtags/flac.c | 111 ++++++ lib/libtags/id3genres.c | 42 +++ lib/libtags/id3v1.c | 48 +++ lib/libtags/id3v2.c | 479 +++++++++++++++++++++++++ lib/libtags/it.c | 15 + lib/libtags/m4a.c | 157 ++++++++ lib/libtags/mkfile | 29 ++ lib/libtags/mod.c | 49 +++ lib/libtags/opus.c | 96 +++++ lib/libtags/s3m.c | 17 + lib/libtags/tags.c | 80 +++++ lib/libtags/tags.h | 94 +++++ lib/libtags/tagspriv.h | 59 +++ lib/libtags/utf16.c | 59 +++ lib/libtags/vorbis.c | 132 +++++++ lib/libtags/wav.c | 90 +++++ lib/libtags/xm.c | 15 + src/database/CMakeLists.txt | 2 +- src/database/include/tag_processor.hpp | 2 + src/database/tag_processor.cpp | 94 ++++- tools/cmake/common.cmake | 1 + 28 files changed, 1902 insertions(+), 4 deletions(-) create mode 100644 lib/libtags/437.c create mode 100644 lib/libtags/8859.c create mode 100644 lib/libtags/CMakeLists.txt create mode 100644 lib/libtags/LICENSE create mode 100644 lib/libtags/README.md create mode 100644 lib/libtags/examples/mkfile create mode 100644 lib/libtags/examples/readtags.c create mode 100644 lib/libtags/flac.c create mode 100644 lib/libtags/id3genres.c create mode 100644 lib/libtags/id3v1.c create mode 100644 lib/libtags/id3v2.c create mode 100644 lib/libtags/it.c create mode 100644 lib/libtags/m4a.c create mode 100644 lib/libtags/mkfile create mode 100644 lib/libtags/mod.c create mode 100644 lib/libtags/opus.c create mode 100644 lib/libtags/s3m.c create mode 100644 lib/libtags/tags.c create mode 100644 lib/libtags/tags.h create mode 100644 lib/libtags/tagspriv.h create mode 100644 lib/libtags/utf16.c create mode 100644 lib/libtags/vorbis.c create mode 100644 lib/libtags/wav.c create mode 100644 lib/libtags/xm.c diff --git a/lib/libtags/437.c b/lib/libtags/437.c new file mode 100644 index 00000000..9177bc18 --- /dev/null +++ b/lib/libtags/437.c @@ -0,0 +1,9 @@ +/* https://en.wikipedia.org/wiki/Code_page_437 */ +#include "tagspriv.h" + +int +cp437toutf8(char *o, int osz, const char *s, int sz) +{ + /* FIXME somebody come up with portable code */ + return snprint(o, osz, "%.*s", sz, s); +} diff --git a/lib/libtags/8859.c b/lib/libtags/8859.c new file mode 100644 index 00000000..9efd9026 --- /dev/null +++ b/lib/libtags/8859.c @@ -0,0 +1,29 @@ +/* http://en.wikipedia.org/wiki/ISO/IEC_8859-1 */ +#include "tagspriv.h" + +int +iso88591toutf8(uchar *o, int osz, const uchar *s, int sz) +{ + int i; + + for(i = 0; i < sz && osz > 1 && s[i] != 0; i++){ + if(s[i] >= 0xa0 && osz < 3) + break; + + if(s[i] >= 0xc0){ + *o++ = 0xc3; + *o++ = s[i] - 0x40; + osz--; + }else if(s[i] >= 0xa0){ + *o++ = 0xc2; + *o++ = s[i]; + osz--; + }else{ + *o++ = s[i]; + } + osz--; + } + + *o = 0; + return i; +} diff --git a/lib/libtags/CMakeLists.txt b/lib/libtags/CMakeLists.txt new file mode 100644 index 00000000..9d9377b7 --- /dev/null +++ b/lib/libtags/CMakeLists.txt @@ -0,0 +1,5 @@ +idf_component_register( + SRCS 437.c 8859.c flac.c id3genres.c id3v1.c id3v2.c it.c m4a.c mod.c opus.c + s3m.c tags.c utf16.c vorbis.c wav.c xm.c + INCLUDE_DIRS . +) diff --git a/lib/libtags/LICENSE b/lib/libtags/LICENSE new file mode 100644 index 00000000..a8abc627 --- /dev/null +++ b/lib/libtags/LICENSE @@ -0,0 +1,18 @@ +Copyright © 2013-2020 Sigrid Solveig Haflínudóttir + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/lib/libtags/README.md b/lib/libtags/README.md new file mode 100644 index 00000000..e86f8906 --- /dev/null +++ b/lib/libtags/README.md @@ -0,0 +1,47 @@ +libtags +======= + +A cross-platform library for reading tags, designed for highly constrained environments. + +Comparison to id3lib and taglib: + +| | libtags | id3lib | taglib | +|:---------------|:----------------|:-----------------|:-----------------| +| ID3v2.4 | yes | no | yes | +| Ogg/Vorbis | yes | no | yes | +| FLAC | yes | no | yes | +| m4a | yes | no | yes | +| opus | yes | no | yes | +| WAV | yes | no | yes | +| IT | yes | no | ??? | +| XM | yes | no | ??? | +| S3M | yes | no | ??? | +| MOD | yes | no | ??? | +| replay gain | yes | no | ??? | +| size | tiny | bloated | more bloated | +| license | MIT | LGPL | LGPL/MPL | +| written in | C | C++ | C++ | +| memory | no allocations | allocates memory | allocates memory | +| thread safe | yes | ??? | ??? | +| speed | ultra-fast | slow | fast | +| tag writing | no, not a goal | yes | yes | +| Plan 9 support | yes, native | no | no | + +CPU time (784 files: mp3, ogg, flac): + +| | libtags | taglib | +|:---------------|:-----------------|:-----------------| +| files cached | real 0m0.027s | real 0m0.155s | +| | user 0m0.014s | user 0m0.102s | +| | sys 0m0.012s | sys 0m0.053s | +| | | | +| cache dropped | real 0m1.158s | real 0m1.628s | +| | user 0m0.024s | user 0m0.211s | +| | sys 0m0.132s | sys 0m0.187s | + +## Usage + +Just compile it to an archive (`.a`) and link to your program. Use it in your code +by including `tags.h`, that's the API. Documentation is in the header. + +See `examples/readtags.c`. You can compile it on Linux like so: `gcc examples/readtags.c *.c -I. -o readtags`. diff --git a/lib/libtags/examples/mkfile b/lib/libtags/examples/mkfile new file mode 100644 index 00000000..1debc631 --- /dev/null +++ b/lib/libtags/examples/mkfile @@ -0,0 +1,13 @@ + +#include +#include +#include +#define print printf +#define seek lseek +#define nil NULL +#define OREAD O_RDONLY +#define USED(x) (void)x +#else +#include +#include +#endif +#include + +typedef struct Aux Aux; + +struct Aux +{ + int fd; +}; + +static const char *t2s[] = +{ + [Tartist] = "artist", + [Talbum] = "album", + [Ttitle] = "title", + [Tdate] = "date", + [Ttrack] = "track", + [Talbumgain] = "albumgain", + [Talbumpeak] = "albumpeak", + [Ttrackgain] = "trackgain", + [Ttrackpeak] = "trackpeak", + [Tgenre] = "genre", + [Timage] = "image", +}; + +static void +tag(Tagctx *ctx, int t, const char *k, const char *v, int offset, int size, Tagread f) +{ + USED(ctx); USED(k); USED(f); + if(t == Timage) + print("%-12s %s %d %d\n", t2s[t], v, offset, size); + else if(t != Tunknown) + print("%-12s %s\n", t2s[t], v); +} + +static void +toc(Tagctx *ctx, int ms, int offset) +{ + USED(ctx); USED(ms); USED(offset); +} + +static int +ctxread(Tagctx *ctx, void *buf, int cnt) +{ + Aux *aux = ctx->aux; + return read(aux->fd, buf, cnt); +} + +static int +ctxseek(Tagctx *ctx, int offset, int whence) +{ + Aux *aux = ctx->aux; + return seek(aux->fd, offset, whence); +} + +int +main(int argc, char **argv) +{ + int i; + char buf[256]; + Aux aux; + Tagctx ctx = + { + .read = ctxread, + .seek = ctxseek, + .tag = tag, + .toc = toc, + .buf = buf, + .bufsz = sizeof(buf), + .aux = &aux, + }; + + if(argc < 2){ + print("usage: readtags FILE...\n"); + return -1; + } + + for(i = 1; i < argc; i++){ + print("*** %s\n", argv[i]); + if((aux.fd = open(argv[i], OREAD)) < 0) + print("failed to open\n"); + else{ + if(tagsget(&ctx) != 0) + print("no tags or failed to read tags\n"); + else{ + if(ctx.duration > 0) + print("%-12s %d ms\n", "duration", ctx.duration); + if(ctx.samplerate > 0) + print("%-12s %d\n", "samplerate", ctx.samplerate); + if(ctx.channels > 0) + print("%-12s %d\n", "channels", ctx.channels); + if(ctx.bitrate > 0) + print("%-12s %d\n", "bitrate", ctx.bitrate); + } + close(aux.fd); + } + print("\n"); + } + return 0; +} diff --git a/lib/libtags/flac.c b/lib/libtags/flac.c new file mode 100644 index 00000000..824f6abf --- /dev/null +++ b/lib/libtags/flac.c @@ -0,0 +1,111 @@ +/* https://xiph.org/flac/format.html */ +#include "tagspriv.h" + +#define beu3(d) ((d)[0]<<16 | (d)[1]<<8 | (d)[2]<<0) + +int +tagflac(Tagctx *ctx) +{ + uchar *d; + int sz, last; + uvlong g; + + d = (uchar*)ctx->buf; + /* 8 bytes for marker, block type, length. 18 bytes for the stream info */ + if(ctx->read(ctx, d, 8+18) != 8+18 || memcmp(d, "fLaC\x00", 5) != 0) + return -1; + + sz = beu3(&d[5]); /* size of the stream info */ + ctx->samplerate = beu3(&d[18]) >> 4; + ctx->channels = ((d[20]>>1) & 7) + 1; + if(ctx->samplerate < 1 || ctx->channels < 1) + return -1; + + g = (uvlong)(d[21] & 0xf)<<32 | beu3(&d[22])<<8 | d[25]; + ctx->duration = g * 1000 / ctx->samplerate; + + /* skip the rest of the stream info */ + if(ctx->seek(ctx, sz-18, 1) != ctx->restart+8+sz) + return -1; + + for(last = 0; !last;){ + if(ctx->read(ctx, d, 4) != 4) + return -1; + + sz = beu3(&d[1]); + if((d[0] & 0x80) != 0) + last = 1; + + if((d[0] & 0x7f) == 6){ /* 6 = picture */ + int n, offset; + char *mime; + + if(sz < 16 || ctx->read(ctx, d, 8) != 8) /* type, mime length */ + return -1; + sz -= 8; + n = beuint(&d[4]); + mime = ctx->buf+20; + if(n >= sz || n >= ctx->bufsz-1 || ctx->read(ctx, mime, n) != n) + return -1; + sz -= n; + mime[n] = 0; + ctx->read(ctx, d, 4); /* description */ + sz -= 4; + offset = beuint(d) + ctx->seek(ctx, 0, 1) + 20; + ctx->read(ctx, d, 20); + sz -= 20; + n = beuint(&d[16]); + tagscallcb(ctx, Timage, "", mime, offset, n, nil); + if(ctx->seek(ctx, sz, 1) <= 0) + return -1; + }else if((d[0] & 0x7f) == 4){ /* 4 = vorbis comment */ + int i, numtags, tagsz, vensz; + char *k, *v; + + if(sz < 12 || ctx->read(ctx, d, 4) != 4) + return -1; + + sz -= 4; + vensz = leuint(d); + if(vensz < 0 || vensz > sz-4) + return -1; + /* skip vendor, read the number of tags */ + if(ctx->seek(ctx, vensz, 1) < 0 || ctx->read(ctx, d, 4) != 4) + return -1; + sz -= vensz + 4; + numtags = leuint(d); + + for(i = 0; i < numtags && sz > 4; i++){ + if(ctx->read(ctx, d, 4) != 4) + return -1; + tagsz = leuint(d); + sz -= 4; + if(tagsz > sz) + return -1; + + /* if it doesn't fit, ignore it */ + if(tagsz+1 > ctx->bufsz){ + if(ctx->seek(ctx, tagsz, 1) < 0) + return -1; + continue; + } + + k = ctx->buf; + if(ctx->read(ctx, k, tagsz) != tagsz) + return -1; + /* some tags have a stupid '\r'; ignore */ + if(k[tagsz-1] == '\r') + k[tagsz-1] = 0; + k[tagsz] = 0; + + if((v = strchr(k, '=')) != nil){ + *v++ = 0; + cbvorbiscomment(ctx, k, v); + } + } + }else if(ctx->seek(ctx, sz, 1) <= 0) + return -1; + } + + return 0; +} diff --git a/lib/libtags/id3genres.c b/lib/libtags/id3genres.c new file mode 100644 index 00000000..f1d6f7dc --- /dev/null +++ b/lib/libtags/id3genres.c @@ -0,0 +1,42 @@ +#include "tagspriv.h" + +const char *id3genres[Numgenre] = +{ + "Blues", "Classic Rock", "Country", "Dance", "Disco", "Funk", + "Grunge", "Hip-Hop", "Jazz", "Metal", "New Age", "Oldies", + "Other", "Pop", "Rhythm and Blues", "Rap", "Reggae", "Rock", + "Techno", "Industrial", "Alternative", "Ska", "Death Metal", + "Pranks", "Soundtrack", "Euro-Techno", "Ambient", "Trip-Hop", + "Vocal", "Jazz & Funk", "Fusion", "Trance", "Classical", + "Instrumental", "Acid", "House", "Game", "Sound Clip", "Gospel", + "Noise", "Alternative Rock", "Bass", "Soul", "Punk rock", "Space", + "Meditative", "Instrumental Pop", "Instrumental Rock", "Ethnic", + "Gothic", "Darkwave", "Techno-Industrial", "Electronic", + "Pop-Folk", "Eurodance", "Dream", "Southern Rock", "Comedy", + "Cult", "Gangsta", "Top 40", "Christian Rap", "Pop/Funk", + "Jungle", "Native American", "Cabaret", "New Wave", "Psychedelic", + "Rave", "Showtunes", "Trailer", "Lo-Fi", "Tribal", "Acid Punk", + "Acid Jazz", "Polka", "Retro", "Musical", "Rock & Roll", "Hard Rock", + "Folk", "Folk-Rock", "National Folk", "Swing", "Fast Fusion", + "Bebop", "Latin", "Revival", "Celtic", "Bluegrass", "Avantgarde", + "Gothic Rock", "Progressive Rock", "Psychedelic Rock", "Symphonic Rock", + "Slow Rock", "Big Band", "Chorus", "Easy Listening", "Acoustic", + "Humour", "Speech", "Chanson", "Opera", "Chamber Music", "Sonata", + "Symphony", "Booty Bass", "Primus", "Porn groove", "Satire", "Slow Jam", + "Club", "Tango", "Samba", "Folklore", "Ballad", "Power Ballad", + "Rhythmic Soul", "Freestyle", "Duet", "Punk rock", "Drum Solo", "A capella", + "Euro-House", "Dance Hall", "Goa Trance", "Drum & Bass", + "Club-House", "Hardcore Techno", "Terror", "Indie", "BritPop", + "Afro-punk", "Polsk Punk", "Beat", "Christian Gangsta Rap", "Heavy Metal", + "Black Metal", "Crossover", "Contemporary Christian", "Christian Rock", + "Merengue", "Salsa", "Thrash Metal", "Anime", "Jpop", "Synthpop", + "Abstract", "Art Rock", "Baroque", "Bhangra", "Big Beat", + "Breakbeat", "Chillout", "Downtempo", "Dub", "EBM", "Eclectic", + "Electro", "Electroclash", "Emo", "Experimental", "Garage", + "Global", "IDM", "Illbient", "Industro-Goth", "Jam Band", + "Krautrock", "Leftfield", "Lounge", "Math Rock", "New Romantic", + "Nu-Breakz", "Post-Punk", "Post-Rock", "Psytrance", "Shoegaze", + "Space Rock", "Trop Rock", "World Music", "Neoclassical", + "Audiobook", "Audio Theatre", "Neue Deutsche Welle", "Podcast", + "Indie Rock", "G-Funk", "Dubstep", "Garage Rock", "Psybient", +}; diff --git a/lib/libtags/id3v1.c b/lib/libtags/id3v1.c new file mode 100644 index 00000000..afcf90e9 --- /dev/null +++ b/lib/libtags/id3v1.c @@ -0,0 +1,48 @@ +/* + * http://en.wikipedia.org/wiki/ID3 + * Space-padded strings are mentioned there. This is wrong and is a lie. + */ +#include "tagspriv.h" + +enum +{ + Insz = 128, + Outsz = 61, +}; + +int +tagid3v1(Tagctx *ctx) +{ + uchar *in, *out; + + if(ctx->bufsz < Insz+Outsz) + return -1; + in = (uchar*)ctx->buf; + out = in + Insz; + + if(ctx->seek(ctx, -Insz, 2) < 0) + return -1; + if(ctx->read(ctx, in, Insz) != Insz || memcmp(in, "TAG", 3) != 0) + return -1; + + if((ctx->found & 1< 0) + txtcb(ctx, Ttitle, "", out); + if((ctx->found & 1< 0) + txtcb(ctx, Tartist, "", out); + if((ctx->found & 1< 0) + txtcb(ctx, Talbum, "", out); + + in[93+4] = 0; + if((ctx->found & 1<found & 1< 0){ + snprint((char*)out, Outsz, "%d", in[126]); + txtcb(ctx, Ttrack, "", out); + } + + if((ctx->found & 1<duration = atoi(v); + else if(strcmp(k, "CO") == 0 || strcmp(k, "CON") == 0){ + for(; v[0]; v++){ + if(v[0] == '(' && v[1] <= '9' && v[1] >= '0'){ + int i = atoi(&v[1]); + if(i < Numgenre) + txtcb(ctx, Tgenre, k-1, id3genres[i]); + for(v++; v[0] && v[0] != ')'; v++); + v--; + }else if(v[0] != '(' && v[0] != ')'){ + txtcb(ctx, Tgenre, k-1, v); + break; + } + } + }else if(strcmp(k, "XXX") == 0 && strncmp(v, "REPLAYGAIN_", 11) == 0){ + int type = -1; + v += 11; + if(strncmp(v, "TRACK_", 6) == 0){ + v += 6; + if(strcmp(v, "GAIN") == 0) + type = Ttrackgain; + else if(strcmp(v, "PEAK") == 0) + type = Ttrackpeak; + }else if(strncmp(v, "ALBUM_", 6) == 0){ + v += 6; + if(strcmp(v, "GAIN") == 0) + type = Talbumgain; + else if(strcmp(v, "PEAK") == 0) + type = Talbumpeak; + } + if(type >= 0) + txtcb(ctx, type, k-1, v+5); + else + return 0; + }else{ + txtcb(ctx, Tunknown, k-1, v); + } + return 1; +} + +static int +rva2(Tagctx *ctx, char *tag, int sz) +{ + uchar *b, *end; + + if((b = memchr(tag, 0, sz)) == nil) + return -1; + b++; + for(end = (uchar*)tag+sz; b+4 < end; b += 5){ + int type = b[0]; + float peak; + float va = (float)(b[1]<<8 | b[2]) / 512.0f; + + if(b[3] == 24){ + peak = (float)(b[4]<<16 | b[5]<<8 | b[6]) / 32768.0f; + b += 2; + }else if(b[3] == 16){ + peak = (float)(b[4]<<8 | b[5]) / 32768.0f; + b += 1; + }else if(b[3] == 8){ + peak = (float)b[4] / 32768.0f; + }else + return -1; + + if(type == 1){ /* master volume */ + char vas[16], peaks[8]; + snprint(vas, sizeof(vas), "%+.5f dB", va); + snprint(peaks, sizeof(peaks), "%.5f", peak); + vas[sizeof(vas)-1] = 0; + peaks[sizeof(peaks)-1] = 0; + + if(strcmp((char*)tag, "track") == 0){ + txtcb(ctx, Ttrackgain, "RVA2", vas); + txtcb(ctx, Ttrackpeak, "RVA2", peaks); + }else if(strcmp((char*)tag, "album") == 0){ + txtcb(ctx, Talbumgain, "RVA2", vas); + txtcb(ctx, Talbumpeak, "RVA2", peaks); + } + break; + } + } + return 0; +} + +static int +resync(uchar *b, int sz) +{ + int i; + + if(sz < 4) + return sz; + for(i = 0; i < sz-2; i++){ + if(b[i] == 0xff && b[i+1] == 0x00 && (b[i+2] & 0xe0) == 0xe0){ + memmove(&b[i+1], &b[i+2], sz-i-2); + sz--; + } + } + return sz; +} + +static int +unsyncread(void *buf, int *sz) +{ + int i; + uchar *b; + + b = buf; + for(i = 0; i < *sz; i++){ + if(b[i] == 0xff){ + if(i+1 >= *sz || (b[i+1] == 0x00 && i+2 >= *sz)) + break; + if(b[i+1] == 0x00 && (b[i+2] & 0xe0) == 0xe0){ + memmove(&b[i+1], &b[i+2], *sz-i-2); + (*sz)--; + } + } + } + return i; +} + +static int +nontext(Tagctx *ctx, uchar *d, int tsz, int unsync) +{ + int n, offset; + char *b, *tag; + Tagread f; + + tag = ctx->buf; + n = 0; + f = unsync ? unsyncread : nil; + if(strcmp((char*)d, "APIC") == 0){ + offset = ctx->seek(ctx, 0, 1); + if((n = ctx->read(ctx, tag, 256)) == 256){ /* APIC mime and description should fit */ + b = tag + 1; /* mime type */ + for(n = 1 + strlen(b) + 2; n < 253; n++){ + if(tag[0] == 0 || tag[0] == 3){ /* one zero byte */ + if(tag[n] == 0){ + n++; + break; + } + }else if(tag[n] == 0 && tag[n+1] == 0 && tag[n+2] == 0){ + n += 3; + break; + } + } + tagscallcb(ctx, Timage, "APIC", b, offset+n, tsz-n, f); + n = 256; + } + }else if(strcmp((char*)d, "PIC") == 0){ + offset = ctx->seek(ctx, 0, 1); + if((n = ctx->read(ctx, tag, 256)) == 256){ /* PIC description should fit */ + b = tag + 1; /* mime type */ + for(n = 5; n < 253; n++){ + if(tag[0] == 0 || tag[0] == 3){ /* one zero byte */ + if(tag[n] == 0){ + n++; + break; + } + }else if(tag[n] == 0 && tag[n+1] == 0 && tag[n+2] == 0){ + n += 3; + break; + } + } + tagscallcb(ctx, Timage, "PIC", strcmp(b, "JPG") == 0 ? "image/jpeg" : "image/png", offset+n, tsz-n, f); + n = 256; + } + }else if(strcmp((char*)d, "RVA2") == 0 && tsz >= 6+5){ + /* replay gain. 6 = "track\0", 5 = other */ + if(ctx->bufsz >= tsz && (n = ctx->read(ctx, tag, tsz)) == tsz) + rva2(ctx, tag, unsync ? resync((uchar*)tag, n) : n); + } + + return ctx->seek(ctx, tsz-n, 1) < 0 ? -1 : 0; +} + +static int +text(Tagctx *ctx, uchar *d, int tsz, int unsync) +{ + char *b, *tag; + + if(ctx->bufsz >= tsz+1){ + /* place the data at the end to make best effort at charset conversion */ + tag = &ctx->buf[ctx->bufsz - tsz - 1]; + if(ctx->read(ctx, tag, tsz) != tsz) + return -1; + }else{ + ctx->seek(ctx, tsz, 1); + return 0; + } + + if(unsync) + tsz = resync((uchar*)tag, tsz); + + tag[tsz] = 0; + b = &tag[1]; + + switch(tag[0]){ + case 0: /* iso-8859-1 */ + if(iso88591toutf8((uchar*)ctx->buf, ctx->bufsz, (uchar*)b, tsz) > 0) + v2cb(ctx, (char*)d, ctx->buf); + break; + case 1: /* utf-16 */ + case 2: + if(utf16to8((uchar*)ctx->buf, ctx->bufsz, (uchar*)b, tsz) > 0) + v2cb(ctx, (char*)d, ctx->buf); + break; + case 3: /* utf-8 */ + if(*b) + v2cb(ctx, (char*)d, b); + break; + } + + return 0; +} + +static int +isid3(uchar *d) +{ + /* "ID3" version[2] flags[1] size[4] */ + return ( + d[0] == 'I' && d[1] == 'D' && d[2] == '3' && + d[3] < 0xff && d[4] < 0xff && + d[6] < 0x80 && d[7] < 0x80 && d[8] < 0x80 && d[9] < 0x80 + ); +} + +static const uchar bitrates[4][4][16] = { + { + {0}, + {0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 72, 80, 0}, /* v2.5 III */ + {0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 72, 80, 0}, /* v2.5 II */ + {0, 16, 24, 28, 32, 40, 48, 56, 64, 72, 80, 88, 96, 112, 128, 0}, /* v2.5 I */ + }, + { {0}, {0}, {0}, {0} }, + { + {0}, + {0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 72, 80, 0}, /* v2 III */ + {0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 72, 80, 0}, /* v2 II */ + {0, 16, 24, 28, 32, 40, 48, 56, 64, 72, 80, 88, 96, 112, 128, 0}, /* v2 I */ + }, + { + {0}, + {0, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 0}, /* v1 III */ + {0, 16, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 0}, /* v1 II */ + {0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 0}, /* v1 I */ + } +}; + +static const uint samplerates[4][4] = { + {11025, 12000, 8000, 0}, + { 0, 0, 0, 0}, + {22050, 24000, 16000, 0}, + {44100, 48000, 32000, 0}, +}; + +static const int chans[] = {2, 2, 2, 1}; + +static const int samplesframe[4][4] = { + {0, 0, 0, 0}, + {0, 576, 1152, 384}, + {0, 576, 1152, 384}, + {0, 1152, 1152, 384}, +}; + +static void +getduration(Tagctx *ctx, int offset) +{ + uvlong n, framelen, samplespf, toc; + uchar *b; + uint x; + int xversion, xlayer, xbitrate, i; + + if(ctx->read(ctx, ctx->buf, 256) != 256) + return; + + x = beuint((uchar*)ctx->buf); + xversion = x >> 19 & 3; + xlayer = x >> 17 & 3; + xbitrate = x >> 12 & 0xf; + ctx->bitrate = 2000*(int)bitrates[xversion][xlayer][xbitrate]; + samplespf = samplesframe[xversion][xlayer]; + + ctx->samplerate = samplerates[xversion][x >> 10 & 3]; + ctx->channels = chans[x >> 6 & 3]; + + if(ctx->samplerate > 0){ + framelen = (uvlong)144*ctx->bitrate / ctx->samplerate; + if((x & (1<<9)) != 0) /* padding */ + framelen += xlayer == 3 ? 4 : 1; /* for I it's 4 bytes */ + + if(memcmp(&ctx->buf[0x24], "Info", 4) == 0 || memcmp(&ctx->buf[0x24], "Xing", 4) == 0){ + b = (uchar*)ctx->buf + 0x28; + x = beuint(b); b += 4; + if((x & 1) != 0){ /* number of frames is set */ + n = beuint(b); b += 4; + ctx->duration = n * samplespf * 1000 / ctx->samplerate; + } + + if((x & 2) != 0){ /* file size is set */ + n = beuint(b); b += 4; + if(ctx->duration == 0 && framelen > 0) + ctx->duration = n * samplespf * 1000 / framelen / ctx->samplerate; + + if((x & 4) != 0 && ctx->toc != nil){ /* TOC is set */ + toc = offset + 100 + (char*)b - ctx->buf; + if((x & 8) != 0) /* VBR scale */ + toc += 4; + for(i = 0; i < 100; i++){ + /* + * offset = n * b[i] / 256 + * ms = i * duration / 100 + */ + ctx->toc(ctx, i * ctx->duration / 100, toc + (n * b[i]) / 256); + } + b += 100; + if((x & 8) != 0) /* VBR scale */ + b += 4; + } + } + offset += (char*)b - ctx->buf; + }else if(memcmp(&ctx->buf[0x24], "VBRI", 4) == 0){ + n = beuint((uchar*)&ctx->buf[0x32]); + ctx->duration = n * samplespf * 1000 / ctx->samplerate; + + if(ctx->duration == 0 && framelen > 0){ + n = beuint((uchar*)&ctx->buf[0x28]); /* file size */ + ctx->duration = n * samplespf * 1000 / framelen / ctx->samplerate; + } + } + } + + if(ctx->bitrate > 0 && ctx->duration == 0) /* worst case -- use real file size instead */ + ctx->duration = (ctx->seek(ctx, 0, 2) - offset)/(ctx->bitrate / 1000) * 8; +} + +int +tagid3v2(Tagctx *ctx) +{ + int sz, exsz, framesz; + int ver, unsync, offset; + int newpos, oldpos; + uchar d[10], *b; + + if(ctx->read(ctx, d, sizeof(d)) != sizeof(d)) + return -1; + if(!isid3(d)){ /* no tags, but the stream information is there */ + if(d[0] != 0xff || (d[1] & 0xfe) != 0xfa) + return -1; + ctx->seek(ctx, -(int)sizeof(d), 1); + getduration(ctx, 0); + return 0; + } + + oldpos = 0; +header: + ver = d[3]; + unsync = d[5] & (1<<7); + sz = synchsafe(&d[6]); + + if(ver == 2 && (d[5] & (1<<6)) != 0) /* compression */ + return -1; + + ctx->restart = sizeof(d)+sz; + + if(ver > 2){ + if((d[5] & (1<<4)) != 0) /* footer */ + sz -= 10; + if((d[5] & (1<<6)) != 0){ /* skip extended header */ + if(ctx->read(ctx, d, 4) != 4) + return -1; + exsz = (ver >= 3) ? beuint(d) : synchsafe(d); + if(ctx->seek(ctx, exsz, 1) < 0) + return -1; + sz -= exsz; + } + } + + framesz = (ver >= 3) ? 10 : 6; + for(; sz > framesz;){ + int tsz, frameunsync; + + if(ctx->read(ctx, d, framesz) != framesz) + return -1; + sz -= framesz; + + /* return on padding */ + if(memcmp(d, "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", framesz) == 0) + break; + if(ver >= 3){ + tsz = (ver == 3) ? beuint(&d[4]) : synchsafe(&d[4]); + if(tsz < 0 || tsz > sz) + break; + frameunsync = d[9] & (1<<1); + d[4] = 0; + + if((d[9] & 0x0c) != 0){ /* compression & encryption */ + ctx->seek(ctx, tsz, 1); + sz -= tsz; + continue; + } + if(ver == 4 && (d[9] & 1<<0) != 0){ /* skip data length indicator */ + ctx->seek(ctx, 4, 1); + sz -= 4; + tsz -= 4; + } + }else{ + tsz = beuint(&d[3]) >> 8; + if(tsz > sz) + return -1; + frameunsync = 0; + d[3] = 0; + } + sz -= tsz; + + if(d[0] == 'T' && text(ctx, d, tsz, unsync || frameunsync) != 0) + return -1; + else if(d[0] != 'T' && nontext(ctx, d, tsz, unsync || frameunsync) != 0) + return -1; + } + + offset = ctx->seek(ctx, sz, 1); + sz = ctx->bufsz <= 2048 ? ctx->bufsz : 2048; + b = nil; + for(exsz = 0; exsz < 2048; exsz += sz){ + if(ctx->read(ctx, ctx->buf, sz) != sz) + break; + for(b = (uchar*)ctx->buf; (b = memchr(b, 'I', sz - 1 - ((char*)b - ctx->buf))) != nil; b++){ + newpos = ctx->seek(ctx, (char*)b - ctx->buf + offset + exsz, 0); + if(ctx->read(ctx, d, sizeof(d)) != sizeof(d)) + return 0; + if(isid3(d) && newpos != oldpos){ + oldpos = newpos; + goto header; + } + } + for(b = (uchar*)ctx->buf; (b = memchr(b, 0xff, sz-3)) != nil; b++){ + if((b[1] & 0xe0) == 0xe0){ + offset = ctx->seek(ctx, (char*)b - ctx->buf + offset + exsz, 0); + exsz = 2048; + break; + } + } + } + + if(b != nil) + getduration(ctx, offset); + + return 0; +} diff --git a/lib/libtags/it.c b/lib/libtags/it.c new file mode 100644 index 00000000..60079234 --- /dev/null +++ b/lib/libtags/it.c @@ -0,0 +1,15 @@ +#include "tagspriv.h" + +int +tagit(Tagctx *ctx) +{ + uchar d[4+26+1], o[26*2+1]; + + if(ctx->read(ctx, d, 4+26) != 4+26 || memcmp(d, "IMPM", 4) != 0) + return -1; + d[4+26] = 0; + if(iso88591toutf8(o, sizeof(o), d+4, 26) > 0) + txtcb(ctx, Ttitle, "", o); + + return 0; +} diff --git a/lib/libtags/m4a.c b/lib/libtags/m4a.c new file mode 100644 index 00000000..924ba51a --- /dev/null +++ b/lib/libtags/m4a.c @@ -0,0 +1,157 @@ +/* http://wiki.multimedia.cx/?title=QuickTime_container */ +/* https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html */ +#include "tagspriv.h" + +#define beuint16(d) (ushort)((d)[0]<<8 | (d)[1]<<0) + +int +tagm4a(Tagctx *ctx) +{ + uvlong duration; + uint x; + uchar *d; + int sz, type, dtype, i, skip, n; + + d = (uchar*)ctx->buf; + /* 4 bytes for atom size, 4 for type, 4 for data - exect "ftyp" to come first */ + if(ctx->read(ctx, d, 4+4+4) != 4+4+4 || memcmp(d+4, "ftypM4A ", 8) != 0) + return -1; + sz = beuint(d) - 4; /* already have 8 bytes */ + + for(;;){ + if(ctx->seek(ctx, sz, 1) < 0) + return -1; + if(ctx->read(ctx, d, 4) != 4) /* size */ + break; + sz = beuint(d); + if(sz == 0) + continue; + if(ctx->read(ctx, d, 4) != 4) /* type */ + return -1; + if(sz < 8) + continue; + + d[4] = 0; + + if(memcmp(d, "meta", 4) == 0){ + sz = 4; + continue; + }else if( + memcmp(d, "udta", 4) == 0 || + memcmp(d, "ilst", 4) == 0 || + memcmp(d, "trak", 4) == 0 || + memcmp(d, "mdia", 4) == 0 || + memcmp(d, "minf", 4) == 0 || + memcmp(d, "moov", 4) == 0 || + memcmp(d, "trak", 4) == 0 || + memcmp(d, "stbl", 4) == 0){ + sz = 0; + continue; + }else if(memcmp(d, "stsd", 4) == 0){ + sz -= 8; + if(ctx->read(ctx, d, 8) != 8) + return -1; + sz -= 8; + + for(i = beuint(&d[4]); i > 0 && sz > 0; i--){ + if(ctx->read(ctx, d, 8) != 8) /* size + format */ + return -1; + sz -= 8; + skip = beuint(d) - 8; + + if(memcmp(&d[4], "mp4a", 4) == 0){ /* audio */ + n = 6+2 + 2+4+2 + 2+2 + 2+2 + 4; /* read a bunch at once */ + /* reserved+id, ver+rev+vendor, channels+bps, ?+?, sample rate */ + if(ctx->read(ctx, d, n) != n) + return -1; + skip -= n; + sz -= n; + ctx->channels = beuint16(&d[16]); + ctx->samplerate = beuint(&d[24])>>16; + } + + if(ctx->seek(ctx, skip, 1) < 0) + return -1; + sz -= skip; + } + continue; + } + + sz -= 8; + type = -1; + if(memcmp(d, "\251nam", 4) == 0) + type = Ttitle; + else if(memcmp(d, "\251alb", 4) == 0) + type = Talbum; + else if(memcmp(d, "\251ART", 4) == 0) + type = Tartist; + else if(memcmp(d, "\251gen", 4) == 0 || memcmp(d, "gnre", 4) == 0) + type = Tgenre; + else if(memcmp(d, "\251day", 4) == 0) + type = Tdate; + else if(memcmp(d, "covr", 4) == 0) + type = Timage; + else if(memcmp(d, "trkn", 4) == 0) + type = Ttrack; + else if(memcmp(d, "mdhd", 4) == 0){ + if(ctx->read(ctx, d, 4) != 4) + return -1; + sz -= 4; + duration = 0; + if(d[0] == 0){ /* version 0 */ + if(ctx->read(ctx, d, 16) != 16) + return -1; + sz -= 16; + if((x = beuint(&d[8])) > 0) + duration = beuint(&d[12]) / x; + }else if(d[1] == 1){ /* version 1 */ + if(ctx->read(ctx, d, 28) != 28) + return -1; + sz -= 28; + if((x = beuint(&d[16])) > 0) + duration = ((uvlong)beuint(&d[20])<<32 | beuint(&d[24])) / (uvlong)x; + } + ctx->duration = duration * 1000; + continue; + } + + if(type < 0) + continue; + + if(ctx->seek(ctx, 8, 1) < 0) /* skip size and "data" */ + return -1; + sz -= 8; + if(ctx->read(ctx, d, 8) != 8) /* read data type and 4 bytes of whatever else */ + return -1; + sz -= 8; + d[0] = 0; + dtype = beuint(d); + + if(type == Ttrack){ + if(ctx->read(ctx, d, 4) != 4) + return -1; + sz -= 4; + snprint((char*)d, ctx->bufsz, "%d", beuint(d)); + txtcb(ctx, type, "", d); + }else if(type == Tgenre){ + if(ctx->read(ctx, d, 2) != 2) + return -1; + sz -= 2; + if((i = d[1]-1) >= 0 && i < Numgenre) + txtcb(ctx, type, "", id3genres[i]); + }else if(dtype == 1){ /* text */ + if(sz >= ctx->bufsz) /* skip tags that can't fit into memory. ">=" because of '\0' */ + continue; + if(ctx->read(ctx, d, sz) != sz) + return -1; + d[sz] = 0; + txtcb(ctx, type, "", d); + sz = 0; + }else if(type == Timage && dtype == 13) /* jpeg cover image */ + tagscallcb(ctx, Timage, "", "image/jpeg", ctx->seek(ctx, 0, 1), sz, nil); + else if(type == Timage && dtype == 14) /* png cover image */ + tagscallcb(ctx, Timage, "", "image/png", ctx->seek(ctx, 0, 1), sz, nil); + } + + return 0; +} diff --git a/lib/libtags/mkfile b/lib/libtags/mkfile new file mode 100644 index 00000000..6f094e3b --- /dev/null +++ b/lib/libtags/mkfile @@ -0,0 +1,29 @@ +seek(ctx, 1080, 0) != 1080) + return -1; + if(ctx->read(ctx, d, 4) != 4) + return -1; + for(i = 0; ; i++){ + if(variants[i] == nil) + return -1; + if(memcmp(d, variants[i], 4) == 0) + break; + } + if(ctx->seek(ctx, 0, 0) != 0) + return -1; + if(ctx->read(ctx, d, 20) != 20) + return -1; + if(iso88591toutf8(o, sizeof(o), d, 20) > 0) + txtcb(ctx, Ttitle, "", o); + + return 0; +} diff --git a/lib/libtags/opus.c b/lib/libtags/opus.c new file mode 100644 index 00000000..fa2c7d40 --- /dev/null +++ b/lib/libtags/opus.c @@ -0,0 +1,96 @@ +#include "tagspriv.h" + +int +tagopus(Tagctx *ctx) +{ + char *v; + uchar *d, h[4]; + int sz, numtags, i, npages, pgend; + + d = (uchar*)ctx->buf; + for(npages = pgend = 0; npages < 2; npages++){ + int nsegs; + if(ctx->read(ctx, d, 27) != 27) + return -1; + if(memcmp(d, "OggS", 4) != 0) + return -1; + + /* calculate the size of the packet */ + nsegs = d[26]; + if(ctx->read(ctx, d, nsegs+8) != nsegs+8) + return -1; + for(sz = i = 0; i < nsegs; sz += d[i++]); + + if(memcmp(&d[nsegs], "OpusHead", 8) == 0){ + if(ctx->read(ctx, d, 8) != 8 || d[0] != 1) + return -1; + sz -= 8; + ctx->channels = d[1]; + ctx->samplerate = leuint(&d[4]); + }else if(memcmp(&d[nsegs], "OpusTags", 8) == 0){ + /* FIXME - embedded pics make tags span multiple packets */ + pgend = ctx->seek(ctx, 0, 1) + sz; + break; + } + + ctx->seek(ctx, sz-8, 1); + } + + if(npages < 3){ + if(ctx->read(ctx, d, 4) != 4) + return -1; + sz = leuint(d); + if(ctx->seek(ctx, sz, 1) < 0 || ctx->read(ctx, h, 4) != 4) + return -1; + numtags = leuint(h); + + for(i = 0; i < numtags; i++){ + if(ctx->read(ctx, h, 4) != 4) + return -1; + if((sz = leuint(h)) < 0) + return -1; + /* FIXME - embedded pics make tags span multiple packets */ + if(pgend < ctx->seek(ctx, 0, 1)+sz) + break; + + if(ctx->bufsz < sz+1){ + if(ctx->seek(ctx, sz, 1) < 0) + return -1; + continue; + } + if(ctx->read(ctx, ctx->buf, sz) != sz) + return -1; + ctx->buf[sz] = 0; + + if((v = strchr(ctx->buf, '=')) == nil) + return -1; + *v++ = 0; + cbvorbiscomment(ctx, ctx->buf, v); + } + } + + /* calculate the duration */ + if(ctx->samplerate > 0){ + sz = ctx->bufsz <= 4096 ? ctx->bufsz : 4096; + for(i = sz; i < 65536+16; i += sz - 16){ + if(ctx->seek(ctx, -i, 2) <= 0) + break; + v = ctx->buf; + if(ctx->read(ctx, v, sz) != sz) + break; + for(; v != nil && v < ctx->buf+sz;){ + v = memchr(v, 'O', ctx->buf+sz - v - 14); + if(v != nil && v[1] == 'g' && v[2] == 'g' && v[3] == 'S'){ + uvlong g = leuint(v+6) | (uvlong)leuint(v+10)<<32; + ctx->duration = g * 1000 / 48000; /* granule positions are always 48KHz */ + } + if(v != nil) + v++; + } + if(ctx->duration != 0) + break; + } + } + + return 0; +} diff --git a/lib/libtags/s3m.c b/lib/libtags/s3m.c new file mode 100644 index 00000000..44a85f3c --- /dev/null +++ b/lib/libtags/s3m.c @@ -0,0 +1,17 @@ +#include "tagspriv.h" + +int +tags3m(Tagctx *ctx) +{ + char d[28+1+1], o[28*UTFmax+1], *s; + + if(ctx->read(ctx, d, 28+1+1) != 28+1+1 || (d[28] != 0x1a && d[28] != 0) || d[29] != 0x10) + return -1; + d[28] = 0; + for(s = d+27; s != d-1 && (*s == ' ' || *s == 0); s--); + s[1] = 0; + if(cp437toutf8(o, sizeof(o), d, s+1-d) > 0) + txtcb(ctx, Ttitle, "", o); + + return 0; +} diff --git a/lib/libtags/tags.c b/lib/libtags/tags.c new file mode 100644 index 00000000..6c6b2b55 --- /dev/null +++ b/lib/libtags/tags.c @@ -0,0 +1,80 @@ +#include "tagspriv.h" + +typedef struct Getter Getter; + +struct Getter +{ + int (*f)(Tagctx *ctx); + int format; +}; + +extern int tagflac(Tagctx *ctx); +extern int tagid3v1(Tagctx *ctx); +extern int tagid3v2(Tagctx *ctx); +extern int tagit(Tagctx *ctx); +extern int tagm4a(Tagctx *ctx); +extern int tagopus(Tagctx *ctx); +extern int tags3m(Tagctx *ctx); +extern int tagvorbis(Tagctx *ctx); +extern int tagwav(Tagctx *ctx); +extern int tagxm(Tagctx *ctx); +extern int tagmod(Tagctx *ctx); + +static const Getter g[] = +{ + {tagid3v2, Fmp3}, + {tagid3v1, Fmp3}, + {tagvorbis, Fogg}, + {tagflac, Fflac}, + {tagm4a, Fm4a}, + {tagopus, Fopus}, + {tagwav, Fwav}, + {tagit, Fit}, + {tagxm, Fxm}, + {tags3m, Fs3m}, + {tagmod, Fmod}, +}; + +void +tagscallcb(Tagctx *ctx, int type, const char *k, char *s, int offset, int size, Tagread f) +{ + char *e; + + if(f == nil && size == 0){ + while((uchar)*s <= ' ' && *s) + s++; + e = s + strlen(s); + while(e != s && (uchar)e[-1] <= ' ') + e--; + *e = 0; + } + if(*s){ + ctx->tag(ctx, type, k, s, offset, size, f); + if(type != Tunknown){ + ctx->found |= 1<num++; + } + } +} + +int +tagsget(Tagctx *ctx) +{ + int i, res; + + ctx->channels = ctx->samplerate = ctx->bitrate = ctx->duration = 0; + ctx->found = 0; + ctx->format = Funknown; + ctx->restart = 0; + res = -1; + for(i = 0; i < nelem(g); i++){ + ctx->num = 0; + if(g[i].f(ctx) == 0){ + ctx->format = g[i].format; + res = 0; + } + ctx->seek(ctx, ctx->restart, 0); + } + + return res; +} diff --git a/lib/libtags/tags.h b/lib/libtags/tags.h new file mode 100644 index 00000000..c53d239e --- /dev/null +++ b/lib/libtags/tags.h @@ -0,0 +1,94 @@ +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct Tagctx Tagctx; +typedef int (*Tagread)(void *buf, int *cnt); + +/* Tag type. */ +enum +{ + Tunknown = -1, + Tartist, + Talbum, + Ttitle, + Tdate, /* "2014", "2015/02/01", but the year goes first */ + Ttrack, /* "1", "01", "1/4", but the track number goes first */ + Talbumgain, + Talbumpeak, + Ttrackgain, + Ttrackpeak, + Tgenre, + Timage, +}; + +/* Format of the audio file. */ +enum +{ + Funknown = -1, + Fmp3, + Fogg, + Fflac, + Fm4a, + Fopus, + Fwav, + Fit, + Fxm, + Fs3m, + Fmod, + + Fmax, +}; + +/* Tag parser context. You need to set it properly before parsing an audio file using libtags. */ +struct Tagctx +{ + /* Read function. This is what libtags uses to read the file. */ + int (*read)(Tagctx *ctx, void *buf, int cnt); + + /* Seek function. This is what libtags uses to seek through the file. */ + int (*seek)(Tagctx *ctx, int offset, int whence); + + /* Callback that is used by libtags to inform about the tags of a file. + * "type" is the tag's type (Tartist, ...) or Tunknown if libtags doesn't know how to map a tag kind to + * any of these. "k" is the raw key like "TPE1", "TPE2", etc. "s" is the null-terminated string unless "type" is + * Timage. "offset" and "size" define the placement and size of the image cover ("type" = Timage) + * inside the file, and "f" is not NULL in case reading the image cover requires additional + * operations on the data, in which case you need to read the image cover as a stream and call this + * function to apply these operations on the contents read. + */ + void (*tag)(Tagctx *ctx, int type, const char *k, const char *s, int offset, int size, Tagread f); + + /* Approximate millisecond-to-byte offsets within the file, if available. This callback is optional. */ + void (*toc)(Tagctx *ctx, int ms, int offset); + + /* Auxiliary data. Not used by libtags. */ + void *aux; + + /* Memory buffer to work in. */ + char *buf; + + /* Size of the buffer. Must be at least 256 bytes. */ + int bufsz; + + /* Here goes the stuff libtags sets. It should be accessed after tagsget() returns. + * A value of 0 means it's undefined. + */ + int channels; /* Number of channels. */ + int samplerate; /* Hz */ + int bitrate; /* Bitrate, bits/s. */ + int duration; /* ms */ + int format; /* Fmp3, Fogg, Fflac, Fm4a */ + + /* Private, don't touch. */ + int found; + int num; + int restart; +}; + +/* Parse the file using this function. Returns 0 on success. */ +extern int tagsget(Tagctx *ctx); + +#ifdef __cplusplus +} +#endif diff --git a/lib/libtags/tagspriv.h b/lib/libtags/tagspriv.h new file mode 100644 index 00000000..496828b1 --- /dev/null +++ b/lib/libtags/tagspriv.h @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include +#define snprint snprintf +#define cistrcmp strcasecmp +#define cistrncmp strncasecmp +#define nil NULL +#define UTFmax 4 +#define nelem(x) (int)(sizeof(x)/sizeof((x)[0])) +typedef uint8_t uchar; +typedef uint16_t u16int; +typedef uint32_t u32int; +typedef uint64_t uvlong; +typedef unsigned int uint; +typedef unsigned short ushort; +#include "tags.h" + +enum +{ + Numgenre = 192, +}; + +#define beuint(d) (uint)(((uchar*)(d))[0]<<24 | ((uchar*)(d))[1]<<16 | ((uchar*)(d))[2]<<8 | ((uchar*)(d))[3]<<0) +#define leuint(d) (uint)(((uchar*)(d))[3]<<24 | ((uchar*)(d))[2]<<16 | ((uchar*)(d))[1]<<8 | ((uchar*)(d))[0]<<0) + +extern const char *id3genres[Numgenre]; + +/* + * Converts (to UTF-8) at most sz bytes of src and writes it to out buffer. + * Returns the number of bytes converted. + * You need sz*2+1 bytes for out buffer to be completely safe. + */ +int iso88591toutf8(uchar *out, int osz, const uchar *src, int sz); + +/* + * Converts (to UTF-8) at most sz bytes of src and writes it to out buffer. + * Returns the number of bytes converted or < 0 in case of error. + * You need sz*4+1 bytes for out buffer to be completely safe. + * UTF-16 defaults to big endian if there is no BOM. + */ +int utf16to8(uchar *out, int osz, const uchar *src, int sz); + +/* + * Same as utf16to8, but CP437 to UTF-8. + */ +int cp437toutf8(char *o, int osz, const char *s, int sz); + +/* + * This one is common for both vorbis.c and flac.c + * It maps a string k to tag type and executes the callback from ctx. + * Returns 1 if callback was called, 0 otherwise. + */ +void cbvorbiscomment(Tagctx *ctx, char *k, char *v); + +void tagscallcb(Tagctx *ctx, int type, const char *k, char *s, int offset, int size, Tagread f); + +#define txtcb(ctx, type, k, s) tagscallcb(ctx, type, k, (char*)s, 0, 0, nil) diff --git a/lib/libtags/utf16.c b/lib/libtags/utf16.c new file mode 100644 index 00000000..28543036 --- /dev/null +++ b/lib/libtags/utf16.c @@ -0,0 +1,59 @@ +/* Horror stories: http://en.wikipedia.org/wiki/UTF-16 */ +#include "tagspriv.h" + +#define rchr(s) (be ? ((s)[0]<<8 | (s)[1]) : ((s)[1]<<8 | (s)[0])) + +static const uchar mark[] = {0x00, 0x00, 0xc0, 0xe0, 0xf0}; + +int +utf16to8(uchar *o, int osz, const uchar *s, int sz) +{ + int i, be, c, c2, wr, j; + + i = 0; + be = 1; + if(s[0] == 0xfe && s[1] == 0xff) + i += 2; + else if(s[0] == 0xff && s[1] == 0xfe){ + be = 0; + i += 2; + } + + for(; i < sz-1 && osz > 1;){ + c = rchr(&s[i]); + i += 2; + if(c >= 0xd800 && c <= 0xdbff && i < sz-1){ + c2 = rchr(&s[i]); + if(c2 >= 0xdc00 && c2 <= 0xdfff){ + c = 0x10000 | (c - 0xd800)<<10 | (c2 - 0xdc00); + i += 2; + }else + return -1; + }else if(c >= 0xdc00 && c <= 0xdfff) + return -1; + + if(c < 0x80) + wr = 1; + else if(c < 0x800) + wr = 2; + else if(c < 0x10000) + wr = 3; + else + wr = 4; + + osz -= wr; + if(osz < 1) + break; + + o += wr; + for(j = wr; j > 1; j--){ + *(--o) = (c & 0xbf) | 0x80; + c >>= 6; + } + *(--o) = c | mark[wr]; + o += wr; + } + + *o = 0; + return i; +} diff --git a/lib/libtags/vorbis.c b/lib/libtags/vorbis.c new file mode 100644 index 00000000..0c659da5 --- /dev/null +++ b/lib/libtags/vorbis.c @@ -0,0 +1,132 @@ +/* + * https://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-810005 + * https://wiki.xiph.org/VorbisComment + */ +#include "tagspriv.h" + +void +cbvorbiscomment(Tagctx *ctx, char *k, char *v){ + if(*v == 0) + return; + if(cistrcmp(k, "album") == 0) + txtcb(ctx, Talbum, k, v); + else if(cistrcmp(k, "title") == 0) + txtcb(ctx, Ttitle, k, v); + else if(cistrcmp(k, "artist") == 0) + txtcb(ctx, Tartist, k, v); + else if(cistrcmp(k, "tracknumber") == 0) + txtcb(ctx, Ttrack, k, v); + else if(cistrcmp(k, "date") == 0) + txtcb(ctx, Tdate, k, v); + else if(cistrcmp(k, "replaygain_track_peak") == 0) + txtcb(ctx, Ttrackpeak, k, v); + else if(cistrcmp(k, "replaygain_track_gain") == 0) + txtcb(ctx, Ttrackgain, k, v); + else if(cistrcmp(k, "replaygain_album_peak") == 0) + txtcb(ctx, Talbumpeak, k, v); + else if(cistrcmp(k, "replaygain_album_gain") == 0) + txtcb(ctx, Talbumgain, k, v); + else if(cistrcmp(k, "genre") == 0) + txtcb(ctx, Tgenre, k, v); + else + txtcb(ctx, Tunknown, k, v); +} + +int +tagvorbis(Tagctx *ctx) +{ + char *v; + uchar *d, h[4]; + int sz, numtags, i, npages, pgend; + + d = (uchar*)ctx->buf; + /* need to find vorbis frame with type=3 */ + for(npages = pgend = 0; npages < 2; npages++){ /* vorbis comment is the second header */ + int nsegs; + if(ctx->read(ctx, d, 27) != 27) + return -1; + if(memcmp(d, "OggS", 4) != 0) + return -1; + + /* calculate the size of the packet */ + nsegs = d[26]; + if(ctx->read(ctx, d, nsegs+1) != nsegs+1) + return -1; + for(sz = i = 0; i < nsegs; sz += d[i++]); + + if(d[nsegs] == 3){ /* comment */ + /* FIXME - embedded pics make tags span multiple packets */ + pgend = ctx->seek(ctx, 0, 1) + sz; + break; + } + if(d[nsegs] == 1 && sz >= 28){ /* identification */ + if(ctx->read(ctx, d, 28) != 28) + return -1; + sz -= 28; + ctx->channels = d[10]; + ctx->samplerate = leuint(&d[11]); + if((ctx->bitrate = leuint(&d[15])) == 0) /* maximum */ + ctx->bitrate = leuint(&d[19]); /* nominal */ + } + + ctx->seek(ctx, sz-1, 1); + } + + if(npages < 3) { + if(ctx->read(ctx, &d[1], 10) != 10 || memcmp(&d[1], "vorbis", 6) != 0) + return -1; + sz = leuint(&d[7]); + if(ctx->seek(ctx, sz, 1) < 0 || ctx->read(ctx, h, 4) != 4) + return -1; + numtags = leuint(h); + + for(i = 0; i < numtags; i++){ + if(ctx->read(ctx, h, 4) != 4) + return -1; + if((sz = leuint(h)) < 0) + return -1; + /* FIXME - embedded pics make tags span multiple packets */ + if(pgend < ctx->seek(ctx, 0, 1)+sz) + break; + + if(ctx->bufsz < sz+1){ + if(ctx->seek(ctx, sz, 1) < 0) + return -1; + continue; + } + if(ctx->read(ctx, ctx->buf, sz) != sz) + return -1; + ctx->buf[sz] = 0; + + if((v = strchr(ctx->buf, '=')) == nil) + return -1; + *v++ = 0; + cbvorbiscomment(ctx, ctx->buf, v); + } + } + + /* calculate the duration */ + if(ctx->samplerate > 0){ + sz = ctx->bufsz <= 4096 ? ctx->bufsz : 4096; + for(i = sz; i < 65536+16; i += sz - 16){ + if(ctx->seek(ctx, -i, 2) <= 0) + break; + v = ctx->buf; + if(ctx->read(ctx, v, sz) != sz) + break; + for(; v != nil && v < ctx->buf+sz;){ + v = memchr(v, 'O', ctx->buf+sz - v - 14); + if(v != nil && v[1] == 'g' && v[2] == 'g' && v[3] == 'S' && (v[5] & 4) == 4){ /* last page */ + uvlong g = leuint(v+6) | (uvlong)leuint(v+10)<<32; + ctx->duration = g * 1000 / ctx->samplerate; + } + if(v != nil) + v++; + } + if(ctx->duration != 0) + break; + } + } + + return 0; +} diff --git a/lib/libtags/wav.c b/lib/libtags/wav.c new file mode 100644 index 00000000..55e1566b --- /dev/null +++ b/lib/libtags/wav.c @@ -0,0 +1,90 @@ +#include "tagspriv.h" + +#define le16u(d) (u16int)((d)[0] | (d)[1]<<8) + +static struct { + char *s; + int type; +}t[] = { + {"IART", Tartist}, + {"ICRD", Tdate}, + {"IGNR", Tgenre}, + {"INAM", Ttitle}, + {"IPRD", Talbum}, + {"ITRK", Ttrack}, +}; + +int +tagwav(Tagctx *ctx) +{ + uchar *d; + int i, n, info; + u32int csz, x; + uvlong sz; + + d = (uchar*)ctx->buf; + + sz = 1; + info = 0; + for(i = 0; i < 8 && sz > 0; i++){ + if(ctx->read(ctx, d, 4+4+(i?0:4)) != 4+4+(i?0:4)) + return -1; + if(i == 0){ + if(memcmp(d, "RIFF", 4) != 0 || memcmp(d+8, "WAVE", 4) != 0) + return -1; + sz = leuint(d+4); + if(sz < 4) + return -1; + sz -= 4; + continue; + }else if(memcmp(d, "INFO", 4) == 0){ + info = 1; + ctx->seek(ctx, -4, 1); + continue; + } + + if(sz <= 8) + break; + sz -= 4+4; + csz = leuint(d+4); + if(sz < csz) + break; + sz -= csz; + + if(i == 1){ + if(memcmp(d, "fmt ", 4) != 0 || csz < 16) + return -1; + if(ctx->read(ctx, d, 16) != 16) + return -1; + csz -= 16; + ctx->channels = le16u(d+2); + ctx->samplerate = leuint(d+4); + x = leuint(d+8); + if(ctx->channels < 1 || ctx->samplerate < 1 || x < 1) + return -1; + ctx->duration = sz*1000 / x; + }else if(memcmp(d, "LIST", 4) == 0){ + sz = csz - 4; + continue; + }else if(memcmp(d, "data", 4) == 0){ + break; + }else if(info){ + csz++; + for(n = 0; n < nelem(t); n++){ + if(memcmp(d, t[n].s, 4) == 0){ + if(ctx->read(ctx, d, csz) != (int)csz) + return -1; + d[csz-1] = 0; + txtcb(ctx, t[n].type, "", d); + csz = 0; + break; + } + } + } + + if(ctx->seek(ctx, csz, 1) < 0) + return -1; + } + + return i > 0 ? 0 : -1; +} diff --git a/lib/libtags/xm.c b/lib/libtags/xm.c new file mode 100644 index 00000000..8f070262 --- /dev/null +++ b/lib/libtags/xm.c @@ -0,0 +1,15 @@ +#include "tagspriv.h" + +int +tagxm(Tagctx *ctx) +{ + char d[17+20+1], o[20*UTFmax+1]; + + if(ctx->read(ctx, d, 17+20) != 17+20 || cistrncmp(d, "Extended Module: ", 17) != 0) + return -1; + d[17+20] = 0; + if(cp437toutf8(o, sizeof(o), d+17, 20) > 0) + txtcb(ctx, Ttitle, "", o); + + return 0; +} diff --git a/src/database/CMakeLists.txt b/src/database/CMakeLists.txt index 0bc6f6b9..47f8fe31 100644 --- a/src/database/CMakeLists.txt +++ b/src/database/CMakeLists.txt @@ -1,7 +1,7 @@ idf_component_register( SRCS "env_esp.cpp" "database.cpp" "tag_processor.cpp" "db_task.cpp" INCLUDE_DIRS "include" - REQUIRES "result" "span" "esp_psram" "fatfs") + REQUIRES "result" "span" "esp_psram" "fatfs" "libtags") target_compile_options(${COMPONENT_LIB} PRIVATE ${EXTRA_WARNINGS}) diff --git a/src/database/include/tag_processor.hpp b/src/database/include/tag_processor.hpp index fb2201db..eda88225 100644 --- a/src/database/include/tag_processor.hpp +++ b/src/database/include/tag_processor.hpp @@ -6,6 +6,8 @@ namespace database { struct FileInfo { bool is_playable; + std::string artist; + std::string album; std::string title; }; diff --git a/src/database/tag_processor.cpp b/src/database/tag_processor.cpp index 16dbf160..5752d61b 100644 --- a/src/database/tag_processor.cpp +++ b/src/database/tag_processor.cpp @@ -1,14 +1,102 @@ #include "tag_processor.hpp" +#include +#include +#include + namespace database { +namespace libtags { + +struct Aux { + FIL file; + FILINFO info; + std::string artist; + std::string album; + std::string title; +}; + +static int read(Tagctx *ctx, void *buf, int cnt) { + Aux *aux = reinterpret_cast(ctx->aux); + UINT bytes_read; + if (f_read(&aux->file, buf, cnt, &bytes_read) != FR_OK) { + return -1; + } + return bytes_read; +} + +static int seek(Tagctx *ctx, int offset, int whence) { + Aux *aux = reinterpret_cast(ctx->aux); + FRESULT res; + if (whence == 0) { + // Seek from the start of the file. This is f_lseek's behaviour. + res = f_lseek(&aux->file, offset); + } else if (whence == 1) { + // Seek from current offset. + res = f_lseek(&aux->file, aux->file.fptr + offset); + } else if (whence == 2) { + // Seek from the end of the file + res = f_lseek(&aux->file, aux->info.fsize + offset); + } else { + return -1; + } + return res; +} + +static void +tag(Tagctx *ctx, int t, const char *k, const char *v, int offset, int size, Tagread f) { + Aux *aux = reinterpret_cast(ctx->aux); + if (t == Ttitle) { + aux->title = v; + } else if (t == Tartist) { + aux->artist = v; + } else if (t == Talbum) { + aux->album = v; + } +} + +static void +toc(Tagctx *ctx, int ms, int offset) {} + +} // namespace libtags + +static const std::size_t kBufSize = 1024; +static const char* kTag = "TAGS"; + auto GetInfo(const std::string& path, FileInfo* out) -> bool { - // TODO(jacqueline): bring in taglib for this - if (path.ends_with(".mp3")) { + libtags::Aux aux; + if (f_stat(path.c_str(), &aux.info) != FR_OK || f_open(&aux.file, path.c_str(), FA_READ) != FR_OK) { + ESP_LOGI(kTag, "failed to open file"); + return false; + } + // Fine to have this on the stack; this is only called on the leveldb task. + char buf[kBufSize]; + Tagctx ctx; + ctx.read = libtags::read; + ctx.seek = libtags::seek; + ctx.tag = libtags::tag; + ctx.toc = libtags::toc; + ctx.aux = &aux; + ctx.buf = buf; + ctx.bufsz = kBufSize; + int res = tagsget(&ctx); + f_close(&aux.file); + + if (res != 0) { + ESP_LOGI(kTag, "failed to parse tags"); + return false; + } + + if (ctx.format == Fmp3) { + ESP_LOGI(kTag, "file is mp3"); + ESP_LOGI(kTag, "artist: %s", aux.artist.c_str()); + ESP_LOGI(kTag, "album: %s", aux.album.c_str()); + ESP_LOGI(kTag, "title: %s", aux.title.c_str()); out->is_playable = true; - out->title = path.substr(0, path.size() - 4); + out->title = aux.title; return true; } + return false; } diff --git a/tools/cmake/common.cmake b/tools/cmake/common.cmake index a116dcb3..acb017a4 100644 --- a/tools/cmake/common.cmake +++ b/tools/cmake/common.cmake @@ -8,6 +8,7 @@ set(COMPONENTS "") list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/catch2") list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/cbor") list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/libmad") +list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/libtags") list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/lvgl") list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/result") list(APPEND EXTRA_COMPONENT_DIRS "$ENV{PROJ_PATH}/lib/span")