/**
 * UTF-8 helpers: a 4-byte character container, a byte-at-a-time parser
 * state, and an iterator over the characters of a C string.
 *
 * Created on 2020/01/04.
 */

#ifndef LIQUIDTYPE_UTF8_H
#define LIQUIDTYPE_UTF8_H

#include <stdint.h>
/* NOTE(review): the name of the second header was lost from the original
 * include line; <stdbool.h> is assumed here - confirm against the
 * implementation file. */
#include <stdbool.h>

/**
 * One UTF-8 encoded character, at most 4 bytes long.
 *
 * The same storage can be viewed as individual bytes or as one 32-bit
 * integer. `uint == 0` is used as the NIL / "no character" marker.
 * The numeric value of `uint` for multi-byte characters depends on the
 * host byte order, so only compare it against zero or against another
 * Utf8Char produced on the same machine.
 */
struct Utf8Char {
    union {
        uint8_t bytes[4];
        uint32_t uint;
    };
};

/**
 * State of the incremental parser driven by Utf8Parser_Handle().
 */
struct Utf8Parser {
    /* Character being collected. */
    struct Utf8Char buffer;
    /* Presumably the total byte length of the sequence in progress - TODO confirm. */
    uint8_t utf_len;
    /* Presumably the index of the next byte to store in `buffer` - TODO confirm. */
    uint8_t utf_j;
};

/**
 * Utf8 character iterator.
 *
 * Usage:
 *   struct Utf8Iterator iter;
 *   Utf8Iterator_Start(&iter, myString);
 *
 *   struct Utf8Char uchar;
 *   while ((uchar = Utf8Iterator_Next(&iter)).uint) {
 *       // do something with the char
 *   }
 *
 *   // Free myString if needed.
 *   // The iterator does not need any cleanup if it lives on stack.
 */
struct Utf8Iterator {
    /* Characters to parse. The pointer is advanced as the iterator progresses. */
    const char *source;
    struct Utf8Parser parser;
};

/**
 * Prepare an iterator for reading characters from a string.
 *
 * @param self    iterator to set up
 * @param source  string to iterate over; the iterator keeps this pointer,
 *                and freeing the string (if needed) is left to the caller
 */
void Utf8Iterator_Start(struct Utf8Iterator *self, const char *source);

/**
 * Get the next character from the iterator.
 *
 * Invalid characters are skipped.
 *
 * @param self  iterator previously set up with Utf8Iterator_Start()
 * @return the next character, or an empty character (uint == 0) if there
 *         are no more characters to parse
 */
struct Utf8Char Utf8Iterator_Next(struct Utf8Iterator *self);

/**
 * Feed one byte to the parser.
 *
 * ASCII is passed through, utf-8 is collected and returned in one piece.
 *
 * @param self  parser state
 * @param c     next input byte
 * @return the completed character, or NIL (uint == 0) if no character is
 *         available yet
 */
struct Utf8Char Utf8Parser_Handle(struct Utf8Parser *self, char c);

#endif /* LIQUIDTYPE_UTF8_H */