You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
66 lines
1.4 KiB
66 lines
1.4 KiB
5 years ago
|
/**
 * UTF-8 helpers: a 4-byte character type (Utf8Char), an incremental
 * byte-by-byte parser (Utf8Parser) and a string iterator (Utf8Iterator).
 *
 * Created on 2020/01/04.
 */
|
||
|
|
||
|
#ifndef LIQUIDTYPE_UTF8_H
|
||
|
#define LIQUIDTYPE_UTF8_H
|
||
|
|
||
|
#include <stddef.h>
|
||
|
#include <stdint.h>
|
||
|
|
||
|
/**
 * A single UTF-8 encoded character, stored in at most 4 bytes.
 *
 * Both members of the anonymous union (C11) share the same storage:
 * `bytes` exposes the individual encoded bytes, `uint` views all four of
 * them as one integer. The functions declared in this header use
 * `uint == 0` to mean "no character".
 *
 * The numeric value of `uint` for a given byte sequence depends on the host
 * byte order, so it is only portable to compare it against 0 or against
 * another Utf8Char built on the same machine.
 *
 * NOTE(review): unused trailing bytes are presumably left as 0 for
 * sequences shorter than 4 bytes - confirm in the implementation.
 */
struct Utf8Char {
    union {
        /* Raw encoded bytes of the character. */
        uint8_t bytes[4];
        /* The same 4 bytes viewed as one value; 0 means "empty". */
        uint32_t uint;
    };
};
|
||
|
|
||
|
struct Utf8Parser {
|
||
|
struct Utf8Char buffer;
|
||
|
uint8_t utf_len;
|
||
|
uint8_t utf_j;
|
||
|
};
|
||
|
|
||
|
/**
|
||
|
* Utf8 character iterator.
|
||
|
*
|
||
|
* Usage:
|
||
|
* struct Utf8Iterator iter;
|
||
|
* Utf8Iterator_Start(&iter, myString);
|
||
|
*
|
||
|
* union Utf8Char uchar;
|
||
|
* while ((uchar = Utf8Iterator_Next(&iter)).uint) {
|
||
|
* // do something with the char
|
||
|
* }
|
||
|
*
|
||
|
* // Free myString if needed.
|
||
|
* // The iterator does not need any cleanup if it lives on stack.
|
||
|
*/
|
||
|
struct Utf8Iterator {
|
||
|
/* Characters to parse. The pointer is advanced as the iterator progresses. */
|
||
|
const char *source;
|
||
|
struct Utf8Parser parser;
|
||
|
};
|
||
|
|
||
|
/**
 * Start iterating over the characters of `source` using the iterator `self`.
 *
 * Call this before the first Utf8Iterator_Next() on `self`.
 *
 * NOTE(review): the usage notes on struct Utf8Iterator leave freeing the
 * string to the caller, so `source` is presumably borrowed (not copied) and
 * must stay valid while iterating; it is presumably expected to be
 * NUL-terminated - confirm in the implementation.
 */
void Utf8Iterator_Start(struct Utf8Iterator *self, const char *source);
|
||
|
|
||
|
/**
 * Get the next character from the iterator.
 *
 * Returns an empty character (uint == 0) if there are no more characters to
 * parse, which allows the result to be used directly as a loop condition.
 *
 * Invalid characters are skipped.
 */
struct Utf8Char Utf8Iterator_Next(struct Utf8Iterator *self);
|
||
|
|
||
|
/**
 * Parse a character, one input byte per call.
 *
 * The returned struct contains NIL (uint == 0) if no character is yet available.
 *
 * ASCII is passed through, utf-8 is collected and returned in one piece.
 *
 * NOTE(review): a NUL input byte presumably also yields uint == 0 and so
 * cannot be told apart from "not yet available" - confirm if that matters
 * to the caller.
 */
struct Utf8Char Utf8Parser_Handle(struct Utf8Parser *self, char c);
|
||
|
|
||
|
#endif //LIQUIDTYPE_UTF8_H
|