playground project testing RP2040 (Pico) with SSD1309 2.42" OLED from AliExpress
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

93 lines
2.2 KiB

/**
* UTF-8 string parsing and character iteration
*
* Created on 2020/01/04.
*/
#ifndef LIQUIDTYPE_UTF8_H
#define LIQUIDTYPE_UTF8_H
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include "progmem.h"
/**
* A single UTF-8 encoded character, stored as up to four raw bytes.
*
* Both union members alias the same 4 bytes of storage, so the whole
* character can be cleared or tested for emptiness through `uint`.
*/
struct Utf8Char {
union {
/** Encoded bytes of the character; padded with zero bytes if shorter than 4. */
uint8_t bytes[4];
/**
* u32 view of the same bytes; 0 means every byte is zero.
* NOTE(review): the numeric value depends on host byte order, so compare it
* only against 0 or against values built through this same union.
*/
uint32_t uint;
};
};
/**
* Internal state of the UTF-8 string parser.
*
* Utf8Parser_Clear() resets every field to zero.
*/
struct Utf8Parser {
/** Buffer holding the bytes of the UTF-8 character being collected. */
struct Utf8Char buffer;
/** Length of the UTF-8 character currently being collected. */
uint8_t utf_len;
/** Position within the character currently being collected. */
uint8_t utf_j;
};
/**
* Reset a parser to its initial state.
*
* Empties the character buffer and zeroes both the collected-length and
* position counters.
*
* @param self parser state to reset
*/
static inline void Utf8Parser_Clear(struct Utf8Parser *self) {
    /* Both counters restart together. */
    self->utf_len = self->utf_j = 0;
    /* Writing the u32 view zeroes all four buffered bytes at once. */
    self->buffer.uint = 0;
}
/**
* UTF-8 character iterator.
*
* Usage:
* struct Utf8Iterator iter;
* Utf8Iterator_Init(&iter, myString);
*
* struct Utf8Char uchar;
* while ((uchar = Utf8Iterator_Next(&iter)).uint) {
* // do something with the char
* }
*
* // Free myString if needed, it is not mutated.
*/
struct Utf8Iterator {
/** Characters to parse. The pointer is advanced as the iterator progresses. */
const char *source;
/** Parser state; reset by Utf8Iterator_Init(). */
struct Utf8Parser parser;
/**
* Cleared by Utf8Iterator_Init(), set by Utf8Iterator_Init_P().
* NOTE(review): presumably marks `source` as residing in program memory —
* confirm against the Utf8Iterator_Next() implementation.
*/
bool is_progmem;
};
/**
* Set up an iterator over `source` with the progmem flag cleared.
*
* The string is only referenced, not copied; the parser state is reset so
* iteration starts from a clean slate.
*
* @param self   iterator to initialize
* @param source characters to iterate over
*/
static inline void Utf8Iterator_Init(struct Utf8Iterator *self, const char *source) {
    self->is_progmem = false;
    self->source = source;
    /* Discard any partially collected character left in the parser. */
    Utf8Parser_Clear(&self->parser);
}
/**
* Set up an iterator over `source` with the progmem flag set.
*
* Performs the same initialization as Utf8Iterator_Init() and then marks the
* iterator as progmem.
*
* @param self   iterator to initialize
* @param source characters to iterate over
*/
static inline void Utf8Iterator_Init_P(struct Utf8Iterator *self, const char *source) {
/* The shared init clears is_progmem, so the flag must be set afterwards. */
Utf8Iterator_Init(self, source);
self->is_progmem = true;
}
/**
* NOTE(review): presumably returns the length of `text` counted in UTF-8
* characters rather than bytes — confirm against the implementation, which is
* not part of this header.
*/
size_t utf8_strlen(const char *text);
/**
* Get the next character from the iterator.
*
* Invalid characters are skipped.
*
* @param self iterator to advance
* @return the next character, or an empty character (uint == 0) if there are
*         no more characters to parse
*/
struct Utf8Char Utf8Iterator_Next(struct Utf8Iterator *self);
/**
* Parse a character.
*
* ASCII is passed through; UTF-8 is collected and returned in one piece.
*
* @param self parser state
* @param c    next input char to handle
* @return the parsed character, or NIL (uint == 0) if no character is
*         available yet
*/
struct Utf8Char Utf8Parser_Handle(struct Utf8Parser *self, char c);
#endif //LIQUIDTYPE_UTF8_H