commit
758503a739
@ -0,0 +1,4 @@ |
||||
.idea/ |
||||
cmake-build* |
||||
|
||||
|
@ -0,0 +1,12 @@ |
||||
cmake_minimum_required(VERSION 3.25) |
||||
project(hd44780utf C) |
||||
|
||||
set(CMAKE_C_STANDARD 99) |
||||
|
||||
add_executable(hd44780utf |
||||
src/utf8.c |
||||
src/main.c |
||||
src/cgrom.c |
||||
src/cgram.c |
||||
src/lcdbuf.h |
||||
src/lcdbuf.c) |
@ -0,0 +1,477 @@ |
||||
/**
|
||||
 * Custom glyph (CGRAM) patterns for Czech accented letters, each with an ASCII fallback.
||||
*/ |
||||
|
||||
#include "cgram.h" |
||||
|
||||
/*
 * Czech accented letters as custom glyphs.
 *
 * Each entry gives the UTF-8 symbol, the ASCII character used when the glyph
 * cannot be loaded, and seven bitmap rows (the 8th row is left zero).
 * The table is terminated by an empty entry.
 */
const struct cgram_pattern CGRAM_CZ[] = {
    // lowercase
    {.symbol = "ě", .fallback = 'e', .data = {0b01010, 0b00100, 0b01110, 0b10001, 0b11111, 0b10000, 0b01110}},
    {.symbol = "š", .fallback = 's', .data = {0b01010, 0b00100, 0b01110, 0b10000, 0b01110, 0b00001, 0b11110}},
    {.symbol = "č", .fallback = 'c', .data = {0b01010, 0b00100, 0b01110, 0b10000, 0b10000, 0b10001, 0b01110}},
    {.symbol = "ř", .fallback = 'r', .data = {0b01010, 0b00100, 0b10110, 0b11001, 0b10000, 0b10000, 0b10000}},
    {.symbol = "ž", .fallback = 'z', .data = {0b01010, 0b00100, 0b11111, 0b00010, 0b00100, 0b01000, 0b11111}},
    {.symbol = "ý", .fallback = 'y', .data = {0b00010, 0b00100, 0b10001, 0b10001, 0b01111, 0b00001, 0b01110}},
    {.symbol = "á", .fallback = 'a', .data = {0b00010, 0b00100, 0b01110, 0b00001, 0b01111, 0b10001, 0b01111}},
    {.symbol = "í", .fallback = 'i', .data = {0b00110, 0b00000, 0b01100, 0b00100, 0b00100, 0b00100, 0b01110}},
    {.symbol = "é", .fallback = 'e', .data = {0b00010, 0b00100, 0b01110, 0b10001, 0b11111, 0b10000, 0b01110}},
    {.symbol = "ú", .fallback = 'u', .data = {0b00010, 0b00100, 0b10001, 0b10001, 0b10001, 0b10011, 0b01101}},
    {.symbol = "ů", .fallback = 'u', .data = {0b00100, 0b01010, 0b10101, 0b10001, 0b10001, 0b10011, 0b01101}},
    {.symbol = "ď", .fallback = 'd', .data = {0b01101, 0b00001, 0b01101, 0b10011, 0b10001, 0b10001, 0b01111}},
    {.symbol = "ť", .fallback = 't', .data = {0b01010, 0b01001, 0b11100, 0b01000, 0b01000, 0b01001, 0b00110}},
    {.symbol = "ň", .fallback = 'n', .data = {0b01010, 0b00100, 0b10110, 0b11001, 0b10001, 0b10001, 0b10001}},

    // UPPERCASE
    {.symbol = "Ě", .fallback = 'E', .data = {0b01010, 0b00100, 0b11111, 0b10000, 0b11100, 0b10000, 0b11111}},
    {.symbol = "Š", .fallback = 'S', .data = {0b01010, 0b00100, 0b01111, 0b10000, 0b01110, 0b00001, 0b11110}},
    {.symbol = "Č", .fallback = 'C', .data = {0b01010, 0b00100, 0b01110, 0b10001, 0b10000, 0b10001, 0b01110}},
    {.symbol = "Ř", .fallback = 'R', .data = {0b01010, 0b00100, 0b11110, 0b10001, 0b11110, 0b10010, 0b10001}},
    {.symbol = "Ž", .fallback = 'Z', .data = {0b01010, 0b00100, 0b11111, 0b00001, 0b01110, 0b10000, 0b11111}},
    {.symbol = "Ý", .fallback = 'Y', .data = {0b00010, 0b00100, 0b10001, 0b10001, 0b01010, 0b00100, 0b00100}},
    {.symbol = "Á", .fallback = 'A', .data = {0b00010, 0b00100, 0b01110, 0b10001, 0b11111, 0b10001, 0b10001}},
    {.symbol = "Í", .fallback = 'I', .data = {0b00010, 0b00100, 0b01110, 0b00100, 0b00100, 0b00100, 0b01110}},
    {.symbol = "É", .fallback = 'E', .data = {0b00010, 0b00100, 0b11111, 0b10000, 0b11100, 0b10000, 0b11111}},
    {.symbol = "Ú", .fallback = 'U', .data = {0b00010, 0b00100, 0b10001, 0b10001, 0b10001, 0b10001, 0b01110}},
    {.symbol = "Ů", .fallback = 'U', .data = {0b00100, 0b01010, 0b10101, 0b10001, 0b10001, 0b10001, 0b01110}},
    {.symbol = "Ď", .fallback = 'D', .data = {0b01010, 0b00100, 0b11100, 0b10010, 0b10001, 0b10010, 0b11100}},
    {.symbol = "Ť", .fallback = 'T', .data = {0b01010, 0b00100, 0b11111, 0b00100, 0b00100, 0b00100, 0b00100}},
    {.symbol = "Ň", .fallback = 'N', .data = {0b01010, 0b00100, 0b10001, 0b11001, 0b10101, 0b10011, 0b10001}},

    {}, /* end mark */
};
||||
|
||||
|
||||
/*
|
||||
// this should be more or less the default font
|
||||
|
||||
{{0b00000, 0b00000, 0b00000, 0b00000, 0b00000, 0b00000, 0b00000}}, //
|
||||
{{0b00100, 0b00100, 0b00100, 0b00100, 0b00100, 0b00000, 0b00100}}, // !
|
||||
{{0b01010, 0b01010, 0b01010, 0b00000, 0b00000, 0b00000, 0b00000}}, // "
|
||||
{{0b01010, 0b01010, 0b11111, 0b01010, 0b11111, 0b01010, 0b01010}}, // #
|
||||
{{0b00100, 0b01111, 0b10100, 0b01110, 0b00101, 0b11110, 0b00100}}, // $
|
||||
{{0b11000, 0b11001, 0b00010, 0b00100, 0b01000, 0b10011, 0b00011}}, // %
|
||||
{{0b01100, 0b10010, 0b10100, 0b01000, 0b10101, 0b10010, 0b01101}}, // &
|
||||
{{0b01100, 0b00100, 0b01000, 0b00000, 0b00000, 0b00000, 0b00000}}, // '
|
||||
{{0b00010, 0b00100, 0b01000, 0b01000, 0b01000, 0b00100, 0b00010}}, // (
|
||||
{{0b01000, 0b00100, 0b00010, 0b00010, 0b00010, 0b00100, 0b01000}}, // )
|
||||
{{0b00000, 0b00100, 0b10101, 0b01110, 0b10101, 0b00100, 0b00000}}, // *
|
||||
{{0b00000, 0b00100, 0b00100, 0b11111, 0b00100, 0b00100, 0b00000}}, // +
|
||||
{{0b00000, 0b00000, 0b00000, 0b00000, 0b01100, 0b00100, 0b01000}}, // ,
|
||||
{{0b00000, 0b00000, 0b00000, 0b11111, 0b00000, 0b00000, 0b00000}}, // -
|
||||
{{0b00000, 0b00000, 0b00000, 0b00000, 0b00000, 0b01100, 0b01100}}, // .
|
||||
{{0b00000, 0b00001, 0b00010, 0b00100, 0b01000, 0b10000, 0b00000}}, // /
|
||||
{{0b01110, 0b10001, 0b10011, 0b10101, 0b11001, 0b10001, 0b01110}}, // 0
|
||||
{{0b00100, 0b01100, 0b00100, 0b00100, 0b00100, 0b00100, 0b01110}}, // 1
|
||||
{{0b01110, 0b10001, 0b00001, 0b00010, 0b00100, 0b01000, 0b11111}}, // 2
|
||||
{{0b11111, 0b00010, 0b00100, 0b00010, 0b00001, 0b10001, 0b01110}}, // 3
|
||||
{{0b00010, 0b00110, 0b01010, 0b10010, 0b11111, 0b00010, 0b00010}}, // 4
|
||||
{{0b11111, 0b10000, 0b11110, 0b00001, 0b00001, 0b10001, 0b01110}}, // 5
|
||||
{{0b00110, 0b01000, 0b10000, 0b11110, 0b10001, 0b10001, 0b01110}}, // 6
|
||||
{{0b11111, 0b00001, 0b00010, 0b00100, 0b01000, 0b01000, 0b01000}}, // 7
|
||||
{{0b01110, 0b10001, 0b10001, 0b01110, 0b10001, 0b10001, 0b01110}}, // 8
|
||||
{{0b01110, 0b10001, 0b10001, 0b01111, 0b00001, 0b00010, 0b01100}}, // 9
|
||||
{{0b00000, 0b01100, 0b01100, 0b00000, 0b01100, 0b01100, 0b00000}}, // :
|
||||
{{0b00000, 0b01100, 0b01100, 0b00000, 0b01100, 0b00100, 0b01000}}, // ;
|
||||
{{0b00010, 0b00100, 0b01000, 0b10000, 0b01000, 0b00100, 0b00010}}, // <
|
||||
{{0b00000, 0b00000, 0b11111, 0b00000, 0b11111, 0b00000, 0b00000}}, // =
|
||||
{{0b01000, 0b00100, 0b00010, 0b00001, 0b00010, 0b00100, 0b01000}}, // >
|
||||
{{0b01110, 0b10001, 0b00001, 0b00010, 0b00100, 0b00000, 0b00100}}, // ?
|
||||
{{0b01110, 0b10001, 0b00001, 0b01101, 0b10101, 0b10101, 0b01110}}, // @
|
||||
{{0b01110, 0b10001, 0b10001, 0b10001, 0b11111, 0b10001, 0b10001}}, // A
|
||||
{{0b11110, 0b10001, 0b10001, 0b11110, 0b10001, 0b10001, 0b11110}}, // B
|
||||
{{0b01110, 0b10001, 0b10000, 0b10000, 0b10000, 0b10001, 0b01110}}, // C
|
||||
{{0b11100, 0b10010, 0b10001, 0b10001, 0b10001, 0b10010, 0b11100}}, // D
|
||||
{{0b11111, 0b10000, 0b10000, 0b11110, 0b10000, 0b10000, 0b11111}}, // E
|
||||
{{0b11111, 0b10000, 0b10000, 0b11110, 0b10000, 0b10000, 0b10000}}, // F
|
||||
{{0b01110, 0b10001, 0b10000, 0b10111, 0b10001, 0b10001, 0b01111}}, // G
|
||||
{{0b10001, 0b10001, 0b10001, 0b11111, 0b10001, 0b10001, 0b10001}}, // H
|
||||
{{0b01110, 0b00100, 0b00100, 0b00100, 0b00100, 0b00100, 0b01110}}, // I
|
||||
{{0b00111, 0b00010, 0b00010, 0b00010, 0b00010, 0b10010, 0b01100}}, // J
|
||||
{{0b10001, 0b10010, 0b10100, 0b11000, 0b10100, 0b10010, 0b10001}}, // K
|
||||
{{0b10000, 0b10000, 0b10000, 0b10000, 0b10000, 0b10000, 0b11111}}, // L
|
||||
{{0b10001, 0b11011, 0b10101, 0b10101, 0b10001, 0b10001, 0b10001}}, // M
|
||||
{{0b10001, 0b10001, 0b11001, 0b10101, 0b10011, 0b10001, 0b10001}}, // N
|
||||
{{0b01110, 0b10001, 0b10001, 0b10001, 0b10001, 0b10001, 0b01110}}, // O
|
||||
{{0b11110, 0b10001, 0b10001, 0b11110, 0b10000, 0b10000, 0b10000}}, // P
|
||||
{{0b01110, 0b10001, 0b10001, 0b10001, 0b10101, 0b10010, 0b01101}}, // Q
|
||||
{{0b11110, 0b10001, 0b10001, 0b11110, 0b10100, 0b10010, 0b10001}}, // R
|
||||
{{0b01111, 0b10000, 0b10000, 0b01110, 0b00001, 0b00001, 0b11110}}, // S
|
||||
{{0b11111, 0b00100, 0b00100, 0b00100, 0b00100, 0b00100, 0b00100}}, // T
|
||||
{{0b10001, 0b10001, 0b10001, 0b10001, 0b10001, 0b10001, 0b01110}}, // U
|
||||
{{0b10001, 0b10001, 0b10001, 0b10001, 0b10001, 0b01010, 0b00100}}, // V
|
||||
{{0b10001, 0b10001, 0b10001, 0b10101, 0b10101, 0b10101, 0b01010}}, // W
|
||||
{{0b10001, 0b10001, 0b01010, 0b00100, 0b01010, 0b10001, 0b10001}}, // X
|
||||
{{0b10001, 0b10001, 0b10001, 0b01010, 0b00100, 0b00100, 0b00100}}, // Y
|
||||
{{0b11111, 0b00001, 0b00010, 0b00100, 0b01000, 0b10000, 0b11111}}, // Z
|
||||
{{0b01110, 0b01000, 0b01000, 0b01000, 0b01000, 0b01000, 0b01110}}, // [
|
||||
{{0b00000, 0b10000, 0b01000, 0b00100, 0b00010, 0b00001, 0b00000}}, // \
|
||||
{{0b01110, 0b00010, 0b00010, 0b00010, 0b00010, 0b00010, 0b01110}}, // ]
|
||||
{{0b00100, 0b01010, 0b10001, 0b00000, 0b00000, 0b00000, 0b00000}}, // ^
|
||||
{{0b00000, 0b00000, 0b00000, 0b00000, 0b00000, 0b00000, 0b11111}}, // _
|
||||
{{0b01000, 0b00100, 0b00010, 0b00000, 0b00000, 0b00000, 0b00000}}, // `
|
||||
{{0b00000, 0b00000, 0b01110, 0b00001, 0b01111, 0b10001, 0b01111}}, // a
|
||||
{{0b10000, 0b10000, 0b10110, 0b11001, 0b10001, 0b10001, 0b11110}}, // b
|
||||
{{0b00000, 0b00000, 0b01110, 0b10000, 0b10000, 0b10001, 0b01110}}, // c
|
||||
{{0b00001, 0b00001, 0b01101, 0b10011, 0b10001, 0b10001, 0b01111}}, // d
|
||||
{{0b00000, 0b00000, 0b01110, 0b10001, 0b11111, 0b10000, 0b01110}}, // e
|
||||
{{0b00110, 0b01001, 0b01000, 0b11100, 0b01000, 0b01000, 0b01000}}, // f
|
||||
{{0b00000, 0b01111, 0b10001, 0b10001, 0b01111, 0b00001, 0b01110}}, // g
|
||||
{{0b10000, 0b10000, 0b10110, 0b11001, 0b10001, 0b10001, 0b10001}}, // h
|
||||
{{0b00100, 0b00000, 0b01100, 0b00100, 0b00100, 0b00100, 0b01110}}, // i
|
||||
{{0b00010, 0b00000, 0b00110, 0b00010, 0b00010, 0b10010, 0b01100}}, // j
|
||||
{{0b10000, 0b10000, 0b10010, 0b10100, 0b11000, 0b10100, 0b10010}}, // k
|
||||
{{0b01100, 0b00100, 0b00100, 0b00100, 0b00100, 0b00100, 0b01110}}, // l
|
||||
{{0b00000, 0b00000, 0b11010, 0b10101, 0b10101, 0b10001, 0b10001}}, // m
|
||||
{{0b00000, 0b00000, 0b10110, 0b11001, 0b10001, 0b10001, 0b10001}}, // n
|
||||
{{0b00000, 0b00000, 0b01110, 0b10001, 0b10001, 0b10001, 0b01110}}, // o
|
||||
{{0b00000, 0b00000, 0b11110, 0b10001, 0b11110, 0b10000, 0b10000}}, // p
|
||||
{{0b00000, 0b00000, 0b01101, 0b10011, 0b01111, 0b00001, 0b00001}}, // q
|
||||
{{0b00000, 0b00000, 0b10110, 0b11001, 0b10000, 0b10000, 0b10000}}, // r
|
||||
{{0b00000, 0b00000, 0b01110, 0b10000, 0b01110, 0b00001, 0b11110}}, // s
|
||||
{{0b01000, 0b01000, 0b11100, 0b01000, 0b01000, 0b01001, 0b00110}}, // t
|
||||
{{0b00000, 0b00000, 0b10001, 0b10001, 0b10001, 0b10011, 0b01101}}, // u
|
||||
{{0b00000, 0b00000, 0b10001, 0b10001, 0b10001, 0b01010, 0b00100}}, // v
|
||||
{{0b00000, 0b00000, 0b10001, 0b10001, 0b10101, 0b10101, 0b01010}}, // w
|
||||
{{0b00000, 0b00000, 0b10001, 0b01010, 0b00100, 0b01010, 0b10001}}, // x
|
||||
{{0b00000, 0b00000, 0b10001, 0b10001, 0b01111, 0b00001, 0b01110}}, // y
|
||||
{{0b00000, 0b00000, 0b11111, 0b00010, 0b00100, 0b01000, 0b11111}}, // z
|
||||
{{0b00010, 0b00100, 0b00100, 0b01000, 0b00100, 0b00100, 0b00010}}, // {
|
||||
{{0b00100, 0b00100, 0b00100, 0b00100, 0b00100, 0b00100, 0b00100}}, // |
|
||||
{{0b01000, 0b00100, 0b00100, 0b00010, 0b00100, 0b00100, 0b01000}}, // }
|
||||
{{0b00000, 0b00000, 0b00000, 0b01101, 0b10010, 0b00000, 0b00000}}, // ~
|
||||
*/ |
@ -0,0 +1,25 @@ |
||||
/**
|
||||
 * Declaration of the custom glyph (CGRAM) pattern type and the available pattern tables.
||||
*/ |
||||
|
||||
#ifndef HD44780UTF_CGRAM_H |
||||
#define HD44780UTF_CGRAM_H |
||||
|
||||
#include "utf8.h" |
||||
#include <stdint.h> |
||||
|
||||
/** Pattern for CGRAM */ |
||||
struct cgram_pattern { |
||||
/** The symbol displayed */ |
||||
struct Utf8Char symbol; |
||||
|
||||
/** ASCII char shown if there is no space for the CGRAM pattern */ |
||||
char fallback; |
||||
|
||||
/** Graphic data (rows) - the 8th row is typically blank and can be left out from the definition */ |
||||
uint8_t data[8]; |
||||
}; |
||||
|
||||
extern const struct cgram_pattern CGRAM_CZ[]; |
||||
|
||||
#endif //HD44780UTF_CGRAM_H
|
@ -0,0 +1,136 @@ |
||||
#include <stdint.h> |
||||
#include "utf8.h" |
||||
#include "cgrom.h" |
||||
|
||||
/*
 * Lookup table mapping UTF-8 symbols to character codes of the HD44780
 * "A00" (Japanese) character ROM. Terminated by an empty entry.
 *
 * Several symbols may map to the same address; the lookup is by symbol.
 */
const struct cgrom_entry CGROM_A00[] = {
    {.address = 32, .symbol = " "},
    {.address = 33, .symbol = "!"},
    {.address = 34, .symbol = "\""},
    {.address = 35, .symbol = "#"},
    {.address = 36, .symbol = "$"},
    {.address = 37, .symbol = "%"},
    {.address = 38, .symbol = "&"},
    {.address = 39, .symbol = "'"},
    {.address = 40, .symbol = "("},
    {.address = 41, .symbol = ")"},
    {.address = 42, .symbol = "*"},
    {.address = 43, .symbol = "+"},
    {.address = 44, .symbol = ","},
    {.address = 45, .symbol = "-"},
    {.address = 46, .symbol = "."},
    {.address = 47, .symbol = "/"},
    {.address = 48, .symbol = "0"},
    {.address = 49, .symbol = "1"},
    {.address = 50, .symbol = "2"},
    {.address = 51, .symbol = "3"},
    {.address = 52, .symbol = "4"},
    {.address = 53, .symbol = "5"},
    {.address = 54, .symbol = "6"},
    {.address = 55, .symbol = "7"},
    {.address = 56, .symbol = "8"},
    {.address = 57, .symbol = "9"},
    {.address = 58, .symbol = ":"},
    {.address = 59, .symbol = ";"},
    {.address = 60, .symbol = "<"},
    {.address = 61, .symbol = "="},
    {.address = 62, .symbol = ">"},
    {.address = 63, .symbol = "?"},
    {.address = 64, .symbol = "@"},
    {.address = 65, .symbol = "A"},
    {.address = 66, .symbol = "B"},
    {.address = 67, .symbol = "C"},
    {.address = 68, .symbol = "D"},
    {.address = 69, .symbol = "E"},
    {.address = 70, .symbol = "F"},
    {.address = 71, .symbol = "G"},
    {.address = 72, .symbol = "H"},
    {.address = 73, .symbol = "I"},
    {.address = 74, .symbol = "J"},
    {.address = 75, .symbol = "K"},
    {.address = 76, .symbol = "L"},
    {.address = 77, .symbol = "M"},
    {.address = 78, .symbol = "N"},
    {.address = 79, .symbol = "O"},
    {.address = 80, .symbol = "P"},
    {.address = 81, .symbol = "Q"},
    {.address = 82, .symbol = "R"},
    {.address = 83, .symbol = "S"},
    {.address = 84, .symbol = "T"},
    {.address = 85, .symbol = "U"},
    {.address = 86, .symbol = "V"},
    {.address = 87, .symbol = "W"},
    {.address = 88, .symbol = "X"},
    {.address = 89, .symbol = "Y"},
    {.address = 90, .symbol = "Z"},
    {.address = 91, .symbol = "["},
    {.address = 92, .symbol = "¥"}, // yen (the ROM has no backslash)
    {.address = 93, .symbol = "]"},
    {.address = 94, .symbol = "^"},
    {.address = 95, .symbol = "_"},
    {.address = 96, .symbol = "`"},
    {.address = 97, .symbol = "a"},
    {.address = 98, .symbol = "b"},
    {.address = 99, .symbol = "c"},
    {.address = 100, .symbol = "d"},
    {.address = 101, .symbol = "e"},
    {.address = 102, .symbol = "f"},
    {.address = 103, .symbol = "g"},
    {.address = 104, .symbol = "h"},
    {.address = 105, .symbol = "i"},
    {.address = 106, .symbol = "j"},
    {.address = 107, .symbol = "k"},
    {.address = 108, .symbol = "l"},
    {.address = 109, .symbol = "m"},
    {.address = 110, .symbol = "n"},
    {.address = 111, .symbol = "o"},
    {.address = 112, .symbol = "p"},
    {.address = 113, .symbol = "q"},
    {.address = 114, .symbol = "r"},
    {.address = 115, .symbol = "s"},
    {.address = 116, .symbol = "t"},
    {.address = 117, .symbol = "u"},
    {.address = 118, .symbol = "v"},
    {.address = 119, .symbol = "w"},
    {.address = 120, .symbol = "x"},
    {.address = 121, .symbol = "y"},
    {.address = 122, .symbol = "z"},
    {.address = 123, .symbol = "{"},
    {.address = 124, .symbol = "|"},
    {.address = 125, .symbol = "}"},
    // In the A00 ROM, 0x7E is the RIGHT arrow and 0x7F the LEFT arrow
    {.address = 126, .symbol = "→"},
    {.address = 127, .symbol = "←"},

    // lots of japanese symbols - add them yourself if you need them
    {.address = 0xA2, .symbol = "「"},
    {.address = 0xA3, .symbol = "」"},
    {.address = 0xA5, .symbol = "·"},
    {.address = 0xDF, .symbol = "°"},

    // there is also some greek and obscure diacritics
    {.address = 0xE0, .symbol = "α"},
    {.address = 0xE1, .symbol = "ä"},
    {.address = 0xE2, .symbol = "β"},
    {.address = 0xE3, .symbol = "ϵ"},
    {.address = 0xE4, .symbol = "μ"},
    {.address = 0xE5, .symbol = "σ"},
    {.address = 0xE6, .symbol = "ρ"},
    {.address = 0xE8, .symbol = "√"},
    // E9 is nice superscript minus one, but it's not in Unicode :(

    {.address = 0xEC, .symbol = "¢"},
    {.address = 0xED, .symbol = "£"},
    {.address = 0xEE, .symbol = "ñ"},
    {.address = 0xEF, .symbol = "ö"},

    {.address = 0xF2, .symbol = "Θ"},
    {.address = 0xF3, .symbol = "∞"},
    {.address = 0xF4, .symbol = "Ω"},
    {.address = 0xF5, .symbol = "ü"}, // the ROM glyph is the lowercase letter
    {.address = 0xF5, .symbol = "Ü"}, // approximation kept for compatibility
    {.address = 0xF6, .symbol = "Σ"},
    {.address = 0xF7, .symbol = "π"},

    {.address = 0xFC, .symbol = "円"},
    {.address = 0xFD, .symbol = "÷"},
    {.address = 0xFF, .symbol = "█"},
    {}, /* end mark */
};
@ -0,0 +1,24 @@ |
||||
/**
|
||||
* A definition of the actual CGROM |
||||
*/ |
||||
|
||||
#ifndef HD44780UTF_CGROM_H |
||||
#define HD44780UTF_CGROM_H |
||||
|
||||
#include <stdint.h> |
||||
#include "utf8.h" |
||||
|
||||
/** CGROM look-up table entry */ |
||||
struct cgrom_entry { |
||||
/** Address in the CGROM */ |
||||
uint8_t address; |
||||
|
||||
/** Corresponding symbol */ |
||||
struct Utf8Char symbol; |
||||
}; |
||||
|
||||
|
||||
/** The standard japanese lookup table, terminated by an empty entry */ |
||||
extern const struct cgrom_entry CGROM_A00[]; |
||||
|
||||
#endif //HD44780UTF_CGROM_H
|
@ -0,0 +1,256 @@ |
||||
/**
|
||||
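 * Screen buffer for an HD44780 character LCD: maps UTF-8 symbols to CGROM codes
 * or CGRAM slots and tracks which cells must be written to the hardware.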
||||
*/ |
||||
|
||||
#include <string.h> |
||||
#include <assert.h> |
||||
#include "lcdbuf.h" |
||||
|
||||
|
||||
/** Initialize the struct */ |
||||
void LcdBuffer_Init(struct LcdBuffer *self, const struct cgrom_entry *cgrom, const struct cgram_pattern *custom_symbols) |
||||
{ |
||||
assert(self); |
||||
assert(cgrom); |
||||
assert(custom_symbols); |
||||
|
||||
LcdBuffer_Clear(self); |
||||
self->cgrom = cgrom; |
||||
self->custom_symbols = custom_symbols; |
||||
} |
||||
|
||||
/** Clear the screen */ |
||||
void LcdBuffer_Clear(struct LcdBuffer *self) |
||||
{ |
||||
assert(self); |
||||
|
||||
memset(self->cgram, 0, sizeof(self->cgram)); |
||||
memset(self->screen, 32, sizeof(self->screen)); |
||||
memset(self->dirty_extents, 0, sizeof(self->dirty_extents)); |
||||
self->full_repaint_required = false; |
||||
} |
||||
|
||||
/** Write what needs to be written to the HW, clear all dirty marks */ |
||||
void LcdBuffer_Flush(struct LcdBuffer *self) |
||||
{ |
||||
for (int i = 0; i < 8; i++) { |
||||
if (self->cgram[i].refcount > 0 && self->cgram[i].dirty) { |
||||
LcdBuffer_IO_WriteCGRAM(i, self->custom_symbols[self->cgram[i].symbol_index].data); |
||||
self->cgram[i].dirty = false; |
||||
} |
||||
} |
||||
|
||||
if (self->full_repaint_required) { |
||||
for (int r = 0; r < LINE_NUM; r++) { |
||||
LcdBuffer_IO_WriteAt(r, 0, self->screen[r], LINE_LEN); |
||||
} |
||||
memset(self->dirty_extents, 0, sizeof(self->dirty_extents)); |
||||
} else { |
||||
for (int e = 0; e < BUFLEN_DIRTY_LIST; e++) { |
||||
struct DirtyExtent *ext = &self->dirty_extents[e]; |
||||
if (!ext->count) { |
||||
continue; |
||||
} |
||||
LcdBuffer_IO_WriteAt(ext->row, ext->col, &self->screen[ext->row][ext->col], ext->count); |
||||
ext->count = 0; // mark the slot as free
|
||||
} |
||||
} |
||||
} |
||||
|
||||
/** Fully write everything to the display */ |
||||
void LcdBuffer_FlushAll(struct LcdBuffer *self) |
||||
{ |
||||
for (int i = 0; i < 8; i++) { |
||||
if (self->cgram[i].refcount > 0) { |
||||
LcdBuffer_IO_WriteCGRAM(i, self->custom_symbols[self->cgram[i].symbol_index].data); |
||||
} |
||||
self->cgram[i].dirty = false; |
||||
} |
||||
|
||||
for (int r = 0; r < LINE_NUM; r++) { |
||||
LcdBuffer_IO_WriteAt(r, 0, self->screen[r], LINE_LEN); |
||||
} |
||||
|
||||
memset(self->dirty_extents, 0, sizeof(self->dirty_extents)); |
||||
self->full_repaint_required = false; |
||||
} |
||||
|
||||
static void mark_dirty(struct LcdBuffer *self, uint8_t row, uint8_t col) |
||||
{ |
||||
int first_empty_extent_slot = -1; |
||||
for (int i = 0; i < BUFLEN_DIRTY_LIST; i++) { |
||||
struct DirtyExtent *ext = &self->dirty_extents[i]; |
||||
if (ext->count == 0) { |
||||
// unused
|
||||
if (first_empty_extent_slot == -1) { |
||||
first_empty_extent_slot = i; |
||||
} |
||||
continue; |
||||
} |
||||
if (ext->row != row) { |
||||
// not this row
|
||||
continue; |
||||
} |
||||
|
||||
// this is a filled extent
|
||||
|
||||
if (ext->col < col && ext->col + ext->count > col) { |
||||
// already in this extent
|
||||
return; |
||||
} |
||||
|
||||
if (col < ext->col && (ext->col - col) <= 5) { |
||||
ext->count += (ext->col - col); |
||||
ext->col = col; |
||||
return; |
||||
} |
||||
|
||||
if (col >= ext->col + ext->count && (col - ext->col + ext->count) <= 5) { |
||||
ext->count += (col - ext->col + ext->count); |
||||
return; |
||||
} |
||||
} |
||||
|
||||
if (first_empty_extent_slot == -1) { |
||||
self->full_repaint_required = true; |
||||
} else { |
||||
self->dirty_extents[first_empty_extent_slot].col = col; |
||||
self->dirty_extents[first_empty_extent_slot].row = row; |
||||
self->dirty_extents[first_empty_extent_slot].count = 1; |
||||
} |
||||
} |
||||
|
||||
/** Set one utf8 character at a position */ |
||||
void LcdBuffer_Set(struct LcdBuffer *self, uint8_t row, uint8_t col, struct Utf8Char ch) |
||||
{ |
||||
assert(self); |
||||
assert(row < LINE_NUM); |
||||
assert(col < LINE_LEN); |
||||
|
||||
uint8_t oldchar = self->screen[row][col]; |
||||
|
||||
if (oldchar >= 8 && oldchar == ch.uint) { |
||||
// No change
|
||||
return; |
||||
} |
||||
|
||||
// Fast path for standard ASCII
|
||||
if (ch.uint >= 32 && ch.uint < 126 && ch.uint != '\\') { // A00 has YEN in place of BACKSLASH
|
||||
// normal ASCII
|
||||
if (oldchar < 8) { |
||||
// release refcount on the CGRAM cell
|
||||
self->cgram[oldchar].refcount -= 1; |
||||
} |
||||
|
||||
self->screen[row][col] = ch.uint; |
||||
goto done_dirty; |
||||
} |
||||
|
||||
// Find if it's in CGROM
|
||||
const struct cgrom_entry *rom = self->cgrom; |
||||
for (;;) { |
||||
if (rom->symbol.uint == 0) { |
||||
// End of the lookup table
|
||||
break; |
||||
} |
||||
|
||||
if (rom->symbol.uint == ch.uint) { |
||||
// found it!
|
||||
if (oldchar < 8) { |
||||
// release refcount on the CGRAM cell
|
||||
self->cgram[oldchar].refcount -= 1; |
||||
} |
||||
|
||||
self->screen[row][col] = rom->address; |
||||
goto done_dirty; |
||||
} |
||||
|
||||
rom++; |
||||
} |
||||
|
||||
// Check if the same custom char is already used - if so, increment refcount and reuse it
|
||||
int first_empty_custom_slot = -1; |
||||
for (int i = 0; i < 8; i++) { |
||||
if (self->cgram[i].refcount > 0) { |
||||
if (self->cgram[i].uint == ch.uint) { |
||||
if (oldchar == i) { |
||||
// No change, was already the same custom
|
||||
return; |
||||
} |
||||
|
||||
if (oldchar < 8) { |
||||
// release refcount on the CGRAM cell
|
||||
self->cgram[oldchar].refcount -= 1; |
||||
} |
||||
|
||||
self->cgram[i].refcount += 1; |
||||
self->screen[row][col] = i; |
||||
goto done_dirty; |
||||
} |
||||
} else if (first_empty_custom_slot == -1) { |
||||
first_empty_custom_slot = i; |
||||
} |
||||
} |
||||
|
||||
// New custom pattern is needed
|
||||
|
||||
if (oldchar < 8) { |
||||
// release refcount on the CGRAM cell
|
||||
self->cgram[oldchar].refcount -= 1; |
||||
} |
||||
|
||||
uint32_t index = 0; |
||||
const struct cgram_pattern *pattern = self->custom_symbols; |
||||
for (;;) { |
||||
if (pattern->symbol.uint == 0) { |
||||
// End of the lookup table
|
||||
break; |
||||
} |
||||
|
||||
if (pattern->symbol.uint == ch.uint) { |
||||
// found it!
|
||||
|
||||
if (first_empty_custom_slot == -1) { |
||||
// Whoops, out of slots. Show a fallback glyph
|
||||
if (oldchar != pattern->fallback) { |
||||
self->screen[row][col] = pattern->fallback; |
||||
goto done_dirty; |
||||
} |
||||
return; |
||||
} |
||||
|
||||
// Allocate a new slot in the CGRAM
|
||||
self->cgram[first_empty_custom_slot].refcount = 1; |
||||
self->cgram[first_empty_custom_slot].uint = ch.uint; |
||||
self->cgram[first_empty_custom_slot].dirty = true; // it should be flushed!
|
||||
self->cgram[first_empty_custom_slot].symbol_index = index; |
||||
|
||||
self->screen[row][col] = first_empty_custom_slot; |
||||
goto done_dirty; |
||||
} |
||||
|
||||
index++; |
||||
pattern++; |
||||
} |
||||
|
||||
// Fallback, no way to show this glyph
|
||||
self->screen[row][col] = '?'; |
||||
|
||||
done_dirty: |
||||
mark_dirty(self, row, col); |
||||
} |
||||
|
||||
/** Write a UTF8 string at a position */ |
||||
void LcdBuffer_Write(struct LcdBuffer *self, uint8_t row, uint8_t col, char *utf_string) |
||||
{ |
||||
struct Utf8Iterator iter; |
||||
Utf8Iterator_Init(&iter, utf_string); |
||||
struct Utf8Char uchar; |
||||
while ((uchar = Utf8Iterator_Next(&iter)).uint) { |
||||
if (col >= LINE_LEN) { |
||||
break; |
||||
} |
||||
LcdBuffer_Set(self, row, col, uchar); |
||||
col++; |
||||
} |
||||
} |
@ -0,0 +1,82 @@ |
||||
/**
|
||||
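 * Public interface of the LCD screen buffer: data structures, buffer operations,
 * and the hardware I/O callbacks the application must implement.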
||||
*/ |
||||
|
||||
#ifndef HD44780UTF_LCDBUF_H |
||||
#define HD44780UTF_LCDBUF_H |
||||
|
||||
#include <stdint.h> |
||||
#include "utf8.h" |
||||
#include "cgram.h" |
||||
#include "cgrom.h" |
||||
|
||||
#define LINE_NUM 4 |
||||
#define LINE_LEN 20 |
||||
|
||||
#define BUFLEN_DIRTY_LIST 8 |
||||
|
||||
_Static_assert(LINE_NUM * LINE_LEN < 256, "LINE_NUM * LINE_LEN must fit in u8"); |
||||
|
||||
/**
 * A run of screen cells within one row that changed and must be written to HW.
 * An extent with count == 0 is an unused slot.
 */
struct DirtyExtent {
    /** Row of the run */
    uint8_t row;
    /** Column of the first cell in the run */
    uint8_t col;
    /** Number of cells in the run; 0 marks the slot as free */
    uint8_t count;
};
||||
|
||||
/** Bookkeeping for one CGRAM slot; the slot is free while refcount is 0 */
struct CgramState {
    /** UTF8 uint of the symbol shown in this slot */
    uint32_t uint;
    /** Index into the custom symbols table, used to look up the font data when writing it to HW */
    uint32_t symbol_index;
    /** How many screen cells currently show this slot */
    uint8_t refcount;
    /** Set when the slot's font data still has to be written to HW */
    bool dirty;
};
||||
|
||||
struct LcdBuffer { |
||||
/** The raw screen buffer. Custom symbols are 0x00-0x07 */ |
||||
uint8_t screen[LINE_NUM][LINE_LEN]; |
||||
/** CGRAM state array */ |
||||
struct CgramState cgram[8]; |
||||
/** Hardware CGROM lookup table, used to map UTF8 to existing ROM symbols */ |
||||
const struct cgrom_entry *cgrom; |
||||
/** Defined custom display pattern of utf8 symbols */ |
||||
const struct cgram_pattern *custom_symbols; |
||||
|
||||
/** Array of dirty extents - ranges in the display that need to be flushed to HW */ |
||||
struct DirtyExtent dirty_extents[BUFLEN_DIRTY_LIST]; |
||||
/** If the dirty extents array was not sufficient to hold all changes, this flag is set,
|
||||
* indicating the dirty_extents array should be disregarded. */ |
||||
bool full_repaint_required; |
||||
}; |
||||
|
||||
/** Initialize the struct */ |
||||
void LcdBuffer_Init(struct LcdBuffer *self, const struct cgrom_entry *cgrom, const struct cgram_pattern *custom_symbols); |
||||
|
||||
/** Clear the screen */ |
||||
void LcdBuffer_Clear(struct LcdBuffer *self); |
||||
|
||||
/** Write what needs to be written to the HW, clear all dirty marks */ |
||||
void LcdBuffer_Flush(struct LcdBuffer *self); |
||||
|
||||
/** Fully write everything to the display */ |
||||
void LcdBuffer_FlushAll(struct LcdBuffer *self); |
||||
|
||||
/** Set one utf8 character at a position */ |
||||
void LcdBuffer_Set(struct LcdBuffer *self, uint8_t row, uint8_t col, struct Utf8Char ch); |
||||
|
||||
/** Write a UTF8 string at a position */ |
||||
void LcdBuffer_Write(struct LcdBuffer *self, uint8_t row, uint8_t col, char *utf_string); |
||||
|
||||
/* Callbacks - need to be implemented by the application! */ |
||||
|
||||
/** Write character data at position */ |
||||
void LcdBuffer_IO_WriteAt(uint8_t row, uint8_t col, const uint8_t *buf, uint8_t len); |
||||
|
||||
/** Write CGRAM data. Data is always 8 bytes long. */ |
||||
void LcdBuffer_IO_WriteCGRAM(uint8_t position, const uint8_t* data); |
||||
|
||||
#endif //HD44780UTF_LCDBUF_H
|
@ -0,0 +1,28 @@ |
||||
#include <stdio.h> |
||||
#include <stdint.h> |
||||
#include "lcdbuf.h" |
||||
|
||||
int main() |
||||
{ |
||||
struct LcdBuffer buf; |
||||
LcdBuffer_Init(&buf, CGROM_A00, CGRAM_CZ); |
||||
|
||||
LcdBuffer_Write(&buf, 0, 0, "Ahoj"); |
||||
|
||||
LcdBuffer_Flush(&buf); |
||||
|
||||
return 0; |
||||
} |
||||
|
||||
/**
 * Write character data at position.
 *
 * Demo implementation: prints the target position, the bytes as text and
 * their count to stdout instead of talking to hardware.
 */
void LcdBuffer_IO_WriteAt(uint8_t row, uint8_t col, const uint8_t *buf, uint8_t len)
{
    const int n = len;
    printf("W@%d,%d: \"%.*s\" (len %d)\n", row, col, n, buf, n);
}
||||
|
||||
/**
 * Write CGRAM data. Data is always 8 bytes long.
 *
 * Demo implementation: prints the slot number followed by the eight rows
 * as two-digit hex values to stdout.
 */
void LcdBuffer_IO_WriteCGRAM(uint8_t position, const uint8_t *data)
{
    printf("G@%d:", position);
    for (int row = 0; row < 8; row++) {
        printf(" %02x", data[row]);
    }
    printf("\n");
}
@ -0,0 +1,136 @@ |
||||
#include <stdint.h> |
||||
#include "utf8.h" |
||||
|
||||
//
|
||||
// Created by MightyPork on 2017/08/20.
|
||||
//
|
||||
// UTF-8 parser - collects bytes of a code point before writing them
|
||||
// into a screen cell.
|
||||
//
|
||||
|
||||
const struct Utf8Char EMPTY_CHAR = (struct Utf8Char) {.uint = 0}; |
||||
|
||||
|
||||
// Code Points First Byte Second Byte Third Byte Fourth Byte
|
||||
// U+0000 - U+007F 00 - 7F
|
||||
// U+0080 - U+07FF C2 - DF 80 - BF
|
||||
// U+0800 - U+0FFF E0 *A0 - BF 80 - BF
|
||||
// U+1000 - U+CFFF E1 - EC 80 - BF 80 - BF
|
||||
// U+D000 - U+D7FF ED 80 - *9F 80 - BF
|
||||
// U+E000 - U+FFFF EE - EF 80 - BF 80 - BF
|
||||
// U+10000 - U+3FFFF F0 *90 - BF 80 - BF 80 - BF
|
||||
// U+40000 - U+FFFFF F1 - F3 80 - BF 80 - BF 80 - BF
|
||||
// U+100000 - U+10FFFF F4 80 - *8F 80 - BF 80 - BF
|
||||
|
||||
void Utf8Parser_Clear(struct Utf8Parser *self) |
||||
{ |
||||
self->buffer.uint = 0; |
||||
self->utf_j = 0; |
||||
self->utf_len = 0; |
||||
} |
||||
|
||||
/**
|
||||
* Handle a received character |
||||
*/ |
||||
struct Utf8Char Utf8Parser_Handle(struct Utf8Parser *self, char c) |
||||
{ |
||||
uint8_t *bytes = self->buffer.bytes; |
||||
|
||||
uint8_t uc = (uint8_t) c; |
||||
// collecting unicode glyphs...
|
||||
if (uc & 0x80) { |
||||
if (self->utf_len == 0) { |
||||
bytes[0] = uc; |
||||
self->utf_j = 1; |
||||
|
||||
// start
|
||||
if (uc == 0xC0 || uc == 0xC1 || uc > 0xF4) { |
||||
// forbidden start codes
|
||||
goto fail; |
||||
} |
||||
|
||||
if ((uc & 0xE0) == 0xC0) { |
||||
self->utf_len = 2; |
||||
} else if ((uc & 0xF0) == 0xE0) { |
||||
self->utf_len = 3; |
||||
} else if ((uc & 0xF8) == 0xF0) { |
||||
self->utf_len = 4; |
||||
} else { |
||||
// chars over 127 that don't start unicode sequences
|
||||
goto fail; |
||||
} |
||||
} else { |
||||
if ((uc & 0xC0) != 0x80) { |
||||
bytes[self->utf_j++] = uc; |
||||
goto fail; |
||||
} else { |
||||
bytes[self->utf_j++] = uc; |
||||
if (self->utf_j >= self->utf_len) { |
||||
// check for bad sequences - overlong or some other problem
|
||||
if (bytes[0] == 0xF4 && bytes[1] > 0x8F) { goto fail; } |
||||
if (bytes[0] == 0xF0 && bytes[1] < 0x90) { goto fail; } |
||||
if (bytes[0] == 0xED && bytes[1] > 0x9F) { goto fail; } |
||||
if (bytes[0] == 0xE0 && bytes[1] < 0xA0) { goto fail; } |
||||
|
||||
// trap for surrogates - those break javascript
|
||||
if (bytes[0] == 0xED && bytes[1] >= 0xA0 && bytes[1] <= 0xBF) { goto fail; } |
||||
|
||||
goto success; |
||||
} |
||||
} |
||||
} |
||||
} else { |
||||
bytes[0] = uc; |
||||
goto success; |
||||
} |
||||
|
||||
return EMPTY_CHAR; |
||||
|
||||
success:; |
||||
struct Utf8Char result = self->buffer; |
||||
self->buffer.uint = 0; // erase the buffer
|
||||
self->utf_len = 0; |
||||
return result; |
||||
|
||||
fail: |
||||
self->buffer.uint = 0; // erase the buffer
|
||||
self->utf_len = 0; |
||||
return EMPTY_CHAR; |
||||
} |
||||
|
||||
|
||||
void Utf8Iterator_Init(struct Utf8Iterator *self, const char *source) |
||||
{ |
||||
Utf8Parser_Clear(&self->parser); |
||||
self->source = source; |
||||
} |
||||
|
||||
|
||||
struct Utf8Char Utf8Iterator_Next(struct Utf8Iterator *self) |
||||
{ |
||||
char c; |
||||
struct Utf8Char uchar; |
||||
while (1) { |
||||
c = *self->source++; |
||||
if (!c) { break; } |
||||
|
||||
uchar = Utf8Parser_Handle(&self->parser, c); |
||||
if (uchar.uint) { |
||||
return uchar; |
||||
} |
||||
} |
||||
return EMPTY_CHAR; |
||||
} |
||||
|
||||
|
||||
size_t utf8_strlen(const char *text) |
||||
{ |
||||
// TODO optimize
|
||||
struct Utf8Iterator iter; |
||||
Utf8Iterator_Init(&iter, text); |
||||
size_t num = 0; |
||||
while ((Utf8Iterator_Next(&iter)).uint) { |
||||
num++; |
||||
} |
||||
return num; |
||||
} |
@ -0,0 +1,115 @@ |
||||
/**
|
||||
* UTF-8 string parsing and character iteration |
||||
* |
||||
* Created on 2020/01/04. |
||||
*/ |
||||
|
||||
#ifndef LIQUIDTYPE_UTF8_H |
||||
#define LIQUIDTYPE_UTF8_H |
||||
|
||||
#include <stddef.h> |
||||
#include <stdint.h> |
||||
#include <stdbool.h> |
||||
|
||||
/** Character containing all zeros */ |
||||
extern const struct Utf8Char EMPTY_CHAR; |
||||
|
||||
/**
|
||||
* UTF-8 encoded character. |
||||
* |
||||
* It's convenient to use the uint values internally to represent this symbol. |
||||
* Since UTF8 can't contain zero bytes, just strip the trailing zeros when composing a string. |
||||
*/ |
||||
struct Utf8Char { |
||||
union { |
||||
/**
|
||||
* Encoded bytes of the character, in the order they appear in the stream; |
||||
* unused trailing positions are zero when the character is shorter than 4 bytes. |
||||
* |
||||
* Can be initialized by a string literal in lookup tables |
||||
*/ |
||||
uint8_t bytes[4]; |
||||
|
||||
/** |
* The same four bytes viewed as one u32; zero means "no character". |
* The numeric value depends on host byte order, so only compare it |
* with zero or with the uint of another Utf8Char. |
*/ |
||||
uint32_t uint; |
||||
}; |
||||
}; |
||||
|
||||
/** UTF8 string parser internal state */ |
||||
struct Utf8Parser { |
||||
/** Bytes of the character collected so far; all zero between characters */ |
||||
struct Utf8Char buffer; |
||||
/** Total length (2 to 4 bytes) of the sequence being collected; 0 when no sequence is in progress */ |
||||
uint8_t utf_len; |
||||
/** Index into buffer.bytes where the next received byte will be stored */ |
||||
uint8_t utf_j; |
||||
}; |
||||
|
||||
/**
|
||||
* Clear the parser internal state |
||||
* |
||||
* @param self |
||||
*/ |
||||
void Utf8Parser_Clear(struct Utf8Parser *self); |
||||
|
||||
/**
 * Initialize the parser struct before starting to parse.
 *
 * Currently identical to Utf8Parser_Clear(). Declared `static inline` because
 * it is defined in a header: a plain `static` function would be reported as
 * unused in every source file that includes this header without calling it.
 *
 * @param self - parser state to initialize
 */
static inline void Utf8Parser_Init(struct Utf8Parser *self) {
    Utf8Parser_Clear(self);
}
||||
|
||||
/**
|
||||
* Parse a character. |
||||
* |
||||
* The returned struct contains NIL (uint == 0) if no character is yet available. |
||||
* |
||||
* ASCII is passed through, utf-8 is collected and returned in one piece. |
||||
*/ |
||||
struct Utf8Char Utf8Parser_Handle(struct Utf8Parser *self, char c); |
||||
|
||||
/**
|
||||
* Utf8 character iterator. |
||||
* |
||||
* Usage: |
||||
* struct Utf8Iterator iter; |
||||
* Utf8Iterator_Init(&iter, myString); |
||||
* |
||||
* struct Utf8Char uchar; |
||||
* while ((uchar = Utf8Iterator_Next(&iter)).uint) { |
||||
* // do something with the char
|
||||
* } |
||||
*/ |
||||
struct Utf8Iterator { |
||||
/* Next byte to parse. The pointer is advanced as the iterator progresses. */ |
||||
const char *source; |
/* Parser that assembles the bytes read from source into characters. */ |
struct Utf8Parser parser; |
||||
}; |
||||
|
||||
/**
|
||||
* Initialize the iterator struct |
||||
* |
||||
* @param self |
||||
* @param source - string to iterate; It can be in RO memory, it's only read. |
||||
*/ |
||||
void Utf8Iterator_Init(struct Utf8Iterator *self, const char *source); |
||||
|
||||
/**
|
||||
* Get the next character from the iterator. |
||||
* |
||||
* Returns empty character if there are no more characters to parse (the .uint field is zero) |
||||
* |
||||
* Invalid characters are skipped. |
||||
*/ |
||||
struct Utf8Char Utf8Iterator_Next(struct Utf8Iterator *self); |
||||
|
||||
/**
|
||||
* Get utf8 string length, counting codepoints. |
||||
* |
||||
* @attention This function is rather expensive, cache the result if reused |
||||
* |
||||
* @param text - utf8 string, zero terminated |
||||
* @return number of codepoints |
||||
*/ |
||||
size_t utf8_strlen(const char *text); |
||||
|
||||
#endif //LIQUIDTYPE_UTF8_H
|
Loading…
Reference in new issue