You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
224 lines
5.7 KiB
224 lines
5.7 KiB
//
|
|
// Created by MightyPork on 2017/08/20.
|
|
//
|
|
// UTF-8 parser - collects bytes of a code point before writing them
|
|
// into a screen cell.
|
|
//
|
|
|
|
#include "apars_utf8.h"
|
|
#include "apars_logging.h"
|
|
#include "screen.h"
|
|
#include "uart_driver.h"
|
|
#include "ansi_parser_callbacks.h"
|
|
#include "ansi_parser.h"
|
|
#include "ascii.h"
|
|
|
|
// Bytes of the code point currently being collected. A UTF-8 sequence is at
// most 4 bytes long; the 5th element only ever holds zero, so the buffer stays
// NUL-terminated even when it contains a complete 4-byte sequence (it is
// handed to the screen functions as a `const char *`).
static u8 bytes[5];
// Total length of the sequence being collected; 0 = no sequence in progress
static int utf_len = 0;
// Number of bytes stored in `bytes` so far
static int utf_j = 0;
|
|
|
|
/**
|
|
* Clear the buffer where we collect pieces of a code point.
|
|
* This is used for parser reset.
|
|
*/
|
|
void ICACHE_FLASH_ATTR
|
|
apars_reset_utf8buffer(void)
|
|
{
|
|
utf_len = 0;
|
|
utf_j = 0;
|
|
memset(bytes, 0, 4);
|
|
}
|
|
|
|
// Well-formed UTF-8 byte sequences. A '*' marks a bound that is tighter than
// the generic continuation byte range 80 - BF.
//
// Code Points            First Byte   Second Byte   Third Byte   Fourth Byte
// U+0000   - U+007F      00 - 7F
// U+0080   - U+07FF      C2 - DF       80 - BF
// U+0800   - U+0FFF      E0           *A0 - BF      80 - BF
// U+1000   - U+CFFF      E1 - EC       80 - BF      80 - BF
// U+D000   - U+D7FF      ED            80 - *9F     80 - BF
// U+E000   - U+FFFF      EE - EF       80 - BF      80 - BF
// U+10000  - U+3FFFF     F0           *90 - BF      80 - BF      80 - BF
// U+40000  - U+FFFFF     F1 - F3       80 - BF      80 - BF      80 - BF
// U+100000 - U+10FFFF    F4            80 - *8F     80 - BF      80 - BF
|
|
|
|
/**
 * Print a NUL-terminated string on the screen, one byte at a time.
 *
 * Each byte is wrapped in its own one-character string and passed
 * to screen_putchar().
 *
 * @param str - string to print
 */
static void ICACHE_FLASH_ATTR screen_print_ascii(const char *str)
{
	char glyph[2] = { 0, 0 };

	for (const char *p = str; *p != 0; p++) {
		glyph[0] = *p;
		screen_putchar(glyph);
	}
}
|
|
|
|
/**
 * Move to a new line if the next dump item would not fit on the current one.
 *
 * When the cursor's x position is greater than (screen width - needed), the
 * rest of the line is cleared from the cursor and "\n" followed by "\r"
 * is printed.
 *
 * @param needed - number of columns the next item requires; 0 means 5
 */
static void ICACHE_FLASH_ATTR hdump_spaces_eol(int needed)
{
	int cur_x, cur_y;
	const int required = (needed == 0) ? 5 : needed;

	screen_cursor_get(&cur_y, &cur_x);

	// enough room left - nothing to do
	if (cur_x <= termconf_live.width - required) {
		return;
	}

	screen_clear_in_line(CLEAR_FROM_CURSOR);
	screen_putchar("\n");
	screen_putchar("\r");
}
|
|
|
|
|
|
/**
 * Debug-dump one successfully received character.
 *
 * A byte below 32 is printed as its ASCII control code mnemonic with
 * foreground 7 on background 2. Anything else is handed to screen_putchar()
 * unchanged, with foreground 7 on background 0. Afterwards the default colors
 * are restored and a single space is printed as a separator.
 *
 * @param ch - character to show; only ch[0] is inspected here, the pointer
 *             itself is passed on to screen_putchar() for non-control input
 */
static void ICACHE_FLASH_ATTR hdump_good(const char *ch)
{
	char buf[10];
	// Look at the byte as unsigned. Where plain `char` is signed, the lead
	// byte of a multi-byte sequence (>= 0x80) would otherwise be negative,
	// pass the "< 32" test and get dumped as a sign-extended hex number
	// instead of being printed as a glyph.
	const unsigned char first = (unsigned char) ch[0];

	hdump_spaces_eol(6);

	screen_set_fg(7);
	screen_set_bg(0);
	if (first < 32) {
		screen_set_fg(7);
		screen_set_bg(2);
		switch (first) {
			case NUL: screen_print_ascii("NUL"); break;
			case SOH: screen_print_ascii("SOH"); break;
			case STX: screen_print_ascii("STX"); break;
			case ETX: screen_print_ascii("ETX"); break;
			case EOT: screen_print_ascii("EOT"); break;
			case ENQ: screen_print_ascii("ENQ"); break;
			case ACK: screen_print_ascii("ACK"); break;
			case BEL: screen_print_ascii("BEL"); break;
			case BS: screen_print_ascii("BS"); break;
			case TAB: screen_print_ascii("TAB"); break;
			case LF: screen_print_ascii("LF"); break;
			case VT: screen_print_ascii("VT"); break;
			case FF: screen_print_ascii("FF"); break;
			case CR: screen_print_ascii("CR"); break;
			case SO: screen_print_ascii("SO"); break;
			case SI: screen_print_ascii("SI"); break;
			case DLE: screen_print_ascii("DLE"); break;
			case DC1: screen_print_ascii("DC1"); break;
			case DC2: screen_print_ascii("DC2"); break;
			case DC3: screen_print_ascii("DC3"); break;
			case DC4: screen_print_ascii("DC4"); break;
			case NAK: screen_print_ascii("NAK"); break;
			case SYN: screen_print_ascii("SYN"); break;
			case ETB: screen_print_ascii("ETB"); break;
			case CAN: screen_print_ascii("CAN"); break;
			case EM: screen_print_ascii("EM"); break;
			case SUB: screen_print_ascii("SUB"); break;
			case ESC: screen_print_ascii("ESC"); break;
			case FS: screen_print_ascii("FS"); break;
			case GS: screen_print_ascii("GS"); break;
			case RS: screen_print_ascii("RS"); break;
			case US: screen_print_ascii("US"); break;
			// NOTE(review): assuming SP and DEL carry their ASCII values
			// (32 and 127), these two cases cannot be reached through the
			// "< 32" guard above - confirm whether the guard was meant to
			// include them.
			case SP: screen_print_ascii("SP"); break;
			case DEL: screen_print_ascii("DEL"); break;
			default:
				// "%02Xh" of a value 0-255 is 3 characters + NUL, fits in buf
				sprintf(buf, "%02Xh", (unsigned int) first);
				screen_print_ascii(buf);
				break;
		}
	} else {
		screen_putchar(ch);
	}

	screen_set_default_bg();
	screen_set_default_fg();
	screen_print_ascii(" ");
}
|
|
|
|
/**
 * Debug-dump the bytes of an invalid sequence as hex.
 *
 * Every byte is printed as "XXh" with foreground 7 on background 1, bytes are
 * separated by a space. Afterwards the default colors are restored and
 * a single space is printed as a separator.
 *
 * @param ch  - bytes to dump
 * @param len - number of bytes to dump
 */
static void ICACHE_FLASH_ATTR hdump_bad(const char *ch, int len)
{
	char buf[10];

	hdump_spaces_eol(len * 5);

	screen_set_fg(7);
	screen_set_bg(1);
	for (int i = 0; i < len; i++) {
		// Convert through unsigned char: invalid bytes are typically >= 0x80
		// and, where plain `char` is signed, would be sign-extended and
		// printed as "FFFFFFxxh". As unsigned the output is always
		// 3 characters + NUL, well within buf.
		sprintf(buf, "%02Xh", (unsigned int) (unsigned char) ch[i]);
		screen_print_ascii(buf);
		if (i < len - 1) {
			screen_print_ascii(" ");
		}
	}
	screen_set_default_bg();
	screen_set_default_fg();
	screen_print_ascii(" ");
}
|
|
|
|
|
|
/**
|
|
* Handle a received plain character
|
|
* @param c - received character
|
|
*/
|
|
void ICACHE_FLASH_ATTR
|
|
apars_handle_plainchar(char c)
|
|
{
|
|
u8 uc = (u8)c;
|
|
// collecting unicode glyphs...
|
|
if (uc & 0x80) {
|
|
if (utf_len == 0) {
|
|
bytes[0] = uc;
|
|
utf_j = 1;
|
|
|
|
// start
|
|
if (uc == 0xC0 || uc == 0xC1 || uc > 0xF4) {
|
|
// forbidden start codes
|
|
goto fail;
|
|
}
|
|
|
|
if ((uc & 0xE0) == 0xC0) {
|
|
utf_len = 2;
|
|
}
|
|
else if ((uc & 0xF0) == 0xE0) {
|
|
utf_len = 3;
|
|
}
|
|
else if ((uc & 0xF8) == 0xF0) {
|
|
utf_len = 4;
|
|
}
|
|
else {
|
|
// chars over 127 that don't start unicode sequences
|
|
goto fail;
|
|
}
|
|
}
|
|
else {
|
|
if ((uc & 0xC0) != 0x80) {
|
|
bytes[utf_j++] = uc;
|
|
goto fail;
|
|
}
|
|
else {
|
|
bytes[utf_j++] = uc;
|
|
if (utf_j >= utf_len) {
|
|
// check for bad sequences - overlong or some other problem
|
|
if (bytes[0] == 0xF4 && bytes[1] > 0x8F) goto fail;
|
|
if (bytes[0] == 0xF0 && bytes[1] < 0x90) goto fail;
|
|
if (bytes[0] == 0xED && bytes[1] > 0x9F) goto fail;
|
|
if (bytes[0] == 0xE0 && bytes[1] < 0xA0) goto fail;
|
|
|
|
// trap for surrogates - those break javascript
|
|
if (bytes[0] == 0xED && bytes[1] >= 0xA0 && bytes[1] <= 0xBF) goto fail;
|
|
|
|
if (termconf_live.ascii_debug) {
|
|
hdump_good((const char *) bytes);
|
|
} else {
|
|
screen_putchar((const char *) bytes);
|
|
}
|
|
apars_reset_utf8buffer();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
bytes[0] = uc;
|
|
bytes[1] = 0; // just to make sure it's closed...
|
|
if (termconf_live.ascii_debug) {
|
|
hdump_good((const char *) bytes);
|
|
} else {
|
|
screen_putchar((const char *) bytes);
|
|
}
|
|
apars_reset_utf8buffer();
|
|
}
|
|
|
|
return;
|
|
fail:
|
|
if (termconf_live.ascii_debug) {
|
|
hdump_bad((const char *) bytes, utf_j);
|
|
} else {
|
|
screen_putchar("\xEF\xBF\xBD");
|
|
}
|
|
//ansi_warn("BAD UTF8!");
|
|
//apars_show_context();
|
|
apars_reset_utf8buffer();
|
|
}
|
|
|