From faffa687947e634765d2fd7c64b6ce0f27bbf799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Hru=C5=A1ka?= Date: Sun, 1 Oct 2017 03:23:04 +0200 Subject: [PATCH] use binary messages and fix bugs in utf sanitizer --- front-end | 2 +- user/ansi_parser.c | 56 +++++++++-------- user/ansi_parser.rl | 6 ++ user/apars_utf8.c | 150 ++++++++++++++++++++++++++++++++++++-------- user/cgi_sockets.c | 4 +- user/cgi_term_cfg.c | 10 +++ user/screen.c | 6 ++ user/screen.h | 4 +- 8 files changed, 184 insertions(+), 54 deletions(-) diff --git a/front-end b/front-end index cee23ca..55e2def 160000 --- a/front-end +++ b/front-end @@ -1 +1 @@ -Subproject commit cee23ca951bd1c07b9c1d1d1334f18c0894cc028 +Subproject commit 55e2def6e3899a708a9afca099252bdf44919b7c diff --git a/user/ansi_parser.c b/user/ansi_parser.c index d23cca6..37d30cf 100644 --- a/user/ansi_parser.c +++ b/user/ansi_parser.c @@ -5,10 +5,11 @@ #include "ansi_parser_callbacks.h" #include "ascii.h" #include "apars_logging.h" +#include "screen.h" /* Ragel constants block */ -/* #line 12 "user/ansi_parser.c" */ +/* #line 13 "user/ansi_parser.c" */ static const char _ansi_actions[] ESP_CONST_DATA = { 0, 1, 0, 1, 1, 1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1, @@ -32,7 +33,7 @@ static const int ansi_en_charsetcmd_body = 10; static const int ansi_en_main = 1; -/* #line 11 "user/ansi_parser.rl" */ +/* #line 12 "user/ansi_parser.rl" */ // Max nr of CSI parameters @@ -114,15 +115,20 @@ ansi_parser(char newchar) // This is used to detect timeout delay (time since last rx char) ansi_parser_char_cnt++; + if (termconf->ascii_debug) { + apars_handle_plainchar(newchar); + return; + } + // Init Ragel on the first run if (cs == -1) { -/* #line 121 "user/ansi_parser.c" */ +/* #line 127 "user/ansi_parser.c" */ { cs = ansi_start; } -/* #line 95 "user/ansi_parser.rl" */ +/* #line 101 "user/ansi_parser.rl" */ #if DEBUG_ANSI memset(history, 0, sizeof(history)); @@ -202,7 +208,7 @@ ansi_parser(char newchar) // The parser -/* #line 206 "user/ansi_parser.c" */ +/* #line 212 "user/ansi_parser.c" */ { const char *_acts; unsigned int _nacts; @@ -392,7 +398,7 @@ execFuncs: while ( _nacts-- > 0 ) { switch ( *_acts++ ) { case 0: -/* #line 182 "user/ansi_parser.rl" */ +/* #line 188 "user/ansi_parser.rl" */ { ansi_warn("Parser error."); apars_show_context(); @@ -401,7 +407,7 @@ execFuncs: } break; case 1: -/* #line 191 "user/ansi_parser.rl" */ +/* #line 197 "user/ansi_parser.rl" */ { if ((*p) != 0) { apars_handle_plainchar((*p)); @@ -409,7 +415,7 @@ execFuncs: } break; case 2: -/* #line 199 "user/ansi_parser.rl" */ +/* #line 205 "user/ansi_parser.rl" */ { // Reset the CSI builder leadchar = NUL; @@ -426,13 +432,13 @@ execFuncs: } break; case 3: -/* #line 214 "user/ansi_parser.rl" */ +/* #line 220 "user/ansi_parser.rl" */ { leadchar = (*p); } break; case 4: -/* #line 218 "user/ansi_parser.rl" */ +/* #line 224 "user/ansi_parser.rl" */ { if (arg_cnt == 0) arg_cnt = 1; // x10 + digit @@ -442,7 +448,7 @@ execFuncs: } break; case 5: -/* #line 226 "user/ansi_parser.rl" */ +/* #line 232 "user/ansi_parser.rl" */ { if (arg_cnt == 0) arg_cnt = 1; // handle case when first arg is empty arg_cnt++; @@ -450,20 +456,20 @@ execFuncs: } break; case 6: -/* #line 232 "user/ansi_parser.rl" */ +/* #line 238 "user/ansi_parser.rl" */ { interchar = (*p); } break; case 7: -/* #line 236 "user/ansi_parser.rl" */ +/* #line 242 "user/ansi_parser.rl" */ { apars_handle_csi(leadchar, arg, arg_cnt, interchar, (*p)); {cs = 1;goto _again;} } break; case 8: -/* #line 248 "user/ansi_parser.rl" */ +/* #line 254 "user/ansi_parser.rl" */ { leadchar = (*p); str_ni = 0; @@ -473,13 +479,13 @@ execFuncs: } break; case 9: -/* #line 256 "user/ansi_parser.rl" */ +/* #line 262 "user/ansi_parser.rl" */ { string_buffer[str_ni++] = (*p); } break; case 10: -/* #line 260 "user/ansi_parser.rl" */ +/* #line 266 "user/ansi_parser.rl" */ { inside_string = false; string_buffer[str_ni++] = '\0'; @@ -488,41 +494,41 @@ execFuncs: } break; case 11: -/* #line 273 "user/ansi_parser.rl" */ +/* #line 279 "user/ansi_parser.rl" */ { apars_handle_hash_cmd((*p)); {cs = 1;goto _again;} } break; case 12: -/* #line 278 "user/ansi_parser.rl" */ +/* #line 284 "user/ansi_parser.rl" */ { apars_handle_short_cmd((*p)); {cs = 1;goto _again;} } break; case 13: -/* #line 283 "user/ansi_parser.rl" */ +/* #line 289 "user/ansi_parser.rl" */ { apars_handle_space_cmd((*p)); {cs = 1;goto _again;} } break; case 14: -/* #line 290 "user/ansi_parser.rl" */ +/* #line 296 "user/ansi_parser.rl" */ { leadchar = (*p); {cs = 10;goto _again;} } break; case 15: -/* #line 295 "user/ansi_parser.rl" */ +/* #line 301 "user/ansi_parser.rl" */ { apars_handle_chs_designate(leadchar, (*p)); {cs = 1;goto _again;} } break; -/* #line 526 "user/ansi_parser.c" */ +/* #line 532 "user/ansi_parser.c" */ } } goto _again; @@ -540,7 +546,7 @@ _again: while ( __nacts-- > 0 ) { switch ( *__acts++ ) { case 0: -/* #line 182 "user/ansi_parser.rl" */ +/* #line 188 "user/ansi_parser.rl" */ { ansi_warn("Parser error."); apars_show_context(); @@ -550,7 +556,7 @@ _again: goto _again;} } break; -/* #line 554 "user/ansi_parser.c" */ +/* #line 560 "user/ansi_parser.c" */ } } } @@ -558,6 +564,6 @@ goto _again;} _out: {} } -/* #line 318 "user/ansi_parser.rl" */ +/* #line 324 "user/ansi_parser.rl" */ } diff --git a/user/ansi_parser.rl b/user/ansi_parser.rl index 7566210..c2042c7 100644 --- a/user/ansi_parser.rl +++ b/user/ansi_parser.rl @@ -3,6 +3,7 @@ #include "ansi_parser_callbacks.h" #include "ascii.h" #include "apars_logging.h" +#include "screen.h" /* Ragel constants block */ %%{ @@ -89,6 +90,11 @@ ansi_parser(char newchar) // This is used to detect timeout delay (time since last rx char) ansi_parser_char_cnt++; + if (termconf->ascii_debug) { + apars_handle_plainchar(newchar); + return; + } + // Init Ragel on the first run if (cs == -1) { %% write init; diff --git a/user/apars_utf8.c b/user/apars_utf8.c index 832cde8..110331a 100644 --- a/user/apars_utf8.c +++ b/user/apars_utf8.c @@ -11,19 +11,12 @@ #include "uart_driver.h" #include "ansi_parser_callbacks.h" #include "ansi_parser.h" +#include "ascii.h" static u8 bytes[4]; static int utf_len = 0; static int utf_j = 0; -ETSTimer timerResumeRx; - -void ICACHE_FLASH_ATTR resumeRxCb(void *unused) -{ - ansi_dbg("Parser recover."); - ansi_parser_inhibit = false; -} - /** * Clear the buffer where we collect pieces of a code point. * This is used for parser reset. @@ -47,6 +40,105 @@ apars_reset_utf8buffer(void) // U+40000 - U+FFFFF F1 - F3 80 - BF 80 - BF 80 - BF // U+100000 - U+10FFFF F4 80 - *8F 80 - BF 80 - BF +static void ICACHE_FLASH_ATTR screen_print_ascii(const char *str) +{ + char gly[2]; + gly[1] = 0; + for(int j = 0;str[j]!=0;j++) { + gly[0] = str[j]; + screen_putchar(gly); + } +} + +static void ICACHE_FLASH_ATTR hdump_spaces_eol(int needed) +{ + if (needed == 0) needed = 5; + int x, y; + screen_cursor_get(&y, &x); + if (x > termconf_live.width - needed) { + screen_clear_in_line(CLEAR_FROM_CURSOR); + screen_putchar("\n"); + screen_putchar("\r"); + } +} + + +static void ICACHE_FLASH_ATTR hdump_good(const char *ch) +{ + char buf[10]; + hdump_spaces_eol(6); + + screen_set_fg(7); + screen_set_bg(0); + if(ch[0]<32) { + screen_set_fg(7); + screen_set_bg(2); + switch (ch[0]) { + case NUL: screen_print_ascii("NUL"); break; + case SOH: screen_print_ascii("SOH"); break; + case STX: screen_print_ascii("STX"); break; + case ETX: screen_print_ascii("ETX"); break; + case EOT: screen_print_ascii("EOT"); break; + case ENQ: screen_print_ascii("ENQ"); break; + case ACK: screen_print_ascii("ACK"); break; + case BEL: screen_print_ascii("BEL"); break; + case BS: screen_print_ascii("BS"); break; + case TAB: screen_print_ascii("TAB"); break; + case LF: screen_print_ascii("LF"); break; + case VT: screen_print_ascii("VT"); break; + case FF: screen_print_ascii("FF"); break; + case CR: screen_print_ascii("CR"); break; + case SO: screen_print_ascii("SO"); break; + case SI: screen_print_ascii("SI"); break; + case DLE: screen_print_ascii("DLE"); break; + case DC1: screen_print_ascii("DC1"); break; + case DC2: screen_print_ascii("DC2"); break; + case DC3: screen_print_ascii("DC3"); break; + case DC4: screen_print_ascii("DC4"); break; + case NAK: screen_print_ascii("NAK"); break; + case SYN: screen_print_ascii("SYN"); break; + case ETB: screen_print_ascii("ETB"); break; + case CAN: screen_print_ascii("CAN"); break; + case EM: screen_print_ascii("EM"); break; + case SUB: screen_print_ascii("SUB"); break; + case ESC: screen_print_ascii("ESC"); break; + case FS: screen_print_ascii("FS"); break; + case GS: screen_print_ascii("GS"); break; + case RS: screen_print_ascii("RS"); break; + case US: screen_print_ascii("US"); break; + case SP: screen_print_ascii("SP"); break; + case DEL: screen_print_ascii("DEL"); break; + default: + sprintf(buf, "%02Xh", ch[0]); + screen_print_ascii(buf); + } + } else { + screen_putchar(ch); + } + + screen_set_default_bg(); + screen_set_default_fg(); + screen_print_ascii(" "); +} + +static void ICACHE_FLASH_ATTR hdump_bad(const char *ch, int len) +{ + char buf[10]; + hdump_spaces_eol(len*5); + + screen_set_fg(7); + screen_set_bg(1); + for (int i=0;i 0xF4) { + if (uc == 0xC0 || uc == 0xC1 || uc > 0xF4) { // forbidden start codes goto fail; } - if ((c & 0xE0) == 0xC0) { + if ((uc & 0xE0) == 0xC0) { utf_len = 2; } - else if ((c & 0xF0) == 0xE0) { + else if ((uc & 0xF0) == 0xE0) { utf_len = 3; } - else if ((c & 0xF8) == 0xF0) { + else if ((uc & 0xF8) == 0xF0) { utf_len = 4; } else { // chars over 127 that don't start unicode sequences goto fail; } - - bytes[0] = c; - utf_j = 1; } else { - if ((c & 0xC0) != 0x80) { + if ((uc & 0xC0) != 0x80) { + bytes[utf_j++] = uc; goto fail; } else { - bytes[utf_j++] = c; + bytes[utf_j++] = uc; if (utf_j >= utf_len) { // check for bad sequences - overlong or some other problem if (bytes[0] == 0xF4 && bytes[1] > 0x8F) goto fail; @@ -96,25 +190,31 @@ apars_handle_plainchar(char c) // trap for surrogates - those break javascript if (bytes[0] == 0xED && bytes[1] >= 0xA0 && bytes[1] <= 0xBF) goto fail; - screen_putchar((const char *) bytes); + if (termconf_live.ascii_debug) { + hdump_good((const char *) bytes); + } else { + screen_putchar((const char *) bytes); + } apars_reset_utf8buffer(); } } } } else { - bytes[0] = c; + bytes[0] = uc; bytes[1] = 0; // just to make sure it's closed... - screen_putchar((const char *) bytes); + if (termconf_live.ascii_debug) { + hdump_good((const char *) bytes); + } else { + screen_putchar((const char *) bytes); + } + apars_reset_utf8buffer(); } return; fail: - ansi_parser_inhibit = true; - + hdump_bad((const char *) bytes, utf_j); ansi_warn("BAD UTF8!"); apars_show_context(); apars_reset_utf8buffer(); - ansi_dbg("Temporarily inhibiting parser..."); - TIMER_START(&timerResumeRx, resumeRxCb, 500, 0); } diff --git a/user/cgi_sockets.c b/user/cgi_sockets.c index c91bb2b..9d6a087 100644 --- a/user/cgi_sockets.c +++ b/user/cgi_sockets.c @@ -69,7 +69,7 @@ updateNotify_do(Websock *ws, ScreenNotifyTopics topics) } httpd_cgi_state cont = screenSerializeToBuffer(sock_buff, SOCK_BUF_LEN, topics, &data); - int flg = 0; + int flg = WEBSOCK_FLAG_BIN; if (cont == HTTPD_CGI_MORE) flg |= WEBSOCK_FLAG_MORE; if (i > 0) flg |= WEBSOCK_FLAG_CONT; if (ws) { @@ -134,7 +134,7 @@ notify_growl(char *msg) // here's some potential for a race error with the other broadcast functions // - we assume app won't send notifications in the middle of updating content - cgiWebsockBroadcast(URL_WS_UPDATE, msg, (int) strlen(msg), 0); + cgiWebsockBroadcast(URL_WS_UPDATE, msg, (int) strlen(msg), WEBSOCK_FLAG_BIN); resetHeartbeatTimer(); } diff --git a/user/cgi_term_cfg.c b/user/cgi_term_cfg.c index 5f62b31..fe68015 100644 --- a/user/cgi_term_cfg.c +++ b/user/cgi_term_cfg.c @@ -222,6 +222,13 @@ cgiTermCfgSetParams(HttpdConnData *connData) termconf->allow_decopt_12 = (bool)n; } + if (GET_ARG("ascii_debug")) { + cgi_dbg("ascii_debug: %s", buff); + n = atoi(buff); + termconf->ascii_debug = (bool)n; + shall_clear_screen = true; + } + if (GET_ARG("theme")) { cgi_dbg("Screen color theme: %s", buff); n = atoi(buff); @@ -453,6 +460,9 @@ tplTermCfg(HttpdConnData *connData, char *token, void **arg) else if (streq(token, "allow_decopt_12")) { sprintf(buff, "%d", (int)termconf->allow_decopt_12); } + else if (streq(token, "ascii_debug")) { + sprintf(buff, "%d", (int)termconf->ascii_debug); + } else if (streq(token, "loopback")) { sprintf(buff, "%d", (int)termconf->loopback); } diff --git a/user/screen.c b/user/screen.c index 7fc271b..66a3e13 100644 --- a/user/screen.c +++ b/user/screen.c @@ -212,6 +212,7 @@ terminal_restore_defaults(void) termconf->want_all_fn = SCR_DEF_ALLFN; termconf->debugbar = SCR_DEF_DEBUGBAR; termconf->allow_decopt_12 = SCR_DEF_DECOPT12; + termconf->ascii_debug = SCR_DEF_ASCIIDEBUG; } /** @@ -241,6 +242,11 @@ terminal_apply_settings_noclear(void) termconf->allow_decopt_12 = SCR_DEF_DECOPT12; changed = 1; } + if (termconf->config_version < 3) { + persist_dbg("termconf: Updating to version %d", 1); + termconf->ascii_debug = SCR_DEF_ASCIIDEBUG; + changed = 1; + } termconf->config_version = TERMCONF_VERSION; diff --git a/user/screen.h b/user/screen.h index d69f711..8e4037b 100644 --- a/user/screen.h +++ b/user/screen.h @@ -67,6 +67,7 @@ enum CursorShape { #define SCR_DEF_ALLFN 0 // capture F5 etc #define SCR_DEF_DEBUGBAR 0 #define SCR_DEF_DECOPT12 0 +#define SCR_DEF_ASCIIDEBUG 0 // --- Persistent Settings --- #define CURSOR_BLINKS(shape) ((shape)==CURSOR_BLOCK_BL||(shape)==CURSOR_UNDERLINE_BL||(shape)==CURSOR_BAR_BL) @@ -74,7 +75,7 @@ enum CursorShape { // Size designed for the terminal config structure // Must be constant to avoid corrupting user config after upgrade #define TERMCONF_SIZE 300 -#define TERMCONF_VERSION 2 +#define TERMCONF_VERSION 3 typedef struct { u32 width; @@ -98,6 +99,7 @@ typedef struct { bool want_all_fn; bool debugbar; bool allow_decopt_12; + bool ascii_debug; } TerminalConfigBundle; // Live config