From b70c65868b1067c736f78786af950a58219f0a1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Hru=C5=A1ka?= Date: Fri, 29 Sep 2017 21:57:20 +0200 Subject: [PATCH] dump context and stop listening for a bit on rx of bad UTF --- user/ansi_parser.c | 49 ++++++++++++++++--------------- user/ansi_parser.h | 2 ++ user/ansi_parser.rl | 3 ++ user/apars_utf8.c | 71 +++++++++++++++++++++++++++++++++------------ user/user_main.c | 7 ++++- 5 files changed, 89 insertions(+), 43 deletions(-) diff --git a/user/ansi_parser.c b/user/ansi_parser.c index 7b01ded..d23cca6 100644 --- a/user/ansi_parser.c +++ b/user/ansi_parser.c @@ -44,6 +44,7 @@ static volatile bool inside_string = false; // public volatile u32 ansi_parser_char_cnt = 0; +volatile bool ansi_parser_inhibit = 0; void ICACHE_FLASH_ATTR ansi_parser_reset(void) { @@ -108,18 +109,20 @@ ansi_parser(char newchar) static char string_buffer[ANSI_STR_LEN]; static int str_ni; + if (ansi_parser_inhibit) return; + // This is used to detect timeout delay (time since last rx char) ansi_parser_char_cnt++; // Init Ragel on the first run if (cs == -1) { -/* #line 118 "user/ansi_parser.c" */ +/* #line 121 "user/ansi_parser.c" */ { cs = ansi_start; } -/* #line 92 "user/ansi_parser.rl" */ +/* #line 95 "user/ansi_parser.rl" */ #if DEBUG_ANSI memset(history, 0, sizeof(history)); @@ -199,7 +202,7 @@ ansi_parser(char newchar) // The parser -/* #line 203 "user/ansi_parser.c" */ +/* #line 206 "user/ansi_parser.c" */ { const char *_acts; unsigned int _nacts; @@ -389,7 +392,7 @@ execFuncs: while ( _nacts-- > 0 ) { switch ( *_acts++ ) { case 0: -/* #line 179 "user/ansi_parser.rl" */ +/* #line 182 "user/ansi_parser.rl" */ { ansi_warn("Parser error."); apars_show_context(); @@ -398,7 +401,7 @@ execFuncs: } break; case 1: -/* #line 188 "user/ansi_parser.rl" */ +/* #line 191 "user/ansi_parser.rl" */ { if ((*p) != 0) { apars_handle_plainchar((*p)); @@ -406,7 +409,7 @@ execFuncs: } break; case 2: -/* #line 196 "user/ansi_parser.rl" */ +/* #line 199 "user/ansi_parser.rl" */ { // Reset the CSI builder leadchar = NUL; @@ -423,13 +426,13 @@ execFuncs: } break; case 3: -/* #line 211 "user/ansi_parser.rl" */ +/* #line 214 "user/ansi_parser.rl" */ { leadchar = (*p); } break; case 4: -/* #line 215 "user/ansi_parser.rl" */ +/* #line 218 "user/ansi_parser.rl" */ { if (arg_cnt == 0) arg_cnt = 1; // x10 + digit @@ -439,7 +442,7 @@ execFuncs: } break; case 5: -/* #line 223 "user/ansi_parser.rl" */ +/* #line 226 "user/ansi_parser.rl" */ { if (arg_cnt == 0) arg_cnt = 1; // handle case when first arg is empty arg_cnt++; @@ -447,20 +450,20 @@ execFuncs: } break; case 6: -/* #line 229 "user/ansi_parser.rl" */ +/* #line 232 "user/ansi_parser.rl" */ { interchar = (*p); } break; case 7: -/* #line 233 "user/ansi_parser.rl" */ +/* #line 236 "user/ansi_parser.rl" */ { apars_handle_csi(leadchar, arg, arg_cnt, interchar, (*p)); {cs = 1;goto _again;} } break; case 8: -/* #line 245 "user/ansi_parser.rl" */ +/* #line 248 "user/ansi_parser.rl" */ { leadchar = (*p); str_ni = 0; @@ -470,13 +473,13 @@ execFuncs: } break; case 9: -/* #line 253 "user/ansi_parser.rl" */ +/* #line 256 "user/ansi_parser.rl" */ { string_buffer[str_ni++] = (*p); } break; case 10: -/* #line 257 "user/ansi_parser.rl" */ +/* #line 260 "user/ansi_parser.rl" */ { inside_string = false; string_buffer[str_ni++] = '\0'; @@ -485,41 +488,41 @@ execFuncs: } break; case 11: -/* #line 270 "user/ansi_parser.rl" */ +/* #line 273 "user/ansi_parser.rl" */ { apars_handle_hash_cmd((*p)); {cs = 1;goto _again;} } break; case 12: -/* #line 275 "user/ansi_parser.rl" */ +/* #line 278 "user/ansi_parser.rl" */ { apars_handle_short_cmd((*p)); {cs = 1;goto _again;} } break; case 13: -/* #line 280 "user/ansi_parser.rl" */ +/* #line 283 "user/ansi_parser.rl" */ { apars_handle_space_cmd((*p)); {cs = 1;goto _again;} } break; case 14: -/* #line 287 "user/ansi_parser.rl" */ +/* #line 290 "user/ansi_parser.rl" */ { leadchar = (*p); {cs = 10;goto _again;} } break; case 15: -/* #line 292 "user/ansi_parser.rl" */ +/* #line 295 "user/ansi_parser.rl" */ { apars_handle_chs_designate(leadchar, (*p)); {cs = 1;goto _again;} } break; -/* #line 523 "user/ansi_parser.c" */ +/* #line 526 "user/ansi_parser.c" */ } } goto _again; @@ -537,7 +540,7 @@ _again: while ( __nacts-- > 0 ) { switch ( *__acts++ ) { case 0: -/* #line 179 "user/ansi_parser.rl" */ +/* #line 182 "user/ansi_parser.rl" */ { ansi_warn("Parser error."); apars_show_context(); @@ -547,7 +550,7 @@ _again: goto _again;} } break; -/* #line 551 "user/ansi_parser.c" */ +/* #line 554 "user/ansi_parser.c" */ } } } @@ -555,6 +558,6 @@ goto _again;} _out: {} } -/* #line 315 "user/ansi_parser.rl" */ +/* #line 318 "user/ansi_parser.rl" */ } diff --git a/user/ansi_parser.h b/user/ansi_parser.h index 8e813bd..212aaa4 100644 --- a/user/ansi_parser.h +++ b/user/ansi_parser.h @@ -3,6 +3,8 @@ #include +extern volatile bool ansi_parser_inhibit; // discard all characters + void ansi_parser_reset(void); extern volatile u32 ansi_parser_char_cnt; diff --git a/user/ansi_parser.rl b/user/ansi_parser.rl index 2f74623..7566210 100644 --- a/user/ansi_parser.rl +++ b/user/ansi_parser.rl @@ -19,6 +19,7 @@ static volatile bool inside_string = false; // public volatile u32 ansi_parser_char_cnt = 0; +volatile bool ansi_parser_inhibit = 0; void ICACHE_FLASH_ATTR ansi_parser_reset(void) { @@ -83,6 +84,8 @@ ansi_parser(char newchar) static char string_buffer[ANSI_STR_LEN]; static int str_ni; + if (ansi_parser_inhibit) return; + // This is used to detect timeout delay (time since last rx char) ansi_parser_char_cnt++; diff --git a/user/apars_utf8.c b/user/apars_utf8.c index b3f502c..17b7387 100644 --- a/user/apars_utf8.c +++ b/user/apars_utf8.c @@ -8,11 +8,22 @@ #include "apars_utf8.h" #include "apars_logging.h" #include "screen.h" +#include "uart_driver.h" +#include "ansi_parser_callbacks.h" +#include "ansi_parser.h" -static char utf_collect[4]; -static int utf_i = 0; +static u8 bytes[4]; +static int utf_len = 0; static int utf_j = 0; +ETSTimer timerResumeRx; + +void ICACHE_FLASH_ATTR resumeRxCb(void *unused) +{ + ansi_dbg("Parser recover."); + ansi_parser_inhibit = false; +} + /** * Clear the buffer where we collect pieces of a code point. * This is used for parser reset. @@ -20,11 +31,22 @@ static int utf_j = 0; void ICACHE_FLASH_ATTR apars_reset_utf8buffer(void) { - utf_i = 0; + utf_len = 0; utf_j = 0; - memset(utf_collect, 0, 4); + memset(bytes, 0, 4); } +// Code Points First Byte Second Byte Third Byte Fourth Byte +// U+0000 - U+007F 00 - 7F +// U+0080 - U+07FF C2 - DF 80 - BF +// U+0800 - U+0FFF E0 *A0 - BF 80 - BF +// U+1000 - U+CFFF E1 - EC 80 - BF 80 - BF +// U+D000 - U+D7FF ED 80 - *9F 80 - BF +// U+E000 - U+FFFF EE - EF 80 - BF 80 - BF +// U+10000 - U+3FFFF F0 *90 - BF 80 - BF 80 - BF +// U+40000 - U+FFFFF F1 - F3 80 - BF 80 - BF 80 - BF +// U+100000 - U+10FFFF F4 80 - *8F 80 - BF 80 - BF + /** * Handle a received plain character * @param c - received character @@ -34,28 +56,28 @@ apars_handle_plainchar(char c) { // collecting unicode glyphs... if (c & 0x80) { - if (utf_i == 0) { + if (utf_len == 0) { // start - if (c == 192 || c == 193 || c >= 245) { - // forbidden codes (would be an overlong sequence) + if (c == 0xC0 || c == 0xC1 || c > 0xF4) { + // forbidden start codes goto fail; } if ((c & 0xE0) == 0xC0) { - utf_i = 2; + utf_len = 2; } else if ((c & 0xF0) == 0xE0) { - utf_i = 3; + utf_len = 3; } else if ((c & 0xF8) == 0xF0) { - utf_i = 4; + utf_len = 4; } else { // chars over 127 that don't start unicode sequences goto fail; } - utf_collect[0] = c; + bytes[0] = c; utf_j = 1; } else { @@ -63,22 +85,33 @@ apars_handle_plainchar(char c) goto fail; } else { - utf_collect[utf_j++] = c; - if (utf_j >= utf_i) { - screen_putchar(utf_collect); + bytes[utf_j++] = c; + if (utf_j >= utf_len) { + // check for bad sequences + if (bytes[0] == 0xF4 && bytes[1] > 0x8F) goto fail; + if (bytes[0] == 0xF0 && bytes[1] < 0x90) goto fail; + if (bytes[0] == 0xED && bytes[1] > 0x9F) goto fail; + if (bytes[0] == 0xE0 && bytes[1] < 0xA0) goto fail; + + screen_putchar((const char *) bytes); apars_reset_utf8buffer(); } } } } else { - utf_collect[0] = c; - utf_collect[1] = 0; // just to make sure it's closed... - screen_putchar(utf_collect); + bytes[0] = c; + bytes[1] = 0; // just to make sure it's closed... + screen_putchar((const char *) bytes); } return; - fail: - ansi_warn("Bad UTF-8: %0Xh", c); +fail: + ansi_parser_inhibit = true; + + ansi_warn("BAD UTF8!"); + apars_show_context(); apars_reset_utf8buffer(); + ansi_dbg("Temporarily inhibiting parser..."); + TIMER_START(&timerResumeRx, resumeRxCb, 1000, 0); } diff --git a/user/user_main.c b/user/user_main.c index 7b9a98f..fd735b5 100644 --- a/user/user_main.c +++ b/user/user_main.c @@ -28,6 +28,8 @@ #include "ansi_parser_callbacks.h" #include "wifimgr.h" #include "persist.h" +#include "ansi_parser.h" +#include "ascii.h" #ifdef ESPFS_POS CgiUploadFlashDef uploadParams={ @@ -86,6 +88,7 @@ static ETSTimer prHeapTimer; //Main routine. Initialize stdout, the I/O, filesystem and the webserver and we're done. void ICACHE_FLASH_ATTR user_init(void) { + ansi_parser_inhibit = true; serialInitBase(); // Prevent WiFi starting and connecting by default @@ -128,9 +131,11 @@ static void ICACHE_FLASH_ATTR user_start(void *unused) captdnsInit(); httpdInit(routes, 80); + ansi_parser_inhibit = false; + // Print the CANCEL character to indicate the module has restarted // Critically important for client application if any kind of screen persistence / content re-use is needed - UART_WriteChar(UART0, 24, UART_TIMEOUT_US); // 0x18 - 24 - CAN + UART_WriteChar(UART0, CAN, UART_TIMEOUT_US); // 0x18 - 24 - CAN } // ---- unused funcs removed from sdk to save space ---