From f74d157a7cc01abb3c9817e29156d1278e3ed4e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Hru=C5=A1ka?= Date: Tue, 16 Nov 2021 01:09:13 +0100 Subject: [PATCH] refactor "next word/string" parsing to work inside the definition without substates, add char and [char], add .\" and s\" --- include/fh_error.h | 1 + include/fh_runtime.h | 22 +++-- src/fh_builtins.c | 93 ++++++++++++++++--- src/fh_error.c | 3 +- src/fh_runtime.c | 213 +++++++++++++++++++++++++------------------ str.forth | 7 ++ 6 files changed, 230 insertions(+), 109 deletions(-) diff --git a/include/fh_error.h b/include/fh_error.h index 755fc6f..0350295 100644 --- a/include/fh_error.h +++ b/include/fh_error.h @@ -26,6 +26,7 @@ enum fh_error { FH_ERR_ILLEGAL_FETCH, FH_ERR_ILLEGAL_STORE, FH_ERR_DIV_BY_ZERO, + FH_ERR_SYNTAX, FH_ERR_MAX, }; diff --git a/include/fh_runtime.h b/include/fh_runtime.h index ab5ee6a..28c5f2d 100644 --- a/include/fh_runtime.h +++ b/include/fh_runtime.h @@ -82,15 +82,9 @@ enum fh_state { /** Forth runtime minor state */ enum fh_substate { FH_SUBSTATE_NONE = 0, - FH_SUBSTATE_COLON_NAME, - FH_SUBSTATE_S_QUOTE, - FH_SUBSTATE_DOT_QUOTE, FH_SUBSTATE_PAREN_COMMENT, FH_SUBSTATE_LINE_COMMENT, FH_SUBSTATE_EXIT, - FH_SUBSTATE_SEE_NAME, - FH_SUBSTATE_POSTPONE_NAME, - FH_SUBSTATE_CHAR, FH_SUBSTATE_MAX, }; @@ -174,6 +168,22 @@ void fh_setsubstate(struct fh_thread_s *fh, enum fh_substate substate); enum fh_error w_user_word(struct fh_thread_s *fh, const struct fh_word_s *w); +enum fh_error fh_input_read_quotedstring(struct fh_thread_s *fh, bool escaped, char *outbuf, size_t capacity, size_t *out_len); +enum fh_error fh_input_read_word(struct fh_thread_s *fh, char **out, size_t *len); +void fh_input_consume_spaces(struct fh_thread_s *fh); + +enum fh_error fh_postpone_word( + struct fh_thread_s *fh, + const char *name, + size_t wordlen +); + +enum fh_error fh_see_word( + struct fh_thread_s *fh, + const char *name, + size_t wordlen +); + /* if the return address is this, we should drop back to interactive mode */ // SFR and magic addresses are "negative" diff --git a/src/fh_builtins.c b/src/fh_builtins.c index bf7bcaf..0b13b95 100644 --- a/src/fh_builtins.c +++ b/src/fh_builtins.c @@ -362,7 +362,13 @@ static enum fh_error w_colon(struct fh_thread_s *fh, const struct fh_word_s *w) enum fh_error rv; ENSURE_STATE(FH_STATE_INTERPRET); - fh_setstate(fh, FH_STATE_COMPILE, FH_SUBSTATE_COLON_NAME); + char *wordname = NULL; + size_t namelen = 0; + fh_input_consume_spaces(fh); + TRY(fh_input_read_word(fh, &wordname, &namelen)); + LOG("Name: %.*s", namelen, wordname); + + fh_setstate(fh, FH_STATE_COMPILE, 0); uint32_t ptr; TRY(fh_heap_reserve(fh, DICTWORD_SIZE, &ptr)); @@ -371,6 +377,9 @@ static enum fh_error w_colon(struct fh_thread_s *fh, const struct fh_word_s *w) new_word->previous = fh->dict_last; new_word->param = fh->here; new_word->handler = w_user_word; + strncpy(new_word->name, wordname, namelen); + new_word->name[namelen] = 0; + fh->dict_last = ptr; return FH_OK; @@ -379,9 +388,15 @@ static enum fh_error w_colon(struct fh_thread_s *fh, const struct fh_word_s *w) static enum fh_error w_postpone(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; + enum fh_error rv; ENSURE_STATE(FH_STATE_COMPILE); - fh_setsubstate(fh, FH_SUBSTATE_POSTPONE_NAME); + char *wordname; + size_t namelen = 0; + fh_input_consume_spaces(fh); + TRY(fh_input_read_word(fh, &wordname, &namelen)); + TRY(fh_postpone_word(fh, wordname, namelen)); + return FH_OK; } @@ -770,16 +785,43 @@ static enum fh_error w_exit(struct fh_thread_s *fh, const struct fh_word_s *w) static enum fh_error w_s_quote(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; - fh_setsubstate(fh, FH_SUBSTATE_S_QUOTE); + enum fh_error rv; + size_t len; + uint32_t addr = fh->here + (fh->state == FH_STATE_INTERPRET ? 0 : INSTR_SIZE); + + /* read the string straight into HEAP */ + + fh_input_consume_spaces(fh); + char *start = (char *) &fh->heap[addr]; + TRY(fh_input_read_quotedstring(fh, w->param == 1, start, HEAP_END - addr, &len)); + fh->here = WORDALIGNED(addr + len); + + struct fh_instruction_s instr; + if (fh->state == FH_STATE_INTERPRET) { + LOG("Interpret a string alloc: \"%.*s\"", len, start); + TRY(ds_push(fh, addr)); + TRY(ds_push(fh, len)); + } else { + LOG("Compile a string: \"%.*s\"", len, start); + instr_init(&instr, FH_INSTR_ALLOCSTR, len); + fh_heap_write(fh, addr - INSTR_SIZE, &instr, INSTR_SIZE); + } + return FH_OK; } -//static enum fh_error w_char(struct fh_thread_s *fh, const struct fh_word_s *w) -//{ -// (void) w; -// fh_setsubstate(fh, FH_SUBSTATE_CHAR); -// return FH_OK; -//} +static enum fh_error w_char(struct fh_thread_s *fh, const struct fh_word_s *w) +{ + (void) w; + enum fh_error rv; + + char *wordname = NULL; + size_t namelen = 0; + fh_input_consume_spaces(fh); + TRY(fh_input_read_word(fh, &wordname, &namelen)); + TRY(ds_push(fh, (char) *wordname)); + return FH_OK; +} static enum fh_error w_error_word0(struct fh_thread_s *fh, const struct fh_word_s *w) { @@ -792,7 +834,26 @@ static enum fh_error w_error_word0(struct fh_thread_s *fh, const struct fh_word_ static enum fh_error w_dot_quote(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; - fh_setsubstate(fh, FH_SUBSTATE_DOT_QUOTE); + enum fh_error rv; + size_t len; + uint32_t addr = fh->here + (fh->state == FH_STATE_INTERPRET ? 0 : INSTR_SIZE); + + /* read the string straight into HEAP, but don't advance the heap pointer, so the string is immediately discarded again */ + + fh_input_consume_spaces(fh); + char *start = (char *) &fh->heap[addr]; + TRY(fh_input_read_quotedstring(fh, w->param == 1, start, HEAP_END - addr, &len)); + + struct fh_instruction_s instr; + if (fh->state == FH_STATE_INTERPRET) { + FHPRINT("%.*s", (int) len, start); + } else { + LOG("Compile a string: \"%.*s\"", len, start); + instr_init(&instr, FH_INSTR_TYPESTR, len); + fh_heap_write(fh, addr - INSTR_SIZE, &instr, INSTR_SIZE); + fh->here = WORDALIGNED(addr + len); + } + return FH_OK; } @@ -983,7 +1044,11 @@ static enum fh_error w_emit(struct fh_thread_s *fh, const struct fh_word_s *w) static enum fh_error w_see(struct fh_thread_s *fh, const struct fh_word_s *w) { enum fh_error rv; - fh_setsubstate(fh, FH_SUBSTATE_SEE_NAME); + char *wordname; + size_t namelen = 0; + fh_input_consume_spaces(fh); + TRY(fh_input_read_word(fh, &wordname, &namelen)); + TRY(fh_see_word(fh, wordname, namelen)); return FH_OK; } @@ -1147,11 +1212,13 @@ enum fh_error register_builtin_words(struct fh_thread_s *fh) const struct name_and_handler builtins[] = { {"", w_error_word0, 1, 0}, /* Weird meta stuff */ - {"immediate", w_immediate, 1, 0}, + {"immediate", w_immediate, 0, 0}, {"postpone", w_postpone, 1, 0}, {"[", w_leftbracket, 1, 0}, {"]", w_rightbracket, 1, 0}, {"literal", w_literal, 1, 0}, + {"char", w_char, 0, 0}, + {"[char]", w_char, 1, 0}, /* Runtime stats */ {"depth", w_depth, 0, 0}, {"unused", w_unused, 0, 0}, @@ -1161,7 +1228,9 @@ enum fh_error register_builtin_words(struct fh_thread_s *fh) {"bye", w_bye, 0, 0}, /* Strings & Chars */ {"s\"", w_s_quote, 1, 0}, + {"s\\\"", w_s_quote, 1, 1}, // escaped {".\"", w_dot_quote, 1, 0}, + {".\\\"", w_dot_quote, 1, 1}, // escaped, this is non-standard // {"char", w_char, 1, 0}, /* Pointers */ {"@", w_fetch, 0, 0}, diff --git a/src/fh_error.c b/src/fh_error.c index bf065c7..d501a31 100644 --- a/src/fh_error.c +++ b/src/fh_error.c @@ -14,11 +14,12 @@ static const char *errornames[FH_ERR_MAX] = { [FH_ERR_COMPILE_FULL] = "COMPILE_FULL", [FH_ERR_NAME_TOO_LONG] = "NAME_TOO_LONG", [FH_ERR_INVALID_STATE] = "INVALID_STATE", - [FH_ERR_INTERNAL] = "INTERNAL", + [FH_ERR_INTERNAL] = "INTERNAL_ERROR", [FH_ERR_UNKNOWN_WORD] = "UNKNOWN_WORD", [FH_ERR_ILLEGAL_FETCH] = "ILLEGAL_FETCH", [FH_ERR_ILLEGAL_STORE] = "ILLEGAL_STORE", [FH_ERR_DIV_BY_ZERO] = "DIV_BY_ZERO", + [FH_ERR_SYNTAX] = "SYNTAX_ERROR", }; /** Get error name from code, returns Unknown if not defined */ diff --git a/src/fh_runtime.c b/src/fh_runtime.c index 31a9237..f9f8245 100644 --- a/src/fh_runtime.c +++ b/src/fh_runtime.c @@ -23,17 +23,127 @@ static const char *statenames[FH_STATE_MAX] = { /** Sub-state names */ static const char *substatenames[FH_SUBSTATE_MAX] = { [FH_SUBSTATE_NONE] = "NONE", - [FH_SUBSTATE_COLON_NAME] = "COLON_NAME", - [FH_SUBSTATE_S_QUOTE] = "S_QUOTE", - [FH_SUBSTATE_DOT_QUOTE] = "DOT_QUOTE", [FH_SUBSTATE_PAREN_COMMENT] = "PAREN_COMMENT", [FH_SUBSTATE_LINE_COMMENT] = "LINE_COMMENT", [FH_SUBSTATE_EXIT] = "EXIT", - [FH_SUBSTATE_SEE_NAME] = "SEE_NAME", - [FH_SUBSTATE_POSTPONE_NAME] = "POSTPONE_NAME", - [FH_SUBSTATE_CHAR] = "CHAR", }; +void fh_input_consume_spaces(struct fh_thread_s *fh) { + char *rp = (char *) &fh->heap[INPUTBUF_ADDR + fh->inputptr]; + while (isspace(*rp)) { + rp++; + fh->inputptr++; + } +} + +enum fh_error fh_input_read_word(struct fh_thread_s *fh, char **out, size_t *len) { + char *rp = (char *) &fh->heap[INPUTBUF_ADDR + fh->inputptr]; + char *start = rp; + while (1) { + char c = *rp; + if (isspace(c) || c == 0) { + if (rp == start) { + LOGE("Expected a word!"); + return FH_ERR_SYNTAX; + } + *out = start; + *len = rp - start; + return FH_OK; + } + rp++; + fh->inputptr++; + } +} + +enum fh_error fh_input_read_quotedstring(struct fh_thread_s *fh, bool escaped, char *outbuf, size_t capacity, size_t *out_len) { + char *rp = (char *) &fh->heap[INPUTBUF_ADDR + fh->inputptr]; + bool next_escaped = false; + size_t remains = capacity; + size_t len = 0; + int hexdigits = 0; + uint32_t hex = 0; + while (len < capacity) { + char c = *rp; + if (c == 0) { + LOGE("Unterminated quoted string!"); + return FH_ERR_SYNTAX; + } + + if (hexdigits) { + hex <<= 4; + if (isdigit(c)) { + hex |= c - '0'; + } else if (c>='a' && c<='f') { + hex |= c - 'a'; + } else if (c>='A' && c<='F') { + hex |= c - 'A'; + } else { + LOGE("Bad hex escape"); + return FH_ERR_SYNTAX; + } + hexdigits--; + if (hexdigits == 0) { + c = (char) hex; + goto append; + } + } + + if (!escaped || !next_escaped) { + if (c == '\"') { + *outbuf = 0; + *out_len = len; + // advance past the quote + fh->inputptr++; + return FH_OK; + } + + if (c == '\\') { + next_escaped = true; + goto skip; + } + } else { + next_escaped = false; + switch (c) { + case 'a': c = 7; break; + case 'b': c = 8; break; + case 'e': c = 27; break; + case 'f': c = 12; break; + case 'l': c = 10; break; + case 'm': + case 'n': + if (remains < 2) goto full; + *outbuf++ = '\r'; + *outbuf++ = '\n'; + remains -= 2; + len += 2; + goto skip; + case 'q': c = '"'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + case 'z': c = 0; break; // XXX this will cause problems! + case 'x': + hex = 0; + hexdigits = 2; + goto skip; + default:; + // just append normally + } + } + + append: + *outbuf++ = c; + len++; + skip: + rp++; + fh->inputptr++; + } + + full: + LOGE("String too long!"); + return FH_ERR_SYNTAX; +} + /** Add a word to the dictionary. */ enum fh_error fh_add_word(const struct fh_word_s *w, struct fh_thread_s *fh) { @@ -219,46 +329,6 @@ enum fh_error fh_init(struct fh_thread_s *fh) return FH_OK; } -/** Process a quoted string read from input */ -static enum fh_error fh_handle_quoted_string( - struct fh_thread_s *fh, - const char *start, - size_t len -) -{ - enum fh_error rv; - uint32_t addr = 0; - struct fh_instruction_s instr; - - if (fh->state == FH_STATE_INTERPRET) { - switch (fh->substate) { - case FH_SUBSTATE_S_QUOTE: - addr = fh->here; - TRY(fh_heap_put(fh, start, len)); - TRY(ds_push(fh, addr)); - TRY(ds_push(fh, len)); - break; - case FH_SUBSTATE_DOT_QUOTE: - FHPRINT("%.*s", (int) len, start); - break; - - default: - LOGE("Bad substate in interpret mode: %s", substatenames[fh->substate]); - } - } else { - LOG("Compile a string"); - /* compile */ - if (fh->substate == FH_SUBSTATE_S_QUOTE) { - instr_init(&instr, FH_INSTR_ALLOCSTR, len); - } else { - instr_init(&instr, FH_INSTR_TYPESTR, len); - } - TRY(fh_heap_put(fh, &instr, INSTR_SIZE)); - TRY(fh_heap_put(fh, start, len)); - } - return FH_OK; -} - enum fh_error fh_handle_word(struct fh_thread_s *fh, uint32_t addr) { struct fh_instruction_s instr; @@ -419,7 +489,7 @@ static void show_word(struct fh_thread_s *fh, const struct fh_word_s *w) } /** Decompile a word */ -static enum fh_error fh_see_word( +enum fh_error fh_see_word( struct fh_thread_s *fh, const char *name, const size_t wordlen @@ -434,7 +504,7 @@ static enum fh_error fh_see_word( } /** Postpone a word */ -static enum fh_error fh_postpone_word( +enum fh_error fh_postpone_word( struct fh_thread_s *fh, const char *name, const size_t wordlen @@ -496,9 +566,6 @@ enum fh_error fh_process_line(struct fh_thread_s *fh, const char *linebuf, size_ size_t length; switch (fh->substate) { case FH_SUBSTATE_NONE: - case FH_SUBSTATE_COLON_NAME: - case FH_SUBSTATE_SEE_NAME: - case FH_SUBSTATE_POSTPONE_NAME: /* try to read a word */ end = strchr(rp, ' '); if (end) { @@ -507,47 +574,13 @@ enum fh_error fh_process_line(struct fh_thread_s *fh, const char *linebuf, size_ length = strlen(rp); } - switch (fh->substate) { - case FH_SUBSTATE_NONE: - /* eval a word */ - LOG("Handle \"%.*s\"", (int) length, rp); - TRY(fh_handle_ascii_word(fh, rp, length)); - break; - case FH_SUBSTATE_COLON_NAME: - /* new word's name is found */ - LOG("New word name = \"%.*s\"", (int) length, rp); - strncpy(fh_word_at(fh, fh->dict_last)->name, rp, length); - fh_setsubstate(fh, FH_SUBSTATE_NONE); - break; - case FH_SUBSTATE_SEE_NAME: - TRY(fh_see_word(fh, rp, length)); - fh_setsubstate(fh, FH_SUBSTATE_NONE); - break; - case FH_SUBSTATE_POSTPONE_NAME: - TRY(fh_postpone_word(fh, rp, length)); - fh_setsubstate(fh, FH_SUBSTATE_NONE); - break; - } + ReadPos += length + 1; - if (end) { - ReadPos += length + 1; - } else { - goto done; - } - break; + /* eval a word */ + LOG("Handle \"%.*s\"", (int) length, rp); + TRY(fh_handle_ascii_word(fh, rp, length)); - case FH_SUBSTATE_S_QUOTE: - case FH_SUBSTATE_DOT_QUOTE: - end = strchr(rp, '"'); - if (end) { - length = end - rp; - LOG("Quoted string: \"%.*s\"", (int) length, rp); - TRY(fh_handle_quoted_string(fh, rp, length)); - fh_setsubstate(fh, FH_SUBSTATE_NONE); - ReadPos += length + 1; - } else { - /* no end. this is weird. */ - LOGE("Unterminated quoted string!"); + if (!end) { goto done; } break; diff --git a/str.forth b/str.forth index c1e3f2c..3f7e53c 100644 --- a/str.forth +++ b/str.forth @@ -1,2 +1,9 @@ ." show this string" S" save this string" TYPE + +: aa ." show this string" ; +: bb S" show this string" TYPE ; + +aa + +bb