diff --git a/include/fh_mem.h b/include/fh_mem.h index 52e129f..26a1938 100644 --- a/include/fh_mem.h +++ b/include/fh_mem.h @@ -7,6 +7,8 @@ #ifndef FORTH_FH_MEM_H #define FORTH_FH_MEM_H +void fh_setbase(struct fh_thread_s *fh, uint32_t base); + enum fh_error fh_fetch(struct fh_thread_s *fh, uint32_t addr, uint32_t *dst); enum fh_error fh_fetch_char(struct fh_thread_s *fh, uint32_t addr, char *dst); diff --git a/src/fh_builtins.c b/src/fh_builtins.c index f4a99bd..def0065 100644 --- a/src/fh_builtins.c +++ b/src/fh_builtins.c @@ -10,6 +10,55 @@ #define TOBOOL(a) (a == 0 ? 0 : 0xFFFFFFFF) + +/** + * Encode a code point using UTF-8 + * + * Copied from ESPTERM source + * + * @param out - output buffer (min 4 characters), will be 0-terminated if shorten than 4 + * @param utf - code point 0-0x10FFFF + * @return number of bytes on success, 0 on failure (also produces U+FFFD, which uses 3 bytes) + */ +static int utf8_encode(char *out, uint32_t utf) +{ + if (utf <= 0x7F) { + // Plain ASCII + out[0] = (char) utf; + out[1] = 0; + return 1; + } else if (utf <= 0x07FF) { + // 2-byte unicode + out[0] = (char) (((utf >> 6) & 0x1F) | 0xC0); + out[1] = (char) (((utf >> 0) & 0x3F) | 0x80); + out[2] = 0; + return 2; + } else if (utf <= 0xFFFF) { + // 3-byte unicode + out[0] = (char) (((utf >> 12) & 0x0F) | 0xE0); + out[1] = (char) (((utf >> 6) & 0x3F) | 0x80); + out[2] = (char) (((utf >> 0) & 0x3F) | 0x80); + out[3] = 0; + return 3; + } else if (utf <= 0x10FFFF) { + // 4-byte unicode + out[0] = (char) (((utf >> 18) & 0x07) | 0xF0); + out[1] = (char) (((utf >> 12) & 0x3F) | 0x80); + out[2] = (char) (((utf >> 6) & 0x3F) | 0x80); + out[3] = (char) (((utf >> 0) & 0x3F) | 0x80); +// out[4] = 0; + return 4; + } else { + // error - use replacement character + out[0] = (char) 0xEF; + out[1] = (char) 0xBF; + out[2] = (char) 0xBD; + out[3] = 0; + return 0; + } +} + + static enum fh_error w_plus(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; @@ -247,7 +296,7 @@ static enum fh_error w_semicolon(struct fh_thread_s *fh, const struct fh_word_s return FH_OK; } -static enum fh_error w_dup(struct fh_thread_s *fh, const struct fh_word_s *w) +static enum fh_error w_dupe(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; @@ -257,6 +306,18 @@ static enum fh_error w_dup(struct fh_thread_s *fh, const struct fh_word_s *w) return FH_OK; } +static enum fh_error w_question_dupe(struct fh_thread_s *fh, const struct fh_word_s *w) +{ + (void) w; + enum fh_error rv; + uint32_t a = 0; + TRY(ds_peek(fh, &a)); + if (a) { + TRY(ds_push(fh, a)); + } + return FH_OK; +} + static enum fh_error w_two_dup(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; @@ -475,11 +536,10 @@ static enum fh_error w_type(struct fh_thread_s *fh, const struct fh_word_s *w) return FH_OK; } -static enum fh_error w_cr(struct fh_thread_s *fh, const struct fh_word_s *w) +static enum fh_error wp_putc(struct fh_thread_s *fh, const struct fh_word_s *w) { - (void) w; (void) fh; - FHPRINT("\n"); + FHPRINT("%c", w->param); return FH_OK; } @@ -576,18 +636,36 @@ static enum fh_error w_bye(struct fh_thread_s *fh, const struct fh_word_s *w) } static enum fh_error wp_setbase(struct fh_thread_s *fh, const struct fh_word_s *w) +{ + fh_setbase(fh, w->param); + return FH_OK; +} + +static enum fh_error w_emit(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; - fh->base = w->param; + uint32_t a; + TRY(ds_pop(fh, &a)); + + char buf[5]; + int num = utf8_encode(buf, a); + FHPRINT("%.*s", num, buf); + return FH_OK; +} + +static enum fh_error wp_const(struct fh_thread_s *fh, const struct fh_word_s *w) +{ + enum fh_error rv; + TRY(ds_push(fh, w->param)); return FH_OK; } -static enum fh_error w_base(struct fh_thread_s *fh, const struct fh_word_s *w) +static enum fh_error w_depth(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; - TRY(ds_push(fh, MAGICADDR_BASE)); + TRY(ds_push(fh, w->param)); return FH_OK; } @@ -656,76 +734,90 @@ enum fh_error register_builtin_words(struct fh_thread_s *fh) }; const struct name_and_handler builtins[] = { - {"s\"", w_s_quote, 1, 0}, - {".\"", w_dot_quote, 1, 0}, + {"s\"", w_s_quote, 1, 0}, + {".\"", w_dot_quote, 1, 0}, /* Compiler control words */ - {"bye", w_bye, 0, 0}, + {"bye", w_bye, 0, 0}, /* Pointers */ - {"@", w_fetch, 0, 0}, - {"!", w_store, 0, 0}, - {"2!", w_two_store, 0, 0}, - {"2@", w_two_fetch, 0, 0}, + {"@", w_fetch, 0, 0}, + {"!", w_store, 0, 0}, + {"2!", w_two_store, 0, 0}, + {"2@", w_two_fetch, 0, 0}, + // TODO +! + // TODO pictured numbers (#) + // TODO tick + // TODO comma + // TODO >BODY, >IN, >NUMBER /* Arithmetics */ - {"dec", wp_setbase, 0, 10}, - {"hex", wp_setbase, 0, 16}, - {"base", w_base, 0, 0}, - {"+", w_plus, 0, 0}, - {"-", w_minus, 0, 0}, - {"*", w_star, 0, 0}, - {"*/", w_star_slash, 0, 0}, - {"/", w_slash, 0, 0}, - {"/mod", w_slash_mod, 0, 0}, - {"0<", w_zero_less, 0, 0}, - {"0=", w_zero_equals, 0, 0}, - {"0<>", w_zero_not_equals, 0, 0}, - {"0>", w_zero_greater, 0, 0}, - {"<", w_less, 0, 0}, - {"=", w_equals, 0, 0}, - {"<>", w_not_equals, 0, 0}, - {">", w_greater, 0, 0}, - {"1+", wp_add, 0, 1}, - {"1-", wp_add, 0, -1}, - {"2+", wp_add, 0, 2}, - {"2-", wp_add, 0, -2}, - {"2*", wp_mul, 0, 2}, - {"2/", wp_div, 0, 2}, + {"decimal", wp_setbase, 0, 10}, + {"hex", wp_setbase, 0, 16}, + {"base", wp_const, 0, MAGICADDR_BASE}, + {"depth", w_depth, 0, 0}, + {"+", w_plus, 0, 0}, + {"-", w_minus, 0, 0}, + {"*", w_star, 0, 0}, + {"*/", w_star_slash, 0, 0}, + // TODO */mod + {"/", w_slash, 0, 0}, + {"/mod", w_slash_mod, 0, 0}, + {"0<", w_zero_less, 0, 0}, + {"0=", w_zero_equals, 0, 0}, + {"0<>", w_zero_not_equals, 0, 0}, + {"0>", w_zero_greater, 0, 0}, + {"<", w_less, 0, 0}, + {"=", w_equals, 0, 0}, + {"<>", w_not_equals, 0, 0}, + {">", w_greater, 0, 0}, + {"1+", wp_add, 0, 1}, + {"char+", wp_add, 0, 1}, + {"1-", wp_add, 0, -1}, + {"2+", wp_add, 0, 2}, + {"2-", wp_add, 0, -2}, + {"2*", wp_mul, 0, 2}, + {"chars", wp_mul, 0, 1}, + {"2/", wp_div, 0, 2}, + {"cells", wp_mul, 0, CELL}, + {"cell+", wp_add, 0, CELL}, /* Stack manip */ - {"drop", w_drop, 0, 0}, - {"dup", w_dup, 0, 0}, - {"over", w_over, 0, 0}, - {"swap", w_swap, 0, 0}, - {"rot", w_rot, 0, 0}, - {"tuck", w_tuck, 0, 0}, - {"pick", w_pick, 0, 0}, - {"roll", w_roll, 0, 0}, + {"drop", w_drop, 0, 0}, + {"dup", w_dupe, 0, 0}, + {"?dup", w_question_dupe, 0, 0}, + {"over", w_over, 0, 0}, + {"swap", w_swap, 0, 0}, + {"rot", w_rot, 0, 0}, + {"tuck", w_tuck, 0, 0}, + {"pick", w_pick, 0, 0}, + {"roll", w_roll, 0, 0}, /* Double wide stack manip */ - {"2drop", w_two_drop, 0, 0}, - {"2dup", w_two_dup, 0, 0}, - {"2over", w_two_over, 0, 0}, - {"2swap", w_two_swap, 0, 0}, + {"2drop", w_two_drop, 0, 0}, + {"2dup", w_two_dup, 0, 0}, + {"2over", w_two_over, 0, 0}, + {"2swap", w_two_swap, 0, 0}, // /* Return stack manip */ - {">r", w_to_r, 0, 0}, - {"r>", w_r_from, 0, 0}, - {"r@", w_r_fetch, 0, 0}, + {">r", w_to_r, 0, 0}, + {"r>", w_r_from, 0, 0}, + {"r@", w_r_fetch, 0, 0}, // /* Double wide return stack manip */ - {"2>r", w_two_to_r, 0, 0}, - {"2r>", w_two_r_from, 0, 0}, - {"2r@", w_two_r_fetch, 0, 0}, + {"2>r", w_two_to_r, 0, 0}, + {"2r>", w_two_r_from, 0, 0}, + {"2r@", w_two_r_fetch, 0, 0}, /* Printing */ - {".", w_dot, 0, 0}, - {"type", w_type, 0, 0}, - {"cr", w_cr, 0, 0}, - {"space", w_space, 0, 0}, - {"dump", w_dump, 0, 0}, + {".", w_dot, 0, 0}, + {"type", w_type, 0, 0}, + {"cr", wp_putc, 0, '\n'}, + {"space", wp_putc, 0, ' '}, + {"bl", wp_const, 0, ' '}, + {"dump", w_dump, 0, 0}, + {"emit", w_emit, 0, 0}, /* Control flow */ - {"abort", w_abort, 0, 0}, - {"quit", w_quit, 0, 0}, - {"exit", w_exit, 0, 0}, + {"abort", w_abort, 0, 0}, + {"quit", w_quit, 0, 0}, + {"exit", w_exit, 0, 0}, /* Syntax */ - {":", w_colon, 0, 0}, - {";", w_semicolon, 1, 0}, - {"\\", w_backslash, 1, 0}, // line comment - {"(", w_paren, 1, 0}, // enclosed comment + {":", w_colon, 0, 0}, + {";", w_semicolon, 1, 0}, + {"\\", w_backslash, 1, 0}, // line comment + {"(", w_paren, 1, 0}, // enclosed comment { /* end marker */ } }; diff --git a/src/fh_mem.c b/src/fh_mem.c index d33bd23..6a1a7db 100644 --- a/src/fh_mem.c +++ b/src/fh_mem.c @@ -5,6 +5,11 @@ #include "fh_runtime.h" #include "fh_mem.h" +void fh_setbase(struct fh_thread_s *fh, uint32_t base) { + LOG("BASE = %d", base); + fh->base = base; +} + enum fh_error fh_fetch(struct fh_thread_s *fh, uint32_t addr, uint32_t *dst) { switch (addr) { @@ -45,7 +50,7 @@ enum fh_error fh_store(struct fh_thread_s *fh, uint32_t addr, uint32_t val) { switch (addr) { case MAGICADDR_BASE: - fh->base = val; + fh_setbase(fh, val); break; // TODO more magic diff --git a/src/fh_runtime.c b/src/fh_runtime.c index 4cdea08..6e084f9 100644 --- a/src/fh_runtime.c +++ b/src/fh_runtime.c @@ -221,10 +221,10 @@ static enum fh_error fh_handle_quoted_string( static enum fh_error fh_handle_word( struct fh_thread_s *fh, const char *start, - size_t len + const size_t wordlen ) { - if (len >= MAX_NAME_LEN) { + if (wordlen >= MAX_NAME_LEN) { return FH_ERR_NAME_TOO_LONG; } @@ -235,7 +235,7 @@ static enum fh_error fh_handle_word( uint32_t cnt = 0; enum fh_error rv; while (w->handler) { - if (0 == strncasecmp(start, w->name, len) && w->name[len] == 0) { + if (0 == strncasecmp(start, w->name, wordlen) && w->name[wordlen] == 0) { // word found! if (fh->state == FH_STATE_COMPILE && !w->immediate) { LOG("Compile word call: %s", w->name); @@ -255,9 +255,22 @@ static enum fh_error fh_handle_word( /* word not found, try parsing as number */ errno = 0; char *endptr; - long v = strtol(start, &endptr, (int) fh->base); // XXX if base is 0, this will use auto-detection - if (errno != 0 || endptr == start) { - LOGE("Unknown word and fail to parse as number: %.*s", (int) len, start); + int base = (int) fh->base; + + // prefix can override BASE - this is a syntax extension + if (start[0] == '0') { + if (start[1] == 'x') { + base = 16; + } else if (start[1] == 'b') { + base = 2; + } else if (start[1] == 'o') { + base = 8; + } + } + + long v = strtol(start, &endptr, base); // XXX if base is 0, this will use auto-detection + if (errno != 0 || (endptr - start) != wordlen) { + LOGE("Unknown word and fail to parse as number: %.*s", (int) wordlen, start); return FH_ERR_UNKNOWN_WORD; }