#include "forth_internal.h"

/**
 * Encode a code point using UTF-8
 *
 * Copied from ESPTERM source
 *
 * @param out - output buffer (min 4 bytes). A terminating 0 is written only when the
 *              encoded sequence is shorter than 4 bytes; a 4-byte sequence is NOT terminated.
 * @param utf - code point 0-0x10FFFF
 * @return number of bytes on success, 0 on failure. On failure the buffer holds
 *         U+FFFD (3 bytes) followed by a terminating 0.
 */
static int utf8_encode(char *out, uint32_t utf)
{
    if (utf <= 0x7F) {
        // Plain ASCII
        out[0] = (char) utf;
        out[1] = 0;
        return 1;
    } else if (utf <= 0x07FF) {
        // 2-byte unicode
        out[0] = (char) (((utf >> 6) & 0x1F) | 0xC0);
        out[1] = (char) (((utf >> 0) & 0x3F) | 0x80);
        out[2] = 0;
        return 2;
    } else if (utf <= 0xFFFF) {
        // 3-byte unicode
        out[0] = (char) (((utf >> 12) & 0x0F) | 0xE0);
        out[1] = (char) (((utf >> 6) & 0x3F) | 0x80);
        out[2] = (char) (((utf >> 0) & 0x3F) | 0x80);
        out[3] = 0;
        return 3;
    } else if (utf <= 0x10FFFF) {
        // 4-byte unicode; no terminator, the buffer is only guaranteed to hold 4 bytes
        out[0] = (char) (((utf >> 18) & 0x07) | 0xF0);
        out[1] = (char) (((utf >> 12) & 0x3F) | 0x80);
        out[2] = (char) (((utf >> 6) & 0x3F) | 0x80);
        out[3] = (char) (((utf >> 0) & 0x3F) | 0x80);
        return 4;
    } else {
        // error - use replacement character
        out[0] = (char) 0xEF;
        out[1] = (char) 0xBF;
        out[2] = (char) 0xBD;
        out[3] = 0;
        return 0;
    }
}

/** `.` - pop one cell and print it as a signed decimal number followed by a space. */
static enum fh_error w_dot(struct fh_thread_s *fh, const struct fh_word_s *w)
{
    (void) w;
    enum fh_error rv; // presumably assigned by the TRY macro - confirm in forth_internal.h
    uint32_t a = 0;
    TRY(ds_pop(fh, &a));

    FHPRINT("%d ", (int32_t) a);
    return FH_OK;
}

/**
 * `u.r` - pop the field width, then the value, and print the value as an
 * unsigned decimal number right-aligned in a field of that width.
 */
static enum fh_error w_u_r(struct fh_thread_s *fh, const struct fh_word_s *w)
{
    (void) w;
    enum fh_error rv;
    uint32_t a = 0, n = 0;
    TRY(ds_pop(fh, &n));
    TRY(ds_pop(fh, &a));

    // The `*` width must be an int, and the value is unsigned (U.R), so it must not go through %d.
    FHPRINT("%*u", (int) n, (unsigned int) a);
    return FH_OK;
}

/**
 * `type` - pop a count and an address and print up to `count` bytes of the
 * string found at that address (printing stops early at a 0 byte, as with any
 * printf-style "%.*s").
 *
 * Returns FH_ERR_NOT_APPLICABLE when fh_str_at() yields no pointer for the address.
 */
static enum fh_error w_type(struct fh_thread_s *fh, const struct fh_word_s *w)
{
    (void) w;
    enum fh_error rv;
    uint32_t count = 0, addr = 0;

    LOG("Get count,addr");
    TRY(ds_pop(fh, &count));
    TRY(ds_pop(fh, &addr));

    const char *str = fh_str_at(fh, addr);
    if (!str) {
        LOGE("Type addr out of bounds!");
        return FH_ERR_NOT_APPLICABLE;
    }

    // The precision argument of "%.*s" must be an int.
    FHPRINT("%.*s", (int) count, str);
    return FH_OK;
}

/** Print the single character stored in the word's `param` (used by `cr` and `space`). */
static enum fh_error wp_putc(struct fh_thread_s *fh, const struct fh_word_s *w)
{
    (void) fh;
    FHPRINT("%c", (int) w->param);
    return FH_OK;
}

/** `??` - debugging aid: print the contents of the data stack and the return stack. */
static enum fh_error w_debug_dump(struct fh_thread_s *fh, const struct fh_word_s *w)
{
    (void) w;

    FHPRINT("DS ");
    for (int i = 0; i < fh->data_stack_top; i++) {
        FHPRINT("%d ", (int32_t) fh->data_stack[i]);
    }

    FHPRINT("\nRS ");
    for (int i = 0; i < fh->return_stack_top; i++) {
        FHPRINT("%d ", (int32_t) fh->return_stack[i]);
    }

    FHPRINT("\n");
    return FH_OK;
}

/**
 * `emit` - pop a code point and print it encoded as UTF-8.
 * A value outside 0-0x10FFFF is printed as U+FFFD (replacement character).
 */
static enum fh_error w_emit(struct fh_thread_s *fh, const struct fh_word_s *w)
{
    (void) w;
    enum fh_error rv;
    uint32_t a = 0;
    TRY(ds_pop(fh, &a));

    char buf[5];
    int num = utf8_encode(buf, a);
    if (num == 0) {
        // Encoding failed: the buffer holds the 3-byte U+FFFD. Print it rather than
        // silently printing nothing.
        num = 3;
    }

    FHPRINT("%.*s", num, buf);
    return FH_OK;
}

/** `see` - read the next word name from the input and pass it to fh_see_word(). */
static enum fh_error w_see(struct fh_thread_s *fh, const struct fh_word_s *w)
{
    (void) w;
    enum fh_error rv;
    char *wordname = NULL;
    size_t namelen = 0;

    fh_input_consume_spaces(fh);
    TRY(fh_input_read_word(fh, &wordname, &namelen));
    TRY(fh_see_word(fh, wordname, namelen));
    return FH_OK;
}

/**
 * `s"` / `s\"` - read a quoted string from the input directly into the heap.
 *
 * The word's `param` selects the escaped variant (param == 1).
 *
 * Interpret state: the string stays on the heap and its address and length are
 * pushed to the data stack.
 * Any other state: an FH_INSTR_ALLOCSTR instruction carrying the length is
 * written just in front of the string.
 *
 * In both cases `here` is advanced to the word-aligned end of the string.
 */
static enum fh_error w_s_quote(struct fh_thread_s *fh, const struct fh_word_s *w)
{
    enum fh_error rv;
    size_t len = 0;
    const bool interpreting = (fh->state == FH_STATE_INTERPRET);

    // leave space for the instr in case of compiled version
    uint32_t addr = fh->here + (interpreting ? 0 : INSTR_SIZE);
    if (addr > HEAP_END) {
        // HEAP_END - addr below would wrap around and yield a bogus, huge capacity
        LOGE("Heap full");
        return FH_ERR_HEAP_FULL;
    }

    /* read the string straight into HEAP */
    fh_input_consume_spaces(fh);
    char *start = (char *) &fh->heap[addr];
    TRY(fh_input_read_quotedstring(fh, w->param == 1, start, HEAP_END - addr, &len));
    fh->here = WORDALIGNED(addr + len);

    if (interpreting) {
        LOG("Interpret a string alloc: \"%.*s\"", (int) len, start);
        TRY(ds_push(fh, addr));
        TRY(ds_push(fh, (uint32_t) len));
    } else {
        LOG("Compile a string: \"%.*s\"", (int) len, start);
        // Designated initializer zeroes any members not listed, so no indeterminate
        // bytes end up on the heap.
        struct fh_instruction_s instr = {
            .kind = FH_INSTR_ALLOCSTR,
            .data = len,
        };
        // NOTE(review): the result of fh_heap_write() is not checked - confirm whether it can fail.
        fh_heap_write(fh, addr - INSTR_SIZE, &instr, INSTR_SIZE);
    }

    return FH_OK;
}

/**
 * Delimiter test for fh_input_read_delimited(): true when `c` equals the
 * character pointed to by `param`, or when `c` is the terminating 0.
 */
static bool chartest_equals_or_end(char c, void *param)
{
    const char delimiter = *(const char *) param;
    return delimiter == c || c == 0;
}

/**
 * `."`, `.(` and `.\"` - print a string literal.
 *
 * The word's `param` is the closing delimiter; a backslash selects the escaped
 * variant, which is parsed by fh_input_read_quotedstring().
 *
 * Interpret state: the string is printed immediately and the heap pointer is
 * left untouched.
 * Any other state: the string is stored on the heap behind an FH_INSTR_TYPESTR
 * instruction and `here` is advanced to the word-aligned end of the string.
 */
static enum fh_error w_dot_quote(struct fh_thread_s *fh, const struct fh_word_s *w)
{
    enum fh_error rv;
    size_t len = 0;

    // leave space for the instr in case of compiled version
    uint32_t addr = fh->here + (fh->state == FH_STATE_INTERPRET ? 0 : INSTR_SIZE);
    if (addr > HEAP_END) {
        // HEAP_END - addr below would wrap around and yield a bogus, huge capacity
        LOGE("Heap full");
        return FH_ERR_HEAP_FULL;
    }

    /* read the string straight into HEAP, but don't advance the heap pointer,
     * so the string is immediately discarded again */
    fh_input_consume_spaces(fh);

    char *start;
    char c = (char) w->param;
    uint32_t capacity = HEAP_END - addr;

    if (c == '\\') {
        start = (char *) &fh->heap[addr];
        TRY(fh_input_read_quotedstring(fh, 1, start, capacity, &len));
    } else {
        start = NULL;
        TRY(fh_input_read_delimited(fh, &start, &len, chartest_equals_or_end, &c));
        if (len > capacity) {
            LOGE("String too long for heap");
            return FH_ERR_HEAP_FULL;
        }
        if (fh->state == FH_STATE_COMPILE) {
            // NOTE(review): the result of fh_heap_copyptr() is not checked - confirm whether it can fail.
            fh_heap_copyptr(fh, addr, start, len);
        }
    }

    if (fh->state == FH_STATE_INTERPRET) {
        FHPRINT("%.*s", (int) len, start);
        // the string is invalidated immediately, heap pointer is NOT advanced.
    } else {
        LOG("Compile a string: \"%.*s\"", (int) len, start);
        TRY(fh_put_instr(fh, FH_INSTR_TYPESTR, len));
        fh->here = WORDALIGNED(addr + len); // at the end of the string
    }

    return FH_OK;
}

/** Table of the text/output builtins defined in this file; terminated by an all-zero entry. */
const struct name_and_handler fh_builtins_text[] = {
    {"s\"",    w_s_quote,    1, 0},
    {"s\\\"",  w_s_quote,    1, 1}, // escaped
    {".\"",    w_dot_quote,  1, '"'},
    {".(",     w_dot_quote,  1, ')'},
    {".\\\"",  w_dot_quote,  1, '\\'}, // escaped, this is non-standard
    {".",      w_dot,        0, 0},
    {"type",   w_type,       0, 0},
    {"cr",     wp_putc,      0, '\n'},
    {"space",  wp_putc,      0, ' '},
    {"bl",     wp_const,     0, ' '},
    {"u.r",    w_u_r,        0, 0},
    {"??",     w_debug_dump, 0, 0},
    {"emit",   w_emit,       0, 0},
    {"see",    w_see,        0, 0},
    {0} /* end marker; `{ }` is not a valid initializer before C23 */
};