#include "forth_internal.h" /** * Encode a code point using UTF-8 * * Copied from ESPTERM source * * @param out - output buffer (min 4 characters), will be 0-terminated if shorten than 4 * @param utf - code point 0-0x10FFFF * @return number of bytes on success, 0 on failure (also produces U+FFFD, which uses 3 bytes) */ static int utf8_encode(char *out, uint32_t utf) { if (utf <= 0x7F) { // Plain ASCII out[0] = (char) utf; out[1] = 0; return 1; } else if (utf <= 0x07FF) { // 2-byte unicode out[0] = (char) (((utf >> 6) & 0x1F) | 0xC0); out[1] = (char) (((utf >> 0) & 0x3F) | 0x80); out[2] = 0; return 2; } else if (utf <= 0xFFFF) { // 3-byte unicode out[0] = (char) (((utf >> 12) & 0x0F) | 0xE0); out[1] = (char) (((utf >> 6) & 0x3F) | 0x80); out[2] = (char) (((utf >> 0) & 0x3F) | 0x80); out[3] = 0; return 3; } else if (utf <= 0x10FFFF) { // 4-byte unicode out[0] = (char) (((utf >> 18) & 0x07) | 0xF0); out[1] = (char) (((utf >> 12) & 0x3F) | 0x80); out[2] = (char) (((utf >> 6) & 0x3F) | 0x80); out[3] = (char) (((utf >> 0) & 0x3F) | 0x80); // out[4] = 0; return 4; } else { // error - use replacement character out[0] = (char) 0xEF; out[1] = (char) 0xBF; out[2] = (char) 0xBD; out[3] = 0; return 0; } } static enum fh_error w_dot(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; uint32_t a = 0; TRY(ds_pop(fh, &a)); FHPRINT("%d ", (int32_t) a); return FH_OK; } static enum fh_error w_u_r(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; uint32_t a = 0, n = 0; TRY(ds_pop(fh, &n)); TRY(ds_pop(fh, &a)); FHPRINT("%*d", n, a); return FH_OK; } static enum fh_error w_type(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; uint32_t count = 0, addr = 0; LOG("Get count,addr"); TRY(ds_pop(fh, &count)); TRY(ds_pop(fh, &addr)); const char *str = fh_str_at(fh, addr); if (!str) { LOGE("Type addr out of bounds!"); return FH_ERR_NOT_APPLICABLE; } FHPRINT("%.*s", count, str); return FH_OK; } static enum fh_error wp_putc(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) fh; FHPRINT("%c", w->param); return FH_OK; } static enum fh_error w_debug_dump(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; (void) fh; FHPRINT("DS "); for (int i = 0; i < fh->data_stack_top; i++) { FHPRINT("%d ", fh->data_stack[i]); } FHPRINT("\nRS "); for (int i = 0; i < fh->return_stack_top; i++) { FHPRINT("%d ", fh->return_stack[i]); } FHPRINT("\n"); return FH_OK; } static enum fh_error w_emit(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; uint32_t a; TRY(ds_pop(fh, &a)); char buf[5]; int num = utf8_encode(buf, a); FHPRINT("%.*s", num, buf); return FH_OK; } static enum fh_error w_see(struct fh_thread_s *fh, const struct fh_word_s *w) { enum fh_error rv; char *wordname; size_t namelen = 0; fh_input_consume_spaces(fh); TRY(fh_input_read_word(fh, &wordname, &namelen)); TRY(fh_see_word(fh, wordname, namelen)); return FH_OK; } static enum fh_error w_s_quote(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; size_t len; uint32_t addr = fh->here + (fh->state == FH_STATE_INTERPRET ? 0 : INSTR_SIZE); /* read the string straight into HEAP */ fh_input_consume_spaces(fh); char *start = (char *) &fh->heap[addr]; TRY(fh_input_read_quotedstring(fh, w->param == 1, start, HEAP_END - addr, &len)); fh->here = WORDALIGNED(addr + len); struct fh_instruction_s instr; if (fh->state == FH_STATE_INTERPRET) { LOG("Interpret a string alloc: \"%.*s\"", len, start); TRY(ds_push(fh, addr)); TRY(ds_push(fh, len)); } else { LOG("Compile a string: \"%.*s\"", len, start); instr.kind = FH_INSTR_ALLOCSTR; instr.data = len; fh_heap_write(fh, addr - INSTR_SIZE, &instr, INSTR_SIZE); } return FH_OK; } static bool chartest_equals_or_end(char c, void *param) { char cc = *(char*)param; return cc == c || c == 0; } static enum fh_error w_dot_quote(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; size_t len; // leave space for the instr in case of compiled version uint32_t addr = fh->here + (fh->state == FH_STATE_INTERPRET ? 0 : INSTR_SIZE); /* read the string straight into HEAP, but don't advance the heap pointer, so the string is immediately discarded again */ fh_input_consume_spaces(fh); char *start; char c = (char)w->param; uint32_t capacity = HEAP_END - addr; if (c == '\\') { start = (char *) &fh->heap[addr]; TRY(fh_input_read_quotedstring(fh, 1, start, capacity, &len)); } else { start = NULL; TRY(fh_input_read_delimited(fh, &start, &len, chartest_equals_or_end, &c)); if (len > capacity) { LOGE("String too low for heap"); return FH_ERR_HEAP_FULL; } if (fh->state == FH_STATE_COMPILE) { fh_heap_copyptr(fh, addr, start, len); } } struct fh_instruction_s instr; if (fh->state == FH_STATE_INTERPRET) { FHPRINT("%.*s", (int) len, start); // the string is invalidated immediately, heap pointer is NOT advanced. } else { LOG("Compile a string: \"%.*s\"", len, start); TRY(fh_put_instr(fh, FH_INSTR_TYPESTR, len)); fh->here = WORDALIGNED(addr + len); // at the end of the string } return FH_OK; } static enum fh_error w_less_hash(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; fh->pictnumptr = WORDBUF_LASTCHAR_ADDR; return FH_OK; } static enum fh_error w_hash_greater(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; uint64_t dummy; TRY(ds_pop_dw(fh, &dummy)); uint32_t len = WORDBUF_LASTCHAR_ADDR - fh->pictnumptr; uint32_t addr; TRY(fh_heap_reserve(fh, len, &addr)); fh_heap_copy(fh, addr, fh->pictnumptr+1, len); LOG("#> output: \"%.*s\"", len, &fh->heap[fh->pictnumptr+1]); TRY(ds_push(fh, addr)); TRY(ds_push(fh, len)); return FH_OK; } static enum fh_error pictnum_prepend_char(struct fh_thread_s *fh, char c) { enum fh_error rv; if (fh->pictnumptr < WORDBUF_ADDR) { return FH_ERR_PICTNUM_FULL; } LOG("Prepend: %c", c); TRY(fh_store_char(fh, fh->pictnumptr, c)); fh->pictnumptr--; return FH_OK; } static char dig2char(uint64_t digit) { char repr; if (digit < 10) { repr = '0' + digit; } else if (digit < 36) { repr = 'A' + (digit - 10); } else { repr = '?'; // XXX bad base? } return repr; } static enum fh_error w_hash(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; uint64_t num; TRY(ds_pop_dw(fh, &num)); uint64_t digit = num % fh->base; TRY(pictnum_prepend_char(fh, dig2char(digit))); TRY(ds_push_dw(fh, num / (uint64_t)fh->base)); return FH_OK; } static enum fh_error w_hash_s(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; uint64_t num; TRY(ds_pop_dw(fh, &num)); do { uint64_t digit = num % fh->base; num = num / (uint64_t)fh->base; TRY(pictnum_prepend_char(fh, dig2char(digit))); } while (num > 0); TRY(ds_push_dw(fh, num)); // this is zero now return FH_OK; } static enum fh_error w_sign(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; uint32_t num; TRY(ds_pop(fh, &num)); if ((int32_t)num < 0) { TRY(pictnum_prepend_char(fh, '-')); } return FH_OK; } static enum fh_error w_hold(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; uint32_t ch; TRY(ds_pop(fh, &ch)); char buf[5]; int num = utf8_encode(buf, ch); for(int i=num-1;i>=0;i--) { TRY(pictnum_prepend_char(fh, buf[i])); } return FH_OK; } static enum fh_error w_holds(struct fh_thread_s *fh, const struct fh_word_s *w) { (void) w; enum fh_error rv; uint32_t ch; uint32_t count = 0, addr = 0; LOG("Get count,addr"); TRY(ds_pop(fh, &count)); TRY(ds_pop(fh, &addr)); const char *str = fh_str_at(fh, addr); if (!str) { LOGE("HOLDS addr out of bounds!"); return FH_ERR_NOT_APPLICABLE; } for(int i=count-1;i>=0;i--) { TRY(pictnum_prepend_char(fh, str[i])); } return FH_OK; } const struct name_and_handler fh_builtins_text[] = { {"s\"", w_s_quote, 1, 0}, {"s\\\"", w_s_quote, 1, 1}, // escaped {".\"", w_dot_quote, 1, '"'}, {".(", w_dot_quote, 1, ')'}, {".\\\"", w_dot_quote, 1, '\\'}, // escaped, this is non-standard {".", w_dot, 0, 0}, {"type", w_type, 0, 0}, {"cr", wp_putc, 0, '\n'}, {"space", wp_putc, 0, ' '}, {"bl", wp_const, 0, ' '}, {"u.r", w_u_r, 0, 0}, {"??", w_debug_dump, 0, 0}, {"emit", w_emit, 0, 0}, {"see", w_see, 0, 0}, {"<#", w_less_hash, 0, 0}, {"#>", w_hash_greater, 0, 0}, {"#", w_hash, 0, 0}, {"#s", w_hash_s, 0, 0}, {"sign", w_sign, 0, 0}, {"hold", w_hold, 0, 0}, {"holds", w_holds, 0, 0}, { /* end marker */ } };