#include #include #include #include #include #include #define CONTROL_STACK_DEPTH 1024 #define DATA_STACK_DEPTH 1024 #define RETURN_STACK_DEPTH 1024 #define MAX_NAME_LEN 64 #define DICT_SIZE 1024 #define COMPILED_BUFFER_SIZE (1024*1024) #define HEAP_SIZE (1024*1024) #define MAXLINE 65535 struct fh_thread_s; struct fh_word_s; struct fh_instruction_s; /* if the return address is this, we should drop back to interactive mode */ #define MAGICADDR_INTERACTIVE 0xFFFFFFFFULL #define ALIGNWORD(var) \ do { \ while (((var) % 4) != 0) { (var)++; } \ } while (0) /* logging, TODO make levels configurable */ #define LOG(format, ...) fprintf(stderr, format "\r\n", ##__VA_ARGS__) #define LOGI(format, ...) fprintf(stderr, "\x1b[32m" format "\x1b[m\r\n", ##__VA_ARGS__) #define LOGE(format, ...) fprintf(stderr, "\x1b[31;1m" format "\x1b[m\r\n", ##__VA_ARGS__) /* Forth standard output. XXX should be stdout, but then colors get mangled */ #define FHPRINT(format, ...) fprintf(stderr, "\x1b[33;1m" format "\x1b[m", ##__VA_ARGS__) enum fh_error { FH_OK = 0, FH_ERR_CS_OVERFLOW, FH_ERR_DS_OVERFLOW, FH_ERR_RS_OVERFLOW, FH_ERR_CS_UNDERFLOW, FH_ERR_DS_UNDERFLOW, FH_ERR_RS_UNDERFLOW, FH_ERR_HEAP_FULL, FH_ERR_DICT_FULL, FH_ERR_COMPILE_FULL, FH_ERR_NAME_TOO_LONG, FH_ERR_INVALID_STATE, FH_ERR_INTERNAL, FH_ERR_UNKNOWN_WORD, FH_ERR_MAX, }; static const char *errornames[] = { [FH_OK] = "OK", [FH_ERR_CS_OVERFLOW] = "CS_OVERFLOW", [FH_ERR_DS_OVERFLOW] = "DS_OVERFLOW", [FH_ERR_RS_OVERFLOW] = "RS_OVERFLOW", [FH_ERR_CS_UNDERFLOW] = "CS_UNDERFLOW", [FH_ERR_DS_UNDERFLOW] = "DS_UNDERFLOW", [FH_ERR_RS_UNDERFLOW] = "RS_UNDERFLOW", [FH_ERR_HEAP_FULL] = "HEAP_FULL", [FH_ERR_DICT_FULL] = "DICT_FULL", [FH_ERR_COMPILE_FULL] = "COMPILE_FULL", [FH_ERR_NAME_TOO_LONG] = "NAME_TOO_LONG", [FH_ERR_INVALID_STATE] = "INVALID_STATE", [FH_ERR_INTERNAL] = "INTERNAL", [FH_ERR_UNKNOWN_WORD] = "UNKNOWN_WORD", }; const char *fherr_name(enum fh_error e) { if (e >= FH_ERR_MAX) { return "Unknown"; } return errornames[e]; } typedef enum fh_error (*word_exec_t)(struct fh_thread_s *fh); struct fh_word_s { char name[MAX_NAME_LEN]; word_exec_t handler; bool builtin; bool immediate; uint32_t start; uint32_t end; }; enum fb_instruction_kind { /* Data is a word number in the dict */ FH_INSTR_WORD, /* Data is a numeric value to push on the data stack */ FH_INSTR_NUMBER, }; struct fh_instruction_s { enum fb_instruction_kind kind; uint32_t data; }; /** words that are not in the dict, have special effect */ enum compiler_word { CPLWORD_ENDWORD = DICT_SIZE + 1, CPLWORD_ALLOCSTR, CPLWORD_TYPESTR, }; _Static_assert(sizeof(struct fh_instruction_s) % 4 == 0, "Instruction struct is aligned"); enum fh_state { FH_STATE_INTERPRET = 0, FH_STATE_COMPILE, FH_STATE_SHUTDOWN, }; enum fh_substate { FH_SUBSTATE_NONE = 0, FH_SUBSTATE_COLONNAME, FH_SUBSTATE_SQUOTE, FH_SUBSTATE_DOTQUOTE, FH_SUBSTATE_PARENCOMMENT, FH_SUBSTATE_LINECOMMENT, }; struct fh_thread_s { /** Control stack */ uint32_t control_stack[CONTROL_STACK_DEPTH]; size_t control_stack_top; /** Data stack */ uint32_t data_stack[DATA_STACK_DEPTH]; size_t data_stack_top; /** Return stack */ uint32_t return_stack[RETURN_STACK_DEPTH]; size_t return_stack_top; /** Data heap */ uint8_t heap[HEAP_SIZE]; size_t heap_top; /** Compile buffer, used for both word data and literals */ uint8_t compile[COMPILED_BUFFER_SIZE]; size_t compile_top; /** Pointer into the compile buffer for execution */ uint32_t execptr; /** Word dict */ struct fh_word_s dict[DICT_SIZE]; uint32_t dict_top; /** Forth state */ enum fh_state state; /** Forth sub-state */ enum fh_substate substate; /** Word currently being executed - a pointer is placed here * before calling the handler */ struct fh_word_s *exec_word; }; #define TRY(x) \ do { \ if (FH_OK != (rv = (x))) return rv; \ } while (0) #define TRY_FAIL(x) \ do { \ if (FH_OK != (rv = (x))) goto fail; \ } while (0) /** Add a word to the dictionary. */ enum fh_error fh_add_word(const struct fh_word_s *w, struct fh_thread_s *fh) { if (fh->dict_top == DICT_SIZE) { return FH_ERR_DICT_FULL; } memcpy(&fh->dict[fh->dict_top++], w, sizeof(struct fh_word_s)); return FH_OK; } //region Push & Pop static inline enum fh_error ds_pop(struct fh_thread_s *fh, uint32_t *out) { if (fh->data_stack_top == 0) { LOG("DS pop UNDERFLOW"); return FH_ERR_DS_UNDERFLOW; } *out = fh->data_stack[--fh->data_stack_top]; LOG("DS pop %d", *out); return FH_OK; } static inline enum fh_error rs_pop(struct fh_thread_s *fh, uint32_t *out) { if (fh->return_stack_top == 0) { LOG("RS pop UNDERFLOW"); return FH_ERR_RS_UNDERFLOW; } *out = fh->return_stack[--fh->return_stack_top]; LOG("RS pop %d", *out); return FH_OK; } static inline enum fh_error cs_pop(struct fh_thread_s *fh, uint32_t *out) { if (fh->control_stack_top == 0) { LOG("CS pop UNDERFLOW"); return FH_ERR_CS_UNDERFLOW; } *out = fh->control_stack[--fh->control_stack_top]; LOG("CS pop %d", *out); return FH_OK; } static inline enum fh_error ds_push(struct fh_thread_s *fh, uint32_t in) { LOG("DS push %d", in); if (fh->data_stack_top == DATA_STACK_DEPTH) { return FH_ERR_DS_OVERFLOW; } fh->data_stack[fh->data_stack_top++] = in; return FH_OK; } static inline enum fh_error rs_push(struct fh_thread_s *fh, uint32_t in) { LOG("RS push %d", in); if (fh->return_stack_top == RETURN_STACK_DEPTH) { return FH_ERR_RS_OVERFLOW; } fh->return_stack[fh->return_stack_top++] = in; return FH_OK; } static inline enum fh_error cs_push(struct fh_thread_s *fh, uint32_t in) { LOG("CS push %d", in); if (fh->control_stack_top == CONTROL_STACK_DEPTH) { return FH_ERR_CS_OVERFLOW; } fh->control_stack[fh->control_stack_top++] = in; return FH_OK; } //endregion Push & Pop enum fh_error fh_allot( struct fh_thread_s *fh, size_t len, uint32_t *addr ) { uint32_t p = fh->heap_top; ALIGNWORD(p); if (p + len > HEAP_SIZE) { return FH_ERR_HEAP_FULL; } *addr = p; size_t next = p + len; ALIGNWORD(next); fh->heap_top = next; return FH_OK; } enum fh_error fh_compile_reserve( struct fh_thread_s *fh, size_t len, uint32_t *addr ) { uint32_t p = fh->compile_top; // align up ALIGNWORD(p); if (p + len > COMPILED_BUFFER_SIZE) { return FH_ERR_HEAP_FULL; } *addr = p; size_t next = p + len; ALIGNWORD(next); fh->compile_top = next; return FH_OK; } //region Builtin Words enum fh_error w_add(struct fh_thread_s *fh) { enum fh_error rv; uint32_t a = 0, b = 0; TRY(ds_pop(fh, &a)); TRY(ds_pop(fh, &b)); TRY(ds_push(fh, a + b)); return FH_OK; } enum fh_error w_sub(struct fh_thread_s *fh) { enum fh_error rv; uint32_t a = 0, b = 0; TRY(ds_pop(fh, &a)); TRY(ds_pop(fh, &b)); TRY(ds_push(fh, a - b)); return FH_OK; } enum fh_error w_mul(struct fh_thread_s *fh) { enum fh_error rv; uint32_t a = 0, b = 0; TRY(ds_pop(fh, &a)); TRY(ds_pop(fh, &b)); TRY(ds_push(fh, a * b)); return FH_OK; } enum fh_error w_user_word(struct fh_thread_s *fh) { enum fh_error rv; const struct fh_word_s *w; const struct fh_word_s *w2; uint32_t wn; call: w = fh->exec_word; if (!w) { return FH_ERR_INTERNAL; } LOG("Run user word: %s", w->name); TRY(rs_push(fh, fh->execptr)); fh->execptr = w->start; instr:; // make sure it's aligned ALIGNWORD(fh->execptr); const struct fh_instruction_s *instr = (const struct fh_instruction_s *) &fh->compile[fh->execptr]; fh->execptr += sizeof(struct fh_instruction_s); uint32_t strl; uint32_t addr = 0; switch (instr->kind) { case FH_INSTR_NUMBER: TRY(ds_push(fh, instr->data)); goto instr; case FH_INSTR_WORD: wn = instr->data; switch (wn) { case CPLWORD_ALLOCSTR: case CPLWORD_TYPESTR: strl = *((uint32_t *) &fh->compile[fh->execptr]); fh->execptr += 4; if (wn == CPLWORD_ALLOCSTR) { TRY(fh_allot(fh, strl, &addr)); memcpy(&fh->heap[addr], &fh->compile[fh->execptr], strl); LOG("Exec: alloc-str \"%.*s\"", strl, &fh->heap[addr]); TRY(ds_push(fh, addr)); TRY(ds_push(fh, strl)); fh->execptr += strl; } else { FHPRINT("%.*s", (int) strl, &fh->compile[fh->execptr]); LOG("Exec: type-str \"%.*s\"", strl, &fh->heap[addr]); } goto instr; case CPLWORD_ENDWORD: LOG("Exec: word-end (RETURN)"); TRY(rs_pop(fh, &fh->execptr)); if (fh->execptr == MAGICADDR_INTERACTIVE) { goto end; } goto instr; default: w2 = &fh->dict[instr->data]; if (w2->builtin) { LOG("Exec: builtin-word %s", w2->name); w2->handler(fh); goto instr; } else { LOG("Exec: user-word %s (CALL)", w2->name); fh->exec_word = &fh->dict[instr->data]; goto call; } } } end: return FH_OK; } enum fh_error w_colon(struct fh_thread_s *fh) { if (fh->state != FH_STATE_INTERPRET) { return FH_ERR_INVALID_STATE; } LOG("state=COMPILE"); fh->state = FH_STATE_COMPILE; fh->substate = FH_SUBSTATE_COLONNAME; if (fh->dict_top >= DICT_SIZE) { return FH_ERR_DICT_FULL; } fh->dict[fh->dict_top].start = fh->compile_top; fh->dict[fh->dict_top].handler = w_user_word; return FH_OK; } enum fh_error w_semicolon(struct fh_thread_s *fh) { enum fh_error rv; uint32_t addr = 0; struct fh_instruction_s instr; if (fh->state != FH_STATE_COMPILE) { return FH_ERR_INVALID_STATE; } TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr)); instr.kind = FH_INSTR_WORD; instr.data = CPLWORD_ENDWORD; memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s)); /* Return to interpret state */ LOG("state=INTERPRET"); fh->state = FH_STATE_INTERPRET; fh->dict[fh->dict_top].end = fh->compile_top; /* one past the end cell */ fh->dict_top++; return FH_OK; } enum fh_error w_dot(struct fh_thread_s *fh) { enum fh_error rv; uint32_t a = 0; TRY(ds_pop(fh, &a)); FHPRINT("%d ", (int32_t) a); return FH_OK; } enum fh_error w_type(struct fh_thread_s *fh) { enum fh_error rv; uint32_t count = 0, addr = 0; TRY(ds_pop(fh, &count)); TRY(ds_pop(fh, &addr)); FHPRINT("%.*s", count, &fh->heap[addr]); return FH_OK; } enum fh_error w_cr(struct fh_thread_s *fh) { FHPRINT("\r\n"); return FH_OK; } enum fh_error w_space(struct fh_thread_s *fh) { FHPRINT(" "); return FH_OK; } enum fh_error w_s_quote(struct fh_thread_s *fh) { fh->substate = FH_SUBSTATE_SQUOTE; return FH_OK; } enum fh_error w_dot_quote(struct fh_thread_s *fh) { fh->substate = FH_SUBSTATE_DOTQUOTE; return FH_OK; } enum fh_error w_backslash(struct fh_thread_s *fh) { fh->substate = FH_SUBSTATE_LINECOMMENT; return FH_OK; } enum fh_error w_paren(struct fh_thread_s *fh) { fh->substate = FH_SUBSTATE_PARENCOMMENT; return FH_OK; } enum fh_error w_bye(struct fh_thread_s *fh) { LOG("state=SHUTDOWN"); fh->state = FH_STATE_SHUTDOWN; return FH_OK; } enum fh_error register_builtin_words(struct fh_thread_s *fh) { struct name_and_handler { const char *name; word_exec_t handler; bool immediate; }; const struct name_and_handler builtins[] = { {"s\"", w_s_quote}, {".\"", w_dot_quote}, /* Compiler control words */ {"bye", w_bye}, /* Basic arithmetics */ {"+", w_add}, {"-", w_sub}, {"*", w_mul}, /* Control words */ {":", w_colon}, {";", w_semicolon, 1}, {".", w_dot}, {"type", w_type}, {"cr", w_cr}, {"space", w_space}, {"\\", w_backslash}, // line comment {"(", w_paren}, // enclosed comment { /* end marker */ } }; struct fh_word_s w; const struct name_and_handler *p = builtins; enum fh_error rv; while (p->handler) { strcpy(w.name, p->name); w.handler = p->handler; w.builtin = 1; w.immediate = p->immediate; rv = fh_add_word(&w, fh); if (rv != FH_OK) { return rv; } p++; } return FH_OK; } #undef ADDWORD //endregion Builtin Words enum fh_error fh_init_thread(struct fh_thread_s *fh) { enum fh_error rv; /* Make sure we have a clean state */ memset(fh, 0, sizeof(struct fh_thread_s)); TRY(register_builtin_words(fh)); fh->execptr = MAGICADDR_INTERACTIVE; return FH_OK; } enum fh_error fh_handle_quoted_string( struct fh_thread_s *fh, char *start, size_t len ) { enum fh_error rv; uint32_t addr = 0; uint32_t addr2 = 0; struct fh_instruction_s instr; if (fh->state == FH_STATE_INTERPRET) { switch (fh->substate) { case FH_SUBSTATE_SQUOTE: TRY(fh_allot(fh, len, &addr)); memcpy(&fh->heap[addr], start, len); TRY(ds_push(fh, addr)); TRY(ds_push(fh, len)); break; case FH_SUBSTATE_DOTQUOTE: FHPRINT("%.*s", (int) len, start); break; default: LOGE("Bad substate in interpret mode: %d", fh->substate); } } else { /* compile */ TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr)); TRY(fh_compile_reserve(fh, len + 4, &addr2)); instr.kind = FH_INSTR_WORD; instr.data = fh->substate == FH_SUBSTATE_SQUOTE ? CPLWORD_ALLOCSTR : CPLWORD_TYPESTR; uint32_t len32 = len; /* string is encoded as a special compiler command, the size, * and then the string, all 4-byte aligned. */ memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s)); memcpy(&fh->compile[addr2], &len32, 4); memcpy(&fh->compile[addr2 + 4], &start, len); } return FH_OK; } enum fh_error fh_handle_word( struct fh_thread_s *fh, char *start, size_t len ) { if (len >= MAX_NAME_LEN) { return FH_ERR_NAME_TOO_LONG; } /* First, try if it's a known word */ struct fh_word_s *w = &fh->dict[0]; struct fh_instruction_s instr; uint32_t cnt = 0; uint32_t addr = 0; enum fh_error rv; while (w->handler) { if (0 == strncasecmp(start, w->name, len) && w->name[len]==0) { // word found! if (fh->state == FH_STATE_COMPILE && !w->immediate) { LOG("Compile word call: %s", w->name); TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr)); instr.kind = FH_INSTR_WORD; instr.data = cnt; memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s)); } else { /* interpret */ LOG("Interpret word: %s", w->name); fh->exec_word = w; TRY(w->handler(fh)); } return FH_OK; } w++; cnt++; } /* word not found, try parsing as number */ errno = 0; char *endptr; long v = strtol(start, &endptr, 0); if (errno != 0 || endptr == start) { LOGE("Unknown word and fail to parse as number: %.*s", (int)len, start); return FH_ERR_UNKNOWN_WORD; } if (fh->state == FH_STATE_COMPILE) { LOG("Compile number: %d", v); TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr)); instr.kind = FH_INSTR_NUMBER; instr.data = (uint32_t) v; memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s)); } else { /* interpret */ LOG("Interpret number: %d", v); TRY(ds_push(fh, (uint32_t)v)); } return FH_OK; } static inline bool iswhite(char c) { return c == ' ' || c == '\n' || c == '\t' || c == '\r'; } static inline bool isnl(char c) { return c == '\n' || c == '\r'; } enum fh_error fh_process_line(struct fh_thread_s *fh, char *linebuf) { enum fh_error rv; char *rp = linebuf; char c; LOGI("%s", linebuf); while (0 != (c = *rp) && fh->state != FH_STATE_SHUTDOWN) { /* end on newline */ if (isnl(c)) { goto done; } /* skip whitespace */ if (iswhite(c)) { rp++; continue; } char *end; size_t length; switch (fh->substate) { case FH_SUBSTATE_NONE: case FH_SUBSTATE_COLONNAME: /* try to read a word */ end = strchr(rp, ' '); if (end) { length = end - rp; /* exclude the space */ } else { length = strlen(rp); } if (fh->substate == FH_SUBSTATE_NONE) { /* eval a word */ LOG("Handle \"%.*s\"", (int)length, rp); TRY(fh_handle_word(fh, rp, length)); } else { /* new word's name is found */ LOG("New word name = \"%.*s\"", (int)length, rp); strncpy(fh->dict[fh->dict_top].name, rp, length); fh->substate = FH_SUBSTATE_NONE; } if (end) { rp = end + 1; } else { goto done; } break; case FH_SUBSTATE_SQUOTE: case FH_SUBSTATE_DOTQUOTE: end = strchr(rp, '"'); if (end) { length = end - rp - 1; LOG("Quoted string: \"%.*s\"", (int)length, rp); TRY(fh_handle_quoted_string(fh, rp, length)); fh->substate = FH_SUBSTATE_NONE; rp = end + 1; } else { /* no end. this is weird. */ LOGE("Unterminated quoted string!"); goto done; } break; case FH_SUBSTATE_PARENCOMMENT: end = strchr(rp, ')'); if (end) { LOG("Discard inline comment"); fh->substate = FH_SUBSTATE_NONE; rp = end + 1; } else { /* no end, discard all */ LOGE("Unterminated parenthesis comment"); goto done; } break; case FH_SUBSTATE_LINECOMMENT: LOG("Discard line comment"); goto done; // just discard the rest } } done: LOG("Line done."); return FH_OK; } int main(int argc, char *argv[]) { enum fh_error rv; struct fh_thread_s fh; rv = fh_init_thread(&fh); if (rv != FH_OK) { LOGE("Error in forth init: %s", fherr_name(rv)); return 1; } bool interactive = true; FILE *infile = stdin; // TODO use getopt? for (int a = 1; a < argc; a++) { if (argv[a][0] == '-') { // opt } else { infile = fopen(argv[a], "r"); interactive = false; if (!infile) { LOGE("Error opening infile: %s", argv[a]); return 1; } } } /* process input line by line */ int linecnt = 0; char linebuf[MAXLINE]; while (fh.state != FH_STATE_SHUTDOWN && fgets(linebuf, MAXLINE, infile)) { linecnt++; // trim size_t end = strlen(linebuf) -1 ; while (iswhite(linebuf[end])) { linebuf[end] = 0; } if (!linebuf[0]) { continue; } rv = fh_process_line(&fh, linebuf); if (rv == FH_OK) { FHPRINT("ok\r\n"); } else { LOGE("ERROR %s on line %d", fherr_name(rv), linecnt); return 1; } } FHPRINT("Bye.\r\n"); return 0; }