// main.c, as added by the patch "initial WIP"
// (commit 72ff4b510d46c90d24258c5143b0ba81d0203b8c, Ondřej Hruška,
// Thu, 11 Nov 2021 00:21:26 +0100; 3 files changed, 749 insertions).
//
// The same patch also creates two build-support files:
//   .gitignore     (4 lines): cmake-*   *~   *.bak   .idea/
//   CMakeLists.txt (8 lines): cmake_minimum_required(VERSION 3.20)
//                             project(forth C)
//                             set(CMAKE_C_STANDARD 99)
//                             add_executable(forth main.c)

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h> /* strncasecmp */

/* Capacities of the fixed-size areas inside struct fh_thread_s */
#define CONTROL_STACK_DEPTH 1024
#define DATA_STACK_DEPTH 1024
#define RETURN_STACK_DEPTH 1024
#define MAX_NAME_LEN 64
#define DICT_SIZE 1024
#define COMPILED_BUFFER_SIZE (1024*1024)
#define HEAP_SIZE (1024*1024)
#define MAXLINE 65535

struct fh_thread_s;
struct fh_word_s;
struct fh_instruction_s;

/* if the return address is this, we should drop back to interactive mode */
#define MAGICADDR_INTERACTIVE 0xFFFFFFFFULL

/** Round an unsigned lvalue up to the next multiple of 4 (evaluates `var` more than once). */
#define ALIGNWORD(var) \
    do { \
        (var) += (4u - ((var) % 4u)) % 4u; \
    } while (0)

/* The bump allocators below rely on the buffers ending on an aligned boundary. */
_Static_assert(HEAP_SIZE % 4 == 0 && COMPILED_BUFFER_SIZE % 4 == 0,
               "Buffer sizes are multiples of the alignment");

enum fh_error {
    FH_OK = 0,
    FH_ERR_CS_OVERFLOW = -1,
    FH_ERR_DS_OVERFLOW = -2,
    FH_ERR_RS_OVERFLOW = -3,
    FH_ERR_CS_UNDERFLOW = -4,
    FH_ERR_DS_UNDERFLOW = -5,
    FH_ERR_RS_UNDERFLOW = -6,
    FH_ERR_HEAP_FULL = -7,
    FH_ERR_DICT_FULL = -8,
    FH_ERR_COMPILE_FULL = -9,
    FH_ERR_NAME_TOO_LONG = -10,
    FH_ERR_INVALID_STATE = -11,
    FH_ERR_INTERNAL = -12,
};

/** Handler implementing a dictionary word; returns FH_OK or an error code. */
typedef enum fh_error (*word_exec_t)(struct fh_thread_s *fh);

/** One dictionary entry. */
struct fh_word_s {
    /** NUL-terminated word name */
    char name[MAX_NAME_LEN];
    /** Function run when the word executes; NULL marks an unused slot */
    word_exec_t handler;
    /** True for words implemented in C, false for colon definitions */
    bool builtin;
    /** Colon definitions: offset of the first instruction in the compile buffer */
    uint32_t start;
    /** Colon definitions: offset one past the last instruction */
    uint32_t end;
};

enum fb_instruction_kind {
    /* Data is a word number in the dict */
    FH_INSTR_WORD,

    /* Data is a numeric value to push on the data stack */
    FH_INSTR_NUMBER,
};

/** One cell of compiled code. */
struct fh_instruction_s {
    enum fb_instruction_kind kind;
    uint32_t data;
};

/** words that are not in the dict, have special effect */
enum compiler_word {
    CPLWORD_ENDWORD = DICT_SIZE + 1,
    CPLWORD_ALLOCSTR,
    CPLWORD_TYPESTR,
};

_Static_assert(sizeof(struct fh_instruction_s) % 4 == 0, "Instruction struct is aligned");

enum fh_state {
    FH_STATE_INTERPRET = 0,
    FH_STATE_COMPILE,
    FH_STATE_SHUTDOWN,
};

/** What the input parser expects next. */
enum fh_substate {
    FH_SUBSTATE_NONE = 0,
    FH_SUBSTATE_COLONNAME,
    FH_SUBSTATE_SQUOTE,
    FH_SUBSTATE_DOTQUOTE,
    FH_SUBSTATE_PARENCOMMENT,
    FH_SUBSTATE_LINECOMMENT,
};

/** Complete interpreter state. It is roughly 2 MB, so prefer static storage. */
struct fh_thread_s {
    /** Control stack */
    uint32_t control_stack[CONTROL_STACK_DEPTH];
    size_t control_stack_top;

    /** Data stack */
    uint32_t data_stack[DATA_STACK_DEPTH];
    size_t data_stack_top;

    /** Return stack */
    uint32_t return_stack[RETURN_STACK_DEPTH];
    size_t return_stack_top;

    /** Data heap */
    uint8_t heap[HEAP_SIZE];
    size_t heap_top;

    /** Compile buffer, used for both word data and literals */
    uint8_t compile[COMPILED_BUFFER_SIZE];
    size_t compile_top;
    /** Pointer into the compile buffer for execution */
    uint32_t execptr;

    /** Word dict */
    struct fh_word_s dict[DICT_SIZE];
    uint32_t dict_top;

    /** Forth state */
    enum fh_state state;
    /** Forth sub-state */
    enum fh_substate substate;
    /** Word currently being executed - a pointer is placed here
     * before calling the handler */
    struct fh_word_s *exec_word;

    /** Input line, its length, and the offset of the next unread character */
    char linebuf[MAXLINE];
    size_t linebuf_len;
    size_t linebuf_readptr;
};

/** Evaluate x; on failure return its code. Needs a local `enum fh_error rv`. */
#define TRY(x) \
    do { \
        if (FH_OK != (rv = (x))) return rv; \
    } while (0)

/** Evaluate x; on failure jump to the local `fail` label with the code in `rv`. */
#define TRY_FAIL(x) \
    do { \
        if (FH_OK != (rv = (x))) goto fail; \
    } while (0)

/**
 * Add a word to the dictionary.
 *
 * @param w   word to copy into the next free slot
 * @param fh  interpreter state
 * @return FH_OK, or FH_ERR_DICT_FULL when no slot is left
 */
enum fh_error fh_add_word(const struct fh_word_s *w, struct fh_thread_s *fh)
{
    if (fh->dict_top >= DICT_SIZE) {
        return FH_ERR_DICT_FULL;
    }
    fh->dict[fh->dict_top] = *w;
    fh->dict_top++;
    return FH_OK;
}

//region Push & Pop

/** Pop the top of the data stack into *out. */
static inline enum fh_error ds_pop(struct fh_thread_s *fh, uint32_t *out)
{
    if (fh->data_stack_top == 0) {
        return FH_ERR_DS_UNDERFLOW;
    }
    *out = fh->data_stack[--fh->data_stack_top];
    return FH_OK;
}

/** Pop the top of the return stack into *out. */
static inline enum fh_error rs_pop(struct fh_thread_s *fh, uint32_t *out)
{
    if (fh->return_stack_top == 0) {
        return FH_ERR_RS_UNDERFLOW;
    }
    *out = fh->return_stack[--fh->return_stack_top];
    return FH_OK;
}

/** Pop the top of the control stack into *out. */
static inline enum fh_error cs_pop(struct fh_thread_s *fh, uint32_t *out)
{
    if (fh->control_stack_top == 0) {
        return FH_ERR_CS_UNDERFLOW;
    }
    *out = fh->control_stack[--fh->control_stack_top];
    return FH_OK;
}

/** Push a value on the data stack. */
static inline enum fh_error ds_push(struct fh_thread_s *fh, uint32_t in)
{
    if (fh->data_stack_top >= DATA_STACK_DEPTH) {
        return FH_ERR_DS_OVERFLOW;
    }
    fh->data_stack[fh->data_stack_top++] = in;
    return FH_OK;
}

/** Push a value on the return stack. */
static inline enum fh_error rs_push(struct fh_thread_s *fh, uint32_t in)
{
    if (fh->return_stack_top >= RETURN_STACK_DEPTH) {
        return FH_ERR_RS_OVERFLOW;
    }
    fh->return_stack[fh->return_stack_top++] = in;
    return FH_OK;
}

/** Push a value on the control stack. */
static inline enum fh_error cs_push(struct fh_thread_s *fh, uint32_t in)
{
    if (fh->control_stack_top >= CONTROL_STACK_DEPTH) {
        return FH_ERR_CS_OVERFLOW;
    }
    fh->control_stack[fh->control_stack_top++] = in;
    return FH_OK;
}

//endregion Push & Pop

/**
 * Bump-allocate `len` bytes from a buffer of `capacity` bytes.
 * The returned offset and the new top are both 4-byte aligned.
 *
 * @return true on success (with *addr and *top updated), false when it does not fit
 */
static bool fh_bump_reserve(size_t *top, size_t capacity, size_t len, uint32_t *addr)
{
    size_t start = *top;
    ALIGNWORD(start);

    /* Written as a subtraction so a huge `len` cannot wrap the sum around. */
    if (start > capacity || len > capacity - start) {
        return false;
    }

    size_t next = start + len;
    ALIGNWORD(next);

    *addr = (uint32_t) start;
    *top = next;
    return true;
}

/**
 * Reserve `len` bytes on the data heap.
 *
 * @param fh    interpreter state
 * @param len   number of bytes wanted
 * @param addr  receives the 4-byte aligned offset into fh->heap
 * @return FH_OK, or FH_ERR_HEAP_FULL
 */
enum fh_error fh_allot(
    struct fh_thread_s *fh,
    size_t len,
    uint32_t *addr
)
{
    return fh_bump_reserve(&fh->heap_top, HEAP_SIZE, len, addr)
           ? FH_OK : FH_ERR_HEAP_FULL;
}

/**
 * Reserve `len` bytes in the compile buffer.
 *
 * @param fh    interpreter state
 * @param len   number of bytes wanted
 * @param addr  receives the 4-byte aligned offset into fh->compile
 * @return FH_OK, or FH_ERR_COMPILE_FULL
 */
enum fh_error fh_compile_reserve(
    struct fh_thread_s *fh,
    size_t len,
    uint32_t *addr
)
{
    return fh_bump_reserve(&fh->compile_top, COMPILED_BUFFER_SIZE, len, addr)
           ? FH_OK : FH_ERR_COMPILE_FULL;
}


//region Builtin Words

/**
 * Pop two operands off the data stack.
 * *rhs receives the top item (pushed last), *lhs the one below it.
 */
static enum fh_error ds_pop2(struct fh_thread_s *fh, uint32_t *lhs, uint32_t *rhs)
{
    enum fh_error rv;
    TRY(ds_pop(fh, rhs));
    TRY(ds_pop(fh, lhs));
    return FH_OK;
}

/** `+` ( a b -- a+b ) */
enum fh_error w_add(struct fh_thread_s *fh)
{
    enum fh_error rv;
    uint32_t lhs = 0, rhs = 0;
    TRY(ds_pop2(fh, &lhs, &rhs));
    return ds_push(fh, lhs + rhs);
}

/** `-` ( a b -- a-b ): the item pushed last is the subtrahend. */
enum fh_error w_sub(struct fh_thread_s *fh)
{
    enum fh_error rv;
    uint32_t lhs = 0, rhs = 0;
    TRY(ds_pop2(fh, &lhs, &rhs));
    return ds_push(fh, lhs - rhs);
}

/** `*` ( a b -- a*b ) */
enum fh_error w_mul(struct fh_thread_s *fh)
{
    enum fh_error rv;
    uint32_t lhs = 0, rhs = 0;
    TRY(ds_pop2(fh, &lhs, &rhs));
    return ds_push(fh, lhs * rhs);
}

/**
 * Run the colon definition in fh->exec_word.
 *
 * Calls to other colon definitions are handled in this same loop: the
 * current execptr is pushed on the return stack and execution jumps to the
 * callee. The loop ends when an end-of-word instruction pops
 * MAGICADDR_INTERACTIVE from the return stack.
 *
 * On failure the return stack depth and execptr are put back to their values
 * at entry, so the next call starts from a sane state.
 *
 * @return FH_OK, the error of a failing stack/heap operation or builtin, or
 *         FH_ERR_INTERNAL when the compiled code is malformed
 */
enum fh_error w_user_word(struct fh_thread_s *fh)
{
    enum fh_error rv;
    const size_t entry_rs_top = fh->return_stack_top;
    const uint32_t entry_execptr = fh->execptr;
    struct fh_word_s *w;
    struct fh_instruction_s instr;
    uint32_t strl = 0;
    uint32_t addr = 0;

call:
    w = fh->exec_word;
    if (!w) {
        rv = FH_ERR_INTERNAL;
        goto fail;
    }
    TRY_FAIL(rs_push(fh, fh->execptr));
    fh->execptr = w->start;

next_instr:
    /* Instructions are 4-byte aligned; inline strings may end unaligned. */
    ALIGNWORD(fh->execptr);
    if (fh->execptr > COMPILED_BUFFER_SIZE - sizeof(instr)) {
        rv = FH_ERR_INTERNAL;
        goto fail;
    }
    /* memcpy rather than a pointer cast: no aliasing or alignment assumptions */
    memcpy(&instr, &fh->compile[fh->execptr], sizeof(instr));
    fh->execptr += sizeof(instr);

    switch (instr.kind) {
        case FH_INSTR_NUMBER:
            TRY_FAIL(ds_push(fh, instr.data));
            goto next_instr;

        case FH_INSTR_WORD:
            break;

        default:
            rv = FH_ERR_INTERNAL;
            goto fail;
    }

    switch (instr.data) {
        case CPLWORD_ALLOCSTR:
        case CPLWORD_TYPESTR:
            /* Layout: instruction, 32-bit length, then the string bytes. */
            if (fh->execptr > COMPILED_BUFFER_SIZE - sizeof(strl)) {
                rv = FH_ERR_INTERNAL;
                goto fail;
            }
            memcpy(&strl, &fh->compile[fh->execptr], sizeof(strl));
            fh->execptr += sizeof(strl);
            if (strl > COMPILED_BUFFER_SIZE - fh->execptr) {
                rv = FH_ERR_INTERNAL;
                goto fail;
            }

            if (instr.data == CPLWORD_ALLOCSTR) {
                /* Copy the string to the heap and push ( addr len ). */
                TRY_FAIL(fh_allot(fh, strl, &addr));
                memcpy(&fh->heap[addr], &fh->compile[fh->execptr], strl);
                TRY_FAIL(ds_push(fh, addr));
                TRY_FAIL(ds_push(fh, strl));
            } else {
                printf("%.*s", (int) strl, (const char *) &fh->compile[fh->execptr]);
            }
            /* Both variants must step over the inline string. */
            fh->execptr += strl;
            goto next_instr;

        case CPLWORD_ENDWORD:
            TRY_FAIL(rs_pop(fh, &fh->execptr));
            if (fh->execptr == MAGICADDR_INTERACTIVE) {
                return FH_OK;
            }
            goto next_instr;

        default:
            if (instr.data >= DICT_SIZE) {
                rv = FH_ERR_INTERNAL;
                goto fail;
            }
            w = &fh->dict[instr.data];
            if (!w->handler) {
                rv = FH_ERR_INTERNAL;
                goto fail;
            }
            if (w->builtin) {
                TRY_FAIL(w->handler(fh));
                goto next_instr;
            }
            fh->exec_word = w;
            goto call;
    }

fail:
    fh->return_stack_top = entry_rs_top;
    fh->execptr = entry_execptr;
    return rv;
}

/**
 * `:` - begin a colon definition in the next free dictionary slot.
 * The parser reads the name next (FH_SUBSTATE_COLONNAME).
 *
 * @return FH_OK, FH_ERR_INVALID_STATE when not interpreting, or
 *         FH_ERR_DICT_FULL (the state is then left untouched)
 */
enum fh_error w_colon(struct fh_thread_s *fh)
{
    if (fh->state != FH_STATE_INTERPRET) {
        return FH_ERR_INVALID_STATE;
    }
    if (fh->dict_top >= DICT_SIZE) {
        return FH_ERR_DICT_FULL;
    }

    struct fh_word_s *w = &fh->dict[fh->dict_top];
    memset(w, 0, sizeof(*w)); /* drop leftovers of an unfinished definition */
    w->start = (uint32_t) fh->compile_top;
    w->handler = w_user_word;

    fh->state = FH_STATE_COMPILE;
    fh->substate = FH_SUBSTATE_COLONNAME;
    return FH_OK;
}

/**
 * `;` - finish the current colon definition: append the end-of-word
 * instruction, publish the dictionary entry and go back to interpreting.
 *
 * @return FH_OK, FH_ERR_INVALID_STATE when not compiling, FH_ERR_INTERNAL
 *         when there is no open dictionary slot, or FH_ERR_COMPILE_FULL
 */
enum fh_error w_semicolon(struct fh_thread_s *fh)
{
    enum fh_error rv;
    uint32_t addr = 0;
    struct fh_instruction_s instr;

    if (fh->state != FH_STATE_COMPILE) {
        return FH_ERR_INVALID_STATE;
    }
    if (fh->dict_top >= DICT_SIZE) {
        return FH_ERR_INTERNAL;
    }

    TRY(fh_compile_reserve(fh, sizeof(instr), &addr));
    memset(&instr, 0, sizeof(instr));
    instr.kind = FH_INSTR_WORD;
    instr.data = CPLWORD_ENDWORD;
    memcpy(&fh->compile[addr], &instr, sizeof(instr));

    /* Return to interpret state */
    fh->state = FH_STATE_INTERPRET;
    fh->dict[fh->dict_top].end = (uint32_t) fh->compile_top; /* one past the end cell */
    fh->dict_top++;
    return FH_OK;
}

/** `.` ( n -- ) print the top of the stack as a signed number plus a space. */
enum fh_error w_dot(struct fh_thread_s *fh)
{
    enum fh_error rv;
    uint32_t a = 0;
    TRY(ds_pop(fh, &a));

    printf("%d ", (int) (int32_t) a);
    return FH_OK;
}

/**
 * `type` ( addr len -- ) print `len` bytes of the heap starting at `addr`.
 *
 * @return FH_OK, FH_ERR_DS_UNDERFLOW, or FH_ERR_INTERNAL when the range is
 *         outside the heap (the error enum has no dedicated code for that)
 */
enum fh_error w_type(struct fh_thread_s *fh)
{
    enum fh_error rv;
    uint32_t count = 0, addr = 0;
    TRY(ds_pop(fh, &count));
    TRY(ds_pop(fh, &addr));

    if (addr > HEAP_SIZE || count > HEAP_SIZE - addr) {
        return FH_ERR_INTERNAL;
    }

    printf("%.*s", (int) count, (const char *) &fh->heap[addr]);
    return FH_OK;
}

/** `cr` - print a line break. */
enum fh_error w_cr(struct fh_thread_s *fh)
{
    (void) fh;
    printf("\r\n");
    return FH_OK;
}

/** `space` - print one space. */
enum fh_error w_space(struct fh_thread_s *fh)
{
    (void) fh;
    printf(" ");
    return FH_OK;
}

/** `s"` - make the parser read a quoted string next (FH_SUBSTATE_SQUOTE). */
enum fh_error w_s_quote(struct fh_thread_s *fh)
{
    fh->substate = FH_SUBSTATE_SQUOTE;
    return FH_OK;
}
+ +enum fh_error w_dot_quote(struct fh_thread_s *fh) +{ + fh->substate = FH_SUBSTATE_DOTQUOTE; + return FH_OK; +} + +enum fh_error w_backslash(struct fh_thread_s *fh) +{ + fh->substate = FH_SUBSTATE_LINECOMMENT; + return FH_OK; +} + +enum fh_error w_paren(struct fh_thread_s *fh) +{ + fh->substate = FH_SUBSTATE_PARENCOMMENT; + return FH_OK; +} + +enum fh_error w_bye(struct fh_thread_s *fh) +{ + fh->state = FH_STATE_SHUTDOWN; + return FH_OK; +} + +enum fh_error register_builtin_words(struct fh_thread_s *fh) +{ + struct name_and_handler { + const char *name; + word_exec_t handler; + }; + + const struct name_and_handler builtins[] = { + {"s\"", w_s_quote}, + {".\"", w_dot_quote}, + /* Compiler control words */ + {"bye", w_bye}, + /* Basic arithmetics */ + {"+", w_add}, + {"-", w_sub}, + {"*", w_mul}, + /* Control words */ + {":", w_colon}, + {";", w_semicolon}, + {".", w_dot}, + {"type", w_type}, + {"cr", w_cr}, + {"space", w_space}, + {"\\", w_backslash}, // line comment + {"(", w_paren}, // enclosed comment + { /* end marker */ } + }; + + struct fh_word_s w; + const struct name_and_handler *p = builtins; + enum fh_error rv; + while (p->handler) { + strcpy(w.name, p->name); + w.handler = p->handler; + w.builtin = 1; + rv = fh_add_word(&w, fh); + if (rv != FH_OK) { + return rv; + } + p++; + } + return FH_OK; +} + +#undef ADDWORD + +//endregion Builtin Words + +enum fh_error fh_init_thread(struct fh_thread_s *fh) +{ + enum fh_error rv; + + /* Make sure we have a clean state */ + memset(fh, 0, sizeof(struct fh_thread_s)); + + TRY(register_builtin_words(fh)); + + fh->execptr = MAGICADDR_INTERACTIVE; + return FH_OK; +} + +enum fh_error fh_handle_quoted_string( + struct fh_thread_s *fh, + char *start, + size_t len +) +{ + enum fh_error rv; + uint32_t addr = 0; + uint32_t addr2 = 0; + struct fh_instruction_s instr; + + if (fh->state == FH_STATE_INTERPRET) { + switch (fh->substate) { + case FH_SUBSTATE_SQUOTE: + TRY(fh_allot(fh, len, &addr)); + memcpy(&fh->heap[addr], start, 
len); + TRY(ds_push(fh, addr)); + TRY(ds_push(fh, len)); + break; + case FH_SUBSTATE_DOTQUOTE: + printf("%.*s", (int) len, start); + break; + + default: + printf("!!! Bad substate\r\n"); + } + } else { + /* compile */ + TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr)); + TRY(fh_compile_reserve(fh, len + 4, &addr2)); + instr.kind = FH_INSTR_WORD; + instr.data = fh->substate == FH_SUBSTATE_SQUOTE ? + CPLWORD_ALLOCSTR : + CPLWORD_TYPESTR; + uint32_t len32 = len; + /* string is encoded as a special compiler command, the size, + * and then the string, all 4-byte aligned. */ + memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s)); + memcpy(&fh->compile[addr2], &len32, 4); + memcpy(&fh->compile[addr2 + 4], &start, len); + } + return FH_OK; +} + +enum fh_error fh_handle_word( + struct fh_thread_s *fh, + char *start, + size_t len +) +{ + if (len >= MAX_NAME_LEN) { + return FH_ERR_NAME_TOO_LONG; + } + + /* First, try if it's a known word */ + struct fh_word_s *w = &fh->dict[0]; + struct fh_instruction_s instr; + uint32_t cnt = 0; + uint32_t addr = 0; + enum fh_error rv; + while (w->handler) { + if (0 == strncasecmp(start, w->name, len) && w->name[len]==0) { + // word found! 
+ if (fh->state == FH_STATE_COMPILE) { + TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr)); + instr.kind = FH_INSTR_WORD; + instr.data = cnt; + memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s)); + } else { + /* interpret */ + fh->exec_word = w; + TRY(w->handler(fh)); + } + return FH_OK; + } + w++; + cnt++; + } + + /* word not found, try parsing as number */ + long v = strtol(start, NULL, 0); + if (fh->state == FH_STATE_COMPILE) { + TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr)); + instr.kind = FH_INSTR_NUMBER; + instr.data = (uint32_t) v; + memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s)); + } else { + /* interpret */ + TRY(ds_push(fh, (uint32_t)v)); + } + + return FH_OK; +} + +static bool iswhite(char c) +{ + return c == ' ' || c == '\n' || c == '\t' || c == '\r'; +} + +enum fh_error fh_process_input(struct fh_thread_s *fh) +{ + enum fh_error rv; + char *rp = &fh->linebuf[fh->linebuf_readptr]; + while (fh->linebuf_readptr < fh->linebuf_len && fh->state != FH_STATE_SHUTDOWN) { + /* skip whitespace */ + char c = *rp; + if (iswhite(c)) { + rp++; + fh->linebuf_readptr++; + continue; + } + + char *end; + size_t stringlen; + switch (fh->substate) { + case FH_SUBSTATE_NONE: + /* try read a word */ + end = strchr(rp, ' '); + if (end) { + stringlen = end - rp; + } else { + stringlen = fh->linebuf_len - fh->linebuf_readptr; + } + + // rtrim + while (stringlen > 0 && iswhite(rp[stringlen - 1])) { stringlen--; } + + TRY(fh_handle_word(fh, rp, stringlen)); + rp = end + 1; + fh->linebuf_readptr = rp - &fh->linebuf[0]; + break; + + case FH_SUBSTATE_COLONNAME: + /* find space */ + end = strchr(rp, ' '); + stringlen = end - rp; + if (end) { + stringlen = end - rp; + } else { + stringlen = fh->linebuf_len - fh->linebuf_readptr; + } + + // rtrim + while (stringlen > 0 && iswhite(rp[stringlen - 1])) { stringlen--; } + + strncpy(fh->dict[fh->dict_top].name, rp, stringlen); + fh->substate = FH_SUBSTATE_NONE; + 
rp = end + 1; + fh->linebuf_readptr = rp - &fh->linebuf[0]; + break; + + case FH_SUBSTATE_SQUOTE: + case FH_SUBSTATE_DOTQUOTE: + end = strchr(rp, '"'); + if (end) { + stringlen = end - rp - 1; + TRY(fh_handle_quoted_string(fh, rp, stringlen)); + fh->substate = FH_SUBSTATE_NONE; + rp = end + 1; + fh->linebuf_readptr = rp - &fh->linebuf[0]; + } else { + /* no end, discard all */ + goto end; + } + break; + + case FH_SUBSTATE_PARENCOMMENT: + end = strchr(rp, ')'); + if (end) { + fh->substate = FH_SUBSTATE_NONE; + rp = end + 1; + fh->linebuf_readptr = rp - &fh->linebuf[0]; + } else { + /* no end, discard all */ + goto end; + } + break; + + case FH_SUBSTATE_LINECOMMENT: + end = strchr(rp, '\n'); + if (end) { + fh->substate = FH_SUBSTATE_NONE; + rp = end + 1; + fh->linebuf_readptr = rp - &fh->linebuf[0]; + } else { + /* no newline, discard all */ + goto end; + } + break; + } + } + end: + return FH_OK; +} + + +int main() +{ + enum fh_error rv; + struct fh_thread_s fh; + TRY_FAIL(fh_init_thread(&fh)); + + while (fh.state != FH_STATE_SHUTDOWN && fgets(fh.linebuf, MAXLINE, stdin)) { + fh.linebuf_len = strlen(fh.linebuf); + fh.linebuf_readptr = 0; + rv = fh_process_input(&fh); + if (rv == FH_OK) { + printf("ok\r\n"); + } else { + printf("ERROR %d\r\n", rv); + } + } + + printf("Bye.\r\n"); + return 0; + + fail: + printf("Error %d\r\n", rv); + return 1; +}