Trying to build a forth runtime in C
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
forth/main.c

830 lines
19 KiB

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#define CONTROL_STACK_DEPTH 1024
#define DATA_STACK_DEPTH 1024
#define RETURN_STACK_DEPTH 1024
#define MAX_NAME_LEN 64
#define DICT_SIZE 1024
#define COMPILED_BUFFER_SIZE (1024*1024)
#define HEAP_SIZE (1024*1024)
#define MAXLINE 65535
struct fh_thread_s;
struct fh_word_s;
struct fh_instruction_s;
/* if the return address is this, we should drop back to interactive mode */
#define MAGICADDR_INTERACTIVE 0xFFFFFFFFULL
#define ALIGNWORD(var) \
do { \
while (((var) % 4) != 0) { (var)++; } \
} while (0)
/* logging, TODO make levels configurable */
#define LOG(format, ...) fprintf(stderr, format "\r\n", ##__VA_ARGS__)
#define LOGI(format, ...) fprintf(stderr, "\x1b[32m" format "\x1b[m\r\n", ##__VA_ARGS__)
#define LOGE(format, ...) fprintf(stderr, "\x1b[31;1m" format "\x1b[m\r\n", ##__VA_ARGS__)
/* Forth standard output. XXX should be stdout, but then colors get mangled */
#define FHPRINT(format, ...) fprintf(stderr, "\x1b[33;1m" format "\x1b[m", ##__VA_ARGS__)
enum fh_error {
FH_OK = 0,
FH_ERR_CS_OVERFLOW,
FH_ERR_DS_OVERFLOW,
FH_ERR_RS_OVERFLOW,
FH_ERR_CS_UNDERFLOW,
FH_ERR_DS_UNDERFLOW,
FH_ERR_RS_UNDERFLOW,
FH_ERR_HEAP_FULL,
FH_ERR_DICT_FULL,
FH_ERR_COMPILE_FULL,
FH_ERR_NAME_TOO_LONG,
FH_ERR_INVALID_STATE,
FH_ERR_INTERNAL,
FH_ERR_UNKNOWN_WORD,
FH_ERR_MAX,
};
static const char *errornames[] = {
[FH_OK] = "OK",
[FH_ERR_CS_OVERFLOW] = "CS_OVERFLOW",
[FH_ERR_DS_OVERFLOW] = "DS_OVERFLOW",
[FH_ERR_RS_OVERFLOW] = "RS_OVERFLOW",
[FH_ERR_CS_UNDERFLOW] = "CS_UNDERFLOW",
[FH_ERR_DS_UNDERFLOW] = "DS_UNDERFLOW",
[FH_ERR_RS_UNDERFLOW] = "RS_UNDERFLOW",
[FH_ERR_HEAP_FULL] = "HEAP_FULL",
[FH_ERR_DICT_FULL] = "DICT_FULL",
[FH_ERR_COMPILE_FULL] = "COMPILE_FULL",
[FH_ERR_NAME_TOO_LONG] = "NAME_TOO_LONG",
[FH_ERR_INVALID_STATE] = "INVALID_STATE",
[FH_ERR_INTERNAL] = "INTERNAL",
[FH_ERR_UNKNOWN_WORD] = "UNKNOWN_WORD",
};
const char *fherr_name(enum fh_error e) {
if (e >= FH_ERR_MAX) {
return "Unknown";
}
return errornames[e];
}
typedef enum fh_error (*word_exec_t)(struct fh_thread_s *fh);
struct fh_word_s {
char name[MAX_NAME_LEN];
word_exec_t handler;
bool builtin;
bool immediate;
uint32_t start;
uint32_t end;
};
enum fb_instruction_kind {
/* Data is a word number in the dict */
FH_INSTR_WORD,
/* Data is a numeric value to push on the data stack */
FH_INSTR_NUMBER,
};
struct fh_instruction_s {
enum fb_instruction_kind kind;
uint32_t data;
};
/** words that are not in the dict, have special effect */
enum compiler_word {
CPLWORD_ENDWORD = DICT_SIZE + 1,
CPLWORD_ALLOCSTR,
CPLWORD_TYPESTR,
};
_Static_assert(sizeof(struct fh_instruction_s) % 4 == 0, "Instruction struct is aligned");
enum fh_state {
FH_STATE_INTERPRET = 0,
FH_STATE_COMPILE,
FH_STATE_SHUTDOWN,
};
enum fh_substate {
FH_SUBSTATE_NONE = 0,
FH_SUBSTATE_COLONNAME,
FH_SUBSTATE_SQUOTE,
FH_SUBSTATE_DOTQUOTE,
FH_SUBSTATE_PARENCOMMENT,
FH_SUBSTATE_LINECOMMENT,
};
struct fh_thread_s {
/** Control stack */
uint32_t control_stack[CONTROL_STACK_DEPTH];
size_t control_stack_top;
/** Data stack */
uint32_t data_stack[DATA_STACK_DEPTH];
size_t data_stack_top;
/** Return stack */
uint32_t return_stack[RETURN_STACK_DEPTH];
size_t return_stack_top;
/** Data heap */
uint8_t heap[HEAP_SIZE];
size_t heap_top;
/** Compile buffer, used for both word data and literals */
uint8_t compile[COMPILED_BUFFER_SIZE];
size_t compile_top;
/** Pointer into the compile buffer for execution */
uint32_t execptr;
/** Word dict */
struct fh_word_s dict[DICT_SIZE];
uint32_t dict_top;
/** Forth state */
enum fh_state state;
/** Forth sub-state */
enum fh_substate substate;
/** Word currently being executed - a pointer is placed here
* before calling the handler */
struct fh_word_s *exec_word;
};
#define TRY(x) \
do { \
if (FH_OK != (rv = (x))) return rv; \
} while (0)
#define TRY_FAIL(x) \
do { \
if (FH_OK != (rv = (x))) goto fail; \
} while (0)
/** Add a word to the dictionary. */
enum fh_error fh_add_word(const struct fh_word_s *w, struct fh_thread_s *fh)
{
if (fh->dict_top == DICT_SIZE) {
return FH_ERR_DICT_FULL;
}
memcpy(&fh->dict[fh->dict_top++], w, sizeof(struct fh_word_s));
return FH_OK;
}
//region Push & Pop
static inline enum fh_error ds_pop(struct fh_thread_s *fh, uint32_t *out)
{
if (fh->data_stack_top == 0) {
LOG("DS pop UNDERFLOW");
return FH_ERR_DS_UNDERFLOW;
}
*out = fh->data_stack[--fh->data_stack_top];
LOG("DS pop %d", *out);
return FH_OK;
}
static inline enum fh_error rs_pop(struct fh_thread_s *fh, uint32_t *out)
{
if (fh->return_stack_top == 0) {
LOG("RS pop UNDERFLOW");
return FH_ERR_RS_UNDERFLOW;
}
*out = fh->return_stack[--fh->return_stack_top];
LOG("RS pop %d", *out);
return FH_OK;
}
static inline enum fh_error cs_pop(struct fh_thread_s *fh, uint32_t *out)
{
if (fh->control_stack_top == 0) {
LOG("CS pop UNDERFLOW");
return FH_ERR_CS_UNDERFLOW;
}
*out = fh->control_stack[--fh->control_stack_top];
LOG("CS pop %d", *out);
return FH_OK;
}
static inline enum fh_error ds_push(struct fh_thread_s *fh, uint32_t in)
{
LOG("DS push %d", in);
if (fh->data_stack_top == DATA_STACK_DEPTH) {
return FH_ERR_DS_OVERFLOW;
}
fh->data_stack[fh->data_stack_top++] = in;
return FH_OK;
}
static inline enum fh_error rs_push(struct fh_thread_s *fh, uint32_t in)
{
LOG("RS push %d", in);
if (fh->return_stack_top == RETURN_STACK_DEPTH) {
return FH_ERR_RS_OVERFLOW;
}
fh->return_stack[fh->return_stack_top++] = in;
return FH_OK;
}
static inline enum fh_error cs_push(struct fh_thread_s *fh, uint32_t in)
{
LOG("CS push %d", in);
if (fh->control_stack_top == CONTROL_STACK_DEPTH) {
return FH_ERR_CS_OVERFLOW;
}
fh->control_stack[fh->control_stack_top++] = in;
return FH_OK;
}
//endregion Push & Pop
enum fh_error fh_allot(
struct fh_thread_s *fh,
size_t len,
uint32_t *addr
)
{
uint32_t p = fh->heap_top;
ALIGNWORD(p);
if (p + len > HEAP_SIZE) {
return FH_ERR_HEAP_FULL;
}
*addr = p;
size_t next = p + len;
ALIGNWORD(next);
fh->heap_top = next;
return FH_OK;
}
enum fh_error fh_compile_reserve(
struct fh_thread_s *fh,
size_t len,
uint32_t *addr
)
{
uint32_t p = fh->compile_top;
// align up
ALIGNWORD(p);
if (p + len > COMPILED_BUFFER_SIZE) {
return FH_ERR_HEAP_FULL;
}
*addr = p;
size_t next = p + len;
ALIGNWORD(next);
fh->compile_top = next;
return FH_OK;
}
//region Builtin Words
enum fh_error w_add(struct fh_thread_s *fh)
{
enum fh_error rv;
uint32_t a = 0, b = 0;
TRY(ds_pop(fh, &a));
TRY(ds_pop(fh, &b));
TRY(ds_push(fh, a + b));
return FH_OK;
}
enum fh_error w_sub(struct fh_thread_s *fh)
{
enum fh_error rv;
uint32_t a = 0, b = 0;
TRY(ds_pop(fh, &a));
TRY(ds_pop(fh, &b));
TRY(ds_push(fh, a - b));
return FH_OK;
}
enum fh_error w_mul(struct fh_thread_s *fh)
{
enum fh_error rv;
uint32_t a = 0, b = 0;
TRY(ds_pop(fh, &a));
TRY(ds_pop(fh, &b));
TRY(ds_push(fh, a * b));
return FH_OK;
}
enum fh_error w_user_word(struct fh_thread_s *fh)
{
enum fh_error rv;
const struct fh_word_s *w;
const struct fh_word_s *w2;
uint32_t wn;
call:
w = fh->exec_word;
if (!w) { return FH_ERR_INTERNAL; }
LOG("Run user word: %s", w->name);
TRY(rs_push(fh, fh->execptr));
fh->execptr = w->start;
instr:;
// make sure it's aligned
ALIGNWORD(fh->execptr);
const struct fh_instruction_s *instr = (const struct fh_instruction_s *) &fh->compile[fh->execptr];
fh->execptr += sizeof(struct fh_instruction_s);
uint32_t strl;
uint32_t addr = 0;
switch (instr->kind) {
case FH_INSTR_NUMBER:
TRY(ds_push(fh, instr->data));
goto instr;
case FH_INSTR_WORD:
wn = instr->data;
switch (wn) {
case CPLWORD_ALLOCSTR:
case CPLWORD_TYPESTR:
strl = *((uint32_t *) &fh->compile[fh->execptr]);
fh->execptr += 4;
if (wn == CPLWORD_ALLOCSTR) {
TRY(fh_allot(fh, strl, &addr));
memcpy(&fh->heap[addr], &fh->compile[fh->execptr], strl);
LOG("Exec: alloc-str \"%.*s\"", strl, &fh->heap[addr]);
TRY(ds_push(fh, addr));
TRY(ds_push(fh, strl));
fh->execptr += strl;
} else {
FHPRINT("%.*s", (int) strl, &fh->compile[fh->execptr]);
LOG("Exec: type-str \"%.*s\"", strl, &fh->heap[addr]);
}
goto instr;
case CPLWORD_ENDWORD:
LOG("Exec: word-end (RETURN)");
TRY(rs_pop(fh, &fh->execptr));
if (fh->execptr == MAGICADDR_INTERACTIVE) {
goto end;
}
goto instr;
default:
w2 = &fh->dict[instr->data];
if (w2->builtin) {
LOG("Exec: builtin-word %s", w2->name);
w2->handler(fh);
goto instr;
} else {
LOG("Exec: user-word %s (CALL)", w2->name);
fh->exec_word = &fh->dict[instr->data];
goto call;
}
}
}
end:
return FH_OK;
}
enum fh_error w_colon(struct fh_thread_s *fh)
{
if (fh->state != FH_STATE_INTERPRET) {
return FH_ERR_INVALID_STATE;
}
LOG("state=COMPILE");
fh->state = FH_STATE_COMPILE;
fh->substate = FH_SUBSTATE_COLONNAME;
if (fh->dict_top >= DICT_SIZE) {
return FH_ERR_DICT_FULL;
}
fh->dict[fh->dict_top].start = fh->compile_top;
fh->dict[fh->dict_top].handler = w_user_word;
return FH_OK;
}
enum fh_error w_semicolon(struct fh_thread_s *fh)
{
enum fh_error rv;
uint32_t addr = 0;
struct fh_instruction_s instr;
if (fh->state != FH_STATE_COMPILE) {
return FH_ERR_INVALID_STATE;
}
TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr));
instr.kind = FH_INSTR_WORD;
instr.data = CPLWORD_ENDWORD;
memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s));
/* Return to interpret state */
LOG("state=INTERPRET");
fh->state = FH_STATE_INTERPRET;
fh->dict[fh->dict_top].end = fh->compile_top; /* one past the end cell */
fh->dict_top++;
return FH_OK;
}
enum fh_error w_dot(struct fh_thread_s *fh)
{
enum fh_error rv;
uint32_t a = 0;
TRY(ds_pop(fh, &a));
FHPRINT("%d ", (int32_t) a);
return FH_OK;
}
enum fh_error w_type(struct fh_thread_s *fh)
{
enum fh_error rv;
uint32_t count = 0, addr = 0;
TRY(ds_pop(fh, &count));
TRY(ds_pop(fh, &addr));
FHPRINT("%.*s", count, &fh->heap[addr]);
return FH_OK;
}
enum fh_error w_cr(struct fh_thread_s *fh)
{
FHPRINT("\r\n");
return FH_OK;
}
enum fh_error w_space(struct fh_thread_s *fh)
{
FHPRINT(" ");
return FH_OK;
}
enum fh_error w_s_quote(struct fh_thread_s *fh)
{
fh->substate = FH_SUBSTATE_SQUOTE;
return FH_OK;
}
enum fh_error w_dot_quote(struct fh_thread_s *fh)
{
fh->substate = FH_SUBSTATE_DOTQUOTE;
return FH_OK;
}
enum fh_error w_backslash(struct fh_thread_s *fh)
{
fh->substate = FH_SUBSTATE_LINECOMMENT;
return FH_OK;
}
enum fh_error w_paren(struct fh_thread_s *fh)
{
fh->substate = FH_SUBSTATE_PARENCOMMENT;
return FH_OK;
}
enum fh_error w_bye(struct fh_thread_s *fh)
{
LOG("state=SHUTDOWN");
fh->state = FH_STATE_SHUTDOWN;
return FH_OK;
}
enum fh_error register_builtin_words(struct fh_thread_s *fh)
{
struct name_and_handler {
const char *name;
word_exec_t handler;
bool immediate;
};
const struct name_and_handler builtins[] = {
{"s\"", w_s_quote},
{".\"", w_dot_quote},
/* Compiler control words */
{"bye", w_bye},
/* Basic arithmetics */
{"+", w_add},
{"-", w_sub},
{"*", w_mul},
/* Control words */
{":", w_colon},
{";", w_semicolon, 1},
{".", w_dot},
{"type", w_type},
{"cr", w_cr},
{"space", w_space},
{"\\", w_backslash}, // line comment
{"(", w_paren}, // enclosed comment
{ /* end marker */ }
};
struct fh_word_s w;
const struct name_and_handler *p = builtins;
enum fh_error rv;
while (p->handler) {
strcpy(w.name, p->name);
w.handler = p->handler;
w.builtin = 1;
w.immediate = p->immediate;
rv = fh_add_word(&w, fh);
if (rv != FH_OK) {
return rv;
}
p++;
}
return FH_OK;
}
#undef ADDWORD
//endregion Builtin Words
enum fh_error fh_init_thread(struct fh_thread_s *fh)
{
enum fh_error rv;
/* Make sure we have a clean state */
memset(fh, 0, sizeof(struct fh_thread_s));
TRY(register_builtin_words(fh));
fh->execptr = MAGICADDR_INTERACTIVE;
return FH_OK;
}
enum fh_error fh_handle_quoted_string(
struct fh_thread_s *fh,
char *start,
size_t len
)
{
enum fh_error rv;
uint32_t addr = 0;
uint32_t addr2 = 0;
struct fh_instruction_s instr;
if (fh->state == FH_STATE_INTERPRET) {
switch (fh->substate) {
case FH_SUBSTATE_SQUOTE:
TRY(fh_allot(fh, len, &addr));
memcpy(&fh->heap[addr], start, len);
TRY(ds_push(fh, addr));
TRY(ds_push(fh, len));
break;
case FH_SUBSTATE_DOTQUOTE:
FHPRINT("%.*s", (int) len, start);
break;
default:
LOGE("Bad substate in interpret mode: %d", fh->substate);
}
} else {
/* compile */
TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr));
TRY(fh_compile_reserve(fh, len + 4, &addr2));
instr.kind = FH_INSTR_WORD;
instr.data = fh->substate == FH_SUBSTATE_SQUOTE ?
CPLWORD_ALLOCSTR :
CPLWORD_TYPESTR;
uint32_t len32 = len;
/* string is encoded as a special compiler command, the size,
* and then the string, all 4-byte aligned. */
memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s));
memcpy(&fh->compile[addr2], &len32, 4);
memcpy(&fh->compile[addr2 + 4], &start, len);
}
return FH_OK;
}
enum fh_error fh_handle_word(
struct fh_thread_s *fh,
char *start,
size_t len
)
{
if (len >= MAX_NAME_LEN) {
return FH_ERR_NAME_TOO_LONG;
}
/* First, try if it's a known word */
struct fh_word_s *w = &fh->dict[0];
struct fh_instruction_s instr;
uint32_t cnt = 0;
uint32_t addr = 0;
enum fh_error rv;
while (w->handler) {
if (0 == strncasecmp(start, w->name, len) && w->name[len]==0) {
// word found!
if (fh->state == FH_STATE_COMPILE && !w->immediate) {
LOG("Compile word call: %s", w->name);
TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr));
instr.kind = FH_INSTR_WORD;
instr.data = cnt;
memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s));
} else {
/* interpret */
LOG("Interpret word: %s", w->name);
fh->exec_word = w;
TRY(w->handler(fh));
}
return FH_OK;
}
w++;
cnt++;
}
/* word not found, try parsing as number */
errno = 0;
char *endptr;
long v = strtol(start, &endptr, 0);
if (errno != 0 || endptr == start) {
LOGE("Unknown word and fail to parse as number: %.*s", (int)len, start);
return FH_ERR_UNKNOWN_WORD;
}
if (fh->state == FH_STATE_COMPILE) {
LOG("Compile number: %d", v);
TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr));
instr.kind = FH_INSTR_NUMBER;
instr.data = (uint32_t) v;
memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s));
} else {
/* interpret */
LOG("Interpret number: %d", v);
TRY(ds_push(fh, (uint32_t)v));
}
return FH_OK;
}
static inline bool iswhite(char c)
{
return c == ' ' || c == '\n' || c == '\t' || c == '\r';
}
static inline bool isnl(char c)
{
return c == '\n' || c == '\r';
}
enum fh_error fh_process_line(struct fh_thread_s *fh, char *linebuf)
{
enum fh_error rv;
char *rp = linebuf;
char c;
LOGI("%s", linebuf);
while (0 != (c = *rp) && fh->state != FH_STATE_SHUTDOWN) {
/* end on newline */
if (isnl(c)) {
goto done;
}
/* skip whitespace */
if (iswhite(c)) {
rp++;
continue;
}
char *end;
size_t length;
switch (fh->substate) {
case FH_SUBSTATE_NONE:
case FH_SUBSTATE_COLONNAME:
/* try to read a word */
end = strchr(rp, ' ');
if (end) {
length = end - rp; /* exclude the space */
} else {
length = strlen(rp);
}
if (fh->substate == FH_SUBSTATE_NONE) {
/* eval a word */
LOG("Handle \"%.*s\"", (int)length, rp);
TRY(fh_handle_word(fh, rp, length));
} else {
/* new word's name is found */
LOG("New word name = \"%.*s\"", (int)length, rp);
strncpy(fh->dict[fh->dict_top].name, rp, length);
fh->substate = FH_SUBSTATE_NONE;
}
if (end) {
rp = end + 1;
} else {
goto done;
}
break;
case FH_SUBSTATE_SQUOTE:
case FH_SUBSTATE_DOTQUOTE:
end = strchr(rp, '"');
if (end) {
length = end - rp - 1;
LOG("Quoted string: \"%.*s\"", (int)length, rp);
TRY(fh_handle_quoted_string(fh, rp, length));
fh->substate = FH_SUBSTATE_NONE;
rp = end + 1;
} else {
/* no end. this is weird. */
LOGE("Unterminated quoted string!");
goto done;
}
break;
case FH_SUBSTATE_PARENCOMMENT:
end = strchr(rp, ')');
if (end) {
LOG("Discard inline comment");
fh->substate = FH_SUBSTATE_NONE;
rp = end + 1;
} else {
/* no end, discard all */
LOGE("Unterminated parenthesis comment");
goto done;
}
break;
case FH_SUBSTATE_LINECOMMENT:
LOG("Discard line comment");
goto done; // just discard the rest
}
}
done:
LOG("Line done.");
return FH_OK;
}
int main(int argc, char *argv[])
{
enum fh_error rv;
struct fh_thread_s fh;
rv = fh_init_thread(&fh);
if (rv != FH_OK) {
LOGE("Error in forth init: %s", fherr_name(rv));
return 1;
}
bool interactive = true;
FILE *infile = stdin;
// TODO use getopt?
for (int a = 1; a < argc; a++) {
if (argv[a][0] == '-') {
// opt
} else {
infile = fopen(argv[a], "r");
interactive = false;
if (!infile) {
LOGE("Error opening infile: %s", argv[a]);
return 1;
}
}
}
/* process input line by line */
int linecnt = 0;
char linebuf[MAXLINE];
while (fh.state != FH_STATE_SHUTDOWN && fgets(linebuf, MAXLINE, infile)) {
linecnt++;
// trim
size_t end = strlen(linebuf) -1 ;
while (iswhite(linebuf[end])) {
linebuf[end] = 0;
}
if (!linebuf[0]) {
continue;
}
rv = fh_process_line(&fh, linebuf);
if (rv == FH_OK) {
FHPRINT("ok\r\n");
} else {
LOGE("ERROR %s on line %d", fherr_name(rv), linecnt);
return 1;
}
}
FHPRINT("Bye.\r\n");
return 0;
}