initial WIP

master
Ondřej Hruška 3 years ago
commit 72ff4b510d
Signed by: MightyPork
GPG Key ID: 2C5FD5035250423D
  1. 4
      .gitignore
  2. 8
      CMakeLists.txt
  3. 737
      main.c

4
.gitignore vendored

@ -0,0 +1,4 @@
cmake-*
*~
*.bak
.idea/

@ -0,0 +1,8 @@
cmake_minimum_required(VERSION 3.20)
project(forth C)

# main.c uses _Static_assert, which is a C11 feature, so request C11
# explicitly instead of C99 and refuse to silently fall back.
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED ON)

add_executable(forth
    main.c
)

737
main.c

@ -0,0 +1,737 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
/* Capacity, in 32-bit cells, of the three stacks kept in struct fh_thread_s. */
#define CONTROL_STACK_DEPTH 1024
#define DATA_STACK_DEPTH 1024
#define RETURN_STACK_DEPTH 1024
/* Size of the name buffer of a dictionary entry, terminating NUL included. */
#define MAX_NAME_LEN 64
/* Number of entries in the word dictionary. */
#define DICT_SIZE 1024
/* Size in bytes of the buffer holding compiled instructions and string literals. */
#define COMPILED_BUFFER_SIZE (1024*1024)
/* Size in bytes of the data heap. */
#define HEAP_SIZE (1024*1024)
/* Size in bytes of the input line buffer. */
#define MAXLINE 65535
/* Forward declarations of the interpreter's core structures. */
struct fh_thread_s;
struct fh_word_s;
struct fh_instruction_s;
/* Sentinel execution pointer: when this value is popped from the return
 * stack as a return address, execution of compiled code stops and control
 * drops back to interactive mode. */
#define MAGICADDR_INTERACTIVE 0xFFFFFFFFULL
/*
 * Round the unsigned lvalue `var` up, in place, to the next multiple of 4.
 * A value that is already a multiple of 4 is left unchanged.
 * Note: `var` is evaluated more than once, so it must have no side effects.
 */
#define ALIGNWORD(var) \
    do { \
        (var) += (4u - ((var) % 4u)) % 4u; \
    } while (0)
/*
 * Status codes returned by the interpreter functions.
 * FH_OK is zero; every error code is negative.
 * Prefixes: CS = control stack, DS = data stack, RS = return stack.
 */
enum fh_error {
FH_OK = 0,
/* push attempted on a full stack */
FH_ERR_CS_OVERFLOW = -1,
FH_ERR_DS_OVERFLOW = -2,
FH_ERR_RS_OVERFLOW = -3,
/* pop attempted on an empty stack */
FH_ERR_CS_UNDERFLOW = -4,
FH_ERR_DS_UNDERFLOW = -5,
FH_ERR_RS_UNDERFLOW = -6,
/* no room left for the requested allocation */
FH_ERR_HEAP_FULL = -7,
/* the word dictionary has no free entry */
FH_ERR_DICT_FULL = -8,
/* judging by the name: the compile buffer has no room left */
FH_ERR_COMPILE_FULL = -9,
/* a word token does not fit into MAX_NAME_LEN */
FH_ERR_NAME_TOO_LONG = -10,
/* a word was used in a state where it is not allowed (e.g. ':' while compiling) */
FH_ERR_INVALID_STATE = -11,
/* internal inconsistency, e.g. no current word set when executing a user word */
FH_ERR_INTERNAL = -12,
};
/* Handler invoked to execute a word; returns FH_OK or an error code. */
typedef enum fh_error (*word_exec_t)(struct fh_thread_s *fh);
/* One dictionary entry. */
struct fh_word_s {
/* Word name; lookup expects it to be NUL-terminated. */
char name[MAX_NAME_LEN];
/* Function that runs the word. For colon definitions this is w_user_word.
 * A NULL handler marks the end of the used part of the dictionary. */
word_exec_t handler;
/* True for words implemented in C; false for colon definitions. */
bool builtin;
/* Offset into the compile buffer where a colon definition starts. */
uint32_t start;
/* Compile buffer offset one past the end of a colon definition. */
uint32_t end;
};
/* Discriminator telling how the `data` field of a compiled instruction is
 * to be interpreted. */
enum fb_instruction_kind {
/* Data is a word number in the dict, or one of the special
 * enum compiler_word values */
FH_INSTR_WORD,
/* Data is a numeric value to push on the data stack */
FH_INSTR_NUMBER,
};
/* One compiled instruction as stored in the compile buffer. */
struct fh_instruction_s {
/* Selects the meaning of `data`. */
enum fb_instruction_kind kind;
/* Dictionary index / special compiler word, or a literal number,
 * depending on `kind`. */
uint32_t data;
};
/**
 * Pseudo-words that are not in the dict and have a special effect when
 * met in compiled code. Their values start above DICT_SIZE so they can
 * never be equal to a valid dictionary index.
 */
enum compiler_word {
/* End of a colon definition: return to the caller. */
CPLWORD_ENDWORD = DICT_SIZE + 1,
/* Followed by a length and string bytes: copy the string to the heap
 * and push its address and length. */
CPLWORD_ALLOCSTR,
/* Followed by a length and string bytes: print the string. */
CPLWORD_TYPESTR,
};
/* Compile-time check that the instruction size is a multiple of 4, so that
 * data stored right after an instruction stays 4-byte aligned. The message
 * is shown when the check FAILS. */
_Static_assert(sizeof(struct fh_instruction_s) % 4 == 0, "Instruction struct is aligned");
/* Top-level mode of the interpreter. */
enum fh_state {
/* Words are executed as soon as they are read. */
FH_STATE_INTERPRET = 0,
/* Words are appended to the definition currently being compiled. */
FH_STATE_COMPILE,
/* Set by the `bye` word; the input loops stop when they see it. */
FH_STATE_SHUTDOWN,
};
/* Parser sub-state: tells the input processor how to treat the text that
 * follows the word just handled. */
enum fh_substate {
/* Ordinary word parsing. */
FH_SUBSTATE_NONE = 0,
/* After ':': the next token is the name of the new word. */
FH_SUBSTATE_COLONNAME,
/* After 's"': text up to the closing quote is a string literal. */
FH_SUBSTATE_SQUOTE,
/* After '."': text up to the closing quote is to be printed. */
FH_SUBSTATE_DOTQUOTE,
/* After '(': skip input up to the closing ')'. */
FH_SUBSTATE_PARENCOMMENT,
/* After '\': skip input up to the next newline. */
FH_SUBSTATE_LINECOMMENT,
};
/*
 * Complete state of one interpreter instance: stacks, heap, compiled code,
 * dictionary, parser state and the current input line.
 * Each *_top field is the number of used cells/bytes, i.e. the index of the
 * next free slot.
 */
struct fh_thread_s {
/** Control stack */
uint32_t control_stack[CONTROL_STACK_DEPTH];
size_t control_stack_top;
/** Data stack */
uint32_t data_stack[DATA_STACK_DEPTH];
size_t data_stack_top;
/** Return stack; holds saved execution pointers while user words run */
uint32_t return_stack[RETURN_STACK_DEPTH];
size_t return_stack_top;
/** Data heap */
uint8_t heap[HEAP_SIZE];
size_t heap_top;
/** Compile buffer, used for both word data and literals */
uint8_t compile[COMPILED_BUFFER_SIZE];
size_t compile_top;
/** Pointer into the compile buffer for execution (a byte offset);
 * MAGICADDR_INTERACTIVE when no compiled code is running */
uint32_t execptr;
/** Word dict */
struct fh_word_s dict[DICT_SIZE];
uint32_t dict_top;
/** Forth state */
enum fh_state state;
/** Forth sub-state */
enum fh_substate substate;
/** Word currently being executed - a pointer is placed here
* before calling the handler */
struct fh_word_s *exec_word;
/** Current input line, its length in bytes, and the offset of the next
 * unread character */
char linebuf[MAXLINE];
size_t linebuf_len;
size_t linebuf_readptr;
};
/*
 * Evaluate `x`; if the result is not FH_OK, return it from the enclosing
 * function. Requires a local variable `rv` of type enum fh_error in scope.
 */
#define TRY(x) \
do { \
if (FH_OK != (rv = (x))) return rv; \
} while (0)
/*
 * Like TRY, but on error jump to the label `fail` (with the code left in
 * `rv`) instead of returning. Requires `rv` and a `fail:` label in the
 * enclosing function.
 */
#define TRY_FAIL(x) \
do { \
if (FH_OK != (rv = (x))) goto fail; \
} while (0)
/** Add a word to the dictionary. */
enum fh_error fh_add_word(const struct fh_word_s *w, struct fh_thread_s *fh)
{
if (fh->dict_top == DICT_SIZE) {
return FH_ERR_DICT_FULL;
}
memcpy(&fh->dict[fh->dict_top++], w, sizeof(struct fh_word_s));
return FH_OK;
}
//region Push & Pop
/**
 * Pop the top cell of the data stack into *out.
 *
 * @return FH_OK, or FH_ERR_DS_UNDERFLOW if the stack is empty
 *         (in which case *out is not written)
 */
static inline enum fh_error ds_pop(struct fh_thread_s *fh, uint32_t *out)
{
    if (fh->data_stack_top != 0) {
        fh->data_stack_top -= 1;
        *out = fh->data_stack[fh->data_stack_top];
        return FH_OK;
    }

    return FH_ERR_DS_UNDERFLOW;
}
/**
 * Pop the top cell of the return stack into *out.
 *
 * @return FH_OK, or FH_ERR_RS_UNDERFLOW if the stack is empty
 *         (in which case *out is not written)
 */
static inline enum fh_error rs_pop(struct fh_thread_s *fh, uint32_t *out)
{
    if (fh->return_stack_top != 0) {
        fh->return_stack_top -= 1;
        *out = fh->return_stack[fh->return_stack_top];
        return FH_OK;
    }

    return FH_ERR_RS_UNDERFLOW;
}
/**
 * Pop the top cell of the control stack into *out.
 *
 * @return FH_OK, or FH_ERR_CS_UNDERFLOW if the stack is empty
 *         (in which case *out is not written)
 */
static inline enum fh_error cs_pop(struct fh_thread_s *fh, uint32_t *out)
{
    if (fh->control_stack_top != 0) {
        fh->control_stack_top -= 1;
        *out = fh->control_stack[fh->control_stack_top];
        return FH_OK;
    }

    return FH_ERR_CS_UNDERFLOW;
}
/**
 * Push a cell onto the data stack.
 *
 * @return FH_OK, or FH_ERR_DS_OVERFLOW if the stack already holds
 *         DATA_STACK_DEPTH cells
 */
static inline enum fh_error ds_push(struct fh_thread_s *fh, uint32_t in)
{
    if (fh->data_stack_top != DATA_STACK_DEPTH) {
        fh->data_stack[fh->data_stack_top] = in;
        fh->data_stack_top += 1;
        return FH_OK;
    }

    return FH_ERR_DS_OVERFLOW;
}
/**
 * Push a cell onto the return stack.
 *
 * @return FH_OK, or FH_ERR_RS_OVERFLOW if the stack already holds
 *         RETURN_STACK_DEPTH cells
 */
static inline enum fh_error rs_push(struct fh_thread_s *fh, uint32_t in)
{
    if (fh->return_stack_top != RETURN_STACK_DEPTH) {
        fh->return_stack[fh->return_stack_top] = in;
        fh->return_stack_top += 1;
        return FH_OK;
    }

    return FH_ERR_RS_OVERFLOW;
}
/**
 * Push a cell onto the control stack.
 *
 * @return FH_OK, or FH_ERR_CS_OVERFLOW if the stack already holds
 *         CONTROL_STACK_DEPTH cells
 */
static inline enum fh_error cs_push(struct fh_thread_s *fh, uint32_t in)
{
    if (fh->control_stack_top != CONTROL_STACK_DEPTH) {
        fh->control_stack[fh->control_stack_top] = in;
        fh->control_stack_top += 1;
        return FH_OK;
    }

    return FH_ERR_CS_OVERFLOW;
}
//endregion Push & Pop
/**
 * Reserve `len` bytes on the data heap.
 *
 * The returned offset is 4-byte aligned, and the heap top is left 4-byte
 * aligned after the reservation.
 *
 * @param fh   interpreter instance
 * @param len  number of bytes to reserve
 * @param addr receives the offset of the reserved region within fh->heap
 * @return FH_OK, or FH_ERR_HEAP_FULL if the region does not fit
 *         (heap top and *addr are then left untouched)
 */
enum fh_error fh_allot(struct fh_thread_s *fh, size_t len, uint32_t *addr)
{
    uint32_t base = fh->heap_top;
    ALIGNWORD(base);

    size_t new_top = base + len;
    if (new_top <= HEAP_SIZE) {
        *addr = base;
        ALIGNWORD(new_top);
        fh->heap_top = new_top;
        return FH_OK;
    }

    return FH_ERR_HEAP_FULL;
}
enum fh_error fh_compile_reserve(
struct fh_thread_s *fh,
size_t len,
uint32_t *addr
)
{
uint32_t p = fh->compile_top;
// align up
ALIGNWORD(p);
if (p + len > COMPILED_BUFFER_SIZE) {
return FH_ERR_HEAP_FULL;
}
*addr = p;
size_t next = p + len;
ALIGNWORD(next);
fh->compile_top = next;
return FH_OK;
}
//region Builtin Words
enum fh_error w_add(struct fh_thread_s *fh)
{
enum fh_error rv;
uint32_t a = 0, b = 0;
TRY(ds_pop(fh, &a));
TRY(ds_pop(fh, &b));
TRY(ds_push(fh, a + b));
return FH_OK;
}
/**
 * Word `-` ( n1 n2 -- n1-n2 ): pop two cells and push the difference.
 *
 * The cell that was on top of the stack (n2) is subtracted from the cell
 * below it (n1), so `5 3 -` leaves 2. Arithmetic is unsigned 32-bit and
 * wraps around.
 *
 * @return FH_OK or the error from the failing stack operation
 */
enum fh_error w_sub(struct fh_thread_s *fh)
{
    enum fh_error rv;
    uint32_t top = 0, below = 0;
    TRY(ds_pop(fh, &top));      /* n2: pushed last  */
    TRY(ds_pop(fh, &below));    /* n1: pushed first */
    TRY(ds_push(fh, below - top));
    return FH_OK;
}
enum fh_error w_mul(struct fh_thread_s *fh)
{
enum fh_error rv;
uint32_t a = 0, b = 0;
TRY(ds_pop(fh, &a));
TRY(ds_pop(fh, &b));
TRY(ds_push(fh, a * b));
return FH_OK;
}
enum fh_error w_user_word(struct fh_thread_s *fh)
{
enum fh_error rv;
const struct fh_word_s *w;
const struct fh_word_s *w2;
uint32_t wn;
call:
w = fh->exec_word;
if (!w) { return FH_ERR_INTERNAL; }
TRY(rs_push(fh, fh->execptr));
fh->execptr = w->start;
instr:;
// make sure it's aligned
ALIGNWORD(fh->execptr);
const struct fh_instruction_s *instr = (const struct fh_instruction_s *) &fh->compile[fh->execptr];
fh->execptr += sizeof(struct fh_instruction_s);
uint32_t strl;
uint32_t addr = 0;
switch (instr->kind) {
case FH_INSTR_NUMBER:
TRY(ds_push(fh, instr->data));
goto instr;
case FH_INSTR_WORD:
wn = instr->data;
switch (wn) {
case CPLWORD_ALLOCSTR:
case CPLWORD_TYPESTR:
strl = *((uint32_t *) &fh->compile[fh->execptr]);
fh->execptr += 4;
if (wn == CPLWORD_ALLOCSTR) {
TRY(fh_allot(fh, strl, &addr));
memcpy(&fh->heap[addr], &fh->compile[fh->execptr], strl);
TRY(ds_push(fh, addr));
TRY(ds_push(fh, strl));
fh->execptr += strl;
} else {
printf("%.*s", (int) strl, &fh->compile[fh->execptr]);
}
goto instr;
case CPLWORD_ENDWORD:
TRY(rs_pop(fh, &fh->execptr));
if (fh->execptr == MAGICADDR_INTERACTIVE) {
goto end;
}
goto instr;
default:
w2 = &fh->dict[instr->data];
if (w2->builtin) {
w2->handler(fh);
goto instr;
} else {
fh->exec_word = &fh->dict[instr->data];
goto call;
}
}
}
end:
return FH_OK;
}
enum fh_error w_colon(struct fh_thread_s *fh)
{
if (fh->state != FH_STATE_INTERPRET) {
return FH_ERR_INVALID_STATE;
}
fh->state = FH_STATE_COMPILE;
fh->substate = FH_SUBSTATE_COLONNAME;
if (fh->dict_top >= DICT_SIZE) {
return FH_ERR_DICT_FULL;
}
fh->dict[fh->dict_top].start = fh->compile_top;
fh->dict[fh->dict_top].handler = w_user_word;
return FH_OK;
}
enum fh_error w_semicolon(struct fh_thread_s *fh)
{
enum fh_error rv;
uint32_t addr = 0;
struct fh_instruction_s instr;
if (fh->state != FH_STATE_COMPILE) {
return FH_ERR_INVALID_STATE;
}
TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr));
instr.kind = FH_INSTR_WORD;
instr.data = CPLWORD_ENDWORD;
memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s));
/* Return to interpret state */
fh->state = FH_STATE_INTERPRET;
fh->dict[fh->dict_top].end = fh->compile_top; /* one past the end cell */
fh->dict_top++;
return FH_OK;
}
enum fh_error w_dot(struct fh_thread_s *fh)
{
enum fh_error rv;
uint32_t a = 0;
TRY(ds_pop(fh, &a));
printf("%d ", (int32_t) a);
return FH_OK;
}
enum fh_error w_type(struct fh_thread_s *fh)
{
enum fh_error rv;
uint32_t count = 0, addr = 0;
TRY(ds_pop(fh, &count));
TRY(ds_pop(fh, &addr));
printf("%.*s", count, &fh->heap[addr]);
return FH_OK;
}
/**
 * Word `cr`: print a CR LF line break.
 *
 * @param fh unused
 * @return always FH_OK
 */
enum fh_error w_cr(struct fh_thread_s *fh)
{
    (void) fh;
    fputs("\r\n", stdout);
    return FH_OK;
}
/**
 * Word `space`: print a single space character.
 *
 * @param fh unused
 * @return always FH_OK
 */
enum fh_error w_space(struct fh_thread_s *fh)
{
    (void) fh;
    fputs(" ", stdout);
    return FH_OK;
}
/**
 * Word `s"`: tell the parser that the following text, up to the closing
 * double quote, is a string literal.
 *
 * @return always FH_OK
 */
enum fh_error w_s_quote(struct fh_thread_s *fh)
{
    fh->substate = FH_SUBSTATE_SQUOTE;

    return FH_OK;
}
/**
 * Word `."`: tell the parser that the following text, up to the closing
 * double quote, is a string to be printed.
 *
 * @return always FH_OK
 */
enum fh_error w_dot_quote(struct fh_thread_s *fh)
{
    fh->substate = FH_SUBSTATE_DOTQUOTE;

    return FH_OK;
}
/**
 * Word `\`: start a line comment; the parser skips input up to the next
 * newline.
 *
 * @return always FH_OK
 */
enum fh_error w_backslash(struct fh_thread_s *fh)
{
    fh->substate = FH_SUBSTATE_LINECOMMENT;

    return FH_OK;
}
/**
 * Word `(`: start an enclosed comment; the parser skips input up to the
 * closing `)`.
 *
 * @return always FH_OK
 */
enum fh_error w_paren(struct fh_thread_s *fh)
{
    fh->substate = FH_SUBSTATE_PARENCOMMENT;

    return FH_OK;
}
/**
 * Word `bye`: request shutdown. The input loops stop once they see the
 * shutdown state.
 *
 * @return always FH_OK
 */
enum fh_error w_bye(struct fh_thread_s *fh)
{
    fh->state = FH_STATE_SHUTDOWN;

    return FH_OK;
}
enum fh_error register_builtin_words(struct fh_thread_s *fh)
{
struct name_and_handler {
const char *name;
word_exec_t handler;
};
const struct name_and_handler builtins[] = {
{"s\"", w_s_quote},
{".\"", w_dot_quote},
/* Compiler control words */
{"bye", w_bye},
/* Basic arithmetics */
{"+", w_add},
{"-", w_sub},
{"*", w_mul},
/* Control words */
{":", w_colon},
{";", w_semicolon},
{".", w_dot},
{"type", w_type},
{"cr", w_cr},
{"space", w_space},
{"\\", w_backslash}, // line comment
{"(", w_paren}, // enclosed comment
{ /* end marker */ }
};
struct fh_word_s w;
const struct name_and_handler *p = builtins;
enum fh_error rv;
while (p->handler) {
strcpy(w.name, p->name);
w.handler = p->handler;
w.builtin = 1;
rv = fh_add_word(&w, fh);
if (rv != FH_OK) {
return rv;
}
p++;
}
return FH_OK;
}
#undef ADDWORD
//endregion Builtin Words
enum fh_error fh_init_thread(struct fh_thread_s *fh)
{
enum fh_error rv;
/* Make sure we have a clean state */
memset(fh, 0, sizeof(struct fh_thread_s));
TRY(register_builtin_words(fh));
fh->execptr = MAGICADDR_INTERACTIVE;
return FH_OK;
}
enum fh_error fh_handle_quoted_string(
struct fh_thread_s *fh,
char *start,
size_t len
)
{
enum fh_error rv;
uint32_t addr = 0;
uint32_t addr2 = 0;
struct fh_instruction_s instr;
if (fh->state == FH_STATE_INTERPRET) {
switch (fh->substate) {
case FH_SUBSTATE_SQUOTE:
TRY(fh_allot(fh, len, &addr));
memcpy(&fh->heap[addr], start, len);
TRY(ds_push(fh, addr));
TRY(ds_push(fh, len));
break;
case FH_SUBSTATE_DOTQUOTE:
printf("%.*s", (int) len, start);
break;
default:
printf("!!! Bad substate\r\n");
}
} else {
/* compile */
TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr));
TRY(fh_compile_reserve(fh, len + 4, &addr2));
instr.kind = FH_INSTR_WORD;
instr.data = fh->substate == FH_SUBSTATE_SQUOTE ?
CPLWORD_ALLOCSTR :
CPLWORD_TYPESTR;
uint32_t len32 = len;
/* string is encoded as a special compiler command, the size,
* and then the string, all 4-byte aligned. */
memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s));
memcpy(&fh->compile[addr2], &len32, 4);
memcpy(&fh->compile[addr2 + 4], &start, len);
}
return FH_OK;
}
enum fh_error fh_handle_word(
struct fh_thread_s *fh,
char *start,
size_t len
)
{
if (len >= MAX_NAME_LEN) {
return FH_ERR_NAME_TOO_LONG;
}
/* First, try if it's a known word */
struct fh_word_s *w = &fh->dict[0];
struct fh_instruction_s instr;
uint32_t cnt = 0;
uint32_t addr = 0;
enum fh_error rv;
while (w->handler) {
if (0 == strncasecmp(start, w->name, len) && w->name[len]==0) {
// word found!
if (fh->state == FH_STATE_COMPILE) {
TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr));
instr.kind = FH_INSTR_WORD;
instr.data = cnt;
memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s));
} else {
/* interpret */
fh->exec_word = w;
TRY(w->handler(fh));
}
return FH_OK;
}
w++;
cnt++;
}
/* word not found, try parsing as number */
long v = strtol(start, NULL, 0);
if (fh->state == FH_STATE_COMPILE) {
TRY(fh_compile_reserve(fh, sizeof(struct fh_instruction_s), &addr));
instr.kind = FH_INSTR_NUMBER;
instr.data = (uint32_t) v;
memcpy(&fh->compile[addr], &instr, sizeof(struct fh_instruction_s));
} else {
/* interpret */
TRY(ds_push(fh, (uint32_t)v));
}
return FH_OK;
}
/**
 * Tell whether a character separates tokens: space, newline, tab or
 * carriage return.
 */
static bool iswhite(char c)
{
    switch (c) {
        case ' ':
        case '\n':
        case '\t':
        case '\r':
            return true;
        default:
            return false;
    }
}
enum fh_error fh_process_input(struct fh_thread_s *fh)
{
enum fh_error rv;
char *rp = &fh->linebuf[fh->linebuf_readptr];
while (fh->linebuf_readptr < fh->linebuf_len && fh->state != FH_STATE_SHUTDOWN) {
/* skip whitespace */
char c = *rp;
if (iswhite(c)) {
rp++;
fh->linebuf_readptr++;
continue;
}
char *end;
size_t stringlen;
switch (fh->substate) {
case FH_SUBSTATE_NONE:
/* try read a word */
end = strchr(rp, ' ');
if (end) {
stringlen = end - rp;
} else {
stringlen = fh->linebuf_len - fh->linebuf_readptr;
}
// rtrim
while (stringlen > 0 && iswhite(rp[stringlen - 1])) { stringlen--; }
TRY(fh_handle_word(fh, rp, stringlen));
rp = end + 1;
fh->linebuf_readptr = rp - &fh->linebuf[0];
break;
case FH_SUBSTATE_COLONNAME:
/* find space */
end = strchr(rp, ' ');
stringlen = end - rp;
if (end) {
stringlen = end - rp;
} else {
stringlen = fh->linebuf_len - fh->linebuf_readptr;
}
// rtrim
while (stringlen > 0 && iswhite(rp[stringlen - 1])) { stringlen--; }
strncpy(fh->dict[fh->dict_top].name, rp, stringlen);
fh->substate = FH_SUBSTATE_NONE;
rp = end + 1;
fh->linebuf_readptr = rp - &fh->linebuf[0];
break;
case FH_SUBSTATE_SQUOTE:
case FH_SUBSTATE_DOTQUOTE:
end = strchr(rp, '"');
if (end) {
stringlen = end - rp - 1;
TRY(fh_handle_quoted_string(fh, rp, stringlen));
fh->substate = FH_SUBSTATE_NONE;
rp = end + 1;
fh->linebuf_readptr = rp - &fh->linebuf[0];
} else {
/* no end, discard all */
goto end;
}
break;
case FH_SUBSTATE_PARENCOMMENT:
end = strchr(rp, ')');
if (end) {
fh->substate = FH_SUBSTATE_NONE;
rp = end + 1;
fh->linebuf_readptr = rp - &fh->linebuf[0];
} else {
/* no end, discard all */
goto end;
}
break;
case FH_SUBSTATE_LINECOMMENT:
end = strchr(rp, '\n');
if (end) {
fh->substate = FH_SUBSTATE_NONE;
rp = end + 1;
fh->linebuf_readptr = rp - &fh->linebuf[0];
} else {
/* no newline, discard all */
goto end;
}
break;
}
}
end:
return FH_OK;
}
int main()
{
enum fh_error rv;
struct fh_thread_s fh;
TRY_FAIL(fh_init_thread(&fh));
while (fh.state != FH_STATE_SHUTDOWN && fgets(fh.linebuf, MAXLINE, stdin)) {
fh.linebuf_len = strlen(fh.linebuf);
fh.linebuf_readptr = 0;
rv = fh_process_input(&fh);
if (rv == FH_OK) {
printf("ok\r\n");
} else {
printf("ERROR %d\r\n", rv);
}
}
printf("Bye.\r\n");
return 0;
fail:
printf("Error %d\r\n", rv);
return 1;
}
Loading…
Cancel
Save