Trying to build a forth runtime in C
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
forth/src/fh_parse.c

433 lines
10 KiB

#include "forth_internal.h"
/** True if the character is CR or LF */
static inline bool isnl(char c)
{
return c == '\n' || c == '\r';
}
static const char * strnchr(const char* s, char c, size_t len)
{
while (len > 0) {
char x = *s;
if (!x) {
break;
}
if (x == c) {
return s;
}
s++;
len--;
}
return NULL;
}
/** Process a word read from input */
enum fh_error fh_handle_ascii_word(
struct fh_thread_s *fh,
const char *name,
const size_t wordlen
)
{
enum fh_error rv;
if (wordlen >= MAX_NAME_LEN) {
return FH_ERR_NAME_TOO_LONG;
}
/* First, try if it's a known word */
uint32_t wadr = 0;
if (FH_OK == fh_find_word(fh, name, wordlen, &wadr)) {
TRY(fh_handle_word(fh, wadr));
return FH_OK;
}
/* word not found, try parsing as number */
errno = 0;
char *endptr;
int base = (int) fh->base;
// prefix can override BASE - this is a syntax extension
if (name[0] == '0') {
if (name[1] == 'x') {
base = 16;
} else if (name[1] == 'b') {
base = 2;
} else if (name[1] == 'o') {
base = 8;
}
}
long v = strtol(name, &endptr, base); // if base is 0, this will use auto-detection
if (errno != 0 || (endptr - name) != wordlen) {
LOGE("Unknown word and fail to parse as number: \"%.*s\", len %d", (int) wordlen, name, wordlen);
return FH_ERR_UNKNOWN_WORD;
}
if (fh->state == FH_STATE_COMPILE) {
LOG("\x1b[34m[COM] Compile number:\x1b[m %ld", v);
TRY(fh_put_instr(fh, FH_INSTR_NUMBER, (uint32_t) v));
} else {
/* interpret */
LOG("\x1b[35m[INT] Push number:\x1b[m %ld", v);
TRY(ds_push(fh, (uint32_t) v));
}
return FH_OK;
}
void fh_input_consume_matching(struct fh_thread_s *fh, chartest_t test, void *param)
{
char *rp = (char *) &fh->heap[fh->inputaddr + fh->inputptr];
while (test(*rp, param)) {
rp++;
fh->inputptr++;
}
}
void fh_input_consume_spaces(struct fh_thread_s *fh)
{
char *rp = (char *) &fh->heap[fh->inputaddr + fh->inputptr];
while (isspace(*rp)) {
rp++;
fh->inputptr++;
}
}
enum fh_error fh_input_read_delimited(struct fh_thread_s *fh, char **out, size_t *len, chartest_t test, void *param)
{
char *rp = (char *) &fh->heap[fh->inputaddr + fh->inputptr];
char *start = rp;
while (fh->inputptr < fh->inputlen) {
char c = *rp;
if (test(c, param)) {
if (rp == start) {
// nothing was found, this is probably OK.
LOG("Parsed an empty word");
}
*out = start;
*len = rp - start;
fh->inputptr++; // advance past the delimiter
return FH_OK;
}
rp++;
fh->inputptr++;
}
*out = start;
*len = rp - start;
return FH_OK;
}
bool fh_chartest_space_or_end(char c, void *param)
{
(void) param;
return isspace(c) || c == 0;
}
bool fh_chartest_equals_or_end(char c, void *param)
{
char cc = *(char *) param;
return cc == c || c == 0;
}
enum fh_error fh_input_read_word(struct fh_thread_s *fh, char **out, size_t *len)
{
return fh_input_read_delimited(fh, out, len, fh_chartest_space_or_end, NULL);
}
enum fh_error fh_input_read_quotedstring(struct fh_thread_s *fh, bool escaped, char *outbuf, size_t capacity, size_t *out_len)
{
char *rp = (char *) &fh->heap[fh->inputaddr + fh->inputptr];
bool next_escaped = false;
size_t remains = capacity;
size_t len = 0;
int hexdigits = 0;
uint32_t hex = 0;
while (len < capacity) {
char c = *rp;
if (c == 0) {
LOGE("Unterminated quoted string!");
return FH_ERR_SYNTAX;
}
if (hexdigits) {
hex <<= 4;
if (isdigit(c)) {
hex |= c - '0';
} else if (c >= 'a' && c <= 'f') {
hex |= 10 + (c - 'a');
} else if (c >= 'A' && c <= 'F') {
hex |= 10 + (c - 'A');
} else {
LOGE("Bad hex escape");
return FH_ERR_SYNTAX;
}
hexdigits--;
if (hexdigits == 0) {
c = (char) hex;
goto append;
} else {
goto skip;
}
}
if (!escaped || !next_escaped) {
if (c == '\"') {
*outbuf = 0;
*out_len = len;
// advance past the quote
fh->inputptr++;
return FH_OK;
}
if (c == '\\') {
next_escaped = true;
goto skip;
}
} else {
next_escaped = false;
switch (c) {
case 'a':
c = 7;
break;
case 'b':
c = 8;
break;
case 'e':
c = 27;
break;
case 'f':
c = 12;
break;
case 'l':
c = 10;
break;
case 'm':
case 'n':
if (remains < 2) { goto full; }
*outbuf++ = '\r';
*outbuf++ = '\n';
remains -= 2;
len += 2;
goto skip;
case 'q':
c = '"';
break;
case 'r':
c = '\r';
break;
case 't':
c = '\t';
break;
case 'v':
c = '\v';
break;
case 'z':
c = 0;
break; // this will cause problems with string printing
case 'x':
hex = 0;
hexdigits = 2;
goto skip;
default:;
// just append normally
}
}
append:
*outbuf++ = c;
len++;
skip:
rp++;
fh->inputptr++;
}
full:
LOGE("String too long!");
return FH_ERR_SYNTAX;
}
enum fh_error fh_process_line(struct fh_thread_s *fh);
enum fh_error fh_runtime_start(struct fh_thread_s *fh, struct fh_input_spec_s *input)
{
enum fh_error rv;
void *original_input = fh->input;
TRY(fh_push_input(fh, input));
if (fh_globals.interactive) {
FHPRINT("%s", FH_PROMPT_STR);
}
input->start(fh, input);
while (1) {
//LOG("Refill input buffer");
if (fh->input->refill_input_buffer(fh, fh->input)) {
// discard spaces at the end
while (isspace(fh->heap[fh->inputaddr + fh->inputlen - 1]) && fh->inputlen > 0) {
fh->heap[fh->inputaddr + fh->inputlen - 1] = 0;
fh->inputlen--;
}
if (fh->inputlen == 0) {
continue;
}
rv = fh_process_line(fh);
if (rv == FH_OK) {
if (fh->state == FH_STATE_SHUTDOWN) {
return FH_OK;
}
if (fh_globals.interactive || fh_globals.echo) {
FHPRINT_SVC(" ok\n");
}
} else {
LOGE("Error %s on line %d in source %d", fherr_name(rv), fh->input->linenum, fh->input->source_id);
if (fh_globals.interactive || fh_globals.rescue) {
LOGE("Drop to interactive");
fh_drop_to_interactive(fh);
} else {
LOGE("Shutting down due to error");
fh->state = FH_STATE_SHUTDOWN;
return FH_OK;
}
}
if (fh_globals.interactive) {
FHPRINT("%s", FH_PROMPT_STR);
}
} else {
LOG("Pop input");
TRY(fh_pop_input(fh));
if (fh->input == original_input || !fh->input) {
// we are done
break;
}
}
}
return FH_OK;
}
/** Process a line read from input */
enum fh_error fh_process_line(struct fh_thread_s *fh)
{
enum fh_error rv;
#define ReadPtr ((char*)(&fh->heap[fh->inputaddr + fh->inputptr]))
#define ReadPos (fh->inputptr)
#define ReadLen (fh->inputlen)
//fh_fill_input_buffer(fh, linebuf, len);
char c;
if (fh_globals.echo && !fh_globals.interactive) {
LOGI("%.*s", ReadLen, ReadPtr);
}
while (ReadPos < ReadLen && fh->state != FH_STATE_SHUTDOWN) {
c = *ReadPtr;
/* end on newline */
if (isnl(c)) {
goto done;
}
/* skip whitespace */
if (isspace(c)) {
ReadPos++;
continue;
}
const char *const rp = ReadPtr;
const char *end;
size_t length;
switch (fh->substate) {
case FH_SUBSTATE_NONE:
/* try to read a word */
size_t maxlen = ReadLen - ReadPos;
end = strnchr(rp, ' ', maxlen);
if (end) {
length = end - rp; /* exclude the space */
} else {
length = strnlen(rp, maxlen);
}
ReadPos += length + 1;
if (EQ(rp, "[if]", length)) {
if (0 == fh->parse_if_level) {
uint32_t val;
TRY(ds_pop(fh, &val));
if (!val) {
LOG("\x1b[32m[if] false, start skipping\x1b[m");
fh->parse_if_level++;
} else {
LOG("\x1b[32m[if] true, proceed\x1b[m");
}
} else {
LOG("\x1b[32m[if] nest+\x1b[m");
fh->parse_if_level++;
}
} else if (EQ(rp, "[else]", length)) {
if (fh->parse_if_level == 1) {
// we got here by running the [if] branch
LOG("\x1b[3m[else] end of false skip\x1b[m");
fh->parse_if_level--;
}
} else if (EQ(rp, "[then]", length)) {
if (fh->parse_if_level > 0) {
fh->parse_if_level--;
if (fh->parse_if_level == 0) {
LOG("\x1b[32m[then] end of skipped section\x1b[m");
} else {
LOG("\x1b[32m[then] nest-\x1b[m");
}
} else {
LOG("\x1b[32m[then] end of conditional\x1b[m");
}
} else if (fh->parse_if_level == 0) {
/* eval a word */
LOG("Handle \"%.*s\"", (int) length, rp);
TRY(fh_handle_ascii_word(fh, rp, length));
} else {
if (EQ(rp, "\\", length)) {
// discard to EOL
// LOG("Discard \"%.*s\"", (int)(fh->inputlen - fh->inputptr + length), rp);
goto done;
}
LOG("Discard \"%.*s\"", (int) length, rp);
}
break;
case FH_SUBSTATE_PAREN_COMMENT:
end = strchr(rp, ')');
if (end) {
length = end - rp;
LOG("Discard inline comment: \"%.*s\"", (int)length, rp);
fh_setsubstate(fh, FH_SUBSTATE_NONE);
ReadPos += length + 1;
} else {
/* no end, discard all */
LOGE("Unterminated parenthesis comment");
goto done;
}
break;
case FH_SUBSTATE_LINE_COMMENT:
LOG("Discard line comment: \"%.*s\"", fh->inputlen - fh->inputptr, rp);
fh_setsubstate(fh, 0);
goto done; // just discard the rest
default:
LOGE("Bad substate %s", substatenames[fh->substate]);
}
}
done:
LOG("Line done, rp %d, len %d", ReadPos, ReadLen);
return FH_OK;
}