Trying to build a forth runtime in C
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
forth/src/fh_builtins_text.c

513 lines
13 KiB

#include "forth_internal.h"
enum fh_error ds_pop_addr_len(struct fh_thread_s *fh, uint32_t *addr, uint32_t *len)
{
enum fh_error rv;
TRY(ds_pop(fh, len));
TRY(ds_pop(fh, addr));
if (*addr >= HEAP_SIZE) { // not HEAP_END, because this can point into other buffers too
LOGE("heap string pointer out of bounds!");
return FH_ERR_ILLEGAL_FETCH;
}
return FH_OK;
}
static enum fh_error push_addr_len(struct fh_thread_s *fh, uint32_t addr, uint32_t len)
{
enum fh_error rv;
TRY(ds_push(fh, addr));
TRY(ds_push(fh, len));
return FH_OK;
}
/**
* Encode a code point using UTF-8
*
* Copied from ESPTERM source
*
* @param out - output buffer (min 4 characters), will be 0-terminated if shorten than 4
* @param utf - code point 0-0x10FFFF
* @return number of bytes on success, 0 on failure (also produces U+FFFD, which uses 3 bytes)
*/
static int utf8_encode(char *out, uint32_t utf)
{
if (utf <= 0x7F) {
// Plain ASCII
out[0] = (char) utf;
out[1] = 0;
return 1;
} else if (utf <= 0x07FF) {
// 2-byte unicode
out[0] = (char) (((utf >> 6) & 0x1F) | 0xC0);
out[1] = (char) (((utf >> 0) & 0x3F) | 0x80);
out[2] = 0;
return 2;
} else if (utf <= 0xFFFF) {
// 3-byte unicode
out[0] = (char) (((utf >> 12) & 0x0F) | 0xE0);
out[1] = (char) (((utf >> 6) & 0x3F) | 0x80);
out[2] = (char) (((utf >> 0) & 0x3F) | 0x80);
out[3] = 0;
return 3;
} else if (utf <= 0x10FFFF) {
// 4-byte unicode
out[0] = (char) (((utf >> 18) & 0x07) | 0xF0);
out[1] = (char) (((utf >> 12) & 0x3F) | 0x80);
out[2] = (char) (((utf >> 6) & 0x3F) | 0x80);
out[3] = (char) (((utf >> 0) & 0x3F) | 0x80);
// out[4] = 0;
return 4;
} else {
// error - use replacement character
out[0] = (char) 0xEF;
out[1] = (char) 0xBF;
out[2] = (char) 0xBD;
out[3] = 0;
return 0;
}
}
static enum fh_error w_dot(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t a = 0;
TRY(ds_pop(fh, &a));
FHPRINT("%"PRIi32" ", (int32_t) a);
return FH_OK;
}
static enum fh_error w_dot_r(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t a = 0, n;
TRY(ds_pop(fh, &n));
TRY(ds_pop(fh, &a));
FHPRINT("%*."PRIi32" ", n, (int32_t) a);
return FH_OK;
}
static enum fh_error w_u_dot(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t a = 0;
TRY(ds_pop(fh, &a));
FHPRINT("%"PRIu32" ", a);
return FH_OK;
}
static enum fh_error w_u_r(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t a = 0, n = 0;
TRY(ds_pop(fh, &n));
TRY(ds_pop(fh, &a));
FHPRINT("%*."PRIu32, n, a);
return FH_OK;
}
static enum fh_error w_type(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t count = 0, addr = 0;
TRY(ds_pop_addr_len(fh, &addr, &count));
const char *str = fh_str_at(fh, addr);
if (!str) { return FH_ERR_INTERNAL; }
FHPRINT("%.*s", count, str);
return FH_OK;
}
static enum fh_error w_fill(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t count = 0, addr = 0, ch;
TRY(ds_pop(fh, &ch));
TRY(ds_pop_addr_len(fh, &addr, &count));
const char *str = fh_str_at(fh, addr);
if (!str) { return FH_ERR_INTERNAL; }
if (count > 0) {
memset((void *) str, (uint8_t) ch, count);
}
return FH_OK;
}
static enum fh_error wp_putc(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) fh;
FHPRINT("%c", w->param);
return FH_OK;
}
static enum fh_error w_debug_dump(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
(void) fh;
FHPRINT("DS ");
for (int i = 0; i < fh->data_stack_top; i++) {
FHPRINT("%d ", fh->data_stack[i]);
}
FHPRINT("\nRS ");
for (int i = 0; i < fh->return_stack_top; i++) {
FHPRINT("%d ", fh->return_stack[i]);
}
FHPRINT("\n");
return FH_OK;
}
static enum fh_error w_emit(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t a;
TRY(ds_pop(fh, &a));
char buf[5];
int num = utf8_encode(buf, a);
FHPRINT("%.*s", num, buf);
return FH_OK;
}
static enum fh_error w_see(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
char *wordname;
size_t namelen = 0;
fh_input_consume_spaces(fh);
TRY(fh_input_read_word(fh, &wordname, &namelen));
TRY(fh_see_word(fh, wordname, namelen));
return FH_OK;
}
static enum fh_error w_s_quote(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
size_t len;
uint32_t addr = fh->here + (fh->state == FH_STATE_INTERPRET ? 0 : INSTR_SIZE);
/* read the string straight into HEAP */
fh_input_consume_spaces(fh);
char *start = (char *) &fh->heap[addr];
TRY(fh_input_read_quotedstring(fh, w->param == 1, start, HEAP_END - addr, &len));
fh->here = WORDALIGNED(addr + len);
struct fh_instruction_s instr;
if (fh->state == FH_STATE_INTERPRET) {
LOG("Interpret a string alloc: \"%.*s\"", (int) len, start);
TRY(push_addr_len(fh, addr, len));
} else {
LOG("Compile a string: \"%.*s\"", (int) len, start);
instr.kind = FH_INSTR_ALLOCSTR;
instr.data = len;
fh_heap_write(fh, addr - INSTR_SIZE, &instr, INSTR_SIZE);
}
return FH_OK;
}
static enum fh_error w_c_quote(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
size_t len;
uint32_t addr = fh->here + (fh->state == FH_STATE_INTERPRET ? 0 : INSTR_SIZE);
/* read the string straight into HEAP */
fh_input_consume_spaces(fh);
char *start = ((char *) &fh->heap[addr]) + 1; // space for the counter
uint32_t maxlen = HEAP_END - addr;
if (maxlen > 255) {
maxlen = 255;
}
TRY(fh_input_read_quotedstring(fh, w->param == 1, start, maxlen, &len));
fh->here = WORDALIGNED(addr + len + 1);
fh->heap[addr] = (uint8_t) len; // char count
struct fh_instruction_s instr;
if (fh->state == FH_STATE_INTERPRET) {
LOG("Interpret a c-string alloc: \"%.*s\", %d", (int) len, start, len);
TRY(ds_push(fh, addr));
} else {
LOG("Compile a c-string: \"%.*s\", %d", (int) len, start, len);
instr.kind = FH_INSTR_ALLOCSTR_C;
instr.data = WORDALIGNED(len + 1);
fh_heap_write(fh, addr - INSTR_SIZE, &instr, INSTR_SIZE);
}
return FH_OK;
}
static enum fh_error w_dot_quote(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
size_t len;
// leave space for the instr in case of compiled version
uint32_t addr = fh->here + (fh->state == FH_STATE_INTERPRET ? 0 : INSTR_SIZE);
/* read the string straight into HEAP, but don't advance the heap pointer, so the string is immediately discarded again */
fh_input_consume_spaces(fh);
char *start;
char c = (char) w->param;
uint32_t capacity = HEAP_END - addr;
LOG("dotquote end: %c", c);
if (c == '\\') {
start = (char *) &fh->heap[addr];
TRY(fh_input_read_quotedstring(fh, 1, start, capacity, &len));
} else {
start = NULL;
TRY(fh_input_read_delimited(fh, &start, &len, fh_chartest_equals_or_end, &c));
if (len > capacity) {
LOGE("String too low for heap");
return FH_ERR_HEAP_FULL;
}
if (fh->state == FH_STATE_COMPILE) {
fh_heap_copyptr(fh, addr, start, len);
}
}
if (fh->state == FH_STATE_INTERPRET || c == ')') { // XXX special case for .(
FHPRINT("%.*s", (int) len, start);
// the string is invalidated immediately, heap pointer is NOT advanced.
} else {
LOG("Compile a string: \"%.*s\"", (int) len, start);
TRY(fh_put_instr(fh, FH_INSTR_TYPESTR, len));
fh->here = WORDALIGNED(addr + len); // at the end of the string
}
return FH_OK;
}
static enum fh_error w_less_hash(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
fh->pictnumptr = WORDBUF_LASTCHAR_ADDR;
return FH_OK;
}
static enum fh_error w_hash_greater(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint64_t dummy;
TRY(ds_pop_dw(fh, &dummy));
uint32_t len = WORDBUF_LASTCHAR_ADDR - fh->pictnumptr;
uint32_t addr;
TRY(fh_heap_reserve(fh, len, &addr));
fh_heap_copy(fh, addr, fh->pictnumptr + 1, len);
LOG("#> output: \"%.*s\"", len, &fh->heap[fh->pictnumptr + 1]);
TRY(push_addr_len(fh, addr, len));
return FH_OK;
}
static enum fh_error pictnum_prepend_char(struct fh_thread_s *fh, char c)
{
enum fh_error rv;
if (fh->pictnumptr < WORDBUF_ADDR) {
return FH_ERR_PICTNUM_FULL;
}
LOG("Prepend: %c", c);
TRY(fh_store_char(fh, fh->pictnumptr, c));
fh->pictnumptr--;
return FH_OK;
}
static char dig2char(uint64_t digit)
{
char repr;
if (digit < 10) {
repr = (char)('0' + (char)digit);
} else if (digit < 36) {
repr = (char)('A' + ((char)digit - 10));
} else {
// This shouldn't happen
repr = '?';
}
return repr;
}
static enum fh_error w_hash(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint64_t num;
TRY(ds_pop_dw(fh, &num));
uint64_t digit = num % fh->base;
TRY(pictnum_prepend_char(fh, dig2char(digit)));
TRY(ds_push_dw(fh, num / (uint64_t) fh->base));
return FH_OK;
}
static enum fh_error w_hash_s(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint64_t num;
TRY(ds_pop_dw(fh, &num));
do {
uint64_t digit = num % fh->base;
num = num / (uint64_t) fh->base;
TRY(pictnum_prepend_char(fh, dig2char(digit)));
} while (num > 0);
TRY(ds_push_dw(fh, num)); // this is zero now
return FH_OK;
}
static enum fh_error w_sign(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t num;
TRY(ds_pop(fh, &num));
if ((int32_t) num < 0) {
TRY(pictnum_prepend_char(fh, '-'));
}
return FH_OK;
}
static enum fh_error w_hold(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t ch;
TRY(ds_pop(fh, &ch));
char buf[5];
int num = utf8_encode(buf, ch);
for (int i = num - 1; i >= 0; i--) {
TRY(pictnum_prepend_char(fh, buf[i]));
}
return FH_OK;
}
static enum fh_error w_holds(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t count = 0, addr = 0;
TRY(ds_pop_addr_len(fh, &addr, &count));
const char *str = fh_str_at(fh, addr);
if (!str) { return FH_ERR_INTERNAL; }
for (int i = (int)count - 1; i >= 0; i--) {
TRY(pictnum_prepend_char(fh, str[i]));
}
return FH_OK;
}
static enum fh_error w_spaces(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t num;
TRY(ds_pop(fh, &num));
while (num-- > 0) {
FHPRINT(" ");
}
return FH_OK;
}
static enum fh_error w_to_number(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
/*
( ud1 c-addr1 u1 -- ud2 c-addr2 u2 )
ud2 is the unsigned result of converting the characters within the string specified by c-addr1 u1 into digits, using the number in BASE, and adding each into ud1 after multiplying ud1 by the number in BASE. Conversion continues left-to-right until a character that is not convertible, including any "+" or "-", is encountered or the string is entirely converted. c-addr2 is the location of the first unconverted character or the first character past the end of the string if the string was entirely converted. u2 is the number of unconverted characters in the string. An ambiguous condition exists if ud2 overflows during the conversion.
*/
uint32_t count = 0, addr = 0;
TRY(ds_pop_addr_len(fh, &addr, &count));
const char *str = fh_str_at(fh, addr);
if (!str) { return FH_ERR_INTERNAL; }
LOG("parse num from str: %.*s", count, str);
uint64_t val;
TRY(ds_pop_dw(fh, &val));
int conv;
int i = 0;
for (; i < count; i++) {
char ch = str[i];
LOG("parse c: %c", ch);
if (ch >= '0' && ch <= '9') {
conv = ch - '0';
} else if (ch >= 'a' && ch <= 'z') {
conv = 10 + ch - 'a';
} else if (ch >= 'A' && ch <= 'Z') {
conv = 10 + ch - 'A';
} else {
break;
}
if (conv >= fh->base) {
LOG("not numeric, end num parse!");
break;
}
val = (val * (uint64_t) fh->base) + (uint64_t) conv;
}
LOG("parsed num: %d", (int)val);
TRY(ds_push_dw(fh, val));
TRY(push_addr_len(fh, addr + i, count - i));
return FH_OK;
}
const struct name_and_handler fh_builtins_text[] = {
{"s\"", w_s_quote, 1, 0},
{"c\"", w_c_quote, 1, 0},
{"s\\\"", w_s_quote, 1, 1}, // escaped
{".\"", w_dot_quote, 1, '"'},
{".(", w_dot_quote, 1, ')'},
{".\\\"", w_dot_quote, 1, '\\'}, // escaped, this is non-standard
{".", w_dot, 0, 0},
{".r", w_dot_r, 0, 0},
{"u.", w_u_dot, 0, 0},
{"type", w_type, 0, 0},
{"fill", w_fill, 0, 0},
{"cr", wp_putc, 0, '\n'},
{"space", wp_putc, 0, ' '},
{"spaces", w_spaces, 0, 0},
{"bl", wp_const, 0, ' '},
{"u.r", w_u_r, 0, 0},
{"??", w_debug_dump, 0, 0}, // XXX non-standard
{"emit", w_emit, 0, 0},
{"see", w_see, 0, 0},
{"<#", w_less_hash, 0, 0},
{"#>", w_hash_greater, 0, 0},
{"#", w_hash, 0, 0},
{"#s", w_hash_s, 0, 0},
{"sign", w_sign, 0, 0},
{"hold", w_hold, 0, 0},
{"holds", w_holds, 0, 0},
{">number", w_to_number, 0, 0},
{ /* end marker */ }
};