Trying to build a forth runtime in C
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
forth/src/fh_builtins_text.c

374 lines
9.1 KiB

#include "forth_internal.h"
/**
* Encode a code point using UTF-8
*
* Copied from ESPTERM source
*
* @param out - output buffer (min 4 characters), will be 0-terminated if shorten than 4
* @param utf - code point 0-0x10FFFF
* @return number of bytes on success, 0 on failure (also produces U+FFFD, which uses 3 bytes)
*/
static int utf8_encode(char *out, uint32_t utf)
{
if (utf <= 0x7F) {
// Plain ASCII
out[0] = (char) utf;
out[1] = 0;
return 1;
} else if (utf <= 0x07FF) {
// 2-byte unicode
out[0] = (char) (((utf >> 6) & 0x1F) | 0xC0);
out[1] = (char) (((utf >> 0) & 0x3F) | 0x80);
out[2] = 0;
return 2;
} else if (utf <= 0xFFFF) {
// 3-byte unicode
out[0] = (char) (((utf >> 12) & 0x0F) | 0xE0);
out[1] = (char) (((utf >> 6) & 0x3F) | 0x80);
out[2] = (char) (((utf >> 0) & 0x3F) | 0x80);
out[3] = 0;
return 3;
} else if (utf <= 0x10FFFF) {
// 4-byte unicode
out[0] = (char) (((utf >> 18) & 0x07) | 0xF0);
out[1] = (char) (((utf >> 12) & 0x3F) | 0x80);
out[2] = (char) (((utf >> 6) & 0x3F) | 0x80);
out[3] = (char) (((utf >> 0) & 0x3F) | 0x80);
// out[4] = 0;
return 4;
} else {
// error - use replacement character
out[0] = (char) 0xEF;
out[1] = (char) 0xBF;
out[2] = (char) 0xBD;
out[3] = 0;
return 0;
}
}
static enum fh_error w_dot(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t a = 0;
TRY(ds_pop(fh, &a));
FHPRINT("%d ", (int32_t) a);
return FH_OK;
}
3 years ago
static enum fh_error w_u_r(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t a = 0, n = 0;
TRY(ds_pop(fh, &n));
TRY(ds_pop(fh, &a));
FHPRINT("%*d", n, a);
return FH_OK;
}
static enum fh_error w_type(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t count = 0, addr = 0;
LOG("Get count,addr");
TRY(ds_pop(fh, &count));
TRY(ds_pop(fh, &addr));
const char *str = fh_str_at(fh, addr);
if (!str) {
LOGE("Type addr out of bounds!");
return FH_ERR_NOT_APPLICABLE;
}
FHPRINT("%.*s", count, str);
return FH_OK;
}
static enum fh_error wp_putc(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) fh;
FHPRINT("%c", w->param);
return FH_OK;
}
static enum fh_error w_debug_dump(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
(void) fh;
FHPRINT("DS ");
for (int i = 0; i < fh->data_stack_top; i++) {
FHPRINT("%d ", fh->data_stack[i]);
}
FHPRINT("\nRS ");
for (int i = 0; i < fh->return_stack_top; i++) {
FHPRINT("%d ", fh->return_stack[i]);
}
FHPRINT("\n");
return FH_OK;
}
static enum fh_error w_emit(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t a;
TRY(ds_pop(fh, &a));
char buf[5];
int num = utf8_encode(buf, a);
FHPRINT("%.*s", num, buf);
return FH_OK;
}
static enum fh_error w_see(struct fh_thread_s *fh, const struct fh_word_s *w)
{
enum fh_error rv;
char *wordname;
size_t namelen = 0;
fh_input_consume_spaces(fh);
TRY(fh_input_read_word(fh, &wordname, &namelen));
TRY(fh_see_word(fh, wordname, namelen));
return FH_OK;
}
static enum fh_error w_s_quote(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
size_t len;
uint32_t addr = fh->here + (fh->state == FH_STATE_INTERPRET ? 0 : INSTR_SIZE);
/* read the string straight into HEAP */
fh_input_consume_spaces(fh);
char *start = (char *) &fh->heap[addr];
TRY(fh_input_read_quotedstring(fh, w->param == 1, start, HEAP_END - addr, &len));
fh->here = WORDALIGNED(addr + len);
struct fh_instruction_s instr;
if (fh->state == FH_STATE_INTERPRET) {
LOG("Interpret a string alloc: \"%.*s\"", len, start);
TRY(ds_push(fh, addr));
TRY(ds_push(fh, len));
} else {
LOG("Compile a string: \"%.*s\"", len, start);
instr.kind = FH_INSTR_ALLOCSTR;
instr.data = len;
fh_heap_write(fh, addr - INSTR_SIZE, &instr, INSTR_SIZE);
}
return FH_OK;
}
static bool chartest_equals_or_end(char c, void *param)
{
char cc = *(char*)param;
return cc == c || c == 0;
}
static enum fh_error w_dot_quote(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
size_t len;
// leave space for the instr in case of compiled version
uint32_t addr = fh->here + (fh->state == FH_STATE_INTERPRET ? 0 : INSTR_SIZE);
/* read the string straight into HEAP, but don't advance the heap pointer, so the string is immediately discarded again */
fh_input_consume_spaces(fh);
char *start;
char c = (char)w->param;
uint32_t capacity = HEAP_END - addr;
if (c == '\\') {
start = (char *) &fh->heap[addr];
TRY(fh_input_read_quotedstring(fh, 1, start, capacity, &len));
} else {
start = NULL;
TRY(fh_input_read_delimited(fh, &start, &len, chartest_equals_or_end, &c));
if (len > capacity) {
LOGE("String too low for heap");
return FH_ERR_HEAP_FULL;
}
if (fh->state == FH_STATE_COMPILE) {
fh_heap_copyptr(fh, addr, start, len);
}
}
struct fh_instruction_s instr;
if (fh->state == FH_STATE_INTERPRET) {
FHPRINT("%.*s", (int) len, start);
// the string is invalidated immediately, heap pointer is NOT advanced.
} else {
LOG("Compile a string: \"%.*s\"", len, start);
TRY(fh_put_instr(fh, FH_INSTR_TYPESTR, len));
fh->here = WORDALIGNED(addr + len); // at the end of the string
}
return FH_OK;
}
static enum fh_error w_less_hash(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
fh->pictnumptr = WORDBUF_LASTCHAR_ADDR;
return FH_OK;
}
static enum fh_error w_hash_greater(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint64_t dummy;
TRY(ds_pop_dw(fh, &dummy));
uint32_t len = WORDBUF_LASTCHAR_ADDR - fh->pictnumptr;
uint32_t addr;
TRY(fh_heap_reserve(fh, len, &addr));
fh_heap_copy(fh, addr, fh->pictnumptr+1, len);
LOG("#> output: \"%.*s\"", len, &fh->heap[fh->pictnumptr+1]);
TRY(ds_push(fh, addr));
TRY(ds_push(fh, len));
return FH_OK;
}
static enum fh_error pictnum_prepend_char(struct fh_thread_s *fh, char c) {
enum fh_error rv;
if (fh->pictnumptr < WORDBUF_ADDR) {
return FH_ERR_PICTNUM_FULL;
}
LOG("Prepend: %c", c);
TRY(fh_store_char(fh, fh->pictnumptr, c));
fh->pictnumptr--;
return FH_OK;
}
static char dig2char(uint64_t digit) {
char repr;
if (digit < 10) {
repr = '0' + digit;
} else if (digit < 36) {
repr = 'A' + (digit - 10);
} else {
repr = '?'; // XXX bad base?
}
return repr;
}
static enum fh_error w_hash(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint64_t num;
TRY(ds_pop_dw(fh, &num));
uint64_t digit = num % fh->base;
TRY(pictnum_prepend_char(fh, dig2char(digit)));
TRY(ds_push_dw(fh, num / (uint64_t)fh->base));
return FH_OK;
}
static enum fh_error w_hash_s(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint64_t num;
TRY(ds_pop_dw(fh, &num));
do {
uint64_t digit = num % fh->base;
num = num / (uint64_t)fh->base;
TRY(pictnum_prepend_char(fh, dig2char(digit)));
} while (num > 0);
TRY(ds_push_dw(fh, num)); // this is zero now
return FH_OK;
}
static enum fh_error w_sign(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t num;
TRY(ds_pop(fh, &num));
if ((int32_t)num < 0) {
TRY(pictnum_prepend_char(fh, '-'));
}
return FH_OK;
}
static enum fh_error w_hold(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t ch;
TRY(ds_pop(fh, &ch));
char buf[5];
int num = utf8_encode(buf, ch);
for(int i=num-1;i>=0;i--) {
TRY(pictnum_prepend_char(fh, buf[i]));
}
return FH_OK;
}
static enum fh_error w_holds(struct fh_thread_s *fh, const struct fh_word_s *w)
{
(void) w;
enum fh_error rv;
uint32_t ch;
uint32_t count = 0, addr = 0;
LOG("Get count,addr");
TRY(ds_pop(fh, &count));
TRY(ds_pop(fh, &addr));
const char *str = fh_str_at(fh, addr);
if (!str) {
LOGE("HOLDS addr out of bounds!");
return FH_ERR_NOT_APPLICABLE;
}
for(int i=count-1;i>=0;i--) {
TRY(pictnum_prepend_char(fh, str[i]));
}
return FH_OK;
}
const struct name_and_handler fh_builtins_text[] = {
{"s\"", w_s_quote, 1, 0},
{"s\\\"", w_s_quote, 1, 1}, // escaped
{".\"", w_dot_quote, 1, '"'},
{".(", w_dot_quote, 1, ')'},
{".\\\"", w_dot_quote, 1, '\\'}, // escaped, this is non-standard
{".", w_dot, 0, 0},
{"type", w_type, 0, 0},
{"cr", wp_putc, 0, '\n'},
{"space", wp_putc, 0, ' '},
{"bl", wp_const, 0, ' '},
3 years ago
{"u.r", w_u_r, 0, 0},
{"??", w_debug_dump, 0, 0},
{"emit", w_emit, 0, 0},
{"see", w_see, 0, 0},
{"<#", w_less_hash, 0, 0},
{"#>", w_hash_greater, 0, 0},
{"#", w_hash, 0, 0},
{"#s", w_hash_s, 0, 0},
{"sign", w_sign, 0, 0},
{"hold", w_hold, 0, 0},
{"holds", w_holds, 0, 0},
{ /* end marker */ }
};