From 1f2dbaa81dfc0217a7d7f2acc642378511eb53d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Hru=C5=A1ka?= Date: Fri, 9 Oct 2020 00:41:02 +0200 Subject: [PATCH] add better string and number parsing to sexp. cleaning, more tests --- README.md | 4 +- crsn/Cargo.toml | 2 +- .../crsn-sexp}/.gitignore | 0 .../crsn-sexp}/Cargo.toml | 0 {lib/spanned_sexp => crsn/crsn-sexp}/LICENSE | 1 + crsn/crsn-sexp/README.md | 19 + .../crsn-sexp}/src/error.rs | 12 +- crsn/crsn-sexp/src/lib.rs | 444 ++++++++++++++++++ .../crsn-sexp}/src/test.rs | 45 +- crsn/src/asm/parse/parse_data.rs | 72 ++- crsn/src/asm/parse/sexp_expect.rs | 24 +- crsn/src/builtin/defs.rs | 2 +- crsn/src/builtin/exec.rs | 2 +- crsn/src/builtin/parse.rs | 22 +- crsn/src/lib.rs | 1 + crsn/src/utils/mod.rs | 11 +- crsn_stdio/src/lib.rs | 6 +- examples/stacks.csn | 2 +- examples/stdio.csn | 10 +- launcher/src/main.rs | 24 +- lib/spanned_sexp/README.md | 9 - lib/spanned_sexp/src/lib.rs | 324 ------------- 22 files changed, 612 insertions(+), 424 deletions(-) rename {lib/spanned_sexp => crsn/crsn-sexp}/.gitignore (100%) rename {lib/spanned_sexp => crsn/crsn-sexp}/Cargo.toml (100%) rename {lib/spanned_sexp => crsn/crsn-sexp}/LICENSE (95%) create mode 100644 crsn/crsn-sexp/README.md rename {lib/spanned_sexp => crsn/crsn-sexp}/src/error.rs (86%) create mode 100644 crsn/crsn-sexp/src/lib.rs rename {lib/spanned_sexp => crsn/crsn-sexp}/src/test.rs (60%) delete mode 100644 lib/spanned_sexp/README.md delete mode 100644 lib/spanned_sexp/src/lib.rs diff --git a/README.md b/README.md index 3e07c81..f32235e 100644 --- a/README.md +++ b/README.md @@ -374,7 +374,7 @@ Many instructions have two forms: (asl DST B) ; Delete an object by its handle. Objects are used by some extensions. -(drop @REG) +(del @REG) ``` ## Stacks Module @@ -398,7 +398,7 @@ This module defines data stacks. 
Stacks can be shared by routines by passing a h (rpop DST @REG) ``` -To delete a stack, drop its handle - `(drop @REG)` +To delete a stack, use the `del` instruction - `(del @REG)` ## Screen module diff --git a/crsn/Cargo.toml b/crsn/Cargo.toml index eba809f..41fa0ef 100644 --- a/crsn/Cargo.toml +++ b/crsn/Cargo.toml @@ -6,7 +6,7 @@ edition = "2018" publish = false [dependencies] -sexp = { path = "../lib/spanned_sexp" } +sexp = { path = "crsn-sexp" } thiserror = "1.0.20" anyhow = "1.0.32" dyn-clonable = "0.9.0" diff --git a/lib/spanned_sexp/.gitignore b/crsn/crsn-sexp/.gitignore similarity index 100% rename from lib/spanned_sexp/.gitignore rename to crsn/crsn-sexp/.gitignore diff --git a/lib/spanned_sexp/Cargo.toml b/crsn/crsn-sexp/Cargo.toml similarity index 100% rename from lib/spanned_sexp/Cargo.toml rename to crsn/crsn-sexp/Cargo.toml diff --git a/lib/spanned_sexp/LICENSE b/crsn/crsn-sexp/LICENSE similarity index 95% rename from lib/spanned_sexp/LICENSE rename to crsn/crsn-sexp/LICENSE index 683f7fa..cd0c683 100644 --- a/lib/spanned_sexp/LICENSE +++ b/crsn/crsn-sexp/LICENSE @@ -1,4 +1,5 @@ Copyright (c) 2015 Clark Gaebel +Copyright (c) 2020 Ondřej Hruška Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/crsn/crsn-sexp/README.md b/crsn/crsn-sexp/README.md new file mode 100644 index 0000000..86a89da --- /dev/null +++ b/crsn/crsn-sexp/README.md @@ -0,0 +1,19 @@ +CRSN Sexp +========= + +This is an updated and extended version of the "sexp" crate by Clark Gaebel: [https://github.com/cgaebel/sexp](https://github.com/cgaebel/sexp). + +## Changes from "cgaebel/sexp" + +- Updated to the 2018 Rust edition (that is, removed `try!()` and such) +- All parsed atoms now track their source location. This enables better error reporting in subsequent parsing and processing. 
+- Quoted strings now support C-style escapes other than `\"`, such as `\n`, `\t` and `\\`. + - Unrecognized escapes result in the backslash being removed and the next character being taken literally. Use `\\` to enter a backslash. +- Added special parsing of "character literals" that use single quotes (`'A'`) and may contain backslash escapes (`'\n'`). + - This does not interfere with "apostrophe tokens" like `'foo`, which becomes an unquoted string atom. +- Added "quoted string", "unsigned" and "character" atom types +- Added parsing for `0x123`, `#ff00ff` and `0b101010` +- Numeric literals may now contain underscores to separate digit groups +- Numbers are preferably parsed as the unsigned atom (u64). The signed atom (i64) is only used for negative numbers. + +. diff --git a/lib/spanned_sexp/src/error.rs b/crsn/crsn-sexp/src/error.rs similarity index 86% rename from lib/spanned_sexp/src/error.rs rename to crsn/crsn-sexp/src/error.rs index 1807365..e935717 100644 --- a/lib/spanned_sexp/src/error.rs +++ b/crsn/crsn-sexp/src/error.rs @@ -63,23 +63,23 @@ pub(crate) fn get_line_and_column(s: &str, pos: usize) -> SourcePosition { } #[cold] -fn err_impl(message: &'static str, s: &str, pos: &usize) -> Err { +fn err_impl(message: &'static str, s: &str, pos: usize) -> Err { Box::new(Error { message, - pos: get_line_and_column(s, *pos), + pos: get_line_and_column(s, pos), }) } /// Build an error with span information -pub(crate) fn err(message: &'static str, s: &str, pos: &usize) -> ERes { +pub(crate) fn err(message: &'static str, s: &str, pos: usize) -> ERes { Err(err_impl(message, s, pos)) } /// Build a span -pub(crate) fn spos(s: &str, pos: &usize) -> SourcePosition { - if *pos >= s.len() { +pub(crate) fn spos(s: &str, pos: usize) -> SourcePosition { + if pos >= s.len() { Default::default() } else { - get_line_and_column(s, *pos) + get_line_and_column(s, pos) } } diff --git a/crsn/crsn-sexp/src/lib.rs b/crsn/crsn-sexp/src/lib.rs new file mode 100644 index 0000000..838a550 
--- /dev/null +++ b/crsn/crsn-sexp/src/lib.rs @@ -0,0 +1,444 @@ +//! A lightweight, self-contained s-expression parser and data format. +//! Use `parse` to get an s-expression from its string representation, and the +//! `Display` trait to serialize it, potentially by doing `sexp.to_string()`. + +#![deny(unsafe_code)] + +#[macro_use] +extern crate log; + +use std::borrow::Cow; +use std::fmt; +use std::str::{self, FromStr}; + +use error::{ERes, err, spos}; +pub use error::Error; +pub use error::SourcePosition; + +#[cfg(test)] +mod test; + +mod error; + +/// A single data element in an s-expression. Because a float variant (`F`) is +/// included, atoms are only `PartialEq`/`PartialOrd`; they cannot be `Eq`, `Ord` or `Hash` keys. +/// +/// All strings must be valid utf-8. +#[derive(Debug, PartialEq, Clone, PartialOrd)] +pub enum Atom { + /// Simple string atom + S(String), + /// Quoted string + QS(String), + /// Character literal (with single quotes) + C(char), + /// Signed integer (normally only used for negative values) + I(i64), + /// Unsigned integer + U(u64), + /// Float + F(f64), +} + +/// An s-expression is either an atom or a list of s-expressions. This is +/// similar to the data format used by lisp. 
+#[derive(Debug, Clone)] +pub enum Sexp { + /// Atom + Atom(Atom, SourcePosition), + /// List of expressions + List(Vec<Sexp>, SourcePosition), +} + +impl Sexp { + pub fn pos(&self) -> &SourcePosition { + match self { + Sexp::List(_, pos) | Sexp::Atom(_, pos) => pos + } + } + + /// Check if this Sexp is an atom + pub fn is_atom(&self) -> bool { + match self { + Sexp::Atom(_, _) => true, + _ => false, + } + } + + /// Check if this Sexp is a list + pub fn is_list(&self) -> bool { + match self { + Sexp::List(_, _) => true, + _ => false, + } + } +} + +impl PartialEq for Sexp { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Sexp::Atom(a, _), Sexp::Atom(b, _)) => { + a == b + } + (Sexp::List(a, _), Sexp::List(b, _)) => { + a == b + } + _ => false + } + } +} + +fn without_underscores(s: &str) -> Cow<str> { + if s.contains('_') { + s.chars() + .filter(|c| *c != '_') + .collect::<String>().into() + } else { + s.into() + } +} + +fn atom_of_string(s: String) -> Atom { + if s.starts_with('#') { + match u64::from_str_radix(&without_underscores(&s[1..]), 16) { + Ok(u) => return Atom::U(u), + Err(_) => {} + }; + } + + if s.starts_with("0x") { + match u64::from_str_radix(&without_underscores(&s[2..]), 16) { + Ok(u) => return Atom::U(u), + Err(_) => {} + }; + } + + if s.starts_with("0b") { + match u64::from_str_radix(&without_underscores(&s[2..]), 2) { + Ok(u) => return Atom::U(u), + Err(_) => {} + }; + } + + if !s.starts_with('_') { + let filtered = without_underscores(&s); + + match FromStr::from_str(&filtered) { + Ok(u) => return Atom::U(u), + Err(_) => {} + }; + + match FromStr::from_str(&filtered) { + Ok(i) => return Atom::I(i), + Err(_) => {} + }; + + match FromStr::from_str(&filtered) { + Ok(f) => return Atom::F(f), + Err(_) => {} + }; + } + + Atom::S(s) +} + +// returns the char it found, and the new pos if you wish to consume that char +fn peek(s: &str, pos: usize) -> ERes<(char, usize)> { + trace!("peek {}", pos); + if pos == s.len() { return err("unexpected eof", s, 
pos); } + if s.is_char_boundary(pos) { + let ch = s[pos..].chars().next().unwrap(); + let next = pos + ch.len_utf8(); + Ok((ch, next)) + } else { + // strings must be composed of valid utf-8 chars. + unreachable!() + } +} + +// returns the char it found, and the new pos if you wish to consume that char +fn peekn(s: &str, nth: usize, pos: usize) -> Option<(char, usize)> { + trace!("peekn {}", pos); + if nth == 0 { + panic!("peekn with nth=0"); + } + if s.is_char_boundary(pos) { + let mut iter = s[pos..].chars(); + let mut bytelen = 0; + for n in 0..nth { + if let Some(ch) = iter.next() { + bytelen += ch.len_utf8(); + if n == (nth - 1) { + return Some((ch, pos + bytelen)); + } + } else { + return None; + } + } + unreachable!() + } else { + // strings must be composed of valid utf-8 chars. + unreachable!() + } +} + +fn expect(s: &str, pos: &mut usize, c: char) -> ERes<()> { + trace!("expect {}", pos); + let (ch, next) = peek(s, *pos)?; + *pos = next; + if ch == c { + Ok(()) + } else { + err("unexpected character", s, *pos) + } +} + +fn consume_until_newline(s: &str, pos: &mut usize) -> ERes<()> { + loop { + if *pos == s.len() { + return Ok(()); + } + let (ch, next) = peek(s, *pos)?; + *pos = next; + if ch == '\n' { + return Ok(()); + } + } +} + +// zero or more spaces +fn zspace(s: &str, pos: &mut usize) -> ERes<()> { + trace!("zspace {}", pos); + loop { + if *pos == s.len() { + return Ok(()); + } + let (ch, next) = peek(s, *pos)?; + + if ch == ';' { + consume_until_newline(s, pos)? 
+ } else if ch.is_whitespace() { + *pos = next; + } else { + return Ok(()); + } + } +} + +fn parse_quoted_atom(s: &str, quote: char, pos: &mut usize) -> ERes<Atom> { + trace!("parse_quoted_atom {}", pos); + let pos0 = *pos; + let mut cs: String = String::new(); + + expect(s, pos, quote)?; + + loop { + let (ch, next) = peek(s, *pos)?; + if ch == quote { + *pos = next; + break; + } else if ch == '\\' { + let (postslash, nextnext) = peek(s, next)?; + match postslash { + 'r' => cs.push('\r'), + 'n' => cs.push('\n'), + 't' => cs.push('\t'), + other => cs.push(other) + } + *pos = nextnext; + } else { + cs.push(ch); + *pos = next; + } + } + + if quote == '\'' { + // This is a character literal + if cs.chars().count() == 1 { + return Ok(Atom::C(cs.chars().next().unwrap())); + } else { + return err("Too long character literal!", s, pos0); + } + } + + // Do not try i64 conversion, since this atom was explicitly quoted. + Ok(Atom::QS(cs)) +} + +fn parse_unquoted_atom(s: &str, pos: &mut usize) -> ERes<Atom> { + trace!("parse_unquoted_atom {}", pos); + let mut cs: String = String::new(); + + loop { + if *pos == s.len() { break; } + let (c, next) = peek(s, *pos)?; + + if c == ';' { + consume_until_newline(s, pos)?; + break; + } + if c.is_whitespace() || c == '(' || c == ')' { + break; + } + cs.push(c); + *pos = next; + } + + Ok(atom_of_string(cs)) +} + +fn parse_atom(s: &str, pos: &mut usize) -> ERes<Atom> { + trace!("parse_atom {}", pos); + let (ch, _) = peek(s, *pos)?; + + if ch == '"' { + return parse_quoted_atom(s, ch, pos); + } else if ch == '\'' { + if let Some(('\\', _)) = peekn(s, 2, *pos) { + if let Some(('\'', _)) = peekn(s, 4, *pos) { + // Character literal with an escape sequence + return parse_quoted_atom(s, '\'', pos); + } + } else if let Some(('\'', _)) = peekn(s, 3, *pos) { + // Simple character literal + return parse_quoted_atom(s, '\'', pos); + } + } + + parse_unquoted_atom(s, pos) +} + +fn parse_list(s: &str, pos: &mut usize) -> ERes<Vec<Sexp>> { + trace!("parse_list {}", pos); + 
zspace(s, pos)?; + expect(s, pos, '(')?; + + let mut sexps: Vec<Sexp> = Vec::new(); + + loop { + zspace(s, pos)?; + let (c, next) = peek(s, *pos)?; + if c == ')' { + *pos = next; + break; + } + sexps.push(parse_sexp(s, pos)?); + } + + zspace(s, pos)?; + + Ok(sexps) +} + +fn parse_sexp(s: &str, pos: &mut usize) -> ERes<Sexp> { + trace!("parse_sexp {}", pos); + zspace(s, pos)?; + let (c, _) = peek(s, *pos)?; + let r = if c == '(' { + Ok(Sexp::List(parse_list(s, pos)?, spos(s, *pos))) + } else { + Ok(Sexp::Atom(parse_atom(s, pos)?, spos(s, *pos))) + }; + zspace(s, pos)?; + r +} + +/// Constructs an atomic s-expression from a string. +pub fn atom_s(s: &str) -> Sexp { + Sexp::Atom(Atom::S(s.to_owned()), Default::default()) +} + +/// Constructs an atomic s-expression from a quoted string. +pub fn atom_qs(s: &str) -> Sexp { + Sexp::Atom(Atom::QS(s.to_owned()), Default::default()) +} + +/// Constructs an atomic s-expression from an int. +pub fn atom_i(i: i64) -> Sexp { + Sexp::Atom(Atom::I(i), Default::default()) +} + +/// Constructs an atomic s-expression from an unsigned int. +pub fn atom_u(u: u64) -> Sexp { + Sexp::Atom(Atom::U(u), Default::default()) +} + +/// Constructs an atomic s-expression from a char +pub fn atom_c(c: char) -> Sexp { + Sexp::Atom(Atom::C(c), Default::default()) +} + +/// Constructs an atomic s-expression from a float. +pub fn atom_f(f: f64) -> Sexp { + Sexp::Atom(Atom::F(f), Default::default()) +} + +/// Constructs a list s-expression given a slice of s-expressions. +pub fn list(xs: &[Sexp]) -> Sexp { + Sexp::List(xs.to_owned(), Default::default()) +} + +/// Reads an s-expression out of a `&str`. 
+#[inline(never)] +pub fn parse(s: &str) -> Result<Sexp, Box<Error>> { + let mut pos = 0; + let ret = parse_sexp(s, &mut pos)?; + if pos == s.len() { + Ok(ret) + } else { + err("unrecognized post-s-expression data", s, pos) + } +} + +fn quote(s: &str) -> String { + s.chars().fold(String::new(), |mut s, ch| { + match ch { + '\'' | '\\' | '"' => { + s.push('\\'); + s.push(ch); + } + '\n' => { + s.push_str("\\n"); + } + '\r' => { + s.push_str("\\r"); + } + '\t' => { + s.push_str("\\t"); + } + other => { + s.push(other); + } + } + s + }) +} + +impl fmt::Display for Atom { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match *self { + Atom::QS(ref s) => write!(f, "\"{}\"", quote(s)), + Atom::S(ref s) => write!(f, "{}", s), + Atom::C(c) => write!(f, "'{}'", quote(&c.to_string())), + Atom::I(i) => write!(f, "{}", i), + Atom::U(u) => write!(f, "{}", u), + Atom::F(d) => write!(f, "{}", d), + } + } +} + +impl fmt::Display for Sexp { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match *self { + Sexp::Atom(ref a, _) => write!(f, "{}", a), + Sexp::List(ref xs, _) => { + write!(f, "(")?; + for (i, x) in xs.iter().enumerate() { + let s = if i == 0 { "" } else { " " }; + write!(f, "{}{}", s, x)?; + } + write!(f, ")") + } + } + } +} diff --git a/lib/spanned_sexp/src/test.rs b/crsn/crsn-sexp/src/test.rs similarity index 60% rename from lib/spanned_sexp/src/test.rs rename to crsn/crsn-sexp/src/test.rs index f32f2ce..3f71cb8 100644 --- a/lib/spanned_sexp/src/test.rs +++ b/crsn/crsn-sexp/src/test.rs @@ -3,16 +3,33 @@ use super::error::get_line_and_column; #[test] fn test_hello_world() { + let parsed = parse("(hello #ffcc00 'c' ' ' '\\\\' '\\n' 'not_a_char 0xf_f 0b01_01 18_446_744_073_709_551_615 9223372036854775807 9223372036854775808 -42\n\t -4.0 \"world\") ; comment").unwrap(); assert_eq!( - parse("(hello -42\n\t -4.0 \"world\") ; comment").unwrap(), - list(&[atom_s("hello"), atom_i(-42), atom_f(-4.0), atom_s("world")])); + parsed, + list(&[ + 
atom_s("hello"), + atom_u(0xffcc00), + atom_c('c'), + atom_c(' '), + atom_c('\\'), + atom_c('\n'), + atom_s("'not_a_char"), + atom_u(255), + atom_u(0b0101), + atom_u(18_446_744_073_709_551_615), + atom_u(9223372036854775807), + atom_u(9223372036854775808), + atom_i(-42), + atom_f(-4.0), + atom_qs("world") + ])); } #[test] fn test_escaping() { assert_eq!( - parse("(\"\\\"\\q\" \"1234\" 1234)").unwrap(), - list(&[atom_s("\"\\q"), atom_s("1234"), atom_i(1234)])); + parse(r#"("\"\\q\t\n\r " "1234" 1234)"#).unwrap(), + list(&[atom_qs("\"\\q\t\n\r "), atom_qs("1234"), atom_u(1234)])); } #[test] @@ -20,7 +37,7 @@ fn test_pp() { let s = "(hello world (what is (up) (4 6.4 you \"123\\\\ \\\"\")))"; let sexp = parse(s).unwrap(); assert_eq!(s, sexp.to_string()); - assert_eq!(s, format!("{:?}", sexp)); + assert_eq!(s, format!("{}", sexp)); } #[test] @@ -36,7 +53,7 @@ fn test_tight_parens() { #[test] fn test_space_in_atom() { - let sexp = list(&[atom_s("hello world")]); + let sexp = list(&[atom_qs("hello world")]); let sexp_as_string = sexp.to_string(); assert_eq!("(\"hello world\")", sexp_as_string); assert_eq!(sexp, parse(&sexp_as_string).unwrap()); @@ -68,3 +85,19 @@ fn sexp_size() { use std::mem; assert_eq!(mem::size_of::(), mem::size_of::() * 6); } + +#[test] +fn test_peek() { + assert_eq!(('a', 1), peek("ahoj", 0).unwrap()); + assert_eq!(('j', 4), peek("ahoj", 3).unwrap()); + assert!(peek("ahoj", 4).is_err()); +} + +#[test] +fn test_peekn() { + assert_eq!(Some(('a', 1)), peekn("ahoj", 1, 0)); + assert_eq!(Some(('h', 2)), peekn("ahoj", 2, 0)); + assert_eq!(Some(('j', 4)), peekn("ahoj", 4, 0)); + assert_eq!(Some(('o', 3)), peekn("ahoj", 1, 2)); + assert_eq!(None, peekn("ahoj", 3, 2)); +} diff --git a/crsn/src/asm/parse/parse_data.rs b/crsn/src/asm/parse/parse_data.rs index 453acce..ac2a7f3 100644 --- a/crsn/src/asm/parse/parse_data.rs +++ b/crsn/src/asm/parse/parse_data.rs @@ -78,36 +78,26 @@ pub fn parse_data_disp(tok: Sexp, pcx: &ParserContext) -> Result { 
Ok(DataDisp::Immediate(unsafe { std::mem::transmute(val) })) } + Sexp::Atom(Atom::F(val), _pos) => { + Ok(DataDisp::Immediate(unsafe { std::mem::transmute(val) })) + } + Sexp::Atom(Atom::U(val), _pos) => { + Ok(DataDisp::Immediate(val)) + } + Sexp::Atom(Atom::C(val), _pos) => { + Ok(DataDisp::Immediate(val as u64)) + } + Sexp::Atom(Atom::QS(_s), pos) => { + Err(CrsnError::Parse("Quoted string not expected here".into(), pos)) + } + Sexp::List(_list, pos) => { + Err(CrsnError::Parse("List not expected here".into(), pos)) + } Sexp::Atom(Atom::S(s), pos) => { if s == "_" { return Ok(DataDisp::Discard); } - if s.starts_with('\'') && s.ends_with('\'') { - if s.chars().count() == 3 { - let ch = s.chars().nth(1).unwrap(); - if ch == '\'' { - return Err(CrsnError::Parse("Use '\\'' for apos".into(), pos)); - } - return Ok(DataDisp::Immediate(ch as u64)); - } else if s.chars().count() == 4 && s.chars().nth(1).unwrap() == '\\' { - let ch = s.chars().nth(2).unwrap(); - let ch = match ch { - '\\' => '\\', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\'' => '\'', - _ => { - return Err(CrsnError::Parse(format!("Unknown escape sequence: {}", s).into(), pos)); - } - }; - return Ok(DataDisp::Immediate(ch as u64)); - } else { - return Err(CrsnError::Parse(format!("Invalid character literal synax {}", s).into(), pos)); - } - } - if let Some(reference) = s.strip_prefix('@') { /* extension constants (pre-defined handles) */ for p in pcx.parsers { @@ -129,8 +119,6 @@ pub fn parse_data_disp(tok: Sexp, pcx: &ParserContext) -> Result Result { - Err(CrsnError::Parse(format!("bad data disp: {:?}", other).into(), other.pos().clone())) - } } } @@ -173,16 +158,28 @@ pub fn parse_value(tok: Sexp, pcx: &ParserContext) -> Result { Sexp::Atom(Atom::I(val), _pos) => { Ok(unsafe { std::mem::transmute(val) }) } + Sexp::Atom(Atom::U(val), _pos) => { + Ok(val) + } + Sexp::Atom(Atom::C(val), _pos) => { + Ok(val as u64) + } + Sexp::Atom(Atom::QS(_), pos) => { + Err(CrsnError::Parse("quoted string not 
expected here".into(), pos)) + } + Sexp::Atom(Atom::F(val), _) => { + Ok(unsafe { std::mem::transmute(val) }) + } Sexp::Atom(Atom::S(s), pos) => { let pstate = pcx.state.borrow(); if let Some(val) = pstate.constants.get(&s) { return Ok(*val); } - Ok(unsafe { std::mem::transmute(parse_i64(&s, &pos)?) }) + Err(CrsnError::Parse(format!("unknown constant: {}", s).into(), pos)) } - other => { - Err(CrsnError::Parse(format!("bad value format: {:?}", other).into(), other.pos().clone())) + Sexp::List(_, pos) => { + Err(CrsnError::Parse("expected a value".into(), pos)) } } } @@ -204,15 +201,6 @@ pub fn parse_u64(literal: &str, pos: &SourcePosition) -> Result } } -pub fn parse_i64(literal: &str, pos: &SourcePosition) -> Result { - // trace!("parse i64 from {}", literal); - if let Some(_value) = literal.strip_prefix("-") { - Ok(-1 * i64::try_from(parse_u64(literal, pos)?).err_pos(pos)?) - } else { - Ok(i64::try_from(parse_u64(literal, pos)?).err_pos(pos)?) - } -} - pub fn parse_rd(tok: Sexp, pcx: &ParserContext) -> Result { let pos = tok.pos().clone(); Ok(Rd::new(RdData::try_from(parse_data_disp(tok, pcx)?).err_pos(&pos)?)) diff --git a/crsn/src/asm/parse/sexp_expect.rs b/crsn/src/asm/parse/sexp_expect.rs index eb2da85..d97483b 100644 --- a/crsn/src/asm/parse/sexp_expect.rs +++ b/crsn/src/asm/parse/sexp_expect.rs @@ -36,10 +36,20 @@ pub fn expect_string_atom(expr: Sexp) -> Result<(String, SourcePosition), CrsnEr } } -// pub fn expect_int_atom(expr: Option) -> Result { -// match expect_atom(expr) { -// Ok(Atom::I(v)) => Ok(v), -// Ok(atom) => Err(Error::ParseIn("Expected int atom".into(), Sexp::Atom(atom))), -// Err(e) => Err(e), -// } -// } +pub fn expect_quoted_string_atom(expr: Sexp) -> Result<(String, SourcePosition), CrsnError> { + match expect_atom(expr) { + Ok((Atom::QS(s), pos)) => Ok((s, pos)), + Ok((_, pos)) => Err(CrsnError::Parse("Expected quoted string atom".into(), pos)), + Err(e) => Err(e), + } +} + +/// Quoted or simple string +pub fn 
expect_any_string_atom(expr: Sexp) -> Result<(String, SourcePosition), CrsnError> { + match expect_atom(expr) { + Ok((Atom::S(s), pos)) => Ok((s, pos)), + Ok((Atom::QS(s), pos)) => Ok((s, pos)), + Ok((_, pos)) => Err(CrsnError::Parse("Expected string atom".into(), pos)), + Err(e) => Err(e), + } +} diff --git a/crsn/src/builtin/defs.rs b/crsn/src/builtin/defs.rs index db3de9d..6c01f63 100644 --- a/crsn/src/builtin/defs.rs +++ b/crsn/src/builtin/defs.rs @@ -53,7 +53,7 @@ pub enum BuiltinOp { Fault(Option), /// Deallocate an extension object. /// The object is released and the handle becomes invalid. - Drop(RdObj), + Delete(RdObj), /// Copy value Move { dst: Wr, src: Rd }, /// Store runtime status to a register diff --git a/crsn/src/builtin/exec.rs b/crsn/src/builtin/exec.rs index 45a0db5..41c0386 100644 --- a/crsn/src/builtin/exec.rs +++ b/crsn/src/builtin/exec.rs @@ -120,7 +120,7 @@ impl OpTrait for BuiltinOp { BuiltinOp::Sleep { micros } => { std::thread::sleep(Duration::from_micros(state.read(*micros)?)) } - BuiltinOp::Drop(obj) => { + BuiltinOp::Delete(obj) => { let x = state.read(Rd::new(RdData::Register(obj.reg())))?; trace!("Drop object: {:#x}", x); diff --git a/crsn/src/builtin/parse.rs b/crsn/src/builtin/parse.rs index c35ef7c..9a62adb 100644 --- a/crsn/src/builtin/parse.rs +++ b/crsn/src/builtin/parse.rs @@ -6,7 +6,7 @@ use crate::asm::error::CrsnError; use crate::asm::instr::op::OpKind; use crate::asm::parse::arg_parser::TokenParser; use crate::asm::parse::parse_data::{parse_constant_name, parse_label, parse_label_str, parse_rd, parse_reg_alias, parse_value}; -use crate::asm::parse::sexp_expect::expect_string_atom; +use crate::asm::parse::sexp_expect::{expect_any_string_atom}; use crate::asm::patches::ErrWithPos; use crate::builtin::defs::{Barrier, BuiltinOp}; use crate::module::ParseRes; @@ -135,7 +135,7 @@ pub(crate) fn parse_op<'a>(op_pos: &SourcePosition, keyword: &str, mut args: Tok kind: Barrier::Standalone, msg: match args.next() { None => 
None, - Some(s) => Some(expect_string_atom(s)?.0.into()), + Some(s) => Some(expect_any_string_atom(s)?.0.into()), }, } } @@ -157,7 +157,7 @@ pub(crate) fn parse_op<'a>(op_pos: &SourcePosition, keyword: &str, mut args: Tok "fault" => { BuiltinOp::Fault(match args.next() { None => None, - Some(s) => Some(expect_string_atom(s)?.0.into()), + Some(s) => Some(expect_any_string_atom(s)?.0.into()), }) } @@ -180,8 +180,8 @@ pub(crate) fn parse_op<'a>(op_pos: &SourcePosition, keyword: &str, mut args: Tok } } - "drop" => { - BuiltinOp::Drop(args.next_rdobj()?) + "del" => { + BuiltinOp::Delete(args.next_rdobj()?) } "far" => { @@ -277,7 +277,7 @@ pub(crate) fn to_sexp(op: &BuiltinOp) -> Sexp { sexp::list(&[A("fault")]) } } - BuiltinOp::Drop(obj) => sexp::list(&[A("drop"), A(obj)]), + BuiltinOp::Delete(obj) => sexp::list(&[A("del"), A(obj)]), BuiltinOp::Move { dst, src } => sexp::list(&[A("ld"), A(dst), A(src)]), BuiltinOp::StoreStatus { dst } => sexp::list(&[A("sst"), A(dst)]), BuiltinOp::LoadStatus { src } => sexp::list(&[A("sld"), A(src)]) @@ -303,6 +303,12 @@ mod test { ("(nop)", "(nop)"), ("(halt)", "(halt)"), ("(sleep 1000)", "(sleep 1000)"), + ("(sleep 1_0_0_0)", "(sleep 1000)"), + ("(sleep ' ')", "(sleep 32)"), + ("(sleep '\\n')", "(sleep 10)"), + ("(sleep 0b111)", "(sleep 7)"), + ("(sleep 0xab_cd)", "(sleep 43981)"), + ("(sleep #ab_cd)", "(sleep 43981)"), ("(:x)", "(:x)"), ("(j :x)", "(j :x)"), ("(:#7)", "(:#7)"), @@ -345,8 +351,8 @@ mod test { ("(sst r0)", "(sst r0)"), ("(sld r0)", "(sld r0)"), ("(far :label)", "(far :label)"), - ("(drop @r5)", "(drop @r5)"), - ("(sym cat r0)(drop @cat)", "(drop @r0)"), + ("(del @r5)", "(del @r5)"), + ("(sym cat r0)(del @cat)", "(del @r0)"), ]; let parser = BuiltinOps::new(); diff --git a/crsn/src/lib.rs b/crsn/src/lib.rs index d270bf3..d5e99ca 100644 --- a/crsn/src/lib.rs +++ b/crsn/src/lib.rs @@ -1,6 +1,7 @@ #[macro_use] extern crate log; +// re-export our customized version of sexp pub use sexp; pub mod asm; diff --git 
a/crsn/src/utils/mod.rs b/crsn/src/utils/mod.rs index 052b9a0..95765c8 100644 --- a/crsn/src/utils/mod.rs +++ b/crsn/src/utils/mod.rs @@ -11,6 +11,11 @@ mod option_ext; pub fn A(s: impl Display) -> Sexp { let s = s.to_string(); + let x: Result = FromStr::from_str(&s); + if let Ok(x) = x { + return Sexp::Atom(Atom::U(x), Default::default()); + } + let x: Result = FromStr::from_str(&s); if let Ok(x) = x { return Sexp::Atom(Atom::I(x), Default::default()); @@ -21,5 +26,9 @@ pub fn A(s: impl Display) -> Sexp { return Sexp::Atom(Atom::F(y), Default::default()); } - Sexp::Atom(Atom::S(s), Default::default()) + if s.contains(|c: char| " \t\"\\\n\t\r".contains(c)) { + Sexp::Atom(Atom::QS(s), Default::default()) + } else { + Sexp::Atom(Atom::S(s), Default::default()) + } } diff --git a/crsn_stdio/src/lib.rs b/crsn_stdio/src/lib.rs index edc7208..133921e 100644 --- a/crsn_stdio/src/lib.rs +++ b/crsn_stdio/src/lib.rs @@ -4,10 +4,10 @@ use crsn::asm::instr::op::OpKind; use crsn::asm::parse::arg_parser::TokenParser; use crsn::module::{CrsnExtension, ParseRes, CrsnUniq}; use crsn::runtime::fault::Fault; -use crsn::runtime::run_thread::{RunState, ThreadInfo}; +use crsn::runtime::run_thread::{RunState}; use crsn::sexp::SourcePosition; use crsn::asm::data::Mask; -use std::io::{Read, stdin, stdout, Write}; +use std::io::{Write}; use std::convert::TryFrom; use crsn::asm::instr::Cond; use console::Term; @@ -61,7 +61,7 @@ impl CrsnExtension for StdioOps { "stdio" } - fn parse_op<'a>(&self, pos: &SourcePosition, keyword: &str, args: TokenParser<'a>) -> Result, CrsnError> { + fn parse_op<'a>(&self, _pos: &SourcePosition, _keyword: &str, args: TokenParser<'a>) -> Result, CrsnError> { Ok(ParseRes::Unknown(args)) } diff --git a/examples/stacks.csn b/examples/stacks.csn index 24dfa1b..ca550de 100644 --- a/examples/stacks.csn +++ b/examples/stacks.csn @@ -32,5 +32,5 @@ (rpop r1 @r0) (cmp r1 1000 (ne? 
(fault))) - (drop @r0) + (del @r0) ) diff --git a/examples/stdio.csn b/examples/stdio.csn index 764cf13..a533a6b 100644 --- a/examples/stdio.csn +++ b/examples/stdio.csn @@ -1,8 +1,10 @@ ( ; we don't have strings yet 👌 - (ld @stdout 'H') (ld @stdout 'e') (ld @stdout 'l') (ld @stdout 'l') (ld @stdout 'o') - (ld @stdout 32) (ld @stdout 'c') (ld @stdout 'r') (ld @stdout 's') (ld @stdout 'n') - (ld @stdout '…') (ld @stdout 32) + (ld @stdout 'P') (ld @stdout 'r') (ld @stdout 'e') (ld @stdout 's') (ld @stdout 's') (ld @stdout ' ') + (ld @stdout 'q') (ld @stdout ' ') + (ld @stdout 't') (ld @stdout 'o') (ld @stdout ' ') + (ld @stdout 'q') (ld @stdout 'u') (ld @stdout 'i') (ld @stdout 't') + (ld @stdout '…') (ld @stdout ' ') (:loop) (ld r0 @stdin) @@ -15,7 +17,7 @@ ; uppercase ASCII (cmp r0 'a' (? (j :badchar))) - (sub r0 32) + (sub r0 ' ') (ld @stdout r0) (j :loop) diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 2cbed04..7da65cb 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -33,7 +33,7 @@ struct Config { #[serde(skip)] program_file: String, #[serde(skip)] - asm_only: bool, + assemble_only: bool, #[serde(with = "serde_duration_millis")] cycle_time: Duration, } @@ -42,11 +42,11 @@ impl Default for Config { fn default() -> Self { Self { log: LogConfig { - level: "info".to_string(), + level: "warn".to_string(), modules: Default::default(), }, program_file: "".to_string(), - asm_only: false, + assemble_only: false, cycle_time: Duration::default(), } } @@ -63,6 +63,14 @@ impl AppConfig for Config { Some(&self.log.modules) } + fn pre_log_println(_message: String) { + // shut up + } + + fn print_banner(_name: &str, _version: &str) { + // No banner + } + /// Add args to later use in the `configure` method. 
fn add_args<'a: 'b, 'b>(clap: clap::App<'a, 'b>) -> clap::App<'a, 'b> { // Default impl @@ -106,7 +114,7 @@ impl AppConfig for Config { fn configure(mut self, clap: &ArgMatches) -> anyhow::Result { self.program_file = clap.value_of("input").unwrap().to_string(); - self.asm_only = clap.is_present("asm-only"); + self.assemble_only = clap.is_present("asm-only"); if let Some(c) = clap.value_of("cycle") { self.cycle_time = Duration::from_millis(c.parse().unwrap()); } @@ -118,7 +126,7 @@ impl AppConfig for Config { fn main() -> anyhow::Result<()> { let config = Config::init("crsn", "crsn.json5", env!("CARGO_PKG_VERSION"))?; - info!("Loading {}", config.program_file); + debug!("Loading {}", config.program_file); let source = read_file::read_file(&config.program_file)?; @@ -131,7 +139,7 @@ fn main() -> anyhow::Result<()> { StdioOps::new(), ])?; - if config.asm_only { + if config.assemble_only { for (n, op) in parsed.ops.iter().enumerate() { println!("{:04} : {}", n, op.to_sexp()); } @@ -144,7 +152,7 @@ fn main() -> anyhow::Result<()> { trace!("------------------------"); } - info!("Start runtime"); + debug!("Start runtime"); let args = &[]; let thread = RunThread::new(ThreadParams { @@ -159,7 +167,7 @@ fn main() -> anyhow::Result<()> { // run without spawning, so it is on the main thread - required by some extensions thread.run(); - info!("Runtime shut down."); + debug!("Runtime shut down."); Ok(()) } diff --git a/lib/spanned_sexp/README.md b/lib/spanned_sexp/README.md deleted file mode 100644 index e49f312..0000000 --- a/lib/spanned_sexp/README.md +++ /dev/null @@ -1,9 +0,0 @@ -Source-location tracking Sexp -===== - -**This is a fork of "sexp", updated to the 2018 edition, where each parsed node tracks its -source file position. This enables better error reporting in subsequent parsing and processing.** - ---- - -Original version by Clark Gaebel: [https://github.com/cgaebel/sexp](https://github.com/cgaebel/sexp). 
diff --git a/lib/spanned_sexp/src/lib.rs b/lib/spanned_sexp/src/lib.rs deleted file mode 100644 index bbbaa9c..0000000 --- a/lib/spanned_sexp/src/lib.rs +++ /dev/null @@ -1,324 +0,0 @@ -//! A lightweight, self-contained s-expression parser and data format. -//! Use `parse` to get an s-expression from its string representation, and the -//! `Display` trait to serialize it, potentially by doing `sexp.to_string()`. - -#![deny(unsafe_code)] - -#[macro_use] -extern crate log; - -use std::borrow::Cow; -use std::fmt; -use std::str::{self, FromStr}; - -use error::{ERes, err, spos}; -pub use error::Error; -pub use error::SourcePosition; - -#[cfg(test)] -mod test; - -mod error; - -/// A single data element in an s-expression. Floats are excluded to ensure -/// atoms may be used as keys in ordered and hashed data structures. -/// -/// All strings must be valid utf-8. -#[derive(PartialEq, Clone, PartialOrd)] -#[allow(missing_docs)] -pub enum Atom { - S(String), - I(i64), - F(f64), -} - -/// An s-expression is either an atom or a list of s-expressions. This is -/// similar to the data format used by lisp. 
-#[derive(Clone)] -pub enum Sexp { - /// Atom - Atom(Atom, SourcePosition), - /// List of expressions - List(Vec, SourcePosition), -} - -impl Sexp { - pub fn pos(&self) -> &SourcePosition { - match self { - Sexp::List(_, pos) | Sexp::Atom(_, pos) => pos - } - } - - /// Check fi thsi Sexp is an atom - pub fn is_atom(&self) -> bool { - match self { - Sexp::Atom(_, _) => true, - _ => false, - } - } - - /// Check fi thsi Sexp is a list - pub fn is_list(&self) -> bool { - match self { - Sexp::List(_, _) => true, - _ => false, - } - } -} - -impl PartialEq for Sexp { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (Sexp::Atom(a, _), Sexp::Atom(b, _)) => { - a == b - } - (Sexp::List(a, _), Sexp::List(b, _)) => { - a == b - } - _ => false - } - } -} - - -fn atom_of_string(s: String) -> Atom { - match FromStr::from_str(&s) { - Ok(i) => return Atom::I(i), - Err(_) => {} - }; - - match FromStr::from_str(&s) { - Ok(f) => return Atom::F(f), - Err(_) => {} - }; - - Atom::S(s) -} - -// returns the char it found, and the new size if you wish to consume that char -fn peek(s: &str, pos: &usize) -> ERes<(char, usize)> { - trace!("peek {}", pos); - if *pos == s.len() { return err("unexpected eof", s, pos); } - if s.is_char_boundary(*pos) { - let ch = s[*pos..].chars().next().unwrap(); - let next = *pos + ch.len_utf8(); - Ok((ch, next)) - } else { - // strings must be composed of valid utf-8 chars. 
- unreachable!() - } -} - -fn expect(s: &str, pos: &mut usize, c: char) -> ERes<()> { - trace!("expect {}", pos); - let (ch, next) = peek(s, pos)?; - *pos = next; - if ch == c { Ok(()) } else { err("unexpected character", s, pos) } -} - -fn consume_until_newline(s: &str, pos: &mut usize) -> ERes<()> { - loop { - if *pos == s.len() { return Ok(()); } - let (ch, next) = peek(s, pos)?; - *pos = next; - if ch == '\n' { return Ok(()); } - } -} - -// zero or more spaces -fn zspace(s: &str, pos: &mut usize) -> ERes<()> { - trace!("zspace {}", pos); - loop { - if *pos == s.len() { return Ok(()); } - let (ch, next) = peek(s, pos)?; - - if ch == ';' { consume_until_newline(s, pos)? } else if ch.is_whitespace() { *pos = next; } else { return Ok(()); } - } -} - -fn parse_quoted_atom(s: &str, pos: &mut usize) -> ERes { - trace!("parse_quoted_atom {}", pos); - let mut cs: String = String::new(); - - expect(s, pos, '"')?; - - loop { - let (ch, next) = peek(s, pos)?; - if ch == '"' { - *pos = next; - break; - } else if ch == '\\' { - let (postslash, nextnext) = peek(s, &next)?; - if postslash == '"' || postslash == '\\' { - cs.push(postslash); - } else { - cs.push(ch); - cs.push(postslash); - } - *pos = nextnext; - } else { - cs.push(ch); - *pos = next; - } - } - - // Do not try i64 conversion, since this atom was explicitly quoted. 
- Ok(Atom::S(cs)) -} - -fn parse_unquoted_atom(s: &str, pos: &mut usize) -> ERes { - trace!("parse_unquoted_atom {}", pos); - let mut cs: String = String::new(); - - loop { - if *pos == s.len() { break; } - let (c, next) = peek(s, pos)?; - - if c == ';' { - consume_until_newline(s, pos)?; - break; - } - if c.is_whitespace() || c == '(' || c == ')' { break; } - cs.push(c); - *pos = next; - } - - Ok(atom_of_string(cs)) -} - -fn parse_atom(s: &str, pos: &mut usize) -> ERes { - trace!("parse_atom {}", pos); - let (ch, _) = peek(s, pos)?; - - if ch == '"' { parse_quoted_atom(s, pos) } else { parse_unquoted_atom(s, pos) } -} - -fn parse_list(s: &str, pos: &mut usize) -> ERes> { - trace!("parse_list {}", pos); - zspace(s, pos)?; - expect(s, pos, '(')?; - - let mut sexps: Vec = Vec::new(); - - loop { - zspace(s, pos)?; - let (c, next) = peek(s, pos)?; - if c == ')' { - *pos = next; - break; - } - sexps.push(parse_sexp(s, pos)?); - } - - zspace(s, pos)?; - - Ok(sexps) -} - -fn parse_sexp(s: &str, pos: &mut usize) -> ERes { - trace!("parse_sexp {}", pos); - zspace(s, pos)?; - let (c, _) = peek(s, pos)?; - let r = if c == '(' { - Ok(Sexp::List(parse_list(s, pos)?, spos(s, pos))) - } else { - Ok(Sexp::Atom(parse_atom(s, pos)?, spos(s, pos))) - }; - zspace(s, pos)?; - r -} - -/// Constructs an atomic s-expression from a string. -pub fn atom_s(s: &str) -> Sexp { - Sexp::Atom(Atom::S(s.to_owned()), Default::default()) -} - -/// Constructs an atomic s-expression from an int. -pub fn atom_i(i: i64) -> Sexp { - Sexp::Atom(Atom::I(i), Default::default()) -} - -/// Constructs an atomic s-expression from a float. -pub fn atom_f(f: f64) -> Sexp { - Sexp::Atom(Atom::F(f), Default::default()) -} - -/// Constructs a list s-expression given a slice of s-expressions. -pub fn list(xs: &[Sexp]) -> Sexp { - Sexp::List(xs.to_owned(), Default::default()) -} - -/// Reads an s-expression out of a `&str`. 
-#[inline(never)] -pub fn parse(s: &str) -> Result> { - let mut pos = 0; - let ret = parse_sexp(s, &mut pos)?; - if pos == s.len() { Ok(ret) } else { err("unrecognized post-s-expression data", s, &pos) } -} - -// TODO: Pretty print in lisp convention, instead of all on the same line, -// packed as tightly as possible. It's kinda ugly. - -fn is_num_string(s: &str) -> bool { - let x: Result = FromStr::from_str(&s); - let y: Result = FromStr::from_str(&s); - x.is_ok() || y.is_ok() -} - -fn string_contains_whitespace(s: &str) -> bool { - for c in s.chars() { - if c.is_whitespace() { return true; } - } - false -} - -fn quote(s: &str) -> Cow { - if !s.contains("\"") - && !string_contains_whitespace(s) - && !is_num_string(s) { - Cow::Borrowed(s) - } else { - let mut r: String = "\"".to_string(); - r.push_str(&s.replace("\\", "\\\\").replace("\"", "\\\"")); - r.push_str("\""); - Cow::Owned(r) - } -} - -impl fmt::Display for Atom { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - match *self { - Atom::S(ref s) => write!(f, "{}", quote(s)), - Atom::I(i) => write!(f, "{}", i), - Atom::F(d) => write!(f, "{}", d), - } - } -} - -impl fmt::Display for Sexp { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - match *self { - Sexp::Atom(ref a, _) => write!(f, "{}", a), - Sexp::List(ref xs, _) => { - write!(f, "(")?; - for (i, x) in xs.iter().enumerate() { - let s = if i == 0 { "" } else { " " }; - write!(f, "{}{}", s, x)?; - } - write!(f, ")") - } - } - } -} - -impl fmt::Debug for Atom { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - write!(f, "{}", self) - } -} - -impl fmt::Debug for Sexp { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - write!(f, "{}", self) - } -}