// Forked from MightyPork/crsn.
//! A lightweight, self-contained s-expression parser and data format.
|
|
//! Use `parse` to get an s-expression from its string representation, and the
|
|
//! `Display` trait to serialize it, potentially by doing `sexp.to_string()`.
|
|
|
|
#![deny(unsafe_code)]
|
|
|
|
use std::borrow::Cow;
|
|
use std::fmt;
|
|
use std::str::{self, FromStr};
|
|
|
|
use error::{ERes, err, spos};
|
|
pub use error::Error;
|
|
pub use error::SourcePosition;
|
|
|
|
#[cfg(test)]
|
|
mod test;
|
|
|
|
mod error;
|
|
|
|
/// A single data element in an s-expression.
///
/// Because the [`Atom::F`] variant carries an `f64`, this type only derives
/// `PartialEq` and `PartialOrd` (not `Eq`, `Ord` or `Hash`), so atoms cannot
/// be used directly as keys in ordered or hashed data structures.
///
/// All strings must be valid utf-8.
#[derive(Debug, PartialEq, Clone, PartialOrd)]
pub enum Atom {
    /// Simple (unquoted) string atom
    S(String),
    /// Quoted string
    QS(String),
    /// Character literal (with single quotes)
    C(char),
    /// Signed integer (normally only used for negative values)
    I(i64),
    /// Unsigned integer
    U(u64),
    /// Float
    F(f64),
}
|
|
|
|
/// An s-expression is either an atom or a list of s-expressions. This is
|
|
/// similar to the data format used by lisp.
|
|
#[derive(Debug, Clone)]
|
|
pub enum Sexp {
|
|
/// Atom
|
|
Atom(Atom, SourcePosition),
|
|
/// List of expressions
|
|
List(Vec<Sexp>, SourcePosition),
|
|
}
|
|
|
|
impl Sexp {
|
|
pub fn pos(&self) -> &SourcePosition {
|
|
match self {
|
|
Sexp::List(_, pos) | Sexp::Atom(_, pos) => pos
|
|
}
|
|
}
|
|
|
|
/// Check fi thsi Sexp is an atom
|
|
pub fn is_atom(&self) -> bool {
|
|
match self {
|
|
Sexp::Atom(_, _) => true,
|
|
_ => false,
|
|
}
|
|
}
|
|
|
|
/// Check fi thsi Sexp is a list
|
|
pub fn is_list(&self) -> bool {
|
|
match self {
|
|
Sexp::List(_, _) => true,
|
|
_ => false,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl PartialEq for Sexp {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
match (self, other) {
|
|
(Sexp::Atom(a, _), Sexp::Atom(b, _)) => {
|
|
a == b
|
|
}
|
|
(Sexp::List(a, _), Sexp::List(b, _)) => {
|
|
a == b
|
|
}
|
|
_ => false
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns `s` with every `_` removed.
///
/// The input is borrowed unchanged when it contains no underscore; a new
/// `String` is only allocated when something actually has to be stripped.
fn without_underscores(s: &str) -> Cow<str> {
    if !s.contains('_') {
        return Cow::Borrowed(s);
    }
    Cow::Owned(s.replace('_', ""))
}
|
|
|
|
pub fn atom_of_string(s: String) -> Atom {
|
|
if s.starts_with('#') {
|
|
match u64::from_str_radix(&without_underscores(&s[1..]), 16) {
|
|
Ok(u) => return Atom::U(u),
|
|
Err(_) => {}
|
|
};
|
|
}
|
|
|
|
if s.starts_with("0x") {
|
|
match u64::from_str_radix(&without_underscores(&s[2..]), 16) {
|
|
Ok(u) => return Atom::U(u),
|
|
Err(_) => {}
|
|
};
|
|
}
|
|
|
|
if s.starts_with("0b") {
|
|
match u64::from_str_radix(&without_underscores(&s[2..]), 2) {
|
|
Ok(u) => return Atom::U(u),
|
|
Err(_) => {}
|
|
};
|
|
}
|
|
|
|
if !s.starts_with('_') {
|
|
let filtered = without_underscores(&s);
|
|
|
|
match FromStr::from_str(&filtered) {
|
|
Ok(u) => return Atom::U(u),
|
|
Err(_) => {}
|
|
};
|
|
|
|
match FromStr::from_str(&filtered) {
|
|
Ok(i) => return Atom::I(i),
|
|
Err(_) => {}
|
|
};
|
|
|
|
match FromStr::from_str(&filtered) {
|
|
Ok(f) => return Atom::F(f),
|
|
Err(_) => {}
|
|
};
|
|
}
|
|
|
|
Atom::S(s)
|
|
}
|
|
|
|
// returns the char it found, and the new pos if you wish to consume that char
|
|
fn peek(s: &str, pos: usize) -> ERes<(char, usize)> {
|
|
//trace!("peek {}", pos);
|
|
if pos == s.len() { return err("unexpected eof", s, pos); }
|
|
if s.is_char_boundary(pos) {
|
|
let ch = s[pos..].chars().next().unwrap();
|
|
let next = pos + ch.len_utf8();
|
|
Ok((ch, next))
|
|
} else {
|
|
// strings must be composed of valid utf-8 chars.
|
|
unreachable!()
|
|
}
|
|
}
|
|
|
|
/// Looks ahead to the `nth` character counted from byte offset `pos`
/// (`nth == 1` is the character at `pos` itself).
///
/// Returns that character and the byte offset just past it, or `None` when
/// fewer than `nth` characters remain.
///
/// Panics when `nth` is 0; hits `unreachable!()` when `pos` is not on a
/// character boundary.
fn peekn(s: &str, nth: usize, pos: usize) -> Option<(char, usize)> {
    if nth == 0 {
        panic!("peekn with nth=0");
    }
    if !s.is_char_boundary(pos) {
        // strings must be composed of valid utf-8 chars.
        unreachable!()
    }
    s[pos..]
        .char_indices()
        .nth(nth - 1)
        .map(|(offset, ch)| (ch, pos + offset + ch.len_utf8()))
}
|
|
|
|
fn expect(s: &str, pos: &mut usize, c: char) -> ERes<()> {
|
|
//trace!("expect {}", pos);
|
|
let (ch, next) = peek(s, *pos)?;
|
|
*pos = next;
|
|
if ch == c {
|
|
Ok(())
|
|
} else {
|
|
err("unexpected character", s, *pos)
|
|
}
|
|
}
|
|
|
|
fn consume_until_newline(s: &str, pos: &mut usize) -> ERes<()> {
|
|
loop {
|
|
if *pos == s.len() {
|
|
return Ok(());
|
|
}
|
|
let (ch, next) = peek(s, *pos)?;
|
|
*pos = next;
|
|
if ch == '\n' {
|
|
return Ok(());
|
|
}
|
|
}
|
|
}
|
|
|
|
// zero or more spaces
|
|
fn zspace(s: &str, pos: &mut usize) -> ERes<()> {
|
|
//trace!("zspace {}", pos);
|
|
loop {
|
|
if *pos == s.len() {
|
|
return Ok(());
|
|
}
|
|
let (ch, next) = peek(s, *pos)?;
|
|
|
|
if ch == ';' {
|
|
consume_until_newline(s, pos)?
|
|
} else if ch.is_whitespace() {
|
|
*pos = next;
|
|
} else {
|
|
return Ok(());
|
|
}
|
|
}
|
|
}
|
|
|
|
fn parse_quoted_atom(s: &str, quote: char, pos: &mut usize) -> ERes<Atom> {
|
|
//trace!("parse_quoted_atom {}", pos);
|
|
let pos0 = *pos;
|
|
let mut cs: String = String::new();
|
|
|
|
expect(s, pos, quote)?;
|
|
|
|
loop {
|
|
let (ch, next) = peek(s, *pos)?;
|
|
if ch == quote {
|
|
*pos = next;
|
|
break;
|
|
} else if ch == '\\' {
|
|
let (postslash, nextnext) = peek(s, next)?;
|
|
match postslash {
|
|
'r' => cs.push('\r'),
|
|
'n' => cs.push('\n'),
|
|
't' => cs.push('\t'),
|
|
other => cs.push(other)
|
|
}
|
|
*pos = nextnext;
|
|
} else {
|
|
cs.push(ch);
|
|
*pos = next;
|
|
}
|
|
}
|
|
|
|
if quote == '\'' {
|
|
// This is a character literal
|
|
if cs.chars().count() == 1 {
|
|
return Ok(Atom::C(cs.chars().next().unwrap()));
|
|
} else {
|
|
return err("Too long character literal!", s, pos0);
|
|
}
|
|
}
|
|
|
|
// Do not try i64 conversion, since this atom was explicitly quoted.
|
|
Ok(Atom::QS(cs))
|
|
}
|
|
|
|
fn parse_unquoted_atom(s: &str, pos: &mut usize) -> ERes<Atom> {
|
|
//trace!("parse_unquoted_atom {}", pos);
|
|
let mut cs: String = String::new();
|
|
|
|
loop {
|
|
if *pos == s.len() { break; }
|
|
let (c, next) = peek(s, *pos)?;
|
|
|
|
if c == ';' {
|
|
consume_until_newline(s, pos)?;
|
|
break;
|
|
}
|
|
if c.is_whitespace() || c == '(' || c == ')' {
|
|
break;
|
|
}
|
|
cs.push(c);
|
|
*pos = next;
|
|
}
|
|
|
|
Ok(atom_of_string(cs))
|
|
}
|
|
|
|
fn parse_atom(s: &str, pos: &mut usize) -> ERes<Atom> {
|
|
//trace!("parse_atom {}", pos);
|
|
let (ch, _) = peek(s, *pos)?;
|
|
|
|
if ch == '"' {
|
|
return parse_quoted_atom(s, ch, pos);
|
|
} else if ch == '\'' {
|
|
if let Some(('\\', _)) = peekn(s, 2, *pos) {
|
|
if let Some(('\'', _)) = peekn(s, 4, *pos) {
|
|
// Character literal with an escape sequence
|
|
return parse_quoted_atom(s, '\'', pos);
|
|
}
|
|
} else if let Some(('\'', _)) = peekn(s, 3, *pos) {
|
|
// Simple character literal
|
|
return parse_quoted_atom(s, '\'', pos);
|
|
}
|
|
}
|
|
|
|
parse_unquoted_atom(s, pos)
|
|
}
|
|
|
|
fn parse_list(s: &str, pos: &mut usize) -> ERes<Vec<Sexp>> {
|
|
//trace!("parse_list {}", pos);
|
|
zspace(s, pos)?;
|
|
expect(s, pos, '(')?;
|
|
|
|
let mut sexps: Vec<Sexp> = Vec::new();
|
|
|
|
loop {
|
|
zspace(s, pos)?;
|
|
let (c, next) = peek(s, *pos)?;
|
|
if c == ')' {
|
|
*pos = next;
|
|
break;
|
|
}
|
|
sexps.push(parse_sexp(s, pos)?);
|
|
}
|
|
|
|
zspace(s, pos)?;
|
|
|
|
Ok(sexps)
|
|
}
|
|
|
|
fn parse_sexp(s: &str, pos: &mut usize) -> ERes<Sexp> {
|
|
//trace!("parse_sexp {}", pos);
|
|
zspace(s, pos)?;
|
|
let (c, _) = peek(s, *pos)?;
|
|
let pos0 = *pos;
|
|
let r = if c == '(' {
|
|
Ok(Sexp::List(parse_list(s, pos)?, spos(s, pos0)))
|
|
} else {
|
|
Ok(Sexp::Atom(parse_atom(s, pos)?, spos(s, pos0)))
|
|
};
|
|
zspace(s, pos)?;
|
|
r
|
|
}
|
|
|
|
/// Constructs an atomic s-expression from a string.
|
|
pub fn atom_s(s: &str) -> Sexp {
|
|
Sexp::Atom(Atom::S(s.to_owned()), Default::default())
|
|
}
|
|
|
|
/// Constructs an atomic s-expression from a string.
|
|
pub fn atom_qs(s: &str) -> Sexp {
|
|
Sexp::Atom(Atom::QS(s.to_owned()), Default::default())
|
|
}
|
|
|
|
/// Constructs an atomic s-expression from an int.
|
|
pub fn atom_i(i: i64) -> Sexp {
|
|
Sexp::Atom(Atom::I(i), Default::default())
|
|
}
|
|
|
|
/// Constructs an atomic s-expression from an unsigned int.
|
|
pub fn atom_u(u: u64) -> Sexp {
|
|
Sexp::Atom(Atom::U(u), Default::default())
|
|
}
|
|
|
|
/// Constructs an atomic s-expression from a char
|
|
pub fn atom_c(c: char) -> Sexp {
|
|
Sexp::Atom(Atom::C(c), Default::default())
|
|
}
|
|
|
|
/// Constructs an atomic s-expression from a float.
|
|
pub fn atom_f(f: f64) -> Sexp {
|
|
Sexp::Atom(Atom::F(f), Default::default())
|
|
}
|
|
|
|
/// Constructs a list s-expression given a slice of s-expressions.
|
|
pub fn list(xs: &[Sexp]) -> Sexp {
|
|
Sexp::List(xs.to_owned(), Default::default())
|
|
}
|
|
|
|
/// Reads an s-expression out of a `&str`.
|
|
#[inline(never)]
|
|
pub fn parse(s: &str) -> Result<Sexp, Box<Error>> {
|
|
let mut pos = 0;
|
|
let ret = parse_sexp(s, &mut pos)?;
|
|
if pos == s.len() {
|
|
Ok(ret)
|
|
} else {
|
|
err("unrecognized post-s-expression data", s, pos)
|
|
}
|
|
}
|
|
|
|
/// Escapes `s` so it can be placed between quotes in serialized output.
///
/// `'`, `"` and `\` are prefixed with a backslash; newline, carriage return
/// and tab are written as `\n`, `\r` and `\t`. All other characters are
/// copied unchanged.
fn quote(s: &str) -> String {
    // Most inputs need little or no escaping, so the input length is a good
    // initial capacity.
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\'' | '\\' | '"' => {
                out.push('\\');
                out.push(ch);
            }
            '\n' => out.push_str("\\n"),
            // Must be `\r`: emitting `\n` here would turn a carriage return
            // into a newline once the text is parsed back.
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            other => out.push(other),
        }
    }
    out
}
|
|
|
|
impl fmt::Display for Atom {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
|
match *self {
|
|
Atom::QS(ref s) => write!(f, "\"{}\"", quote(s)),
|
|
Atom::S(ref s) => write!(f, "{}", s),
|
|
Atom::C(c) => write!(f, "'{}'", quote(&c.to_string())),
|
|
Atom::I(i) => write!(f, "{}", i),
|
|
Atom::U(u) => write!(f, "{}", u),
|
|
Atom::F(d) => write!(f, "{}", d),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for Sexp {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
|
match *self {
|
|
Sexp::Atom(ref a, _) => write!(f, "{}", a),
|
|
Sexp::List(ref xs, _) => {
|
|
write!(f, "(")?;
|
|
for (i, x) in xs.iter().enumerate() {
|
|
let s = if i == 0 { "" } else { " " };
|
|
write!(f, "{}{}", s, x)?;
|
|
}
|
|
write!(f, ")")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|