Croissant Runtime
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
crsn/lib/spanned_sexp/src/lib.rs

324 lines
8.1 KiB

//! A lightweight, self-contained s-expression parser and data format.
//! Use `parse` to get an s-expression from its string representation, and the
//! `Display` trait to serialize it, potentially by doing `sexp.to_string()`.
#![deny(unsafe_code)]
#[macro_use]
extern crate log;
use std::borrow::Cow;
use std::fmt;
use std::str::{self, FromStr};
use error::{ERes, err, spos};
pub use error::Error;
pub use error::SourcePosition;
#[cfg(test)]
mod test;
mod error;
/// A single data element in an s-expression: a string, a 64-bit signed
/// integer, or a 64-bit float.
///
/// Only `PartialEq`, `PartialOrd` and `Clone` are derived here; `f64` does
/// not implement `Eq`, `Ord` or `Hash`, so those cannot be derived while the
/// float variant is present.
///
/// All strings must be valid utf-8.
#[derive(PartialEq, Clone, PartialOrd)]
#[allow(missing_docs)]
pub enum Atom {
/// String atom.
S(String),
/// Signed 64-bit integer atom.
I(i64),
/// 64-bit floating-point atom.
F(f64),
}
/// An s-expression is either an atom or a list of s-expressions. This is
/// similar to the data format used by lisp.
///
/// Both variants also carry the `SourcePosition` associated with the node.
#[derive(Clone)]
pub enum Sexp {
/// A single atom together with its source position.
Atom(Atom, SourcePosition),
/// A list of nested expressions together with its source position.
List(Vec<Sexp>, SourcePosition),
}
impl Sexp {
    /// Returns the source position stored alongside this expression,
    /// regardless of whether it is an atom or a list.
    pub fn pos(&self) -> &SourcePosition {
        match self {
            Sexp::Atom(_, at) => at,
            Sexp::List(_, at) => at,
        }
    }

    /// Check if this Sexp is an atom
    pub fn is_atom(&self) -> bool {
        matches!(self, Sexp::Atom(..))
    }

    /// Check if this Sexp is a list
    pub fn is_list(&self) -> bool {
        matches!(self, Sexp::List(..))
    }
}
impl PartialEq for Sexp {
    /// Structural equality: two atoms are equal when their values are equal,
    /// two lists are equal when their elements are equal, and an atom never
    /// equals a list. The source positions are not compared.
    fn eq(&self, other: &Self) -> bool {
        match self {
            Sexp::Atom(lhs, _) => matches!(other, Sexp::Atom(rhs, _) if lhs == rhs),
            Sexp::List(lhs, _) => matches!(other, Sexp::List(rhs, _) if lhs == rhs),
        }
    }
}
/// Classifies the text of an unquoted atom.
///
/// The text is tried as an `i64` first, then as an `f64`; if neither parse
/// succeeds the string itself becomes an `Atom::S`.
fn atom_of_string(s: String) -> Atom {
    if let Ok(int) = i64::from_str(&s) {
        return Atom::I(int);
    }
    if let Ok(float) = f64::from_str(&s) {
        return Atom::F(float);
    }
    Atom::S(s)
}
// Looks at the char starting at byte offset `pos` without consuming it.
// Returns that char together with the byte offset just past it, i.e. the
// position to store if the caller decides to consume the char.
// Fails with "unexpected eof" when `pos` is at the end of the input.
fn peek(s: &str, pos: &usize) -> ERes<(char, usize)> {
    trace!("peek {}", pos);
    let at = *pos;
    if at == s.len() {
        return err("unexpected eof", s, pos);
    }
    // `get` yields `None` when `at` is not on a char boundary (or is out of
    // range); a non-empty tail always has a first char.
    match s.get(at..).and_then(|rest| rest.chars().next()) {
        Some(ch) => Ok((ch, at + ch.len_utf8())),
        // strings must be composed of valid utf-8 chars.
        None => unreachable!(),
    }
}
// Consumes one char and checks that it is `c`.
// `pos` is advanced past the char even when it does not match, so the
// "unexpected character" error is built from the already advanced position.
fn expect(s: &str, pos: &mut usize, c: char) -> ERes<()> {
    trace!("expect {}", pos);
    let (found, after) = peek(s, pos)?;
    *pos = after;
    if found != c {
        return err("unexpected character", s, pos);
    }
    Ok(())
}
// Advances `pos` up to and including the next '\n', or to the end of the
// input when there is no further newline.
fn consume_until_newline(s: &str, pos: &mut usize) -> ERes<()> {
    while *pos != s.len() {
        let (ch, after) = peek(s, pos)?;
        *pos = after;
        if ch == '\n' {
            break;
        }
    }
    Ok(())
}
// Skips zero or more whitespace chars. A ';' starts a comment that is
// skipped through the end of its line. Stops at the first other char or at
// the end of the input.
fn zspace(s: &str, pos: &mut usize) -> ERes<()> {
    trace!("zspace {}", pos);
    while *pos != s.len() {
        let (ch, after) = peek(s, pos)?;
        match ch {
            ';' => consume_until_newline(s, pos)?,
            blank if blank.is_whitespace() => *pos = after,
            _ => break,
        }
    }
    Ok(())
}
// Parses a double-quoted string atom starting at `pos`.
// Inside the quotes `\"` and `\\` are unescaped to `"` and `\`; a backslash
// followed by any other char is kept verbatim together with that char.
// Reaching the end of the input before the closing quote is an error.
fn parse_quoted_atom(s: &str, pos: &mut usize) -> ERes<Atom> {
    trace!("parse_quoted_atom {}", pos);
    expect(s, pos, '"')?;
    let mut text = String::new();
    loop {
        let (ch, after) = peek(s, pos)?;
        match ch {
            '"' => {
                *pos = after;
                // Do not try i64 conversion, since this atom was explicitly quoted.
                return Ok(Atom::S(text));
            }
            '\\' => {
                let (escaped, after_escape) = peek(s, &after)?;
                if !matches!(escaped, '"' | '\\') {
                    // Not a recognised escape: keep the backslash itself.
                    text.push(ch);
                }
                text.push(escaped);
                *pos = after_escape;
            }
            plain => {
                text.push(plain);
                *pos = after;
            }
        }
    }
}
// Parses a bare atom: chars are collected until whitespace, a parenthesis,
// a ';' comment (which is consumed through the end of its line) or the end
// of the input. The collected text is then classified by `atom_of_string`.
fn parse_unquoted_atom(s: &str, pos: &mut usize) -> ERes<Atom> {
    trace!("parse_unquoted_atom {}", pos);
    let mut text = String::new();
    while *pos != s.len() {
        let (c, after) = peek(s, pos)?;
        match c {
            ';' => {
                consume_until_newline(s, pos)?;
                break;
            }
            '(' | ')' => break,
            _ if c.is_whitespace() => break,
            _ => {
                text.push(c);
                *pos = after;
            }
        }
    }
    Ok(atom_of_string(text))
}
// Parses one atom, choosing the quoted parser when the next char is a
// double quote and the unquoted parser otherwise.
fn parse_atom(s: &str, pos: &mut usize) -> ERes<Atom> {
    trace!("parse_atom {}", pos);
    match peek(s, pos)? {
        ('"', _) => parse_quoted_atom(s, pos),
        _ => parse_unquoted_atom(s, pos),
    }
}
// Parses a parenthesised list: optional leading space, '(', any number of
// nested expressions, ')', optional trailing space. Returns the elements in
// source order.
fn parse_list(s: &str, pos: &mut usize) -> ERes<Vec<Sexp>> {
    trace!("parse_list {}", pos);
    zspace(s, pos)?;
    expect(s, pos, '(')?;
    let mut items: Vec<Sexp> = Vec::new();
    loop {
        zspace(s, pos)?;
        match peek(s, pos)? {
            (')', after) => {
                *pos = after;
                break;
            }
            _ => items.push(parse_sexp(s, pos)?),
        }
    }
    zspace(s, pos)?;
    Ok(items)
}
// Parses one s-expression (a list when the next char is '(', otherwise an
// atom), skipping whitespace and comments on both sides.
//
// The source position attached to the node is taken where the expression
// starts. It must be captured before calling the sub-parser: operands are
// evaluated left to right, so computing it afterwards would describe the
// offset just past the expression (and past any trailing space or comment a
// sub-parser consumed).
fn parse_sexp(s: &str, pos: &mut usize) -> ERes<Sexp> {
    trace!("parse_sexp {}", pos);
    zspace(s, pos)?;
    let (c, _) = peek(s, pos)?;
    let start = spos(s, pos);
    let r = if c == '(' {
        Sexp::List(parse_list(s, pos)?, start)
    } else {
        Sexp::Atom(parse_atom(s, pos)?, start)
    };
    zspace(s, pos)?;
    Ok(r)
}
/// Builds a string atom from `s`, using the default source position.
pub fn atom_s(s: &str) -> Sexp {
    let value = Atom::S(String::from(s));
    Sexp::Atom(value, Default::default())
}
/// Builds an integer atom from `i`, using the default source position.
pub fn atom_i(i: i64) -> Sexp {
    let value = Atom::I(i);
    Sexp::Atom(value, Default::default())
}
/// Builds a float atom from `f`, using the default source position.
pub fn atom_f(f: f64) -> Sexp {
    let value = Atom::F(f);
    Sexp::Atom(value, Default::default())
}
/// Builds a list s-expression holding clones of the expressions in `xs`,
/// using the default source position.
pub fn list(xs: &[Sexp]) -> Sexp {
    let items = xs.to_vec();
    Sexp::List(items, Default::default())
}
/// Parses exactly one s-expression from `s`.
///
/// # Errors
///
/// Returns an error when the text is not a well-formed s-expression, or when
/// anything other than whitespace and comments follows the expression.
#[inline(never)]
pub fn parse(s: &str) -> Result<Sexp, Box<Error>> {
    let mut pos = 0;
    let ret = parse_sexp(s, &mut pos)?;
    if pos != s.len() {
        return err("unrecognized post-s-expression data", s, &pos);
    }
    Ok(ret)
}
// TODO: Pretty print in lisp convention, instead of all on the same line,
// packed as tightly as possible. It's kinda ugly.
/// Returns `true` when `s` would be read back as a number, i.e. when it
/// parses as an `i64` or as an `f64`.
fn is_num_string(s: &str) -> bool {
    i64::from_str(s).is_ok() || f64::from_str(s).is_ok()
}

/// Returns `true` when `s` contains at least one whitespace char.
fn string_contains_whitespace(s: &str) -> bool {
    s.chars().any(char::is_whitespace)
}

/// Renders the text of a string atom so that the parser reads it back as the
/// same string atom.
///
/// The text is returned unchanged (borrowed) when it can stand as a bare
/// atom. It is wrapped in double quotes, with `\` and `"` escaped, when it
/// - is empty (a bare empty atom would vanish from the output),
/// - contains a double quote, whitespace, a parenthesis or a `;`
///   (all of which terminate or restructure a bare atom), or
/// - looks like a number (a bare one would be read back as a number).
fn quote(s: &str) -> Cow<'_, str> {
    let has_delimiter = s.contains(|c: char| matches!(c, '"' | '(' | ')' | ';'));
    let needs_quotes =
        s.is_empty() || has_delimiter || string_contains_whitespace(s) || is_num_string(s);
    if !needs_quotes {
        return Cow::Borrowed(s);
    }

    // Two extra bytes for the surrounding quotes; escapes may grow it further.
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for c in s.chars() {
        if c == '\\' || c == '"' {
            quoted.push('\\');
        }
        quoted.push(c);
    }
    quoted.push('"');
    Cow::Owned(quoted)
}
impl fmt::Display for Atom {
    /// Writes the atom in its textual form: strings go through `quote`,
    /// integers and floats use their standard `Display` formatting.
    // NOTE(review): `f64`'s `Display` prints integral values without a
    // fractional part (1.0 prints as `1`), which `atom_of_string` would read
    // back as an integer - confirm whether that matters for round-tripping.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        match self {
            Atom::S(text) => write!(f, "{}", quote(text)),
            Atom::I(int) => write!(f, "{}", int),
            Atom::F(float) => write!(f, "{}", float),
        }
    }
}
impl fmt::Display for Sexp {
    /// Writes the expression on a single line: an atom as itself, a list as
    /// its elements separated by single spaces inside parentheses.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        let items = match self {
            Sexp::Atom(atom, _) => return write!(f, "{}", atom),
            Sexp::List(items, _) => items,
        };

        write!(f, "(")?;
        let mut rest = items.iter();
        // The first element has no separator in front of it.
        if let Some(first) = rest.next() {
            write!(f, "{}", first)?;
        }
        for item in rest {
            write!(f, " {}", item)?;
        }
        write!(f, ")")
    }
}
impl fmt::Debug for Atom {
    /// Debug output is the same text as the `Display` output.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        f.write_fmt(format_args!("{}", self))
    }
}
impl fmt::Debug for Sexp {
    /// Debug output is the same text as the `Display` output.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        f.write_fmt(format_args!("{}", self))
    }
}