//! A lightweight, self-contained s-expression parser and data format. //! Use `parse` to get an s-expression from its string representation, and the //! `Display` trait to serialize it, potentially by doing `sexp.to_string()`. #![deny(unsafe_code)] #[macro_use] extern crate log; use std::borrow::Cow; use std::fmt; use std::str::{self, FromStr}; use error::{ERes, err, spos}; pub use error::Error; pub use error::SourcePosition; #[cfg(test)] mod test; mod error; /// A single data element in an s-expression. Floats are excluded to ensure /// atoms may be used as keys in ordered and hashed data structures. /// /// All strings must be valid utf-8. #[derive(PartialEq, Clone, PartialOrd)] #[allow(missing_docs)] pub enum Atom { S(String), I(i64), F(f64), } /// An s-expression is either an atom or a list of s-expressions. This is /// similar to the data format used by lisp. #[derive(Clone)] pub enum Sexp { /// Atom Atom(Atom, SourcePosition), /// List of expressions List(Vec, SourcePosition), } impl Sexp { pub fn pos(&self) -> &SourcePosition { match self { Sexp::List(_, pos) | Sexp::Atom(_, pos) => pos } } /// Check fi thsi Sexp is an atom pub fn is_atom(&self) -> bool { match self { Sexp::Atom(_, _) => true, _ => false, } } /// Check fi thsi Sexp is a list pub fn is_list(&self) -> bool { match self { Sexp::List(_, _) => true, _ => false, } } } impl PartialEq for Sexp { fn eq(&self, other: &Self) -> bool { match (self, other) { (Sexp::Atom(a, _), Sexp::Atom(b, _)) => { a == b } (Sexp::List(a, _), Sexp::List(b, _)) => { a == b } _ => false } } } fn atom_of_string(s: String) -> Atom { match FromStr::from_str(&s) { Ok(i) => return Atom::I(i), Err(_) => {} }; match FromStr::from_str(&s) { Ok(f) => return Atom::F(f), Err(_) => {} }; Atom::S(s) } // returns the char it found, and the new size if you wish to consume that char fn peek(s: &str, pos: &usize) -> ERes<(char, usize)> { trace!("peek {}", pos); if *pos == s.len() { return err("unexpected eof", s, pos); } if s.is_char_boundary(*pos) { let ch = s[*pos..].chars().next().unwrap(); let next = *pos + ch.len_utf8(); Ok((ch, next)) } else { // strings must be composed of valid utf-8 chars. unreachable!() } } fn expect(s: &str, pos: &mut usize, c: char) -> ERes<()> { trace!("expect {}", pos); let (ch, next) = peek(s, pos)?; *pos = next; if ch == c { Ok(()) } else { err("unexpected character", s, pos) } } fn consume_until_newline(s: &str, pos: &mut usize) -> ERes<()> { loop { if *pos == s.len() { return Ok(()); } let (ch, next) = peek(s, pos)?; *pos = next; if ch == '\n' { return Ok(()); } } } // zero or more spaces fn zspace(s: &str, pos: &mut usize) -> ERes<()> { trace!("zspace {}", pos); loop { if *pos == s.len() { return Ok(()); } let (ch, next) = peek(s, pos)?; if ch == ';' { consume_until_newline(s, pos)? } else if ch.is_whitespace() { *pos = next; } else { return Ok(()); } } } fn parse_quoted_atom(s: &str, pos: &mut usize) -> ERes { trace!("parse_quoted_atom {}", pos); let mut cs: String = String::new(); expect(s, pos, '"')?; loop { let (ch, next) = peek(s, pos)?; if ch == '"' { *pos = next; break; } else if ch == '\\' { let (postslash, nextnext) = peek(s, &next)?; if postslash == '"' || postslash == '\\' { cs.push(postslash); } else { cs.push(ch); cs.push(postslash); } *pos = nextnext; } else { cs.push(ch); *pos = next; } } // Do not try i64 conversion, since this atom was explicitly quoted. Ok(Atom::S(cs)) } fn parse_unquoted_atom(s: &str, pos: &mut usize) -> ERes { trace!("parse_unquoted_atom {}", pos); let mut cs: String = String::new(); loop { if *pos == s.len() { break; } let (c, next) = peek(s, pos)?; if c == ';' { consume_until_newline(s, pos)?; break; } if c.is_whitespace() || c == '(' || c == ')' { break; } cs.push(c); *pos = next; } Ok(atom_of_string(cs)) } fn parse_atom(s: &str, pos: &mut usize) -> ERes { trace!("parse_atom {}", pos); let (ch, _) = peek(s, pos)?; if ch == '"' { parse_quoted_atom(s, pos) } else { parse_unquoted_atom(s, pos) } } fn parse_list(s: &str, pos: &mut usize) -> ERes> { trace!("parse_list {}", pos); zspace(s, pos)?; expect(s, pos, '(')?; let mut sexps: Vec = Vec::new(); loop { zspace(s, pos)?; let (c, next) = peek(s, pos)?; if c == ')' { *pos = next; break; } sexps.push(parse_sexp(s, pos)?); } zspace(s, pos)?; Ok(sexps) } fn parse_sexp(s: &str, pos: &mut usize) -> ERes { trace!("parse_sexp {}", pos); zspace(s, pos)?; let (c, _) = peek(s, pos)?; let r = if c == '(' { Ok(Sexp::List(parse_list(s, pos)?, spos(s, pos))) } else { Ok(Sexp::Atom(parse_atom(s, pos)?, spos(s, pos))) }; zspace(s, pos)?; r } /// Constructs an atomic s-expression from a string. pub fn atom_s(s: &str) -> Sexp { Sexp::Atom(Atom::S(s.to_owned()), Default::default()) } /// Constructs an atomic s-expression from an int. pub fn atom_i(i: i64) -> Sexp { Sexp::Atom(Atom::I(i), Default::default()) } /// Constructs an atomic s-expression from a float. pub fn atom_f(f: f64) -> Sexp { Sexp::Atom(Atom::F(f), Default::default()) } /// Constructs a list s-expression given a slice of s-expressions. pub fn list(xs: &[Sexp]) -> Sexp { Sexp::List(xs.to_owned(), Default::default()) } /// Reads an s-expression out of a `&str`. #[inline(never)] pub fn parse(s: &str) -> Result> { let mut pos = 0; let ret = parse_sexp(s, &mut pos)?; if pos == s.len() { Ok(ret) } else { err("unrecognized post-s-expression data", s, &pos) } } // TODO: Pretty print in lisp convention, instead of all on the same line, // packed as tightly as possible. It's kinda ugly. fn is_num_string(s: &str) -> bool { let x: Result = FromStr::from_str(&s); let y: Result = FromStr::from_str(&s); x.is_ok() || y.is_ok() } fn string_contains_whitespace(s: &str) -> bool { for c in s.chars() { if c.is_whitespace() { return true; } } false } fn quote(s: &str) -> Cow { if !s.contains("\"") && !string_contains_whitespace(s) && !is_num_string(s) { Cow::Borrowed(s) } else { let mut r: String = "\"".to_string(); r.push_str(&s.replace("\\", "\\\\").replace("\"", "\\\"")); r.push_str("\""); Cow::Owned(r) } } impl fmt::Display for Atom { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { match *self { Atom::S(ref s) => write!(f, "{}", quote(s)), Atom::I(i) => write!(f, "{}", i), Atom::F(d) => write!(f, "{}", d), } } } impl fmt::Display for Sexp { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { match *self { Sexp::Atom(ref a, _) => write!(f, "{}", a), Sexp::List(ref xs, _) => { write!(f, "(")?; for (i, x) in xs.iter().enumerate() { let s = if i == 0 { "" } else { " " }; write!(f, "{}{}", s, x)?; } write!(f, ")") } } } } impl fmt::Debug for Atom { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(f, "{}", self) } } impl fmt::Debug for Sexp { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(f, "{}", self) } }