//! A lightweight, self-contained s-expression parser and data format. //! Use `parse` to get an s-expression from its string representation, and the //! `Display` trait to serialize it, potentially by doing `sexp.to_string()`. #![deny(unsafe_code)] use std::borrow::Cow; use std::fmt; use std::str::{self, FromStr}; use error::{ERes, err}; pub use error::Error; pub use position::SourcePosition; use position::spos; #[cfg(test)] mod test; mod error; mod position; /// A single data element in an s-expression. Floats are excluded to ensure /// atoms may be used as keys in ordered and hashed data structures. /// /// All strings must be valid utf-8. #[derive(Debug, PartialEq, Clone, PartialOrd)] pub enum Atom { /// Simple string atom S(String), /// Quoted string QS(String), /// Character literal (with single quotes) C(char), /// Signed integer (normally only used for negative values) I(i64), /// Unsigned integer U(u64), /// Float F(f64), } /// An s-expression is either an atom or a list of s-expressions. This is /// similar to the data format used by lisp. #[derive(Debug, Clone)] pub enum Sexp { /// Atom Atom(Atom, SourcePosition), /// List of expressions List(Vec, SourcePosition), } impl Sexp { pub fn pos(&self) -> &SourcePosition { match self { Sexp::List(_, pos) | Sexp::Atom(_, pos) => pos } } /// Check fi thsi Sexp is an atom pub fn is_atom(&self) -> bool { match self { Sexp::Atom(_, _) => true, _ => false, } } /// Check fi thsi Sexp is a list pub fn is_list(&self) -> bool { match self { Sexp::List(_, _) => true, _ => false, } } } impl PartialEq for Sexp { fn eq(&self, other: &Self) -> bool { match (self, other) { (Sexp::Atom(a, _), Sexp::Atom(b, _)) => { a == b } (Sexp::List(a, _), Sexp::List(b, _)) => { a == b } _ => false } } } fn without_underscores(s: &str) -> Cow { if s.contains('_') { s.chars() .filter(|c| *c != '_') .collect::().into() } else { s.into() } } pub fn atom_of_string(s: String) -> Atom { if s.starts_with('#') { match u64::from_str_radix(&without_underscores(&s[1..]), 16) { Ok(u) => return Atom::U(u), Err(_) => {} }; } if s.starts_with("0x") { match u64::from_str_radix(&without_underscores(&s[2..]), 16) { Ok(u) => return Atom::U(u), Err(_) => {} }; } if s.starts_with("0b") { match u64::from_str_radix(&without_underscores(&s[2..]), 2) { Ok(u) => return Atom::U(u), Err(_) => {} }; } if !s.starts_with('_') { let filtered = without_underscores(&s); match FromStr::from_str(&filtered) { Ok(u) => return Atom::U(u), Err(_) => {} }; match FromStr::from_str(&filtered) { Ok(i) => return Atom::I(i), Err(_) => {} }; match FromStr::from_str(&filtered) { Ok(f) => return Atom::F(f), Err(_) => {} }; } Atom::S(s) } // returns the char it found, and the new pos if you wish to consume that char fn peek(s: &str, pos: usize) -> ERes<(char, usize)> { //trace!("peek {}", pos); if pos == s.len() { return err("unexpected eof", s, pos); } if s.is_char_boundary(pos) { let ch = s[pos..].chars().next().unwrap(); let next = pos + ch.len_utf8(); Ok((ch, next)) } else { // strings must be composed of valid utf-8 chars. unreachable!() } } // returns the char it found, and the new pos if you wish to consume that char fn peekn(s: &str, nth: usize, pos: usize) -> Option<(char, usize)> { //trace!("peekn {}", pos); if nth == 0 { panic!("peekn with nth=0"); } if s.is_char_boundary(pos) { let mut iter = s[pos..].chars(); let mut bytelen = 0; for n in 0..nth { if let Some(ch) = iter.next() { bytelen += ch.len_utf8(); if n == (nth - 1) { return Some((ch, pos + bytelen)); } } else { return None; } } unreachable!() } else { // strings must be composed of valid utf-8 chars. unreachable!() } } fn expect(s: &str, pos: &mut usize, c: char) -> ERes<()> { //trace!("expect {}", pos); let (ch, next) = peek(s, *pos)?; *pos = next; if ch == c { Ok(()) } else { err("unexpected character", s, *pos) } } fn consume_until_newline(s: &str, pos: &mut usize) -> ERes<()> { loop { if *pos == s.len() { return Ok(()); } let (ch, next) = peek(s, *pos)?; *pos = next; if ch == '\n' { return Ok(()); } } } // zero or more spaces fn zspace(s: &str, pos: &mut usize) -> ERes<()> { //trace!("zspace {}", pos); loop { if *pos == s.len() { return Ok(()); } let (ch, next) = peek(s, *pos)?; if ch == ';' { consume_until_newline(s, pos)? } else if ch.is_whitespace() { *pos = next; } else { return Ok(()); } } } fn parse_quoted_atom(s: &str, quote: char, pos: &mut usize) -> ERes { //trace!("parse_quoted_atom {}", pos); let pos0 = *pos; let mut cs: String = String::new(); expect(s, pos, quote)?; loop { let (ch, next) = peek(s, *pos)?; if ch == quote { *pos = next; break; } else if ch == '\\' { let (postslash, nextnext) = peek(s, next)?; match postslash { 'r' => cs.push('\r'), 'n' => cs.push('\n'), 't' => cs.push('\t'), other => cs.push(other) } *pos = nextnext; } else { cs.push(ch); *pos = next; } } if quote == '\'' { // This is a character literal if cs.chars().count() == 1 { return Ok(Atom::C(cs.chars().next().unwrap())); } else { return err("Too long character literal!", s, pos0); } } // Do not try i64 conversion, since this atom was explicitly quoted. Ok(Atom::QS(cs)) } fn parse_unquoted_atom(s: &str, pos: &mut usize) -> ERes { //trace!("parse_unquoted_atom {}", pos); let mut cs: String = String::new(); loop { if *pos == s.len() { break; } let (c, next) = peek(s, *pos)?; if c == ';' { consume_until_newline(s, pos)?; break; } if c.is_whitespace() || c == '(' || c == ')' { break; } cs.push(c); *pos = next; } Ok(atom_of_string(cs)) } fn parse_atom(s: &str, pos: &mut usize) -> ERes { //trace!("parse_atom {}", pos); let (ch, _) = peek(s, *pos)?; if ch == '"' { return parse_quoted_atom(s, ch, pos); } else if ch == '\'' { if let Some(('\\', _)) = peekn(s, 2, *pos) { if let Some(('\'', _)) = peekn(s, 4, *pos) { // Character literal with an escape sequence return parse_quoted_atom(s, '\'', pos); } } else if let Some(('\'', _)) = peekn(s, 3, *pos) { // Simple character literal return parse_quoted_atom(s, '\'', pos); } } parse_unquoted_atom(s, pos) } fn parse_list(s: &str, pos: &mut usize) -> ERes> { //trace!("parse_list {}", pos); zspace(s, pos)?; expect(s, pos, '(')?; let mut sexps: Vec = Vec::new(); loop { zspace(s, pos)?; let (c, next) = peek(s, *pos)?; if c == ')' { *pos = next; break; } sexps.push(parse_sexp(s, pos)?); } zspace(s, pos)?; Ok(sexps) } fn parse_sexp(s: &str, pos: &mut usize) -> ERes { //trace!("parse_sexp {}", pos); zspace(s, pos)?; let (c, _) = peek(s, *pos)?; let pos0 = *pos; let r = if c == '(' { Ok(Sexp::List(parse_list(s, pos)?, spos(s, pos0))) } else { Ok(Sexp::Atom(parse_atom(s, pos)?, spos(s, pos0))) }; zspace(s, pos)?; r } /// Constructs an atomic s-expression from a string. pub fn atom_s(s: &str) -> Sexp { Sexp::Atom(Atom::S(s.to_owned()), Default::default()) } /// Constructs an atomic s-expression from a string. pub fn atom_qs(s: &str) -> Sexp { Sexp::Atom(Atom::QS(s.to_owned()), Default::default()) } /// Constructs an atomic s-expression from an int. pub fn atom_i(i: i64) -> Sexp { Sexp::Atom(Atom::I(i), Default::default()) } /// Constructs an atomic s-expression from an unsigned int. pub fn atom_u(u: u64) -> Sexp { Sexp::Atom(Atom::U(u), Default::default()) } /// Constructs an atomic s-expression from a char pub fn atom_c(c: char) -> Sexp { Sexp::Atom(Atom::C(c), Default::default()) } /// Constructs an atomic s-expression from a float. pub fn atom_f(f: f64) -> Sexp { Sexp::Atom(Atom::F(f), Default::default()) } /// Constructs a list s-expression given a slice of s-expressions. pub fn list(xs: &[Sexp]) -> Sexp { Sexp::List(xs.to_owned(), Default::default()) } /// Reads an s-expression out of a `&str`. #[inline(never)] pub fn parse(s: &str) -> Result> { let mut pos = 0; let ret = parse_sexp(s, &mut pos)?; if pos == s.len() { Ok(ret) } else { err("unrecognized post-s-expression data", s, pos) } } fn quote(s: &str) -> String { s.chars().fold(String::new(), |mut s, ch| { match ch { '\'' | '\\' | '"' => { s.push('\\'); s.push(ch); } '\n' => { s.push_str("\\n"); } '\r' => { s.push_str("\\n"); } '\t' => { s.push_str("\\t"); } other => { s.push(other); } } s }) } impl fmt::Display for Atom { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { match *self { Atom::QS(ref s) => write!(f, "\"{}\"", quote(s)), Atom::S(ref s) => write!(f, "{}", s), Atom::C(c) => write!(f, "'{}'", quote(&c.to_string())), Atom::I(i) => write!(f, "{}", i), Atom::U(u) => write!(f, "{}", u), Atom::F(d) => write!(f, "{}", d), } } } impl fmt::Display for Sexp { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { match *self { Sexp::Atom(ref a, _) => write!(f, "{}", a), Sexp::List(ref xs, _) => { write!(f, "(")?; for (i, x) in xs.iter().enumerate() { let s = if i == 0 { "" } else { " " }; write!(f, "{}{}", s, x)?; } write!(f, ")") } } } }