sexp automated format & cleanup

pull/21/head
Ondřej Hruška 4 years ago
parent 3cb445a31c
commit 7272eecc48
Signed by: MightyPork
GPG Key ID: 2C5FD5035250423D
  1. 379
      lib/spanned_sexp/src/lib.rs

@ -18,9 +18,9 @@ use std::str::{self, FromStr};
#[derive(Clone, PartialEq, PartialOrd)]
#[allow(missing_docs)]
pub enum Atom {
    /// A string atom.
    S(String),
    /// A signed 64-bit integer atom.
    I(i64),
    /// A 64-bit floating-point atom.
    F(f64),
}
/// An s-expression is either an atom or a list of s-expressions. This is /// An s-expression is either an atom or a list of s-expressions. This is
@ -28,25 +28,25 @@ pub enum Atom {
#[derive(PartialEq, Clone, PartialOrd)] #[derive(PartialEq, Clone, PartialOrd)]
#[allow(missing_docs)] #[allow(missing_docs)]
pub enum Sexp { pub enum Sexp {
Atom(Atom), Atom(Atom),
List(Vec<Sexp>), List(Vec<Sexp>),
} }
/// Describes a failure encountered while parsing an s-expression.
pub struct Error {
    /// Static text explaining what went wrong.
    pub message: &'static str,
    /// Line number at which the error was detected.
    pub line: usize,
    /// Column number at which the error was detected.
    pub column: usize,
    /// Index into the input string that triggered the error.
    pub index: usize,
}
impl error::Error for Error { impl error::Error for Error {
fn description(&self) -> &str { self.message } fn description(&self) -> &str { self.message }
fn cause(&self) -> Option<&dyn error::Error> { None } fn cause(&self) -> Option<&dyn error::Error> { None }
} }
/// Since errors are the uncommon case, they're boxed. This keeps the size of /// Since errors are the uncommon case, they're boxed. This keeps the size of
@ -61,292 +61,291 @@ type Err = Box<Error>;
type ERes<T> = Result<T, Err>; type ERes<T> = Result<T, Err>;
impl fmt::Display for Error { impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "{}:{}: {}", self.line, self.column, self.message) write!(f, "{}:{}: {}", self.line, self.column, self.message)
} }
} }
impl fmt::Debug for Error { impl fmt::Debug for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "{}", self) write!(f, "{}", self)
} }
} }
/// Translates a byte offset into `s` to a `(line, column)` pair.
///
/// Lines are counted from 1. The column is the zero-based count of characters
/// (not bytes) preceding the addressed character on its line. `pos` is a byte
/// index, so the scan walks `char_indices` and stops once a character starts
/// past `pos`; counting `chars()` against a byte offset would over-count after
/// any multi-byte character.
///
/// A `pos` that addresses a newline, or the end of input right after one, is
/// reported as column 0 of the following line.
fn get_line_and_column(s: &str, pos: usize) -> (usize, usize) {
    let mut line: usize = 1;
    // `None` means "no character seen on this line yet".
    let mut col: Option<usize> = None;
    for (_, c) in s.char_indices().take_while(|&(i, _)| i <= pos) {
        if c == '\n' {
            line += 1;
            col = None;
        } else {
            col = Some(col.map_or(0, |n| n + 1));
        }
    }
    (line, col.unwrap_or(0))
}
#[cold] #[cold]
fn err_impl(message: &'static str, s: &str, pos: &usize) -> Err { fn err_impl(message: &'static str, s: &str, pos: &usize) -> Err {
let (line, column) = get_line_and_column(s, *pos); let (line, column) = get_line_and_column(s, *pos);
Box::new(Error { Box::new(Error {
message: message, message: message,
line: line, line: line,
column: column, column: column,
index: *pos, index: *pos,
}) })
} }
fn err<T>(message: &'static str, s: &str, pos: &usize) -> ERes<T> { fn err<T>(message: &'static str, s: &str, pos: &usize) -> ERes<T> {
Err(err_impl(message, s, pos)) Err(err_impl(message, s, pos))
} }
/// Tracing hook for following the parser's progress while testing.
///
/// The body is intentionally empty, so every call is a no-op regardless of
/// build profile. To trace, temporarily give the function a body that prints
/// the message and position, e.g. `println!("{} @ {}", _msg, _pos)`.
fn dbg(_msg: &str, _pos: &usize) {}
fn atom_of_string(s: String) -> Atom { fn atom_of_string(s: String) -> Atom {
match FromStr::from_str(&s) { match FromStr::from_str(&s) {
Ok(i) => return Atom::I(i), Ok(i) => return Atom::I(i),
Err(_) => {}, Err(_) => {}
}; };
match FromStr::from_str(&s) { match FromStr::from_str(&s) {
Ok(f) => return Atom::F(f), Ok(f) => return Atom::F(f),
Err(_) => {}, Err(_) => {}
}; };
Atom::S(s) Atom::S(s)
} }
// Looks at the char at `pos` without consuming it. Yields the char together
// with the offset just past it, which the caller may store to consume it.
fn peek(s: &str, pos: &usize) -> ERes<(char, usize)> {
    dbg("peek", pos);
    match s.get(*pos..).and_then(|rest| rest.chars().next()) {
        Some(ch) => Ok((ch, *pos + ch.len_utf8())),
        None if *pos == s.len() => err("unexpected eof", s, pos),
        // `pos` is only ever moved by whole chars, so it always sits on a
        // char boundary inside the string.
        None => unreachable!(),
    }
}
fn expect(s: &str, pos: &mut usize, c: char) -> ERes<()> { fn expect(s: &str, pos: &mut usize, c: char) -> ERes<()> {
dbg("expect", pos); dbg("expect", pos);
let (ch, next) = peek(s, pos)?; let (ch, next) = peek(s, pos)?;
*pos = next; *pos = next;
if ch == c { Ok(()) } else { err("unexpected character", s, pos) } if ch == c { Ok(()) } else { err("unexpected character", s, pos) }
} }
// Advances `pos` past the next '\n', or to the end of input if none remains.
fn consume_until_newline(s: &str, pos: &mut usize) -> ERes<()> {
    while *pos != s.len() {
        let (ch, next) = peek(s, pos)?;
        *pos = next;
        if ch == '\n' {
            break;
        }
    }
    Ok(())
}
// Skips zero or more whitespace chars; a ';' starts a comment that is skipped
// through the end of its line.
fn zspace(s: &str, pos: &mut usize) -> ERes<()> {
    dbg("zspace", pos);
    while *pos != s.len() {
        match peek(s, pos)? {
            (';', _) => consume_until_newline(s, pos)?,
            (ch, next) if ch.is_whitespace() => *pos = next,
            _ => break,
        }
    }
    Ok(())
}
fn parse_quoted_atom(s: &str, pos: &mut usize) -> ERes<Atom> { fn parse_quoted_atom(s: &str, pos: &mut usize) -> ERes<Atom> {
dbg("parse_quoted_atom", pos); dbg("parse_quoted_atom", pos);
let mut cs: String = String::new(); let mut cs: String = String::new();
expect(s, pos, '"')?; expect(s, pos, '"')?;
loop { loop {
let (ch, next) = peek(s, pos)?; let (ch, next) = peek(s, pos)?;
if ch == '"' { if ch == '"' {
*pos = next; *pos = next;
break; break;
} else if ch == '\\' { } else if ch == '\\' {
let (postslash, nextnext) = peek(s, &next)?; let (postslash, nextnext) = peek(s, &next)?;
if postslash == '"' || postslash == '\\' { if postslash == '"' || postslash == '\\' {
cs.push(postslash); cs.push(postslash);
} else { } else {
cs.push(ch); cs.push(ch);
cs.push(postslash); cs.push(postslash);
} }
*pos = nextnext; *pos = nextnext;
} else { } else {
cs.push(ch); cs.push(ch);
*pos = next; *pos = next;
}
} }
}
// Do not try i64 conversion, since this atom was explicitly quoted. // Do not try i64 conversion, since this atom was explicitly quoted.
Ok(Atom::S(cs)) Ok(Atom::S(cs))
} }
fn parse_unquoted_atom(s: &str, pos: &mut usize) -> ERes<Atom> { fn parse_unquoted_atom(s: &str, pos: &mut usize) -> ERes<Atom> {
dbg("parse_unquoted_atom", pos); dbg("parse_unquoted_atom", pos);
let mut cs: String = String::new(); let mut cs: String = String::new();
loop { loop {
if *pos == s.len() { break } if *pos == s.len() { break; }
let (c, next) = peek(s, pos)?; let (c, next) = peek(s, pos)?;
if c == ';' { consume_until_newline(s, pos)?; break } if c == ';' {
if c.is_whitespace() || c == '(' || c == ')' { break } consume_until_newline(s, pos)?;
cs.push(c); break;
*pos = next; }
} if c.is_whitespace() || c == '(' || c == ')' { break; }
cs.push(c);
*pos = next;
}
Ok(atom_of_string(cs)) Ok(atom_of_string(cs))
} }
// Dispatches to the quoted or unquoted atom parser based on the next char.
fn parse_atom(s: &str, pos: &mut usize) -> ERes<Atom> {
    dbg("parse_atom", pos);
    match peek(s, pos)? {
        ('"', _) => parse_quoted_atom(s, pos),
        _ => parse_unquoted_atom(s, pos),
    }
}
fn parse_list(s: &str, pos: &mut usize) -> ERes<Vec<Sexp>> { fn parse_list(s: &str, pos: &mut usize) -> ERes<Vec<Sexp>> {
dbg("parse_list", pos); dbg("parse_list", pos);
zspace(s, pos)?; zspace(s, pos)?;
expect(s, pos, '(')?; expect(s, pos, '(')?;
let mut sexps: Vec<Sexp> = Vec::new(); let mut sexps: Vec<Sexp> = Vec::new();
loop { loop {
zspace(s, pos)?; zspace(s, pos)?;
let (c, next) = peek(s, pos)?; let (c, next) = peek(s, pos)?;
if c == ')' { if c == ')' {
*pos = next; *pos = next;
break; break;
}
sexps.push(parse_sexp(s, pos)?);
} }
sexps.push(parse_sexp(s, pos)?);
}
zspace(s, pos)?; zspace(s, pos)?;
Ok(sexps) Ok(sexps)
} }
fn parse_sexp(s: &str, pos: &mut usize) -> ERes<Sexp> { fn parse_sexp(s: &str, pos: &mut usize) -> ERes<Sexp> {
dbg("parse_sexp", pos); dbg("parse_sexp", pos);
zspace(s, pos)?; zspace(s, pos)?;
let (c, _) = peek(s, pos)?; let (c, _) = peek(s, pos)?;
let r = let r =
if c == '(' { Ok(Sexp::List(parse_list(s, pos)?)) } if c == '(' { Ok(Sexp::List(parse_list(s, pos)?)) } else { Ok(Sexp::Atom(parse_atom(s, pos)?)) };
else { Ok(Sexp::Atom(parse_atom(s, pos)?)) }; zspace(s, pos)?;
zspace(s, pos)?; r
r
} }
/// Constructs an atomic s-expression from a string. /// Constructs an atomic s-expression from a string.
pub fn atom_s(s: &str) -> Sexp { pub fn atom_s(s: &str) -> Sexp {
Sexp::Atom(Atom::S(s.to_owned())) Sexp::Atom(Atom::S(s.to_owned()))
} }
/// Constructs an atomic s-expression from an int. /// Constructs an atomic s-expression from an int.
pub fn atom_i(i: i64) -> Sexp { pub fn atom_i(i: i64) -> Sexp {
Sexp::Atom(Atom::I(i)) Sexp::Atom(Atom::I(i))
} }
/// Constructs an atomic s-expression from a float. /// Constructs an atomic s-expression from a float.
pub fn atom_f(f: f64) -> Sexp { pub fn atom_f(f: f64) -> Sexp {
Sexp::Atom(Atom::F(f)) Sexp::Atom(Atom::F(f))
} }
/// Constructs a list s-expression given a slice of s-expressions. /// Constructs a list s-expression given a slice of s-expressions.
pub fn list(xs: &[Sexp]) -> Sexp { pub fn list(xs: &[Sexp]) -> Sexp {
Sexp::List(xs.to_owned()) Sexp::List(xs.to_owned())
} }
/// Reads an s-expression out of a `&str`. /// Reads an s-expression out of a `&str`.
#[inline(never)] #[inline(never)]
pub fn parse(s: &str) -> Result<Sexp, Box<Error>> { pub fn parse(s: &str) -> Result<Sexp, Box<Error>> {
let mut pos = 0; let mut pos = 0;
let ret = parse_sexp(s, &mut pos)?; let ret = parse_sexp(s, &mut pos)?;
if pos == s.len() { Ok(ret) } else { err("unrecognized post-s-expression data", s, &pos) } if pos == s.len() { Ok(ret) } else { err("unrecognized post-s-expression data", s, &pos) }
} }
// TODO: Pretty print in lisp convention, instead of all on the same line, // TODO: Pretty print in lisp convention, instead of all on the same line,
// packed as tightly as possible. It's kinda ugly. // packed as tightly as possible. It's kinda ugly.
// True when `s` parses as an `i64` or as an `f64`.
fn is_num_string(s: &str) -> bool {
    i64::from_str(s).is_ok() || f64::from_str(s).is_ok()
}
// True when any char of `s` is whitespace.
fn string_contains_whitespace(s: &str) -> bool {
    s.chars().any(char::is_whitespace)
}
fn quote(s: &str) -> Cow<str> { fn quote(s: &str) -> Cow<str> {
if !s.contains("\"") if !s.contains("\"")
&& !string_contains_whitespace(s) && !string_contains_whitespace(s)
&& !is_num_string(s) { && !is_num_string(s) {
Cow::Borrowed(s) Cow::Borrowed(s)
} else { } else {
let mut r: String = "\"".to_string(); let mut r: String = "\"".to_string();
r.push_str(&s.replace("\\", "\\\\").replace("\"", "\\\"")); r.push_str(&s.replace("\\", "\\\\").replace("\"", "\\\""));
r.push_str("\""); r.push_str("\"");
Cow::Owned(r) Cow::Owned(r)
} }
} }
impl fmt::Display for Atom { impl fmt::Display for Atom {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
match *self { match *self {
Atom::S(ref s) => write!(f, "{}", quote(s)), Atom::S(ref s) => write!(f, "{}", quote(s)),
Atom::I(i) => write!(f, "{}", i), Atom::I(i) => write!(f, "{}", i),
Atom::F(d) => write!(f, "{}", d), Atom::F(d) => write!(f, "{}", d),
}
} }
}
} }
impl fmt::Display for Sexp { impl fmt::Display for Sexp {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
match *self { match *self {
Sexp::Atom(ref a) => write!(f, "{}", a), Sexp::Atom(ref a) => write!(f, "{}", a),
Sexp::List(ref xs) => { Sexp::List(ref xs) => {
write!(f, "(")?; write!(f, "(")?;
for (i, x) in xs.iter().enumerate() { for (i, x) in xs.iter().enumerate() {
let s = if i == 0 { "" } else { " " }; let s = if i == 0 { "" } else { " " };
write!(f, "{}{}", s, x)?; write!(f, "{}{}", s, x)?;
}
write!(f, ")")
}
} }
write!(f, ")")
},
} }
}
} }
impl fmt::Debug for Atom { impl fmt::Debug for Atom {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "{}", self) write!(f, "{}", self)
} }
} }
impl fmt::Debug for Sexp { impl fmt::Debug for Sexp {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "{}", self) write!(f, "{}", self)
} }
} }
#[cfg(test)] #[cfg(test)]
@ -357,14 +356,14 @@ mod test {
fn test_hello_world() { fn test_hello_world() {
assert_eq!( assert_eq!(
parse("(hello -42\n\t -4.0 \"world\") ; comment").unwrap(), parse("(hello -42\n\t -4.0 \"world\") ; comment").unwrap(),
list(&[ atom_s("hello"), atom_i(-42), atom_f(-4.0), atom_s("world") ])); list(&[atom_s("hello"), atom_i(-42), atom_f(-4.0), atom_s("world")]));
} }
#[test]
fn test_escaping() {
    // An escaped quote is unescaped, `\q` is kept verbatim, and a quoted
    // number stays a string.
    let parsed = parse("(\"\\\"\\q\" \"1234\" 1234)").unwrap();
    let expected = list(&[atom_s("\"\\q"), atom_s("1234"), atom_i(1234)]);
    assert_eq!(parsed, expected);
}
#[test] #[test]
@ -388,7 +387,7 @@ mod test {
#[test] #[test]
fn test_space_in_atom() { fn test_space_in_atom() {
let sexp = list(&[ atom_s("hello world")]); let sexp = list(&[atom_s("hello world")]);
let sexp_as_string = sexp.to_string(); let sexp_as_string = sexp.to_string();
assert_eq!("(\"hello world\")", sexp_as_string); assert_eq!("(\"hello world\")", sexp_as_string);
assert_eq!(sexp, parse(&sexp_as_string).unwrap()); assert_eq!(sexp, parse(&sexp_as_string).unwrap());
@ -418,6 +417,6 @@ mod test {
fn sexp_size() { fn sexp_size() {
// I just want to see when this changes, in the diff. // I just want to see when this changes, in the diff.
use std::mem; use std::mem;
assert_eq!(mem::size_of::<Sexp>(), mem::size_of::<isize>()*5); assert_eq!(mem::size_of::<Sexp>(), mem::size_of::<isize>() * 5);
} }
} }

Loading…
Cancel
Save