From 85911c8e99992f4190636b1c0ce44abed04e5aff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Hru=C5=A1ka?= Date: Tue, 6 Oct 2020 19:36:13 +0200 Subject: [PATCH] add sexp into source tree --- Cargo.lock | 2 - crsn/Cargo.toml | 2 +- lib/spanned_sexp/.cargo-ok | 1 + lib/spanned_sexp/.editorconfig | 10 + lib/spanned_sexp/.gitignore | 2 + lib/spanned_sexp/.travis.yml | 36 +++ lib/spanned_sexp/Cargo.toml | 16 ++ lib/spanned_sexp/LICENSE | 19 ++ lib/spanned_sexp/README.md | 15 ++ lib/spanned_sexp/src/lib.rs | 417 +++++++++++++++++++++++++++++++++ 10 files changed, 517 insertions(+), 3 deletions(-) create mode 100644 lib/spanned_sexp/.cargo-ok create mode 100644 lib/spanned_sexp/.editorconfig create mode 100644 lib/spanned_sexp/.gitignore create mode 100644 lib/spanned_sexp/.travis.yml create mode 100644 lib/spanned_sexp/Cargo.toml create mode 100644 lib/spanned_sexp/LICENSE create mode 100644 lib/spanned_sexp/README.md create mode 100644 lib/spanned_sexp/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index f18d411..8f09537 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -879,8 +879,6 @@ dependencies = [ [[package]] name = "sexp" version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8fa7ac9df84000b0238cf497cb2d3056bac2ff2a7d8cf179d2803b4b58571f" [[package]] name = "sha-1" diff --git a/crsn/Cargo.toml b/crsn/Cargo.toml index 369e7aa..15c3d4e 100644 --- a/crsn/Cargo.toml +++ b/crsn/Cargo.toml @@ -6,7 +6,7 @@ edition = "2018" publish = false [dependencies] -sexp = "1.1.4" +sexp = { path = "../lib/spanned_sexp" } thiserror = "1.0.20" anyhow = "1.0.32" dyn-clonable = "0.9.0" diff --git a/lib/spanned_sexp/.cargo-ok b/lib/spanned_sexp/.cargo-ok new file mode 100644 index 0000000..b5754e2 --- /dev/null +++ b/lib/spanned_sexp/.cargo-ok @@ -0,0 +1 @@ +ok \ No newline at end of file diff --git a/lib/spanned_sexp/.editorconfig b/lib/spanned_sexp/.editorconfig new file mode 100644 index 0000000..fa9becf --- /dev/null +++ 
b/lib/spanned_sexp/.editorconfig @@ -0,0 +1,10 @@ +root = true + +[*] +end_of_line = lf +insert_final_newline = true +charset = utf-8 +indent_style = space +indent_size = 2 +tab_width = 4 +trim_trailing_whitespace = true diff --git a/lib/spanned_sexp/.gitignore b/lib/spanned_sexp/.gitignore new file mode 100644 index 0000000..a9d37c5 --- /dev/null +++ b/lib/spanned_sexp/.gitignore @@ -0,0 +1,2 @@ +target +Cargo.lock diff --git a/lib/spanned_sexp/.travis.yml b/lib/spanned_sexp/.travis.yml new file mode 100644 index 0000000..a4430b7 --- /dev/null +++ b/lib/spanned_sexp/.travis.yml @@ -0,0 +1,36 @@ +language: rust +sudo: false +# necessary for `travis-cargo coveralls --no-sudo` +addons: + apt: + packages: + - libcurl4-openssl-dev + - libelf-dev + - libdw-dev +rust: +- nightly +os: +- linux + +env: + global: + - TRAVIS_CARGO_NIGHTLY_FEATURE="" + - secure: C2P1wLHzBxccS3jrimsG2TaDy4sAhYiKSq1g+cwYHhAKZKkiIpL7Ez5iEHH6BbEvvg4HiUJy4j0w83luZ/FXUuxkD2GZsXWoG+20DFBTLQvCJE/LPahVNbb5i+NdmyIsZPHLloXNvT63hXwu8KNV4U0hrYAgViIXkumoLnOiQD/jim81i7gxUOSe65AzMHcfPRaAwKHn+NGIvUfwMzU2hKZbnH/BPIi2PNtQ6e0VZEvAqA5Ad3hRV0YaBKZ3HZn8tr8UnHKmLbPffb/01EVWAFBU+rFMVYrdzDsiVp7UHMPtVV9aNXUVszB+a/ASWHsAZEdX8XsbmH9RSEBCzsUq2j2HFM2R7yYZnkL3FPcpf/ZKgy4ZVw6gKO42DCvBRGwhI1JMjeKBmrzCGZHE70FxD0zAZRwX9n9M7mUKhakzMvs/LSKMQKlOJslSR+OLEUpr3MCBthpKIiajNYDrJL5P/3KrFOF2R4H/2Z91/3osEIRqzYiEKdeJU01Yef5FCI+H6SLvbhIlVAQTM0IJKGAP0B2N6J4Ot7XrYuGDQag48oPzWzJ2dOGwYjwkda1rgW7pdjtWuullOi2ob1zdI6y/i/CdAS8AE0yRz7VCK4grwonUICzdVaaIAaTTd0yq9PRWAjSjZqNG5EOLADzABIihPnkBw4WygoDq18rSkk0pRbE= + +before_script: + - pip install 'travis-cargo<0.2' --user && export PATH=$HOME/.local/bin:$PATH + +script: +- travis-cargo build +- travis-cargo test +- travis-cargo bench +- travis-cargo doc +after_success: +- | + [ $TRAVIS_BRANCH = master ] && + [ $TRAVIS_PULL_REQUEST = false ] && + echo '' > target/doc/index.html && + git clone --depth 1 https://github.com/davisp/ghp-import && + ./ghp-import/ghp-import -n target/doc && + git push -fq 
https://${GH_TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages +- travis-cargo coveralls --no-sudo diff --git a/lib/spanned_sexp/Cargo.toml b/lib/spanned_sexp/Cargo.toml new file mode 100644 index 0000000..26c6fc8 --- /dev/null +++ b/lib/spanned_sexp/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "sexp" +version = "1.1.4" +authors = ["Clark Gaebel "] + +documentation = "https://cgaebel.github.io/sexp" +homepage = "https://github.com/cgaebel/sexp" +repository = "https://github.com/cgaebel/sexp" + +readme = "README.md" + +keywords = [ "sexp", "parsing", "s-expression", "file-format" ] + +description = "A small, simple, self-contained, s-expression parser and pretty-printer." + +license = "MIT" diff --git a/lib/spanned_sexp/LICENSE b/lib/spanned_sexp/LICENSE new file mode 100644 index 0000000..683f7fa --- /dev/null +++ b/lib/spanned_sexp/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2015 Clark Gaebel + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/lib/spanned_sexp/README.md b/lib/spanned_sexp/README.md new file mode 100644 index 0000000..6da62fb --- /dev/null +++ b/lib/spanned_sexp/README.md @@ -0,0 +1,15 @@ +Sexp +===== + +A small, simple, self-contained, s-expression parser and pretty-printer. + +[![crates.io](https://img.shields.io/crates/v/sexp.svg)](https://crates.io/crates/sexp/) + +[![Build Status](https://travis-ci.org/cgaebel/sexp.svg?branch=master)](https://travis-ci.org/cgaebel/sexp) + +[![Coverage Status](https://coveralls.io/repos/cgaebel/sexp/badge.svg?branch=master&service=github)](https://coveralls.io/github/cgaebel/sexp?branch=master) + +Documentation +------------- + +See the [API Docs](https://cgaebel.github.io/sexp/). diff --git a/lib/spanned_sexp/src/lib.rs b/lib/spanned_sexp/src/lib.rs new file mode 100644 index 0000000..d514af7 --- /dev/null +++ b/lib/spanned_sexp/src/lib.rs @@ -0,0 +1,417 @@ +//! A lightweight, self-contained s-expression parser and data format. +//! Use `parse` to get an s-expression from its string representation, and the +//! `Display` trait to serialize it, potentially by doing `sexp.to_string()`. + +#![deny(missing_docs)] +#![deny(unsafe_code)] + +use std::borrow::Cow; +use std::cmp; +use std::error; +use std::fmt; +use std::str::{self, FromStr}; + +/// A single data element in an s-expression: a string, an integer or a float. +/// Only `PartialEq`/`PartialOrd` are derived, so atoms cannot be `HashMap` or `BTreeMap` keys. +/// +/// All strings must be valid utf-8. +#[derive(PartialEq, Clone, PartialOrd)] +#[allow(missing_docs)] +pub enum Atom { + S(String), + I(i64), + F(f64), +} + +/// An s-expression is either an atom or a list of s-expressions. This is +/// similar to the data format used by lisp. +#[derive(PartialEq, Clone, PartialOrd)] +#[allow(missing_docs)] +pub enum Sexp { + Atom(Atom), + List(Vec), +} + +#[test] +fn sexp_size() { + // I just want to see when this changes, in the diff. 
+ use std::mem; + assert_eq!(mem::size_of::(), mem::size_of::()*5); +} + +/// The representation of an s-expression parse error. +pub struct Error { + /// The error message. + pub message: &'static str, + /// The line number on which the error occurred. + pub line: usize, + /// The column number on which the error occurred. + pub column: usize, + /// The index in the given string which caused the error. + pub index: usize, +} + +impl error::Error for Error { + fn description(&self) -> &str { self.message } + fn cause(&self) -> Option<&error::Error> { None } +} + +/// Since errors are the uncommon case, they're boxed. This keeps the size of +/// structs down, which helps performance in the common case. +/// +/// For example, an `ERes<()>` becomes 8 bytes, instead of the 24 bytes it would +/// be if `Err` were unboxed. +type Err = Box; + +/// Helps clean up type signatures, but shouldn't be exposed to the outside +/// world. +type ERes = Result; + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{}:{}: {}", self.line, self.column, self.message) + } +} + +impl fmt::Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{}", self) + } +} + +#[test] +fn show_an_error() { + assert_eq!(format!("{:?}", parse("(aaaa").unwrap_err()), "1:4: unexpected eof"); +} + +fn get_line_and_column(s: &str, pos: usize) -> (usize, usize) { + let mut line: usize = 1; + let mut col: isize = -1; + for c in s.chars().take(pos+1) { + if c == '\n' { + line += 1; + col = -1; + } else { + col += 1; + } + } + (line, cmp::max(col, 0) as usize) +} + +#[test] +fn line_and_col_test() { + let s = "0123456789\n0123456789\n\n6"; + assert_eq!(get_line_and_column(s, 4), (1, 4)); + + assert_eq!(get_line_and_column(s, 10), (2, 0)); + assert_eq!(get_line_and_column(s, 11), (2, 0)); + assert_eq!(get_line_and_column(s, 15), (2, 4)); + + assert_eq!(get_line_and_column(s, 21), (3, 0)); + 
assert_eq!(get_line_and_column(s, 22), (4, 0)); + assert_eq!(get_line_and_column(s, 23), (4, 0)); + assert_eq!(get_line_and_column(s, 500), (4, 0)); +} + +#[cold] +fn err_impl(message: &'static str, s: &str, pos: &usize) -> Err { + let (line, column) = get_line_and_column(s, *pos); + Box::new(Error { + message: message, + line: line, + column: column, + index: *pos, + }) +} + +fn err(message: &'static str, s: &str, pos: &usize) -> ERes { + Err(err_impl(message, s, pos)) +} + +/// A helpful utility to trace the execution of a parser while testing. The +/// trace output is currently commented out, so every call is a no-op. +#[allow(unused_variables)] +fn dbg(msg: &str, pos: &usize) { + //println!("{} @ {}", msg, pos) +} + +fn atom_of_string(s: String) -> Atom { + match FromStr::from_str(&s) { + Ok(i) => return Atom::I(i), + Err(_) => {}, + }; + + match FromStr::from_str(&s) { + Ok(f) => return Atom::F(f), + Err(_) => {}, + }; + + Atom::S(s) +} + +// returns the char it found, and the new position if you wish to consume that char +fn peek(s: &str, pos: &usize) -> ERes<(char, usize)> { + dbg("peek", pos); + if *pos == s.len() { return err("unexpected eof", s, pos) } + if s.is_char_boundary(*pos) { + let ch = s[*pos..].chars().next().unwrap(); + let next = *pos + ch.len_utf8(); + Ok((ch, next)) + } else { + // strings must be composed of valid utf-8 chars. 
+ unreachable!() + } +} + +fn expect(s: &str, pos: &mut usize, c: char) -> ERes<()> { + dbg("expect", pos); + let (ch, next) = try!(peek(s, pos)); + *pos = next; + if ch == c { Ok(()) } else { err("unexpected character", s, pos) } +} + +fn consume_until_newline(s: &str, pos: &mut usize) -> ERes<()> { + loop { + if *pos == s.len() { return Ok(()) } + let (ch, next) = try!(peek(s, pos)); + *pos = next; + if ch == '\n' { return Ok(()) } + } +} + +// zero or more whitespace characters or `;` line comments +fn zspace(s: &str, pos: &mut usize) -> ERes<()> { + dbg("zspace", pos); + loop { + if *pos == s.len() { return Ok(()) } + let (ch, next) = try!(peek(s, pos)); + + if ch == ';' { try!(consume_until_newline(s, pos)) } + else if ch.is_whitespace() { *pos = next; } + else { return Ok(()) } + } +} + +fn parse_quoted_atom(s: &str, pos: &mut usize) -> ERes { + dbg("parse_quoted_atom", pos); + let mut cs: String = String::new(); + + try!(expect(s, pos, '"')); + + loop { + let (ch, next) = try!(peek(s, pos)); + if ch == '"' { + *pos = next; + break; + } else if ch == '\\' { + let (postslash, nextnext) = try!(peek(s, &next)); + if postslash == '"' || postslash == '\\' { + cs.push(postslash); + } else { + cs.push(ch); + cs.push(postslash); + } + *pos = nextnext; + } else { + cs.push(ch); + *pos = next; + } + } + + // Do not try i64/f64 conversion, since this atom was explicitly quoted. 
+ Ok(Atom::S(cs)) +} + +fn parse_unquoted_atom(s: &str, pos: &mut usize) -> ERes { + dbg("parse_unquoted_atom", pos); + let mut cs: String = String::new(); + + loop { + if *pos == s.len() { break } + let (c, next) = try!(peek(s, pos)); + + if c == ';' { try!(consume_until_newline(s, pos)); break } + if c.is_whitespace() || c == '(' || c == ')' { break } + cs.push(c); + *pos = next; + } + + Ok(atom_of_string(cs)) +} + +fn parse_atom(s: &str, pos: &mut usize) -> ERes { + dbg("parse_atom", pos); + let (ch, _) = try!(peek(s, pos)); + + if ch == '"' { parse_quoted_atom (s, pos) } + else { parse_unquoted_atom(s, pos) } +} + +fn parse_list(s: &str, pos: &mut usize) -> ERes> { + dbg("parse_list", pos); + try!(zspace(s, pos)); + try!(expect(s, pos, '(')); + + let mut sexps: Vec = Vec::new(); + + loop { + try!(zspace(s, pos)); + let (c, next) = try!(peek(s, pos)); + if c == ')' { + *pos = next; + break; + } + sexps.push(try!(parse_sexp(s, pos))); + } + + try!(zspace(s, pos)); + + Ok(sexps) +} + +fn parse_sexp(s: &str, pos: &mut usize) -> ERes { + dbg("parse_sexp", pos); + try!(zspace(s, pos)); + let (c, _) = try!(peek(s, pos)); + let r = + if c == '(' { Ok(Sexp::List(try!(parse_list(s, pos)))) } + else { Ok(Sexp::Atom(try!(parse_atom(s, pos)))) }; + try!(zspace(s, pos)); + r +} + +/// Constructs an atomic s-expression from a string. +pub fn atom_s(s: &str) -> Sexp { + Sexp::Atom(Atom::S(s.to_owned())) +} + +/// Constructs an atomic s-expression from an int. +pub fn atom_i(i: i64) -> Sexp { + Sexp::Atom(Atom::I(i)) +} + +/// Constructs an atomic s-expression from a float. +pub fn atom_f(f: f64) -> Sexp { + Sexp::Atom(Atom::F(f)) +} + +/// Constructs a list s-expression given a slice of s-expressions. +pub fn list(xs: &[Sexp]) -> Sexp { + Sexp::List(xs.to_owned()) +} + +/// Reads an s-expression out of a `&str`. 
+#[inline(never)] +pub fn parse(s: &str) -> Result> { + let mut pos = 0; + let ret = try!(parse_sexp(s, &mut pos)); + if pos == s.len() { Ok(ret) } else { err("unrecognized post-s-expression data", s, &pos) } +} + +// TODO: Pretty print in lisp convention, instead of all on the same line, +// packed as tightly as possible. It's kinda ugly. + +fn is_num_string(s: &str) -> bool { + let x: Result = FromStr::from_str(&s); + let y: Result = FromStr::from_str(&s); + x.is_ok() || y.is_ok() +} + +fn string_contains_whitespace(s: &str) -> bool { + for c in s.chars() { + if c.is_whitespace() { return true } + } + false +} + +fn quote(s: &str) -> Cow { + if !s.contains("\"") + && !string_contains_whitespace(s) + && !is_num_string(s) { + Cow::Borrowed(s) + } else { + let mut r: String = "\"".to_string(); + r.push_str(&s.replace("\\", "\\\\").replace("\"", "\\\"")); + r.push_str("\""); + Cow::Owned(r) + } +} + +impl fmt::Display for Atom { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match *self { + Atom::S(ref s) => write!(f, "{}", quote(s)), + Atom::I(i) => write!(f, "{}", i), + Atom::F(d) => write!(f, "{}", d), + } + } +} + +impl fmt::Display for Sexp { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match *self { + Sexp::Atom(ref a) => write!(f, "{}", a), + Sexp::List(ref xs) => { + try!(write!(f, "(")); + for (i, x) in xs.iter().enumerate() { + let s = if i == 0 { "" } else { " " }; + try!(write!(f, "{}{}", s, x)); + } + write!(f, ")") + }, + } + } +} + +impl fmt::Debug for Atom { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{}", self) + } +} + +impl fmt::Debug for Sexp { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{}", self) + } +} + +#[test] +fn test_hello_world() { + assert_eq!( + parse("(hello -42\n\t -4.0 \"world\") ; comment").unwrap(), + list(&[ atom_s("hello"), atom_i(-42), atom_f(-4.0), atom_s("world") ])); +} + +#[test] +fn test_escaping() { + 
assert_eq!( + parse("(\"\\\"\\q\" \"1234\" 1234)").unwrap(), + list(&[ atom_s("\"\\q"), atom_s("1234"), atom_i(1234) ])); +} + +#[test] +fn test_pp() { + let s = "(hello world (what is (up) (4 6.4 you \"123\\\\ \\\"\")))"; + let sexp = parse(s).unwrap(); + assert_eq!(s, sexp.to_string()); + assert_eq!(s, format!("{:?}", sexp)); +} + +#[test] +fn test_tight_parens() { + let s = "(hello(world))"; + let sexp = parse(s).unwrap(); + assert_eq!(sexp, Sexp::List(vec![Sexp::Atom(Atom::S("hello".into())), + Sexp::List(vec![Sexp::Atom(Atom::S("world".into()))])])); + let s = "(this (has)tight(parens))"; + let s2 = "( this ( has ) tight ( parens ) )"; + assert_eq!(parse(s).unwrap(), parse(s2).unwrap()); +} + +#[test] +fn test_space_in_atom() { + let sexp = list(&[ atom_s("hello world")]); + let sexp_as_string = sexp.to_string(); + assert_eq!("(\"hello world\")", sexp_as_string); + assert_eq!(sexp, parse(&sexp_as_string).unwrap()); +}