From 30cd0304d2899f241e4a1b4e4381c8b6c4ab62ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Hru=C5=A1ka?= Date: Mon, 21 Sep 2020 23:06:10 +0200 Subject: [PATCH] Initial --- .gitignore | 1 + .idea/csnparse.iml | 12 ++ .idea/modules.xml | 8 + .idea/vcs.xml | 6 + .idea/workspace.xml | 265 ++++++++++++++++++++++++++++ Cargo.lock | 77 ++++++++ Cargo.toml | 5 + csn_asm/Cargo.toml | 11 ++ csn_asm/src/data/literal.rs | 113 ++++++++++++ csn_asm/src/data/mask.rs | 61 +++++++ csn_asm/src/data/mod.rs | 98 ++++++++++ csn_asm/src/data/reg.rs | 22 +++ csn_asm/src/error.rs | 48 +++++ csn_asm/src/instr/cond.rs | 73 ++++++++ csn_asm/src/instr/mod.rs | 68 +++++++ csn_asm/src/instr/op.rs | 57 ++++++ csn_asm/src/lib.rs | 158 +++++++++++++++++ csn_asm/src/parse/mod.rs | 31 ++++ csn_asm/src/parse/parse_cond.rs | 41 +++++ csn_asm/src/parse/parse_data.rs | 86 +++++++++ csn_asm/src/parse/parse_instr.rs | 51 ++++++ csn_asm/src/parse/parse_op.rs | 105 +++++++++++ csn_asm/src/parse/parse_routines.rs | 21 +++ csn_asm/src/parse/sexp_expect.rs | 60 +++++++ csn_asm/src/patches/mod.rs | 5 + csn_asm/src/patches/sexp_is_a.rs | 23 +++ csn_asm/src/patches/try_remove.rs | 17 ++ 27 files changed, 1523 insertions(+) create mode 100644 .gitignore create mode 100644 .idea/csnparse.iml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 csn_asm/Cargo.toml create mode 100644 csn_asm/src/data/literal.rs create mode 100644 csn_asm/src/data/mask.rs create mode 100644 csn_asm/src/data/mod.rs create mode 100644 csn_asm/src/data/reg.rs create mode 100644 csn_asm/src/error.rs create mode 100644 csn_asm/src/instr/cond.rs create mode 100644 csn_asm/src/instr/mod.rs create mode 100644 csn_asm/src/instr/op.rs create mode 100644 csn_asm/src/lib.rs create mode 100644 csn_asm/src/parse/mod.rs create mode 100644 csn_asm/src/parse/parse_cond.rs create mode 100644 
csn_asm/src/parse/parse_data.rs create mode 100644 csn_asm/src/parse/parse_instr.rs create mode 100644 csn_asm/src/parse/parse_op.rs create mode 100644 csn_asm/src/parse/parse_routines.rs create mode 100644 csn_asm/src/parse/sexp_expect.rs create mode 100644 csn_asm/src/patches/mod.rs create mode 100644 csn_asm/src/patches/sexp_is_a.rs create mode 100644 csn_asm/src/patches/try_remove.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/.idea/csnparse.iml b/.idea/csnparse.iml new file mode 100644 index 0000000..4f9dbef --- /dev/null +++ b/.idea/csnparse.iml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..e68404d --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..a66725b --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,265 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1600694919906 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..e842fa6 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,77 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+[[package]] +name = "anyhow" +version = "1.0.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b602bfe940d21c130f3895acd65221e8a61270debe89d628b9cb4e3ccb8569b" + +[[package]] +name = "csn_asm" +version = "0.1.0" +dependencies = [ + "anyhow", + "sexp", + "thiserror", +] + +[[package]] +name = "proc-macro2" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36e28516df94f3dd551a587da5357459d9b36d945a7c37c3557928c1c2ff2a2c" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "sexp" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8fa7ac9df84000b0238cf497cb2d3056bac2ff2a7d8cf179d2803b4b58571f" + +[[package]] +name = "syn" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6690e3e9f692504b941dc6c3b188fd28df054f7fb8469ab40680df52fdcc842b" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "thiserror" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dfdd070ccd8ccb78f4ad66bf1982dc37f620ef696c6b5028fe2ed83dd3d0d08" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 
index 0000000..38e8248 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,5 @@ +[workspace] +members = [ + "csn_asm", +] + diff --git a/csn_asm/Cargo.toml b/csn_asm/Cargo.toml new file mode 100644 index 0000000..c5abb5f --- /dev/null +++ b/csn_asm/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "csn_asm" +version = "0.1.0" +authors = ["Ondřej Hruška "] +edition = "2018" +publish = false + +[dependencies] +sexp = "1.1.4" +thiserror = "1.0.20" +anyhow = "1.0.32" diff --git a/csn_asm/src/data/literal.rs b/csn_asm/src/data/literal.rs new file mode 100644 index 0000000..6e0ea5f --- /dev/null +++ b/csn_asm/src/data/literal.rs @@ -0,0 +1,113 @@ +use std::fmt::{self, Display, Formatter}; +use std::convert::TryFrom; +use std::sync::atomic::AtomicU32; +use std::borrow::Cow; + +pub type DebugMsg = Cow<'static, str>; + +/// Immediate value +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub struct Value(pub i64); + +impl From for Value { + fn from(n: i64) -> Self { + Self(n) + } +} + +impl Display for Value { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + if f.alternate() { + write!(f, "{:#010x}", self.0) + } else { + write!(f, "{}", self.0) + } + } +} + +impl Value { + pub fn as_u64(self) -> u64 { + u64::from_ne_bytes(self.0.to_ne_bytes()) + } + + pub fn as_u32(self) -> Option { + u32::try_from(self.as_u64()).ok() + } + + pub fn as_i32(self) -> Option { + i32::try_from(self.0).ok() + } +} + +/// Immediate address +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub struct Addr(pub u64); + +impl Display for Addr { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "@{:#010x}", self.0) + } +} + +impl From for Addr { + fn from(n: u64) -> Self { + Self(n) + } +} + +/// Label name +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum Label { + Named(String), + Numbered(u32), +} + +impl Label { + /// Generate a unique numbered label from a counter + pub fn unique(counter : &AtomicU32) -> Self { + Label::Numbered(counter.fetch_add(1, std::sync::atomic::Ordering::Relaxed)) 
+ } +} + +impl Display for Label { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Label::Named(name) => write!(f, ":{}", name), + Label::Numbered(num) => write!(f, ":#{}", num), + } + } +} + +impl From<&str> for Label { + fn from(n: &str) -> Self { + Self::Named(n.to_string()) + } +} + +impl From for Label { + fn from(n: String) -> Self { + Self::Named(n) + } +} + +/// Routine name +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct RoutineName(pub String); + +impl Display for RoutineName { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From<&str> for RoutineName { + fn from(n: &str) -> Self { + Self(n.to_string()) + } +} + +impl From for RoutineName { + fn from(n: String) -> Self { + Self(n) + } +} diff --git a/csn_asm/src/data/mask.rs b/csn_asm/src/data/mask.rs new file mode 100644 index 0000000..1172b77 --- /dev/null +++ b/csn_asm/src/data/mask.rs @@ -0,0 +1,61 @@ +//! Mask applied to a data source or destination + +use crate::error::AsmError; + +/// Bit mask to apply to a value +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub struct Mask { + /// Length of the selected bit slice + len: u8, + /// Offset of the selected bit slice from bit zero + offset: u8, +} + +impl Default for Mask { + fn default() -> Self { + Mask { + len: 64, + offset: 0 + } + } +} + +impl Mask { + pub const BYTE: Mask = Mask { + len: 8, + offset: 0, + }; + pub const HALF_WORD: Mask = Mask { + len: 16, + offset: 0, + }; + pub const WORD: Mask = Mask { + len: 32, + offset: 0, + }; + pub const DOUBLE_WORD: Mask = Mask { + len: 64, + offset: 0, + }; + pub const FULL: Mask = Self::DOUBLE_WORD; + + pub fn new(len: u8, offset: u8) -> Result { + if len == 0 || offset >= 64 { + // create the invalid mask to display it in the error + return Err(AsmError::BadMask(Mask { + len, + offset + })); + } + + Ok(Self { + len: len.min(64 - offset), + offset, + }) + } + + /// Get a binary mask representing the span + pub fn 
as_bitmask(self) -> u64 { + ((1 << self.len) - 1) << self.offset + } +} diff --git a/csn_asm/src/data/mod.rs b/csn_asm/src/data/mod.rs new file mode 100644 index 0000000..c01cfc7 --- /dev/null +++ b/csn_asm/src/data/mod.rs @@ -0,0 +1,98 @@ +use super::error::AsmError; + + +pub(crate) mod literal; +mod reg; +mod mask; + +pub use reg::Register; +pub use mask::Mask; +use literal::Addr; + + +use std::convert::TryFrom; + +use crate::data::literal::Value; + +/// Data source disposition +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum DataDisp { + /// Constant value + Immediate(Value), + /// Constant memory address + ImmediatePtr(Addr), + /// Register + Register(Register), + /// Pointer into memory, stored in a numbered register + RegisterPtr(Register), +} + +/// Data source disposition +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum SrcDisp { + /// Constant value + Immediate(Value), + /// Constant memory address + ImmediatePtr(Addr), + /// Register + Register(Register), + /// Pointer into memory, stored in a numbered register + RegisterPtr(Register), +} + +impl TryFrom for SrcDisp { + type Error = AsmError; + + fn try_from(value: DataDisp) -> Result { + match value { + DataDisp::Immediate(x) => Ok(SrcDisp::Immediate(x)), + DataDisp::ImmediatePtr(x) => Ok(SrcDisp::ImmediatePtr(x)), + DataDisp::Register(x) => Ok(SrcDisp::Register(x)), + DataDisp::RegisterPtr(x) => Ok(SrcDisp::RegisterPtr(x)), + } + } +} + +/// Data destination disposition +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum DstDisp { + /// Constant memory address + ImmediatePtr(Addr), + /// Register + Register(Register), + /// Pointer into memory, stored in a numbered register + RegisterPtr(Register), +} + +impl TryFrom for DstDisp { + type Error = AsmError; + + fn try_from(value: DataDisp) -> Result { + match value { + DataDisp::Immediate(_x) => Err(AsmError::ValueAsOutput), + DataDisp::ImmediatePtr(x) => Ok(DstDisp::ImmediatePtr(x)), + DataDisp::Register(x) => Ok(DstDisp::Register(x)), 
+ DataDisp::RegisterPtr(x) => Ok(DstDisp::RegisterPtr(x)), + } + } +} + +/// Data source argument (read-only) +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub struct Rd(pub SrcDisp, pub Mask); + +impl Rd { + pub fn new(src : SrcDisp) -> Self { + Rd(src, Mask::default()) + } +} + +/// Data destination argument (read-write) +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub struct Wr(pub DstDisp, pub Mask); + +impl Wr { + pub fn new(dst : DstDisp) -> Self { + Wr(dst, Mask::default()) + } +} diff --git a/csn_asm/src/data/reg.rs b/csn_asm/src/data/reg.rs new file mode 100644 index 0000000..1c85844 --- /dev/null +++ b/csn_asm/src/data/reg.rs @@ -0,0 +1,22 @@ +use std::fmt::{self, Display, Formatter}; + +/// Register name +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum Register { + /// Argument register, read-only + Arg(u8), + /// Result register, read-only + Res(u8), + /// General purpose register + Gen(u8) +} + +impl Display for Register { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Register::Arg(n) => write!(f, "arg{}", n), + Register::Res(n) => write!(f, "res{}", n), + Register::Gen(n) => write!(f, "r{}", n), + } + } +} diff --git a/csn_asm/src/error.rs b/csn_asm/src/error.rs new file mode 100644 index 0000000..6cf40f8 --- /dev/null +++ b/csn_asm/src/error.rs @@ -0,0 +1,48 @@ +use crate::instr::{Cond}; +use crate::data::{Mask, Register}; +use thiserror::Error; +use std::borrow::Cow; + + +/// csn_asm unified error type +#[derive(Error,Debug)] +pub enum Error { + #[error("S-expression syntax error: {0:?}")] + PreParse(#[from] Box), + #[error("Parse error: {0:?}")] + Parse(Cow<'static, str>), + #[error("Parse error in {1:?}: {0:?}")] + ParseIn(Cow<'static, str>, sexp::Sexp), + #[error("Assembler error: {0:?}")] + Asm(AsmError), + #[error("Architecture error: {0:?}")] + Arch(ArchError), + #[error(transparent)] + Other(#[from] anyhow::Error), +} + +/// Error from the assembler stage (after parsing S-expressions and basic validation) 
+#[derive(Error,Debug)] +pub enum AsmError { + #[error("Unknown instruction")] + UnknownInstruction, + #[error("Bad bit mask")] + BadMask(Mask), + #[error("Uneven operand size")] + UnevenOperandSize(Mask, Mask), + #[error("Value provided as output argument")] + ValueAsOutput, + #[error("Conditional branch already defined for \"{0}\"")] + ConditionalAlreadyUsed(Cond), +} + +/// Architectural error - the code is syntactically OK, but cannot run +#[derive(Error,Debug)] +pub enum ArchError { + #[error("Register {0} does not exist")] + RegisterNotExist(Register), + #[error("Register {0} is not writable")] + RegisterNotWritable(Register), + #[error("Register {0} is not readable")] + RegisterNotReadable(Register), +} diff --git a/csn_asm/src/instr/cond.rs b/csn_asm/src/instr/cond.rs new file mode 100644 index 0000000..bedc5ea --- /dev/null +++ b/csn_asm/src/instr/cond.rs @@ -0,0 +1,73 @@ +use std::fmt::{self, Display, Formatter}; +use std::ops::Not; + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +pub enum Cond { + Equal, + NotEqual, + Zero, + NotZero, + Less, + LessOrEqual, + Greater, + GreaterOrEqual, + Positive, + NonPositive, + Negative, + NonNegative, + Overflow, + NotOverflow, + Carry, + NotCarry, +} + +impl Display for Cond { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.write_str(match self { + Cond::Equal => "eq", + Cond::NotEqual => "ne", + Cond::Zero => "z", + Cond::NotZero => "nz", + Cond::Less => "lt", + Cond::LessOrEqual => "le", + Cond::Greater => "gt", + Cond::GreaterOrEqual => "ge", + Cond::Positive => "pos", + Cond::Negative => "neg", + Cond::NonPositive => "npos", + Cond::NonNegative => "nneg", + Cond::Overflow => "ov", + Cond::Carry => "c", + Cond::NotCarry => "nc", + Cond::NotOverflow => "nov" + }) + } +} + +impl Not for Cond { + type Output = Cond; + + fn not(self) -> Self::Output { + match self { + Cond::Equal => Cond::NotEqual, + Cond::Zero => Cond::NotZero, + Cond::Overflow => Cond::NotOverflow, + Cond::Carry => Cond::NotCarry, 
+ + Cond::Positive => Cond::NonPositive, + Cond::Negative => Cond::NonNegative, + Cond::NonPositive => Cond::Positive, + Cond::NonNegative => Cond::Negative, + + Cond::NotEqual => Cond::Equal, + Cond::NotZero => Cond::Zero, + Cond::NotOverflow => Cond::Overflow, + Cond::NotCarry => Cond::Carry, + + Cond::Less => Cond::GreaterOrEqual, + Cond::Greater => Cond::LessOrEqual, + Cond::LessOrEqual => Cond::Greater, + Cond::GreaterOrEqual => Cond::Less, + } + } +} diff --git a/csn_asm/src/instr/mod.rs b/csn_asm/src/instr/mod.rs new file mode 100644 index 0000000..c172ac2 --- /dev/null +++ b/csn_asm/src/instr/mod.rs @@ -0,0 +1,68 @@ +mod op; +mod cond; + +pub use op::Op; +pub use cond::Cond; +use crate::data::literal::{Label, RoutineName}; +use std::sync::atomic::{AtomicU32}; +use std::collections::HashMap; +use crate::error::{AsmError, Error}; + +/// A higher-level instruction +pub struct Instr { + pub op: Op, + pub branches: Option)>>, +} + +/// A routine +pub struct Routine { + pub name: RoutineName, + pub body: Vec, +} + +/// A trait for something that can turn into multiple instructions +pub trait Flatten { + fn flatten(self, label_num: &AtomicU32) -> Result, Error>; +} + +impl Flatten for Instr { + fn flatten(self, label_num: &AtomicU32) -> Result, Error> { + let mut ops = vec![self.op]; + + if let Some(branches) = self.branches { + let labels = HashMap::::new(); + let _branch_count = branches.len(); + for (_cnt, (cond, branch)) in branches.into_iter().enumerate() { + if labels.contains_key(&cond) { + return Err(Error::Asm(AsmError::ConditionalAlreadyUsed(cond))); + } + + let next_lbl = Label::unique(label_num); + ops.push(Op::JumpIf(!cond, next_lbl.clone())); + + for branch_instr in branch { + ops.extend(branch_instr.flatten(label_num)?); + } + + ops.push(Op::Label(next_lbl)); + } + } + + Ok(ops) + } +} + +impl Flatten for Routine { + fn flatten(self, label_num: &AtomicU32) -> Result, Error> { + let mut ops = vec![ + Op::Routine(self.name.clone()), + ]; + + for instr 
in self.body { + ops.extend(instr.flatten(label_num)?); + } + + ops.push(Op::Barrier(Some(format!("Routine \"{}\" overrun", self.name).into()))); + Ok(ops) + } +} diff --git a/csn_asm/src/instr/op.rs b/csn_asm/src/instr/op.rs new file mode 100644 index 0000000..f3a91ae --- /dev/null +++ b/csn_asm/src/instr/op.rs @@ -0,0 +1,57 @@ +use crate::data::{ + Wr, Rd, + literal::Label, + literal::RoutineName, + literal::DebugMsg, +}; +use crate::instr::{Cond}; + +/// A low level instruction +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum Op { + /* Marker instructions */ + + /// Mark a jump target. + /// Is optimized out when jumps are replaced by relative skips + Label(Label), + /// Mark a far jump target (can be jumped to from another routine). + /// This label is preserved in optimized code. + FarLabel(Label), + + /* Control flow */ + + /// Jump to a label + Jump(Label), + /// Jump to a label that can be in another function + FarJump(Label), + /// Call a routine with arguments + Call(RoutineName, Vec), + /// Exit the current routine with return values + Ret(Vec), + + /* Synthetic instructions */ + + /// Mark a routine entry point (call target) + Routine(RoutineName), + /// Skip backward or forward + Skip(Rd), + /// Jump to a label if a flag is set + JumpIf(Cond, Label), + /// Deny jumps, skips and run across this address, producing a run-time fault with a message. 
+ Barrier(Option), + /// Generate a run-time fault with a debugger message + Fault(Option), + + /* Arithmetic */ + + /// Copy a value + Mov(Wr, Rd), + /// Compare two values and set conditional flags + Cmp(Rd, Rd), + // Increment a value + Inc(Wr), + // Decrement a value + Dec(Wr), + + // TODO arithmetics, bit manipulation, byte operations +} diff --git a/csn_asm/src/lib.rs b/csn_asm/src/lib.rs new file mode 100644 index 0000000..e97f902 --- /dev/null +++ b/csn_asm/src/lib.rs @@ -0,0 +1,158 @@ +mod data; +mod error; +mod instr; +mod parse; +mod patches; + +pub use parse::parse; + +#[cfg(test)] +mod tests { + use crate::parse; + use crate::instr::{Op, Flatten}; + use crate::data::{Wr, DstDisp, Register, SrcDisp, Rd}; + use crate::data::literal::{Value, Addr}; + use std::sync::atomic::AtomicU32; + + #[test] + fn test_parse_empty() { + let parsed = parse(" + () + ").unwrap(); + assert_eq!(Vec::::new(), parsed); + } + + #[test] + fn test_parse_empty_routine() { + let parsed = parse(" + ( + (hello) + ) + ").unwrap(); + assert_eq!(vec![ + Op::Routine("hello".into()), + Op::Barrier(Some("Routine \"hello\" overrun".into())) + ], parsed); + + let parsed = parse(" + ( + (hello) + (world) + ) + ").unwrap(); + assert_eq!(vec![ + Op::Routine("hello".into()), + Op::Barrier(Some("Routine \"hello\" overrun".into())), + Op::Routine("world".into()), + Op::Barrier(Some("Routine \"world\" overrun".into())) + ], parsed); + } + + #[test] + fn test_parse_data_formats() { + let parsed = parse(" + ( + (move + (mov r0 r1) + (mov r15 7) + (mov r15 0xabcd) + (mov r7 0b11110000) + (mov r7 arg1) + (mov r255 arg255) + (mov r7 res0) + (mov r7 res255) + (mov @r0 @r0) ; test in both Rd and Wr positions + (mov @r0 @arg0) + (mov @r0 @res0) + (mov @123456 @0x123456) + (mov @0b010101 @0b010101) + ) + ) + ").unwrap(); + assert_eq!(vec![ + Op::Routine("move".into()), + // (mov r0 r1) + Op::Mov( + Wr::new(DstDisp::Register(Register::Gen(0))), + Rd::new(SrcDisp::Register(Register::Gen(1))), + ), + // (mov 
r15 7) + Op::Mov( + Wr::new(DstDisp::Register(Register::Gen(15))), + Rd::new(SrcDisp::Immediate(Value(7))), + ), + // (mov r15 0xabcd) + Op::Mov( + Wr::new(DstDisp::Register(Register::Gen(15))), + Rd::new(SrcDisp::Immediate(Value(0xabcd))), + ), + // (mov r7 0b11110000) + Op::Mov( + Wr::new(DstDisp::Register(Register::Gen(7))), + Rd::new(SrcDisp::Immediate(Value(0b11110000))), + ), + // (mov r7 arg1) + Op::Mov( + Wr::new(DstDisp::Register(Register::Gen(7))), + Rd::new(SrcDisp::Register(Register::Arg(1))), + ), + // (mov r255 arg255) + Op::Mov( + Wr::new(DstDisp::Register(Register::Gen(255))), + Rd::new(SrcDisp::Register(Register::Arg(255))), + ), + // (mov r7 res0) + Op::Mov( + Wr::new(DstDisp::Register(Register::Gen(7))), + Rd::new(SrcDisp::Register(Register::Res(0))), + ), + // (mov r7 res255) + Op::Mov( + Wr::new(DstDisp::Register(Register::Gen(7))), + Rd::new(SrcDisp::Register(Register::Res(255))), + ), + // (mov @r0 @r0) + Op::Mov( + Wr::new(DstDisp::RegisterPtr(Register::Gen(0))), + Rd::new(SrcDisp::RegisterPtr(Register::Gen(0))), + ), + // (mov @r0 @arg0) + Op::Mov( + Wr::new(DstDisp::RegisterPtr(Register::Gen(0))), + Rd::new(SrcDisp::RegisterPtr(Register::Arg(0))), + ), + // (mov @r0 @res0) + Op::Mov( + Wr::new(DstDisp::RegisterPtr(Register::Gen(0))), + Rd::new(SrcDisp::RegisterPtr(Register::Res(0))), + ), + // (mov @123456 @0x123456) + Op::Mov( + Wr::new(DstDisp::ImmediatePtr(Addr(123456))), + Rd::new(SrcDisp::ImmediatePtr(Addr(0x123456))), + ), + // (mov @0b010101 @0b010101) + Op::Mov( + Wr::new(DstDisp::ImmediatePtr(Addr(0b010101))), + Rd::new(SrcDisp::ImmediatePtr(Addr(0b010101))), + ), + Op::Barrier(Some("Routine \"move\" overrun".into())), + ], parsed); + } + + fn parse_single_instr(src : &str) -> anyhow::Result> { + let num = AtomicU32::new(0); + Ok(parse::parse_instructions(vec![sexp::parse(src)?])?.remove(0).flatten(&num)?) 
+ } + + #[test] + fn test_parse_single() { + let parsed = parse_single_instr("(mov r0 r1)").unwrap(); + assert_eq!(vec![ + Op::Mov( + Wr::new(DstDisp::Register(Register::Gen(0))), + Rd::new(SrcDisp::Register(Register::Gen(1))), + ), + ], parsed); + } +} diff --git a/csn_asm/src/parse/mod.rs b/csn_asm/src/parse/mod.rs new file mode 100644 index 0000000..d878b47 --- /dev/null +++ b/csn_asm/src/parse/mod.rs @@ -0,0 +1,31 @@ + +use crate::instr::{Routine, Op, Flatten}; +use crate::error::Error; + + +use std::sync::atomic::AtomicU32; +use crate::parse::sexp_expect::expect_list; + +mod parse_cond; +mod parse_instr; +mod parse_data; +mod parse_routines; +mod sexp_expect; +mod parse_op; + +use parse_routines::parse_routines; +pub use parse_instr::parse_instructions; + +pub fn parse(source: &str) -> Result, Error> { + let root = sexp::parse(source)?; + + let subs: Vec = parse_routines(expect_list(Some(root), true)?)?; + + let mut combined = vec![]; + let label_num = AtomicU32::new(0); + for sub in subs { + combined.extend(sub.flatten(&label_num)?); + } + + Ok(combined) +} diff --git a/csn_asm/src/parse/parse_cond.rs b/csn_asm/src/parse/parse_cond.rs new file mode 100644 index 0000000..1d9c637 --- /dev/null +++ b/csn_asm/src/parse/parse_cond.rs @@ -0,0 +1,41 @@ +use crate::parse::sexp_expect::{expect_list, expect_string_atom}; +use sexp::Sexp; +use crate::instr::{Cond, Instr}; +use crate::error::Error; +use crate::parse::parse_instr::parse_instructions; +use crate::patches::TryRemove; + +pub fn parse_cond_branch(tok: Sexp) -> Result<(Cond, Vec), Error> { + let mut list = expect_list(Some(tok), false)?; + let kw = expect_string_atom(list.try_remove(0))?; + + if !kw.ends_with('?') { + return Err(Error::Parse(format!("Condition must end with '?': {}", kw).into())); + } + + Ok((parse_cond(&kw)?, parse_instructions(list)?)) +} + +pub fn parse_cond(text: &str) -> Result { + Ok(match text.trim_end_matches('?') { + "eq" | "=" | "==" => Cond::Equal, + "ne" | "<>" | "!=" | "≠" => 
Cond::NotEqual, + "z" | "0" => Cond::Zero, + "nz" | "<>0" | "!0" => Cond::NotZero, + "lt" | "<" => Cond::Less, + "le" | "<=" | "≤" => Cond::LessOrEqual, + "gt" => Cond::Greater, + "ge" | ">=" | "≥" => Cond::GreaterOrEqual, + "pos" | "+" | ">0" => Cond::Positive, + "neg" | "-" | "<0" => Cond::Negative, + "npos" | "!+" | "0-" | "<=0" | "≥0" => Cond::NonPositive, + "nneg" | "!-" | "0+" | ">=0" | "≤0" => Cond::NonNegative, + "ov" | "^" => Cond::Overflow, + "c" => Cond::Carry, + "nc" | "!c" => Cond::NotCarry, + "nov" | "!ov" | "!^" => Cond::NotOverflow, + _ => { + return Err(Error::Parse(format!("Unknown cond: {}", text).into())); + } + }) +} diff --git a/csn_asm/src/parse/parse_data.rs b/csn_asm/src/parse/parse_data.rs new file mode 100644 index 0000000..3d4b6f8 --- /dev/null +++ b/csn_asm/src/parse/parse_data.rs @@ -0,0 +1,86 @@ +use sexp::{Sexp, Atom}; +use crate::data::{DataDisp, Register, Rd, Mask, Wr, DstDisp, SrcDisp}; +use crate::error::Error; +use crate::data::literal::{Value, Addr, Label}; +use std::convert::TryFrom; +use crate::parse::sexp_expect::expect_string_atom; + +pub fn parse_label(name : Option) -> Result { + let name = expect_string_atom(name)?; + Ok(Label::Named(name.trim_start_matches(':').into())) +} + +/// Parse data disposition (address/value, without the read/write restriction) +pub fn parse_data_disp(tok: Option) -> Result { + let tok = if let Some(tok) = tok { + tok + } else { + return Err(Error::Parse("Expected data disposition token".into())); + }; + + // TODO implement masks + + match &tok { + Sexp::Atom(Atom::I(val)) => { + Ok(DataDisp::Immediate(Value(*val))) + }, + Sexp::Atom(Atom::S(s)) => { + if let Some(reference) = s.strip_prefix('@') { + if reference.starts_with(|c : char| c.is_ascii_digit()) { + let val : u64 = parse_u64(reference)?; + Ok(DataDisp::ImmediatePtr(Addr(val))) + } else { + Ok(DataDisp::RegisterPtr(parse_reg(reference)?)) + } + } else if s.starts_with(|c : char| c.is_ascii_digit()) { + 
Ok(DataDisp::Immediate(Value(parse_i64(s)?))) + } else { + Ok(DataDisp::Register(parse_reg(s)?)) + } + }, + _ => { + Err(Error::Parse(format!("bad data disp: {:?}", tok).into())) + }, + } +} + +pub fn parse_reg(name : &str) -> anyhow::Result { + if let Some(rn) = name.strip_prefix("arg") { + let val : u8 = rn.parse()?; + Ok(Register::Arg(val)) + } else if let Some(rn) = name.strip_prefix("res") { + let val : u8 = rn.parse()?; + Ok(Register::Res(val)) + } else if let Some(rn) = name.strip_prefix("r") { + let val : u8 = rn.parse()?; + Ok(Register::Gen(val)) + } else { + Err(Error::Parse(format!("Bad reg name: {}", name).into()))? + } +} + +pub fn parse_u64(literal : &str) -> anyhow::Result { + if let Some(hex) = literal.strip_prefix("0x") { + Ok(u64::from_str_radix(hex, 16)?) + } else if let Some(hex) = literal.strip_prefix("0b") { + Ok(u64::from_str_radix(hex, 2)?) + } else { + Ok(u64::from_str_radix(literal, 10)?) + } +} + +pub fn parse_i64(literal : &str) -> anyhow::Result { + if let Some(_value) = literal.strip_prefix("-") { + Ok(-1 * i64::try_from(parse_u64(literal)?)?) + } else { + Ok(i64::try_from(parse_u64(literal)?)?) 
+ } +} + +pub fn parse_rd(tok: Option) -> anyhow::Result { + Ok(Rd(SrcDisp::try_from(parse_data_disp(tok)?)?, Mask::default())) +} + +pub fn parse_wr(tok: Option) -> anyhow::Result { + Ok(Wr(DstDisp::try_from(parse_data_disp(tok)?)?, Mask::default())) +} diff --git a/csn_asm/src/parse/parse_instr.rs b/csn_asm/src/parse/parse_instr.rs new file mode 100644 index 0000000..5a41b09 --- /dev/null +++ b/csn_asm/src/parse/parse_instr.rs @@ -0,0 +1,51 @@ +use sexp::Sexp; +use crate::instr::{Instr, Op}; +use crate::error::Error; +use crate::parse::parse_cond::{parse_cond_branch, parse_cond}; +use crate::data::literal::{Label, RoutineName}; +use crate::parse::parse_data::{parse_rd, parse_wr}; +use crate::parse::sexp_expect::{expect_list, expect_string_atom}; +use crate::patches::SexpIsA; +use super::parse_op::parse_op; + +pub fn parse_instructions(instrs: Vec) -> Result, Error> { + let mut parsed = vec![]; + for expr in instrs { + let tokens = expect_list(Some(expr), false)?; + + let mut toki = tokens.into_iter(); + + let mut name = expect_string_atom(toki.next())?; + + let far = if name == "far" { + name = expect_string_atom(toki.next())?; + true + } else { + false + }; + + let arg_tokens = toki.clone().take_while(|e| e.is_atom()); + let branch_tokens = toki + .skip_while(|e| e.is_atom()) + .take_while(|e| e.is_list()); + + let branches = { + let mut branches = vec![]; + for t in branch_tokens { + branches.push(parse_cond_branch(t)?); + } + if branches.is_empty() { + None + } else { + Some(branches) + } + }; + + parsed.push(Instr { + op: parse_op(name.as_str(), far, arg_tokens)?, + branches + }); + } + Ok(parsed) +} + diff --git a/csn_asm/src/parse/parse_op.rs b/csn_asm/src/parse/parse_op.rs new file mode 100644 index 0000000..67e29c4 --- /dev/null +++ b/csn_asm/src/parse/parse_op.rs @@ -0,0 +1,105 @@ +use crate::instr::Op; +use sexp::Sexp; +use crate::error::Error; +use crate::data::literal::{RoutineName, Label}; +use crate::parse::sexp_expect::expect_string_atom; +use 
crate::parse::parse_data::{parse_rd, parse_wr, parse_label}; +use crate::parse::parse_cond::parse_cond; + +pub fn parse_op(keyword: &str, far : bool, mut arg_tokens: impl Iterator) -> Result { + Ok(match keyword { + "jmp" | "j" => { + let dest = parse_label(arg_tokens.next())?; + if far { + Op::Jump(dest) + } else { + Op::FarJump(dest) + } + } + + "call" => { + let dest = RoutineName(expect_string_atom(arg_tokens.next())?); + + let mut args = vec![]; + for t in arg_tokens { + args.push(parse_rd(Some(t))?); + } + Op::Call(dest, args) + } + + "ret" => { + let mut args = vec![]; + for t in arg_tokens { + args.push(parse_rd(Some(t))?); + } + Op::Ret(args) + } + + "rtn" | "fn" => { + let dest = RoutineName(expect_string_atom(arg_tokens.next())?); + Op::Routine(dest) + } + + "skip" => { + Op::Skip(parse_rd(arg_tokens.next())?) + } + + "jmp.if" | "j.if" => { + let dest = parse_label(arg_tokens.next())?; + Op::JumpIf(parse_cond(&expect_string_atom(arg_tokens.next())?)?, dest) + } + + "barrier" => { + Op::Barrier(match arg_tokens.next() { + None => None, + Some(s) => Some(expect_string_atom(Some(s))?.into()), + }) + } + + "fault" => { + Op::Fault(match arg_tokens.next() { + None => None, + Some(s) => Some(expect_string_atom(Some(s))?.into()), + }) + } + + "mov" | "ld" | "mv" => { + Op::Mov( + parse_wr(arg_tokens.next())?, + parse_rd(arg_tokens.next())? + ) + } + + "cmp" => { + Op::Cmp( + parse_rd(arg_tokens.next())?, + parse_rd(arg_tokens.next())? + ) + } + + "inc" => { + Op::Inc( + parse_wr(arg_tokens.next())? + ) + } + + "dec" => { + Op::Dec( + parse_wr(arg_tokens.next())? 
+ ) + } + + other => { + if let Some(label) = other.strip_prefix(':') { + let label = Label::Named(label.to_string()); + if far { + Op::Label(label) + } else { + Op::FarLabel(label) + } + } else { + return Err(Error::Parse(format!("Unknown instruction: {}", other).into())); + } + } + }) +} diff --git a/csn_asm/src/parse/parse_routines.rs b/csn_asm/src/parse/parse_routines.rs new file mode 100644 index 0000000..5d79e57 --- /dev/null +++ b/csn_asm/src/parse/parse_routines.rs @@ -0,0 +1,21 @@ +use crate::parse::parse_instr::parse_instructions; +use crate::instr::Routine; +use crate::data::literal::RoutineName; +use sexp::Sexp; +use crate::error::Error; +use crate::parse::sexp_expect::{expect_list, expect_string_atom}; +use crate::patches::TryRemove; + +pub fn parse_routines(routines: Vec) -> Result, Error> { + let mut parsed = vec![]; + for rt in routines { + let mut def = expect_list(Some(rt), false)?; + let name = expect_string_atom(def.try_remove(0))?; + let body = parse_instructions(def)?; + parsed.push(Routine { + name: RoutineName(name), + body, + }) + } + Ok(parsed) +} diff --git a/csn_asm/src/parse/sexp_expect.rs b/csn_asm/src/parse/sexp_expect.rs new file mode 100644 index 0000000..62cdf70 --- /dev/null +++ b/csn_asm/src/parse/sexp_expect.rs @@ -0,0 +1,60 @@ +use sexp::{Sexp, Atom}; +use crate::error::Error; + +pub fn expect_list(expr: Option, allow_empty: bool) -> Result, Error> { + if let Some(expr) = expr { + match &expr { + Sexp::Atom(_) => { + return Err(Error::ParseIn("Expected a list".into(), expr)); + } + Sexp::List(list) => { + if !allow_empty && list.is_empty() { + return Err(Error::ParseIn("Routine: Empty list".into(), expr)); + } + + if let Sexp::List(list) = expr { + return Ok(list); + } else { + unreachable!(); + } + } + } + } + + Err(Error::Parse("Expected a list, got nothing".into())) +} + +pub fn expect_atom(expr: Option) -> Result { + if let Some(expr) = expr { + match &expr { + Sexp::Atom(_atom) => { + if let Sexp::Atom(a) = expr { + return 
/// Non-panicking counterpart of a positional `remove`.
pub trait TryRemove {
    /// Type of the removed element.
    type Item;

    /// Remove and return the element at `index`, or return `None` when there
    /// is no element at that position.
    fn try_remove(&mut self, index: usize) -> Option<Self::Item>;
}

impl<T> TryRemove for Vec<T> {
    type Item = T;

    fn try_remove(&mut self, index: usize) -> Option<Self::Item> {
        // `Vec::remove` panics for any out-of-bounds index, not only on an
        // empty vector, so the guard has to compare against the length.
        if index < self.len() {
            Some(self.remove(index))
        } else {
            None
        }
    }
}