From ce5bf513c0afc7bdda86ceb59679857d8eb2b013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Hru=C5=A1ka?= Date: Wed, 7 Oct 2020 19:10:52 +0200 Subject: [PATCH] stricter label parsing, more relaxed identifier rules --- crsn/src/asm/parse/parse_data.rs | 15 ++++++++++++--- crsn/src/builtin/parse.rs | 12 ++++++------ examples/aliases.csn | 4 ++++ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/crsn/src/asm/parse/parse_data.rs b/crsn/src/asm/parse/parse_data.rs index bc0e6d4..c991bfb 100644 --- a/crsn/src/asm/parse/parse_data.rs +++ b/crsn/src/asm/parse/parse_data.rs @@ -11,8 +11,11 @@ use crate::asm::parse::sexp_expect::expect_string_atom; use crate::asm::patches::ErrWithPos; fn is_valid_identifier(name: &str) -> bool { - name.starts_with(|c: char| c.is_ascii_alphabetic() || c == '_') - && name.chars().find(|c| !c.is_ascii_alphanumeric() && *c != '_').is_none() + // ascii symbols "!\"#$_&'()*+,-./:;<=>?@[\\]^_`{|}~" + const BLACKLIST : &str = "!\"#$&'()*+,-./:;<=>?@[\\]^`{|}~"; + name != "_" + && !name.starts_with(|c: char| c.is_ascii_digit() || BLACKLIST.contains(c)) + && !name.contains(|c : char| c.is_whitespace() || BLACKLIST.contains(c)) } /// Parse register alias @@ -43,18 +46,24 @@ pub fn parse_constant_name(name: Sexp) -> Result<(ConstantName, SourcePosition), /// Parse a label pub fn parse_label(name: Sexp) -> Result<Label, CrsnError> { - // trace!("parse label: {:?}", name); let (name, namepos) = expect_string_atom(name)?; Ok(parse_label_str(&name, &namepos)?) } pub fn parse_label_str(name: &str, pos: &SourcePosition) -> Result<Label, CrsnError> { + if !name.starts_with(':') { + return Err(CrsnError::Parse("Label must start with a colon".into(), pos.clone())); + } let label = name.trim_start_matches(':'); Ok(if label.starts_with('#') { let val = parse_u64(&label[1..], pos)?; Label::Numbered(u32::try_from(val).err_pos(pos)?) 
} else { + if !is_valid_identifier(&label) { + return Err(CrsnError::Parse(format!("\"{}\" is not an allowed label name.", name).into(), pos.clone())); + } + Label::Named(label.to_string()) }) } diff --git a/crsn/src/builtin/parse.rs b/crsn/src/builtin/parse.rs index c8bb9d3..c35ef7c 100644 --- a/crsn/src/builtin/parse.rs +++ b/crsn/src/builtin/parse.rs @@ -1,6 +1,6 @@ use sexp::{Atom, Sexp, SourcePosition}; -use crate::asm::data::literal::{Label, RoutineName}; +use crate::asm::data::literal::{RoutineName}; use crate::asm::data::reg::parse_reg; use crate::asm::error::CrsnError; use crate::asm::instr::op::OpKind; @@ -185,9 +185,9 @@ pub(crate) fn parse_op<'a>(op_pos: &SourcePosition, keyword: &str, mut args: Tok } "far" => { - if let Some(Sexp::Atom(Atom::S(ref label), _)) = args.peek() { - if let Some(label) = label.strip_prefix(':') { - let label = Label::Named(label.to_string()); + if let Some(Sexp::Atom(Atom::S(ref label), labelpos)) = args.peek() { + if label.starts_with(':') { + let label = parse_label_str(label, labelpos)?; BuiltinOp::FarLabel(label) } else { return Ok(ParseRes::Unknown(args)); @@ -198,8 +198,8 @@ pub(crate) fn parse_op<'a>(op_pos: &SourcePosition, keyword: &str, mut args: Tok } other => { - if let Some(label) = other.strip_prefix(':') { - BuiltinOp::Label(parse_label_str(label, &op_pos)?) + if other.starts_with(':') { + BuiltinOp::Label(parse_label_str(other, &op_pos)?) } else { return Ok(ParseRes::Unknown(args)); } diff --git a/examples/aliases.csn b/examples/aliases.csn index 54bef63..5e4899e 100644 --- a/examples/aliases.csn +++ b/examples/aliases.csn @@ -1,4 +1,8 @@ ( + (sym 🐈 r0) + (ld 🐈 7) + (unsym 🐈) + (def FOO 777) (undef FOO)