diff --git a/Cargo.lock b/Cargo.lock index 4fd9bdb8..509dfde1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,7 +150,7 @@ dependencies = [ "proc-macro2", "quote", "swc_macros_common", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -295,7 +295,7 @@ checksum = "fdde5c9cd29ebd706ce1b35600920a33550e402fc998a2e53ad3b42c3c47a192" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -672,6 +672,12 @@ dependencies = [ [[package]] name = "fine" version = "0.1.0" +dependencies = [ + "glob", + "prettyplease", + "quote", + "syn 2.0.47", +] [[package]] name = "flate2" @@ -720,7 +726,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -753,7 +759,7 @@ dependencies = [ "pmutil", "proc-macro2", "swc_macros_common", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -1038,7 +1044,7 @@ dependencies = [ "pmutil", "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -1717,7 +1723,7 @@ checksum = "52a40bc70c2c58040d2d8b167ba9a5ff59fc9dab7ad44771cfde3dcfde7a09c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -1751,6 +1757,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "prettyplease" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" +dependencies = [ + "proc-macro2", + "syn 2.0.47", +] + [[package]] name = "proc-macro-crate" version = "1.3.1" @@ -1769,9 +1785,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.59" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b" +checksum = "907a61bd0f64c2f29cd1cf1dc34d05176426a3f504a78010f08416ddb7b13708" dependencies = [ "unicode-ident", ] @@ -1793,9 +1809,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.28" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -2011,7 +2027,7 @@ checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -2216,7 +2232,7 @@ dependencies = [ "proc-macro2", "quote", "swc_macros_common", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -2282,7 +2298,7 @@ dependencies = [ "proc-macro2", "quote", "swc_macros_common", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -2331,7 +2347,7 @@ dependencies = [ "proc-macro2", "quote", "swc_macros_common", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -2414,7 +2430,7 @@ dependencies = [ "proc-macro2", "quote", "swc_macros_common", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -2518,7 +2534,7 @@ dependencies = [ "pmutil", "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -2530,7 +2546,7 @@ dependencies = [ "pmutil", "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -2554,7 +2570,7 @@ dependencies = [ "proc-macro2", "quote", "swc_macros_common", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -2570,9 +2586,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.18" +version = "2.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" +checksum = "1726efe18f42ae774cc644f330953a5e7b3c3003d3edcecf18850fe9d4dd9afb" dependencies = [ "proc-macro2", "quote", @@ -2614,7 +2630,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -2704,7 +2720,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.47", ] [[package]] @@ -2912,7 +2928,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.47", "wasm-bindgen-shared", ] @@ -2946,7 +2962,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.47", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/fine/Cargo.lock b/fine/Cargo.lock index f3a8ee96..535a9196 100644 --- a/fine/Cargo.lock +++ b/fine/Cargo.lock @@ -12,9 +12,19 @@ checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" name = "fine" version = "0.1.0" dependencies = [ + "glob", "pretty_assertions", + "prettyplease", + "quote", + "syn", ] +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "pretty_assertions" version = "1.4.0" @@ -25,6 +35,51 @@ dependencies = [ "yansi", ] +[[package]] +name = "prettyplease" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "907a61bd0f64c2f29cd1cf1dc34d05176426a3f504a78010f08416ddb7b13708" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "2.0.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1726efe18f42ae774cc644f330953a5e7b3c3003d3edcecf18850fe9d4dd9afb" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + [[package]] name = "yansi" version = "0.5.1" diff --git a/fine/Cargo.toml b/fine/Cargo.toml index 3ee646e5..731a7d40 100644 --- a/fine/Cargo.toml +++ b/fine/Cargo.toml @@ -5,3 +5,9 @@ edition = "2021" [dev-dependencies] pretty_assertions = "1.4.0" + +[build-dependencies] +glob = "0.3.1" +prettyplease = "0.2.16" +quote = "1.0.35" +syn = "2.0.47" diff --git a/fine/build.rs b/fine/build.rs new file mode 100644 index 00000000..100b5229 --- /dev/null +++ b/fine/build.rs @@ -0,0 +1,96 @@ +use quote::{format_ident, quote}; +use std::env; +use std::fs; +use std::path::{Path, PathBuf}; + +fn generate_test_for_file(path: PathBuf) -> String { + let contents = fs::read_to_string(&path).expect("Unable to read input"); + + let mut concrete_stuff: Option = None; + + // Start iterating over lines and processing directives.... + let mut lines = contents.lines(); + while let Some(line) = lines.next() { + let line = match line.strip_prefix("//") { + Some(line) => line, + None => break, + }; + + let line = line.trim(); + if line == "concrete:" { + let mut concrete = String::new(); + while let Some(line) = lines.next() { + let line = match line.strip_prefix("// | ") { + Some(line) => line, + None => break, + }; + + concrete.push_str(line); + concrete.push_str("\n"); + } + concrete_stuff = Some(concrete); + } + } + + let concrete_comparison = if let Some(concrete) = concrete_stuff { + quote! { + crate::assert_concrete(&_tree, #concrete) + } + } else { + quote! {} + }; + + let name = format_ident!("{}", path.file_stem().unwrap().to_string_lossy()); + let test_method = quote! { + fn #name() { + let (_tree, _lines) = fine::parser::concrete::parse_concrete(#contents); + #concrete_comparison; + } + }; + + let syntax_tree = syn::parse2(test_method).unwrap(); + prettyplease::unparse(&syntax_tree) +} + +fn process_directory(output: &mut String, path: T) +where + T: AsRef, +{ + let fine_ext: std::ffi::OsString = "fine".into(); + let path = path.as_ref(); + for entry in std::fs::read_dir(path).expect("Unable to read directory") { + match entry { + Ok(dirent) => { + let file_type = dirent.file_type().unwrap(); + if file_type.is_dir() { + let file_name = dirent.file_name(); + let file_name = file_name.to_string_lossy().to_owned(); + output.push_str(&format!("mod {file_name} {{\n")); + process_directory(output, dirent.path()); + output.push_str("}\n\n"); + } else if file_type.is_file() { + if dirent.path().extension() == Some(&fine_ext) { + output.push_str(&format!("// {}\n", dirent.path().display())); + output.push_str("#[test]\n"); + output.push_str(&generate_test_for_file(dirent.path())); + output.push_str("\n\n"); + } + } else { + eprintln!("Skipping symlink: {}", path.display()); + } + } + Err(e) => eprintln!("Unable to read directory entry: {:?}", e), + } + } +} + +fn main() { + println!("cargo:rerun-if-changed=./tests"); + + let mut test_source = String::new(); + process_directory(&mut test_source, "./tests"); + + let out_dir = env::var_os("OUT_DIR").unwrap(); + let dest_path = Path::new(&out_dir).join("generated_tests.rs"); + fs::write(dest_path, test_source).unwrap(); +} diff --git a/fine/src/parser.rs b/fine/src/parser.rs index 70e50203..ce8fd3e6 100644 --- a/fine/src/parser.rs +++ b/fine/src/parser.rs @@ -646,8 +646,13 @@ impl<'a> Parser<'a> { fn advance(&mut self) { self.previous = self.current.clone(); self.current = self.tokens.next(); - while self.current.kind == TokenKind::Error { - self.error_at_current(self.current.to_string()); + while self.current.kind == TokenKind::Error + || self.current.kind == TokenKind::Whitespace + || self.current.kind == TokenKind::Comment + { + if self.current.kind == TokenKind::Error { + self.error_at_current(self.current.to_string()); + } self.current = self.tokens.next(); } } diff --git a/fine/src/parser/concrete.rs b/fine/src/parser/concrete.rs index e110e019..7e857af1 100644 --- a/fine/src/parser/concrete.rs +++ b/fine/src/parser/concrete.rs @@ -32,6 +32,7 @@ fn token_power<'a>(token: TokenKind) -> Option { } } +#[derive(Debug)] pub enum TreeKind { Error, File, @@ -62,11 +63,53 @@ pub struct Tree<'a> { pub children: Vec>, } +impl<'a> Tree<'a> { + pub fn dump(&self) -> String { + let mut output = String::new(); + output.push_str(&format!("{:?}\n", self.kind)); + for child in self.children.iter() { + child.dump_rec(2, &mut output); + } + output + } +} + +impl<'a> std::fmt::Debug for Tree<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "[{:?}", self.kind)?; + for child in self.children.iter() { + match child { + Child::Token(t) => write!(f, " {:?}:'{}'", t.kind, t.as_str())?, + Child::Tree(t) => write!(f, " {t:?}")?, + } + } + write!(f, "]")?; + Ok(()) + } +} + pub enum Child<'a> { Token(Token<'a>), Tree(Tree<'a>), } +impl<'a> Child<'a> { + fn dump_rec(&self, indent: usize, output: &mut String) { + for _ in 0..indent { + output.push(' '); + } + match self { + Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())), + Child::Tree(t) => { + output.push_str(&format!("{:?}\n", t.kind)); + for child in t.children.iter() { + child.dump_rec(indent + 2, output); + } + } + } + } +} + enum ParseEvent<'a> { Start { kind: TreeKind }, End, @@ -97,6 +140,7 @@ impl<'a> CParser<'a> { events: Vec::new(), }; parser.current = parser.tokens.next(); + parser.skip_ephemera(); parser } @@ -135,6 +179,14 @@ impl<'a> CParser<'a> { token: self.current.clone(), }); self.current = self.tokens.next(); + self.skip_ephemera(); + } + + fn skip_ephemera(&mut self) { + while self.current.kind == TokenKind::Whitespace || self.current.kind == TokenKind::Comment + { + self.current = self.tokens.next(); + } } fn eof(&self) -> bool { @@ -213,8 +265,14 @@ impl<'a> CParser<'a> { let mut events = self.events; let mut stack = Vec::new(); - // Special case: pop the last `Close` event to ensure that the stack - // is non-empty inside the loop. + // The first element in our events vector must be a start; the whole + // thing must be bracketed in a tree. + assert!(matches!(events.get(0), Some(ParseEvent::Start { .. }))); + + // The last element in our events vector must be an end, otherwise + // the parser has failed badly. We'll remove it here so that, after + // processing the entire array, the stack retains the tree that we + // start with the very first ::Start. assert!(matches!(events.pop(), Some(ParseEvent::End))); for event in events { @@ -240,7 +298,7 @@ impl<'a> CParser<'a> { } } -pub fn c_parse(source: &str) -> (Tree, Lines) { +pub fn parse_concrete(source: &str) -> (Tree, Lines) { let tokens = Tokens::new(source); let mut parser = CParser::new(tokens); @@ -527,3 +585,34 @@ fn identifier(p: &mut CParser) -> MarkClosed { p.end(m, TreeKind::Identifier) } + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + fn test_successful_expression_parse(source: &str, expected: &str) { + let tokens = Tokens::new(source); + let mut parser = CParser::new(tokens); + + expression(&mut parser); + + let (tree, _) = parser.build_tree(); + assert_eq!( + expected, + format!("{tree:?}"), + "The parse structure of the expressions did not match" + ); + } + + macro_rules! test_expr { + ($name:ident, $input:expr, $expected:expr) => { + #[test] + fn $name() { + test_successful_expression_parse($input, $expected); + } + }; + } + + test_expr!(number_expr, "12", "[LiteralExpression Number:'12']"); +} diff --git a/fine/src/tokens.rs b/fine/src/tokens.rs index ceaa22f0..a6ddccdf 100644 --- a/fine/src/tokens.rs +++ b/fine/src/tokens.rs @@ -3,6 +3,9 @@ pub enum TokenKind { EOF, Error, + Whitespace, + Comment, + LeftBrace, RightBrace, LeftBracket, @@ -390,7 +393,7 @@ impl<'a> Tokens<'a> { self.next_char.is_none() } - fn skip_whitespace(&mut self) { + fn whitespace(&mut self, pos: usize) -> Token<'a> { while let Some((pos, ch)) = self.next_char { if ch == '\n' { self.lines.add_line(pos); @@ -399,16 +402,27 @@ impl<'a> Tokens<'a> { } self.advance(); } + self.token(pos, TokenKind::Whitespace) + } + + fn comment(&mut self, pos: usize) -> Token<'a> { + while let Some((_, ch)) = self.next_char { + if ch == '\n' { + break; + } + self.advance(); + } + self.token(pos, TokenKind::Comment) } pub fn next(&mut self) -> Token<'a> { - self.skip_whitespace(); // TODO: Whitespace preserving/comment preserving let (pos, c) = match self.advance() { Some((p, c)) => (p, c), None => return self.token(self.source.len(), TokenKind::EOF), }; match c { + ' ' | '\t' | '\r' | '\n' => self.whitespace(pos), '{' => self.token(pos, TokenKind::LeftBrace), '}' => self.token(pos, TokenKind::RightBrace), '[' => self.token(pos, TokenKind::LeftBracket), @@ -427,7 +441,13 @@ impl<'a> Tokens<'a> { '+' => self.token(pos, TokenKind::Plus), ':' => self.token(pos, TokenKind::Colon), ';' => self.token(pos, TokenKind::Semicolon), - '/' => self.token(pos, TokenKind::Slash), + '/' => { + if self.matches('/') { + self.comment(pos) + } else { + self.token(pos, TokenKind::Slash) + } + } '*' => self.token(pos, TokenKind::Star), '!' => { if self.matches('=') { @@ -484,6 +504,9 @@ mod tests { while !is_eof { let token = tokens.next(); is_eof = token.kind == TokenKind::EOF; + if token.kind == TokenKind::Whitespace { + continue; + } result.push(token); } diff --git a/fine/tests/example_tests.rs b/fine/tests/example_tests.rs new file mode 100644 index 00000000..c61f26c6 --- /dev/null +++ b/fine/tests/example_tests.rs @@ -0,0 +1,8 @@ +use fine::parser::concrete::Tree; +use pretty_assertions::assert_eq; + +fn assert_concrete(tree: &Tree, expected: &str) { + assert_eq!(tree.dump(), expected, "concrete syntax trees did not match"); +} + +include!(concat!(env!("OUT_DIR"), "/generated_tests.rs")); diff --git a/fine/tests/expression/expressions.fine b/fine/tests/expression/expressions.fine new file mode 100644 index 00000000..b353d91a --- /dev/null +++ b/fine/tests/expression/expressions.fine @@ -0,0 +1,27 @@ +// concrete: +// | File +// | ExpressionStatement +// | LiteralExpression +// | Number:'"42"' +// | Semicolon:'";"' +// | ExpressionStatement +// | BinaryExpression +// | BinaryExpression +// | LiteralExpression +// | Number:'"1"' +// | Star:'"*"' +// | LiteralExpression +// | Number:'"2"' +// | Plus:'"+"' +// | BinaryExpression +// | UnaryExpression +// | Minus:'"-"' +// | LiteralExpression +// | Number:'"3"' +// | Star:'"*"' +// | LiteralExpression +// | Number:'"4"' +// | Semicolon:'";"' +// +42; +1 * 2 + -3 * 4;