diff --git a/Cargo.lock b/Cargo.lock index 509dfde1..4fd9bdb8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,7 +150,7 @@ dependencies = [ "proc-macro2", "quote", "swc_macros_common", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -295,7 +295,7 @@ checksum = "fdde5c9cd29ebd706ce1b35600920a33550e402fc998a2e53ad3b42c3c47a192" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -672,12 +672,6 @@ dependencies = [ [[package]] name = "fine" version = "0.1.0" -dependencies = [ - "glob", - "prettyplease", - "quote", - "syn 2.0.47", -] [[package]] name = "flate2" @@ -726,7 +720,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -759,7 +753,7 @@ dependencies = [ "pmutil", "proc-macro2", "swc_macros_common", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -1044,7 +1038,7 @@ dependencies = [ "pmutil", "proc-macro2", "quote", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -1723,7 +1717,7 @@ checksum = "52a40bc70c2c58040d2d8b167ba9a5ff59fc9dab7ad44771cfde3dcfde7a09c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -1757,16 +1751,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" -[[package]] -name = "prettyplease" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" -dependencies = [ - "proc-macro2", - "syn 2.0.47", -] - [[package]] name = "proc-macro-crate" version = "1.3.1" @@ -1785,9 +1769,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.75" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907a61bd0f64c2f29cd1cf1dc34d05176426a3f504a78010f08416ddb7b13708" +checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b" dependencies = [ "unicode-ident", ] @@ -1809,9 +1793,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.35" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" dependencies = [ "proc-macro2", ] @@ -2027,7 +2011,7 @@ checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -2232,7 +2216,7 @@ dependencies = [ "proc-macro2", "quote", "swc_macros_common", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -2298,7 +2282,7 @@ dependencies = [ "proc-macro2", "quote", "swc_macros_common", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -2347,7 +2331,7 @@ dependencies = [ "proc-macro2", "quote", "swc_macros_common", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -2430,7 +2414,7 @@ dependencies = [ "proc-macro2", "quote", "swc_macros_common", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -2534,7 +2518,7 @@ dependencies = [ "pmutil", "proc-macro2", "quote", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -2546,7 +2530,7 @@ dependencies = [ "pmutil", "proc-macro2", "quote", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -2570,7 +2554,7 @@ dependencies = [ "proc-macro2", "quote", "swc_macros_common", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -2586,9 +2570,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.47" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1726efe18f42ae774cc644f330953a5e7b3c3003d3edcecf18850fe9d4dd9afb" +checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" dependencies = [ "proc-macro2", "quote", @@ -2630,7 +2614,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -2720,7 +2704,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn 2.0.18", ] [[package]] @@ -2928,7 +2912,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.47", + "syn 2.0.18", "wasm-bindgen-shared", ] @@ -2962,7 +2946,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn 2.0.18", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/fine/Cargo.lock b/fine/Cargo.lock index 535a9196..f3a8ee96 100644 --- a/fine/Cargo.lock +++ b/fine/Cargo.lock @@ -12,19 +12,9 @@ checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" name = "fine" version = "0.1.0" dependencies = [ - "glob", "pretty_assertions", - "prettyplease", - "quote", - "syn", ] -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - [[package]] name = "pretty_assertions" version = "1.4.0" @@ -35,51 +25,6 @@ dependencies = [ "yansi", ] -[[package]] -name = "prettyplease" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" -dependencies = [ - "proc-macro2", - "syn", -] - -[[package]] -name = "proc-macro2" -version = "1.0.75" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907a61bd0f64c2f29cd1cf1dc34d05176426a3f504a78010f08416ddb7b13708" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "syn" -version = "2.0.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1726efe18f42ae774cc644f330953a5e7b3c3003d3edcecf18850fe9d4dd9afb" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - [[package]] name = "yansi" version = "0.5.1" diff --git a/fine/Cargo.toml b/fine/Cargo.toml index 731a7d40..3ee646e5 100644 --- a/fine/Cargo.toml +++ b/fine/Cargo.toml @@ -5,9 +5,3 @@ edition = "2021" [dev-dependencies] pretty_assertions = "1.4.0" - -[build-dependencies] -glob = "0.3.1" -prettyplease = "0.2.16" -quote = "1.0.35" -syn = "2.0.47" diff --git a/fine/build.rs b/fine/build.rs deleted file mode 100644 index 100b5229..00000000 --- a/fine/build.rs +++ /dev/null @@ -1,96 +0,0 @@ -use quote::{format_ident, quote}; -use std::env; -use std::fs; -use std::path::{Path, PathBuf}; - -fn generate_test_for_file(path: PathBuf) -> String { - let contents = fs::read_to_string(&path).expect("Unable to read input"); - - let mut concrete_stuff: Option = None; - - // Start iterating over lines and processing directives.... - let mut lines = contents.lines(); - while let Some(line) = lines.next() { - let line = match line.strip_prefix("//") { - Some(line) => line, - None => break, - }; - - let line = line.trim(); - if line == "concrete:" { - let mut concrete = String::new(); - while let Some(line) = lines.next() { - let line = match line.strip_prefix("// | ") { - Some(line) => line, - None => break, - }; - - concrete.push_str(line); - concrete.push_str("\n"); - } - concrete_stuff = Some(concrete); - } - } - - let concrete_comparison = if let Some(concrete) = concrete_stuff { - quote! { - crate::assert_concrete(&_tree, #concrete) - } - } else { - quote! {} - }; - - let name = format_ident!("{}", path.file_stem().unwrap().to_string_lossy()); - let test_method = quote! { - fn #name() { - let (_tree, _lines) = fine::parser::concrete::parse_concrete(#contents); - #concrete_comparison; - } - }; - - let syntax_tree = syn::parse2(test_method).unwrap(); - prettyplease::unparse(&syntax_tree) -} - -fn process_directory(output: &mut String, path: T) -where - T: AsRef, -{ - let fine_ext: std::ffi::OsString = "fine".into(); - let path = path.as_ref(); - for entry in std::fs::read_dir(path).expect("Unable to read directory") { - match entry { - Ok(dirent) => { - let file_type = dirent.file_type().unwrap(); - if file_type.is_dir() { - let file_name = dirent.file_name(); - let file_name = file_name.to_string_lossy().to_owned(); - output.push_str(&format!("mod {file_name} {{\n")); - process_directory(output, dirent.path()); - output.push_str("}\n\n"); - } else if file_type.is_file() { - if dirent.path().extension() == Some(&fine_ext) { - output.push_str(&format!("// {}\n", dirent.path().display())); - output.push_str("#[test]\n"); - output.push_str(&generate_test_for_file(dirent.path())); - output.push_str("\n\n"); - } - } else { - eprintln!("Skipping symlink: {}", path.display()); - } - } - Err(e) => eprintln!("Unable to read directory entry: {:?}", e), - } - } -} - -fn main() { - println!("cargo:rerun-if-changed=./tests"); - - let mut test_source = String::new(); - process_directory(&mut test_source, "./tests"); - - let out_dir = env::var_os("OUT_DIR").unwrap(); - let dest_path = Path::new(&out_dir).join("generated_tests.rs"); - fs::write(dest_path, test_source).unwrap(); -} diff --git a/fine/src/parser.rs b/fine/src/parser.rs index ce8fd3e6..5ab842ca 100644 --- a/fine/src/parser.rs +++ b/fine/src/parser.rs @@ -1,7 +1,5 @@ use crate::tokens::{Lines, Token, TokenKind, Tokens}; -use std::fmt; - -pub mod concrete; +use std::{cell::Cell, fmt}; // TODO: An error should have: // @@ -159,6 +157,492 @@ impl std::fmt::Display for Type { } } +// NOTE: much of this parser structure derived from +// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html +pub enum TreeKind { + Error, + File, + FunDecl, + ParamList, + Parameter, + TypeExpression, + Block, + LetStatement, + ReturnStatement, + ExpressionStatement, + LiteralExpression, + GroupingExpression, + UnaryExpression, + ConditionalExpression, + CallExpression, + ArgumentList, + Argument, + BinaryExpression, + IfStatement, +} + +pub struct Tree<'a> { + pub kind: TreeKind, + // TODO: Indirect reference? Flatness? Using a reference structure will + // make caching and annotation easier if desired. + pub children: Vec>, +} + +pub enum Child<'a> { + Token(Token<'a>), + Tree(Tree<'a>), +} + +enum ParseEvent<'a> { + Start { kind: TreeKind }, + End, + Advance { token: Token<'a> }, +} + +struct MarkStarted { + index: usize, +} + +struct MarkClosed { + index: usize, +} + +struct CParser<'a> { + tokens: Tokens<'a>, + current: Token<'a>, + fuel: Cell, + events: Vec>, +} + +impl<'a> CParser<'a> { + fn new(tokens: Tokens<'a>) -> Self { + let mut parser = CParser { + tokens, + current: Token::new(TokenKind::EOF, 0, ""), + fuel: Cell::new(256), + events: Vec::new(), + }; + parser.current = parser.tokens.next(); + parser + } + + fn start(&mut self) -> MarkStarted { + let mark = MarkStarted { + index: self.events.len(), + }; + self.events.push(ParseEvent::Start { + kind: TreeKind::Error, + }); + mark + } + + fn end(&mut self, mark: MarkStarted, kind: TreeKind) -> MarkClosed { + self.events[mark.index] = ParseEvent::Start { kind }; + self.events.push(ParseEvent::End); + MarkClosed { index: mark.index } + } + + fn start_before(&mut self, mark: MarkClosed) -> MarkStarted { + // TODO: Point backwards and pointer chase in tree build? + let mark = MarkStarted { index: mark.index }; + self.events.insert( + mark.index, + ParseEvent::Start { + kind: TreeKind::Error, + }, + ); + mark + } + + fn advance(&mut self) { + assert!(!self.eof()); // Don't try to advance past EOF + self.fuel.set(256); // Consuming a token, rest stuck detector + self.events.push(ParseEvent::Advance { + token: self.current.clone(), + }); + self.current = self.tokens.next(); + } + + fn eof(&self) -> bool { + self.current.kind == TokenKind::EOF + } + + fn peek(&self) -> TokenKind { + assert!(self.fuel.get() > 0, "parser is stuck!"); + self.fuel.set(self.fuel.get() - 1); + self.current.kind + } + + fn at(&self, kind: TokenKind) -> bool { + self.peek() == kind + } + + fn eat(&mut self, kind: TokenKind) -> bool { + if self.at(kind) { + self.advance(); + true + } else { + false + } + } + + fn expect(&mut self, kind: TokenKind, error: T) + where + T: Into, + { + if self.eat(kind) { + return; + } + self.error(error); + } + + fn advance_with_error(&mut self, error: T) -> MarkClosed + where + T: Into, + { + let m = self.start(); + self.error(error); + self.advance(); + self.end(m, TreeKind::Error) + } + + fn error(&mut self, message: T) + where + T: Into, + { + self.error_at(self.current.clone(), message) + } + + fn error_at(&mut self, token: Token<'a>, message: T) + where + T: Into, + { + let message: String = message.into(); + let mut final_message = "Error ".to_string(); + + if token.kind == TokenKind::EOF { + final_message.push_str("at end") + } else if token.kind != TokenKind::Error { + final_message.push_str("at '"); + final_message.push_str(token.as_str()); + final_message.push_str("'"); + } + final_message.push_str(": "); + final_message.push_str(&message); + + self.events.push(ParseEvent::Advance { + token: Token::error(token.start, final_message), + }); + } + + fn build_tree(self) -> (Tree<'a>, Lines) { + let mut events = self.events; + let mut stack = Vec::new(); + + // Special case: pop the last `Close` event to ensure that the stack + // is non-empty inside the loop. + assert!(matches!(events.pop(), Some(ParseEvent::End))); + + for event in events { + match event { + ParseEvent::Start { kind } => stack.push(Tree { + kind, + children: Vec::new(), + }), + + ParseEvent::End => { + let tree = stack.pop().unwrap(); + stack.last_mut().unwrap().children.push(Child::Tree(tree)); + } + + ParseEvent::Advance { token } => { + stack.last_mut().unwrap().children.push(Child::Token(token)); + } + } + } + + assert!(stack.len() == 1, "Not all trees were ended!"); + (stack.pop().unwrap(), self.tokens.lines()) + } +} + +pub fn c_parse(source: &str) -> (Tree, Lines) { + let tokens = Tokens::new(source); + let mut parser = CParser::new(tokens); + + file(&mut parser); + + parser.build_tree() +} + +fn file(p: &mut CParser) { + let m = p.start(); + while !p.eof() { + match p.peek() { + TokenKind::Fun => function(p), + _ => statement(p), + } + } + p.end(m, TreeKind::File); +} + +fn function(p: &mut CParser) { + assert!(p.at(TokenKind::Fun)); + let m = p.start(); + + p.expect(TokenKind::Fun, "expected a function to start with 'fun'"); + p.expect(TokenKind::Identifier, "expected a function name"); + if p.at(TokenKind::LeftParen) { + param_list(p); + } + if p.eat(TokenKind::Arrow) { + type_expr(p); + } + if p.at(TokenKind::LeftBrace) { + block(p); + } + + p.end(m, TreeKind::FunDecl); +} + +fn param_list(p: &mut CParser) { + assert!(p.at(TokenKind::LeftParen)); + let m = p.start(); + + p.expect(TokenKind::LeftParen, "expect '(' to start a parameter list"); + while !p.at(TokenKind::RightParen) && !p.eof() { + if p.at(TokenKind::Identifier) { + parameter(p); + } else { + break; + } + } + p.expect(TokenKind::RightParen, "expect ')' to end a parameter list"); + + p.end(m, TreeKind::ParamList); +} + +fn parameter(p: &mut CParser) { + assert!(p.at(TokenKind::Identifier)); + let m = p.start(); + p.expect( + TokenKind::Identifier, + "expected an identifier for a parameter name", + ); + if p.eat(TokenKind::Colon) { + type_expr(p); + } + if !p.at(TokenKind::RightParen) { + p.expect(TokenKind::Comma, "expected a comma between parameters"); + } + + p.end(m, TreeKind::Parameter); +} + +fn type_expr(p: &mut CParser) { + let m = p.start(); + // TODO: Other kinds of type expressions probably! + p.expect(TokenKind::Identifier, "expected the identifier of a type"); + p.end(m, TreeKind::TypeExpression); +} + +fn block(p: &mut CParser) { + assert!(p.at(TokenKind::LeftBrace)); + let m = p.start(); + + p.expect(TokenKind::LeftBrace, "expect '{' to start a block"); + while !p.at(TokenKind::RightBrace) && !p.eof() { + statement(p); + } + p.expect(TokenKind::RightBrace, "expect '}' to start a block"); + + p.end(m, TreeKind::Block); +} + +fn statement(p: &mut CParser) { + match p.peek() { + TokenKind::LeftBrace => block(p), + TokenKind::Let => statement_let(p), + TokenKind::Return => statement_return(p), + + // NOTE: Technically 'if' is an expression, but `if` doesn't + // require a semicolon at the end if it's all by itself. + TokenKind::If => statement_if(p), + + _ => statement_expression(p), + } +} + +fn statement_if(p: &mut CParser) { + assert!(p.at(TokenKind::If)); + let m = p.start(); + + conditional(p); + + p.end(m, TreeKind::IfStatement); +} + +fn statement_let(p: &mut CParser) { + assert!(p.at(TokenKind::Let)); + let m = p.start(); + + p.expect(TokenKind::Let, "expect 'let' to start a let statement"); + p.expect(TokenKind::Identifier, "expected a name for the variable"); + p.expect(TokenKind::Equal, "expected a '=' after the variable name"); + expression(p); + p.expect(TokenKind::Semicolon, "expect ';' to end a let statement"); + + p.end(m, TreeKind::LetStatement); +} + +fn statement_return(p: &mut CParser) { + assert!(p.at(TokenKind::Return)); + let m = p.start(); + + p.expect( + TokenKind::Return, + "expect 'return' to start a return statement", + ); + expression(p); + p.expect(TokenKind::Semicolon, "expect ';' to end a return statement"); + + p.end(m, TreeKind::ReturnStatement); +} + +fn statement_expression(p: &mut CParser) { + let m = p.start(); + + expression(p); + p.expect( + TokenKind::Semicolon, + "expect ';' to end an expression statement", + ); + + p.end(m, TreeKind::ExpressionStatement); +} + +fn expression(p: &mut CParser) { + expression_with_power(p, 0) +} + +fn expression_with_power(p: &mut CParser, minimum_power: u8) { + let mut expr = prefix_expression(p); + while p.at(TokenKind::LeftParen) { + let m = p.start_before(expr); + argument_list(p); + expr = p.end(m, TreeKind::CallExpression); + } + + loop { + let Some(power) = token_power(p.peek()) else { + break; + }; + if power < minimum_power { + break; + } + + // TODO: I don't think this works for other "infix" types, but we'll + // see won't we. + let m = p.start_before(expr); + p.advance(); // Consume the operator + expression_with_power(p, power); + expr = p.end(m, TreeKind::BinaryExpression); + } +} + +fn argument_list(p: &mut CParser) { + assert!(p.at(TokenKind::LeftParen)); + let m = p.start(); + + p.expect( + TokenKind::LeftParen, + "expect an argument list to start with '('", + ); + while !p.at(TokenKind::RightParen) && !p.eof() { + argument(p); + } + p.expect( + TokenKind::RightParen, + "expect an argument list to start with '('", + ); + + p.end(m, TreeKind::ArgumentList); +} + +fn argument(p: &mut CParser) { + let m = p.start(); + + expression(p); + if !p.at(TokenKind::RightParen) { + p.expect(TokenKind::Comma, "expect a ',' between arguments"); + } + + p.end(m, TreeKind::Argument); +} + +fn prefix_expression(p: &mut CParser) -> MarkClosed { + match p.peek() { + TokenKind::Number => literal(p), + TokenKind::String => literal(p), + TokenKind::True => literal(p), + TokenKind::False => literal(p), + + TokenKind::LeftParen => grouping(p), + + TokenKind::Bang => unary(p), + TokenKind::Minus => unary(p), + + TokenKind::If => conditional(p), + + _ => p.advance_with_error("expected an expression"), + } +} + +fn literal(p: &mut CParser) -> MarkClosed { + let m = p.start(); + p.advance(); + p.end(m, TreeKind::LiteralExpression) +} + +fn grouping(p: &mut CParser) -> MarkClosed { + assert!(p.at(TokenKind::LeftParen)); + let m = p.start(); + + p.expect(TokenKind::LeftParen, "expected '(' to start grouping"); + expression(p); + p.expect(TokenKind::RightParen, "unmatched parentheses in expression"); + + p.end(m, TreeKind::GroupingExpression) +} + +fn unary(p: &mut CParser) -> MarkClosed { + let m = p.start(); + + p.advance(); // Past the operator + expression_with_power(p, UNARY_POWER); + + p.end(m, TreeKind::UnaryExpression) +} + +fn conditional(p: &mut CParser) -> MarkClosed { + assert!(p.at(TokenKind::If)); + let m = p.start(); + + p.expect(TokenKind::If, "expected conditional to start with 'if'"); + expression(p); + block(p); + if p.eat(TokenKind::Else) { + if p.at(TokenKind::If) { + // Don't require another block, just jump right into the conditional. + conditional(p); + } else { + block(p); + } + } + + p.end(m, TreeKind::ConditionalExpression) +} + pub struct SyntaxTree<'a> { pub errors: Vec, expressions: Vec>, @@ -646,13 +1130,8 @@ impl<'a> Parser<'a> { fn advance(&mut self) { self.previous = self.current.clone(); self.current = self.tokens.next(); - while self.current.kind == TokenKind::Error - || self.current.kind == TokenKind::Whitespace - || self.current.kind == TokenKind::Comment - { - if self.current.kind == TokenKind::Error { - self.error_at_current(self.current.to_string()); - } + while self.current.kind == TokenKind::Error { + self.error_at_current(self.current.to_string()); self.current = self.tokens.next(); } } diff --git a/fine/src/parser/concrete.rs b/fine/src/parser/concrete.rs deleted file mode 100644 index 7e857af1..00000000 --- a/fine/src/parser/concrete.rs +++ /dev/null @@ -1,618 +0,0 @@ -// NOTE: much of this parser structure derived from -// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html -use crate::tokens::{Lines, Token, TokenKind, Tokens}; -use std::cell::Cell; - -// BINDING POWERS. When parsing expressions we only accept expressions that -// meet a minimum binding power. (This is like "precedence" but I just super -// don't like that terminology.) -const ASSIGNMENT_POWER: u8 = 0; // = -const OR_POWER: u8 = 1; // or -const AND_POWER: u8 = 2; // and -const EQUALITY_POWER: u8 = 3; // == != -const COMPARISON_POWER: u8 = 4; // < > <= >= -const TERM_POWER: u8 = 5; // + - -const FACTOR_POWER: u8 = 6; // * / -const UNARY_POWER: u8 = 7; // ! - - -// const PRIMARY_POWER: u8 = 9; - -fn token_power<'a>(token: TokenKind) -> Option { - match token { - TokenKind::Equal => Some(ASSIGNMENT_POWER), - TokenKind::Or => Some(OR_POWER), - TokenKind::And => Some(AND_POWER), - TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER), - TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => { - Some(COMPARISON_POWER) - } - TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER), - TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER), - _ => None, - } -} - -#[derive(Debug)] -pub enum TreeKind { - Error, - File, - FunDecl, - ParamList, - Parameter, - TypeExpression, - Block, - LetStatement, - ReturnStatement, - ExpressionStatement, - LiteralExpression, - GroupingExpression, - UnaryExpression, - ConditionalExpression, - CallExpression, - ArgumentList, - Argument, - BinaryExpression, - IfStatement, - Identifier, -} - -pub struct Tree<'a> { - pub kind: TreeKind, - // TODO: Indirect reference? Flatness? Using a reference structure will - // make caching and annotation easier if desired. - pub children: Vec>, -} - -impl<'a> Tree<'a> { - pub fn dump(&self) -> String { - let mut output = String::new(); - output.push_str(&format!("{:?}\n", self.kind)); - for child in self.children.iter() { - child.dump_rec(2, &mut output); - } - output - } -} - -impl<'a> std::fmt::Debug for Tree<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "[{:?}", self.kind)?; - for child in self.children.iter() { - match child { - Child::Token(t) => write!(f, " {:?}:'{}'", t.kind, t.as_str())?, - Child::Tree(t) => write!(f, " {t:?}")?, - } - } - write!(f, "]")?; - Ok(()) - } -} - -pub enum Child<'a> { - Token(Token<'a>), - Tree(Tree<'a>), -} - -impl<'a> Child<'a> { - fn dump_rec(&self, indent: usize, output: &mut String) { - for _ in 0..indent { - output.push(' '); - } - match self { - Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())), - Child::Tree(t) => { - output.push_str(&format!("{:?}\n", t.kind)); - for child in t.children.iter() { - child.dump_rec(indent + 2, output); - } - } - } - } -} - -enum ParseEvent<'a> { - Start { kind: TreeKind }, - End, - Advance { token: Token<'a> }, -} - -struct MarkStarted { - index: usize, -} - -struct MarkClosed { - index: usize, -} - -struct CParser<'a> { - tokens: Tokens<'a>, - current: Token<'a>, - fuel: Cell, - events: Vec>, -} - -impl<'a> CParser<'a> { - fn new(tokens: Tokens<'a>) -> Self { - let mut parser = CParser { - tokens, - current: Token::new(TokenKind::EOF, 0, ""), - fuel: Cell::new(256), - events: Vec::new(), - }; - parser.current = parser.tokens.next(); - parser.skip_ephemera(); - parser - } - - fn start(&mut self) -> MarkStarted { - let mark = MarkStarted { - index: self.events.len(), - }; - self.events.push(ParseEvent::Start { - kind: TreeKind::Error, - }); - mark - } - - fn end(&mut self, mark: MarkStarted, kind: TreeKind) -> MarkClosed { - self.events[mark.index] = ParseEvent::Start { kind }; - self.events.push(ParseEvent::End); - MarkClosed { index: mark.index } - } - - fn start_before(&mut self, mark: MarkClosed) -> MarkStarted { - // TODO: Point backwards and pointer chase in tree build? - let mark = MarkStarted { index: mark.index }; - self.events.insert( - mark.index, - ParseEvent::Start { - kind: TreeKind::Error, - }, - ); - mark - } - - fn advance(&mut self) { - assert!(!self.eof()); // Don't try to advance past EOF - self.fuel.set(256); // Consuming a token, rest stuck detector - self.events.push(ParseEvent::Advance { - token: self.current.clone(), - }); - self.current = self.tokens.next(); - self.skip_ephemera(); - } - - fn skip_ephemera(&mut self) { - while self.current.kind == TokenKind::Whitespace || self.current.kind == TokenKind::Comment - { - self.current = self.tokens.next(); - } - } - - fn eof(&self) -> bool { - self.current.kind == TokenKind::EOF - } - - fn peek(&self) -> TokenKind { - assert!(self.fuel.get() > 0, "parser is stuck!"); - self.fuel.set(self.fuel.get() - 1); - self.current.kind - } - - fn at(&self, kind: TokenKind) -> bool { - self.peek() == kind - } - - fn eat(&mut self, kind: TokenKind) -> bool { - if self.at(kind) { - self.advance(); - true - } else { - false - } - } - - fn expect(&mut self, kind: TokenKind, error: T) - where - T: Into, - { - if self.eat(kind) { - return; - } - self.error(error); - } - - fn advance_with_error(&mut self, error: T) -> MarkClosed - where - T: Into, - { - let m = self.start(); - self.error(error); - self.advance(); - self.end(m, TreeKind::Error) - } - - fn error(&mut self, message: T) - where - T: Into, - { - self.error_at(self.current.clone(), message) - } - - fn error_at(&mut self, token: Token<'a>, message: T) - where - T: Into, - { - let message: String = message.into(); - let mut final_message = "Error ".to_string(); - - if token.kind == TokenKind::EOF { - final_message.push_str("at end") - } else if token.kind != TokenKind::Error { - final_message.push_str("at '"); - final_message.push_str(token.as_str()); - final_message.push_str("'"); - } - final_message.push_str(": "); - final_message.push_str(&message); - - self.events.push(ParseEvent::Advance { - token: Token::error(token.start, final_message), - }); - } - - fn build_tree(self) -> (Tree<'a>, Lines) { - let mut events = self.events; - let mut stack = Vec::new(); - - // The first element in our events vector must be a start; the whole - // thing must be bracketed in a tree. - assert!(matches!(events.get(0), Some(ParseEvent::Start { .. }))); - - // The last element in our events vector must be an end, otherwise - // the parser has failed badly. We'll remove it here so that, after - // processing the entire array, the stack retains the tree that we - // start with the very first ::Start. - assert!(matches!(events.pop(), Some(ParseEvent::End))); - - for event in events { - match event { - ParseEvent::Start { kind } => stack.push(Tree { - kind, - children: Vec::new(), - }), - - ParseEvent::End => { - let tree = stack.pop().unwrap(); - stack.last_mut().unwrap().children.push(Child::Tree(tree)); - } - - ParseEvent::Advance { token } => { - stack.last_mut().unwrap().children.push(Child::Token(token)); - } - } - } - - assert!(stack.len() == 1, "Not all trees were ended!"); - (stack.pop().unwrap(), self.tokens.lines()) - } -} - -pub fn parse_concrete(source: &str) -> (Tree, Lines) { - let tokens = Tokens::new(source); - let mut parser = CParser::new(tokens); - - file(&mut parser); - - parser.build_tree() -} - -fn file(p: &mut CParser) { - let m = p.start(); - while !p.eof() { - match p.peek() { - TokenKind::Fun => function(p), - _ => statement(p), - } - } - p.end(m, TreeKind::File); -} - -fn function(p: &mut CParser) { - assert!(p.at(TokenKind::Fun)); - let m = p.start(); - - p.expect(TokenKind::Fun, "expected a function to start with 'fun'"); - p.expect(TokenKind::Identifier, "expected a function name"); - if p.at(TokenKind::LeftParen) { - param_list(p); - } - if p.eat(TokenKind::Arrow) { - type_expr(p); - } - if p.at(TokenKind::LeftBrace) { - block(p); - } - - p.end(m, TreeKind::FunDecl); -} - -fn param_list(p: &mut CParser) { - assert!(p.at(TokenKind::LeftParen)); - let m = p.start(); - - p.expect(TokenKind::LeftParen, "expect '(' to start a parameter list"); - while !p.at(TokenKind::RightParen) && !p.eof() { - if p.at(TokenKind::Identifier) { - parameter(p); - } else { - break; - } - } - p.expect(TokenKind::RightParen, "expect ')' to end a parameter list"); - - p.end(m, TreeKind::ParamList); -} - -fn parameter(p: &mut CParser) { - assert!(p.at(TokenKind::Identifier)); - let m = p.start(); - p.expect( - TokenKind::Identifier, - "expected an identifier for a parameter name", - ); - if p.eat(TokenKind::Colon) { - type_expr(p); - } - if !p.at(TokenKind::RightParen) { - p.expect(TokenKind::Comma, "expected a comma between parameters"); - } - - p.end(m, TreeKind::Parameter); -} - -fn type_expr(p: &mut CParser) { - let m = p.start(); - // TODO: Other kinds of type expressions probably! - p.expect(TokenKind::Identifier, "expected the identifier of a type"); - p.end(m, TreeKind::TypeExpression); -} - -fn block(p: &mut CParser) { - assert!(p.at(TokenKind::LeftBrace)); - let m = p.start(); - - p.expect(TokenKind::LeftBrace, "expect '{' to start a block"); - while !p.at(TokenKind::RightBrace) && !p.eof() { - statement(p); - } - p.expect(TokenKind::RightBrace, "expect '}' to start a block"); - - p.end(m, TreeKind::Block); -} - -fn statement(p: &mut CParser) { - match p.peek() { - TokenKind::LeftBrace => block(p), - TokenKind::Let => statement_let(p), - TokenKind::Return => statement_return(p), - - // NOTE: Technically 'if' is an expression, but `if` doesn't - // require a semicolon at the end if it's all by itself. - TokenKind::If => statement_if(p), - - _ => statement_expression(p), - } -} - -fn statement_if(p: &mut CParser) { - assert!(p.at(TokenKind::If)); - let m = p.start(); - - conditional(p); - - p.end(m, TreeKind::IfStatement); -} - -fn statement_let(p: &mut CParser) { - assert!(p.at(TokenKind::Let)); - let m = p.start(); - - p.expect(TokenKind::Let, "expect 'let' to start a let statement"); - p.expect(TokenKind::Identifier, "expected a name for the variable"); - p.expect(TokenKind::Equal, "expected a '=' after the variable name"); - expression(p); - p.expect(TokenKind::Semicolon, "expect ';' to end a let statement"); - - p.end(m, TreeKind::LetStatement); -} - -fn statement_return(p: &mut CParser) { - assert!(p.at(TokenKind::Return)); - let m = p.start(); - - p.expect( - TokenKind::Return, - "expect 'return' to start a return statement", - ); - expression(p); - p.expect(TokenKind::Semicolon, "expect ';' to end a return statement"); - - p.end(m, TreeKind::ReturnStatement); -} - -fn statement_expression(p: &mut CParser) { - let m = p.start(); - - expression(p); - p.expect( - TokenKind::Semicolon, - "expect ';' to end an expression statement", - ); - - p.end(m, TreeKind::ExpressionStatement); -} - -fn expression(p: &mut CParser) { - expression_with_power(p, 0) -} - -fn expression_with_power(p: &mut CParser, minimum_power: u8) { - let mut expr = prefix_expression(p); - while p.at(TokenKind::LeftParen) { - let m = p.start_before(expr); - argument_list(p); - expr = p.end(m, TreeKind::CallExpression); - } - - loop { - let Some(power) = token_power(p.peek()) else { - break; - }; - if power < minimum_power { - break; - } - - // TODO: I don't think this works for other "infix" types, but we'll - // see won't we. - let m = p.start_before(expr); - p.advance(); // Consume the operator - expression_with_power(p, power); - expr = p.end(m, TreeKind::BinaryExpression); - } -} - -fn argument_list(p: &mut CParser) { - assert!(p.at(TokenKind::LeftParen)); - let m = p.start(); - - p.expect( - TokenKind::LeftParen, - "expect an argument list to start with '('", - ); - while !p.at(TokenKind::RightParen) && !p.eof() { - argument(p); - } - p.expect( - TokenKind::RightParen, - "expect an argument list to start with '('", - ); - - p.end(m, TreeKind::ArgumentList); -} - -fn argument(p: &mut CParser) { - let m = p.start(); - - expression(p); - if !p.at(TokenKind::RightParen) { - p.expect(TokenKind::Comma, "expect a ',' between arguments"); - } - - p.end(m, TreeKind::Argument); -} - -fn prefix_expression(p: &mut CParser) -> MarkClosed { - match p.peek() { - TokenKind::Number => literal(p), - TokenKind::String => literal(p), - TokenKind::True => literal(p), - TokenKind::False => literal(p), - - TokenKind::LeftParen => grouping(p), - - TokenKind::Bang => unary(p), - TokenKind::Minus => unary(p), - - TokenKind::If => conditional(p), - - TokenKind::Identifier => identifier(p), - - _ => p.advance_with_error("expected an expression"), - } -} - -fn literal(p: &mut CParser) -> MarkClosed { - let m = p.start(); - p.advance(); - p.end(m, TreeKind::LiteralExpression) -} - -fn grouping(p: &mut CParser) -> MarkClosed { - assert!(p.at(TokenKind::LeftParen)); - let m = p.start(); - - p.expect(TokenKind::LeftParen, "expected '(' to start grouping"); - expression(p); - p.expect(TokenKind::RightParen, "unmatched parentheses in expression"); - - p.end(m, TreeKind::GroupingExpression) -} - -fn unary(p: &mut CParser) -> MarkClosed { - let m = p.start(); - - p.advance(); // Past the operator - expression_with_power(p, UNARY_POWER); - - p.end(m, TreeKind::UnaryExpression) -} - -fn conditional(p: &mut CParser) -> MarkClosed { - assert!(p.at(TokenKind::If)); - let m = p.start(); - - p.expect(TokenKind::If, "expected conditional to start with 'if'"); - expression(p); - block(p); - if p.eat(TokenKind::Else) { - if p.at(TokenKind::If) { - // Don't require another block, just jump right into the conditional. - conditional(p); - } else { - block(p); - } - } - - p.end(m, TreeKind::ConditionalExpression) -} - -fn identifier(p: &mut CParser) -> MarkClosed { - assert!(p.at(TokenKind::Identifier)); - let m = p.start(); - - p.advance(); - - p.end(m, TreeKind::Identifier) -} - -#[cfg(test)] -mod tests { - use super::*; - use pretty_assertions::assert_eq; - - fn test_successful_expression_parse(source: &str, expected: &str) { - let tokens = Tokens::new(source); - let mut parser = CParser::new(tokens); - - expression(&mut parser); - - let (tree, _) = parser.build_tree(); - assert_eq!( - expected, - format!("{tree:?}"), - "The parse structure of the expressions did not match" - ); - } - - macro_rules! test_expr { - ($name:ident, $input:expr, $expected:expr) => { - #[test] - fn $name() { - test_successful_expression_parse($input, $expected); - } - }; - } - - test_expr!(number_expr, "12", "[LiteralExpression Number:'12']"); -} diff --git a/fine/src/tokens.rs b/fine/src/tokens.rs index a6ddccdf..ceaa22f0 100644 --- a/fine/src/tokens.rs +++ b/fine/src/tokens.rs @@ -3,9 +3,6 @@ pub enum TokenKind { EOF, Error, - Whitespace, - Comment, - LeftBrace, RightBrace, LeftBracket, @@ -393,7 +390,7 @@ impl<'a> Tokens<'a> { self.next_char.is_none() } - fn whitespace(&mut self, pos: usize) -> Token<'a> { + fn skip_whitespace(&mut self) { while let Some((pos, ch)) = self.next_char { if ch == '\n' { self.lines.add_line(pos); @@ -402,27 +399,16 @@ impl<'a> Tokens<'a> { } self.advance(); } - self.token(pos, TokenKind::Whitespace) - } - - fn comment(&mut self, pos: usize) -> Token<'a> { - while let Some((_, ch)) = self.next_char { - if ch == '\n' { - break; - } - self.advance(); - } - self.token(pos, TokenKind::Comment) } pub fn next(&mut self) -> Token<'a> { + self.skip_whitespace(); // TODO: Whitespace preserving/comment preserving let (pos, c) = match self.advance() { Some((p, c)) => (p, c), None => return self.token(self.source.len(), TokenKind::EOF), }; match c { - ' ' | '\t' | '\r' | '\n' => self.whitespace(pos), '{' => self.token(pos, TokenKind::LeftBrace), '}' => self.token(pos, TokenKind::RightBrace), '[' => self.token(pos, TokenKind::LeftBracket), @@ -441,13 +427,7 @@ impl<'a> Tokens<'a> { '+' => self.token(pos, TokenKind::Plus), ':' => self.token(pos, TokenKind::Colon), ';' => self.token(pos, TokenKind::Semicolon), - '/' => { - if self.matches('/') { - self.comment(pos) - } else { - self.token(pos, TokenKind::Slash) - } - } + '/' => self.token(pos, TokenKind::Slash), '*' => self.token(pos, TokenKind::Star), '!' => { if self.matches('=') { @@ -504,9 +484,6 @@ mod tests { while !is_eof { let token = tokens.next(); is_eof = token.kind == TokenKind::EOF; - if token.kind == TokenKind::Whitespace { - continue; - } result.push(token); } diff --git a/fine/tests/example_tests.rs b/fine/tests/example_tests.rs deleted file mode 100644 index c61f26c6..00000000 --- a/fine/tests/example_tests.rs +++ /dev/null @@ -1,8 +0,0 @@ -use fine::parser::concrete::Tree; -use pretty_assertions::assert_eq; - -fn assert_concrete(tree: &Tree, expected: &str) { - assert_eq!(tree.dump(), expected, "concrete syntax trees did not match"); -} - -include!(concat!(env!("OUT_DIR"), "/generated_tests.rs")); diff --git a/fine/tests/expression/expressions.fine b/fine/tests/expression/expressions.fine deleted file mode 100644 index b353d91a..00000000 --- a/fine/tests/expression/expressions.fine +++ /dev/null @@ -1,27 +0,0 @@ -// concrete: -// | File -// | ExpressionStatement -// | LiteralExpression -// | Number:'"42"' -// | Semicolon:'";"' -// | ExpressionStatement -// | BinaryExpression -// | BinaryExpression -// | LiteralExpression -// | Number:'"1"' -// | Star:'"*"' -// | LiteralExpression -// | Number:'"2"' -// | Plus:'"+"' -// | BinaryExpression -// | UnaryExpression -// | Minus:'"-"' -// | LiteralExpression -// | Number:'"3"' -// | Star:'"*"' -// | LiteralExpression -// | Number:'"4"' -// | Semicolon:'";"' -// -42; -1 * 2 + -3 * 4;