From 5cc9ecc398e61e42e1add5873c38cfe5e91d084f Mon Sep 17 00:00:00 2001 From: John Doty Date: Fri, 5 Jan 2024 11:22:45 -0800 Subject: [PATCH] [fine] New parser usurps old parser --- fine/build.rs | 2 +- fine/src/main.rs | 2 +- fine/src/parser.rs | 1325 +++++++++++++++-------------------- fine/src/parser/concrete.rs | 663 ------------------ fine/src/parser/old.rs | 872 +++++++++++++++++++++++ fine/tests/example_tests.rs | 2 +- 6 files changed, 1433 insertions(+), 1433 deletions(-) delete mode 100644 fine/src/parser/concrete.rs create mode 100644 fine/src/parser/old.rs diff --git a/fine/build.rs b/fine/build.rs index 69f39cb9..eb7876d8 100644 --- a/fine/build.rs +++ b/fine/build.rs @@ -44,7 +44,7 @@ fn generate_test_for_file(path: PathBuf) -> String { let name = format_ident!("{}", path.file_stem().unwrap().to_string_lossy()); let test_method = quote! { fn #name() { - let (_tree, _lines) = fine::parser::concrete::parse_concrete(#contents); + let (_tree, _lines) = fine::parser::parse(#contents); #concrete_comparison; } }; diff --git a/fine/src/main.rs b/fine/src/main.rs index d8e6ecab..0de793e7 100644 --- a/fine/src/main.rs +++ b/fine/src/main.rs @@ -1,4 +1,4 @@ -use fine::parser::Parser; +use fine::parser::old::Parser; use std::env; use std::fs; diff --git a/fine/src/parser.rs b/fine/src/parser.rs index ce8fd3e6..e24cd788 100644 --- a/fine/src/parser.rs +++ b/fine/src/parser.rs @@ -1,421 +1,499 @@ +// NOTE: much of this parser structure derived from +// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html use crate::tokens::{Lines, Token, TokenKind, Tokens}; -use std::fmt; +use std::{cell::Cell, num::NonZeroU32}; -pub mod concrete; - -// TODO: An error should have: -// -// - a start -// - an end -// - a focus -// - descriptive messages -// -// that will have to wait for now -#[derive(PartialEq, Eq)] -pub struct SyntaxError { - pub start: (usize, usize), - pub end: (usize, usize), - pub message: String, -} - -impl SyntaxError { - pub fn new(line: usize, column: usize, message: T) -> Self - where - T: ToString, - { - SyntaxError { - start: (line, column), - end: (line, column), - message: message.to_string(), - } - } - - pub fn new_spanned(start: (usize, usize), end: (usize, usize), message: T) -> Self - where - T: ToString, - { - SyntaxError { - start, - end, - message: message.to_string(), - } - } -} - -impl fmt::Debug for SyntaxError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{self}") - } -} - -impl fmt::Display for SyntaxError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}:{}: {}", self.start.0, self.end.0, self.message) - } -} - -#[derive(Clone)] -pub enum Literal { - Float64(f64), - String(String), - Bool(bool), -} - -#[derive(Copy, Clone)] -pub enum UnaryOp { - Negate, - Not, -} - -#[derive(Copy, Clone)] -pub enum BinaryOp { - Add, - Subtract, - Multiply, - Divide, - And, - Or, -} - -#[derive(Clone)] -pub enum Expr<'a> { - Literal(Literal, Token<'a>), - Unary(UnaryOp, Token<'a>, ExprRef), - Binary(BinaryOp, Token<'a>, ExprRef, ExprRef), - Conditional(Token<'a>, ExprRef, ExprRef, Option, Token<'a>), -} - -#[derive(Clone)] -pub struct ExprRef(Option); - -impl ExprRef { - pub fn error() -> Self { - ExprRef(None) - } -} - -// TODO: Eventually we will be unable to use Eq and PartialEq here, and will -// need to do out own thing. -#[derive(Copy, Clone)] -pub enum Type { - // Signals a type error. If you receive this then you know that an error - // has already been reported; if you produce this be sure to also note - // the error in the errors collection. - Error, - - // Signals that the expression has a control-flow side-effect and that no - // value will ever result from this expression. Usually this means - // everything's fine. - Unreachable, - - // TODO: Numeric literals should be implicitly convertable, unlike other - // types. Maybe just "numeric literal" type? - F64, - String, - Bool, -} - -impl Type { - pub fn is_error(&self) -> bool { - match self { - Type::Error => true, - _ => false, - } - } - - pub fn compatible_with(&self, other: &Type) -> bool { - // TODO: This is wrong; we because of numeric literals etc. - match (self, other) { - (Type::F64, Type::F64) => true, - (Type::String, Type::String) => true, - (Type::Bool, Type::Bool) => true, - (Type::Unreachable, Type::Unreachable) => true, - - // Avoid introducing more errors - (Type::Error, _) => true, - (_, Type::Error) => true, - - (_, _) => false, - } - } -} - -impl std::fmt::Debug for Type { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{self}") - } -} - -impl std::fmt::Display for Type { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use Type::*; - match self { - Error => write!(f, "<< INTERNAL ERROR >>"), - Unreachable => write!(f, "<< UNREACHABLE >>"), - F64 => write!(f, "f64"), - String => write!(f, "string"), - Bool => write!(f, "bool"), - } - } -} +pub mod old; // Until I decide to delete it. pub struct SyntaxTree<'a> { - pub errors: Vec, - expressions: Vec>, + trees: Vec>, + root: Option, } impl<'a> SyntaxTree<'a> { pub fn new() -> Self { SyntaxTree { - errors: Vec::new(), - expressions: Vec::new(), + trees: vec![], + root: None, } } - pub fn add_error(&mut self, error: SyntaxError) { - self.errors.push(error); + pub fn add_tree(&mut self, t: Tree<'a>) -> TreeRef { + assert!(t.parent.is_none()); + let tr = TreeRef::from_index(self.trees.len()); + + // NOTE: Because of the difficulty of holding multiple mutable + // references it's this is our best chance to patch up parent + // pointers. + for child in t.children.iter() { + if let Child::Tree(ct) = child { + self[*ct].parent = Some(tr); + } + } + self.trees.push(t); + tr } - pub fn add_expr(&mut self, expr: Expr<'a>) -> ExprRef { - let index = self.expressions.len(); - self.expressions.push(expr); - ExprRef(Some(index)) + pub fn dump(&self) -> String { + match self.root { + Some(r) => self[r].dump(self), + None => String::new(), + } + } +} + +impl<'a> std::ops::Index for SyntaxTree<'a> { + type Output = Tree<'a>; + + fn index(&self, index: TreeRef) -> &Self::Output { + &self.trees[index.index()] + } +} + +impl<'a> std::ops::IndexMut for SyntaxTree<'a> { + fn index_mut(&mut self, index: TreeRef) -> &mut Self::Output { + &mut self.trees[index.index()] + } +} + +#[derive(Debug)] +pub enum TreeKind { + Error, + File, + FunDecl, + ParamList, + Parameter, + TypeExpression, + Block, + LetStatement, + ReturnStatement, + ExpressionStatement, + LiteralExpression, + GroupingExpression, + UnaryExpression, + ConditionalExpression, + CallExpression, + ArgumentList, + Argument, + BinaryExpression, + IfStatement, + Identifier, +} + +pub struct Tree<'a> { + pub kind: TreeKind, + pub parent: Option, + pub children: Vec>, +} + +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct TreeRef(NonZeroU32); + +impl TreeRef { + pub fn from_index(index: usize) -> TreeRef { + let index: u32 = (index + 1).try_into().unwrap(); + TreeRef(NonZeroU32::new(index).unwrap()) } - pub fn dump_expr(&self, expr: &ExprRef) -> String { - match expr.0 { - Some(idx) => { - let expr = &self.expressions[idx]; - match expr { - Expr::Literal(_, tok) => tok.to_string(), - Expr::Unary(_, tok, e) => { - format!("({tok} {})", self.dump_expr(e)) - } - Expr::Binary(_, tok, l, r) => { - format!("({tok} {} {})", self.dump_expr(l), self.dump_expr(r)) - } - Expr::Conditional(tok, cond, t, e, _) => { - if let Some(e) = e { - format!( - "({tok} {} {} {})", - self.dump_expr(cond), - self.dump_expr(t), - self.dump_expr(e) - ) - } else { - format!("({tok} {} {})", self.dump_expr(cond), self.dump_expr(t)) - } - } + pub fn index(&self) -> usize { + let index: usize = self.0.get().try_into().unwrap(); + index - 1 + } +} + +impl<'a> Tree<'a> { + pub fn dump(&self, tree: &SyntaxTree<'a>) -> String { + let mut output = String::new(); + output.push_str(&format!("{:?}\n", self.kind)); + for child in self.children.iter() { + child.dump_rec(2, tree, &mut output); + } + output + } +} + +pub enum Child<'a> { + Token(Token<'a>), + Tree(TreeRef), +} + +impl<'a> Child<'a> { + fn dump_rec(&self, indent: usize, tree: &SyntaxTree<'a>, output: &mut String) { + for _ in 0..indent { + output.push(' '); + } + match self { + Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())), + Child::Tree(t) => { + let t = &tree[*t]; + output.push_str(&format!("{:?}\n", t.kind)); + for child in t.children.iter() { + child.dump_rec(indent + 2, tree, output); } } - None => "<|EOF|>".to_string(), } } +} - pub fn expr_span(&self, expr: &ExprRef) -> Option<(Token<'a>, Token<'a>)> { - let expr = match expr.0 { - Some(idx) => &self.expressions[idx], - None => return None, +enum ParseEvent<'a> { + Start { kind: TreeKind }, + End, + Advance { token: Token<'a> }, +} + +struct MarkStarted { + index: usize, +} + +struct MarkClosed { + index: usize, +} + +struct CParser<'a> { + tokens: Tokens<'a>, + current: Token<'a>, + fuel: Cell, + events: Vec>, +} + +impl<'a> CParser<'a> { + fn new(tokens: Tokens<'a>) -> Self { + let mut parser = CParser { + tokens, + current: Token::new(TokenKind::EOF, 0, ""), + fuel: Cell::new(256), + events: Vec::new(), }; - - match expr { - Expr::Literal(_, tok) => Some((tok.clone(), tok.clone())), - Expr::Unary(_, tok, arg) => { - let arg = self.expr_span(arg); - match arg { - None => None, - Some((_, end)) => Some((tok.clone(), end)), - } - } - Expr::Binary(_, _, left, right) => { - let left = self.expr_span(left); - let right = self.expr_span(right); - match (left, right) { - (None, _) => None, - (_, None) => None, - (Some((start, _)), Some((_, end))) => Some((start, end)), - } - } - Expr::Conditional(head, _, _, _, tail) => Some((head.clone(), tail.clone())), - } + parser.current = parser.tokens.next(); + parser.skip_ephemera(); + parser } - pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines, value_required: bool) -> Type { - // TODO: Cache and work on demand? Or is this just fine? - - let exr = expr.clone(); - let expr = match expr.0 { - Some(idx) => &self.expressions[idx], - None => return Type::Error, + fn start(&mut self) -> MarkStarted { + let mark = MarkStarted { + index: self.events.len(), }; - match expr { - Expr::Literal(lit, _) => match lit { - Literal::Float64(_) => Type::F64, - Literal::String(_) => Type::String, - Literal::Bool(_) => Type::Bool, + self.events.push(ParseEvent::Start { + kind: TreeKind::Error, + }); + mark + } + + fn end(&mut self, mark: MarkStarted, kind: TreeKind) -> MarkClosed { + self.events[mark.index] = ParseEvent::Start { kind }; + self.events.push(ParseEvent::End); + MarkClosed { index: mark.index } + } + + fn start_before(&mut self, mark: MarkClosed) -> MarkStarted { + // TODO: Point backwards and pointer chase in tree build? + let mark = MarkStarted { index: mark.index }; + self.events.insert( + mark.index, + ParseEvent::Start { + kind: TreeKind::Error, }, + ); + mark + } - // Figure out the main thing. Check for a... trait? - Expr::Unary(op, tok, arg) => { - let op = op.clone(); - let arg = arg.clone(); - let tok = tok.clone(); - let arg_type = self.expr_type(&arg, lines, true); - match (op, arg_type) { - (UnaryOp::Negate, Type::F64) => Type::F64, - (UnaryOp::Not, Type::Bool) => Type::Bool, + fn advance(&mut self) { + assert!(!self.eof()); // Don't try to advance past EOF + self.fuel.set(256); // Consuming a token, rest stuck detector + self.events.push(ParseEvent::Advance { + token: self.current.clone(), + }); + self.current = self.tokens.next(); + self.skip_ephemera(); + } - // This is dumb and should be punished, probably. - (_, Type::Unreachable) => { - let (line, col) = lines.position(tok.start); - self.errors.push(SyntaxError::new(line, col, format!("cannot apply a unary operator to something that doesn't yield a value"))); - Type::Error - } + fn skip_ephemera(&mut self) { + while self.current.kind == TokenKind::Whitespace || self.current.kind == TokenKind::Comment + { + self.current = self.tokens.next(); + } + } - // Propagate existing errors without additional complaint. - (_, Type::Error) => Type::Error, + fn eof(&self) -> bool { + self.current.kind == TokenKind::EOF + } - // Missed the whole table, must be an error. - (_, arg_type) => { - let (line, col) = lines.position(tok.start); - self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'"))); - Type::Error - } - } - } + fn peek(&self) -> TokenKind { + assert!(self.fuel.get() > 0, "parser is stuck!"); + self.fuel.set(self.fuel.get() - 1); + self.current.kind + } - Expr::Binary(op, tok, left, right) => { - let op = op.clone(); - let tok = tok.clone(); - let left = left.clone(); - let right = right.clone(); - let left_type = self.expr_type(&left, lines, true); - let right_type = self.expr_type(&right, lines, true); + fn at(&self, kind: TokenKind) -> bool { + self.peek() == kind + } - match (op, left_type, right_type) { - ( - BinaryOp::Add | BinaryOp::Subtract | BinaryOp::Multiply | BinaryOp::Divide, - Type::F64, - Type::F64, - ) => Type::F64, + fn eat(&mut self, kind: TokenKind) -> bool { + if self.at(kind) { + self.advance(); + true + } else { + false + } + } - (BinaryOp::Add, Type::String, Type::String) => Type::String, + fn expect(&mut self, kind: TokenKind, error: T) + where + T: Into, + { + if self.eat(kind) { + return; + } + self.error(error); + } - (BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool, + fn advance_with_error(&mut self, error: T) -> MarkClosed + where + T: Into, + { + let m = self.start(); + self.error(error); + self.advance(); + self.end(m, TreeKind::Error) + } - // This is dumb and should be punished, probably. - (_, _, Type::Unreachable) => { - let (line, col) = lines.position(tok.start); - self.errors.push(SyntaxError::new( - line, - col, - format!( - "cannot apply '{tok}' to an argument that doesn't yield a value (on the right)" - ), - )); - Type::Error - } - (_, Type::Unreachable, _) => { - let (line, col) = lines.position(tok.start); - self.errors.push(SyntaxError::new( - line, - col, - format!( - "cannot apply '{tok}' to an argument that doesn't yield a value (on the left)" - ), - )); - Type::Error - } + fn error(&mut self, message: T) + where + T: Into, + { + self.error_at(self.current.clone(), message) + } - // Propagate existing errors without additional complaint. - (_, Type::Error, _) => Type::Error, - (_, _, Type::Error) => Type::Error, + fn error_at(&mut self, token: Token<'a>, message: T) + where + T: Into, + { + let message: String = message.into(); + let mut final_message = "Error ".to_string(); - // Missed the whole table, it must be an error. - (_, left_type, right_type) => { - let (line, col) = lines.position(tok.start); - self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)"))); - Type::Error - } - } - } + if token.kind == TokenKind::EOF { + final_message.push_str("at end") + } else if token.kind != TokenKind::Error { + final_message.push_str("at '"); + final_message.push_str(token.as_str()); + final_message.push_str("'"); + } + final_message.push_str(": "); + final_message.push_str(&message); - Expr::Conditional(_, cond, then_exp, else_exp, _) => { - let cond = cond.clone(); - let then_exp = then_exp.clone(); - let else_exp = else_exp.clone(); + self.events.push(ParseEvent::Advance { + token: Token::error(token.start, final_message), + }); + } - let cond_type = self.expr_type(&cond, lines, true); - let then_type = self.expr_type(&then_exp, lines, value_required); - let else_type = else_exp.map(|e| self.expr_type(&e, lines, value_required)); - if !cond_type.compatible_with(&Type::Bool) { - if !cond_type.is_error() { - let span = self - .expr_span(&cond) - .expect("If the expression has a type it must have a span"); + fn build_tree(self) -> (SyntaxTree<'a>, Lines) { + let mut events = self.events; + let mut stack = Vec::new(); - let start = lines.position(span.0.start); - let end = lines.position(span.1.start); - self.errors.push(SyntaxError::new_spanned( - start, - end, - "the condition of an `if` expression must be a boolean", - )); - } - return Type::Error; + let mut result = SyntaxTree::new(); + + // The first element in our events vector must be a start; the whole + // thing must be bracketed in a tree. + assert!(matches!(events.get(0), Some(ParseEvent::Start { .. }))); + + // The last element in our events vector must be an end, otherwise + // the parser has failed badly. We'll remove it here so that, after + // processing the entire array, the stack retains the tree that we + // start with the very first ::Start. + assert!(matches!(events.pop(), Some(ParseEvent::End))); + + for event in events { + match event { + ParseEvent::Start { kind } => stack.push(Tree { + kind, + parent: None, + children: Vec::new(), + }), + + ParseEvent::End => { + let t = result.add_tree(stack.pop().unwrap()); + stack.last_mut().unwrap().children.push(Child::Tree(t)); } - match (then_type, else_type) { - (Type::Error, _) => Type::Error, - (_, Some(Type::Error)) => Type::Error, - - // It's an error to have a missing else branch if the value is required - (_, None) if value_required => { - let span = self - .expr_span(&exr) - .expect("How did I get this far with a broken parse?"); - let start = lines.position(span.0.start); - let end = lines.position(span.1.start); - self.errors.push(SyntaxError::new_spanned( - start, - end, - "this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value", - )); - Type::Error - } - - // If the value is required then the branches must be - // compatible, and the type of the expression is the type - // of the `then` branch. - (then_type, Some(else_type)) if value_required => { - if !then_type.compatible_with(&else_type) { - let span = self - .expr_span(&exr) - .expect("How did I get this far with a broken parse?"); - let start = lines.position(span.0.start); - let end = lines.position(span.1.start); - self.errors.push(SyntaxError::new_spanned( - start, - end, - format!("the type of the `then` branch ({then_type}) must match the type of the `else` branch ({else_type})"), - )); - Type::Error - } else { - then_type - } - } - - // The value must not be required, just mark this as unreachable. - (_, _) => { - assert!(!value_required); - Type::Unreachable - } + ParseEvent::Advance { token } => { + stack.last_mut().unwrap().children.push(Child::Token(token)); } } } + + assert!(stack.len() == 1, "Not all trees were ended!"); + let root = result.add_tree(stack.pop().unwrap()); + result.root = Some(root); + + (result, self.tokens.lines()) } } +pub fn parse(source: &str) -> (SyntaxTree, Lines) { + let tokens = Tokens::new(source); + let mut parser = CParser::new(tokens); + + file(&mut parser); + + parser.build_tree() +} + +fn file(p: &mut CParser) { + let m = p.start(); + while !p.eof() { + match p.peek() { + TokenKind::Fun => function(p), + _ => statement(p), + } + } + p.end(m, TreeKind::File); +} + +fn function(p: &mut CParser) { + assert!(p.at(TokenKind::Fun)); + let m = p.start(); + + p.expect(TokenKind::Fun, "expected a function to start with 'fun'"); + p.expect(TokenKind::Identifier, "expected a function name"); + if p.at(TokenKind::LeftParen) { + param_list(p); + } + if p.eat(TokenKind::Arrow) { + type_expr(p); + } + if p.at(TokenKind::LeftBrace) { + block(p); + } + + p.end(m, TreeKind::FunDecl); +} + +fn param_list(p: &mut CParser) { + assert!(p.at(TokenKind::LeftParen)); + let m = p.start(); + + p.expect(TokenKind::LeftParen, "expect '(' to start a parameter list"); + while !p.at(TokenKind::RightParen) && !p.eof() { + if p.at(TokenKind::Identifier) { + parameter(p); + } else { + break; + } + } + p.expect(TokenKind::RightParen, "expect ')' to end a parameter list"); + + p.end(m, TreeKind::ParamList); +} + +fn parameter(p: &mut CParser) { + assert!(p.at(TokenKind::Identifier)); + let m = p.start(); + p.expect( + TokenKind::Identifier, + "expected an identifier for a parameter name", + ); + if p.eat(TokenKind::Colon) { + type_expr(p); + } + if !p.at(TokenKind::RightParen) { + p.expect(TokenKind::Comma, "expected a comma between parameters"); + } + + p.end(m, TreeKind::Parameter); +} + +fn type_expr(p: &mut CParser) { + let m = p.start(); + // TODO: Other kinds of type expressions probably! + p.expect(TokenKind::Identifier, "expected the identifier of a type"); + p.end(m, TreeKind::TypeExpression); +} + +fn block(p: &mut CParser) { + assert!(p.at(TokenKind::LeftBrace)); + let m = p.start(); + + p.expect(TokenKind::LeftBrace, "expect '{' to start a block"); + while !p.at(TokenKind::RightBrace) && !p.eof() { + statement(p); + } + p.expect(TokenKind::RightBrace, "expect '}' to start a block"); + + p.end(m, TreeKind::Block); +} + +fn statement(p: &mut CParser) { + match p.peek() { + TokenKind::LeftBrace => block(p), + TokenKind::Let => statement_let(p), + TokenKind::Return => statement_return(p), + + // NOTE: Technically 'if' is an expression, but `if` doesn't + // require a semicolon at the end if it's all by itself. + TokenKind::If => statement_if(p), + + _ => statement_expression(p), + } +} + +fn statement_if(p: &mut CParser) { + assert!(p.at(TokenKind::If)); + let m = p.start(); + + conditional(p); + + p.end(m, TreeKind::IfStatement); +} + +fn statement_let(p: &mut CParser) { + assert!(p.at(TokenKind::Let)); + let m = p.start(); + + p.expect(TokenKind::Let, "expect 'let' to start a let statement"); + p.expect(TokenKind::Identifier, "expected a name for the variable"); + p.expect(TokenKind::Equal, "expected a '=' after the variable name"); + expression(p); + if !p.at(TokenKind::RightBrace) { + p.expect(TokenKind::Semicolon, "expect ';' to end a let statement"); + } + + p.end(m, TreeKind::LetStatement); +} + +fn statement_return(p: &mut CParser) { + assert!(p.at(TokenKind::Return)); + let m = p.start(); + + p.expect( + TokenKind::Return, + "expect 'return' to start a return statement", + ); + expression(p); + if !p.at(TokenKind::RightBrace) { + p.expect(TokenKind::Semicolon, "expect ';' to end a return statement"); + } + + p.end(m, TreeKind::ReturnStatement); +} + +fn statement_expression(p: &mut CParser) { + let m = p.start(); + + expression(p); + if !p.at(TokenKind::RightBrace) { + p.expect( + TokenKind::Semicolon, + "expect ';' to end an expression statement", + ); + } + + p.end(m, TreeKind::ExpressionStatement); +} + +fn expression(p: &mut CParser) { + expression_with_power(p, 0) +} + // BINDING POWERS. When parsing expressions we only accept expressions that // meet a minimum binding power. (This is like "precedence" but I just super // don't like that terminology.) @@ -428,7 +506,6 @@ const TERM_POWER: u8 = 5; // + - const FACTOR_POWER: u8 = 6; // * / const UNARY_POWER: u8 = 7; // ! - -// const CALL_POWER: u8 = 8; // . () // const PRIMARY_POWER: u8 = 9; fn token_power<'a>(token: TokenKind) -> Option { @@ -446,429 +523,143 @@ fn token_power<'a>(token: TokenKind) -> Option { } } -pub struct Parser<'a> { - tokens: Tokens<'a>, - tree: SyntaxTree<'a>, - current: Token<'a>, - previous: Token<'a>, +fn expression_with_power(p: &mut CParser, minimum_power: u8) { + let mut expr = prefix_expression(p); + while p.at(TokenKind::LeftParen) { + let m = p.start_before(expr); + argument_list(p); + expr = p.end(m, TreeKind::CallExpression); + } - panic_mode: bool, + loop { + let Some(power) = token_power(p.peek()) else { + break; + }; + if power < minimum_power { + break; + } + + // TODO: I don't think this works for other "infix" types, but we'll + // see won't we. + let m = p.start_before(expr); + p.advance(); // Consume the operator + expression_with_power(p, power); + expr = p.end(m, TreeKind::BinaryExpression); + } } -impl<'a> Parser<'a> { - pub fn new(source: &'a str) -> Self { - let mut parser = Parser { - tokens: Tokens::new(source), - tree: SyntaxTree::new(), - current: Token::new(TokenKind::EOF, 0, ""), - previous: Token::new(TokenKind::EOF, 0, ""), - panic_mode: false, - }; - parser.advance(); - parser +fn argument_list(p: &mut CParser) { + assert!(p.at(TokenKind::LeftParen)); + let m = p.start(); + + p.expect( + TokenKind::LeftParen, + "expect an argument list to start with '('", + ); + while !p.at(TokenKind::RightParen) && !p.eof() { + argument(p); + } + p.expect( + TokenKind::RightParen, + "expect an argument list to start with '('", + ); + + p.end(m, TreeKind::ArgumentList); +} + +fn argument(p: &mut CParser) { + let m = p.start(); + + expression(p); + if !p.at(TokenKind::RightParen) { + p.expect(TokenKind::Comma, "expect a ',' between arguments"); } - pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) { - let expr = self.expression(); - self.consume(TokenKind::EOF, "expected end of expression"); - (self.tree, expr, self.tokens.lines()) + p.end(m, TreeKind::Argument); +} + +fn prefix_expression(p: &mut CParser) -> MarkClosed { + match p.peek() { + TokenKind::Number => literal(p), + TokenKind::String => literal(p), + TokenKind::True => literal(p), + TokenKind::False => literal(p), + + TokenKind::LeftParen => grouping(p), + + TokenKind::Bang => unary(p), + TokenKind::Minus => unary(p), + + TokenKind::If => conditional(p), + + TokenKind::Identifier => identifier(p), + + _ => p.advance_with_error("expected an expression"), } +} - fn expression(&mut self) -> ExprRef { - self.expression_with_power(0) - } +fn literal(p: &mut CParser) -> MarkClosed { + let m = p.start(); + p.advance(); + p.end(m, TreeKind::LiteralExpression) +} - fn expression_with_power(&mut self, minimum_power: u8) -> ExprRef { - self.trace("expression with power"); - self.advance(); - let mut expr = self.prefix_expression(); - loop { - let power = match token_power(self.current.kind) { - Some(p) => p, - None => break, // EOF, end of expression? - }; +fn grouping(p: &mut CParser) -> MarkClosed { + assert!(p.at(TokenKind::LeftParen)); + let m = p.start(); - if power < minimum_power { - break; - } + p.expect(TokenKind::LeftParen, "expected '(' to start grouping"); + expression(p); + p.expect(TokenKind::RightParen, "unmatched parentheses in expression"); - self.advance(); - expr = self.infix_expression(power, expr); - } - expr - } + p.end(m, TreeKind::GroupingExpression) +} - fn prefix_expression(&mut self) -> ExprRef { - self.trace("prefix"); - let token = &self.previous; - match token.kind { - TokenKind::Bang => self.unary(), - TokenKind::LeftParen => self.grouping(), - TokenKind::Number => self.number(), - TokenKind::Minus => self.unary(), - TokenKind::String => self.string(), +fn unary(p: &mut CParser) -> MarkClosed { + let m = p.start(); - TokenKind::True => self - .tree - .add_expr(Expr::Literal(Literal::Bool(true), token.clone())), - TokenKind::False => self - .tree - .add_expr(Expr::Literal(Literal::Bool(false), token.clone())), + p.advance(); // Past the operator + expression_with_power(p, UNARY_POWER); - TokenKind::If => self.conditional(), + p.end(m, TreeKind::UnaryExpression) +} - _ => { - self.error("expected an expression"); - ExprRef::error() - } - } - } +fn conditional(p: &mut CParser) -> MarkClosed { + assert!(p.at(TokenKind::If)); + let m = p.start(); - fn infix_expression(&mut self, power: u8, left: ExprRef) -> ExprRef { - self.trace("infix"); - match self.previous.kind { - TokenKind::Plus - | TokenKind::Minus - | TokenKind::Star - | TokenKind::Slash - | TokenKind::And - | TokenKind::Or => self.binary(power, left), - _ => panic!("Unknown infix operator, dispatch error?"), - } - } - - fn number(&mut self) -> ExprRef { - let token = &self.previous; - // What kind is it? For now let's just ... make it good. - - let literal = match token.as_str().parse::() { - Ok(v) => Literal::Float64(v), - Err(e) => { - self.error(format!("invalid f64: {e}")); - return ExprRef::error(); - } - }; - - self.tree.add_expr(Expr::Literal(literal, token.clone())) - } - - fn string(&mut self) -> ExprRef { - let token = &self.previous; - - let mut result = String::new(); - let mut input = token.as_str().chars(); - - assert!(input.next().is_some()); // Delimiter - while let Some(ch) = input.next() { - match ch { - '\\' => match input.next().unwrap() { - 'n' => result.push('\n'), - 'r' => result.push('\r'), - 't' => result.push('\t'), - ch => result.push(ch), - }, - _ => result.push(ch), - } - } - result.pop(); // We pushed the other delimiter on, whoops. - - let literal = Literal::String(result); - self.tree.add_expr(Expr::Literal(literal, token.clone())) - } - - fn grouping(&mut self) -> ExprRef { - let result = self.expression(); - self.consume(TokenKind::RightParen, "expected ')' after an expression"); - result - } - - fn conditional(&mut self) -> ExprRef { - let token = self.previous.clone(); - let condition_expr = self.expression(); - self.consume(TokenKind::LeftBrace, "expected '{' to start an 'if' block"); - let then_expr = self.expression(); - self.consume(TokenKind::RightBrace, "expected '}' to end an 'if' block"); - let else_expr = if self.current.kind == TokenKind::Else { - self.advance(); - if self.current.kind == TokenKind::If { - self.advance(); - Some(self.conditional()) - } else { - self.consume( - TokenKind::LeftBrace, - "expected '{' to start an 'else' block", - ); - let else_expr = self.expression(); - self.consume(TokenKind::RightBrace, "Expected '}' to end an 'else' block"); - Some(else_expr) - } + p.expect(TokenKind::If, "expected conditional to start with 'if'"); + expression(p); + block(p); + if p.eat(TokenKind::Else) { + if p.at(TokenKind::If) { + // Don't require another block, just jump right into the conditional. + conditional(p); } else { - None - }; - let tail = self.previous.clone(); - self.tree.add_expr(Expr::Conditional( - token, - condition_expr, - then_expr, - else_expr, - tail, - )) - } - - fn unary(&mut self) -> ExprRef { - let token = self.previous.clone(); - let kind = token.kind; - let expr = self.expression_with_power(UNARY_POWER); - let op = match kind { - TokenKind::Minus => UnaryOp::Negate, - TokenKind::Bang => UnaryOp::Not, - _ => panic!("unsuitable unary: {:?}: no op", kind), - }; - - self.tree.add_expr(Expr::Unary(op, token, expr)) - } - - fn binary(&mut self, power: u8, left: ExprRef) -> ExprRef { - let token = self.previous.clone(); - let op = match token.kind { - TokenKind::Plus => BinaryOp::Add, - TokenKind::Minus => BinaryOp::Subtract, - TokenKind::Star => BinaryOp::Multiply, - TokenKind::Slash => BinaryOp::Divide, - TokenKind::And => BinaryOp::And, - TokenKind::Or => BinaryOp::Or, - _ => panic!("unsuitable binary: {:?}: no op", self.previous), - }; - let right = self.expression_with_power(power + 1); - self.tree.add_expr(Expr::Binary(op, token, left, right)) - } - - fn advance(&mut self) { - self.previous = self.current.clone(); - self.current = self.tokens.next(); - while self.current.kind == TokenKind::Error - || self.current.kind == TokenKind::Whitespace - || self.current.kind == TokenKind::Comment - { - if self.current.kind == TokenKind::Error { - self.error_at_current(self.current.to_string()); - } - self.current = self.tokens.next(); + block(p); } } - fn consume(&mut self, kind: TokenKind, error: &str) { - if self.current.kind == kind { - self.advance(); - } else { - self.error_at_current(error); - } - } + p.end(m, TreeKind::ConditionalExpression) +} - fn error(&mut self, message: T) - where - T: Into, - { - self.error_at(self.previous.clone(), message) - } +fn identifier(p: &mut CParser) -> MarkClosed { + assert!(p.at(TokenKind::Identifier)); + let m = p.start(); - fn error_at_current(&mut self, message: T) - where - T: Into, - { - self.error_at(self.current.clone(), message) - } + p.advance(); - fn error_at(&mut self, token: Token<'a>, message: T) - where - T: Into, - { - if self.panic_mode { - return; - } - self.panic_mode = true; - - let message: String = message.into(); - let (line, column) = self.tokens.token_position(&token); - let mut final_message = "Error ".to_string(); - - if token.kind == TokenKind::EOF { - final_message.push_str("at end") - } else if token.kind != TokenKind::Error { - final_message.push_str("at '"); - final_message.push_str(token.as_str()); - final_message.push_str("'"); - } - final_message.push_str(": "); - final_message.push_str(&message); - - self.tree - .add_error(SyntaxError::new(line, column, final_message)); - } - - fn trace(&self, _msg: &str) { - // let cpos = self.tokens.token_position(&self.current); - // let ppos = self.tokens.token_position(&self.previous); - - // eprintln!( - // "[{}:{}:{}] [{}:{}:{}]: {msg}", - // ppos.0, - // ppos.1, - // self.previous - // .as_ref() - // .map(|t| t.as_str()) - // .unwrap_or(""), - // cpos.0, - // cpos.1, - // self.current.as_ref().map(|t| t.as_str()).unwrap_or("") - // ); - } + p.end(m, TreeKind::Identifier) } #[cfg(test)] mod tests { use super::*; - use pretty_assertions::assert_eq; - fn test_successful_expression_parse(source: &str, expected: &str, expected_type: Type) { - let (mut tree, expr, lines) = Parser::new(source).parse(); - assert_eq!( - Vec::::new(), - tree.errors, - "Expected successful parse" - ); - assert_eq!( - expected, - tree.dump_expr(&expr), - "The parse structure of the expressions did not match" - ); - - // TODO: 'assert_eq' is probably wrong here - let expr_type = tree.expr_type(&expr, &lines, true); - assert!( - expected_type.compatible_with(&expr_type), - "The type of the expression did not match. expected: {expected_type}, actual: {expr_type}" - ); + #[test] + fn tree_ref_size() { + // What's the point of doing all that work if the tree ref isn't nice + // and "small"? + assert_eq!(4, std::mem::size_of::>()); } - - macro_rules! test_expr { - ($name:ident, $input:expr, $expected:expr, $type:expr) => { - #[test] - fn $name() { - test_successful_expression_parse($input, $expected, $type); - } - }; - } - - test_expr!(number_expr, "12", "12", Type::F64); - test_expr!(add_expr, "1 + 2", "(+ 1 2)", Type::F64); - test_expr!( - prec_expr, - "1 + 2 * 3 - 7 * 7", - "(- (+ 1 (* 2 3)) (* 7 7))", - Type::F64 - ); - test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)", Type::F64); - test_expr!( - strings, - r#" "Hello " + 'world!' "#, - r#"(+ "Hello " 'world!')"#, - Type::String - ); - - test_expr!( - booleans, - "true and false or false and !true", - "(or (and true false) (and false (! true)))", - Type::Bool - ); - - test_expr!( - if_expression, - "if true { 23 } else { 45 }", - "(if true 23 45)", - Type::F64 - ); - // test_expr!( - // if_with_return, - // "if true { 23 } else { return 'nothing' }", - // "", - // Type::F64 - // ); - - // ======================================================================== - // Type Error Tests - // ======================================================================== - - fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) { - let (mut tree, expr, lines) = Parser::new(source).parse(); - assert_eq!( - Vec::::new(), - tree.errors, - "Expected successful parse" - ); - - let expr_type = tree.expr_type(&expr, &lines, true); - assert!(expr_type.is_error()); - - let actual_errors = tree - .errors - .iter() - .map(|e| e.message.as_str()) - .collect::>(); - assert_eq!(expected_errors, actual_errors); - } - - macro_rules! test_type_error_expr { - ($name:ident, $input:expr, $($s:expr),+) => { - #[test] - fn $name() { - let expected_errors: Vec<&str> = (vec![$($s),*]); - test_type_error_expression($input, expected_errors); - } - } - } - - test_type_error_expr!( - negate_string, - "-('what?')", - "cannot apply unary operator '-' to expression of type 'string'" - ); - - test_type_error_expr!( - add_string_number, - "'what?' + 5", - "cannot apply binary operator '+' to expressions of type 'string' (on the left) and 'f64' (on the right)" - ); - - test_type_error_expr!( - add_number_string, - "5 + 'what?'", - "cannot apply binary operator '+' to expressions of type 'f64' (on the left) and 'string' (on the right)" - ); - - test_type_error_expr!( - errors_propagate_do_not_duplicate, - "!'hello' / 27 * -('what?') + 23", - "cannot apply unary operator '!' to expression of type 'string'", - "cannot apply unary operator '-' to expression of type 'string'" - ); - - test_type_error_expr!( - if_not_bool, - "if 23 { 1 } else { 2 }", - "the condition of an `if` expression must be a boolean" - ); - - test_type_error_expr!( - if_arm_mismatch, - "if true { 1 } else { '1' }", - "the type of the `then` branch (f64) must match the type of the `else` branch (string)" - ); - - test_type_error_expr!( - if_no_else, - "if true { 1 }", - "this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value" - ); } diff --git a/fine/src/parser/concrete.rs b/fine/src/parser/concrete.rs deleted file mode 100644 index 8c36d85f..00000000 --- a/fine/src/parser/concrete.rs +++ /dev/null @@ -1,663 +0,0 @@ -// NOTE: much of this parser structure derived from -// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html -use crate::tokens::{Lines, Token, TokenKind, Tokens}; -use std::{cell::Cell, num::NonZeroU32}; - -pub struct SyntaxTree<'a> { - trees: Vec>, - root: Option, -} - -impl<'a> SyntaxTree<'a> { - pub fn new() -> Self { - SyntaxTree { - trees: vec![], - root: None, - } - } - - pub fn add_tree(&mut self, t: Tree<'a>) -> TreeRef { - assert!(t.parent.is_none()); - let tr = TreeRef::from_index(self.trees.len()); - - // NOTE: Because of the difficulty of holding multiple mutable - // references it's this is our best chance to patch up parent - // pointers. - for child in t.children.iter() { - if let Child::Tree(ct) = child { - self[*ct].parent = Some(tr); - } - } - self.trees.push(t); - tr - } - - pub fn dump(&self) -> String { - match self.root { - Some(r) => self[r].dump(self), - None => String::new(), - } - } -} - -impl<'a> std::ops::Index for SyntaxTree<'a> { - type Output = Tree<'a>; - - fn index(&self, index: TreeRef) -> &Self::Output { - &self.trees[index.index()] - } -} - -impl<'a> std::ops::IndexMut for SyntaxTree<'a> { - fn index_mut(&mut self, index: TreeRef) -> &mut Self::Output { - &mut self.trees[index.index()] - } -} - -#[derive(Debug)] -pub enum TreeKind { - Error, - File, - FunDecl, - ParamList, - Parameter, - TypeExpression, - Block, - LetStatement, - ReturnStatement, - ExpressionStatement, - LiteralExpression, - GroupingExpression, - UnaryExpression, - ConditionalExpression, - CallExpression, - ArgumentList, - Argument, - BinaryExpression, - IfStatement, - Identifier, -} - -pub struct Tree<'a> { - pub kind: TreeKind, - pub parent: Option, - pub children: Vec>, -} - -#[derive(Copy, Clone, Eq, PartialEq)] -pub struct TreeRef(NonZeroU32); - -impl TreeRef { - pub fn from_index(index: usize) -> TreeRef { - let index: u32 = (index + 1).try_into().unwrap(); - TreeRef(NonZeroU32::new(index).unwrap()) - } - - pub fn index(&self) -> usize { - let index: usize = self.0.get().try_into().unwrap(); - index - 1 - } -} - -impl<'a> Tree<'a> { - pub fn dump(&self, tree: &SyntaxTree<'a>) -> String { - let mut output = String::new(); - output.push_str(&format!("{:?}\n", self.kind)); - for child in self.children.iter() { - child.dump_rec(2, tree, &mut output); - } - output - } -} - -pub enum Child<'a> { - Token(Token<'a>), - Tree(TreeRef), -} - -impl<'a> Child<'a> { - fn dump_rec(&self, indent: usize, tree: &SyntaxTree<'a>, output: &mut String) { - for _ in 0..indent { - output.push(' '); - } - match self { - Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())), - Child::Tree(t) => { - let t = &tree[*t]; - output.push_str(&format!("{:?}\n", t.kind)); - for child in t.children.iter() { - child.dump_rec(indent + 2, tree, output); - } - } - } - } -} - -enum ParseEvent<'a> { - Start { kind: TreeKind }, - End, - Advance { token: Token<'a> }, -} - -struct MarkStarted { - index: usize, -} - -struct MarkClosed { - index: usize, -} - -struct CParser<'a> { - tokens: Tokens<'a>, - current: Token<'a>, - fuel: Cell, - events: Vec>, -} - -impl<'a> CParser<'a> { - fn new(tokens: Tokens<'a>) -> Self { - let mut parser = CParser { - tokens, - current: Token::new(TokenKind::EOF, 0, ""), - fuel: Cell::new(256), - events: Vec::new(), - }; - parser.current = parser.tokens.next(); - parser.skip_ephemera(); - parser - } - - fn start(&mut self) -> MarkStarted { - let mark = MarkStarted { - index: self.events.len(), - }; - self.events.push(ParseEvent::Start { - kind: TreeKind::Error, - }); - mark - } - - fn end(&mut self, mark: MarkStarted, kind: TreeKind) -> MarkClosed { - self.events[mark.index] = ParseEvent::Start { kind }; - self.events.push(ParseEvent::End); - MarkClosed { index: mark.index } - } - - fn start_before(&mut self, mark: MarkClosed) -> MarkStarted { - // TODO: Point backwards and pointer chase in tree build? - let mark = MarkStarted { index: mark.index }; - self.events.insert( - mark.index, - ParseEvent::Start { - kind: TreeKind::Error, - }, - ); - mark - } - - fn advance(&mut self) { - assert!(!self.eof()); // Don't try to advance past EOF - self.fuel.set(256); // Consuming a token, rest stuck detector - self.events.push(ParseEvent::Advance { - token: self.current.clone(), - }); - self.current = self.tokens.next(); - self.skip_ephemera(); - } - - fn skip_ephemera(&mut self) { - while self.current.kind == TokenKind::Whitespace || self.current.kind == TokenKind::Comment - { - self.current = self.tokens.next(); - } - } - - fn eof(&self) -> bool { - self.current.kind == TokenKind::EOF - } - - fn peek(&self) -> TokenKind { - assert!(self.fuel.get() > 0, "parser is stuck!"); - self.fuel.set(self.fuel.get() - 1); - self.current.kind - } - - fn at(&self, kind: TokenKind) -> bool { - self.peek() == kind - } - - fn eat(&mut self, kind: TokenKind) -> bool { - if self.at(kind) { - self.advance(); - true - } else { - false - } - } - - fn expect(&mut self, kind: TokenKind, error: T) - where - T: Into, - { - if self.eat(kind) { - return; - } - self.error(error); - } - - fn advance_with_error(&mut self, error: T) -> MarkClosed - where - T: Into, - { - let m = self.start(); - self.error(error); - self.advance(); - self.end(m, TreeKind::Error) - } - - fn error(&mut self, message: T) - where - T: Into, - { - self.error_at(self.current.clone(), message) - } - - fn error_at(&mut self, token: Token<'a>, message: T) - where - T: Into, - { - let message: String = message.into(); - let mut final_message = "Error ".to_string(); - - if token.kind == TokenKind::EOF { - final_message.push_str("at end") - } else if token.kind != TokenKind::Error { - final_message.push_str("at '"); - final_message.push_str(token.as_str()); - final_message.push_str("'"); - } - final_message.push_str(": "); - final_message.push_str(&message); - - self.events.push(ParseEvent::Advance { - token: Token::error(token.start, final_message), - }); - } - - fn build_tree(self) -> (SyntaxTree<'a>, Lines) { - let mut events = self.events; - let mut stack = Vec::new(); - - let mut result = SyntaxTree::new(); - - // The first element in our events vector must be a start; the whole - // thing must be bracketed in a tree. - assert!(matches!(events.get(0), Some(ParseEvent::Start { .. }))); - - // The last element in our events vector must be an end, otherwise - // the parser has failed badly. We'll remove it here so that, after - // processing the entire array, the stack retains the tree that we - // start with the very first ::Start. - assert!(matches!(events.pop(), Some(ParseEvent::End))); - - for event in events { - match event { - ParseEvent::Start { kind } => stack.push(Tree { - kind, - parent: None, - children: Vec::new(), - }), - - ParseEvent::End => { - let t = result.add_tree(stack.pop().unwrap()); - stack.last_mut().unwrap().children.push(Child::Tree(t)); - } - - ParseEvent::Advance { token } => { - stack.last_mut().unwrap().children.push(Child::Token(token)); - } - } - } - - assert!(stack.len() == 1, "Not all trees were ended!"); - let root = result.add_tree(stack.pop().unwrap()); - result.root = Some(root); - - (result, self.tokens.lines()) - } -} - -pub fn parse_concrete(source: &str) -> (SyntaxTree, Lines) { - let tokens = Tokens::new(source); - let mut parser = CParser::new(tokens); - - file(&mut parser); - - parser.build_tree() -} - -fn file(p: &mut CParser) { - let m = p.start(); - while !p.eof() { - match p.peek() { - TokenKind::Fun => function(p), - _ => statement(p), - } - } - p.end(m, TreeKind::File); -} - -fn function(p: &mut CParser) { - assert!(p.at(TokenKind::Fun)); - let m = p.start(); - - p.expect(TokenKind::Fun, "expected a function to start with 'fun'"); - p.expect(TokenKind::Identifier, "expected a function name"); - if p.at(TokenKind::LeftParen) { - param_list(p); - } - if p.eat(TokenKind::Arrow) { - type_expr(p); - } - if p.at(TokenKind::LeftBrace) { - block(p); - } - - p.end(m, TreeKind::FunDecl); -} - -fn param_list(p: &mut CParser) { - assert!(p.at(TokenKind::LeftParen)); - let m = p.start(); - - p.expect(TokenKind::LeftParen, "expect '(' to start a parameter list"); - while !p.at(TokenKind::RightParen) && !p.eof() { - if p.at(TokenKind::Identifier) { - parameter(p); - } else { - break; - } - } - p.expect(TokenKind::RightParen, "expect ')' to end a parameter list"); - - p.end(m, TreeKind::ParamList); -} - -fn parameter(p: &mut CParser) { - assert!(p.at(TokenKind::Identifier)); - let m = p.start(); - p.expect( - TokenKind::Identifier, - "expected an identifier for a parameter name", - ); - if p.eat(TokenKind::Colon) { - type_expr(p); - } - if !p.at(TokenKind::RightParen) { - p.expect(TokenKind::Comma, "expected a comma between parameters"); - } - - p.end(m, TreeKind::Parameter); -} - -fn type_expr(p: &mut CParser) { - let m = p.start(); - // TODO: Other kinds of type expressions probably! - p.expect(TokenKind::Identifier, "expected the identifier of a type"); - p.end(m, TreeKind::TypeExpression); -} - -fn block(p: &mut CParser) { - assert!(p.at(TokenKind::LeftBrace)); - let m = p.start(); - - p.expect(TokenKind::LeftBrace, "expect '{' to start a block"); - while !p.at(TokenKind::RightBrace) && !p.eof() { - statement(p); - } - p.expect(TokenKind::RightBrace, "expect '}' to start a block"); - - p.end(m, TreeKind::Block); -} - -fn statement(p: &mut CParser) { - match p.peek() { - TokenKind::LeftBrace => block(p), - TokenKind::Let => statement_let(p), - TokenKind::Return => statement_return(p), - - // NOTE: Technically 'if' is an expression, but `if` doesn't - // require a semicolon at the end if it's all by itself. - TokenKind::If => statement_if(p), - - _ => statement_expression(p), - } -} - -fn statement_if(p: &mut CParser) { - assert!(p.at(TokenKind::If)); - let m = p.start(); - - conditional(p); - - p.end(m, TreeKind::IfStatement); -} - -fn statement_let(p: &mut CParser) { - assert!(p.at(TokenKind::Let)); - let m = p.start(); - - p.expect(TokenKind::Let, "expect 'let' to start a let statement"); - p.expect(TokenKind::Identifier, "expected a name for the variable"); - p.expect(TokenKind::Equal, "expected a '=' after the variable name"); - expression(p); - if !p.at(TokenKind::RightBrace) { - p.expect(TokenKind::Semicolon, "expect ';' to end a let statement"); - } - - p.end(m, TreeKind::LetStatement); -} - -fn statement_return(p: &mut CParser) { - assert!(p.at(TokenKind::Return)); - let m = p.start(); - - p.expect( - TokenKind::Return, - "expect 'return' to start a return statement", - ); - expression(p); - if !p.at(TokenKind::RightBrace) { - p.expect(TokenKind::Semicolon, "expect ';' to end a return statement"); - } - - p.end(m, TreeKind::ReturnStatement); -} - -fn statement_expression(p: &mut CParser) { - let m = p.start(); - - expression(p); - if !p.at(TokenKind::RightBrace) { - p.expect( - TokenKind::Semicolon, - "expect ';' to end an expression statement", - ); - } - - p.end(m, TreeKind::ExpressionStatement); -} - -fn expression(p: &mut CParser) { - expression_with_power(p, 0) -} - -// BINDING POWERS. When parsing expressions we only accept expressions that -// meet a minimum binding power. (This is like "precedence" but I just super -// don't like that terminology.) -const ASSIGNMENT_POWER: u8 = 0; // = -const OR_POWER: u8 = 1; // or -const AND_POWER: u8 = 2; // and -const EQUALITY_POWER: u8 = 3; // == != -const COMPARISON_POWER: u8 = 4; // < > <= >= -const TERM_POWER: u8 = 5; // + - -const FACTOR_POWER: u8 = 6; // * / -const UNARY_POWER: u8 = 7; // ! - - -// const PRIMARY_POWER: u8 = 9; - -fn token_power<'a>(token: TokenKind) -> Option { - match token { - TokenKind::Equal => Some(ASSIGNMENT_POWER), - TokenKind::Or => Some(OR_POWER), - TokenKind::And => Some(AND_POWER), - TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER), - TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => { - Some(COMPARISON_POWER) - } - TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER), - TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER), - _ => None, - } -} - -fn expression_with_power(p: &mut CParser, minimum_power: u8) { - let mut expr = prefix_expression(p); - while p.at(TokenKind::LeftParen) { - let m = p.start_before(expr); - argument_list(p); - expr = p.end(m, TreeKind::CallExpression); - } - - loop { - let Some(power) = token_power(p.peek()) else { - break; - }; - if power < minimum_power { - break; - } - - // TODO: I don't think this works for other "infix" types, but we'll - // see won't we. - let m = p.start_before(expr); - p.advance(); // Consume the operator - expression_with_power(p, power); - expr = p.end(m, TreeKind::BinaryExpression); - } -} - -fn argument_list(p: &mut CParser) { - assert!(p.at(TokenKind::LeftParen)); - let m = p.start(); - - p.expect( - TokenKind::LeftParen, - "expect an argument list to start with '('", - ); - while !p.at(TokenKind::RightParen) && !p.eof() { - argument(p); - } - p.expect( - TokenKind::RightParen, - "expect an argument list to start with '('", - ); - - p.end(m, TreeKind::ArgumentList); -} - -fn argument(p: &mut CParser) { - let m = p.start(); - - expression(p); - if !p.at(TokenKind::RightParen) { - p.expect(TokenKind::Comma, "expect a ',' between arguments"); - } - - p.end(m, TreeKind::Argument); -} - -fn prefix_expression(p: &mut CParser) -> MarkClosed { - match p.peek() { - TokenKind::Number => literal(p), - TokenKind::String => literal(p), - TokenKind::True => literal(p), - TokenKind::False => literal(p), - - TokenKind::LeftParen => grouping(p), - - TokenKind::Bang => unary(p), - TokenKind::Minus => unary(p), - - TokenKind::If => conditional(p), - - TokenKind::Identifier => identifier(p), - - _ => p.advance_with_error("expected an expression"), - } -} - -fn literal(p: &mut CParser) -> MarkClosed { - let m = p.start(); - p.advance(); - p.end(m, TreeKind::LiteralExpression) -} - -fn grouping(p: &mut CParser) -> MarkClosed { - assert!(p.at(TokenKind::LeftParen)); - let m = p.start(); - - p.expect(TokenKind::LeftParen, "expected '(' to start grouping"); - expression(p); - p.expect(TokenKind::RightParen, "unmatched parentheses in expression"); - - p.end(m, TreeKind::GroupingExpression) -} - -fn unary(p: &mut CParser) -> MarkClosed { - let m = p.start(); - - p.advance(); // Past the operator - expression_with_power(p, UNARY_POWER); - - p.end(m, TreeKind::UnaryExpression) -} - -fn conditional(p: &mut CParser) -> MarkClosed { - assert!(p.at(TokenKind::If)); - let m = p.start(); - - p.expect(TokenKind::If, "expected conditional to start with 'if'"); - expression(p); - block(p); - if p.eat(TokenKind::Else) { - if p.at(TokenKind::If) { - // Don't require another block, just jump right into the conditional. - conditional(p); - } else { - block(p); - } - } - - p.end(m, TreeKind::ConditionalExpression) -} - -fn identifier(p: &mut CParser) -> MarkClosed { - assert!(p.at(TokenKind::Identifier)); - let m = p.start(); - - p.advance(); - - p.end(m, TreeKind::Identifier) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn tree_ref_size() { - // What's the point of doing all that work if the tree ref isn't nice - // and "small"? - assert_eq!(4, std::mem::size_of::>()); - } -} diff --git a/fine/src/parser/old.rs b/fine/src/parser/old.rs new file mode 100644 index 00000000..2efe09b4 --- /dev/null +++ b/fine/src/parser/old.rs @@ -0,0 +1,872 @@ +use crate::tokens::{Lines, Token, TokenKind, Tokens}; +use std::fmt; + +// TODO: An error should have: +// +// - a start +// - an end +// - a focus +// - descriptive messages +// +// that will have to wait for now +#[derive(PartialEq, Eq)] +pub struct SyntaxError { + pub start: (usize, usize), + pub end: (usize, usize), + pub message: String, +} + +impl SyntaxError { + pub fn new(line: usize, column: usize, message: T) -> Self + where + T: ToString, + { + SyntaxError { + start: (line, column), + end: (line, column), + message: message.to_string(), + } + } + + pub fn new_spanned(start: (usize, usize), end: (usize, usize), message: T) -> Self + where + T: ToString, + { + SyntaxError { + start, + end, + message: message.to_string(), + } + } +} + +impl fmt::Debug for SyntaxError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{self}") + } +} + +impl fmt::Display for SyntaxError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}:{}: {}", self.start.0, self.end.0, self.message) + } +} + +#[derive(Clone)] +pub enum Literal { + Float64(f64), + String(String), + Bool(bool), +} + +#[derive(Copy, Clone)] +pub enum UnaryOp { + Negate, + Not, +} + +#[derive(Copy, Clone)] +pub enum BinaryOp { + Add, + Subtract, + Multiply, + Divide, + And, + Or, +} + +#[derive(Clone)] +pub enum Expr<'a> { + Literal(Literal, Token<'a>), + Unary(UnaryOp, Token<'a>, ExprRef), + Binary(BinaryOp, Token<'a>, ExprRef, ExprRef), + Conditional(Token<'a>, ExprRef, ExprRef, Option, Token<'a>), +} + +#[derive(Clone)] +pub struct ExprRef(Option); + +impl ExprRef { + pub fn error() -> Self { + ExprRef(None) + } +} + +// TODO: Eventually we will be unable to use Eq and PartialEq here, and will +// need to do out own thing. +#[derive(Copy, Clone)] +pub enum Type { + // Signals a type error. If you receive this then you know that an error + // has already been reported; if you produce this be sure to also note + // the error in the errors collection. + Error, + + // Signals that the expression has a control-flow side-effect and that no + // value will ever result from this expression. Usually this means + // everything's fine. + Unreachable, + + // TODO: Numeric literals should be implicitly convertable, unlike other + // types. Maybe just "numeric literal" type? + F64, + String, + Bool, +} + +impl Type { + pub fn is_error(&self) -> bool { + match self { + Type::Error => true, + _ => false, + } + } + + pub fn compatible_with(&self, other: &Type) -> bool { + // TODO: This is wrong; we because of numeric literals etc. + match (self, other) { + (Type::F64, Type::F64) => true, + (Type::String, Type::String) => true, + (Type::Bool, Type::Bool) => true, + (Type::Unreachable, Type::Unreachable) => true, + + // Avoid introducing more errors + (Type::Error, _) => true, + (_, Type::Error) => true, + + (_, _) => false, + } + } +} + +impl std::fmt::Debug for Type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{self}") + } +} + +impl std::fmt::Display for Type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use Type::*; + match self { + Error => write!(f, "<< INTERNAL ERROR >>"), + Unreachable => write!(f, "<< UNREACHABLE >>"), + F64 => write!(f, "f64"), + String => write!(f, "string"), + Bool => write!(f, "bool"), + } + } +} + +pub struct SyntaxTree<'a> { + pub errors: Vec, + expressions: Vec>, +} + +impl<'a> SyntaxTree<'a> { + pub fn new() -> Self { + SyntaxTree { + errors: Vec::new(), + expressions: Vec::new(), + } + } + + pub fn add_error(&mut self, error: SyntaxError) { + self.errors.push(error); + } + + pub fn add_expr(&mut self, expr: Expr<'a>) -> ExprRef { + let index = self.expressions.len(); + self.expressions.push(expr); + ExprRef(Some(index)) + } + + pub fn dump_expr(&self, expr: &ExprRef) -> String { + match expr.0 { + Some(idx) => { + let expr = &self.expressions[idx]; + match expr { + Expr::Literal(_, tok) => tok.to_string(), + Expr::Unary(_, tok, e) => { + format!("({tok} {})", self.dump_expr(e)) + } + Expr::Binary(_, tok, l, r) => { + format!("({tok} {} {})", self.dump_expr(l), self.dump_expr(r)) + } + Expr::Conditional(tok, cond, t, e, _) => { + if let Some(e) = e { + format!( + "({tok} {} {} {})", + self.dump_expr(cond), + self.dump_expr(t), + self.dump_expr(e) + ) + } else { + format!("({tok} {} {})", self.dump_expr(cond), self.dump_expr(t)) + } + } + } + } + None => "<|EOF|>".to_string(), + } + } + + pub fn expr_span(&self, expr: &ExprRef) -> Option<(Token<'a>, Token<'a>)> { + let expr = match expr.0 { + Some(idx) => &self.expressions[idx], + None => return None, + }; + + match expr { + Expr::Literal(_, tok) => Some((tok.clone(), tok.clone())), + Expr::Unary(_, tok, arg) => { + let arg = self.expr_span(arg); + match arg { + None => None, + Some((_, end)) => Some((tok.clone(), end)), + } + } + Expr::Binary(_, _, left, right) => { + let left = self.expr_span(left); + let right = self.expr_span(right); + match (left, right) { + (None, _) => None, + (_, None) => None, + (Some((start, _)), Some((_, end))) => Some((start, end)), + } + } + Expr::Conditional(head, _, _, _, tail) => Some((head.clone(), tail.clone())), + } + } + + pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines, value_required: bool) -> Type { + // TODO: Cache and work on demand? Or is this just fine? + + let exr = expr.clone(); + let expr = match expr.0 { + Some(idx) => &self.expressions[idx], + None => return Type::Error, + }; + match expr { + Expr::Literal(lit, _) => match lit { + Literal::Float64(_) => Type::F64, + Literal::String(_) => Type::String, + Literal::Bool(_) => Type::Bool, + }, + + // Figure out the main thing. Check for a... trait? + Expr::Unary(op, tok, arg) => { + let op = op.clone(); + let arg = arg.clone(); + let tok = tok.clone(); + let arg_type = self.expr_type(&arg, lines, true); + match (op, arg_type) { + (UnaryOp::Negate, Type::F64) => Type::F64, + (UnaryOp::Not, Type::Bool) => Type::Bool, + + // This is dumb and should be punished, probably. + (_, Type::Unreachable) => { + let (line, col) = lines.position(tok.start); + self.errors.push(SyntaxError::new(line, col, format!("cannot apply a unary operator to something that doesn't yield a value"))); + Type::Error + } + + // Propagate existing errors without additional complaint. + (_, Type::Error) => Type::Error, + + // Missed the whole table, must be an error. + (_, arg_type) => { + let (line, col) = lines.position(tok.start); + self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'"))); + Type::Error + } + } + } + + Expr::Binary(op, tok, left, right) => { + let op = op.clone(); + let tok = tok.clone(); + let left = left.clone(); + let right = right.clone(); + let left_type = self.expr_type(&left, lines, true); + let right_type = self.expr_type(&right, lines, true); + + match (op, left_type, right_type) { + ( + BinaryOp::Add | BinaryOp::Subtract | BinaryOp::Multiply | BinaryOp::Divide, + Type::F64, + Type::F64, + ) => Type::F64, + + (BinaryOp::Add, Type::String, Type::String) => Type::String, + + (BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool, + + // This is dumb and should be punished, probably. + (_, _, Type::Unreachable) => { + let (line, col) = lines.position(tok.start); + self.errors.push(SyntaxError::new( + line, + col, + format!( + "cannot apply '{tok}' to an argument that doesn't yield a value (on the right)" + ), + )); + Type::Error + } + (_, Type::Unreachable, _) => { + let (line, col) = lines.position(tok.start); + self.errors.push(SyntaxError::new( + line, + col, + format!( + "cannot apply '{tok}' to an argument that doesn't yield a value (on the left)" + ), + )); + Type::Error + } + + // Propagate existing errors without additional complaint. + (_, Type::Error, _) => Type::Error, + (_, _, Type::Error) => Type::Error, + + // Missed the whole table, it must be an error. + (_, left_type, right_type) => { + let (line, col) = lines.position(tok.start); + self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)"))); + Type::Error + } + } + } + + Expr::Conditional(_, cond, then_exp, else_exp, _) => { + let cond = cond.clone(); + let then_exp = then_exp.clone(); + let else_exp = else_exp.clone(); + + let cond_type = self.expr_type(&cond, lines, true); + let then_type = self.expr_type(&then_exp, lines, value_required); + let else_type = else_exp.map(|e| self.expr_type(&e, lines, value_required)); + if !cond_type.compatible_with(&Type::Bool) { + if !cond_type.is_error() { + let span = self + .expr_span(&cond) + .expect("If the expression has a type it must have a span"); + + let start = lines.position(span.0.start); + let end = lines.position(span.1.start); + self.errors.push(SyntaxError::new_spanned( + start, + end, + "the condition of an `if` expression must be a boolean", + )); + } + return Type::Error; + } + + match (then_type, else_type) { + (Type::Error, _) => Type::Error, + (_, Some(Type::Error)) => Type::Error, + + // It's an error to have a missing else branch if the value is required + (_, None) if value_required => { + let span = self + .expr_span(&exr) + .expect("How did I get this far with a broken parse?"); + let start = lines.position(span.0.start); + let end = lines.position(span.1.start); + self.errors.push(SyntaxError::new_spanned( + start, + end, + "this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value", + )); + Type::Error + } + + // If the value is required then the branches must be + // compatible, and the type of the expression is the type + // of the `then` branch. + (then_type, Some(else_type)) if value_required => { + if !then_type.compatible_with(&else_type) { + let span = self + .expr_span(&exr) + .expect("How did I get this far with a broken parse?"); + let start = lines.position(span.0.start); + let end = lines.position(span.1.start); + self.errors.push(SyntaxError::new_spanned( + start, + end, + format!("the type of the `then` branch ({then_type}) must match the type of the `else` branch ({else_type})"), + )); + Type::Error + } else { + then_type + } + } + + // The value must not be required, just mark this as unreachable. + (_, _) => { + assert!(!value_required); + Type::Unreachable + } + } + } + } + } +} + +// BINDING POWERS. When parsing expressions we only accept expressions that +// meet a minimum binding power. (This is like "precedence" but I just super +// don't like that terminology.) +const ASSIGNMENT_POWER: u8 = 0; // = +const OR_POWER: u8 = 1; // or +const AND_POWER: u8 = 2; // and +const EQUALITY_POWER: u8 = 3; // == != +const COMPARISON_POWER: u8 = 4; // < > <= >= +const TERM_POWER: u8 = 5; // + - +const FACTOR_POWER: u8 = 6; // * / +const UNARY_POWER: u8 = 7; // ! - + +// const CALL_POWER: u8 = 8; // . () +// const PRIMARY_POWER: u8 = 9; + +fn token_power<'a>(token: TokenKind) -> Option { + match token { + TokenKind::Equal => Some(ASSIGNMENT_POWER), + TokenKind::Or => Some(OR_POWER), + TokenKind::And => Some(AND_POWER), + TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER), + TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => { + Some(COMPARISON_POWER) + } + TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER), + TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER), + _ => None, + } +} + +pub struct Parser<'a> { + tokens: Tokens<'a>, + tree: SyntaxTree<'a>, + current: Token<'a>, + previous: Token<'a>, + + panic_mode: bool, +} + +impl<'a> Parser<'a> { + pub fn new(source: &'a str) -> Self { + let mut parser = Parser { + tokens: Tokens::new(source), + tree: SyntaxTree::new(), + current: Token::new(TokenKind::EOF, 0, ""), + previous: Token::new(TokenKind::EOF, 0, ""), + panic_mode: false, + }; + parser.advance(); + parser + } + + pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) { + let expr = self.expression(); + self.consume(TokenKind::EOF, "expected end of expression"); + (self.tree, expr, self.tokens.lines()) + } + + fn expression(&mut self) -> ExprRef { + self.expression_with_power(0) + } + + fn expression_with_power(&mut self, minimum_power: u8) -> ExprRef { + self.trace("expression with power"); + self.advance(); + let mut expr = self.prefix_expression(); + loop { + let power = match token_power(self.current.kind) { + Some(p) => p, + None => break, // EOF, end of expression? + }; + + if power < minimum_power { + break; + } + + self.advance(); + expr = self.infix_expression(power, expr); + } + expr + } + + fn prefix_expression(&mut self) -> ExprRef { + self.trace("prefix"); + let token = &self.previous; + match token.kind { + TokenKind::Bang => self.unary(), + TokenKind::LeftParen => self.grouping(), + TokenKind::Number => self.number(), + TokenKind::Minus => self.unary(), + TokenKind::String => self.string(), + + TokenKind::True => self + .tree + .add_expr(Expr::Literal(Literal::Bool(true), token.clone())), + TokenKind::False => self + .tree + .add_expr(Expr::Literal(Literal::Bool(false), token.clone())), + + TokenKind::If => self.conditional(), + + _ => { + self.error("expected an expression"); + ExprRef::error() + } + } + } + + fn infix_expression(&mut self, power: u8, left: ExprRef) -> ExprRef { + self.trace("infix"); + match self.previous.kind { + TokenKind::Plus + | TokenKind::Minus + | TokenKind::Star + | TokenKind::Slash + | TokenKind::And + | TokenKind::Or => self.binary(power, left), + _ => panic!("Unknown infix operator, dispatch error?"), + } + } + + fn number(&mut self) -> ExprRef { + let token = &self.previous; + // What kind is it? For now let's just ... make it good. + + let literal = match token.as_str().parse::() { + Ok(v) => Literal::Float64(v), + Err(e) => { + self.error(format!("invalid f64: {e}")); + return ExprRef::error(); + } + }; + + self.tree.add_expr(Expr::Literal(literal, token.clone())) + } + + fn string(&mut self) -> ExprRef { + let token = &self.previous; + + let mut result = String::new(); + let mut input = token.as_str().chars(); + + assert!(input.next().is_some()); // Delimiter + while let Some(ch) = input.next() { + match ch { + '\\' => match input.next().unwrap() { + 'n' => result.push('\n'), + 'r' => result.push('\r'), + 't' => result.push('\t'), + ch => result.push(ch), + }, + _ => result.push(ch), + } + } + result.pop(); // We pushed the other delimiter on, whoops. + + let literal = Literal::String(result); + self.tree.add_expr(Expr::Literal(literal, token.clone())) + } + + fn grouping(&mut self) -> ExprRef { + let result = self.expression(); + self.consume(TokenKind::RightParen, "expected ')' after an expression"); + result + } + + fn conditional(&mut self) -> ExprRef { + let token = self.previous.clone(); + let condition_expr = self.expression(); + self.consume(TokenKind::LeftBrace, "expected '{' to start an 'if' block"); + let then_expr = self.expression(); + self.consume(TokenKind::RightBrace, "expected '}' to end an 'if' block"); + let else_expr = if self.current.kind == TokenKind::Else { + self.advance(); + if self.current.kind == TokenKind::If { + self.advance(); + Some(self.conditional()) + } else { + self.consume( + TokenKind::LeftBrace, + "expected '{' to start an 'else' block", + ); + let else_expr = self.expression(); + self.consume(TokenKind::RightBrace, "Expected '}' to end an 'else' block"); + Some(else_expr) + } + } else { + None + }; + let tail = self.previous.clone(); + self.tree.add_expr(Expr::Conditional( + token, + condition_expr, + then_expr, + else_expr, + tail, + )) + } + + fn unary(&mut self) -> ExprRef { + let token = self.previous.clone(); + let kind = token.kind; + let expr = self.expression_with_power(UNARY_POWER); + let op = match kind { + TokenKind::Minus => UnaryOp::Negate, + TokenKind::Bang => UnaryOp::Not, + _ => panic!("unsuitable unary: {:?}: no op", kind), + }; + + self.tree.add_expr(Expr::Unary(op, token, expr)) + } + + fn binary(&mut self, power: u8, left: ExprRef) -> ExprRef { + let token = self.previous.clone(); + let op = match token.kind { + TokenKind::Plus => BinaryOp::Add, + TokenKind::Minus => BinaryOp::Subtract, + TokenKind::Star => BinaryOp::Multiply, + TokenKind::Slash => BinaryOp::Divide, + TokenKind::And => BinaryOp::And, + TokenKind::Or => BinaryOp::Or, + _ => panic!("unsuitable binary: {:?}: no op", self.previous), + }; + let right = self.expression_with_power(power + 1); + self.tree.add_expr(Expr::Binary(op, token, left, right)) + } + + fn advance(&mut self) { + self.previous = self.current.clone(); + self.current = self.tokens.next(); + while self.current.kind == TokenKind::Error + || self.current.kind == TokenKind::Whitespace + || self.current.kind == TokenKind::Comment + { + if self.current.kind == TokenKind::Error { + self.error_at_current(self.current.to_string()); + } + self.current = self.tokens.next(); + } + } + + fn consume(&mut self, kind: TokenKind, error: &str) { + if self.current.kind == kind { + self.advance(); + } else { + self.error_at_current(error); + } + } + + fn error(&mut self, message: T) + where + T: Into, + { + self.error_at(self.previous.clone(), message) + } + + fn error_at_current(&mut self, message: T) + where + T: Into, + { + self.error_at(self.current.clone(), message) + } + + fn error_at(&mut self, token: Token<'a>, message: T) + where + T: Into, + { + if self.panic_mode { + return; + } + self.panic_mode = true; + + let message: String = message.into(); + let (line, column) = self.tokens.token_position(&token); + let mut final_message = "Error ".to_string(); + + if token.kind == TokenKind::EOF { + final_message.push_str("at end") + } else if token.kind != TokenKind::Error { + final_message.push_str("at '"); + final_message.push_str(token.as_str()); + final_message.push_str("'"); + } + final_message.push_str(": "); + final_message.push_str(&message); + + self.tree + .add_error(SyntaxError::new(line, column, final_message)); + } + + fn trace(&self, _msg: &str) { + // let cpos = self.tokens.token_position(&self.current); + // let ppos = self.tokens.token_position(&self.previous); + + // eprintln!( + // "[{}:{}:{}] [{}:{}:{}]: {msg}", + // ppos.0, + // ppos.1, + // self.previous + // .as_ref() + // .map(|t| t.as_str()) + // .unwrap_or(""), + // cpos.0, + // cpos.1, + // self.current.as_ref().map(|t| t.as_str()).unwrap_or("") + // ); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + fn test_successful_expression_parse(source: &str, expected: &str, expected_type: Type) { + let (mut tree, expr, lines) = Parser::new(source).parse(); + assert_eq!( + Vec::::new(), + tree.errors, + "Expected successful parse" + ); + assert_eq!( + expected, + tree.dump_expr(&expr), + "The parse structure of the expressions did not match" + ); + + // TODO: 'assert_eq' is probably wrong here + let expr_type = tree.expr_type(&expr, &lines, true); + assert!( + expected_type.compatible_with(&expr_type), + "The type of the expression did not match. expected: {expected_type}, actual: {expr_type}" + ); + } + + macro_rules! test_expr { + ($name:ident, $input:expr, $expected:expr, $type:expr) => { + #[test] + fn $name() { + test_successful_expression_parse($input, $expected, $type); + } + }; + } + + test_expr!(number_expr, "12", "12", Type::F64); + test_expr!(add_expr, "1 + 2", "(+ 1 2)", Type::F64); + test_expr!( + prec_expr, + "1 + 2 * 3 - 7 * 7", + "(- (+ 1 (* 2 3)) (* 7 7))", + Type::F64 + ); + test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)", Type::F64); + test_expr!( + strings, + r#" "Hello " + 'world!' "#, + r#"(+ "Hello " 'world!')"#, + Type::String + ); + + test_expr!( + booleans, + "true and false or false and !true", + "(or (and true false) (and false (! true)))", + Type::Bool + ); + + test_expr!( + if_expression, + "if true { 23 } else { 45 }", + "(if true 23 45)", + Type::F64 + ); + // test_expr!( + // if_with_return, + // "if true { 23 } else { return 'nothing' }", + // "", + // Type::F64 + // ); + + // ======================================================================== + // Type Error Tests + // ======================================================================== + + fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) { + let (mut tree, expr, lines) = Parser::new(source).parse(); + assert_eq!( + Vec::::new(), + tree.errors, + "Expected successful parse" + ); + + let expr_type = tree.expr_type(&expr, &lines, true); + assert!(expr_type.is_error()); + + let actual_errors = tree + .errors + .iter() + .map(|e| e.message.as_str()) + .collect::>(); + assert_eq!(expected_errors, actual_errors); + } + + macro_rules! test_type_error_expr { + ($name:ident, $input:expr, $($s:expr),+) => { + #[test] + fn $name() { + let expected_errors: Vec<&str> = (vec![$($s),*]); + test_type_error_expression($input, expected_errors); + } + } + } + + test_type_error_expr!( + negate_string, + "-('what?')", + "cannot apply unary operator '-' to expression of type 'string'" + ); + + test_type_error_expr!( + add_string_number, + "'what?' + 5", + "cannot apply binary operator '+' to expressions of type 'string' (on the left) and 'f64' (on the right)" + ); + + test_type_error_expr!( + add_number_string, + "5 + 'what?'", + "cannot apply binary operator '+' to expressions of type 'f64' (on the left) and 'string' (on the right)" + ); + + test_type_error_expr!( + errors_propagate_do_not_duplicate, + "!'hello' / 27 * -('what?') + 23", + "cannot apply unary operator '!' to expression of type 'string'", + "cannot apply unary operator '-' to expression of type 'string'" + ); + + test_type_error_expr!( + if_not_bool, + "if 23 { 1 } else { 2 }", + "the condition of an `if` expression must be a boolean" + ); + + test_type_error_expr!( + if_arm_mismatch, + "if true { 1 } else { '1' }", + "the type of the `then` branch (f64) must match the type of the `else` branch (string)" + ); + + test_type_error_expr!( + if_no_else, + "if true { 1 }", + "this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value" + ); +} diff --git a/fine/tests/example_tests.rs b/fine/tests/example_tests.rs index 89bfa63a..d098e5da 100644 --- a/fine/tests/example_tests.rs +++ b/fine/tests/example_tests.rs @@ -1,4 +1,4 @@ -use fine::parser::concrete::SyntaxTree; +use fine::parser::SyntaxTree; use pretty_assertions::assert_eq; fn rebase_concrete(source_path: &str, dump: &str) {