use crate::tokens::{Lines, Token, TokenKind, Tokens}; use std::fmt; pub mod concrete; // TODO: An error should have: // // - a start // - an end // - a focus // - descriptive messages // // that will have to wait for now #[derive(PartialEq, Eq)] pub struct SyntaxError { pub start: (usize, usize), pub end: (usize, usize), pub message: String, } impl SyntaxError { pub fn new(line: usize, column: usize, message: T) -> Self where T: ToString, { SyntaxError { start: (line, column), end: (line, column), message: message.to_string(), } } pub fn new_spanned(start: (usize, usize), end: (usize, usize), message: T) -> Self where T: ToString, { SyntaxError { start, end, message: message.to_string(), } } } impl fmt::Debug for SyntaxError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{self}") } } impl fmt::Display for SyntaxError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}:{}: {}", self.start.0, self.end.0, self.message) } } #[derive(Clone)] pub enum Literal { Float64(f64), String(String), Bool(bool), } #[derive(Copy, Clone)] pub enum UnaryOp { Negate, Not, } #[derive(Copy, Clone)] pub enum BinaryOp { Add, Subtract, Multiply, Divide, And, Or, } #[derive(Clone)] pub enum Expr<'a> { Literal(Literal, Token<'a>), Unary(UnaryOp, Token<'a>, ExprRef), Binary(BinaryOp, Token<'a>, ExprRef, ExprRef), Conditional(Token<'a>, ExprRef, ExprRef, Option, Token<'a>), } #[derive(Clone)] pub struct ExprRef(Option); impl ExprRef { pub fn error() -> Self { ExprRef(None) } } // TODO: Eventually we will be unable to use Eq and PartialEq here, and will // need to do out own thing. #[derive(Copy, Clone)] pub enum Type { // Signals a type error. If you receive this then you know that an error // has already been reported; if you produce this be sure to also note // the error in the errors collection. Error, // Signals that the expression has a control-flow side-effect and that no // value will ever result from this expression. Usually this means // everything's fine. Unreachable, // TODO: Numeric literals should be implicitly convertable, unlike other // types. Maybe just "numeric literal" type? F64, String, Bool, } impl Type { pub fn is_error(&self) -> bool { match self { Type::Error => true, _ => false, } } pub fn compatible_with(&self, other: &Type) -> bool { // TODO: This is wrong; we because of numeric literals etc. match (self, other) { (Type::F64, Type::F64) => true, (Type::String, Type::String) => true, (Type::Bool, Type::Bool) => true, (Type::Unreachable, Type::Unreachable) => true, // Avoid introducing more errors (Type::Error, _) => true, (_, Type::Error) => true, (_, _) => false, } } } impl std::fmt::Debug for Type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{self}") } } impl std::fmt::Display for Type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use Type::*; match self { Error => write!(f, "<< INTERNAL ERROR >>"), Unreachable => write!(f, "<< UNREACHABLE >>"), F64 => write!(f, "f64"), String => write!(f, "string"), Bool => write!(f, "bool"), } } } pub struct SyntaxTree<'a> { pub errors: Vec, expressions: Vec>, } impl<'a> SyntaxTree<'a> { pub fn new() -> Self { SyntaxTree { errors: Vec::new(), expressions: Vec::new(), } } pub fn add_error(&mut self, error: SyntaxError) { self.errors.push(error); } pub fn add_expr(&mut self, expr: Expr<'a>) -> ExprRef { let index = self.expressions.len(); self.expressions.push(expr); ExprRef(Some(index)) } pub fn dump_expr(&self, expr: &ExprRef) -> String { match expr.0 { Some(idx) => { let expr = &self.expressions[idx]; match expr { Expr::Literal(_, tok) => tok.to_string(), Expr::Unary(_, tok, e) => { format!("({tok} {})", self.dump_expr(e)) } Expr::Binary(_, tok, l, r) => { format!("({tok} {} {})", self.dump_expr(l), self.dump_expr(r)) } Expr::Conditional(tok, cond, t, e, _) => { if let Some(e) = e { format!( "({tok} {} {} {})", self.dump_expr(cond), self.dump_expr(t), self.dump_expr(e) ) } else { format!("({tok} {} {})", self.dump_expr(cond), self.dump_expr(t)) } } } } None => "<|EOF|>".to_string(), } } pub fn expr_span(&self, expr: &ExprRef) -> Option<(Token<'a>, Token<'a>)> { let expr = match expr.0 { Some(idx) => &self.expressions[idx], None => return None, }; match expr { Expr::Literal(_, tok) => Some((tok.clone(), tok.clone())), Expr::Unary(_, tok, arg) => { let arg = self.expr_span(arg); match arg { None => None, Some((_, end)) => Some((tok.clone(), end)), } } Expr::Binary(_, _, left, right) => { let left = self.expr_span(left); let right = self.expr_span(right); match (left, right) { (None, _) => None, (_, None) => None, (Some((start, _)), Some((_, end))) => Some((start, end)), } } Expr::Conditional(head, _, _, _, tail) => Some((head.clone(), tail.clone())), } } pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines, value_required: bool) -> Type { // TODO: Cache and work on demand? Or is this just fine? let exr = expr.clone(); let expr = match expr.0 { Some(idx) => &self.expressions[idx], None => return Type::Error, }; match expr { Expr::Literal(lit, _) => match lit { Literal::Float64(_) => Type::F64, Literal::String(_) => Type::String, Literal::Bool(_) => Type::Bool, }, // Figure out the main thing. Check for a... trait? Expr::Unary(op, tok, arg) => { let op = op.clone(); let arg = arg.clone(); let tok = tok.clone(); let arg_type = self.expr_type(&arg, lines, true); match (op, arg_type) { (UnaryOp::Negate, Type::F64) => Type::F64, (UnaryOp::Not, Type::Bool) => Type::Bool, // This is dumb and should be punished, probably. (_, Type::Unreachable) => { let (line, col) = lines.position(tok.start); self.errors.push(SyntaxError::new(line, col, format!("cannot apply a unary operator to something that doesn't yield a value"))); Type::Error } // Propagate existing errors without additional complaint. (_, Type::Error) => Type::Error, // Missed the whole table, must be an error. (_, arg_type) => { let (line, col) = lines.position(tok.start); self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'"))); Type::Error } } } Expr::Binary(op, tok, left, right) => { let op = op.clone(); let tok = tok.clone(); let left = left.clone(); let right = right.clone(); let left_type = self.expr_type(&left, lines, true); let right_type = self.expr_type(&right, lines, true); match (op, left_type, right_type) { ( BinaryOp::Add | BinaryOp::Subtract | BinaryOp::Multiply | BinaryOp::Divide, Type::F64, Type::F64, ) => Type::F64, (BinaryOp::Add, Type::String, Type::String) => Type::String, (BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool, // This is dumb and should be punished, probably. (_, _, Type::Unreachable) => { let (line, col) = lines.position(tok.start); self.errors.push(SyntaxError::new( line, col, format!( "cannot apply '{tok}' to an argument that doesn't yield a value (on the right)" ), )); Type::Error } (_, Type::Unreachable, _) => { let (line, col) = lines.position(tok.start); self.errors.push(SyntaxError::new( line, col, format!( "cannot apply '{tok}' to an argument that doesn't yield a value (on the left)" ), )); Type::Error } // Propagate existing errors without additional complaint. (_, Type::Error, _) => Type::Error, (_, _, Type::Error) => Type::Error, // Missed the whole table, it must be an error. (_, left_type, right_type) => { let (line, col) = lines.position(tok.start); self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)"))); Type::Error } } } Expr::Conditional(_, cond, then_exp, else_exp, _) => { let cond = cond.clone(); let then_exp = then_exp.clone(); let else_exp = else_exp.clone(); let cond_type = self.expr_type(&cond, lines, true); let then_type = self.expr_type(&then_exp, lines, value_required); let else_type = else_exp.map(|e| self.expr_type(&e, lines, value_required)); if !cond_type.compatible_with(&Type::Bool) { if !cond_type.is_error() { let span = self .expr_span(&cond) .expect("If the expression has a type it must have a span"); let start = lines.position(span.0.start); let end = lines.position(span.1.start); self.errors.push(SyntaxError::new_spanned( start, end, "the condition of an `if` expression must be a boolean", )); } return Type::Error; } match (then_type, else_type) { (Type::Error, _) => Type::Error, (_, Some(Type::Error)) => Type::Error, // It's an error to have a missing else branch if the value is required (_, None) if value_required => { let span = self .expr_span(&exr) .expect("How did I get this far with a broken parse?"); let start = lines.position(span.0.start); let end = lines.position(span.1.start); self.errors.push(SyntaxError::new_spanned( start, end, "this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value", )); Type::Error } // If the value is required then the branches must be // compatible, and the type of the expression is the type // of the `then` branch. (then_type, Some(else_type)) if value_required => { if !then_type.compatible_with(&else_type) { let span = self .expr_span(&exr) .expect("How did I get this far with a broken parse?"); let start = lines.position(span.0.start); let end = lines.position(span.1.start); self.errors.push(SyntaxError::new_spanned( start, end, format!("the type of the `then` branch ({then_type}) must match the type of the `else` branch ({else_type})"), )); Type::Error } else { then_type } } // The value must not be required, just mark this as unreachable. (_, _) => { assert!(!value_required); Type::Unreachable } } } } } } // BINDING POWERS. When parsing expressions we only accept expressions that // meet a minimum binding power. (This is like "precedence" but I just super // don't like that terminology.) const ASSIGNMENT_POWER: u8 = 0; // = const OR_POWER: u8 = 1; // or const AND_POWER: u8 = 2; // and const EQUALITY_POWER: u8 = 3; // == != const COMPARISON_POWER: u8 = 4; // < > <= >= const TERM_POWER: u8 = 5; // + - const FACTOR_POWER: u8 = 6; // * / const UNARY_POWER: u8 = 7; // ! - // const CALL_POWER: u8 = 8; // . () // const PRIMARY_POWER: u8 = 9; fn token_power<'a>(token: TokenKind) -> Option { match token { TokenKind::Equal => Some(ASSIGNMENT_POWER), TokenKind::Or => Some(OR_POWER), TokenKind::And => Some(AND_POWER), TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER), TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => { Some(COMPARISON_POWER) } TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER), TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER), _ => None, } } pub struct Parser<'a> { tokens: Tokens<'a>, tree: SyntaxTree<'a>, current: Token<'a>, previous: Token<'a>, panic_mode: bool, } impl<'a> Parser<'a> { pub fn new(source: &'a str) -> Self { let mut parser = Parser { tokens: Tokens::new(source), tree: SyntaxTree::new(), current: Token::new(TokenKind::EOF, 0, ""), previous: Token::new(TokenKind::EOF, 0, ""), panic_mode: false, }; parser.advance(); parser } pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) { let expr = self.expression(); self.consume(TokenKind::EOF, "expected end of expression"); (self.tree, expr, self.tokens.lines()) } fn expression(&mut self) -> ExprRef { self.expression_with_power(0) } fn expression_with_power(&mut self, minimum_power: u8) -> ExprRef { self.trace("expression with power"); self.advance(); let mut expr = self.prefix_expression(); loop { let power = match token_power(self.current.kind) { Some(p) => p, None => break, // EOF, end of expression? }; if power < minimum_power { break; } self.advance(); expr = self.infix_expression(power, expr); } expr } fn prefix_expression(&mut self) -> ExprRef { self.trace("prefix"); let token = &self.previous; match token.kind { TokenKind::Bang => self.unary(), TokenKind::LeftParen => self.grouping(), TokenKind::Number => self.number(), TokenKind::Minus => self.unary(), TokenKind::String => self.string(), TokenKind::True => self .tree .add_expr(Expr::Literal(Literal::Bool(true), token.clone())), TokenKind::False => self .tree .add_expr(Expr::Literal(Literal::Bool(false), token.clone())), TokenKind::If => self.conditional(), _ => { self.error("expected an expression"); ExprRef::error() } } } fn infix_expression(&mut self, power: u8, left: ExprRef) -> ExprRef { self.trace("infix"); match self.previous.kind { TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash | TokenKind::And | TokenKind::Or => self.binary(power, left), _ => panic!("Unknown infix operator, dispatch error?"), } } fn number(&mut self) -> ExprRef { let token = &self.previous; // What kind is it? For now let's just ... make it good. let literal = match token.as_str().parse::() { Ok(v) => Literal::Float64(v), Err(e) => { self.error(format!("invalid f64: {e}")); return ExprRef::error(); } }; self.tree.add_expr(Expr::Literal(literal, token.clone())) } fn string(&mut self) -> ExprRef { let token = &self.previous; let mut result = String::new(); let mut input = token.as_str().chars(); assert!(input.next().is_some()); // Delimiter while let Some(ch) = input.next() { match ch { '\\' => match input.next().unwrap() { 'n' => result.push('\n'), 'r' => result.push('\r'), 't' => result.push('\t'), ch => result.push(ch), }, _ => result.push(ch), } } result.pop(); // We pushed the other delimiter on, whoops. let literal = Literal::String(result); self.tree.add_expr(Expr::Literal(literal, token.clone())) } fn grouping(&mut self) -> ExprRef { let result = self.expression(); self.consume(TokenKind::RightParen, "expected ')' after an expression"); result } fn conditional(&mut self) -> ExprRef { let token = self.previous.clone(); let condition_expr = self.expression(); self.consume(TokenKind::LeftBrace, "expected '{' to start an 'if' block"); let then_expr = self.expression(); self.consume(TokenKind::RightBrace, "expected '}' to end an 'if' block"); let else_expr = if self.current.kind == TokenKind::Else { self.advance(); if self.current.kind == TokenKind::If { self.advance(); Some(self.conditional()) } else { self.consume( TokenKind::LeftBrace, "expected '{' to start an 'else' block", ); let else_expr = self.expression(); self.consume(TokenKind::RightBrace, "Expected '}' to end an 'else' block"); Some(else_expr) } } else { None }; let tail = self.previous.clone(); self.tree.add_expr(Expr::Conditional( token, condition_expr, then_expr, else_expr, tail, )) } fn unary(&mut self) -> ExprRef { let token = self.previous.clone(); let kind = token.kind; let expr = self.expression_with_power(UNARY_POWER); let op = match kind { TokenKind::Minus => UnaryOp::Negate, TokenKind::Bang => UnaryOp::Not, _ => panic!("unsuitable unary: {:?}: no op", kind), }; self.tree.add_expr(Expr::Unary(op, token, expr)) } fn binary(&mut self, power: u8, left: ExprRef) -> ExprRef { let token = self.previous.clone(); let op = match token.kind { TokenKind::Plus => BinaryOp::Add, TokenKind::Minus => BinaryOp::Subtract, TokenKind::Star => BinaryOp::Multiply, TokenKind::Slash => BinaryOp::Divide, TokenKind::And => BinaryOp::And, TokenKind::Or => BinaryOp::Or, _ => panic!("unsuitable binary: {:?}: no op", self.previous), }; let right = self.expression_with_power(power + 1); self.tree.add_expr(Expr::Binary(op, token, left, right)) } fn advance(&mut self) { self.previous = self.current.clone(); self.current = self.tokens.next(); while self.current.kind == TokenKind::Error || self.current.kind == TokenKind::Whitespace || self.current.kind == TokenKind::Comment { if self.current.kind == TokenKind::Error { self.error_at_current(self.current.to_string()); } self.current = self.tokens.next(); } } fn consume(&mut self, kind: TokenKind, error: &str) { if self.current.kind == kind { self.advance(); } else { self.error_at_current(error); } } fn error(&mut self, message: T) where T: Into, { self.error_at(self.previous.clone(), message) } fn error_at_current(&mut self, message: T) where T: Into, { self.error_at(self.current.clone(), message) } fn error_at(&mut self, token: Token<'a>, message: T) where T: Into, { if self.panic_mode { return; } self.panic_mode = true; let message: String = message.into(); let (line, column) = self.tokens.token_position(&token); let mut final_message = "Error ".to_string(); if token.kind == TokenKind::EOF { final_message.push_str("at end") } else if token.kind != TokenKind::Error { final_message.push_str("at '"); final_message.push_str(token.as_str()); final_message.push_str("'"); } final_message.push_str(": "); final_message.push_str(&message); self.tree .add_error(SyntaxError::new(line, column, final_message)); } fn trace(&self, _msg: &str) { // let cpos = self.tokens.token_position(&self.current); // let ppos = self.tokens.token_position(&self.previous); // eprintln!( // "[{}:{}:{}] [{}:{}:{}]: {msg}", // ppos.0, // ppos.1, // self.previous // .as_ref() // .map(|t| t.as_str()) // .unwrap_or(""), // cpos.0, // cpos.1, // self.current.as_ref().map(|t| t.as_str()).unwrap_or("") // ); } } #[cfg(test)] mod tests { use super::*; use pretty_assertions::assert_eq; fn test_successful_expression_parse(source: &str, expected: &str, expected_type: Type) { let (mut tree, expr, lines) = Parser::new(source).parse(); assert_eq!( Vec::::new(), tree.errors, "Expected successful parse" ); assert_eq!( expected, tree.dump_expr(&expr), "The parse structure of the expressions did not match" ); // TODO: 'assert_eq' is probably wrong here let expr_type = tree.expr_type(&expr, &lines, true); assert!( expected_type.compatible_with(&expr_type), "The type of the expression did not match. expected: {expected_type}, actual: {expr_type}" ); } macro_rules! test_expr { ($name:ident, $input:expr, $expected:expr, $type:expr) => { #[test] fn $name() { test_successful_expression_parse($input, $expected, $type); } }; } test_expr!(number_expr, "12", "12", Type::F64); test_expr!(add_expr, "1 + 2", "(+ 1 2)", Type::F64); test_expr!( prec_expr, "1 + 2 * 3 - 7 * 7", "(- (+ 1 (* 2 3)) (* 7 7))", Type::F64 ); test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)", Type::F64); test_expr!( strings, r#" "Hello " + 'world!' "#, r#"(+ "Hello " 'world!')"#, Type::String ); test_expr!( booleans, "true and false or false and !true", "(or (and true false) (and false (! true)))", Type::Bool ); test_expr!( if_expression, "if true { 23 } else { 45 }", "(if true 23 45)", Type::F64 ); // test_expr!( // if_with_return, // "if true { 23 } else { return 'nothing' }", // "", // Type::F64 // ); // ======================================================================== // Type Error Tests // ======================================================================== fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) { let (mut tree, expr, lines) = Parser::new(source).parse(); assert_eq!( Vec::::new(), tree.errors, "Expected successful parse" ); let expr_type = tree.expr_type(&expr, &lines, true); assert!(expr_type.is_error()); let actual_errors = tree .errors .iter() .map(|e| e.message.as_str()) .collect::>(); assert_eq!(expected_errors, actual_errors); } macro_rules! test_type_error_expr { ($name:ident, $input:expr, $($s:expr),+) => { #[test] fn $name() { let expected_errors: Vec<&str> = (vec![$($s),*]); test_type_error_expression($input, expected_errors); } } } test_type_error_expr!( negate_string, "-('what?')", "cannot apply unary operator '-' to expression of type 'string'" ); test_type_error_expr!( add_string_number, "'what?' + 5", "cannot apply binary operator '+' to expressions of type 'string' (on the left) and 'f64' (on the right)" ); test_type_error_expr!( add_number_string, "5 + 'what?'", "cannot apply binary operator '+' to expressions of type 'f64' (on the left) and 'string' (on the right)" ); test_type_error_expr!( errors_propagate_do_not_duplicate, "!'hello' / 27 * -('what?') + 23", "cannot apply unary operator '!' to expression of type 'string'", "cannot apply unary operator '-' to expression of type 'string'" ); test_type_error_expr!( if_not_bool, "if 23 { 1 } else { 2 }", "the condition of an `if` expression must be a boolean" ); test_type_error_expr!( if_arm_mismatch, "if true { 1 } else { '1' }", "the type of the `then` branch (f64) must match the type of the `else` branch (string)" ); test_type_error_expr!( if_no_else, "if true { 1 }", "this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value" ); }