From 652fe18f57fde7d011b7609d77ea877bcc44481e Mon Sep 17 00:00:00 2001 From: John Doty Date: Tue, 2 Jan 2024 13:49:31 -0800 Subject: [PATCH] [fine] More type checking; if and whatnot --- oden-script/src/parser.rs | 328 +++++++++++++++++++++++++++++++++++--- 1 file changed, 307 insertions(+), 21 deletions(-) diff --git a/oden-script/src/parser.rs b/oden-script/src/parser.rs index bc98d520..a962dcd3 100644 --- a/oden-script/src/parser.rs +++ b/oden-script/src/parser.rs @@ -1,32 +1,54 @@ use crate::tokens::{Lines, Token, TokenKind, Tokens}; use std::fmt; +// TODO: An error should have: +// +// - a start +// - an end +// - a focus +// - descriptive messages +// +// that will have to wait for now #[derive(PartialEq, Eq)] pub struct SyntaxError { - pub line: usize, - pub column: usize, + pub start: (usize, usize), + pub end: (usize, usize), pub message: String, } impl SyntaxError { - pub fn new(line: usize, column: usize, message: String) -> Self { + pub fn new(line: usize, column: usize, message: T) -> Self + where + T: ToString, + { SyntaxError { - line, - column, - message, + start: (line, column), + end: (line, column), + message: message.to_string(), + } + } + + pub fn new_spanned(start: (usize, usize), end: (usize, usize), message: T) -> Self + where + T: ToString, + { + SyntaxError { + start, + end, + message: message.to_string(), } } } impl fmt::Debug for SyntaxError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}:{}: {}", self.line, self.column, self.message) + write!(f, "{self}") } } impl fmt::Display for SyntaxError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}:{}: {}", self.line, self.column, self.message) + write!(f, "{}:{}: {}", self.start.0, self.end.0, self.message) } } @@ -58,6 +80,7 @@ pub enum Expr<'a> { Literal(Literal, Token<'a>), Unary(UnaryOp, Token<'a>, ExprRef), Binary(BinaryOp, Token<'a>, ExprRef, ExprRef), + Conditional(Token<'a>, ExprRef, ExprRef, Option, Token<'a>), } #[derive(Clone)] @@ -71,17 +94,50 @@ impl ExprRef { // TODO: Eventually we will be unable to use Eq and PartialEq here, and will // need to do out own thing. -#[derive(Clone, Eq, PartialEq)] +#[derive(Copy, Clone)] pub enum Type { + // Signals a type error. If you receive this then you know that an error + // has already been reported; if you produce this be sure to also note + // the error in the errors collection. Error, - // TODO: Numeric literals should be implicitly convertable unlike other - // types. + // Signals that the expression has a control-flow side-effect and that no + // value will ever result from this expression. Usually this means + // everything's fine. + Unreachable, + + // TODO: Numeric literals should be implicitly convertable, unlike other + // types. Maybe just "numeric literal" type? F64, String, Bool, } +impl Type { + pub fn is_error(&self) -> bool { + match self { + Type::Error => true, + _ => false, + } + } + + pub fn compatible_with(&self, other: &Type) -> bool { + // TODO: This is wrong; we because of numeric literals etc. + match (self, other) { + (Type::F64, Type::F64) => true, + (Type::String, Type::String) => true, + (Type::Bool, Type::Bool) => true, + (Type::Unreachable, Type::Unreachable) => true, + + // Avoid introducing more errors + (Type::Error, _) => true, + (_, Type::Error) => true, + + (_, _) => false, + } + } +} + impl std::fmt::Debug for Type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{self}") @@ -93,6 +149,7 @@ impl std::fmt::Display for Type { use Type::*; match self { Error => write!(f, "<< INTERNAL ERROR >>"), + Unreachable => write!(f, "<< UNREACHABLE >>"), F64 => write!(f, "f64"), String => write!(f, "string"), Bool => write!(f, "bool"), @@ -137,15 +194,56 @@ impl<'a> SyntaxTree<'a> { Expr::Binary(_, tok, l, r) => { format!("({tok} {} {})", self.dump_expr(l), self.dump_expr(r)) } + Expr::Conditional(tok, cond, t, e, _) => { + if let Some(e) = e { + format!( + "({tok} {} {} {})", + self.dump_expr(cond), + self.dump_expr(t), + self.dump_expr(e) + ) + } else { + format!("({tok} {} {})", self.dump_expr(cond), self.dump_expr(t)) + } + } } } None => "<|EOF|>".to_string(), } } - pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines) -> Type { + pub fn expr_span(&self, expr: &ExprRef) -> Option<(Token<'a>, Token<'a>)> { + let expr = match expr.0 { + Some(idx) => &self.expressions[idx], + None => return None, + }; + + match expr { + Expr::Literal(_, tok) => Some((tok.clone(), tok.clone())), + Expr::Unary(_, tok, arg) => { + let arg = self.expr_span(arg); + match arg { + None => None, + Some((_, end)) => Some((tok.clone(), end)), + } + } + Expr::Binary(_, _, left, right) => { + let left = self.expr_span(left); + let right = self.expr_span(right); + match (left, right) { + (None, _) => None, + (_, None) => None, + (Some((start, _)), Some((_, end))) => Some((start, end)), + } + } + Expr::Conditional(head, _, _, _, tail) => Some((head.clone(), tail.clone())), + } + } + + pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines, value_required: bool) -> Type { // TODO: Cache and work on demand? Or is this just fine? + let exr = expr.clone(); let expr = match expr.0 { Some(idx) => &self.expressions[idx], None => return Type::Error, @@ -162,11 +260,18 @@ impl<'a> SyntaxTree<'a> { let op = op.clone(); let arg = arg.clone(); let tok = tok.clone(); - let arg_type = self.expr_type(&arg, lines); + let arg_type = self.expr_type(&arg, lines, true); match (op, arg_type) { (UnaryOp::Negate, Type::F64) => Type::F64, (UnaryOp::Not, Type::Bool) => Type::Bool, + // This is dumb and should be punished, probably. + (_, Type::Unreachable) => { + let (line, col) = lines.position(tok.start()); + self.errors.push(SyntaxError::new(line, col, format!("cannot apply a unary operator to something that doesn't yield a value"))); + Type::Error + } + // Propagate existing errors without additional complaint. (_, Type::Error) => Type::Error, @@ -184,8 +289,8 @@ impl<'a> SyntaxTree<'a> { let tok = tok.clone(); let left = left.clone(); let right = right.clone(); - let left_type = self.expr_type(&left, lines); - let right_type = self.expr_type(&right, lines); + let left_type = self.expr_type(&left, lines, true); + let right_type = self.expr_type(&right, lines, true); match (op, left_type, right_type) { ( @@ -198,6 +303,30 @@ impl<'a> SyntaxTree<'a> { (BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool, + // This is dumb and should be punished, probably. + (_, _, Type::Unreachable) => { + let (line, col) = lines.position(tok.start()); + self.errors.push(SyntaxError::new( + line, + col, + format!( + "cannot apply '{tok}' to an argument that doesn't yield a value (on the right)" + ), + )); + Type::Error + } + (_, Type::Unreachable, _) => { + let (line, col) = lines.position(tok.start()); + self.errors.push(SyntaxError::new( + line, + col, + format!( + "cannot apply '{tok}' to an argument that doesn't yield a value (on the left)" + ), + )); + Type::Error + } + // Propagate existing errors without additional complaint. (_, Type::Error, _) => Type::Error, (_, _, Type::Error) => Type::Error, @@ -210,6 +339,79 @@ impl<'a> SyntaxTree<'a> { } } } + + Expr::Conditional(_, cond, then_exp, else_exp, _) => { + let cond = cond.clone(); + let then_exp = then_exp.clone(); + let else_exp = else_exp.clone(); + + let cond_type = self.expr_type(&cond, lines, true); + let then_type = self.expr_type(&then_exp, lines, value_required); + let else_type = else_exp.map(|e| self.expr_type(&e, lines, value_required)); + if !cond_type.compatible_with(&Type::Bool) { + if !cond_type.is_error() { + let span = self + .expr_span(&cond) + .expect("If the expression has a type it must have a span"); + + let start = lines.position(span.0.start()); + let end = lines.position(span.1.start()); + self.errors.push(SyntaxError::new_spanned( + start, + end, + "the condition of an `if` expression must be a boolean", + )); + } + return Type::Error; + } + + match (then_type, else_type) { + (Type::Error, _) => Type::Error, + (_, Some(Type::Error)) => Type::Error, + + // It's an error to have a missing else branch if the value is required + (_, None) if value_required => { + let span = self + .expr_span(&exr) + .expect("How did I get this far with a broken parse?"); + let start = lines.position(span.0.start()); + let end = lines.position(span.1.start()); + self.errors.push(SyntaxError::new_spanned( + start, + end, + "this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value", + )); + Type::Error + } + + // If the value is required then the branches must be + // compatible, and the type of the expression is the type + // of the `then` branch. + (then_type, Some(else_type)) if value_required => { + if !then_type.compatible_with(&else_type) { + let span = self + .expr_span(&exr) + .expect("How did I get this far with a broken parse?"); + let start = lines.position(span.0.start()); + let end = lines.position(span.1.start()); + self.errors.push(SyntaxError::new_spanned( + start, + end, + format!("the type of the `then` branch ({then_type}) must match the type of the `else` branch ({else_type})"), + )); + Type::Error + } else { + then_type + } + } + + // The value must not be required, just mark this as unreachable. + (_, _) => { + assert!(!value_required); + Type::Unreachable + } + } + } } } } @@ -319,6 +521,8 @@ impl<'a> Parser<'a> { .tree .add_expr(Expr::Literal(Literal::Bool(false), token.clone())), + TokenKind::If => self.conditional(), + _ => { self.error("expected an expression"); ExprRef::error() @@ -393,6 +597,53 @@ impl<'a> Parser<'a> { result } + fn conditional(&mut self) -> ExprRef { + let token = self.previous.as_ref().unwrap().clone(); + let condition_expr = self.expression(); + self.consume( + Some(TokenKind::LeftBrace), + "expected '{' to start an 'if' block", + ); + let then_expr = self.expression(); + self.consume( + Some(TokenKind::RightBrace), + "expected '}' to end an 'if' block", + ); + let else_expr = match &self.current { + Some(token) if token.kind() == TokenKind::Else => { + self.advance(); + match &self.current { + // Allow `else if` without another `{`. + Some(token) if token.kind() == TokenKind::If => { + self.advance(); + Some(self.conditional()) + } + _ => { + self.consume( + Some(TokenKind::LeftBrace), + "expected '{' to start an 'else' block", + ); + let else_expr = self.expression(); + self.consume( + Some(TokenKind::RightBrace), + "Expected '}' to end an 'else' block", + ); + Some(else_expr) + } + } + } + _ => None, + }; + let tail = self.previous.as_ref().unwrap().clone(); + self.tree.add_expr(Expr::Conditional( + token, + condition_expr, + then_expr, + else_expr, + tail, + )) + } + fn unary(&mut self) -> ExprRef { let token = self.previous.as_ref().unwrap().clone(); let kind = token.kind(); @@ -525,10 +776,10 @@ mod tests { ); // TODO: 'assert_eq' is probably wrong here - let expr_type = tree.expr_type(&expr, &lines); - assert_eq!( - expected_type, expr_type, - "The type of the expression did not match" + let expr_type = tree.expr_type(&expr, &lines, true); + assert!( + expected_type.compatible_with(&expr_type), + "The type of the expression did not match. expected: {expected_type}, actual: {expr_type}" ); } @@ -564,6 +815,23 @@ mod tests { Type::Bool ); + test_expr!( + if_expression, + "if true { 23 } else { 45 }", + "(if true 23 45)", + Type::F64 + ); + // test_expr!( + // if_with_return, + // "if true { 23 } else { return 'nothing' }", + // "", + // Type::F64 + // ); + + // ======================================================================== + // Type Error Tests + // ======================================================================== + fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) { let (mut tree, expr, lines) = Parser::new(source).parse(); assert_eq!( @@ -572,8 +840,8 @@ mod tests { "Expected successful parse" ); - let expr_type = tree.expr_type(&expr, &lines); - assert_eq!(Type::Error, expr_type, "expected to have a type error"); + let expr_type = tree.expr_type(&expr, &lines, true); + assert!(expr_type.is_error()); let actual_errors = tree .errors @@ -617,4 +885,22 @@ mod tests { "cannot apply unary operator '!' to expression of type 'string'", "cannot apply unary operator '-' to expression of type 'string'" ); + + test_type_error_expr!( + if_not_bool, + "if 23 { 1 } else { 2 }", + "the condition of an `if` expression must be a boolean" + ); + + test_type_error_expr!( + if_arm_mismatch, + "if true { 1 } else { '1' }", + "the type of the `then` branch (f64) must match the type of the `else` branch (string)" + ); + + test_type_error_expr!( + if_no_else, + "if true { 1 }", + "this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value" + ); }