From cc6f77daf4f9addab1ef9c04c747b3fc3b215a95 Mon Sep 17 00:00:00 2001 From: John Doty Date: Tue, 2 Jan 2024 09:29:52 -0800 Subject: [PATCH] [fine] Type checking --- oden-script/src/parser.rs | 227 ++++++++++++++++++++++++++++++++++---- oden-script/src/tokens.rs | 73 +++++++++--- 2 files changed, 264 insertions(+), 36 deletions(-) diff --git a/oden-script/src/parser.rs b/oden-script/src/parser.rs index bcbf0823..2753a872 100644 --- a/oden-script/src/parser.rs +++ b/oden-script/src/parser.rs @@ -1,4 +1,4 @@ -use crate::tokens::{Token, TokenKind, Tokens}; +use crate::tokens::{Lines, Token, TokenKind, Tokens}; use std::fmt; #[derive(PartialEq, Eq)] @@ -30,30 +30,37 @@ impl fmt::Display for SyntaxError { } } +#[derive(Clone)] pub enum Literal { Float64(f64), String(String), + Bool(bool), } +#[derive(Copy, Clone)] pub enum UnaryOp { Negate, + Not, } +#[derive(Copy, Clone)] pub enum BinaryOp { Add, Subtract, - Mutiply, + Multiply, Divide, And, Or, } +#[derive(Clone)] pub enum Expr<'a> { Literal(Literal, Token<'a>), Unary(UnaryOp, Token<'a>, ExprRef), Binary(BinaryOp, Token<'a>, ExprRef, ExprRef), } +#[derive(Clone)] pub struct ExprRef(Option); impl ExprRef { @@ -62,6 +69,39 @@ impl ExprRef { } } +// TODO: Eventually we will be unable to use Eq and PartialEq here, and will +// need to do our own thing. +#[derive(Clone, Eq, PartialEq)] +pub enum Type { + Error, + + // TODO: Numeric literals should be implicitly convertible unlike other + // types. 
+ F64, + String, + Bool, +} + +impl std::fmt::Debug for Type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{self}") + } +} + +impl std::fmt::Display for Type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use Type::*; + match self { + Error => write!(f, "<< INTERNAL ERROR >>"), + F64 => write!(f, "f64"), + String => write!(f, "string"), + Bool => write!(f, "bool"), + } + } +} + +pub struct TypeRef(Option); + pub struct SyntaxTree<'a> { pub errors: Vec, expressions: Vec>, @@ -102,6 +142,76 @@ impl<'a> SyntaxTree<'a> { None => "<|EOF|>".to_string(), } } + + pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines) -> Type { + // TODO: Cache and work on demand? Or is this just fine? + + let expr = match expr.0 { + Some(idx) => &self.expressions[idx], + None => return Type::Error, + }; + match expr { + Expr::Literal(lit, _) => match lit { + Literal::Float64(_) => Type::F64, + Literal::String(_) => Type::String, + Literal::Bool(_) => Type::Bool, + }, + + // Figure out the main thing. Check for a... trait? + Expr::Unary(op, tok, arg) => { + let op = op.clone(); + let arg = arg.clone(); + let tok = tok.clone(); + let arg_type = self.expr_type(&arg, lines); + match (op, arg_type) { + (UnaryOp::Negate, Type::F64) => Type::F64, + (UnaryOp::Not, Type::Bool) => Type::Bool, + + // Propagate existing errors without additional complaint. + (_, Type::Error) => Type::Error, + + // Missed the whole table, must be an error. 
+ (_, arg_type) => { + let (line, col) = lines.position(tok.start()); + self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'"))); + Type::Error + } + } + } + + Expr::Binary(op, tok, left, right) => { + let op = op.clone(); + let tok = tok.clone(); + let left = left.clone(); + let right = right.clone(); + let left_type = self.expr_type(&left, lines); + let right_type = self.expr_type(&right, lines); + + match (op, left_type, right_type) { + ( + BinaryOp::Add | BinaryOp::Subtract | BinaryOp::Multiply | BinaryOp::Divide, + Type::F64, + Type::F64, + ) => Type::F64, + + (BinaryOp::Add, Type::String, Type::String) => Type::String, + + (BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool, + + // Propagate existing errors without additional complaint. + (_, Type::Error, _) => Type::Error, + (_, _, Type::Error) => Type::Error, + + // Missed the whole table, it must be an error. + (_, left_type, right_type) => { + let (line, col) = lines.position(tok.start()); + self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)"))); + Type::Error + } + } + } + } + } } // BINDING POWERS. 
When parsing expressions we only accept expressions that @@ -161,10 +271,10 @@ impl<'a> Parser<'a> { parser } - pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef) { + pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) { let expr = self.expression(); self.consume(None, "expected end of expression"); - (self.tree, expr) + (self.tree, expr, self.tokens.lines()) } fn expression(&mut self) -> ExprRef { @@ -196,10 +306,19 @@ impl<'a> Parser<'a> { let token = self.previous.as_ref(); match token { Some(token) => match token.kind() { + TokenKind::Bang => self.unary(), TokenKind::LeftParen => self.grouping(), TokenKind::Number => self.number(), TokenKind::Minus => self.unary(), TokenKind::String => self.string(), + + TokenKind::True => self + .tree + .add_expr(Expr::Literal(Literal::Bool(true), token.clone())), + TokenKind::False => self + .tree + .add_expr(Expr::Literal(Literal::Bool(false), token.clone())), + _ => { self.error("expected an expression"); ExprRef::error() @@ -216,9 +335,12 @@ impl<'a> Parser<'a> { self.trace("infix"); let kind = self.previous.as_ref().unwrap().kind(); match kind { - TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => { - self.binary(power, left) - } + TokenKind::Plus + | TokenKind::Minus + | TokenKind::Star + | TokenKind::Slash + | TokenKind::And + | TokenKind::Or => self.binary(power, left), _ => panic!("Unknown infix operator, dispatch error?"), } } @@ -277,6 +399,7 @@ impl<'a> Parser<'a> { let expr = self.expression_with_power(UNARY_POWER); let op = match kind { TokenKind::Minus => UnaryOp::Negate, + TokenKind::Bang => UnaryOp::Not, _ => panic!("unsuitable unary: {:?}: no op", kind), }; @@ -288,7 +411,7 @@ impl<'a> Parser<'a> { let op = match token.kind() { TokenKind::Plus => BinaryOp::Add, TokenKind::Minus => BinaryOp::Subtract, - TokenKind::Star => BinaryOp::Mutiply, + TokenKind::Star => BinaryOp::Multiply, TokenKind::Slash => BinaryOp::Divide, TokenKind::And => BinaryOp::And, TokenKind::Or => 
BinaryOp::Or, @@ -388,32 +511,98 @@ mod tests { use super::*; use pretty_assertions::assert_eq; - fn test_successful_expression_parse(source: &str, expected: &str) { - let (tree, expr) = Parser::new(source).parse(); + fn test_successful_expression_parse(source: &str, expected: &str, expected_type: Type) { + let (mut tree, expr, lines) = Parser::new(source).parse(); assert_eq!( Vec::::new(), tree.errors, "Expected successful parse" ); - assert_eq!(expected, tree.dump_expr(&expr)); + assert_eq!( + expected, + tree.dump_expr(&expr), + "The parse structure of the expressions did not match" + ); + + // TODO: 'assert_eq' is probably wrong here + let expr_type = tree.expr_type(&expr, &lines); + assert_eq!( + expected_type, expr_type, + "The type of the expression did not match" + ); } macro_rules! test_expr { - ($name:ident, $input:expr, $expected:expr) => { + ($name:ident, $input:expr, $expected:expr, $type:expr) => { #[test] fn $name() { - test_successful_expression_parse($input, $expected); + test_successful_expression_parse($input, $expected, $type); } }; } - test_expr!(number_expr, "12", "12"); - test_expr!(add_expr, "1 + 2", "(+ 1 2)"); - test_expr!(prec_expr, "1 + 2 * 3 - 7 * 7", "(- (+ 1 (* 2 3)) (* 7 7))"); - test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)"); + test_expr!(number_expr, "12", "12", Type::F64); + test_expr!(add_expr, "1 + 2", "(+ 1 2)", Type::F64); + test_expr!( + prec_expr, + "1 + 2 * 3 - 7 * 7", + "(- (+ 1 (* 2 3)) (* 7 7))", + Type::F64 + ); + test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)", Type::F64); test_expr!( strings, - r#" "Hello " + "world!" "#, - r#"(+ "Hello " "world!")"# + r#" "Hello " + 'world!' "#, + r#"(+ "Hello " 'world!')"#, + Type::String + ); + + test_expr!( + booleans, + "true and false or false and !true", + "(or (and true false) (and false (! 
true)))", + Type::Bool + ); + + fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) { + let (mut tree, expr, lines) = Parser::new(source).parse(); + assert_eq!( + Vec::::new(), + tree.errors, + "Expected successful parse" + ); + + let expr_type = tree.expr_type(&expr, &lines); + assert_eq!(Type::Error, expr_type, "expected to have a type error"); + + let actual_errors = tree + .errors + .iter() + .map(|e| e.message.as_str()) + .collect::>(); + assert_eq!(expected_errors, actual_errors); + } + + macro_rules! test_type_error_expr { + ($name:ident, $input:expr, $($s:expr),+) => { + #[test] + fn $name() { + let expected_errors: Vec<&str> = (vec![$($s),*]); + test_type_error_expression($input, expected_errors); + } + } + } + + test_type_error_expr!( + negate_string, + "-('what?')", + "cannot apply unary operator '-' to expression of type 'string'" + ); + + test_type_error_expr!( + errors_propagate_do_not_duplicate, + "!'hello' / 27 * -('what?') + 23", + "cannot apply unary operator '!' 
to expression of type 'string'", + "cannot apply unary operator '-' to expression of type 'string'" ); } diff --git a/oden-script/src/tokens.rs b/oden-script/src/tokens.rs index c989dab9..c2bccfb9 100644 --- a/oden-script/src/tokens.rs +++ b/oden-script/src/tokens.rs @@ -74,6 +74,10 @@ impl<'a> Token<'a> { } } + pub fn start(&self) -> usize { + self.start + } + pub fn kind(&self) -> TokenKind { self.kind } @@ -95,23 +99,22 @@ impl<'a> std::fmt::Display for Token<'a> { } } -pub struct Tokens<'a> { - source: &'a str, - chars: std::str::CharIndices<'a>, - next_char: Option<(usize, char)>, +pub struct Lines { newlines: Vec, + eof: usize, } -impl<'a> Tokens<'a> { - pub fn new(source: &'a str) -> Self { - let mut result = Tokens { - source, - chars: source.char_indices(), - next_char: None, +impl Lines { + fn new(eof: usize) -> Self { + Lines { newlines: Vec::new(), - }; - result.advance(); // Prime the pump - result + eof, + } + } + + /// Record the position of a newline in the source. + pub fn add_line(&mut self, pos: usize) { + self.newlines.push(pos) } /// Return the position of the given token as a (line, column) pair. By @@ -122,9 +125,15 @@ impl<'a> Tokens<'a> { pub fn token_position(&self, token: &Option) -> (usize, usize) { let start = match token { Some(t) => t.start, - None => self.source.len(), + None => self.eof, }; - let line_end_index = match self.newlines.binary_search(&start) { + self.position(start) + } + + /// Return the position of the given character offset as a (line,column) + /// pair. By convention, lines are 1-based and columns are 0-based. 
+ pub fn position(&self, offset: usize) -> (usize, usize) { + let line_end_index = match self.newlines.binary_search(&offset) { Ok(index) => index, Err(index) => index, }; @@ -134,9 +143,39 @@ impl<'a> Tokens<'a> { self.newlines[line_end_index - 1] + 1 }; let line_number = line_end_index + 1; - let column_offset = start - line_start_pos; + let column_offset = offset - line_start_pos; (line_number, column_offset) } +} + +pub struct Tokens<'a> { + source: &'a str, + chars: std::str::CharIndices<'a>, + next_char: Option<(usize, char)>, + lines: Lines, +} + +impl<'a> Tokens<'a> { + pub fn new(source: &'a str) -> Self { + let mut result = Tokens { + source, + chars: source.char_indices(), + next_char: None, + lines: Lines::new(source.len()), + }; + result.advance(); // Prime the pump + result + } + + pub fn lines(self) -> Lines { + self.lines + } + + /// Return the position of the given token as a (line, column) pair. See + /// `Lines::token_position` for more information about the range, etc. + pub fn token_position(&self, token: &Option) -> (usize, usize) { + self.lines.token_position(token) + } fn token(&self, start: usize, kind: TokenKind) -> Token<'a> { let value = &self.source[start..self.pos()]; @@ -363,7 +402,7 @@ impl<'a> Tokens<'a> { fn skip_whitespace(&mut self) { while let Some((pos, ch)) = self.next_char { if ch == '\n' { - self.newlines.push(pos); + self.lines.add_line(pos); } else if !ch.is_whitespace() { break; }