[fine] Type checking

2024-01-02 09:29:52 -08:00 · 2024-01-02 09:29:52 -08:00 · cc6f77daf4
commit cc6f77daf4
parent 633ce89817
2 changed files with 264 additions and 36 deletions
--- a/oden-script/src/parser.rs
+++ b/oden-script/src/parser.rs
@ -1,4 +1,4 @@
-use crate::tokens::{Token, TokenKind, Tokens};
+use crate::tokens::{Lines, Token, TokenKind, Tokens};
 use std::fmt;

 #[derive(PartialEq, Eq)]
@ -30,30 +30,37 @@ impl fmt::Display for SyntaxError {
    }
 }

+#[derive(Clone)]
 pub enum Literal {
    Float64(f64),
    String(String),
+    Bool(bool),
 }

+#[derive(Copy, Clone)]
 pub enum UnaryOp {
    Negate,
+    Not,
 }

+#[derive(Copy, Clone)]
 pub enum BinaryOp {
    Add,
    Subtract,
-    Mutiply,
+    Multiply,
    Divide,
    And,
    Or,
 }

+#[derive(Clone)]
 pub enum Expr<'a> {
    Literal(Literal, Token<'a>),
    Unary(UnaryOp, Token<'a>, ExprRef),
    Binary(BinaryOp, Token<'a>, ExprRef, ExprRef),
 }

+#[derive(Clone)]
 pub struct ExprRef(Option<usize>);

 impl ExprRef {
@ -62,6 +69,39 @@ impl ExprRef {
    }
 }

+// TODO: Eventually we will be unable to use Eq and PartialEq here, and will
+//       need to do our own thing.
+#[derive(Clone, Eq, PartialEq)]
+pub enum Type {
+    Error,
+
+    // TODO: Numeric literals should be implicitly convertible, unlike other
+    // types.
+    F64,
+    String,
+    Bool,
+}
+
+impl std::fmt::Debug for Type {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{self}")
+    }
+}
+
+impl std::fmt::Display for Type {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use Type::*;
+        match self {
+            Error => write!(f, "<< INTERNAL ERROR >>"),
+            F64 => write!(f, "f64"),
+            String => write!(f, "string"),
+            Bool => write!(f, "bool"),
+        }
+    }
+}
+
+pub struct TypeRef(Option<usize>);
+
 pub struct SyntaxTree<'a> {
    pub errors: Vec<SyntaxError>,
    expressions: Vec<Expr<'a>>,
@ -102,6 +142,76 @@ impl<'a> SyntaxTree<'a> {
            None => "<|EOF|>".to_string(),
        }
    }
+
+    pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines) -> Type {
+        // TODO: Cache and work on demand? Or is this just fine?
+
+        let expr = match expr.0 {
+            Some(idx) => &self.expressions[idx],
+            None => return Type::Error,
+        };
+        match expr {
+            Expr::Literal(lit, _) => match lit {
+                Literal::Float64(_) => Type::F64,
+                Literal::String(_) => Type::String,
+                Literal::Bool(_) => Type::Bool,
+            },
+
+            // Figure out the main thing. Check for a... trait?
+            Expr::Unary(op, tok, arg) => {
+                let op = op.clone();
+                let arg = arg.clone();
+                let tok = tok.clone();
+                let arg_type = self.expr_type(&arg, lines);
+                match (op, arg_type) {
+                    (UnaryOp::Negate, Type::F64) => Type::F64,
+                    (UnaryOp::Not, Type::Bool) => Type::Bool,
+
+                    // Propagate existing errors without additional complaint.
+                    (_, Type::Error) => Type::Error,
+
+                    // Missed the whole table, must be an error.
+                    (_, arg_type) => {
+                        let (line, col) = lines.position(tok.start());
+                        self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'")));
+                        Type::Error
+                    }
+                }
+            }
+
+            Expr::Binary(op, tok, left, right) => {
+                let op = op.clone();
+                let tok = tok.clone();
+                let left = left.clone();
+                let right = right.clone();
+                let left_type = self.expr_type(&left, lines);
+                let right_type = self.expr_type(&right, lines);
+
+                match (op, left_type, right_type) {
+                    (
+                        BinaryOp::Add | BinaryOp::Subtract | BinaryOp::Multiply | BinaryOp::Divide,
+                        Type::F64,
+                        Type::F64,
+                    ) => Type::F64,
+
+                    (BinaryOp::Add, Type::String, Type::String) => Type::String,
+
+                    (BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool,
+
+                    // Propagate existing errors without additional complaint.
+                    (_, Type::Error, _) => Type::Error,
+                    (_, _, Type::Error) => Type::Error,
+
+                    // Missed the whole table, it must be an error.
+                    (_, left_type, right_type) => {
+                        let (line, col) = lines.position(tok.start());
+                        self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)")));
+                        Type::Error
+                    }
+                }
+            }
+        }
+    }
 }

 // BINDING POWERS. When parsing expressions we only accept expressions that
@ -161,10 +271,10 @@ impl<'a> Parser<'a> {
        parser
    }

-    pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef) {
+    pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) {
        let expr = self.expression();
        self.consume(None, "expected end of expression");
-        (self.tree, expr)
+        (self.tree, expr, self.tokens.lines())
    }

    fn expression(&mut self) -> ExprRef {
@ -196,10 +306,19 @@ impl<'a> Parser<'a> {
        let token = self.previous.as_ref();
        match token {
            Some(token) => match token.kind() {
+                TokenKind::Bang => self.unary(),
                TokenKind::LeftParen => self.grouping(),
                TokenKind::Number => self.number(),
                TokenKind::Minus => self.unary(),
                TokenKind::String => self.string(),
+
+                TokenKind::True => self
+                    .tree
+                    .add_expr(Expr::Literal(Literal::Bool(true), token.clone())),
+                TokenKind::False => self
+                    .tree
+                    .add_expr(Expr::Literal(Literal::Bool(false), token.clone())),
+
                _ => {
                    self.error("expected an expression");
                    ExprRef::error()
@ -216,9 +335,12 @@ impl<'a> Parser<'a> {
        self.trace("infix");
        let kind = self.previous.as_ref().unwrap().kind();
        match kind {
-            TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => {
-                self.binary(power, left)
-            }
+            TokenKind::Plus
+            | TokenKind::Minus
+            | TokenKind::Star
+            | TokenKind::Slash
+            | TokenKind::And
+            | TokenKind::Or => self.binary(power, left),
            _ => panic!("Unknown infix operator, dispatch error?"),
        }
    }
@ -277,6 +399,7 @@ impl<'a> Parser<'a> {
        let expr = self.expression_with_power(UNARY_POWER);
        let op = match kind {
            TokenKind::Minus => UnaryOp::Negate,
+            TokenKind::Bang => UnaryOp::Not,
            _ => panic!("unsuitable unary: {:?}: no op", kind),
        };

@ -288,7 +411,7 @@ impl<'a> Parser<'a> {
        let op = match token.kind() {
            TokenKind::Plus => BinaryOp::Add,
            TokenKind::Minus => BinaryOp::Subtract,
-            TokenKind::Star => BinaryOp::Mutiply,
+            TokenKind::Star => BinaryOp::Multiply,
            TokenKind::Slash => BinaryOp::Divide,
            TokenKind::And => BinaryOp::And,
            TokenKind::Or => BinaryOp::Or,
@ -388,32 +511,98 @@ mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

-    fn test_successful_expression_parse(source: &str, expected: &str) {
-        let (tree, expr) = Parser::new(source).parse();
+    fn test_successful_expression_parse(source: &str, expected: &str, expected_type: Type) {
+        let (mut tree, expr, lines) = Parser::new(source).parse();
        assert_eq!(
            Vec::<SyntaxError>::new(),
            tree.errors,
            "Expected successful parse"
        );
-        assert_eq!(expected, tree.dump_expr(&expr));
+        assert_eq!(
+            expected,
+            tree.dump_expr(&expr),
+            "The parse structure of the expressions did not match"
+        );
+
+        // TODO: 'assert_eq' is probably wrong here
+        let expr_type = tree.expr_type(&expr, &lines);
+        assert_eq!(
+            expected_type, expr_type,
+            "The type of the expression did not match"
+        );
    }

    macro_rules! test_expr {
-        ($name:ident, $input:expr, $expected:expr) => {
+        ($name:ident, $input:expr, $expected:expr, $type:expr) => {
            #[test]
            fn $name() {
-                test_successful_expression_parse($input, $expected);
+                test_successful_expression_parse($input, $expected, $type);
            }
        };
    }

-    test_expr!(number_expr, "12", "12");
-    test_expr!(add_expr, "1 + 2", "(+ 1 2)");
-    test_expr!(prec_expr, "1 + 2 * 3 - 7 * 7", "(- (+ 1 (* 2 3)) (* 7 7))");
-    test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)");
+    test_expr!(number_expr, "12", "12", Type::F64);
+    test_expr!(add_expr, "1 + 2", "(+ 1 2)", Type::F64);
+    test_expr!(
+        prec_expr,
+        "1 + 2 * 3 - 7 * 7",
+        "(- (+ 1 (* 2 3)) (* 7 7))",
+        Type::F64
+    );
+    test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)", Type::F64);
    test_expr!(
        strings,
-        r#" "Hello " + "world!" "#,
-        r#"(+ "Hello " "world!")"#
+        r#" "Hello " + 'world!' "#,
+        r#"(+ "Hello " 'world!')"#,
+        Type::String
+    );
+
+    test_expr!(
+        booleans,
+        "true and false or false and !true",
+        "(or (and true false) (and false (! true)))",
+        Type::Bool
+    );
+
+    fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) {
+        let (mut tree, expr, lines) = Parser::new(source).parse();
+        assert_eq!(
+            Vec::<SyntaxError>::new(),
+            tree.errors,
+            "Expected successful parse"
+        );
+
+        let expr_type = tree.expr_type(&expr, &lines);
+        assert_eq!(Type::Error, expr_type, "expected to have a type error");
+
+        let actual_errors = tree
+            .errors
+            .iter()
+            .map(|e| e.message.as_str())
+            .collect::<Vec<_>>();
+        assert_eq!(expected_errors, actual_errors);
+    }
+
+    macro_rules! test_type_error_expr {
+        ($name:ident, $input:expr, $($s:expr),+) => {
+            #[test]
+            fn $name() {
+                let expected_errors: Vec<&str> = (vec![$($s),*]);
+                test_type_error_expression($input, expected_errors);
+            }
+        }
+    }
+
+    test_type_error_expr!(
+        negate_string,
+        "-('what?')",
+        "cannot apply unary operator '-' to expression of type 'string'"
+    );
+
+    test_type_error_expr!(
+        errors_propagate_do_not_duplicate,
+        "!'hello' / 27 * -('what?') + 23",
+        "cannot apply unary operator '!' to expression of type 'string'",
+        "cannot apply unary operator '-' to expression of type 'string'"
    );
 }
--- a/oden-script/src/tokens.rs
+++ b/oden-script/src/tokens.rs
@ -74,6 +74,10 @@ impl<'a> Token<'a> {
        }
    }

+    pub fn start(&self) -> usize {
+        self.start
+    }
+
    pub fn kind(&self) -> TokenKind {
        self.kind
    }
@ -95,23 +99,22 @@ impl<'a> std::fmt::Display for Token<'a> {
    }
 }

-pub struct Tokens<'a> {
-    source: &'a str,
-    chars: std::str::CharIndices<'a>,
-    next_char: Option<(usize, char)>,
+pub struct Lines {
    newlines: Vec<usize>,
+    eof: usize,
 }

-impl<'a> Tokens<'a> {
-    pub fn new(source: &'a str) -> Self {
-        let mut result = Tokens {
-            source,
-            chars: source.char_indices(),
-            next_char: None,
+impl Lines {
+    fn new(eof: usize) -> Self {
+        Lines {
            newlines: Vec::new(),
-        };
-        result.advance(); // Prime the pump
-        result
+            eof,
+        }
+    }
+
+    /// Record the position of a newline in the source.
+    pub fn add_line(&mut self, pos: usize) {
+        self.newlines.push(pos)
    }

    /// Return the position of the given token as a (line, column) pair. By
@ -122,9 +125,15 @@ impl<'a> Tokens<'a> {
    pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
        let start = match token {
            Some(t) => t.start,
-            None => self.source.len(),
+            None => self.eof,
        };
-        let line_end_index = match self.newlines.binary_search(&start) {
+        self.position(start)
+    }
+
+    /// Return the position of the given character offset as a (line, column)
+    /// pair. By convention, lines are 1-based and columns are 0-based.
+    pub fn position(&self, offset: usize) -> (usize, usize) {
+        let line_end_index = match self.newlines.binary_search(&offset) {
            Ok(index) => index,
            Err(index) => index,
        };
@ -134,9 +143,39 @@ impl<'a> Tokens<'a> {
            self.newlines[line_end_index - 1] + 1
        };
        let line_number = line_end_index + 1;
-        let column_offset = start - line_start_pos;
+        let column_offset = offset - line_start_pos;
        (line_number, column_offset)
    }
+}
+
+pub struct Tokens<'a> {
+    source: &'a str,
+    chars: std::str::CharIndices<'a>,
+    next_char: Option<(usize, char)>,
+    lines: Lines,
+}
+
+impl<'a> Tokens<'a> {
+    pub fn new(source: &'a str) -> Self {
+        let mut result = Tokens {
+            source,
+            chars: source.char_indices(),
+            next_char: None,
+            lines: Lines::new(source.len()),
+        };
+        result.advance(); // Prime the pump
+        result
+    }
+
+    pub fn lines(self) -> Lines {
+        self.lines
+    }
+
+    /// Return the position of the given token as a (line, column) pair. See
+    /// `Lines::token_position` for more information about the range, etc.
+    pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
+        self.lines.token_position(token)
+    }

    fn token(&self, start: usize, kind: TokenKind) -> Token<'a> {
        let value = &self.source[start..self.pos()];
@ -363,7 +402,7 @@ impl<'a> Tokens<'a> {
    fn skip_whitespace(&mut self) {
        while let Some((pos, ch)) = self.next_char {
            if ch == '\n' {
-                self.newlines.push(pos);
+                self.lines.add_line(pos);
            } else if !ch.is_whitespace() {
                break;
            }