From cc6f77daf4f9addab1ef9c04c747b3fc3b215a95 Mon Sep 17 00:00:00 2001
From: John Doty <john@d0ty.me>
Date: Tue, 2 Jan 2024 09:29:52 -0800
Subject: [PATCH] [fine] Type checking

---
 oden-script/src/parser.rs | 227 ++++++++++++++++++++++++++++++++++----
 oden-script/src/tokens.rs |  73 +++++++++---
 2 files changed, 264 insertions(+), 36 deletions(-)
diff --git a/oden-script/src/parser.rs b/oden-script/src/parser.rs
index bcbf0823..2753a872 100644
--- a/oden-script/src/parser.rs
+++ b/oden-script/src/parser.rs
@@ -1,4 +1,4 @@
-use crate::tokens::{Token, TokenKind, Tokens};
+use crate::tokens::{Lines, Token, TokenKind, Tokens};
 use std::fmt;
 
 #[derive(PartialEq, Eq)]
@@ -30,30 +30,37 @@ impl fmt::Display for SyntaxError {
     }
 }
 
+#[derive(Clone)]
 pub enum Literal {
     Float64(f64),
     String(String),
+    Bool(bool),
 }
 
+#[derive(Copy, Clone)]
 pub enum UnaryOp {
     Negate,
+    Not,
 }
 
+#[derive(Copy, Clone)]
 pub enum BinaryOp {
     Add,
     Subtract,
-    Mutiply,
+    Multiply,
     Divide,
     And,
     Or,
 }
 
+#[derive(Clone)]
 pub enum Expr<'a> {
     Literal(Literal, Token<'a>),
     Unary(UnaryOp, Token<'a>, ExprRef),
     Binary(BinaryOp, Token<'a>, ExprRef, ExprRef),
 }
 
+#[derive(Clone)]
 pub struct ExprRef(Option<usize>);
 
 impl ExprRef {
@@ -62,6 +69,39 @@ impl ExprRef {
     }
 }
 
+// TODO: Eventually we will be unable to use Eq and PartialEq here, and will
+//       need to do our own thing.
+#[derive(Clone, Eq, PartialEq)]
+pub enum Type {
+    Error,
+
+    // TODO: Numeric literals should be implicitly convertible, unlike other
+    // types.
+    F64,
+    String,
+    Bool,
+}
+
+impl std::fmt::Debug for Type {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{self}")
+    }
+}
+
+impl std::fmt::Display for Type {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use Type::*;
+        match self {
+            Error => write!(f, "<< INTERNAL ERROR >>"),
+            F64 => write!(f, "f64"),
+            String => write!(f, "string"),
+            Bool => write!(f, "bool"),
+        }
+    }
+}
+
+pub struct TypeRef(Option<usize>);
+
 pub struct SyntaxTree<'a> {
     pub errors: Vec<SyntaxError>,
     expressions: Vec<Expr<'a>>,
@@ -102,6 +142,76 @@ impl<'a> SyntaxTree<'a> {
             None => "<|EOF|>".to_string(),
         }
     }
+
+    pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines) -> Type {
+        // TODO: Cache and work on demand? Or is this just fine?
+
+        let expr = match expr.0 {
+            Some(idx) => &self.expressions[idx],
+            None => return Type::Error,
+        };
+        match expr {
+            Expr::Literal(lit, _) => match lit {
+                Literal::Float64(_) => Type::F64,
+                Literal::String(_) => Type::String,
+                Literal::Bool(_) => Type::Bool,
+            },
+
+            // Figure out the main thing. Check for a... trait?
+            Expr::Unary(op, tok, arg) => {
+                let op = op.clone();
+                let arg = arg.clone();
+                let tok = tok.clone();
+                let arg_type = self.expr_type(&arg, lines);
+                match (op, arg_type) {
+                    (UnaryOp::Negate, Type::F64) => Type::F64,
+                    (UnaryOp::Not, Type::Bool) => Type::Bool,
+
+                    // Propagate existing errors without additional complaint.
+                    (_, Type::Error) => Type::Error,
+
+                    // Missed the whole table, must be an error.
+                    (_, arg_type) => {
+                        let (line, col) = lines.position(tok.start());
+                        self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'")));
+                        Type::Error
+                    }
+                }
+            }
+
+            Expr::Binary(op, tok, left, right) => {
+                let op = op.clone();
+                let tok = tok.clone();
+                let left = left.clone();
+                let right = right.clone();
+                let left_type = self.expr_type(&left, lines);
+                let right_type = self.expr_type(&right, lines);
+
+                match (op, left_type, right_type) {
+                    (
+                        BinaryOp::Add | BinaryOp::Subtract | BinaryOp::Multiply | BinaryOp::Divide,
+                        Type::F64,
+                        Type::F64,
+                    ) => Type::F64,
+
+                    (BinaryOp::Add, Type::String, Type::String) => Type::String,
+
+                    (BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool,
+
+                    // Propagate existing errors without additional complaint.
+                    (_, Type::Error, _) => Type::Error,
+                    (_, _, Type::Error) => Type::Error,
+
+                    // Missed the whole table, it must be an error.
+                    (_, left_type, right_type) => {
+                        let (line, col) = lines.position(tok.start());
+                        self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)")));
+                        Type::Error
+                    }
+                }
+            }
+        }
+    }
 }
 
 // BINDING POWERS. When parsing expressions we only accept expressions that
@@ -161,10 +271,10 @@ impl<'a> Parser<'a> {
         parser
     }
 
-    pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef) {
+    pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) {
         let expr = self.expression();
         self.consume(None, "expected end of expression");
-        (self.tree, expr)
+        (self.tree, expr, self.tokens.lines())
     }
 
     fn expression(&mut self) -> ExprRef {
@@ -196,10 +306,19 @@ impl<'a> Parser<'a> {
         let token = self.previous.as_ref();
         match token {
             Some(token) => match token.kind() {
+                TokenKind::Bang => self.unary(),
                 TokenKind::LeftParen => self.grouping(),
                 TokenKind::Number => self.number(),
                 TokenKind::Minus => self.unary(),
                 TokenKind::String => self.string(),
+
+                TokenKind::True => self
+                    .tree
+                    .add_expr(Expr::Literal(Literal::Bool(true), token.clone())),
+                TokenKind::False => self
+                    .tree
+                    .add_expr(Expr::Literal(Literal::Bool(false), token.clone())),
+
                 _ => {
                     self.error("expected an expression");
                     ExprRef::error()
@@ -216,9 +335,12 @@ impl<'a> Parser<'a> {
         self.trace("infix");
         let kind = self.previous.as_ref().unwrap().kind();
         match kind {
-            TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => {
-                self.binary(power, left)
-            }
+            TokenKind::Plus
+            | TokenKind::Minus
+            | TokenKind::Star
+            | TokenKind::Slash
+            | TokenKind::And
+            | TokenKind::Or => self.binary(power, left),
             _ => panic!("Unknown infix operator, dispatch error?"),
         }
     }
@@ -277,6 +399,7 @@ impl<'a> Parser<'a> {
         let expr = self.expression_with_power(UNARY_POWER);
         let op = match kind {
             TokenKind::Minus => UnaryOp::Negate,
+            TokenKind::Bang => UnaryOp::Not,
             _ => panic!("unsuitable unary: {:?}: no op", kind),
         };
 
@@ -288,7 +411,7 @@ impl<'a> Parser<'a> {
         let op = match token.kind() {
             TokenKind::Plus => BinaryOp::Add,
             TokenKind::Minus => BinaryOp::Subtract,
-            TokenKind::Star => BinaryOp::Mutiply,
+            TokenKind::Star => BinaryOp::Multiply,
             TokenKind::Slash => BinaryOp::Divide,
             TokenKind::And => BinaryOp::And,
             TokenKind::Or => BinaryOp::Or,
@@ -388,32 +511,98 @@ mod tests {
     use super::*;
     use pretty_assertions::assert_eq;
 
-    fn test_successful_expression_parse(source: &str, expected: &str) {
-        let (tree, expr) = Parser::new(source).parse();
+    fn test_successful_expression_parse(source: &str, expected: &str, expected_type: Type) {
+        let (mut tree, expr, lines) = Parser::new(source).parse();
         assert_eq!(
             Vec::<SyntaxError>::new(),
             tree.errors,
             "Expected successful parse"
         );
-        assert_eq!(expected, tree.dump_expr(&expr));
+        assert_eq!(
+            expected,
+            tree.dump_expr(&expr),
+            "The parse structure of the expressions did not match"
+        );
+
+        // TODO: 'assert_eq' is probably wrong here
+        let expr_type = tree.expr_type(&expr, &lines);
+        assert_eq!(
+            expected_type, expr_type,
+            "The type of the expression did not match"
+        );
     }
 
     macro_rules! test_expr {
-        ($name:ident, $input:expr, $expected:expr) => {
+        ($name:ident, $input:expr, $expected:expr, $type:expr) => {
             #[test]
             fn $name() {
-                test_successful_expression_parse($input, $expected);
+                test_successful_expression_parse($input, $expected, $type);
             }
         };
     }
 
-    test_expr!(number_expr, "12", "12");
-    test_expr!(add_expr, "1 + 2", "(+ 1 2)");
-    test_expr!(prec_expr, "1 + 2 * 3 - 7 * 7", "(- (+ 1 (* 2 3)) (* 7 7))");
-    test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)");
+    test_expr!(number_expr, "12", "12", Type::F64);
+    test_expr!(add_expr, "1 + 2", "(+ 1 2)", Type::F64);
+    test_expr!(
+        prec_expr,
+        "1 + 2 * 3 - 7 * 7",
+        "(- (+ 1 (* 2 3)) (* 7 7))",
+        Type::F64
+    );
+    test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)", Type::F64);
     test_expr!(
         strings,
-        r#" "Hello " + "world!" "#,
-        r#"(+ "Hello " "world!")"#
+        r#" "Hello " + 'world!' "#,
+        r#"(+ "Hello " 'world!')"#,
+        Type::String
+    );
+
+    test_expr!(
+        booleans,
+        "true and false or false and !true",
+        "(or (and true false) (and false (! true)))",
+        Type::Bool
+    );
+
+    fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) {
+        let (mut tree, expr, lines) = Parser::new(source).parse();
+        assert_eq!(
+            Vec::<SyntaxError>::new(),
+            tree.errors,
+            "Expected successful parse"
+        );
+
+        let expr_type = tree.expr_type(&expr, &lines);
+        assert_eq!(Type::Error, expr_type, "expected to have a type error");
+
+        let actual_errors = tree
+            .errors
+            .iter()
+            .map(|e| e.message.as_str())
+            .collect::<Vec<_>>();
+        assert_eq!(expected_errors, actual_errors);
+    }
+
+    macro_rules! test_type_error_expr {
+        ($name:ident, $input:expr, $($s:expr),+) => {
+            #[test]
+            fn $name() {
+                let expected_errors: Vec<&str> = (vec![$($s),*]);
+                test_type_error_expression($input, expected_errors);
+            }
+        }
+    }
+
+    test_type_error_expr!(
+        negate_string,
+        "-('what?')",
+        "cannot apply unary operator '-' to expression of type 'string'"
+    );
+
+    test_type_error_expr!(
+        errors_propagate_do_not_duplicate,
+        "!'hello' / 27 * -('what?') + 23",
+        "cannot apply unary operator '!' to expression of type 'string'",
+        "cannot apply unary operator '-' to expression of type 'string'"
     );
 }
diff --git a/oden-script/src/tokens.rs b/oden-script/src/tokens.rs
index c989dab9..c2bccfb9 100644
--- a/oden-script/src/tokens.rs
+++ b/oden-script/src/tokens.rs
@@ -74,6 +74,10 @@ impl<'a> Token<'a> {
         }
     }
 
+    pub fn start(&self) -> usize {
+        self.start
+    }
+
     pub fn kind(&self) -> TokenKind {
         self.kind
     }
@@ -95,23 +99,22 @@ impl<'a> std::fmt::Display for Token<'a> {
     }
 }
 
-pub struct Tokens<'a> {
-    source: &'a str,
-    chars: std::str::CharIndices<'a>,
-    next_char: Option<(usize, char)>,
+pub struct Lines {
     newlines: Vec<usize>,
+    eof: usize,
 }
 
-impl<'a> Tokens<'a> {
-    pub fn new(source: &'a str) -> Self {
-        let mut result = Tokens {
-            source,
-            chars: source.char_indices(),
-            next_char: None,
+impl Lines {
+    fn new(eof: usize) -> Self {
+        Lines {
             newlines: Vec::new(),
-        };
-        result.advance(); // Prime the pump
-        result
+            eof,
+        }
+    }
+
+    /// Record the position of a newline in the source.
+    pub fn add_line(&mut self, pos: usize) {
+        self.newlines.push(pos)
     }
 
     /// Return the position of the given token as a (line, column) pair. By
@@ -122,9 +125,15 @@ impl<'a> Tokens<'a> {
     pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
         let start = match token {
             Some(t) => t.start,
-            None => self.source.len(),
+            None => self.eof,
         };
-        let line_end_index = match self.newlines.binary_search(&start) {
+        self.position(start)
+    }
+
+    /// Return the position of the given byte offset as a (line, column)
+    /// pair. By convention, lines are 1-based and columns are 0-based.
+    pub fn position(&self, offset: usize) -> (usize, usize) {
+        let line_end_index = match self.newlines.binary_search(&offset) {
             Ok(index) => index,
             Err(index) => index,
         };
@@ -134,9 +143,39 @@ impl<'a> Tokens<'a> {
             self.newlines[line_end_index - 1] + 1
         };
         let line_number = line_end_index + 1;
-        let column_offset = start - line_start_pos;
+        let column_offset = offset - line_start_pos;
         (line_number, column_offset)
     }
+}
+
+pub struct Tokens<'a> {
+    source: &'a str,
+    chars: std::str::CharIndices<'a>,
+    next_char: Option<(usize, char)>,
+    lines: Lines,
+}
+
+impl<'a> Tokens<'a> {
+    pub fn new(source: &'a str) -> Self {
+        let mut result = Tokens {
+            source,
+            chars: source.char_indices(),
+            next_char: None,
+            lines: Lines::new(source.len()),
+        };
+        result.advance(); // Prime the pump
+        result
+    }
+
+    pub fn lines(self) -> Lines {
+        self.lines
+    }
+
+    /// Return the position of the given token as a (line, column) pair. See
+    /// `Lines::token_position` for more information about the range, etc.
+    pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
+        self.lines.token_position(token)
+    }
 
     fn token(&self, start: usize, kind: TokenKind) -> Token<'a> {
         let value = &self.source[start..self.pos()];
@@ -363,7 +402,7 @@ impl<'a> Tokens<'a> {
     fn skip_whitespace(&mut self) {
         while let Some((pos, ch)) = self.next_char {
             if ch == '\n' {
-                self.newlines.push(pos);
+                self.lines.add_line(pos);
             } else if !ch.is_whitespace() {
                 break;
             }