[fine] Type checking

2024-01-02 09:29:52 -08:00 · 2024-01-02 09:29:52 -08:00 · cc6f77daf4
commit cc6f77daf4
parent 633ce89817
2 changed files with 264 additions and 36 deletions
--- a/oden-script/src/parser.rs
+++ b/oden-script/src/parser.rs
@ -1,4 +1,4 @@
-use crate::tokens::{Token, TokenKind, Tokens};
+use crate::tokens::{Lines, Token, TokenKind, Tokens};
 use std::fmt;
 #[derive(PartialEq, Eq)]
@ -30,30 +30,37 @@ impl fmt::Display for SyntaxError {
    }
 }
 /// The value carried by a literal expression.
 #[derive(Clone)]
 pub enum Literal {
    /// A numeric literal; type checking assigns it `Type::F64`.
    Float64(f64),
    /// A string literal; type checking assigns it `Type::String`.
    String(String),
    /// A boolean literal (built by the parser for the `true` and `false`
    /// tokens); type checking assigns it `Type::Bool`.
    Bool(bool),
 }
 /// The operators that take a single operand.
 #[derive(Copy, Clone)]
 pub enum UnaryOp {
    /// Arithmetic negation, parsed from a `Minus` token; only accepted on
    /// `Type::F64` operands by the type checker.
    Negate,
    /// Logical negation, parsed from a `Bang` token; only accepted on
    /// `Type::Bool` operands by the type checker.
    Not,
 }
 #[derive(Copy, Clone)]
 pub enum BinaryOp {
    Add,
    Subtract,
-    Mutiply,
+    Multiply,
    Divide,
    And,
    Or,
 }
 /// A single node of the expression tree. Child expressions are referred to
 /// through `ExprRef` handles rather than being stored inline.
 #[derive(Clone)]
 pub enum Expr<'a> {
    /// A literal value together with the token it was built from.
    Literal(Literal, Token<'a>),
    /// A unary operator, the operator's token (used to position and print
    /// type errors), and the operand.
    Unary(UnaryOp, Token<'a>, ExprRef),
    /// A binary operator, the operator's token (used to position and print
    /// type errors), then the left and right operands.
    Binary(BinaryOp, Token<'a>, ExprRef, ExprRef),
 }
 /// A handle to an expression owned by a `SyntaxTree`. `Some(index)` is an
 /// index into the tree's expression list; `None` refers to no expression and
 /// is typed as `Type::Error` (presumably what `ExprRef::error()` produces --
 /// confirm against the `impl`).
 #[derive(Clone)]
 pub struct ExprRef(Option<usize>);
 impl ExprRef {
@ -62,6 +69,39 @@ impl ExprRef {
    }
 }
 // TODO: Eventually we will be unable to use Eq and PartialEq here, and will
 //       need to do our own thing.
 /// The type assigned to an expression by the type checker.
 #[derive(Clone, Eq, PartialEq)]
 pub enum Type {
    /// The expression has no valid type. The checker returns this after
    /// reporting an error, and propagates it without reporting again.
    Error,
    // TODO: Numeric literals should be implicitly convertible unlike other
    // types.
    /// 64-bit floating point number; printed as `f64`.
    F64,
    /// String; printed as `string`.
    String,
    /// Boolean; printed as `bool`.
    Bool,
 }
 // Debug output is intentionally the same text as the Display output.
 impl std::fmt::Debug for Type {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
 }
 // Display yields the name used for the type in diagnostics.
 impl std::fmt::Display for Type {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Type::Error => "<< INTERNAL ERROR >>",
            Type::F64 => "f64",
            Type::String => "string",
            Type::Bool => "bool",
        };
        f.write_str(name)
    }
 }
 // NOTE(review): no use of `TypeRef` is visible in this change; it presumably
 // mirrors `ExprRef` as an optional index handle for types -- confirm.
 pub struct TypeRef(Option<usize>);
 pub struct SyntaxTree<'a> {
    pub errors: Vec<SyntaxError>,
    expressions: Vec<Expr<'a>>,
@ -102,6 +142,76 @@ impl<'a> SyntaxTree<'a> {
            None => "<|EOF|>".to_string(),
        }
    }
    pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines) -> Type {
        // TODO: Cache and work on demand? Or is this just fine?
        let expr = match expr.0 {
            Some(idx) => &self.expressions[idx],
            None => return Type::Error,
        };
        match expr {
            Expr::Literal(lit, _) => match lit {
                Literal::Float64(_) => Type::F64,
                Literal::String(_) => Type::String,
                Literal::Bool(_) => Type::Bool,
            },
            // Figure out the main thing. Check for a... trait?
            Expr::Unary(op, tok, arg) => {
                let op = op.clone();
                let arg = arg.clone();
                let tok = tok.clone();
                let arg_type = self.expr_type(&arg, lines);
                match (op, arg_type) {
                    (UnaryOp::Negate, Type::F64) => Type::F64,
                    (UnaryOp::Not, Type::Bool) => Type::Bool,
                    // Propagate existing errors without additional complaint.
                    (_, Type::Error) => Type::Error,
                    // Missed the whole table, must be an error.
                    (_, arg_type) => {
                        let (line, col) = lines.position(tok.start());
                        self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'")));
                        Type::Error
                    }
                }
            }
            Expr::Binary(op, tok, left, right) => {
                let op = op.clone();
                let tok = tok.clone();
                let left = left.clone();
                let right = right.clone();
                let left_type = self.expr_type(&left, lines);
                let right_type = self.expr_type(&right, lines);
                match (op, left_type, right_type) {
                    (
                        BinaryOp::Add | BinaryOp::Subtract | BinaryOp::Multiply | BinaryOp::Divide,
                        Type::F64,
                        Type::F64,
                    ) => Type::F64,
                    (BinaryOp::Add, Type::String, Type::String) => Type::String,
                    (BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool,
                    // Propagate existing errors without additional complaint.
                    (_, Type::Error, _) => Type::Error,
                    (_, _, Type::Error) => Type::Error,
                    // Missed the whole table, it must be an error.
                    (_, left_type, right_type) => {
                        let (line, col) = lines.position(tok.start());
                        self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)")));
                        Type::Error
                    }
                }
            }
        }
    }
 }
 // BINDING POWERS. When parsing expressions we only accept expressions that
@ -161,10 +271,10 @@ impl<'a> Parser<'a> {
        parser
    }
-    pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef) {
+    pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) {
        let expr = self.expression();
        self.consume(None, "expected end of expression");
-        (self.tree, expr)
+        (self.tree, expr, self.tokens.lines())
    }
    fn expression(&mut self) -> ExprRef {
@ -196,10 +306,19 @@ impl<'a> Parser<'a> {
        let token = self.previous.as_ref();
        match token {
            Some(token) => match token.kind() {
                TokenKind::Bang => self.unary(),
                TokenKind::LeftParen => self.grouping(),
                TokenKind::Number => self.number(),
                TokenKind::Minus => self.unary(),
                TokenKind::String => self.string(),
                TokenKind::True => self
                    .tree
                    .add_expr(Expr::Literal(Literal::Bool(true), token.clone())),
                TokenKind::False => self
                    .tree
                    .add_expr(Expr::Literal(Literal::Bool(false), token.clone())),
                _ => {
                    self.error("expected an expression");
                    ExprRef::error()
@ -216,9 +335,12 @@ impl<'a> Parser<'a> {
        self.trace("infix");
        let kind = self.previous.as_ref().unwrap().kind();
        match kind {
-            TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => {
+            TokenKind::Plus
-                self.binary(power, left)
+            | TokenKind::Minus
-            }
+            | TokenKind::Star
            | TokenKind::Slash
            | TokenKind::And
            | TokenKind::Or => self.binary(power, left),
            _ => panic!("Unknown infix operator, dispatch error?"),
        }
    }
@ -277,6 +399,7 @@ impl<'a> Parser<'a> {
        let expr = self.expression_with_power(UNARY_POWER);
        let op = match kind {
            TokenKind::Minus => UnaryOp::Negate,
            TokenKind::Bang => UnaryOp::Not,
            _ => panic!("unsuitable unary: {:?}: no op", kind),
        };
@ -288,7 +411,7 @@ impl<'a> Parser<'a> {
        let op = match token.kind() {
            TokenKind::Plus => BinaryOp::Add,
            TokenKind::Minus => BinaryOp::Subtract,
-            TokenKind::Star => BinaryOp::Mutiply,
+            TokenKind::Star => BinaryOp::Multiply,
            TokenKind::Slash => BinaryOp::Divide,
            TokenKind::And => BinaryOp::And,
            TokenKind::Or => BinaryOp::Or,
@ -388,32 +511,98 @@ mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
-    fn test_successful_expression_parse(source: &str, expected: &str) {
+    fn test_successful_expression_parse(source: &str, expected: &str, expected_type: Type) {
-        let (tree, expr) = Parser::new(source).parse();
+        let (mut tree, expr, lines) = Parser::new(source).parse();
        assert_eq!(
            Vec::<SyntaxError>::new(),
            tree.errors,
            "Expected successful parse"
        );
-        assert_eq!(expected, tree.dump_expr(&expr));
+        assert_eq!(
            expected,
            tree.dump_expr(&expr),
            "The parse structure of the expressions did not match"
        );
        // TODO: 'assert_eq' is probably wrong here
        let expr_type = tree.expr_type(&expr, &lines);
        assert_eq!(
            expected_type, expr_type,
            "The type of the expression did not match"
        );
    }
    macro_rules! test_expr {
-        ($name:ident, $input:expr, $expected:expr) => {
+        ($name:ident, $input:expr, $expected:expr, $type:expr) => {
            #[test]
            fn $name() {
-                test_successful_expression_parse($input, $expected);
+                test_successful_expression_parse($input, $expected, $type);
            }
        };
    }
-    test_expr!(number_expr, "12", "12");
+    test_expr!(number_expr, "12", "12", Type::F64);
-    test_expr!(add_expr, "1 + 2", "(+ 1 2)");
+    test_expr!(add_expr, "1 + 2", "(+ 1 2)", Type::F64);
-    test_expr!(prec_expr, "1 + 2 * 3 - 7 * 7", "(- (+ 1 (* 2 3)) (* 7 7))");
+    test_expr!(
-    test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)");
+        prec_expr,
        "1 + 2 * 3 - 7 * 7",
        "(- (+ 1 (* 2 3)) (* 7 7))",
        Type::F64
    );
    test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)", Type::F64);
    test_expr!(
        strings,
-        r#" "Hello " + "world!" "#,
+        r#" "Hello " + 'world!' "#,
-        r#"(+ "Hello " "world!")"#
+        r#"(+ "Hello " 'world!')"#,
        Type::String
    );
    test_expr!(
        booleans,
        "true and false or false and !true",
        "(or (and true false) (and false (! true)))",
        Type::Bool
    );
    fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) {
        let (mut tree, expr, lines) = Parser::new(source).parse();
        assert_eq!(
            Vec::<SyntaxError>::new(),
            tree.errors,
            "Expected successful parse"
        );
        let expr_type = tree.expr_type(&expr, &lines);
        assert_eq!(Type::Error, expr_type, "expected to have a type error");
        let actual_errors = tree
            .errors
            .iter()
            .map(|e| e.message.as_str())
            .collect::<Vec<_>>();
        assert_eq!(expected_errors, actual_errors);
    }
    // Declares a `#[test]` named `$name` that feeds `$input` to
    // `test_type_error_expression` and expects exactly the listed messages
    // (one or more), in order.
    macro_rules! test_type_error_expr {
        ($name:ident, $input:expr, $($s:expr),+) => {
            #[test]
            fn $name() {
                test_type_error_expression($input, vec![$($s),+]);
            }
        }
    }
    // Negating a string is rejected with a single diagnostic.
    test_type_error_expr!(
        negate_string,
        "-('what?')",
        "cannot apply unary operator '-' to expression of type 'string'"
    );
    // Two independent unary errors are both reported, but the enclosing `/`,
    // `*` and `+` operators add no messages of their own once an operand is
    // already an error.
    test_type_error_expr!(
        errors_propagate_do_not_duplicate,
        "!'hello' / 27 * -('what?') + 23",
        "cannot apply unary operator '!' to expression of type 'string'",
        "cannot apply unary operator '-' to expression of type 'string'"
    );
 }
--- a/oden-script/src/tokens.rs
+++ b/oden-script/src/tokens.rs
@ -74,6 +74,10 @@ impl<'a> Token<'a> {
        }
    }
    /// Return the offset in the source at which this token starts. This is
    /// the value handed to `Lines::position` to obtain a (line, column) pair.
    pub fn start(&self) -> usize {
        self.start
    }
    /// Return the kind of this token.
    pub fn kind(&self) -> TokenKind {
        self.kind
    }
@ -95,23 +99,22 @@ impl<'a> std::fmt::Display for Token<'a> {
    }
 }
-pub struct Tokens<'a> {
+pub struct Lines {
    source: &'a str,
    chars: std::str::CharIndices<'a>,
    next_char: Option<(usize, char)>,
    newlines: Vec<usize>,
    eof: usize,
 }
-impl<'a> Tokens<'a> {
+impl Lines {
-    pub fn new(source: &'a str) -> Self {
+    fn new(eof: usize) -> Self {
-        let mut result = Tokens {
+        Lines {
            source,
            chars: source.char_indices(),
            next_char: None,
            newlines: Vec::new(),
-        };
+            eof,
-        result.advance(); // Prime the pump
+        }
-        result
+    }
    /// Record the position of a newline in the source.
    ///
    /// `position` binary-searches the recorded positions, so they must be
    /// added in increasing order (the tokenizer adds them as it scans).
    pub fn add_line(&mut self, pos: usize) {
        self.newlines.push(pos)
    }
    /// Return the position of the given token as a (line, column) pair. By
@ -122,9 +125,15 @@ impl<'a> Tokens<'a> {
    pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
        let start = match token {
            Some(t) => t.start,
-            None => self.source.len(),
+            None => self.eof,
        };
-        let line_end_index = match self.newlines.binary_search(&start) {
+        self.position(start)
    }
    /// Return the position of the given character offset as a (line,column)
    /// pair. By convention, lines are 1-based and columns are 0-based.
    pub fn position(&self, offset: usize) -> (usize, usize) {
        let line_end_index = match self.newlines.binary_search(&offset) {
            Ok(index) => index,
            Err(index) => index,
        };
@ -134,9 +143,39 @@ impl<'a> Tokens<'a> {
            self.newlines[line_end_index - 1] + 1
        };
        let line_number = line_end_index + 1;
-        let column_offset = start - line_start_pos;
+        let column_offset = offset - line_start_pos;
        (line_number, column_offset)
    }
 }
 /// Tokenizer state over a borrowed source string.
 pub struct Tokens<'a> {
    // The full source text; token values are sliced out of it.
    source: &'a str,
    // Iterator over the (offset, char) pairs of `source`.
    chars: std::str::CharIndices<'a>,
    // The current lookahead character and its offset. Starts as `None` and is
    // filled in by the first `advance()` call made in `new`.
    next_char: Option<(usize, char)>,
    // Newline positions recorded while scanning, used for line/column lookup.
    lines: Lines,
 }
 impl<'a> Tokens<'a> {
    /// Create a tokenizer over `source`, with the first character already
    /// loaded as lookahead.
    pub fn new(source: &'a str) -> Self {
        let lines = Lines::new(source.len());
        let chars = source.char_indices();
        let mut tokens = Tokens {
            source,
            chars,
            next_char: None,
            lines,
        };
        // Prime the pump: load the first lookahead character.
        tokens.advance();
        tokens
    }
    /// Consume the tokenizer and hand back the line information it recorded,
    /// so positions can still be resolved after tokenizing is finished.
    pub fn lines(self) -> Lines {
        self.lines
    }
    /// Return the position of the given token as a (line, column) pair. See
    /// `Lines::token_position` for more information about the range, etc.
    /// This simply forwards to the `Lines` owned by this tokenizer.
    pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
        self.lines.token_position(token)
    }
    fn token(&self, start: usize, kind: TokenKind) -> Token<'a> {
        let value = &self.source[start..self.pos()];
@ -363,7 +402,7 @@ impl<'a> Tokens<'a> {
    fn skip_whitespace(&mut self) {
        while let Some((pos, ch)) = self.next_char {
            if ch == '\n' {
-                self.newlines.push(pos);
+                self.lines.add_line(pos);
            } else if !ch.is_whitespace() {
                break;
            }