[fine] Type checking

This commit is contained in:
John Doty 2024-01-02 09:29:52 -08:00
parent 633ce89817
commit cc6f77daf4
2 changed files with 264 additions and 36 deletions

View file

@ -1,4 +1,4 @@
use crate::tokens::{Token, TokenKind, Tokens};
use crate::tokens::{Lines, Token, TokenKind, Tokens};
use std::fmt;
#[derive(PartialEq, Eq)]
@ -30,30 +30,37 @@ impl fmt::Display for SyntaxError {
}
}
/// A literal value carried by an `Expr::Literal` node.
#[derive(Clone)]
pub enum Literal {
/// A numeric literal; `SyntaxTree::expr_type` gives it the type `Type::F64`.
Float64(f64),
/// A string literal; `SyntaxTree::expr_type` gives it the type `Type::String`.
String(String),
/// A boolean literal (built by the parser for the `True` and `False`
/// token kinds); `SyntaxTree::expr_type` gives it the type `Type::Bool`.
Bool(bool),
}
/// The prefix operators an `Expr::Unary` node can apply to its operand.
#[derive(Copy, Clone)]
pub enum UnaryOp {
/// Produced for `TokenKind::Minus`; type-checks only on an `F64` operand.
Negate,
/// Produced for `TokenKind::Bang`; type-checks only on a `Bool` operand.
Not,
}
/// The infix operators an `Expr::Binary` node can apply to its two operands.
///
/// The arithmetic operators type-check on two `F64` operands, `Add` also
/// accepts two `String` operands, and `And`/`Or` require two `Bool` operands.
#[derive(Copy, Clone)]
pub enum BinaryOp {
/// Produced for `TokenKind::Plus`.
Add,
/// Produced for `TokenKind::Minus`.
Subtract,
// NOTE(review): `Mutiply` looks like the misspelled predecessor of
// `Multiply` below — confirm only one of the two exists in the real file.
Mutiply,
/// Produced for `TokenKind::Star`.
Multiply,
/// Produced for `TokenKind::Slash`.
Divide,
/// Produced for `TokenKind::And`.
And,
/// Produced for `TokenKind::Or`.
Or,
}
/// A single node of the expression tree.
///
/// Child expressions are referred to through `ExprRef` handles rather than
/// owned directly. Every node also carries a `Token`; for unary and binary
/// nodes it is the token that type errors are reported against.
#[derive(Clone)]
pub enum Expr<'a> {
/// A literal value together with its token.
Literal(Literal, Token<'a>),
/// A unary operator, its token, and the operand.
Unary(UnaryOp, Token<'a>, ExprRef),
/// A binary operator, its token, then the left and right operands.
Binary(BinaryOp, Token<'a>, ExprRef, ExprRef),
}
/// A handle to an expression stored in a `SyntaxTree`.
///
/// `Some(index)` is an index into the tree's expression list; `None` stands
/// for an expression that could not be produced, and type-checks as
/// `Type::Error`.
#[derive(Clone)]
pub struct ExprRef(Option<usize>);
impl ExprRef {
@ -62,6 +69,39 @@ impl ExprRef {
}
}
// TODO: Eventually we will be unable to use Eq and PartialEq here, and will
//       need to do our own thing.
/// The type the checker assigns to an expression.
#[derive(Clone, Eq, PartialEq)]
pub enum Type {
    /// The expression has no valid type. Operators that receive an `Error`
    /// operand yield `Error` again without reporting anything new.
    Error,

    // TODO: Numeric literals should be implicitly convertible unlike other
    //       types.
    /// The type of numeric expressions.
    F64,
    /// The type of string expressions.
    String,
    /// The type of boolean expressions.
    Bool,
}

/// `Debug` output is deliberately the same text as `Display`, which keeps
/// assertion failures readable.
impl std::fmt::Debug for Type {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}

/// Renders the type under the name used in diagnostics.
impl std::fmt::Display for Type {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Type::Error => "<< INTERNAL ERROR >>",
            Type::F64 => "f64",
            Type::String => "string",
            Type::Bool => "bool",
        };
        f.write_str(name)
    }
}
/// An optional index wrapper with the same shape as `ExprRef`.
// NOTE(review): nothing in the visible code constructs or reads this yet;
// presumably it is reserved for a future table of types — confirm.
pub struct TypeRef(Option<usize>);
pub struct SyntaxTree<'a> {
pub errors: Vec<SyntaxError>,
expressions: Vec<Expr<'a>>,
@ -102,6 +142,76 @@ impl<'a> SyntaxTree<'a> {
None => "<|EOF|>".to_string(),
}
}
pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines) -> Type {
// TODO: Cache and work on demand? Or is this just fine?
let expr = match expr.0 {
Some(idx) => &self.expressions[idx],
None => return Type::Error,
};
match expr {
Expr::Literal(lit, _) => match lit {
Literal::Float64(_) => Type::F64,
Literal::String(_) => Type::String,
Literal::Bool(_) => Type::Bool,
},
// Figure out the main thing. Check for a... trait?
Expr::Unary(op, tok, arg) => {
let op = op.clone();
let arg = arg.clone();
let tok = tok.clone();
let arg_type = self.expr_type(&arg, lines);
match (op, arg_type) {
(UnaryOp::Negate, Type::F64) => Type::F64,
(UnaryOp::Not, Type::Bool) => Type::Bool,
// Propagate existing errors without additional complaint.
(_, Type::Error) => Type::Error,
// Missed the whole table, must be an error.
(_, arg_type) => {
let (line, col) = lines.position(tok.start());
self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'")));
Type::Error
}
}
}
Expr::Binary(op, tok, left, right) => {
let op = op.clone();
let tok = tok.clone();
let left = left.clone();
let right = right.clone();
let left_type = self.expr_type(&left, lines);
let right_type = self.expr_type(&right, lines);
match (op, left_type, right_type) {
(
BinaryOp::Add | BinaryOp::Subtract | BinaryOp::Multiply | BinaryOp::Divide,
Type::F64,
Type::F64,
) => Type::F64,
(BinaryOp::Add, Type::String, Type::String) => Type::String,
(BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool,
// Propagate existing errors without additional complaint.
(_, Type::Error, _) => Type::Error,
(_, _, Type::Error) => Type::Error,
// Missed the whole table, it must be an error.
(_, left_type, right_type) => {
let (line, col) = lines.position(tok.start());
self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)")));
Type::Error
}
}
}
}
}
}
// BINDING POWERS. When parsing expressions we only accept expressions that
@ -161,10 +271,10 @@ impl<'a> Parser<'a> {
parser
}
pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef) {
pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) {
let expr = self.expression();
self.consume(None, "expected end of expression");
(self.tree, expr)
(self.tree, expr, self.tokens.lines())
}
fn expression(&mut self) -> ExprRef {
@ -196,10 +306,19 @@ impl<'a> Parser<'a> {
let token = self.previous.as_ref();
match token {
Some(token) => match token.kind() {
TokenKind::Bang => self.unary(),
TokenKind::LeftParen => self.grouping(),
TokenKind::Number => self.number(),
TokenKind::Minus => self.unary(),
TokenKind::String => self.string(),
TokenKind::True => self
.tree
.add_expr(Expr::Literal(Literal::Bool(true), token.clone())),
TokenKind::False => self
.tree
.add_expr(Expr::Literal(Literal::Bool(false), token.clone())),
_ => {
self.error("expected an expression");
ExprRef::error()
@ -216,9 +335,12 @@ impl<'a> Parser<'a> {
self.trace("infix");
let kind = self.previous.as_ref().unwrap().kind();
match kind {
TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => {
self.binary(power, left)
}
TokenKind::Plus
| TokenKind::Minus
| TokenKind::Star
| TokenKind::Slash
| TokenKind::And
| TokenKind::Or => self.binary(power, left),
_ => panic!("Unknown infix operator, dispatch error?"),
}
}
@ -277,6 +399,7 @@ impl<'a> Parser<'a> {
let expr = self.expression_with_power(UNARY_POWER);
let op = match kind {
TokenKind::Minus => UnaryOp::Negate,
TokenKind::Bang => UnaryOp::Not,
_ => panic!("unsuitable unary: {:?}: no op", kind),
};
@ -288,7 +411,7 @@ impl<'a> Parser<'a> {
let op = match token.kind() {
TokenKind::Plus => BinaryOp::Add,
TokenKind::Minus => BinaryOp::Subtract,
TokenKind::Star => BinaryOp::Mutiply,
TokenKind::Star => BinaryOp::Multiply,
TokenKind::Slash => BinaryOp::Divide,
TokenKind::And => BinaryOp::And,
TokenKind::Or => BinaryOp::Or,
@ -388,32 +511,98 @@ mod tests {
use super::*;
use pretty_assertions::assert_eq;
fn test_successful_expression_parse(source: &str, expected: &str) {
let (tree, expr) = Parser::new(source).parse();
fn test_successful_expression_parse(source: &str, expected: &str, expected_type: Type) {
let (mut tree, expr, lines) = Parser::new(source).parse();
assert_eq!(
Vec::<SyntaxError>::new(),
tree.errors,
"Expected successful parse"
);
assert_eq!(expected, tree.dump_expr(&expr));
assert_eq!(
expected,
tree.dump_expr(&expr),
"The parse structure of the expressions did not match"
);
// TODO: 'assert_eq' is probably wrong here
let expr_type = tree.expr_type(&expr, &lines);
assert_eq!(
expected_type, expr_type,
"The type of the expression did not match"
);
}
macro_rules! test_expr {
($name:ident, $input:expr, $expected:expr) => {
($name:ident, $input:expr, $expected:expr, $type:expr) => {
#[test]
fn $name() {
test_successful_expression_parse($input, $expected);
test_successful_expression_parse($input, $expected, $type);
}
};
}
test_expr!(number_expr, "12", "12");
test_expr!(add_expr, "1 + 2", "(+ 1 2)");
test_expr!(prec_expr, "1 + 2 * 3 - 7 * 7", "(- (+ 1 (* 2 3)) (* 7 7))");
test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)");
test_expr!(number_expr, "12", "12", Type::F64);
test_expr!(add_expr, "1 + 2", "(+ 1 2)", Type::F64);
test_expr!(
prec_expr,
"1 + 2 * 3 - 7 * 7",
"(- (+ 1 (* 2 3)) (* 7 7))",
Type::F64
);
test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)", Type::F64);
test_expr!(
strings,
r#" "Hello " + "world!" "#,
r#"(+ "Hello " "world!")"#
r#" "Hello " + 'world!' "#,
r#"(+ "Hello " 'world!')"#,
Type::String
);
test_expr!(
booleans,
"true and false or false and !true",
"(or (and true false) (and false (! true)))",
Type::Bool
);
fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) {
let (mut tree, expr, lines) = Parser::new(source).parse();
assert_eq!(
Vec::<SyntaxError>::new(),
tree.errors,
"Expected successful parse"
);
let expr_type = tree.expr_type(&expr, &lines);
assert_eq!(Type::Error, expr_type, "expected to have a type error");
let actual_errors = tree
.errors
.iter()
.map(|e| e.message.as_str())
.collect::<Vec<_>>();
assert_eq!(expected_errors, actual_errors);
}
/// Declare a `#[test]` function called `$name` that runs
/// `test_type_error_expression` on `$input` and expects exactly the listed
/// error messages, in order.
///
/// At least one expected message is required. A trailing comma after the
/// last message is accepted so that multi-line invocations format cleanly.
macro_rules! test_type_error_expr {
    ($name:ident, $input:expr, $($s:expr),+ $(,)?) => {
        #[test]
        fn $name() {
            let expected_errors: Vec<&str> = vec![$($s),+];
            test_type_error_expression($input, expected_errors);
        }
    };
}
// A single ill-typed unary operation produces exactly one diagnostic.
test_type_error_expr!(
negate_string,
"-('what?')",
"cannot apply unary operator '-' to expression of type 'string'"
);
// Each ill-typed operand is reported once. The enclosing `/`, `*` and `+`
// receive `Type::Error` operands and therefore add no diagnostics of their
// own.
test_type_error_expr!(
errors_propagate_do_not_duplicate,
"!'hello' / 27 * -('what?') + 23",
"cannot apply unary operator '!' to expression of type 'string'",
"cannot apply unary operator '-' to expression of type 'string'"
);
}

View file

@ -74,6 +74,10 @@ impl<'a> Token<'a> {
}
}
/// Return the offset in the source at which this token starts. The value
/// can be handed to `Lines::position` to obtain a (line, column) pair.
pub fn start(&self) -> usize {
self.start
}
/// Return the kind of this token.
pub fn kind(&self) -> TokenKind {
self.kind
}
@ -95,23 +99,22 @@ impl<'a> std::fmt::Display for Token<'a> {
}
}
pub struct Tokens<'a> {
source: &'a str,
chars: std::str::CharIndices<'a>,
next_char: Option<(usize, char)>,
pub struct Lines {
newlines: Vec<usize>,
eof: usize,
}
impl<'a> Tokens<'a> {
pub fn new(source: &'a str) -> Self {
let mut result = Tokens {
source,
chars: source.char_indices(),
next_char: None,
impl Lines {
fn new(eof: usize) -> Self {
Lines {
newlines: Vec::new(),
};
result.advance(); // Prime the pump
result
eof,
}
}
/// Record the position of a newline in the source.
///
/// `position` looks offsets up with a binary search over the recorded
/// newlines, so positions are expected to be added in increasing order.
pub fn add_line(&mut self, pos: usize) {
self.newlines.push(pos)
}
/// Return the position of the given token as a (line, column) pair. By
@ -122,9 +125,15 @@ impl<'a> Tokens<'a> {
pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
let start = match token {
Some(t) => t.start,
None => self.source.len(),
None => self.eof,
};
let line_end_index = match self.newlines.binary_search(&start) {
self.position(start)
}
/// Return the position of the given character offset as a (line,column)
/// pair. By convention, lines are 1-based and columns are 0-based.
pub fn position(&self, offset: usize) -> (usize, usize) {
let line_end_index = match self.newlines.binary_search(&offset) {
Ok(index) => index,
Err(index) => index,
};
@ -134,9 +143,39 @@ impl<'a> Tokens<'a> {
self.newlines[line_end_index - 1] + 1
};
let line_number = line_end_index + 1;
let column_offset = start - line_start_pos;
let column_offset = offset - line_start_pos;
(line_number, column_offset)
}
}
/// Tokenizer state over a borrowed source string.
pub struct Tokens<'a> {
// The source text being tokenized; token text is sliced out of it.
source: &'a str,
// Iterator over the (offset, char) pairs of `source`.
chars: std::str::CharIndices<'a>,
// The pending character and its offset, if there is one.
next_char: Option<(usize, char)>,
// Newline positions recorded while skipping whitespace; handed to the
// caller by `lines()`.
lines: Lines,
}
impl<'a> Tokens<'a> {
/// Create a tokenizer over `source`, with its line table sized to the
/// length of the source and its first character already loaded.
pub fn new(source: &'a str) -> Self {
    let mut tokens = Tokens {
        source,
        chars: source.char_indices(),
        next_char: None,
        lines: Lines::new(source.len()),
    };

    // Prime the pump
    tokens.advance();
    tokens
}
/// Consume the tokenizer and return the line table it has built up, so that
/// offsets can still be turned into (line, column) pairs afterwards.
pub fn lines(self) -> Lines {
self.lines
}
/// Return the position of the given token as a (line, column) pair. See
/// `Lines::token_position` for more information about the range, etc.
///
/// This simply forwards to the tokenizer's own line table.
pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
self.lines.token_position(token)
}
fn token(&self, start: usize, kind: TokenKind) -> Token<'a> {
let value = &self.source[start..self.pos()];
@ -363,7 +402,7 @@ impl<'a> Tokens<'a> {
fn skip_whitespace(&mut self) {
while let Some((pos, ch)) = self.next_char {
if ch == '\n' {
self.newlines.push(pos);
self.lines.add_line(pos);
} else if !ch.is_whitespace() {
break;
}