[fine] Type checking
This commit is contained in:
parent
633ce89817
commit
cc6f77daf4
2 changed files with 264 additions and 36 deletions
|
|
@ -1,4 +1,4 @@
|
|||
use crate::tokens::{Token, TokenKind, Tokens};
|
||||
use crate::tokens::{Lines, Token, TokenKind, Tokens};
|
||||
use std::fmt;
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
|
|
@ -30,30 +30,37 @@ impl fmt::Display for SyntaxError {
|
|||
}
|
||||
}
|
||||
|
||||
/// A literal value carried by an [`Expr::Literal`] node.
#[derive(Clone)]
pub enum Literal {
    /// A 64-bit floating-point number.
    Float64(f64),
    /// An owned string value.
    String(String),
    /// A boolean; the parser builds these from the `true` / `false` tokens.
    Bool(bool),
}
|
||||
|
||||
/// Prefix operators that take a single operand.
#[derive(Copy, Clone)]
pub enum UnaryOp {
    /// Produced by the parser for a leading `-`.
    Negate,
    /// Produced by the parser for a leading `!`.
    Not,
}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub enum BinaryOp {
|
||||
Add,
|
||||
Subtract,
|
||||
Mutiply,
|
||||
Multiply,
|
||||
Divide,
|
||||
And,
|
||||
Or,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum Expr<'a> {
|
||||
Literal(Literal, Token<'a>),
|
||||
Unary(UnaryOp, Token<'a>, ExprRef),
|
||||
Binary(BinaryOp, Token<'a>, ExprRef, ExprRef),
|
||||
}
|
||||
|
||||
/// A handle to an expression stored in a `SyntaxTree`.
///
/// `Some(index)` is an index into the tree's expression list; `None` marks an
/// expression that could not be produced, which type checking reports as
/// `Type::Error`.
#[derive(Clone)]
pub struct ExprRef(Option<usize>);
|
||||
|
||||
impl ExprRef {
|
||||
|
|
@ -62,6 +69,39 @@ impl ExprRef {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: Eventually we will be unable to use Eq and PartialEq here, and will
//       need to do our own thing.
/// The type that type checking assigns to an expression.
#[derive(Clone, Eq, PartialEq)]
pub enum Type {
    /// The expression has no valid type. Operators given an `Error` operand
    /// yield `Error` again without reporting a second diagnostic.
    Error,

    // TODO: Numeric literals should be implicitly convertible, unlike other
    //       types.
    /// The type of `Literal::Float64` and of arithmetic on two `F64` operands.
    F64,
    /// The type of `Literal::String` and of `+` on two `String` operands.
    String,
    /// The type of `Literal::Bool`, of `!` on a `Bool`, and of `and`/`or` on
    /// two `Bool` operands.
    Bool,
}
|
||||
|
||||
impl std::fmt::Debug for Type {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{self}")
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Type {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use Type::*;
|
||||
match self {
|
||||
Error => write!(f, "<< INTERNAL ERROR >>"),
|
||||
F64 => write!(f, "f64"),
|
||||
String => write!(f, "string"),
|
||||
Bool => write!(f, "bool"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An optional index identifying a type.
///
/// NOTE(review): nothing visible here constructs or reads a `TypeRef`; it
/// presumably mirrors `ExprRef` for a future type table — confirm.
pub struct TypeRef(Option<usize>);
|
||||
|
||||
pub struct SyntaxTree<'a> {
|
||||
pub errors: Vec<SyntaxError>,
|
||||
expressions: Vec<Expr<'a>>,
|
||||
|
|
@ -102,6 +142,76 @@ impl<'a> SyntaxTree<'a> {
|
|||
None => "<|EOF|>".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines) -> Type {
|
||||
// TODO: Cache and work on demand? Or is this just fine?
|
||||
|
||||
let expr = match expr.0 {
|
||||
Some(idx) => &self.expressions[idx],
|
||||
None => return Type::Error,
|
||||
};
|
||||
match expr {
|
||||
Expr::Literal(lit, _) => match lit {
|
||||
Literal::Float64(_) => Type::F64,
|
||||
Literal::String(_) => Type::String,
|
||||
Literal::Bool(_) => Type::Bool,
|
||||
},
|
||||
|
||||
// Figure out the main thing. Check for a... trait?
|
||||
Expr::Unary(op, tok, arg) => {
|
||||
let op = op.clone();
|
||||
let arg = arg.clone();
|
||||
let tok = tok.clone();
|
||||
let arg_type = self.expr_type(&arg, lines);
|
||||
match (op, arg_type) {
|
||||
(UnaryOp::Negate, Type::F64) => Type::F64,
|
||||
(UnaryOp::Not, Type::Bool) => Type::Bool,
|
||||
|
||||
// Propagate existing errors without additional complaint.
|
||||
(_, Type::Error) => Type::Error,
|
||||
|
||||
// Missed the whole table, must be an error.
|
||||
(_, arg_type) => {
|
||||
let (line, col) = lines.position(tok.start());
|
||||
self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'")));
|
||||
Type::Error
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Expr::Binary(op, tok, left, right) => {
|
||||
let op = op.clone();
|
||||
let tok = tok.clone();
|
||||
let left = left.clone();
|
||||
let right = right.clone();
|
||||
let left_type = self.expr_type(&left, lines);
|
||||
let right_type = self.expr_type(&right, lines);
|
||||
|
||||
match (op, left_type, right_type) {
|
||||
(
|
||||
BinaryOp::Add | BinaryOp::Subtract | BinaryOp::Multiply | BinaryOp::Divide,
|
||||
Type::F64,
|
||||
Type::F64,
|
||||
) => Type::F64,
|
||||
|
||||
(BinaryOp::Add, Type::String, Type::String) => Type::String,
|
||||
|
||||
(BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool,
|
||||
|
||||
// Propagate existing errors without additional complaint.
|
||||
(_, Type::Error, _) => Type::Error,
|
||||
(_, _, Type::Error) => Type::Error,
|
||||
|
||||
// Missed the whole table, it must be an error.
|
||||
(_, left_type, right_type) => {
|
||||
let (line, col) = lines.position(tok.start());
|
||||
self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)")));
|
||||
Type::Error
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// BINDING POWERS. When parsing expressions we only accept expressions that
|
||||
|
|
@ -161,10 +271,10 @@ impl<'a> Parser<'a> {
|
|||
parser
|
||||
}
|
||||
|
||||
pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef) {
|
||||
pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) {
|
||||
let expr = self.expression();
|
||||
self.consume(None, "expected end of expression");
|
||||
(self.tree, expr)
|
||||
(self.tree, expr, self.tokens.lines())
|
||||
}
|
||||
|
||||
fn expression(&mut self) -> ExprRef {
|
||||
|
|
@ -196,10 +306,19 @@ impl<'a> Parser<'a> {
|
|||
let token = self.previous.as_ref();
|
||||
match token {
|
||||
Some(token) => match token.kind() {
|
||||
TokenKind::Bang => self.unary(),
|
||||
TokenKind::LeftParen => self.grouping(),
|
||||
TokenKind::Number => self.number(),
|
||||
TokenKind::Minus => self.unary(),
|
||||
TokenKind::String => self.string(),
|
||||
|
||||
TokenKind::True => self
|
||||
.tree
|
||||
.add_expr(Expr::Literal(Literal::Bool(true), token.clone())),
|
||||
TokenKind::False => self
|
||||
.tree
|
||||
.add_expr(Expr::Literal(Literal::Bool(false), token.clone())),
|
||||
|
||||
_ => {
|
||||
self.error("expected an expression");
|
||||
ExprRef::error()
|
||||
|
|
@ -216,9 +335,12 @@ impl<'a> Parser<'a> {
|
|||
self.trace("infix");
|
||||
let kind = self.previous.as_ref().unwrap().kind();
|
||||
match kind {
|
||||
TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => {
|
||||
self.binary(power, left)
|
||||
}
|
||||
TokenKind::Plus
|
||||
| TokenKind::Minus
|
||||
| TokenKind::Star
|
||||
| TokenKind::Slash
|
||||
| TokenKind::And
|
||||
| TokenKind::Or => self.binary(power, left),
|
||||
_ => panic!("Unknown infix operator, dispatch error?"),
|
||||
}
|
||||
}
|
||||
|
|
@ -277,6 +399,7 @@ impl<'a> Parser<'a> {
|
|||
let expr = self.expression_with_power(UNARY_POWER);
|
||||
let op = match kind {
|
||||
TokenKind::Minus => UnaryOp::Negate,
|
||||
TokenKind::Bang => UnaryOp::Not,
|
||||
_ => panic!("unsuitable unary: {:?}: no op", kind),
|
||||
};
|
||||
|
||||
|
|
@ -288,7 +411,7 @@ impl<'a> Parser<'a> {
|
|||
let op = match token.kind() {
|
||||
TokenKind::Plus => BinaryOp::Add,
|
||||
TokenKind::Minus => BinaryOp::Subtract,
|
||||
TokenKind::Star => BinaryOp::Mutiply,
|
||||
TokenKind::Star => BinaryOp::Multiply,
|
||||
TokenKind::Slash => BinaryOp::Divide,
|
||||
TokenKind::And => BinaryOp::And,
|
||||
TokenKind::Or => BinaryOp::Or,
|
||||
|
|
@ -388,32 +511,98 @@ mod tests {
|
|||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
fn test_successful_expression_parse(source: &str, expected: &str) {
|
||||
let (tree, expr) = Parser::new(source).parse();
|
||||
fn test_successful_expression_parse(source: &str, expected: &str, expected_type: Type) {
|
||||
let (mut tree, expr, lines) = Parser::new(source).parse();
|
||||
assert_eq!(
|
||||
Vec::<SyntaxError>::new(),
|
||||
tree.errors,
|
||||
"Expected successful parse"
|
||||
);
|
||||
assert_eq!(expected, tree.dump_expr(&expr));
|
||||
assert_eq!(
|
||||
expected,
|
||||
tree.dump_expr(&expr),
|
||||
"The parse structure of the expressions did not match"
|
||||
);
|
||||
|
||||
// TODO: 'assert_eq' is probably wrong here
|
||||
let expr_type = tree.expr_type(&expr, &lines);
|
||||
assert_eq!(
|
||||
expected_type, expr_type,
|
||||
"The type of the expression did not match"
|
||||
);
|
||||
}
|
||||
|
||||
macro_rules! test_expr {
|
||||
($name:ident, $input:expr, $expected:expr) => {
|
||||
($name:ident, $input:expr, $expected:expr, $type:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
test_successful_expression_parse($input, $expected);
|
||||
test_successful_expression_parse($input, $expected, $type);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test_expr!(number_expr, "12", "12");
|
||||
test_expr!(add_expr, "1 + 2", "(+ 1 2)");
|
||||
test_expr!(prec_expr, "1 + 2 * 3 - 7 * 7", "(- (+ 1 (* 2 3)) (* 7 7))");
|
||||
test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)");
|
||||
test_expr!(number_expr, "12", "12", Type::F64);
|
||||
test_expr!(add_expr, "1 + 2", "(+ 1 2)", Type::F64);
|
||||
test_expr!(
|
||||
prec_expr,
|
||||
"1 + 2 * 3 - 7 * 7",
|
||||
"(- (+ 1 (* 2 3)) (* 7 7))",
|
||||
Type::F64
|
||||
);
|
||||
test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)", Type::F64);
|
||||
test_expr!(
|
||||
strings,
|
||||
r#" "Hello " + "world!" "#,
|
||||
r#"(+ "Hello " "world!")"#
|
||||
r#" "Hello " + 'world!' "#,
|
||||
r#"(+ "Hello " 'world!')"#,
|
||||
Type::String
|
||||
);
|
||||
|
||||
test_expr!(
|
||||
booleans,
|
||||
"true and false or false and !true",
|
||||
"(or (and true false) (and false (! true)))",
|
||||
Type::Bool
|
||||
);
|
||||
|
||||
fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) {
|
||||
let (mut tree, expr, lines) = Parser::new(source).parse();
|
||||
assert_eq!(
|
||||
Vec::<SyntaxError>::new(),
|
||||
tree.errors,
|
||||
"Expected successful parse"
|
||||
);
|
||||
|
||||
let expr_type = tree.expr_type(&expr, &lines);
|
||||
assert_eq!(Type::Error, expr_type, "expected to have a type error");
|
||||
|
||||
let actual_errors = tree
|
||||
.errors
|
||||
.iter()
|
||||
.map(|e| e.message.as_str())
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(expected_errors, actual_errors);
|
||||
}
|
||||
|
||||
/// Declare a `#[test]` named `$name` that type checks `$input` and expects
/// exactly the listed error messages, in order. At least one message is
/// required; a trailing comma after the last one is accepted.
macro_rules! test_type_error_expr {
    ($name:ident, $input:expr, $($s:expr),+ $(,)?) => {
        #[test]
        fn $name() {
            let expected_errors: Vec<&str> = vec![$($s),+];
            test_type_error_expression($input, expected_errors);
        }
    };
}
|
||||
|
||||
test_type_error_expr!(
|
||||
negate_string,
|
||||
"-('what?')",
|
||||
"cannot apply unary operator '-' to expression of type 'string'"
|
||||
);
|
||||
|
||||
test_type_error_expr!(
|
||||
errors_propagate_do_not_duplicate,
|
||||
"!'hello' / 27 * -('what?') + 23",
|
||||
"cannot apply unary operator '!' to expression of type 'string'",
|
||||
"cannot apply unary operator '-' to expression of type 'string'"
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -74,6 +74,10 @@ impl<'a> Token<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn start(&self) -> usize {
|
||||
self.start
|
||||
}
|
||||
|
||||
/// Return the kind of this token.
pub fn kind(&self) -> TokenKind {
    self.kind
}
|
||||
|
|
@ -95,23 +99,22 @@ impl<'a> std::fmt::Display for Token<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
pub struct Tokens<'a> {
|
||||
source: &'a str,
|
||||
chars: std::str::CharIndices<'a>,
|
||||
next_char: Option<(usize, char)>,
|
||||
pub struct Lines {
|
||||
newlines: Vec<usize>,
|
||||
eof: usize,
|
||||
}
|
||||
|
||||
impl<'a> Tokens<'a> {
|
||||
pub fn new(source: &'a str) -> Self {
|
||||
let mut result = Tokens {
|
||||
source,
|
||||
chars: source.char_indices(),
|
||||
next_char: None,
|
||||
impl Lines {
|
||||
fn new(eof: usize) -> Self {
|
||||
Lines {
|
||||
newlines: Vec::new(),
|
||||
};
|
||||
result.advance(); // Prime the pump
|
||||
result
|
||||
eof,
|
||||
}
|
||||
}
|
||||
|
||||
/// Record the position of a newline in the source.
|
||||
pub fn add_line(&mut self, pos: usize) {
|
||||
self.newlines.push(pos)
|
||||
}
|
||||
|
||||
/// Return the position of the given token as a (line, column) pair. By
|
||||
|
|
@ -122,9 +125,15 @@ impl<'a> Tokens<'a> {
|
|||
pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
|
||||
let start = match token {
|
||||
Some(t) => t.start,
|
||||
None => self.source.len(),
|
||||
None => self.eof,
|
||||
};
|
||||
let line_end_index = match self.newlines.binary_search(&start) {
|
||||
self.position(start)
|
||||
}
|
||||
|
||||
/// Return the position of the given character offset as a (line,column)
|
||||
/// pair. By convention, lines are 1-based and columns are 0-based.
|
||||
pub fn position(&self, offset: usize) -> (usize, usize) {
|
||||
let line_end_index = match self.newlines.binary_search(&offset) {
|
||||
Ok(index) => index,
|
||||
Err(index) => index,
|
||||
};
|
||||
|
|
@ -134,9 +143,39 @@ impl<'a> Tokens<'a> {
|
|||
self.newlines[line_end_index - 1] + 1
|
||||
};
|
||||
let line_number = line_end_index + 1;
|
||||
let column_offset = start - line_start_pos;
|
||||
let column_offset = offset - line_start_pos;
|
||||
(line_number, column_offset)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Tokens<'a> {
|
||||
source: &'a str,
|
||||
chars: std::str::CharIndices<'a>,
|
||||
next_char: Option<(usize, char)>,
|
||||
lines: Lines,
|
||||
}
|
||||
|
||||
impl<'a> Tokens<'a> {
|
||||
pub fn new(source: &'a str) -> Self {
|
||||
let mut result = Tokens {
|
||||
source,
|
||||
chars: source.char_indices(),
|
||||
next_char: None,
|
||||
lines: Lines::new(source.len()),
|
||||
};
|
||||
result.advance(); // Prime the pump
|
||||
result
|
||||
}
|
||||
|
||||
/// Consume the tokenizer and hand back the line table it has built, so
/// positions can still be resolved after tokenizing is finished.
pub fn lines(self) -> Lines {
    self.lines
}
|
||||
|
||||
/// Return the position of the given token as a (line, column) pair. See
|
||||
/// `Lines::token_position` for more information about the range, etc.
|
||||
pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
|
||||
self.lines.token_position(token)
|
||||
}
|
||||
|
||||
fn token(&self, start: usize, kind: TokenKind) -> Token<'a> {
|
||||
let value = &self.source[start..self.pos()];
|
||||
|
|
@ -363,7 +402,7 @@ impl<'a> Tokens<'a> {
|
|||
fn skip_whitespace(&mut self) {
|
||||
while let Some((pos, ch)) = self.next_char {
|
||||
if ch == '\n' {
|
||||
self.newlines.push(pos);
|
||||
self.lines.add_line(pos);
|
||||
} else if !ch.is_whitespace() {
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue