[fine] Type checking
This commit is contained in:
parent
633ce89817
commit
cc6f77daf4
2 changed files with 264 additions and 36 deletions
|
|
@ -1,4 +1,4 @@
|
||||||
use crate::tokens::{Token, TokenKind, Tokens};
|
use crate::tokens::{Lines, Token, TokenKind, Tokens};
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
#[derive(PartialEq, Eq)]
|
#[derive(PartialEq, Eq)]
|
||||||
|
|
@ -30,30 +30,37 @@ impl fmt::Display for SyntaxError {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
pub enum Literal {
|
pub enum Literal {
|
||||||
Float64(f64),
|
Float64(f64),
|
||||||
String(String),
|
String(String),
|
||||||
|
Bool(bool),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
pub enum UnaryOp {
|
pub enum UnaryOp {
|
||||||
Negate,
|
Negate,
|
||||||
|
Not,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
pub enum BinaryOp {
|
pub enum BinaryOp {
|
||||||
Add,
|
Add,
|
||||||
Subtract,
|
Subtract,
|
||||||
Mutiply,
|
Multiply,
|
||||||
Divide,
|
Divide,
|
||||||
And,
|
And,
|
||||||
Or,
|
Or,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
pub enum Expr<'a> {
|
pub enum Expr<'a> {
|
||||||
Literal(Literal, Token<'a>),
|
Literal(Literal, Token<'a>),
|
||||||
Unary(UnaryOp, Token<'a>, ExprRef),
|
Unary(UnaryOp, Token<'a>, ExprRef),
|
||||||
Binary(BinaryOp, Token<'a>, ExprRef, ExprRef),
|
Binary(BinaryOp, Token<'a>, ExprRef, ExprRef),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
pub struct ExprRef(Option<usize>);
|
pub struct ExprRef(Option<usize>);
|
||||||
|
|
||||||
impl ExprRef {
|
impl ExprRef {
|
||||||
|
|
@ -62,6 +69,39 @@ impl ExprRef {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Eventually we will be unable to use Eq and PartialEq here, and will
|
||||||
|
// need to do our own thing.
|
||||||
|
#[derive(Clone, Eq, PartialEq)]
|
||||||
|
pub enum Type {
|
||||||
|
Error,
|
||||||
|
|
||||||
|
// TODO: Numeric literals should be implicitly convertible unlike other
|
||||||
|
// types.
|
||||||
|
F64,
|
||||||
|
String,
|
||||||
|
Bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for Type {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
write!(f, "{self}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for Type {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
use Type::*;
|
||||||
|
match self {
|
||||||
|
Error => write!(f, "<< INTERNAL ERROR >>"),
|
||||||
|
F64 => write!(f, "f64"),
|
||||||
|
String => write!(f, "string"),
|
||||||
|
Bool => write!(f, "bool"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct TypeRef(Option<usize>);
|
||||||
|
|
||||||
pub struct SyntaxTree<'a> {
|
pub struct SyntaxTree<'a> {
|
||||||
pub errors: Vec<SyntaxError>,
|
pub errors: Vec<SyntaxError>,
|
||||||
expressions: Vec<Expr<'a>>,
|
expressions: Vec<Expr<'a>>,
|
||||||
|
|
@ -102,6 +142,76 @@ impl<'a> SyntaxTree<'a> {
|
||||||
None => "<|EOF|>".to_string(),
|
None => "<|EOF|>".to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines) -> Type {
|
||||||
|
// TODO: Cache and work on demand? Or is this just fine?
|
||||||
|
|
||||||
|
let expr = match expr.0 {
|
||||||
|
Some(idx) => &self.expressions[idx],
|
||||||
|
None => return Type::Error,
|
||||||
|
};
|
||||||
|
match expr {
|
||||||
|
Expr::Literal(lit, _) => match lit {
|
||||||
|
Literal::Float64(_) => Type::F64,
|
||||||
|
Literal::String(_) => Type::String,
|
||||||
|
Literal::Bool(_) => Type::Bool,
|
||||||
|
},
|
||||||
|
|
||||||
|
// Figure out the main thing. Check for a... trait?
|
||||||
|
Expr::Unary(op, tok, arg) => {
|
||||||
|
let op = op.clone();
|
||||||
|
let arg = arg.clone();
|
||||||
|
let tok = tok.clone();
|
||||||
|
let arg_type = self.expr_type(&arg, lines);
|
||||||
|
match (op, arg_type) {
|
||||||
|
(UnaryOp::Negate, Type::F64) => Type::F64,
|
||||||
|
(UnaryOp::Not, Type::Bool) => Type::Bool,
|
||||||
|
|
||||||
|
// Propagate existing errors without additional complaint.
|
||||||
|
(_, Type::Error) => Type::Error,
|
||||||
|
|
||||||
|
// Missed the whole table, must be an error.
|
||||||
|
(_, arg_type) => {
|
||||||
|
let (line, col) = lines.position(tok.start());
|
||||||
|
self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'")));
|
||||||
|
Type::Error
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Expr::Binary(op, tok, left, right) => {
|
||||||
|
let op = op.clone();
|
||||||
|
let tok = tok.clone();
|
||||||
|
let left = left.clone();
|
||||||
|
let right = right.clone();
|
||||||
|
let left_type = self.expr_type(&left, lines);
|
||||||
|
let right_type = self.expr_type(&right, lines);
|
||||||
|
|
||||||
|
match (op, left_type, right_type) {
|
||||||
|
(
|
||||||
|
BinaryOp::Add | BinaryOp::Subtract | BinaryOp::Multiply | BinaryOp::Divide,
|
||||||
|
Type::F64,
|
||||||
|
Type::F64,
|
||||||
|
) => Type::F64,
|
||||||
|
|
||||||
|
(BinaryOp::Add, Type::String, Type::String) => Type::String,
|
||||||
|
|
||||||
|
(BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool,
|
||||||
|
|
||||||
|
// Propagate existing errors without additional complaint.
|
||||||
|
(_, Type::Error, _) => Type::Error,
|
||||||
|
(_, _, Type::Error) => Type::Error,
|
||||||
|
|
||||||
|
// Missed the whole table, it must be an error.
|
||||||
|
(_, left_type, right_type) => {
|
||||||
|
let (line, col) = lines.position(tok.start());
|
||||||
|
self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)")));
|
||||||
|
Type::Error
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// BINDING POWERS. When parsing expressions we only accept expressions that
|
// BINDING POWERS. When parsing expressions we only accept expressions that
|
||||||
|
|
@ -161,10 +271,10 @@ impl<'a> Parser<'a> {
|
||||||
parser
|
parser
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef) {
|
pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) {
|
||||||
let expr = self.expression();
|
let expr = self.expression();
|
||||||
self.consume(None, "expected end of expression");
|
self.consume(None, "expected end of expression");
|
||||||
(self.tree, expr)
|
(self.tree, expr, self.tokens.lines())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn expression(&mut self) -> ExprRef {
|
fn expression(&mut self) -> ExprRef {
|
||||||
|
|
@ -196,10 +306,19 @@ impl<'a> Parser<'a> {
|
||||||
let token = self.previous.as_ref();
|
let token = self.previous.as_ref();
|
||||||
match token {
|
match token {
|
||||||
Some(token) => match token.kind() {
|
Some(token) => match token.kind() {
|
||||||
|
TokenKind::Bang => self.unary(),
|
||||||
TokenKind::LeftParen => self.grouping(),
|
TokenKind::LeftParen => self.grouping(),
|
||||||
TokenKind::Number => self.number(),
|
TokenKind::Number => self.number(),
|
||||||
TokenKind::Minus => self.unary(),
|
TokenKind::Minus => self.unary(),
|
||||||
TokenKind::String => self.string(),
|
TokenKind::String => self.string(),
|
||||||
|
|
||||||
|
TokenKind::True => self
|
||||||
|
.tree
|
||||||
|
.add_expr(Expr::Literal(Literal::Bool(true), token.clone())),
|
||||||
|
TokenKind::False => self
|
||||||
|
.tree
|
||||||
|
.add_expr(Expr::Literal(Literal::Bool(false), token.clone())),
|
||||||
|
|
||||||
_ => {
|
_ => {
|
||||||
self.error("expected an expression");
|
self.error("expected an expression");
|
||||||
ExprRef::error()
|
ExprRef::error()
|
||||||
|
|
@ -216,9 +335,12 @@ impl<'a> Parser<'a> {
|
||||||
self.trace("infix");
|
self.trace("infix");
|
||||||
let kind = self.previous.as_ref().unwrap().kind();
|
let kind = self.previous.as_ref().unwrap().kind();
|
||||||
match kind {
|
match kind {
|
||||||
TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => {
|
TokenKind::Plus
|
||||||
self.binary(power, left)
|
| TokenKind::Minus
|
||||||
}
|
| TokenKind::Star
|
||||||
|
| TokenKind::Slash
|
||||||
|
| TokenKind::And
|
||||||
|
| TokenKind::Or => self.binary(power, left),
|
||||||
_ => panic!("Unknown infix operator, dispatch error?"),
|
_ => panic!("Unknown infix operator, dispatch error?"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -277,6 +399,7 @@ impl<'a> Parser<'a> {
|
||||||
let expr = self.expression_with_power(UNARY_POWER);
|
let expr = self.expression_with_power(UNARY_POWER);
|
||||||
let op = match kind {
|
let op = match kind {
|
||||||
TokenKind::Minus => UnaryOp::Negate,
|
TokenKind::Minus => UnaryOp::Negate,
|
||||||
|
TokenKind::Bang => UnaryOp::Not,
|
||||||
_ => panic!("unsuitable unary: {:?}: no op", kind),
|
_ => panic!("unsuitable unary: {:?}: no op", kind),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -288,7 +411,7 @@ impl<'a> Parser<'a> {
|
||||||
let op = match token.kind() {
|
let op = match token.kind() {
|
||||||
TokenKind::Plus => BinaryOp::Add,
|
TokenKind::Plus => BinaryOp::Add,
|
||||||
TokenKind::Minus => BinaryOp::Subtract,
|
TokenKind::Minus => BinaryOp::Subtract,
|
||||||
TokenKind::Star => BinaryOp::Mutiply,
|
TokenKind::Star => BinaryOp::Multiply,
|
||||||
TokenKind::Slash => BinaryOp::Divide,
|
TokenKind::Slash => BinaryOp::Divide,
|
||||||
TokenKind::And => BinaryOp::And,
|
TokenKind::And => BinaryOp::And,
|
||||||
TokenKind::Or => BinaryOp::Or,
|
TokenKind::Or => BinaryOp::Or,
|
||||||
|
|
@ -388,32 +511,98 @@ mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use pretty_assertions::assert_eq;
|
use pretty_assertions::assert_eq;
|
||||||
|
|
||||||
fn test_successful_expression_parse(source: &str, expected: &str) {
|
fn test_successful_expression_parse(source: &str, expected: &str, expected_type: Type) {
|
||||||
let (tree, expr) = Parser::new(source).parse();
|
let (mut tree, expr, lines) = Parser::new(source).parse();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
Vec::<SyntaxError>::new(),
|
Vec::<SyntaxError>::new(),
|
||||||
tree.errors,
|
tree.errors,
|
||||||
"Expected successful parse"
|
"Expected successful parse"
|
||||||
);
|
);
|
||||||
assert_eq!(expected, tree.dump_expr(&expr));
|
assert_eq!(
|
||||||
|
expected,
|
||||||
|
tree.dump_expr(&expr),
|
||||||
|
"The parse structure of the expressions did not match"
|
||||||
|
);
|
||||||
|
|
||||||
|
// TODO: 'assert_eq' is probably wrong here
|
||||||
|
let expr_type = tree.expr_type(&expr, &lines);
|
||||||
|
assert_eq!(
|
||||||
|
expected_type, expr_type,
|
||||||
|
"The type of the expression did not match"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! test_expr {
|
macro_rules! test_expr {
|
||||||
($name:ident, $input:expr, $expected:expr) => {
|
($name:ident, $input:expr, $expected:expr, $type:expr) => {
|
||||||
#[test]
|
#[test]
|
||||||
fn $name() {
|
fn $name() {
|
||||||
test_successful_expression_parse($input, $expected);
|
test_successful_expression_parse($input, $expected, $type);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
test_expr!(number_expr, "12", "12");
|
test_expr!(number_expr, "12", "12", Type::F64);
|
||||||
test_expr!(add_expr, "1 + 2", "(+ 1 2)");
|
test_expr!(add_expr, "1 + 2", "(+ 1 2)", Type::F64);
|
||||||
test_expr!(prec_expr, "1 + 2 * 3 - 7 * 7", "(- (+ 1 (* 2 3)) (* 7 7))");
|
test_expr!(
|
||||||
test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)");
|
prec_expr,
|
||||||
|
"1 + 2 * 3 - 7 * 7",
|
||||||
|
"(- (+ 1 (* 2 3)) (* 7 7))",
|
||||||
|
Type::F64
|
||||||
|
);
|
||||||
|
test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)", Type::F64);
|
||||||
test_expr!(
|
test_expr!(
|
||||||
strings,
|
strings,
|
||||||
r#" "Hello " + "world!" "#,
|
r#" "Hello " + 'world!' "#,
|
||||||
r#"(+ "Hello " "world!")"#
|
r#"(+ "Hello " 'world!')"#,
|
||||||
|
Type::String
|
||||||
|
);
|
||||||
|
|
||||||
|
test_expr!(
|
||||||
|
booleans,
|
||||||
|
"true and false or false and !true",
|
||||||
|
"(or (and true false) (and false (! true)))",
|
||||||
|
Type::Bool
|
||||||
|
);
|
||||||
|
|
||||||
|
fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) {
|
||||||
|
let (mut tree, expr, lines) = Parser::new(source).parse();
|
||||||
|
assert_eq!(
|
||||||
|
Vec::<SyntaxError>::new(),
|
||||||
|
tree.errors,
|
||||||
|
"Expected successful parse"
|
||||||
|
);
|
||||||
|
|
||||||
|
let expr_type = tree.expr_type(&expr, &lines);
|
||||||
|
assert_eq!(Type::Error, expr_type, "expected to have a type error");
|
||||||
|
|
||||||
|
let actual_errors = tree
|
||||||
|
.errors
|
||||||
|
.iter()
|
||||||
|
.map(|e| e.message.as_str())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
assert_eq!(expected_errors, actual_errors);
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! test_type_error_expr {
|
||||||
|
($name:ident, $input:expr, $($s:expr),+) => {
|
||||||
|
#[test]
|
||||||
|
fn $name() {
|
||||||
|
let expected_errors: Vec<&str> = (vec![$($s),*]);
|
||||||
|
test_type_error_expression($input, expected_errors);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test_type_error_expr!(
|
||||||
|
negate_string,
|
||||||
|
"-('what?')",
|
||||||
|
"cannot apply unary operator '-' to expression of type 'string'"
|
||||||
|
);
|
||||||
|
|
||||||
|
test_type_error_expr!(
|
||||||
|
errors_propagate_do_not_duplicate,
|
||||||
|
"!'hello' / 27 * -('what?') + 23",
|
||||||
|
"cannot apply unary operator '!' to expression of type 'string'",
|
||||||
|
"cannot apply unary operator '-' to expression of type 'string'"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -74,6 +74,10 @@ impl<'a> Token<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn start(&self) -> usize {
|
||||||
|
self.start
|
||||||
|
}
|
||||||
|
|
||||||
pub fn kind(&self) -> TokenKind {
|
pub fn kind(&self) -> TokenKind {
|
||||||
self.kind
|
self.kind
|
||||||
}
|
}
|
||||||
|
|
@ -95,23 +99,22 @@ impl<'a> std::fmt::Display for Token<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Tokens<'a> {
|
pub struct Lines {
|
||||||
source: &'a str,
|
|
||||||
chars: std::str::CharIndices<'a>,
|
|
||||||
next_char: Option<(usize, char)>,
|
|
||||||
newlines: Vec<usize>,
|
newlines: Vec<usize>,
|
||||||
|
eof: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Tokens<'a> {
|
impl Lines {
|
||||||
pub fn new(source: &'a str) -> Self {
|
fn new(eof: usize) -> Self {
|
||||||
let mut result = Tokens {
|
Lines {
|
||||||
source,
|
|
||||||
chars: source.char_indices(),
|
|
||||||
next_char: None,
|
|
||||||
newlines: Vec::new(),
|
newlines: Vec::new(),
|
||||||
};
|
eof,
|
||||||
result.advance(); // Prime the pump
|
}
|
||||||
result
|
}
|
||||||
|
|
||||||
|
/// Record the position of a newline in the source.
|
||||||
|
pub fn add_line(&mut self, pos: usize) {
|
||||||
|
self.newlines.push(pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the position of the given token as a (line, column) pair. By
|
/// Return the position of the given token as a (line, column) pair. By
|
||||||
|
|
@ -122,9 +125,15 @@ impl<'a> Tokens<'a> {
|
||||||
pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
|
pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
|
||||||
let start = match token {
|
let start = match token {
|
||||||
Some(t) => t.start,
|
Some(t) => t.start,
|
||||||
None => self.source.len(),
|
None => self.eof,
|
||||||
};
|
};
|
||||||
let line_end_index = match self.newlines.binary_search(&start) {
|
self.position(start)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the position of the given character offset as a (line,column)
|
||||||
|
/// pair. By convention, lines are 1-based and columns are 0-based.
|
||||||
|
pub fn position(&self, offset: usize) -> (usize, usize) {
|
||||||
|
let line_end_index = match self.newlines.binary_search(&offset) {
|
||||||
Ok(index) => index,
|
Ok(index) => index,
|
||||||
Err(index) => index,
|
Err(index) => index,
|
||||||
};
|
};
|
||||||
|
|
@ -134,9 +143,39 @@ impl<'a> Tokens<'a> {
|
||||||
self.newlines[line_end_index - 1] + 1
|
self.newlines[line_end_index - 1] + 1
|
||||||
};
|
};
|
||||||
let line_number = line_end_index + 1;
|
let line_number = line_end_index + 1;
|
||||||
let column_offset = start - line_start_pos;
|
let column_offset = offset - line_start_pos;
|
||||||
(line_number, column_offset)
|
(line_number, column_offset)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Tokens<'a> {
|
||||||
|
source: &'a str,
|
||||||
|
chars: std::str::CharIndices<'a>,
|
||||||
|
next_char: Option<(usize, char)>,
|
||||||
|
lines: Lines,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Tokens<'a> {
|
||||||
|
pub fn new(source: &'a str) -> Self {
|
||||||
|
let mut result = Tokens {
|
||||||
|
source,
|
||||||
|
chars: source.char_indices(),
|
||||||
|
next_char: None,
|
||||||
|
lines: Lines::new(source.len()),
|
||||||
|
};
|
||||||
|
result.advance(); // Prime the pump
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn lines(self) -> Lines {
|
||||||
|
self.lines
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the position of the given token as a (line, column) pair. See
|
||||||
|
/// `Lines::token_position` for more information about the range, etc.
|
||||||
|
pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
|
||||||
|
self.lines.token_position(token)
|
||||||
|
}
|
||||||
|
|
||||||
fn token(&self, start: usize, kind: TokenKind) -> Token<'a> {
|
fn token(&self, start: usize, kind: TokenKind) -> Token<'a> {
|
||||||
let value = &self.source[start..self.pos()];
|
let value = &self.source[start..self.pos()];
|
||||||
|
|
@ -363,7 +402,7 @@ impl<'a> Tokens<'a> {
|
||||||
fn skip_whitespace(&mut self) {
|
fn skip_whitespace(&mut self) {
|
||||||
while let Some((pos, ch)) = self.next_char {
|
while let Some((pos, ch)) = self.next_char {
|
||||||
if ch == '\n' {
|
if ch == '\n' {
|
||||||
self.newlines.push(pos);
|
self.lines.add_line(pos);
|
||||||
} else if !ch.is_whitespace() {
|
} else if !ch.is_whitespace() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue