[fine] New parser usurps old parser

This commit is contained in:
John Doty 2024-01-05 11:22:45 -08:00
parent 120bd43652
commit 5cc9ecc398
6 changed files with 1433 additions and 1433 deletions

View file

@ -44,7 +44,7 @@ fn generate_test_for_file(path: PathBuf) -> String {
let name = format_ident!("{}", path.file_stem().unwrap().to_string_lossy());
let test_method = quote! {
fn #name() {
let (_tree, _lines) = fine::parser::concrete::parse_concrete(#contents);
let (_tree, _lines) = fine::parser::parse(#contents);
#concrete_comparison;
}
};

View file

@ -1,4 +1,4 @@
use fine::parser::Parser;
use fine::parser::old::Parser;
use std::env;
use std::fs;

File diff suppressed because it is too large Load diff

View file

@ -1,663 +0,0 @@
// NOTE: much of this parser structure derived from
// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
use crate::tokens::{Lines, Token, TokenKind, Tokens};
use std::{cell::Cell, num::NonZeroU32};
/// A concrete syntax tree stored as a flat arena of `Tree` nodes that are
/// addressed by `TreeRef`.
pub struct SyntaxTree<'a> {
/// Every node of the tree, in the order the nodes were added.
trees: Vec<Tree<'a>>,
/// The root node; `None` until one has been assigned.
root: Option<TreeRef>,
}
impl<'a> SyntaxTree<'a> {
    /// Creates an empty tree: no nodes and no root.
    pub fn new() -> Self {
        SyntaxTree {
            trees: Vec::new(),
            root: None,
        }
    }

    /// Stores `t` in the arena and returns the reference it now lives under.
    ///
    /// Every child tree of `t` gets its `parent` set to the new reference.
    ///
    /// # Panics
    /// Panics if `t` already has a parent.
    pub fn add_tree(&mut self, t: Tree<'a>) -> TreeRef {
        assert!(t.parent.is_none());
        let new_ref = TreeRef::from_index(self.trees.len());

        // NOTE: Holding several mutable references at once is awkward, so
        // this is our best chance to patch up the children's parent pointers.
        for child in &t.children {
            match child {
                Child::Tree(child_ref) => self[*child_ref].parent = Some(new_ref),
                Child::Token(_) => {}
            }
        }

        self.trees.push(t);
        new_ref
    }

    /// Renders the tree rooted at `root` as indented text; an empty string
    /// when there is no root.
    pub fn dump(&self) -> String {
        if let Some(root) = self.root {
            self[root].dump(self)
        } else {
            String::new()
        }
    }
}
impl<'a> std::ops::Index<TreeRef> for SyntaxTree<'a> {
    type Output = Tree<'a>;

    /// Looks up the node stored under `index`.
    fn index(&self, index: TreeRef) -> &Self::Output {
        let slot = index.index();
        &self.trees[slot]
    }
}
impl<'a> std::ops::IndexMut<TreeRef> for SyntaxTree<'a> {
    /// Looks up the node stored under `index` for mutation.
    fn index_mut(&mut self, index: TreeRef) -> &mut Self::Output {
        let slot = index.index();
        &mut self.trees[slot]
    }
}
/// The kind of a node in the concrete syntax tree.
#[derive(Debug)]
pub enum TreeKind {
// Also used as the placeholder kind of a tree that has been started but not
// yet ended.
Error,
File,
FunDecl,
ParamList,
Parameter,
TypeExpression,
Block,
LetStatement,
ReturnStatement,
ExpressionStatement,
LiteralExpression,
GroupingExpression,
UnaryExpression,
ConditionalExpression,
CallExpression,
ArgumentList,
Argument,
BinaryExpression,
IfStatement,
Identifier,
}
/// One node of the concrete syntax tree.
pub struct Tree<'a> {
/// What kind of syntax this node represents.
pub kind: TreeKind,
/// The node that contains this one; filled in when the containing node is
/// added to the `SyntaxTree`.
pub parent: Option<TreeRef>,
/// The tokens and sub-trees that make up this node, in order.
pub children: Vec<Child<'a>>,
}
/// A compact handle to a node inside a `SyntaxTree`.
///
/// The zero-based index is stored plus one in a `NonZeroU32`.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct TreeRef(NonZeroU32);

impl TreeRef {
    /// Builds a reference from a zero-based arena index.
    ///
    /// # Panics
    /// Panics if `index + 1` does not fit in a `u32`.
    pub fn from_index(index: usize) -> TreeRef {
        let one_based = index + 1;
        let raw: u32 = one_based.try_into().unwrap();
        TreeRef(NonZeroU32::new(raw).unwrap())
    }

    /// Returns the zero-based arena index this reference was built from.
    pub fn index(&self) -> usize {
        let TreeRef(raw) = *self;
        let one_based: usize = raw.get().try_into().unwrap();
        one_based - 1
    }
}
impl<'a> Tree<'a> {
    /// Renders this node and everything below it as indented text.
    ///
    /// The first line is this node's kind; each child follows, indented by
    /// two spaces per nesting level. `tree` is used to resolve child
    /// references.
    pub fn dump(&self, tree: &SyntaxTree<'a>) -> String {
        let mut text = format!("{:?}\n", self.kind);
        for child in &self.children {
            child.dump_rec(2, tree, &mut text);
        }
        text
    }
}
/// A single child of a `Tree` node.
pub enum Child<'a> {
// A token held directly by the parent node.
Token(Token<'a>),
// A nested node, referenced through the owning `SyntaxTree`.
Tree(TreeRef),
}
impl<'a> Child<'a> {
    /// Appends a textual dump of this child to `output`.
    ///
    /// The line is prefixed with `indent` spaces. Tokens print as
    /// `Kind:'"text"'`; trees print their kind and then recurse into their
    /// children with two more spaces of indentation.
    fn dump_rec(&self, indent: usize, tree: &SyntaxTree<'a>, output: &mut String) {
        output.extend(std::iter::repeat(' ').take(indent));
        match self {
            Child::Token(token) => {
                let line = format!("{:?}:'{:?}'\n", token.kind, token.as_str());
                output.push_str(&line);
            }
            Child::Tree(tree_ref) => {
                let node = &tree[*tree_ref];
                output.push_str(&format!("{:?}\n", node.kind));
                for nested in &node.children {
                    nested.dump_rec(indent + 2, tree, output);
                }
            }
        }
    }
}
/// A flat record of what the parser did; the event list is turned into a
/// tree by `CParser::build_tree`.
enum ParseEvent<'a> {
// Opens a new tree of the given kind.
Start { kind: TreeKind },
// Closes the most recently opened tree.
End,
// Adds a token to the currently open tree.
Advance { token: Token<'a> },
}
/// Handle for a tree that has been started but not yet ended.
struct MarkStarted {
// Position of the tree's `Start` event in the parser's event list.
index: usize,
}
/// Handle for a tree that has been ended; it can be passed to
/// `CParser::start_before` to wrap the tree in a new parent.
struct MarkClosed {
// Position of the tree's `Start` event in the parser's event list.
index: usize,
}
/// Event-recording parser that produces the concrete syntax tree.
struct CParser<'a> {
// Source of tokens.
tokens: Tokens<'a>,
// The token the parser is currently looking at (never whitespace or a
// comment).
current: Token<'a>,
// Stuck detector: decremented by every `peek`, reset by every `advance`.
fuel: Cell<u32>,
// Everything the parser has done so far, in order.
events: Vec<ParseEvent<'a>>,
}
impl<'a> CParser<'a> {
    /// Creates a parser over `tokens`, positioned on the first token that is
    /// neither whitespace nor a comment.
    fn new(tokens: Tokens<'a>) -> Self {
        let mut parser = CParser {
            tokens,
            current: Token::new(TokenKind::EOF, 0, ""),
            fuel: Cell::new(256),
            events: Vec::new(),
        };
        parser.current = parser.tokens.next();
        parser.skip_ephemera();
        parser
    }

    /// Opens a new tree at the current position.
    ///
    /// The tree is recorded with the placeholder kind `TreeKind::Error` until
    /// `end` supplies the real kind.
    fn start(&mut self) -> MarkStarted {
        let mark = MarkStarted {
            index: self.events.len(),
        };
        self.events.push(ParseEvent::Start {
            kind: TreeKind::Error,
        });
        mark
    }

    /// Closes the tree opened at `mark`, giving it its final `kind`.
    fn end(&mut self, mark: MarkStarted, kind: TreeKind) -> MarkClosed {
        self.events[mark.index] = ParseEvent::Start { kind };
        self.events.push(ParseEvent::End);
        MarkClosed { index: mark.index }
    }

    /// Opens a new tree that begins *before* an already closed tree, so the
    /// closed tree becomes the first child of the new one.
    fn start_before(&mut self, mark: MarkClosed) -> MarkStarted {
        // TODO: Point backwards and pointer chase in tree build?
        let mark = MarkStarted { index: mark.index };
        self.events.insert(
            mark.index,
            ParseEvent::Start {
                kind: TreeKind::Error,
            },
        );
        mark
    }

    /// Consumes the current token into the open tree and moves to the next
    /// significant token.
    ///
    /// # Panics
    /// Panics if the parser is already at EOF.
    fn advance(&mut self) {
        assert!(!self.eof()); // Don't try to advance past EOF
        self.fuel.set(256); // Consuming a token, reset stuck detector
        self.events.push(ParseEvent::Advance {
            token: self.current.clone(),
        });
        self.current = self.tokens.next();
        self.skip_ephemera();
    }

    /// Skips whitespace and comment tokens without recording them.
    fn skip_ephemera(&mut self) {
        while self.current.kind == TokenKind::Whitespace || self.current.kind == TokenKind::Comment
        {
            self.current = self.tokens.next();
        }
    }

    /// Returns true when the current token is the end of input.
    fn eof(&self) -> bool {
        self.current.kind == TokenKind::EOF
    }

    /// Returns the kind of the current token, burning one unit of fuel.
    ///
    /// # Panics
    /// Panics with "parser is stuck!" when the fuel is exhausted, i.e. after
    /// 256 peeks without an intervening `advance`.
    fn peek(&self) -> TokenKind {
        assert!(self.fuel.get() > 0, "parser is stuck!");
        self.fuel.set(self.fuel.get() - 1);
        self.current.kind
    }

    /// Returns true when the current token has the given kind (burns fuel).
    fn at(&self, kind: TokenKind) -> bool {
        self.peek() == kind
    }

    /// Consumes the current token if it has the given kind; reports whether
    /// it did.
    fn eat(&mut self, kind: TokenKind) -> bool {
        if self.at(kind) {
            self.advance();
            true
        } else {
            false
        }
    }

    /// Consumes a token of the given kind, or records `error` at the current
    /// token without consuming anything.
    fn expect<T>(&mut self, kind: TokenKind, error: T)
    where
        T: Into<String>,
    {
        if self.eat(kind) {
            return;
        }
        self.error(error);
    }

    /// Records `error` and wraps the offending token in an `Error` tree.
    ///
    /// At end of input there is no token to consume, so the `Error` tree then
    /// contains only the error message.
    fn advance_with_error<T>(&mut self, error: T) -> MarkClosed
    where
        T: Into<String>,
    {
        let m = self.start();
        self.error(error);
        // `advance` asserts that we are not at EOF; input that ends where an
        // expression was expected must produce an error node, not a panic.
        if !self.eof() {
            self.advance();
        }
        self.end(m, TreeKind::Error)
    }

    /// Records an error message located at the current token.
    fn error<T>(&mut self, message: T)
    where
        T: Into<String>,
    {
        self.error_at(self.current.clone(), message)
    }

    /// Records an error message located at `token`.
    ///
    /// The message is stored in the event stream as an error token that
    /// starts where `token` starts, so it ends up in the tree being built.
    fn error_at<T>(&mut self, token: Token<'a>, message: T)
    where
        T: Into<String>,
    {
        let message: String = message.into();

        let mut final_message = "Error ".to_string();
        if token.kind == TokenKind::EOF {
            final_message.push_str("at end")
        } else if token.kind != TokenKind::Error {
            final_message.push_str("at '");
            final_message.push_str(token.as_str());
            final_message.push_str("'");
        }
        final_message.push_str(": ");
        final_message.push_str(&message);

        self.events.push(ParseEvent::Advance {
            token: Token::error(token.start, final_message),
        });
    }

    /// Replays the recorded events into a `SyntaxTree` and returns it along
    /// with the line table of the token stream.
    ///
    /// # Panics
    /// Panics if the events are not bracketed by a single outer
    /// `Start`/`End` pair or if any started tree was never ended.
    fn build_tree(self) -> (SyntaxTree<'a>, Lines) {
        let mut events = self.events;
        let mut stack = Vec::new();

        let mut result = SyntaxTree::new();

        // The first element in our events vector must be a start; the whole
        // thing must be bracketed in a tree.
        assert!(matches!(events.get(0), Some(ParseEvent::Start { .. })));

        // The last element in our events vector must be an end, otherwise
        // the parser has failed badly. We'll remove it here so that, after
        // processing the entire array, the stack retains the tree that we
        // start with the very first ::Start.
        assert!(matches!(events.pop(), Some(ParseEvent::End)));

        for event in events {
            match event {
                ParseEvent::Start { kind } => stack.push(Tree {
                    kind,
                    parent: None,
                    children: Vec::new(),
                }),

                ParseEvent::End => {
                    // A finished tree moves into the arena and becomes a
                    // child of whatever tree is still open below it.
                    let t = result.add_tree(stack.pop().unwrap());
                    stack.last_mut().unwrap().children.push(Child::Tree(t));
                }

                ParseEvent::Advance { token } => {
                    stack.last_mut().unwrap().children.push(Child::Token(token));
                }
            }
        }

        assert!(stack.len() == 1, "Not all trees were ended!");
        let root = result.add_tree(stack.pop().unwrap());
        result.root = Some(root);

        (result, self.tokens.lines())
    }
}
/// Parses `source` as a whole file and returns the concrete syntax tree
/// together with the line table of the token stream.
pub fn parse_concrete(source: &str) -> (SyntaxTree, Lines) {
    let mut p = CParser::new(Tokens::new(source));
    file(&mut p);
    p.build_tree()
}
/// Parses an entire file: function declarations and statements, repeated
/// until end of input, wrapped in a `File` tree.
fn file(p: &mut CParser) {
    let mark = p.start();
    while !p.eof() {
        if p.peek() == TokenKind::Fun {
            function(p);
        } else {
            statement(p);
        }
    }
    p.end(mark, TreeKind::File);
}
/// Parses a function declaration into a `FunDecl` tree.
///
/// The parser must be positioned on `fun`. The parameter list, the
/// `-> type` clause and the body block are each parsed only if their first
/// token is present.
fn function(p: &mut CParser) {
assert!(p.at(TokenKind::Fun));
let m = p.start();
p.expect(TokenKind::Fun, "expected a function to start with 'fun'");
p.expect(TokenKind::Identifier, "expected a function name");
if p.at(TokenKind::LeftParen) {
param_list(p);
}
// The return type is only parsed after an arrow.
if p.eat(TokenKind::Arrow) {
type_expr(p);
}
if p.at(TokenKind::LeftBrace) {
block(p);
}
p.end(m, TreeKind::FunDecl);
}
/// Parses a parenthesised parameter list into a `ParamList` tree.
///
/// The parser must be positioned on `(`. Parameters are read for as long as
/// the current token is an identifier; anything else ends the list and the
/// closing `)` is then expected.
fn param_list(p: &mut CParser) {
    assert!(p.at(TokenKind::LeftParen));
    let mark = p.start();

    p.expect(TokenKind::LeftParen, "expect '(' to start a parameter list");
    // Stop at ')', at end of input, or at the first token that cannot start
    // a parameter.
    while !p.at(TokenKind::RightParen) && !p.eof() && p.at(TokenKind::Identifier) {
        parameter(p);
    }
    p.expect(TokenKind::RightParen, "expect ')' to end a parameter list");

    p.end(mark, TreeKind::ParamList);
}
/// Parses one parameter (`name`, optionally `: type`) into a `Parameter`
/// tree.
///
/// The parser must be positioned on the parameter's identifier. The
/// separating comma is consumed as part of the parameter, and is required
/// unless the next token is `)`.
fn parameter(p: &mut CParser) {
assert!(p.at(TokenKind::Identifier));
let m = p.start();
p.expect(
TokenKind::Identifier,
"expected an identifier for a parameter name",
);
if p.eat(TokenKind::Colon) {
type_expr(p);
}
// No comma is needed before the closing parenthesis.
if !p.at(TokenKind::RightParen) {
p.expect(TokenKind::Comma, "expected a comma between parameters");
}
p.end(m, TreeKind::Parameter);
}
/// Parses a type expression into a `TypeExpression` tree; currently a type
/// is a single identifier.
fn type_expr(p: &mut CParser) {
let m = p.start();
// TODO: Other kinds of type expressions probably!
p.expect(TokenKind::Identifier, "expected the identifier of a type");
p.end(m, TreeKind::TypeExpression);
}
/// Parses a brace-delimited block of statements into a `Block` tree.
///
/// The parser must be positioned on `{`. Statements are parsed until `}` or
/// end of input; a missing `}` is reported as an error.
fn block(p: &mut CParser) {
    assert!(p.at(TokenKind::LeftBrace));
    let m = p.start();

    p.expect(TokenKind::LeftBrace, "expect '{' to start a block");
    while !p.at(TokenKind::RightBrace) && !p.eof() {
        statement(p);
    }
    // The closing brace ends the block; the message must say so.
    p.expect(TokenKind::RightBrace, "expect '}' to end a block");

    p.end(m, TreeKind::Block);
}
/// Parses one statement, choosing the statement form from the current token.
/// Anything that is not a block, `let`, `return` or `if` is parsed as an
/// expression statement.
fn statement(p: &mut CParser) {
match p.peek() {
TokenKind::LeftBrace => block(p),
TokenKind::Let => statement_let(p),
TokenKind::Return => statement_return(p),
// NOTE: Technically 'if' is an expression, but `if` doesn't
// require a semicolon at the end if it's all by itself.
TokenKind::If => statement_if(p),
_ => statement_expression(p),
}
}
/// Parses an `if` used in statement position: a conditional expression
/// wrapped in an `IfStatement` tree, with no trailing semicolon.
///
/// The parser must be positioned on `if`.
fn statement_if(p: &mut CParser) {
assert!(p.at(TokenKind::If));
let m = p.start();
conditional(p);
p.end(m, TreeKind::IfStatement);
}
/// Parses `let name = expression;` into a `LetStatement` tree.
///
/// The parser must be positioned on `let`. The terminating `;` is not
/// required when the next token is `}`.
fn statement_let(p: &mut CParser) {
assert!(p.at(TokenKind::Let));
let m = p.start();
p.expect(TokenKind::Let, "expect 'let' to start a let statement");
p.expect(TokenKind::Identifier, "expected a name for the variable");
p.expect(TokenKind::Equal, "expected a '=' after the variable name");
expression(p);
// The last statement of a block may omit its semicolon.
if !p.at(TokenKind::RightBrace) {
p.expect(TokenKind::Semicolon, "expect ';' to end a let statement");
}
p.end(m, TreeKind::LetStatement);
}
/// Parses `return expression;` into a `ReturnStatement` tree.
///
/// The parser must be positioned on `return`. The terminating `;` is not
/// required when the next token is `}`.
fn statement_return(p: &mut CParser) {
assert!(p.at(TokenKind::Return));
let m = p.start();
p.expect(
TokenKind::Return,
"expect 'return' to start a return statement",
);
expression(p);
// The last statement of a block may omit its semicolon.
if !p.at(TokenKind::RightBrace) {
p.expect(TokenKind::Semicolon, "expect ';' to end a return statement");
}
p.end(m, TreeKind::ReturnStatement);
}
/// Parses `expression;` into an `ExpressionStatement` tree. The terminating
/// `;` is not required when the next token is `}`.
fn statement_expression(p: &mut CParser) {
let m = p.start();
expression(p);
// The last statement of a block may omit its semicolon.
if !p.at(TokenKind::RightBrace) {
p.expect(
TokenKind::Semicolon,
"expect ';' to end an expression statement",
);
}
p.end(m, TreeKind::ExpressionStatement);
}
/// Parses a full expression, accepting operators of any binding power.
fn expression(p: &mut CParser) {
expression_with_power(p, 0)
}
// BINDING POWERS. When parsing expressions we only accept expressions that
// meet a minimum binding power. (This is like "precedence" but I just super
// don't like that terminology.) A larger number binds more tightly.
const ASSIGNMENT_POWER: u8 = 0; // =
const OR_POWER: u8 = 1; // or
const AND_POWER: u8 = 2; // and
const EQUALITY_POWER: u8 = 3; // == !=
const COMPARISON_POWER: u8 = 4; // < > <= >=
const TERM_POWER: u8 = 5; // + -
const FACTOR_POWER: u8 = 6; // * /
const UNARY_POWER: u8 = 7; // ! -
// const PRIMARY_POWER: u8 = 9;
/// Returns the binding power of `token` when it is used as an infix
/// operator, or `None` when the token is not an infix operator.
fn token_power(token: TokenKind) -> Option<u8> {
    match token {
        TokenKind::Equal => Some(ASSIGNMENT_POWER),
        TokenKind::Or => Some(OR_POWER),
        TokenKind::And => Some(AND_POWER),
        TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER),
        TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => {
            Some(COMPARISON_POWER)
        }
        TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER),
        TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER),
        _ => None,
    }
}
/// Parses an expression whose infix operators all have a binding power of at
/// least `minimum_power`.
///
/// A prefix expression is parsed first; any directly following argument
/// lists turn it into (possibly chained) `CallExpression` trees. Infix
/// operators are then folded into `BinaryExpression` trees until a token
/// that is not an operator, or an operator weaker than `minimum_power`, is
/// reached.
fn expression_with_power(p: &mut CParser, minimum_power: u8) {
let mut expr = prefix_expression(p);
// Calls: wrap the callee retroactively each time a '(' follows.
while p.at(TokenKind::LeftParen) {
let m = p.start_before(expr);
argument_list(p);
expr = p.end(m, TreeKind::CallExpression);
}
loop {
let Some(power) = token_power(p.peek()) else {
break;
};
if power < minimum_power {
break;
}
// TODO: I don't think this works for other "infix" types, but we'll
// see won't we.
// The tree for the left operand is already closed, so open the binary
// expression in front of it.
let m = p.start_before(expr);
p.advance(); // Consume the operator
// NOTE(review): the right operand is parsed with the operator's own power
// as the minimum, so a following operator of equal power is absorbed by
// the recursive call (groups to the right) -- confirm this is intended for
// operators such as '-' and '/'.
expression_with_power(p, power);
expr = p.end(m, TreeKind::BinaryExpression);
}
}
/// Parses a parenthesised call argument list into an `ArgumentList` tree.
///
/// The parser must be positioned on `(`. Arguments are parsed until `)` or
/// end of input; a missing `)` is reported as an error.
fn argument_list(p: &mut CParser) {
    assert!(p.at(TokenKind::LeftParen));
    let m = p.start();

    p.expect(
        TokenKind::LeftParen,
        "expect an argument list to start with '('",
    );
    while !p.at(TokenKind::RightParen) && !p.eof() {
        argument(p);
    }
    // This is the closing parenthesis, so the message must talk about the
    // end of the list rather than repeat the opening-paren message.
    p.expect(
        TokenKind::RightParen,
        "expect an argument list to end with ')'",
    );

    p.end(m, TreeKind::ArgumentList);
}
/// Parses one call argument into an `Argument` tree. The separating comma
/// is consumed as part of the argument, and is required unless the next
/// token is `)`.
fn argument(p: &mut CParser) {
let m = p.start();
expression(p);
// No comma is needed before the closing parenthesis.
if !p.at(TokenKind::RightParen) {
p.expect(TokenKind::Comma, "expect a ',' between arguments");
}
p.end(m, TreeKind::Argument);
}
/// Parses the leading part of an expression (a literal, grouping, unary
/// operation, conditional or identifier) and returns the mark of the closed
/// tree so callers can wrap it.
///
/// Any other token is reported as "expected an expression" via
/// `advance_with_error`.
fn prefix_expression(p: &mut CParser) -> MarkClosed {
    match p.peek() {
        TokenKind::Number | TokenKind::String | TokenKind::True | TokenKind::False => literal(p),
        TokenKind::LeftParen => grouping(p),
        TokenKind::Bang | TokenKind::Minus => unary(p),
        TokenKind::If => conditional(p),
        TokenKind::Identifier => identifier(p),
        _ => p.advance_with_error("expected an expression"),
    }
}
/// Wraps the current token in a `LiteralExpression` tree. The caller has
/// already checked that the token is a literal.
fn literal(p: &mut CParser) -> MarkClosed {
let m = p.start();
p.advance();
p.end(m, TreeKind::LiteralExpression)
}
/// Parses a parenthesised expression into a `GroupingExpression` tree.
///
/// The parser must be positioned on `(`. A missing `)` is reported as an
/// error.
fn grouping(p: &mut CParser) -> MarkClosed {
assert!(p.at(TokenKind::LeftParen));
let m = p.start();
p.expect(TokenKind::LeftParen, "expected '(' to start grouping");
expression(p);
p.expect(TokenKind::RightParen, "unmatched parentheses in expression");
p.end(m, TreeKind::GroupingExpression)
}
/// Parses a prefix operator followed by its operand into a
/// `UnaryExpression` tree. The operand only absorbs operators that bind at
/// least as tightly as `UNARY_POWER`.
fn unary(p: &mut CParser) -> MarkClosed {
let m = p.start();
p.advance(); // Past the operator
expression_with_power(p, UNARY_POWER);
p.end(m, TreeKind::UnaryExpression)
}
/// Parses `if condition { ... }` with an optional `else` branch into a
/// `ConditionalExpression` tree.
///
/// The parser must be positioned on `if`. An `else` may be followed either
/// by a block or directly by another `if`, which is parsed as a nested
/// conditional.
fn conditional(p: &mut CParser) -> MarkClosed {
assert!(p.at(TokenKind::If));
let m = p.start();
p.expect(TokenKind::If, "expected conditional to start with 'if'");
expression(p);
// NOTE(review): `block` asserts that the current token is '{', so a
// condition that is not followed by '{' panics here rather than producing
// an error node -- confirm whether that is acceptable.
block(p);
if p.eat(TokenKind::Else) {
if p.at(TokenKind::If) {
// Don't require another block, just jump right into the conditional.
conditional(p);
} else {
block(p);
}
}
p.end(m, TreeKind::ConditionalExpression)
}
/// Wraps the current identifier token in an `Identifier` tree.
///
/// The parser must be positioned on an identifier.
fn identifier(p: &mut CParser) -> MarkClosed {
assert!(p.at(TokenKind::Identifier));
let m = p.start();
p.advance();
p.end(m, TreeKind::Identifier)
}
#[cfg(test)]
mod tests {
use super::*;
// Guards the size of an optional tree reference: `Option<TreeRef>` must
// stay four bytes.
#[test]
fn tree_ref_size() {
// What's the point of doing all that work if the tree ref isn't nice
// and "small"?
assert_eq!(4, std::mem::size_of::<Option<TreeRef>>());
}
}

872
fine/src/parser/old.rs Normal file
View file

@ -0,0 +1,872 @@
use crate::tokens::{Lines, Token, TokenKind, Tokens};
use std::fmt;
// TODO: An error should have:
//
// - a start
// - an end
// - a focus
// - descriptive messages
//
// that will have to wait for now
/// An error found while parsing or type checking, with the source span it
/// applies to.
#[derive(PartialEq, Eq)]
pub struct SyntaxError {
    /// `(line, column)` where the error starts.
    pub start: (usize, usize),
    /// `(line, column)` where the error ends.
    pub end: (usize, usize),
    /// Human-readable description of the problem.
    pub message: String,
}

impl SyntaxError {
    /// Creates an error located at a single position; `start` and `end` are
    /// both `(line, column)`.
    pub fn new<T>(line: usize, column: usize, message: T) -> Self
    where
        T: ToString,
    {
        SyntaxError {
            start: (line, column),
            end: (line, column),
            message: message.to_string(),
        }
    }

    /// Creates an error covering the span from `start` to `end`, each given
    /// as `(line, column)`.
    pub fn new_spanned<T>(start: (usize, usize), end: (usize, usize), message: T) -> Self
    where
        T: ToString,
    {
        SyntaxError {
            start,
            end,
            message: message.to_string(),
        }
    }
}

impl fmt::Debug for SyntaxError {
    /// Debug output is identical to the `Display` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{self}")
    }
}

impl fmt::Display for SyntaxError {
    /// Formats the error as `line:column: message`, using the start of the
    /// span as the location.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (line, column) = self.start;
        write!(f, "{}:{}: {}", line, column, self.message)
    }
}
/// The value carried by a literal expression.
#[derive(Clone)]
pub enum Literal {
Float64(f64),
String(String),
Bool(bool),
}
/// The prefix operators: `-` (`Negate`) and `!` (`Not`).
#[derive(Copy, Clone)]
pub enum UnaryOp {
Negate,
Not,
}
/// The infix operators: `+`, `-`, `*`, `/`, `and`, `or`.
#[derive(Copy, Clone)]
pub enum BinaryOp {
Add,
Subtract,
Multiply,
Divide,
And,
Or,
}
/// An expression node; sub-expressions are referenced through `ExprRef`.
#[derive(Clone)]
pub enum Expr<'a> {
// The literal value and the token it was read from.
Literal(Literal, Token<'a>),
// The operator, the operator token, and the operand.
Unary(UnaryOp, Token<'a>, ExprRef),
// The operator, the operator token, then the left and right operands.
Binary(BinaryOp, Token<'a>, ExprRef, ExprRef),
// The `if` token, the condition, the `then` expression, the optional
// `else` expression, and the last token consumed by the conditional.
Conditional(Token<'a>, ExprRef, ExprRef, Option<ExprRef>, Token<'a>),
}
/// A handle to an expression stored in a `SyntaxTree`; the inner value is
/// `None` when the expression could not be parsed.
#[derive(Clone)]
pub struct ExprRef(Option<usize>);

impl ExprRef {
    /// Returns the reference that stands for a failed parse.
    pub fn error() -> Self {
        Self(None)
    }
}
// TODO: Eventually we will be unable to use Eq and PartialEq here, and will
// need to do our own thing.
/// The type computed for an expression.
#[derive(Copy, Clone)]
pub enum Type {
    // Marks a type error. Receiving this means an error has already been
    // reported; producing it means you must also record the error in the
    // errors collection.
    Error,

    // Marks an expression with a control-flow side effect from which no
    // value will ever result. Usually this means everything is fine.
    Unreachable,

    // TODO: Numeric literals should be implicitly convertible, unlike other
    // types. Maybe just a "numeric literal" type?
    F64,
    String,
    Bool,
}

impl Type {
    /// Returns true only for `Type::Error`.
    pub fn is_error(&self) -> bool {
        matches!(self, Type::Error)
    }

    /// Returns true when a value of this type may be used where `other` is
    /// expected.
    ///
    /// Identical types are compatible, and `Type::Error` is compatible with
    /// everything so that one mistake does not cascade into more errors.
    pub fn compatible_with(&self, other: &Type) -> bool {
        // TODO: This is wrong because of numeric literals etc.

        // Avoid introducing more errors
        if self.is_error() || other.is_error() {
            return true;
        }

        matches!(
            (self, other),
            (Type::F64, Type::F64)
                | (Type::String, Type::String)
                | (Type::Bool, Type::Bool)
                | (Type::Unreachable, Type::Unreachable)
        )
    }
}

impl std::fmt::Debug for Type {
    /// Debug output is identical to the `Display` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}

impl std::fmt::Display for Type {
    /// Writes the user-facing name of the type.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Type::Error => "<< INTERNAL ERROR >>",
            Type::Unreachable => "<< UNREACHABLE >>",
            Type::F64 => "f64",
            Type::String => "string",
            Type::Bool => "bool",
        };
        f.write_str(name)
    }
}
/// The expressions produced by `Parser`, plus every error reported so far.
pub struct SyntaxTree<'a> {
/// Errors collected during parsing and type checking, in the order they
/// were reported.
pub errors: Vec<SyntaxError>,
// Arena of expressions; an `ExprRef` holds an index into this vector.
expressions: Vec<Expr<'a>>,
}
impl<'a> SyntaxTree<'a> {
/// Creates a tree with no expressions and no errors.
pub fn new() -> Self {
SyntaxTree {
errors: Vec::new(),
expressions: Vec::new(),
}
}
/// Records `error` at the end of the error list.
pub fn add_error(&mut self, error: SyntaxError) {
self.errors.push(error);
}
/// Stores `expr` and returns a reference to it.
pub fn add_expr(&mut self, expr: Expr<'a>) -> ExprRef {
let index = self.expressions.len();
self.expressions.push(expr);
ExprRef(Some(index))
}
/// Renders `expr` as a parenthesised prefix-notation string such as
/// `(+ 1 (* 2 3))`. A failed-parse reference renders as `<|EOF|>`.
pub fn dump_expr(&self, expr: &ExprRef) -> String {
match expr.0 {
Some(idx) => {
let expr = &self.expressions[idx];
match expr {
Expr::Literal(_, tok) => tok.to_string(),
Expr::Unary(_, tok, e) => {
format!("({tok} {})", self.dump_expr(e))
}
Expr::Binary(_, tok, l, r) => {
format!("({tok} {} {})", self.dump_expr(l), self.dump_expr(r))
}
Expr::Conditional(tok, cond, t, e, _) => {
// The else branch is only printed when it exists.
if let Some(e) = e {
format!(
"({tok} {} {} {})",
self.dump_expr(cond),
self.dump_expr(t),
self.dump_expr(e)
)
} else {
format!("({tok} {} {})", self.dump_expr(cond), self.dump_expr(t))
}
}
}
}
None => "<|EOF|>".to_string(),
}
}
/// Returns the first and last token of `expr`, or `None` when `expr` or any
/// operand needed to compute the span is a failed-parse reference.
pub fn expr_span(&self, expr: &ExprRef) -> Option<(Token<'a>, Token<'a>)> {
let expr = match expr.0 {
Some(idx) => &self.expressions[idx],
None => return None,
};
match expr {
Expr::Literal(_, tok) => Some((tok.clone(), tok.clone())),
// A unary expression runs from its operator to the end of its operand.
Expr::Unary(_, tok, arg) => {
let arg = self.expr_span(arg);
match arg {
None => None,
Some((_, end)) => Some((tok.clone(), end)),
}
}
// A binary expression runs from the start of the left operand to the end
// of the right operand.
Expr::Binary(_, _, left, right) => {
let left = self.expr_span(left);
let right = self.expr_span(right);
match (left, right) {
(None, _) => None,
(_, None) => None,
(Some((start, _)), Some((_, end))) => Some((start, end)),
}
}
Expr::Conditional(head, _, _, _, tail) => Some((head.clone(), tail.clone())),
}
}
/// Computes the type of `expr`, appending any type errors to `self.errors`.
///
/// `lines` converts token offsets to positions for error reporting.
/// `value_required` says whether the caller needs the expression to produce
/// a value; it only affects conditionals, whose branches inherit it.
/// Returns `Type::Error` for a failed-parse reference and whenever an error
/// is reported or propagated.
pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines, value_required: bool) -> Type {
// TODO: Cache and work on demand? Or is this just fine?
// Keep a copy of the reference: `expr` is shadowed below and the original
// is needed to compute spans for error messages.
let exr = expr.clone();
let expr = match expr.0 {
Some(idx) => &self.expressions[idx],
None => return Type::Error,
};
match expr {
Expr::Literal(lit, _) => match lit {
Literal::Float64(_) => Type::F64,
Literal::String(_) => Type::String,
Literal::Bool(_) => Type::Bool,
},
// Figure out the main thing. Check for a... trait?
Expr::Unary(op, tok, arg) => {
// Clone out of the borrowed expression so `self` can be used mutably by
// the recursive call below.
let op = op.clone();
let arg = arg.clone();
let tok = tok.clone();
let arg_type = self.expr_type(&arg, lines, true);
match (op, arg_type) {
(UnaryOp::Negate, Type::F64) => Type::F64,
(UnaryOp::Not, Type::Bool) => Type::Bool,
// This is dumb and should be punished, probably.
(_, Type::Unreachable) => {
let (line, col) = lines.position(tok.start);
self.errors.push(SyntaxError::new(line, col, format!("cannot apply a unary operator to something that doesn't yield a value")));
Type::Error
}
// Propagate existing errors without additional complaint.
(_, Type::Error) => Type::Error,
// Missed the whole table, must be an error.
(_, arg_type) => {
let (line, col) = lines.position(tok.start);
self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'")));
Type::Error
}
}
}
Expr::Binary(op, tok, left, right) => {
// Clone out of the borrowed expression so `self` can be used mutably by
// the recursive calls below.
let op = op.clone();
let tok = tok.clone();
let left = left.clone();
let right = right.clone();
// The left operand is typed first, so its errors are reported first.
let left_type = self.expr_type(&left, lines, true);
let right_type = self.expr_type(&right, lines, true);
match (op, left_type, right_type) {
(
BinaryOp::Add | BinaryOp::Subtract | BinaryOp::Multiply | BinaryOp::Divide,
Type::F64,
Type::F64,
) => Type::F64,
(BinaryOp::Add, Type::String, Type::String) => Type::String,
(BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool,
// This is dumb and should be punished, probably.
(_, _, Type::Unreachable) => {
let (line, col) = lines.position(tok.start);
self.errors.push(SyntaxError::new(
line,
col,
format!(
"cannot apply '{tok}' to an argument that doesn't yield a value (on the right)"
),
));
Type::Error
}
(_, Type::Unreachable, _) => {
let (line, col) = lines.position(tok.start);
self.errors.push(SyntaxError::new(
line,
col,
format!(
"cannot apply '{tok}' to an argument that doesn't yield a value (on the left)"
),
));
Type::Error
}
// Propagate existing errors without additional complaint.
(_, Type::Error, _) => Type::Error,
(_, _, Type::Error) => Type::Error,
// Missed the whole table, it must be an error.
(_, left_type, right_type) => {
let (line, col) = lines.position(tok.start);
self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)")));
Type::Error
}
}
}
Expr::Conditional(_, cond, then_exp, else_exp, _) => {
let cond = cond.clone();
let then_exp = then_exp.clone();
let else_exp = else_exp.clone();
// The condition must always yield a value; the branches only need to if
// the conditional as a whole does.
let cond_type = self.expr_type(&cond, lines, true);
let then_type = self.expr_type(&then_exp, lines, value_required);
let else_type = else_exp.map(|e| self.expr_type(&e, lines, value_required));
if !cond_type.compatible_with(&Type::Bool) {
// An error condition type is compatible with everything, so this check is
// defensive.
if !cond_type.is_error() {
let span = self
.expr_span(&cond)
.expect("If the expression has a type it must have a span");
// Both positions are taken from token starts, so the reported end is
// where the last token of the span begins.
let start = lines.position(span.0.start);
let end = lines.position(span.1.start);
self.errors.push(SyntaxError::new_spanned(
start,
end,
"the condition of an `if` expression must be a boolean",
));
}
return Type::Error;
}
match (then_type, else_type) {
(Type::Error, _) => Type::Error,
(_, Some(Type::Error)) => Type::Error,
// It's an error to have a missing else branch if the value is required
(_, None) if value_required => {
let span = self
.expr_span(&exr)
.expect("How did I get this far with a broken parse?");
let start = lines.position(span.0.start);
let end = lines.position(span.1.start);
self.errors.push(SyntaxError::new_spanned(
start,
end,
"this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value",
));
Type::Error
}
// If the value is required then the branches must be
// compatible, and the type of the expression is the type
// of the `then` branch.
(then_type, Some(else_type)) if value_required => {
if !then_type.compatible_with(&else_type) {
let span = self
.expr_span(&exr)
.expect("How did I get this far with a broken parse?");
let start = lines.position(span.0.start);
let end = lines.position(span.1.start);
self.errors.push(SyntaxError::new_spanned(
start,
end,
format!("the type of the `then` branch ({then_type}) must match the type of the `else` branch ({else_type})"),
));
Type::Error
} else {
then_type
}
}
// The value must not be required, just mark this as unreachable.
(_, _) => {
assert!(!value_required);
Type::Unreachable
}
}
}
}
}
}
// BINDING POWERS. When parsing expressions we only accept expressions that
// meet a minimum binding power. (This is like "precedence" but I just super
// don't like that terminology.) A larger number binds more tightly.
const ASSIGNMENT_POWER: u8 = 0; // =
const OR_POWER: u8 = 1; // or
const AND_POWER: u8 = 2; // and
const EQUALITY_POWER: u8 = 3; // == !=
const COMPARISON_POWER: u8 = 4; // < > <= >=
const TERM_POWER: u8 = 5; // + -
const FACTOR_POWER: u8 = 6; // * /
const UNARY_POWER: u8 = 7; // ! -
// const CALL_POWER: u8 = 8; // . ()
// const PRIMARY_POWER: u8 = 9;
/// Returns the binding power of `token` when it is used as an infix
/// operator, or `None` when the token is not an infix operator.
fn token_power(token: TokenKind) -> Option<u8> {
    match token {
        TokenKind::Equal => Some(ASSIGNMENT_POWER),
        TokenKind::Or => Some(OR_POWER),
        TokenKind::And => Some(AND_POWER),
        TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER),
        TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => {
            Some(COMPARISON_POWER)
        }
        TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER),
        TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER),
        _ => None,
    }
}
/// Expression parser that builds a `SyntaxTree` of `Expr` nodes.
pub struct Parser<'a> {
// Source of tokens.
tokens: Tokens<'a>,
// The tree (expressions and errors) being built.
tree: SyntaxTree<'a>,
// The next token to be consumed.
current: Token<'a>,
// The token most recently consumed.
previous: Token<'a>,
// Set once an error has been reported; later errors are then suppressed.
panic_mode: bool,
}
impl<'a> Parser<'a> {
pub fn new(source: &'a str) -> Self {
let mut parser = Parser {
tokens: Tokens::new(source),
tree: SyntaxTree::new(),
current: Token::new(TokenKind::EOF, 0, ""),
previous: Token::new(TokenKind::EOF, 0, ""),
panic_mode: false,
};
parser.advance();
parser
}
pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) {
let expr = self.expression();
self.consume(TokenKind::EOF, "expected end of expression");
(self.tree, expr, self.tokens.lines())
}
fn expression(&mut self) -> ExprRef {
self.expression_with_power(0)
}
fn expression_with_power(&mut self, minimum_power: u8) -> ExprRef {
self.trace("expression with power");
self.advance();
let mut expr = self.prefix_expression();
loop {
let power = match token_power(self.current.kind) {
Some(p) => p,
None => break, // EOF, end of expression?
};
if power < minimum_power {
break;
}
self.advance();
expr = self.infix_expression(power, expr);
}
expr
}
fn prefix_expression(&mut self) -> ExprRef {
self.trace("prefix");
let token = &self.previous;
match token.kind {
TokenKind::Bang => self.unary(),
TokenKind::LeftParen => self.grouping(),
TokenKind::Number => self.number(),
TokenKind::Minus => self.unary(),
TokenKind::String => self.string(),
TokenKind::True => self
.tree
.add_expr(Expr::Literal(Literal::Bool(true), token.clone())),
TokenKind::False => self
.tree
.add_expr(Expr::Literal(Literal::Bool(false), token.clone())),
TokenKind::If => self.conditional(),
_ => {
self.error("expected an expression");
ExprRef::error()
}
}
}
fn infix_expression(&mut self, power: u8, left: ExprRef) -> ExprRef {
self.trace("infix");
match self.previous.kind {
TokenKind::Plus
| TokenKind::Minus
| TokenKind::Star
| TokenKind::Slash
| TokenKind::And
| TokenKind::Or => self.binary(power, left),
_ => panic!("Unknown infix operator, dispatch error?"),
}
}
fn number(&mut self) -> ExprRef {
let token = &self.previous;
// What kind is it? For now let's just ... make it good.
let literal = match token.as_str().parse::<f64>() {
Ok(v) => Literal::Float64(v),
Err(e) => {
self.error(format!("invalid f64: {e}"));
return ExprRef::error();
}
};
self.tree.add_expr(Expr::Literal(literal, token.clone()))
}
fn string(&mut self) -> ExprRef {
let token = &self.previous;
let mut result = String::new();
let mut input = token.as_str().chars();
assert!(input.next().is_some()); // Delimiter
while let Some(ch) = input.next() {
match ch {
'\\' => match input.next().unwrap() {
'n' => result.push('\n'),
'r' => result.push('\r'),
't' => result.push('\t'),
ch => result.push(ch),
},
_ => result.push(ch),
}
}
result.pop(); // We pushed the other delimiter on, whoops.
let literal = Literal::String(result);
self.tree.add_expr(Expr::Literal(literal, token.clone()))
}
fn grouping(&mut self) -> ExprRef {
let result = self.expression();
self.consume(TokenKind::RightParen, "expected ')' after an expression");
result
}
fn conditional(&mut self) -> ExprRef {
let token = self.previous.clone();
let condition_expr = self.expression();
self.consume(TokenKind::LeftBrace, "expected '{' to start an 'if' block");
let then_expr = self.expression();
self.consume(TokenKind::RightBrace, "expected '}' to end an 'if' block");
let else_expr = if self.current.kind == TokenKind::Else {
self.advance();
if self.current.kind == TokenKind::If {
self.advance();
Some(self.conditional())
} else {
self.consume(
TokenKind::LeftBrace,
"expected '{' to start an 'else' block",
);
let else_expr = self.expression();
self.consume(TokenKind::RightBrace, "Expected '}' to end an 'else' block");
Some(else_expr)
}
} else {
None
};
let tail = self.previous.clone();
self.tree.add_expr(Expr::Conditional(
token,
condition_expr,
then_expr,
else_expr,
tail,
))
}
fn unary(&mut self) -> ExprRef {
let token = self.previous.clone();
let kind = token.kind;
let expr = self.expression_with_power(UNARY_POWER);
let op = match kind {
TokenKind::Minus => UnaryOp::Negate,
TokenKind::Bang => UnaryOp::Not,
_ => panic!("unsuitable unary: {:?}: no op", kind),
};
self.tree.add_expr(Expr::Unary(op, token, expr))
}
fn binary(&mut self, power: u8, left: ExprRef) -> ExprRef {
let token = self.previous.clone();
let op = match token.kind {
TokenKind::Plus => BinaryOp::Add,
TokenKind::Minus => BinaryOp::Subtract,
TokenKind::Star => BinaryOp::Multiply,
TokenKind::Slash => BinaryOp::Divide,
TokenKind::And => BinaryOp::And,
TokenKind::Or => BinaryOp::Or,
_ => panic!("unsuitable binary: {:?}: no op", self.previous),
};
let right = self.expression_with_power(power + 1);
self.tree.add_expr(Expr::Binary(op, token, left, right))
}
fn advance(&mut self) {
self.previous = self.current.clone();
self.current = self.tokens.next();
while self.current.kind == TokenKind::Error
|| self.current.kind == TokenKind::Whitespace
|| self.current.kind == TokenKind::Comment
{
if self.current.kind == TokenKind::Error {
self.error_at_current(self.current.to_string());
}
self.current = self.tokens.next();
}
}
fn consume(&mut self, kind: TokenKind, error: &str) {
if self.current.kind == kind {
self.advance();
} else {
self.error_at_current(error);
}
}
fn error<T>(&mut self, message: T)
where
T: Into<String>,
{
self.error_at(self.previous.clone(), message)
}
fn error_at_current<T>(&mut self, message: T)
where
T: Into<String>,
{
self.error_at(self.current.clone(), message)
}
fn error_at<T>(&mut self, token: Token<'a>, message: T)
where
T: Into<String>,
{
if self.panic_mode {
return;
}
self.panic_mode = true;
let message: String = message.into();
let (line, column) = self.tokens.token_position(&token);
let mut final_message = "Error ".to_string();
if token.kind == TokenKind::EOF {
final_message.push_str("at end")
} else if token.kind != TokenKind::Error {
final_message.push_str("at '");
final_message.push_str(token.as_str());
final_message.push_str("'");
}
final_message.push_str(": ");
final_message.push_str(&message);
self.tree
.add_error(SyntaxError::new(line, column, final_message));
}
fn trace(&self, _msg: &str) {
// let cpos = self.tokens.token_position(&self.current);
// let ppos = self.tokens.token_position(&self.previous);
// eprintln!(
// "[{}:{}:{}] [{}:{}:{}]: {msg}",
// ppos.0,
// ppos.1,
// self.previous
// .as_ref()
// .map(|t| t.as_str())
// .unwrap_or("<eof>"),
// cpos.0,
// cpos.1,
// self.current.as_ref().map(|t| t.as_str()).unwrap_or("<eof>")
// );
}
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
// Parses `source`, then checks that there were no errors, that the dumped
// expression equals `expected`, and that the computed type is compatible
// with `expected_type`.
fn test_successful_expression_parse(source: &str, expected: &str, expected_type: Type) {
let (mut tree, expr, lines) = Parser::new(source).parse();
assert_eq!(
Vec::<SyntaxError>::new(),
tree.errors,
"Expected successful parse"
);
assert_eq!(
expected,
tree.dump_expr(&expr),
"The parse structure of the expressions did not match"
);
// TODO: 'assert_eq' is probably wrong here
let expr_type = tree.expr_type(&expr, &lines, true);
assert!(
expected_type.compatible_with(&expr_type),
"The type of the expression did not match. expected: {expected_type}, actual: {expr_type}"
);
}
// Declares a `#[test]` that runs `test_successful_expression_parse` with the
// given source, expected dump and expected type.
macro_rules! test_expr {
($name:ident, $input:expr, $expected:expr, $type:expr) => {
#[test]
fn $name() {
test_successful_expression_parse($input, $expected, $type);
}
};
}
test_expr!(number_expr, "12", "12", Type::F64);
test_expr!(add_expr, "1 + 2", "(+ 1 2)", Type::F64);
test_expr!(
prec_expr,
"1 + 2 * 3 - 7 * 7",
"(- (+ 1 (* 2 3)) (* 7 7))",
Type::F64
);
test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)", Type::F64);
test_expr!(
strings,
r#" "Hello " + 'world!' "#,
r#"(+ "Hello " 'world!')"#,
Type::String
);
test_expr!(
booleans,
"true and false or false and !true",
"(or (and true false) (and false (! true)))",
Type::Bool
);
test_expr!(
if_expression,
"if true { 23 } else { 45 }",
"(if true 23 45)",
Type::F64
);
// test_expr!(
// if_with_return,
// "if true { 23 } else { return 'nothing' }",
// "",
// Type::F64
// );
// ========================================================================
// Type Error Tests
// ========================================================================
// Parses `source` (which must parse cleanly), type checks it, and checks
// that the result is an error type with exactly the expected error messages,
// in order.
fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) {
let (mut tree, expr, lines) = Parser::new(source).parse();
assert_eq!(
Vec::<SyntaxError>::new(),
tree.errors,
"Expected successful parse"
);
let expr_type = tree.expr_type(&expr, &lines, true);
assert!(expr_type.is_error());
let actual_errors = tree
.errors
.iter()
.map(|e| e.message.as_str())
.collect::<Vec<_>>();
assert_eq!(expected_errors, actual_errors);
}
// Declares a `#[test]` that runs `test_type_error_expression` with the given
// source and one or more expected error messages.
macro_rules! test_type_error_expr {
($name:ident, $input:expr, $($s:expr),+) => {
#[test]
fn $name() {
let expected_errors: Vec<&str> = (vec![$($s),*]);
test_type_error_expression($input, expected_errors);
}
}
}
test_type_error_expr!(
negate_string,
"-('what?')",
"cannot apply unary operator '-' to expression of type 'string'"
);
test_type_error_expr!(
add_string_number,
"'what?' + 5",
"cannot apply binary operator '+' to expressions of type 'string' (on the left) and 'f64' (on the right)"
);
test_type_error_expr!(
add_number_string,
"5 + 'what?'",
"cannot apply binary operator '+' to expressions of type 'f64' (on the left) and 'string' (on the right)"
);
test_type_error_expr!(
errors_propagate_do_not_duplicate,
"!'hello' / 27 * -('what?') + 23",
"cannot apply unary operator '!' to expression of type 'string'",
"cannot apply unary operator '-' to expression of type 'string'"
);
test_type_error_expr!(
if_not_bool,
"if 23 { 1 } else { 2 }",
"the condition of an `if` expression must be a boolean"
);
test_type_error_expr!(
if_arm_mismatch,
"if true { 1 } else { '1' }",
"the type of the `then` branch (f64) must match the type of the `else` branch (string)"
);
test_type_error_expr!(
if_no_else,
"if true { 1 }",
"this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value"
);
}

View file

@ -1,4 +1,4 @@
use fine::parser::concrete::SyntaxTree;
use fine::parser::SyntaxTree;
use pretty_assertions::assert_eq;
fn rebase_concrete(source_path: &str, dump: &str) {