use crate::tokens::{Token, TokenKind, Tokens}; use std::fmt; #[derive(PartialEq, Eq)] pub struct SyntaxError { pub line: usize, pub column: usize, pub message: String, } impl SyntaxError { pub fn new(line: usize, column: usize, message: String) -> Self { SyntaxError { line, column, message, } } } impl fmt::Debug for SyntaxError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}:{}: {}", self.line, self.column, self.message) } } impl fmt::Display for SyntaxError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}:{}: {}", self.line, self.column, self.message) } } pub enum Literal { Float64(f64), } pub enum UnaryOp { Negate, } pub enum BinaryOp { Add, Subtract, Mutiply, Divide, And, Or, } pub enum Expr { Literal(Literal), Unary(UnaryOp, ExprRef), Binary(BinaryOp, ExprRef, ExprRef), } pub struct ExprRef(Option); impl ExprRef { pub fn error() -> Self { ExprRef(None) } } pub struct SyntaxTree { pub errors: Vec, expressions: Vec, } impl SyntaxTree { pub fn new() -> Self { SyntaxTree { errors: Vec::new(), expressions: Vec::new(), } } pub fn add_error(&mut self, error: SyntaxError) { self.errors.push(error); } pub fn add_expr(&mut self, expr: Expr) -> ExprRef { let index = self.expressions.len(); self.expressions.push(expr); ExprRef(Some(index)) } pub fn dump_expr(&self, expr: &ExprRef) -> String { match expr.0 { Some(idx) => { let expr = &self.expressions[idx]; match expr { Expr::Literal(lit) => match lit { Literal::Float64(f) => f.to_string(), }, Expr::Unary(op, e) => { let op = match op { UnaryOp::Negate => "-", }; format!("({op} {})", self.dump_expr(e)) } Expr::Binary(op, l, r) => { let op = match op { BinaryOp::Add => "+", BinaryOp::Subtract => "-", BinaryOp::Mutiply => "*", BinaryOp::Divide => "/", BinaryOp::And => "and", BinaryOp::Or => "or", }; format!("({op} {} {})", self.dump_expr(l), self.dump_expr(r)) } } } None => "<|EOF|>".to_string(), } } } // BINDING POWERS. When parsing expressions we only accept expressions that // meet a minimum binding power. (This is like "precedence" but I just super // don't like that terminology.) const ASSIGNMENT_POWER: u8 = 0; // = const OR_POWER: u8 = 1; // or const AND_POWER: u8 = 2; // and const EQUALITY_POWER: u8 = 3; // == != const COMPARISON_POWER: u8 = 4; // < > <= >= const TERM_POWER: u8 = 5; // + - const FACTOR_POWER: u8 = 6; // * / const UNARY_POWER: u8 = 7; // ! - // const CALL_POWER: u8 = 8; // . () // const PRIMARY_POWER: u8 = 9; fn token_power<'a>(token: &Option>) -> Option { let token = match token { Some(t) => t, None => return None, }; match token.kind() { TokenKind::Equal => Some(ASSIGNMENT_POWER), TokenKind::Or => Some(OR_POWER), TokenKind::And => Some(AND_POWER), TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER), TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => { Some(COMPARISON_POWER) } TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER), TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER), _ => None, } } pub struct Parser<'a> { tokens: Tokens<'a>, tree: SyntaxTree, current: Option>, previous: Option>, panic_mode: bool, } impl<'a> Parser<'a> { pub fn new(source: &'a str) -> Self { let mut parser = Parser { tokens: Tokens::new(source), tree: SyntaxTree::new(), current: None, previous: None, panic_mode: false, }; parser.advance(); parser } pub fn parse(mut self) -> (SyntaxTree, ExprRef) { let expr = self.expression(); self.consume(None, "expected end of expression"); (self.tree, expr) } fn expression(&mut self) -> ExprRef { self.expression_with_power(0) } fn expression_with_power(&mut self, minimum_power: u8) -> ExprRef { self.advance(); let mut expr = self.prefix_expression(); loop { let power = match token_power(&self.current) { Some(p) => p, None => break, // EOF, end of expression? }; if power < minimum_power { break; } self.advance(); expr = self.infix_expression(power, expr); } expr } fn prefix_expression(&mut self) -> ExprRef { let token = self.previous.as_ref(); match token { Some(token) => match token.kind() { TokenKind::LeftParen => self.grouping(), TokenKind::Number => self.number(), TokenKind::Minus => self.unary(), _ => { self.error("expected an expression"); ExprRef::error() } }, None => { self.error("expected an expression"); ExprRef::error() } } } fn infix_expression(&mut self, power: u8, left: ExprRef) -> ExprRef { let kind = self.previous.as_ref().unwrap().kind(); match kind { TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => { self.binary(power, left) } _ => panic!("Unknown infix operator, dispatch error?"), } } fn number(&mut self) -> ExprRef { let token = self.previous.as_ref().unwrap(); // What kind is it? For now let's just ... make it good. match token.as_str().parse::() { Ok(v) => self.tree.add_expr(Expr::Literal(Literal::Float64(v))), Err(e) => { self.error(format!("invalid f64: {e}")); ExprRef::error() } } } fn grouping(&mut self) -> ExprRef { let result = self.number(); self.consume( Some(TokenKind::RightParen), "expected ')' after an expression", ); result } fn unary(&mut self) -> ExprRef { let kind = self.previous.as_ref().unwrap().kind(); let expr = self.expression_with_power(UNARY_POWER); let op = match kind { TokenKind::Minus => UnaryOp::Negate, _ => panic!("unsuitable unary: {:?}: no op", kind), }; self.tree.add_expr(Expr::Unary(op, expr)) } fn binary(&mut self, power: u8, left: ExprRef) -> ExprRef { let right = self.expression_with_power(power + 1); let op = match self.previous.as_ref().unwrap().kind() { TokenKind::Plus => BinaryOp::Add, TokenKind::Minus => BinaryOp::Subtract, TokenKind::Star => BinaryOp::Mutiply, TokenKind::Slash => BinaryOp::Divide, TokenKind::And => BinaryOp::And, TokenKind::Or => BinaryOp::Or, _ => panic!("unsuitable binary: {:?}: no op", self.previous), }; self.tree.add_expr(Expr::Binary(op, left, right)) } fn advance(&mut self) { self.previous = self.current.take(); loop { self.current = self.tokens.next(); match &self.current { Some(token) if token.kind() == TokenKind::Error => { self.error_at_current(token.clone()) } _ => break, } } } fn consume(&mut self, kind: Option, error: &str) { match (&self.current, kind) { (Some(token), Some(kind)) if token.kind() == kind => self.advance(), (None, None) => (), _ => { self.error_at_current(error); } } } fn error(&mut self, message: T) where T: Into, { self.error_at(self.previous.clone(), message) } fn error_at_current(&mut self, message: T) where T: Into, { self.error_at(self.current.clone(), message) } fn error_at(&mut self, token: Option>, message: T) where T: Into, { if self.panic_mode { return; } self.panic_mode = true; let message: String = message.into(); let (line, column) = self.tokens.token_position(&token); let mut final_message = "Error ".to_string(); match token { None => final_message.push_str("at end"), Some(t) => { if t.kind() != TokenKind::Error { final_message.push_str("at '"); final_message.push_str(t.as_str()); final_message.push_str("'"); } } } final_message.push_str(": "); final_message.push_str(&message); self.tree .add_error(SyntaxError::new(line, column, final_message)); } } #[cfg(test)] mod tests { use super::*; use pretty_assertions::assert_eq; #[test] pub fn number_expressions() { // How am I going to test this? let (tree, expr) = Parser::new("23.5").parse(); assert_eq!(Vec::::new(), tree.errors); assert_eq!("23.5", tree.dump_expr(&expr)); } }