360 lines
9.9 KiB
Rust
360 lines
9.9 KiB
Rust
use crate::tokens::{Token, TokenKind, Tokens};
|
|
use std::fmt;
|
|
|
|
/// A problem found while parsing, tagged with the source position it was
/// reported at.
///
/// Two errors are equal when their position and message text are all equal.
#[derive(PartialEq, Eq)]
pub struct SyntaxError {
    /// Line the error was reported at.
    pub line: usize,
    /// Column the error was reported at.
    pub column: usize,
    /// Human-readable description of what went wrong.
    pub message: String,
}
|
|
|
|
impl SyntaxError {
|
|
pub fn new(line: usize, column: usize, message: String) -> Self {
|
|
SyntaxError {
|
|
line,
|
|
column,
|
|
message,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl fmt::Debug for SyntaxError {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(f, "{}:{}: {}", self.line, self.column, self.message)
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for SyntaxError {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(f, "{}:{}: {}", self.line, self.column, self.message)
|
|
}
|
|
}
|
|
|
|
/// A literal value that appears directly in the source text.
///
/// Derives `Debug`, `Clone` and `PartialEq` so literals can be printed,
/// duplicated and compared (for example in tests). `Eq` is not derived
/// because `f64` does not implement it.
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
    /// A 64-bit floating point number.
    Float64(f64),
}
|
|
|
|
/// Operators that take a single operand.
///
/// Derives the usual value-type traits so operators can be printed, copied
/// and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    /// Arithmetic negation; dumped as `-`.
    Negate,
}
|
|
|
|
/// Operators that take a left and a right operand.
///
/// Derives the usual value-type traits so operators can be printed, copied
/// and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
    /// `+`
    Add,
    /// `-`
    Subtract,
    /// `*`
    ///
    /// NOTE: the variant name is a misspelling of "Multiply". It is kept
    /// as-is because the variant is public and renaming it would break
    /// every user of this enum.
    Mutiply,
    /// `/`
    Divide,
    /// `and`
    And,
    /// `or`
    Or,
}
|
|
|
|
pub enum Expr {
|
|
Literal(Literal),
|
|
Unary(UnaryOp, ExprRef),
|
|
Binary(BinaryOp, ExprRef, ExprRef),
|
|
}
|
|
|
|
/// Handle to an expression stored in a [`SyntaxTree`].
///
/// Wraps the index handed out by `SyntaxTree::add_expr`, or `None` for an
/// expression that could not be parsed (see `ExprRef::error`).
///
/// Derives `Debug`, `PartialEq` and `Eq` so handles can be printed and
/// compared.
#[derive(Debug, PartialEq, Eq)]
pub struct ExprRef(Option<usize>);
|
|
|
|
impl ExprRef {
|
|
pub fn error() -> Self {
|
|
ExprRef(None)
|
|
}
|
|
}
|
|
|
|
pub struct SyntaxTree {
|
|
pub errors: Vec<SyntaxError>,
|
|
expressions: Vec<Expr>,
|
|
}
|
|
|
|
impl SyntaxTree {
|
|
pub fn new() -> Self {
|
|
SyntaxTree {
|
|
errors: Vec::new(),
|
|
expressions: Vec::new(),
|
|
}
|
|
}
|
|
|
|
pub fn add_error(&mut self, error: SyntaxError) {
|
|
self.errors.push(error);
|
|
}
|
|
|
|
pub fn add_expr(&mut self, expr: Expr) -> ExprRef {
|
|
let index = self.expressions.len();
|
|
self.expressions.push(expr);
|
|
ExprRef(Some(index))
|
|
}
|
|
|
|
pub fn dump_expr(&self, expr: &ExprRef) -> String {
|
|
match expr.0 {
|
|
Some(idx) => {
|
|
let expr = &self.expressions[idx];
|
|
match expr {
|
|
Expr::Literal(lit) => match lit {
|
|
Literal::Float64(f) => f.to_string(),
|
|
},
|
|
Expr::Unary(op, e) => {
|
|
let op = match op {
|
|
UnaryOp::Negate => "-",
|
|
};
|
|
format!("({op} {})", self.dump_expr(e))
|
|
}
|
|
Expr::Binary(op, l, r) => {
|
|
let op = match op {
|
|
BinaryOp::Add => "+",
|
|
BinaryOp::Subtract => "-",
|
|
BinaryOp::Mutiply => "*",
|
|
BinaryOp::Divide => "/",
|
|
BinaryOp::And => "and",
|
|
BinaryOp::Or => "or",
|
|
};
|
|
format!("({op} {} {})", self.dump_expr(l), self.dump_expr(r))
|
|
}
|
|
}
|
|
}
|
|
None => "<|EOF|>".to_string(),
|
|
}
|
|
}
|
|
}
|
|
|
|
// BINDING POWERS. Every infix operator has a binding power, and while parsing
// an expression we only accept operators whose power meets a required
// minimum. (Elsewhere this is usually called "precedence".) Each level is
// defined relative to the one below it so the ordering is explicit: a larger
// value binds more tightly.
const ASSIGNMENT_POWER: u8 = 0; // =
const OR_POWER: u8 = ASSIGNMENT_POWER + 1; // or
const AND_POWER: u8 = OR_POWER + 1; // and
const EQUALITY_POWER: u8 = AND_POWER + 1; // == !=
const COMPARISON_POWER: u8 = EQUALITY_POWER + 1; // < > <= >=
const TERM_POWER: u8 = COMPARISON_POWER + 1; // + -
const FACTOR_POWER: u8 = TERM_POWER + 1; // * /
const UNARY_POWER: u8 = FACTOR_POWER + 1; // ! -

// Not defined yet; the next two levels are set aside for:
//   CALL_POWER = 8     . ()
//   PRIMARY_POWER = 9
|
|
|
|
fn token_power<'a>(token: &Option<Token<'a>>) -> Option<u8> {
|
|
let token = match token {
|
|
Some(t) => t,
|
|
None => return None,
|
|
};
|
|
|
|
match token.kind() {
|
|
TokenKind::Equal => Some(ASSIGNMENT_POWER),
|
|
TokenKind::Or => Some(OR_POWER),
|
|
TokenKind::And => Some(AND_POWER),
|
|
TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER),
|
|
TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => {
|
|
Some(COMPARISON_POWER)
|
|
}
|
|
TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER),
|
|
TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
pub struct Parser<'a> {
|
|
tokens: Tokens<'a>,
|
|
tree: SyntaxTree,
|
|
current: Option<Token<'a>>,
|
|
previous: Option<Token<'a>>,
|
|
|
|
panic_mode: bool,
|
|
}
|
|
|
|
impl<'a> Parser<'a> {
|
|
pub fn new(source: &'a str) -> Self {
|
|
let mut parser = Parser {
|
|
tokens: Tokens::new(source),
|
|
tree: SyntaxTree::new(),
|
|
current: None,
|
|
previous: None,
|
|
panic_mode: false,
|
|
};
|
|
parser.advance();
|
|
parser
|
|
}
|
|
|
|
pub fn parse(mut self) -> (SyntaxTree, ExprRef) {
|
|
let expr = self.expression();
|
|
self.consume(None, "expected end of expression");
|
|
(self.tree, expr)
|
|
}
|
|
|
|
fn expression(&mut self) -> ExprRef {
|
|
self.expression_with_power(0)
|
|
}
|
|
|
|
fn expression_with_power(&mut self, minimum_power: u8) -> ExprRef {
|
|
self.advance();
|
|
let mut expr = self.prefix_expression();
|
|
loop {
|
|
let power = match token_power(&self.current) {
|
|
Some(p) => p,
|
|
None => break, // EOF, end of expression?
|
|
};
|
|
|
|
if power < minimum_power {
|
|
break;
|
|
}
|
|
|
|
self.advance();
|
|
expr = self.infix_expression(power, expr);
|
|
}
|
|
expr
|
|
}
|
|
|
|
fn prefix_expression(&mut self) -> ExprRef {
|
|
let token = self.previous.as_ref();
|
|
match token {
|
|
Some(token) => match token.kind() {
|
|
TokenKind::LeftParen => self.grouping(),
|
|
TokenKind::Number => self.number(),
|
|
TokenKind::Minus => self.unary(),
|
|
_ => {
|
|
self.error("expected an expression");
|
|
ExprRef::error()
|
|
}
|
|
},
|
|
None => {
|
|
self.error("expected an expression");
|
|
ExprRef::error()
|
|
}
|
|
}
|
|
}
|
|
|
|
fn infix_expression(&mut self, power: u8, left: ExprRef) -> ExprRef {
|
|
let kind = self.previous.as_ref().unwrap().kind();
|
|
match kind {
|
|
TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => {
|
|
self.binary(power, left)
|
|
}
|
|
_ => panic!("Unknown infix operator, dispatch error?"),
|
|
}
|
|
}
|
|
|
|
fn number(&mut self) -> ExprRef {
|
|
let token = self.previous.as_ref().unwrap();
|
|
// What kind is it? For now let's just ... make it good.
|
|
|
|
match token.as_str().parse::<f64>() {
|
|
Ok(v) => self.tree.add_expr(Expr::Literal(Literal::Float64(v))),
|
|
Err(e) => {
|
|
self.error(format!("invalid f64: {e}"));
|
|
ExprRef::error()
|
|
}
|
|
}
|
|
}
|
|
|
|
fn grouping(&mut self) -> ExprRef {
|
|
let result = self.number();
|
|
self.consume(
|
|
Some(TokenKind::RightParen),
|
|
"expected ')' after an expression",
|
|
);
|
|
result
|
|
}
|
|
|
|
fn unary(&mut self) -> ExprRef {
|
|
let kind = self.previous.as_ref().unwrap().kind();
|
|
let expr = self.expression_with_power(UNARY_POWER);
|
|
let op = match kind {
|
|
TokenKind::Minus => UnaryOp::Negate,
|
|
_ => panic!("unsuitable unary: {:?}: no op", kind),
|
|
};
|
|
self.tree.add_expr(Expr::Unary(op, expr))
|
|
}
|
|
|
|
fn binary(&mut self, power: u8, left: ExprRef) -> ExprRef {
|
|
let right = self.expression_with_power(power + 1);
|
|
let op = match self.previous.as_ref().unwrap().kind() {
|
|
TokenKind::Plus => BinaryOp::Add,
|
|
TokenKind::Minus => BinaryOp::Subtract,
|
|
TokenKind::Star => BinaryOp::Mutiply,
|
|
TokenKind::Slash => BinaryOp::Divide,
|
|
TokenKind::And => BinaryOp::And,
|
|
TokenKind::Or => BinaryOp::Or,
|
|
_ => panic!("unsuitable binary: {:?}: no op", self.previous),
|
|
};
|
|
|
|
self.tree.add_expr(Expr::Binary(op, left, right))
|
|
}
|
|
|
|
fn advance(&mut self) {
|
|
self.previous = self.current.take();
|
|
loop {
|
|
self.current = self.tokens.next();
|
|
match &self.current {
|
|
Some(token) if token.kind() == TokenKind::Error => {
|
|
self.error_at_current(token.clone())
|
|
}
|
|
_ => break,
|
|
}
|
|
}
|
|
}
|
|
|
|
fn consume(&mut self, kind: Option<TokenKind>, error: &str) {
|
|
match (&self.current, kind) {
|
|
(Some(token), Some(kind)) if token.kind() == kind => self.advance(),
|
|
(None, None) => (),
|
|
_ => {
|
|
self.error_at_current(error);
|
|
}
|
|
}
|
|
}
|
|
|
|
fn error<T>(&mut self, message: T)
|
|
where
|
|
T: Into<String>,
|
|
{
|
|
self.error_at(self.previous.clone(), message)
|
|
}
|
|
|
|
fn error_at_current<T>(&mut self, message: T)
|
|
where
|
|
T: Into<String>,
|
|
{
|
|
self.error_at(self.current.clone(), message)
|
|
}
|
|
|
|
fn error_at<T>(&mut self, token: Option<Token<'a>>, message: T)
|
|
where
|
|
T: Into<String>,
|
|
{
|
|
if self.panic_mode {
|
|
return;
|
|
}
|
|
self.panic_mode = true;
|
|
|
|
let message: String = message.into();
|
|
let (line, column) = self.tokens.token_position(&token);
|
|
let mut final_message = "Error ".to_string();
|
|
match token {
|
|
None => final_message.push_str("at end"),
|
|
Some(t) => {
|
|
if t.kind() != TokenKind::Error {
|
|
final_message.push_str("at '");
|
|
final_message.push_str(t.as_str());
|
|
final_message.push_str("'");
|
|
}
|
|
}
|
|
}
|
|
final_message.push_str(": ");
|
|
final_message.push_str(&message);
|
|
|
|
self.tree
|
|
.add_error(SyntaxError::new(line, column, final_message));
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A lone number parses without errors and dumps as its own text.
    #[test]
    pub fn number_expressions() {
        let (tree, root) = Parser::new("23.5").parse();

        let no_errors: Vec<SyntaxError> = Vec::new();
        assert_eq!(no_errors, tree.errors);
        assert_eq!("23.5", tree.dump_expr(&root));
    }
}
|