oden/oden-script/src/parser.rs

360 lines
9.9 KiB
Rust

use crate::tokens::{Token, TokenKind, Tokens};
use std::fmt;
/// A syntax error recorded while parsing, together with the source position
/// it was reported at.
///
/// `Debug` and `Display` are implemented by hand elsewhere in this file, which
/// is why only the equality traits are derived here.
#[derive(PartialEq, Eq)]
pub struct SyntaxError {
/// Line of the token the error was reported at.
pub line: usize,
/// Column of the token the error was reported at.
pub column: usize,
/// Human-readable description of the problem.
pub message: String,
}
impl SyntaxError {
pub fn new(line: usize, column: usize, message: String) -> Self {
SyntaxError {
line,
column,
message,
}
}
}
impl fmt::Debug for SyntaxError {
    /// Debug output is the same `line:column: message` text that `Display`
    /// produces, so test failures show readable errors.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}
impl fmt::Display for SyntaxError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}:{}: {}", self.line, self.column, self.message)
}
}
/// A literal value appearing in an expression.
///
/// Derives the common traits so literals can be printed with `{:?}`, copied
/// and compared. (`Eq` is not derived because `f64` does not implement it.)
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Literal {
    /// A 64-bit floating point number.
    Float64(f64),
}
/// Prefix operators that take a single operand.
///
/// Derives the common traits so operators can be printed with `{:?}`, copied
/// and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    /// Arithmetic negation, written `-x`.
    Negate,
}
/// Infix operators that take a left and a right operand.
///
/// Derives the common traits so operators can be printed with `{:?}`, copied
/// and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
    /// `+`
    Add,
    /// `-`
    Subtract,
    /// `*`
    ///
    /// NOTE(review): the variant name is misspelled ("Mutiply"). It is part of
    /// the public interface and is matched on by name elsewhere in this file,
    /// so the spelling is kept as-is here.
    Mutiply,
    /// `/`
    Divide,
    /// `and`
    And,
    /// `or`
    Or,
}
/// A node of the expression tree.
///
/// Child expressions are not stored inline; they are referred to through
/// `ExprRef` handles.
pub enum Expr {
/// A literal value.
Literal(Literal),
/// A unary operator applied to one operand.
Unary(UnaryOp, ExprRef),
/// A binary operator applied to a left operand and a right operand, in that order.
Binary(BinaryOp, ExprRef, ExprRef),
}
/// A handle to an expression stored in a `SyntaxTree`.
///
/// Wraps `Some(index)` for an expression that was added to a tree, or `None`
/// for the placeholder produced when an expression could not be parsed.
/// Derives the common traits so the handle can be printed with `{:?}`, copied
/// and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ExprRef(Option<usize>);
impl ExprRef {
pub fn error() -> Self {
ExprRef(None)
}
}
/// The result of parsing: every expression node that was built, plus the
/// syntax errors recorded along the way.
pub struct SyntaxTree {
/// Syntax errors, in the order they were added.
pub errors: Vec<SyntaxError>,
// Flat storage for expression nodes; an `ExprRef` holds an index into this vector.
expressions: Vec<Expr>,
}
impl SyntaxTree {
    /// Creates a tree with no expressions and no errors.
    pub fn new() -> Self {
        Self {
            errors: Vec::new(),
            expressions: Vec::new(),
        }
    }

    /// Records `error` at the end of the error list.
    pub fn add_error(&mut self, error: SyntaxError) {
        self.errors.push(error);
    }

    /// Stores `expr` in the tree and returns a reference to its slot.
    pub fn add_expr(&mut self, expr: Expr) -> ExprRef {
        self.expressions.push(expr);
        // The node that was just pushed lives in the last slot.
        ExprRef(Some(self.expressions.len() - 1))
    }

    /// Renders the expression behind `expr` as a prefix-notation string, e.g.
    /// `(+ 1 2)`.
    ///
    /// A reference that does not point at an expression is rendered as a
    /// fixed placeholder string.
    ///
    /// # Panics
    ///
    /// Panics if the index held by `expr` is out of bounds for this tree.
    pub fn dump_expr(&self, expr: &ExprRef) -> String {
        let index = match expr.0 {
            Some(index) => index,
            None => return "<|EOF|>".to_string(),
        };

        match &self.expressions[index] {
            Expr::Literal(Literal::Float64(value)) => value.to_string(),
            Expr::Unary(kind, operand) => {
                let operand = self.dump_expr(operand);
                let op = match kind {
                    UnaryOp::Negate => "-",
                };
                format!("({op} {})", operand)
            }
            Expr::Binary(kind, lhs, rhs) => {
                let lhs = self.dump_expr(lhs);
                let rhs = self.dump_expr(rhs);
                let op = match kind {
                    BinaryOp::Add => "+",
                    BinaryOp::Subtract => "-",
                    BinaryOp::Mutiply => "*",
                    BinaryOp::Divide => "/",
                    BinaryOp::And => "and",
                    BinaryOp::Or => "or",
                };
                format!("({op} {} {})", lhs, rhs)
            }
        }
    }
}
// BINDING POWERS. When parsing expressions we only accept operators that
// meet a minimum binding power. (This is like "precedence" but I just super
// don't like that terminology.)
//
// A larger number binds more tightly: the expression loop stops at the first
// operator whose power is below the current minimum, and a binary operator
// parses its right operand with a minimum of its own power plus one.
// The trailing comment on each constant lists the operators it is meant for.
const ASSIGNMENT_POWER: u8 = 0; // =
const OR_POWER: u8 = 1; // or
const AND_POWER: u8 = 2; // and
const EQUALITY_POWER: u8 = 3; // == !=
const COMPARISON_POWER: u8 = 4; // < > <= >=
const TERM_POWER: u8 = 5; // + -
const FACTOR_POWER: u8 = 6; // * /
const UNARY_POWER: u8 = 7; // ! -  (only prefix '-' is parsed in this file so far)
// Placeholders for binding powers that are not used yet:
// const CALL_POWER: u8 = 8; // . ()
// const PRIMARY_POWER: u8 = 9;
/// Returns the infix binding power of `token`.
///
/// Yields `None` when there is no token at all or when the token is not one
/// of the infix operators listed below.
fn token_power<'a>(token: &Option<Token<'a>>) -> Option<u8> {
    let kind = token.as_ref()?.kind();
    let power = match kind {
        TokenKind::Equal => ASSIGNMENT_POWER,
        TokenKind::Or => OR_POWER,
        TokenKind::And => AND_POWER,
        TokenKind::EqualEqual | TokenKind::BangEqual => EQUALITY_POWER,
        TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => {
            COMPARISON_POWER
        }
        TokenKind::Plus | TokenKind::Minus => TERM_POWER,
        TokenKind::Star | TokenKind::Slash => FACTOR_POWER,
        // Anything else cannot continue an expression.
        _ => return None,
    };
    Some(power)
}
/// Expression parser that pulls tokens from a `Tokens` stream and builds a
/// `SyntaxTree`.
pub struct Parser<'a> {
// Source of tokens for the text being parsed.
tokens: Tokens<'a>,
// Tree under construction; handed back to the caller by `parse`.
tree: SyntaxTree,
// The next token that has not been consumed yet (`None` once the token stream is exhausted).
current: Option<Token<'a>>,
// The token that was consumed most recently.
previous: Option<Token<'a>>,
// Set when the first error is reported; while set, further errors are dropped.
panic_mode: bool,
}
impl<'a> Parser<'a> {
    /// Creates a parser for `source` and reads the first token into `current`.
    pub fn new(source: &'a str) -> Self {
        let mut parser = Parser {
            tokens: Tokens::new(source),
            tree: SyntaxTree::new(),
            current: None,
            previous: None,
            panic_mode: false,
        };
        parser.advance();
        parser
    }

    /// Parses one expression that must be followed by the end of the input.
    ///
    /// Returns the finished tree (including any recorded errors) together
    /// with a reference to the root expression.
    pub fn parse(mut self) -> (SyntaxTree, ExprRef) {
        let expr = self.expression();
        self.consume(None, "expected end of expression");
        (self.tree, expr)
    }

    /// Parses an expression, accepting operators of any binding power.
    fn expression(&mut self) -> ExprRef {
        self.expression_with_power(0)
    }

    /// Parses an expression, only continuing through infix operators whose
    /// binding power is at least `minimum_power`.
    fn expression_with_power(&mut self, minimum_power: u8) -> ExprRef {
        // Consume the first token of the expression; the prefix handlers read
        // it back from `previous`.
        self.advance();
        let mut expr = self.prefix_expression();
        loop {
            let power = match token_power(&self.current) {
                Some(p) => p,
                None => break, // Not an infix operator (or end of input).
            };
            if power < minimum_power {
                // This operator belongs to an enclosing, looser expression.
                break;
            }
            // Consume the operator; the infix handlers read it from `previous`.
            self.advance();
            expr = self.infix_expression(power, expr);
        }
        expr
    }

    /// Dispatches on the token that was just consumed to parse the start of
    /// an expression, recording an error if that token cannot start one.
    fn prefix_expression(&mut self) -> ExprRef {
        let kind = self.previous.as_ref().map(|token| token.kind());
        match kind {
            Some(TokenKind::LeftParen) => self.grouping(),
            Some(TokenKind::Number) => self.number(),
            Some(TokenKind::Minus) => self.unary(),
            _ => {
                self.error("expected an expression");
                ExprRef::error()
            }
        }
    }

    /// Dispatches on the infix operator that was just consumed.
    ///
    /// `power` is that operator's binding power and `left` is the operand
    /// parsed so far.
    fn infix_expression(&mut self, power: u8, left: ExprRef) -> ExprRef {
        let kind = self
            .previous
            .as_ref()
            .expect("an infix operator was just consumed")
            .kind();
        match kind {
            TokenKind::Plus
            | TokenKind::Minus
            | TokenKind::Star
            | TokenKind::Slash
            | TokenKind::And
            | TokenKind::Or => self.binary(power, left),
            // `token_power` also assigns a power to operators that have no
            // expression node yet (`=`, `==`, `<`, ...). Those come straight
            // from user input, so report them instead of panicking.
            _ => {
                self.error("operator is not supported yet");
                ExprRef::error()
            }
        }
    }

    /// Turns the number token that was just consumed into a literal node.
    fn number(&mut self) -> ExprRef {
        let token = self
            .previous
            .as_ref()
            .expect("a number token was just consumed");
        // What kind is it? For now every number is parsed as an f64.
        match token.as_str().parse::<f64>() {
            Ok(v) => self.tree.add_expr(Expr::Literal(Literal::Float64(v))),
            Err(e) => {
                self.error(format!("invalid f64: {e}"));
                ExprRef::error()
            }
        }
    }

    /// Parses a parenthesised expression; the `(` has already been consumed.
    fn grouping(&mut self) -> ExprRef {
        // Parse the full expression between the parentheses. (`previous` is
        // the `(` here, so it must not be handed to `number`.)
        let result = self.expression();
        self.consume(
            Some(TokenKind::RightParen),
            "expected ')' after an expression",
        );
        result
    }

    /// Parses the operand of the prefix operator that was just consumed.
    fn unary(&mut self) -> ExprRef {
        // Remember the operator before parsing the operand moves `previous`.
        let kind = self
            .previous
            .as_ref()
            .expect("a prefix operator was just consumed")
            .kind();
        let expr = self.expression_with_power(UNARY_POWER);
        let op = match kind {
            TokenKind::Minus => UnaryOp::Negate,
            _ => panic!("unsuitable unary: {:?}: no op", kind),
        };
        self.tree.add_expr(Expr::Unary(op, expr))
    }

    /// Parses the right operand of the binary operator that was just
    /// consumed and combines it with `left`.
    ///
    /// # Panics
    ///
    /// Panics if the consumed token is not one of the binary operators
    /// handled below; `infix_expression` only dispatches here for those.
    fn binary(&mut self, power: u8, left: ExprRef) -> ExprRef {
        // The operator has to be read *before* the right operand is parsed:
        // parsing advances the token stream, after which `previous` is the
        // last token of the operand rather than the operator.
        let op = match self
            .previous
            .as_ref()
            .expect("a binary operator was just consumed")
            .kind()
        {
            TokenKind::Plus => BinaryOp::Add,
            TokenKind::Minus => BinaryOp::Subtract,
            TokenKind::Star => BinaryOp::Mutiply,
            TokenKind::Slash => BinaryOp::Divide,
            TokenKind::And => BinaryOp::And,
            TokenKind::Or => BinaryOp::Or,
            _ => panic!("unsuitable binary: {:?}: no op", self.previous),
        };
        // One more than our own power, so an operator of equal power ends the
        // right operand and is picked up by the caller's loop instead
        // (left-associative).
        let right = self.expression_with_power(power + 1);
        self.tree.add_expr(Expr::Binary(op, left, right))
    }

    /// Moves `current` into `previous` and reads the next token, reporting
    /// and skipping over any error tokens.
    fn advance(&mut self) {
        self.previous = self.current.take();
        loop {
            self.current = self.tokens.next();
            match &self.current {
                Some(token) if token.kind() == TokenKind::Error => {
                    self.error_at_current(token.clone())
                }
                _ => break,
            }
        }
    }

    /// Consumes the current token if it has the expected `kind`; otherwise
    /// records `error` at the current token.
    ///
    /// Passing `None` as `kind` means "expect the end of the input"; nothing
    /// is consumed in that case.
    fn consume(&mut self, kind: Option<TokenKind>, error: &str) {
        match (&self.current, kind) {
            (Some(token), Some(kind)) if token.kind() == kind => self.advance(),
            (None, None) => (),
            _ => {
                self.error_at_current(error);
            }
        }
    }

    /// Records an error at the token that was most recently consumed.
    fn error<T>(&mut self, message: T)
    where
        T: Into<String>,
    {
        self.error_at(self.previous.clone(), message)
    }

    /// Records an error at the token that is about to be consumed.
    fn error_at_current<T>(&mut self, message: T)
    where
        T: Into<String>,
    {
        self.error_at(self.current.clone(), message)
    }

    /// Records an error located at `token` (`None` meaning the end of the
    /// input).
    ///
    /// Only the first error is recorded: reporting sets `panic_mode`, nothing
    /// in this impl clears it again, and every later report is dropped.
    fn error_at<T>(&mut self, token: Option<Token<'a>>, message: T)
    where
        T: Into<String>,
    {
        if self.panic_mode {
            return;
        }
        self.panic_mode = true;

        let message: String = message.into();
        let (line, column) = self.tokens.token_position(&token);

        let mut final_message = "Error ".to_string();
        match token {
            None => final_message.push_str("at end"),
            Some(t) => {
                // NOTE(review): for error tokens nothing is appended here, so
                // the message reads "Error : ..." with a space before the
                // colon. Left unchanged in case callers match on the text.
                if t.kind() != TokenKind::Error {
                    final_message.push_str("at '");
                    final_message.push_str(t.as_str());
                    final_message.push('\'');
                }
            }
        }
        final_message.push_str(": ");
        final_message.push_str(&message);

        self.tree
            .add_error(SyntaxError::new(line, column, final_message));
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A lone numeric literal parses without errors and dumps back as the
    /// same text.
    #[test]
    pub fn number_expressions() {
        let parser = Parser::new("23.5");
        let (tree, root) = parser.parse();

        let no_errors: Vec<SyntaxError> = Vec::new();
        assert_eq!(no_errors, tree.errors);
        assert_eq!("23.5", tree.dump_expr(&root));
    }
}