[fine] Starting to parse (ugh)
This commit is contained in:
parent
7fccab8f59
commit
ece5576fb2
3 changed files with 534 additions and 185 deletions
360
oden-script/src/parser.rs
Normal file
360
oden-script/src/parser.rs
Normal file
|
|
@ -0,0 +1,360 @@
|
|||
use crate::tokens::{Token, TokenKind, Tokens};
|
||||
use std::fmt;
|
||||
|
||||
/// A syntax error attributed to a line and column of the source text.
///
/// Both `Debug` and `Display` render the error as `line:column: message`.
#[derive(PartialEq, Eq)]
pub struct SyntaxError {
    /// Line the error is attributed to.
    pub line: usize,
    /// Column the error is attributed to.
    pub column: usize,
    /// Human-readable description of the problem.
    pub message: String,
}

impl SyntaxError {
    /// Builds a syntax error from its position and message.
    pub fn new(line: usize, column: usize, message: String) -> Self {
        Self {
            line,
            column,
            message,
        }
    }
}

impl fmt::Debug for SyntaxError {
    /// Renders exactly like `Display`, so assertion failures show the compact
    /// `line:column: message` form instead of a struct dump.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}

impl fmt::Display for SyntaxError {
    /// Writes the error as `line:column: message`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            line,
            column,
            message,
        } = self;
        write!(f, "{line}:{column}: {message}")
    }
}
|
||||
|
||||
/// A literal value appearing directly in the source.
///
/// Derives `Debug`, `Clone` and `PartialEq` so literals can be printed,
/// duplicated and compared (for example in tests).
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
    /// A 64-bit floating point number.
    Float64(f64),
}
|
||||
|
||||
/// Prefix operators that take a single operand.
///
/// A plain tag with no payload, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    /// Arithmetic negation, written `-x`.
    Negate,
}
|
||||
|
||||
/// Infix operators that take a left and a right operand.
///
/// A plain tag with no payload, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
    /// `+`
    Add,
    /// `-`
    Subtract,
    /// `*`
    ///
    /// NOTE: the variant name is misspelt ("Mutiply"); it is kept as-is
    /// because renaming it would break every user of this enum.
    Mutiply,
    /// `/`
    Divide,
    /// `and`
    And,
    /// `or`
    Or,
}
|
||||
|
||||
pub enum Expr {
|
||||
Literal(Literal),
|
||||
Unary(UnaryOp, ExprRef),
|
||||
Binary(BinaryOp, ExprRef, ExprRef),
|
||||
}
|
||||
|
||||
/// Handle to an expression stored in a `SyntaxTree`.
///
/// Wraps an optional index: `Some(i)` points at a stored expression, `None`
/// marks an expression that could not be parsed. The handle is a small plain
/// value, so it derives `Copy` and the comparison traits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ExprRef(Option<usize>);

impl ExprRef {
    /// Returns the handle that stands in for a failed parse (no index).
    pub fn error() -> Self {
        Self(None)
    }
}
|
||||
|
||||
pub struct SyntaxTree {
|
||||
pub errors: Vec<SyntaxError>,
|
||||
expressions: Vec<Expr>,
|
||||
}
|
||||
|
||||
impl SyntaxTree {
|
||||
pub fn new() -> Self {
|
||||
SyntaxTree {
|
||||
errors: Vec::new(),
|
||||
expressions: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_error(&mut self, error: SyntaxError) {
|
||||
self.errors.push(error);
|
||||
}
|
||||
|
||||
pub fn add_expr(&mut self, expr: Expr) -> ExprRef {
|
||||
let index = self.expressions.len();
|
||||
self.expressions.push(expr);
|
||||
ExprRef(Some(index))
|
||||
}
|
||||
|
||||
pub fn dump_expr(&self, expr: &ExprRef) -> String {
|
||||
match expr.0 {
|
||||
Some(idx) => {
|
||||
let expr = &self.expressions[idx];
|
||||
match expr {
|
||||
Expr::Literal(lit) => match lit {
|
||||
Literal::Float64(f) => f.to_string(),
|
||||
},
|
||||
Expr::Unary(op, e) => {
|
||||
let op = match op {
|
||||
UnaryOp::Negate => "-",
|
||||
};
|
||||
format!("({op} {})", self.dump_expr(e))
|
||||
}
|
||||
Expr::Binary(op, l, r) => {
|
||||
let op = match op {
|
||||
BinaryOp::Add => "+",
|
||||
BinaryOp::Subtract => "-",
|
||||
BinaryOp::Mutiply => "*",
|
||||
BinaryOp::Divide => "/",
|
||||
BinaryOp::And => "and",
|
||||
BinaryOp::Or => "or",
|
||||
};
|
||||
format!("({op} {} {})", self.dump_expr(l), self.dump_expr(r))
|
||||
}
|
||||
}
|
||||
}
|
||||
None => "<|EOF|>".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Binding powers, listed from weakest to strongest. An expression parse is
// handed a minimum binding power and only keeps consuming infix operators
// whose power is at least that minimum. (Most texts call this "precedence".)
const ASSIGNMENT_POWER: u8 = 0; // =
const OR_POWER: u8 = 1; // or
const AND_POWER: u8 = 2; // and
const EQUALITY_POWER: u8 = 3; // == !=
const COMPARISON_POWER: u8 = 4; // < > <= >=
const TERM_POWER: u8 = 5; // + -
const FACTOR_POWER: u8 = 6; // * /
const UNARY_POWER: u8 = 7; // ! -

// Not used yet:
// const CALL_POWER: u8 = 8; // . ()
// const PRIMARY_POWER: u8 = 9;
|
||||
|
||||
fn token_power<'a>(token: &Option<Token<'a>>) -> Option<u8> {
|
||||
let token = match token {
|
||||
Some(t) => t,
|
||||
None => return None,
|
||||
};
|
||||
|
||||
match token.kind() {
|
||||
TokenKind::Equal => Some(ASSIGNMENT_POWER),
|
||||
TokenKind::Or => Some(OR_POWER),
|
||||
TokenKind::And => Some(AND_POWER),
|
||||
TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER),
|
||||
TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => {
|
||||
Some(COMPARISON_POWER)
|
||||
}
|
||||
TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER),
|
||||
TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Parser<'a> {
|
||||
tokens: Tokens<'a>,
|
||||
tree: SyntaxTree,
|
||||
current: Option<Token<'a>>,
|
||||
previous: Option<Token<'a>>,
|
||||
|
||||
panic_mode: bool,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub fn new(source: &'a str) -> Self {
|
||||
let mut parser = Parser {
|
||||
tokens: Tokens::new(source),
|
||||
tree: SyntaxTree::new(),
|
||||
current: None,
|
||||
previous: None,
|
||||
panic_mode: false,
|
||||
};
|
||||
parser.advance();
|
||||
parser
|
||||
}
|
||||
|
||||
pub fn parse(mut self) -> (SyntaxTree, ExprRef) {
|
||||
let expr = self.expression();
|
||||
self.consume(None, "expected end of expression");
|
||||
(self.tree, expr)
|
||||
}
|
||||
|
||||
fn expression(&mut self) -> ExprRef {
|
||||
self.expression_with_power(0)
|
||||
}
|
||||
|
||||
fn expression_with_power(&mut self, minimum_power: u8) -> ExprRef {
|
||||
self.advance();
|
||||
let mut expr = self.prefix_expression();
|
||||
loop {
|
||||
let power = match token_power(&self.current) {
|
||||
Some(p) => p,
|
||||
None => break, // EOF, end of expression?
|
||||
};
|
||||
|
||||
if power < minimum_power {
|
||||
break;
|
||||
}
|
||||
|
||||
self.advance();
|
||||
expr = self.infix_expression(power, expr);
|
||||
}
|
||||
expr
|
||||
}
|
||||
|
||||
fn prefix_expression(&mut self) -> ExprRef {
|
||||
let token = self.previous.as_ref();
|
||||
match token {
|
||||
Some(token) => match token.kind() {
|
||||
TokenKind::LeftParen => self.grouping(),
|
||||
TokenKind::Number => self.number(),
|
||||
TokenKind::Minus => self.unary(),
|
||||
_ => {
|
||||
self.error("expected an expression");
|
||||
ExprRef::error()
|
||||
}
|
||||
},
|
||||
None => {
|
||||
self.error("expected an expression");
|
||||
ExprRef::error()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn infix_expression(&mut self, power: u8, left: ExprRef) -> ExprRef {
|
||||
let kind = self.previous.as_ref().unwrap().kind();
|
||||
match kind {
|
||||
TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => {
|
||||
self.binary(power, left)
|
||||
}
|
||||
_ => panic!("Unknown infix operator, dispatch error?"),
|
||||
}
|
||||
}
|
||||
|
||||
fn number(&mut self) -> ExprRef {
|
||||
let token = self.previous.as_ref().unwrap();
|
||||
// What kind is it? For now let's just ... make it good.
|
||||
|
||||
match token.as_str().parse::<f64>() {
|
||||
Ok(v) => self.tree.add_expr(Expr::Literal(Literal::Float64(v))),
|
||||
Err(e) => {
|
||||
self.error(format!("invalid f64: {e}"));
|
||||
ExprRef::error()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn grouping(&mut self) -> ExprRef {
|
||||
let result = self.number();
|
||||
self.consume(
|
||||
Some(TokenKind::RightParen),
|
||||
"expected ')' after an expression",
|
||||
);
|
||||
result
|
||||
}
|
||||
|
||||
fn unary(&mut self) -> ExprRef {
|
||||
let kind = self.previous.as_ref().unwrap().kind();
|
||||
let expr = self.expression_with_power(UNARY_POWER);
|
||||
let op = match kind {
|
||||
TokenKind::Minus => UnaryOp::Negate,
|
||||
_ => panic!("unsuitable unary: {:?}: no op", kind),
|
||||
};
|
||||
self.tree.add_expr(Expr::Unary(op, expr))
|
||||
}
|
||||
|
||||
fn binary(&mut self, power: u8, left: ExprRef) -> ExprRef {
|
||||
let right = self.expression_with_power(power + 1);
|
||||
let op = match self.previous.as_ref().unwrap().kind() {
|
||||
TokenKind::Plus => BinaryOp::Add,
|
||||
TokenKind::Minus => BinaryOp::Subtract,
|
||||
TokenKind::Star => BinaryOp::Mutiply,
|
||||
TokenKind::Slash => BinaryOp::Divide,
|
||||
TokenKind::And => BinaryOp::And,
|
||||
TokenKind::Or => BinaryOp::Or,
|
||||
_ => panic!("unsuitable binary: {:?}: no op", self.previous),
|
||||
};
|
||||
|
||||
self.tree.add_expr(Expr::Binary(op, left, right))
|
||||
}
|
||||
|
||||
fn advance(&mut self) {
|
||||
self.previous = self.current.take();
|
||||
loop {
|
||||
self.current = self.tokens.next();
|
||||
match &self.current {
|
||||
Some(token) if token.kind() == TokenKind::Error => {
|
||||
self.error_at_current(token.clone())
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn consume(&mut self, kind: Option<TokenKind>, error: &str) {
|
||||
match (&self.current, kind) {
|
||||
(Some(token), Some(kind)) if token.kind() == kind => self.advance(),
|
||||
(None, None) => (),
|
||||
_ => {
|
||||
self.error_at_current(error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn error<T>(&mut self, message: T)
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
self.error_at(self.previous.clone(), message)
|
||||
}
|
||||
|
||||
fn error_at_current<T>(&mut self, message: T)
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
self.error_at(self.current.clone(), message)
|
||||
}
|
||||
|
||||
fn error_at<T>(&mut self, token: Option<Token<'a>>, message: T)
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
if self.panic_mode {
|
||||
return;
|
||||
}
|
||||
self.panic_mode = true;
|
||||
|
||||
let message: String = message.into();
|
||||
let (line, column) = self.tokens.token_position(&token);
|
||||
let mut final_message = "Error ".to_string();
|
||||
match token {
|
||||
None => final_message.push_str("at end"),
|
||||
Some(t) => {
|
||||
if t.kind() != TokenKind::Error {
|
||||
final_message.push_str("at '");
|
||||
final_message.push_str(t.as_str());
|
||||
final_message.push_str("'");
|
||||
}
|
||||
}
|
||||
}
|
||||
final_message.push_str(": ");
|
||||
final_message.push_str(&message);
|
||||
|
||||
self.tree
|
||||
.add_error(SyntaxError::new(line, column, final_message));
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A lone number parses without errors and dumps back as itself.
    #[test]
    pub fn number_expressions() {
        let parser = Parser::new("23.5");
        let (tree, root) = parser.parse();

        let no_errors = Vec::<SyntaxError>::new();
        assert_eq!(no_errors, tree.errors);

        let dumped = tree.dump_expr(&root);
        assert_eq!("23.5", dumped);
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue