[fine] The name is official
This commit is contained in:
parent
652fe18f57
commit
8a867de7e7
7 changed files with 3 additions and 3 deletions
32
fine/Cargo.lock
generated
Normal file
32
fine/Cargo.lock
generated
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "diff"
|
||||
version = "0.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
|
||||
|
||||
[[package]]
|
||||
name = "fine"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"pretty_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pretty_assertions"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66"
|
||||
dependencies = [
|
||||
"diff",
|
||||
"yansi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yansi"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
|
||||
7
fine/Cargo.toml
Normal file
7
fine/Cargo.toml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
[package]
|
||||
name = "fine"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = "1.4.0"
|
||||
2
fine/src/lib.rs
Normal file
2
fine/src/lib.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
pub mod parser;
|
||||
pub mod tokens;
|
||||
1
fine/src/main.rs
Normal file
1
fine/src/main.rs
Normal file
|
|
@ -0,0 +1 @@
|
|||
pub fn main() {}
|
||||
906
fine/src/parser.rs
Normal file
906
fine/src/parser.rs
Normal file
|
|
@ -0,0 +1,906 @@
|
|||
use crate::tokens::{Lines, Token, TokenKind, Tokens};
|
||||
use std::fmt;
|
||||
|
||||
// TODO: An error should have:
|
||||
//
|
||||
// - a start
|
||||
// - an end
|
||||
// - a focus
|
||||
// - descriptive messages
|
||||
//
|
||||
// that will have to wait for now
|
||||
/// A diagnostic produced while parsing or type-checking source text.
///
/// Positions are `(line, column)` pairs.
#[derive(PartialEq, Eq)]
pub struct SyntaxError {
    /// `(line, column)` at which the offending region starts.
    pub start: (usize, usize),
    /// `(line, column)` at which the offending region ends.
    pub end: (usize, usize),
    /// Human-readable description of the problem.
    pub message: String,
}

impl SyntaxError {
    /// Create an error that points at a single position: `start` and `end`
    /// are both set to `(line, column)`.
    pub fn new<T>(line: usize, column: usize, message: T) -> Self
    where
        T: ToString,
    {
        Self::new_spanned((line, column), (line, column), message)
    }

    /// Create an error that covers the region from `start` to `end`, each a
    /// `(line, column)` pair.
    pub fn new_spanned<T>(start: (usize, usize), end: (usize, usize), message: T) -> Self
    where
        T: ToString,
    {
        SyntaxError {
            start,
            end,
            message: message.to_string(),
        }
    }
}

impl fmt::Debug for SyntaxError {
    /// Debug output is deliberately the same as the `Display` output so that
    /// assertion failures show readable diagnostics.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{self}")
    }
}

impl fmt::Display for SyntaxError {
    /// Formats the error as `line:column: message`, using the start position.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Report the start *column* after the start line; the previous code
        // printed the end line there, which yielded "line:line: message".
        let (line, column) = self.start;
        write!(f, "{line}:{column}: {}", self.message)
    }
}
|
||||
|
||||
/// A constant value carried by a literal expression node.
#[derive(Clone)]
pub enum Literal {
    /// A 64-bit floating point number.
    Float64(f64),
    /// An owned string value.
    String(String),
    /// A boolean value.
    Bool(bool),
}
|
||||
|
||||
/// The operators that take a single operand.
#[derive(Copy, Clone)]
pub enum UnaryOp {
    /// Arithmetic negation (`-x`).
    Negate,
    /// Logical negation (`!x`).
    Not,
}
|
||||
|
||||
/// The operators that take a left and a right operand.
#[derive(Copy, Clone)]
pub enum BinaryOp {
    /// `+`
    Add,
    /// `-`
    Subtract,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `and`
    And,
    /// `or`
    Or,
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum Expr<'a> {
|
||||
Literal(Literal, Token<'a>),
|
||||
Unary(UnaryOp, Token<'a>, ExprRef),
|
||||
Binary(BinaryOp, Token<'a>, ExprRef, ExprRef),
|
||||
Conditional(Token<'a>, ExprRef, ExprRef, Option<ExprRef>, Token<'a>),
|
||||
}
|
||||
|
||||
/// A handle to an expression stored inside a `SyntaxTree`.
///
/// The wrapped value is the index of the expression, or `None` when the
/// expression could not be produced because of an error.
#[derive(Clone)]
pub struct ExprRef(Option<usize>);

impl ExprRef {
    /// The handle that stands in for an expression that failed to parse.
    pub fn error() -> Self {
        Self(None)
    }
}
|
||||
|
||||
// TODO: Eventually we will be unable to use Eq and PartialEq here, and will
// need to do our own thing.
/// The type assigned to an expression by the type checker.
#[derive(Copy, Clone)]
pub enum Type {
    /// Signals a type error. Receiving this means an error has already been
    /// reported; anything that produces it must also record the error in the
    /// errors collection.
    Error,

    /// Signals that the expression has a control-flow side-effect and that no
    /// value will ever result from it. Usually this means everything is fine.
    Unreachable,

    // TODO: Numeric literals should be implicitly convertible, unlike other
    // types. Maybe just a "numeric literal" type?
    /// 64-bit floating point number.
    F64,
    /// String.
    String,
    /// Boolean.
    Bool,
}

impl Type {
    /// Returns `true` only for `Type::Error`.
    pub fn is_error(&self) -> bool {
        matches!(self, Type::Error)
    }

    /// Returns `true` when a value of type `other` is acceptable where `self`
    /// is expected.
    ///
    /// Two types are compatible when they are the same variant. `Type::Error`
    /// is compatible with everything, on either side, so that an error which
    /// has already been reported does not trigger further errors.
    pub fn compatible_with(&self, other: &Type) -> bool {
        // TODO: This is wrong because of numeric literals etc.
        matches!(
            (self, other),
            (Type::Error, _)
                | (_, Type::Error)
                | (Type::F64, Type::F64)
                | (Type::String, Type::String)
                | (Type::Bool, Type::Bool)
                | (Type::Unreachable, Type::Unreachable)
        )
    }
}

impl fmt::Debug for Type {
    /// Debug output is identical to the `Display` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}

impl fmt::Display for Type {
    /// Writes the source-level name of the type, or a marker for the two
    /// internal variants.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Type::Error => "<< INTERNAL ERROR >>",
            Type::Unreachable => "<< UNREACHABLE >>",
            Type::F64 => "f64",
            Type::String => "string",
            Type::Bool => "bool",
        };
        f.write_str(name)
    }
}
|
||||
|
||||
/// An optional index wrapper mirroring the shape of `ExprRef`.
// NOTE(review): nothing in the visible code constructs or reads this yet.
pub struct TypeRef(Option<usize>);
|
||||
|
||||
pub struct SyntaxTree<'a> {
|
||||
pub errors: Vec<SyntaxError>,
|
||||
expressions: Vec<Expr<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> SyntaxTree<'a> {
|
||||
pub fn new() -> Self {
|
||||
SyntaxTree {
|
||||
errors: Vec::new(),
|
||||
expressions: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_error(&mut self, error: SyntaxError) {
|
||||
self.errors.push(error);
|
||||
}
|
||||
|
||||
pub fn add_expr(&mut self, expr: Expr<'a>) -> ExprRef {
|
||||
let index = self.expressions.len();
|
||||
self.expressions.push(expr);
|
||||
ExprRef(Some(index))
|
||||
}
|
||||
|
||||
pub fn dump_expr(&self, expr: &ExprRef) -> String {
|
||||
match expr.0 {
|
||||
Some(idx) => {
|
||||
let expr = &self.expressions[idx];
|
||||
match expr {
|
||||
Expr::Literal(_, tok) => tok.to_string(),
|
||||
Expr::Unary(_, tok, e) => {
|
||||
format!("({tok} {})", self.dump_expr(e))
|
||||
}
|
||||
Expr::Binary(_, tok, l, r) => {
|
||||
format!("({tok} {} {})", self.dump_expr(l), self.dump_expr(r))
|
||||
}
|
||||
Expr::Conditional(tok, cond, t, e, _) => {
|
||||
if let Some(e) = e {
|
||||
format!(
|
||||
"({tok} {} {} {})",
|
||||
self.dump_expr(cond),
|
||||
self.dump_expr(t),
|
||||
self.dump_expr(e)
|
||||
)
|
||||
} else {
|
||||
format!("({tok} {} {})", self.dump_expr(cond), self.dump_expr(t))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None => "<|EOF|>".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expr_span(&self, expr: &ExprRef) -> Option<(Token<'a>, Token<'a>)> {
|
||||
let expr = match expr.0 {
|
||||
Some(idx) => &self.expressions[idx],
|
||||
None => return None,
|
||||
};
|
||||
|
||||
match expr {
|
||||
Expr::Literal(_, tok) => Some((tok.clone(), tok.clone())),
|
||||
Expr::Unary(_, tok, arg) => {
|
||||
let arg = self.expr_span(arg);
|
||||
match arg {
|
||||
None => None,
|
||||
Some((_, end)) => Some((tok.clone(), end)),
|
||||
}
|
||||
}
|
||||
Expr::Binary(_, _, left, right) => {
|
||||
let left = self.expr_span(left);
|
||||
let right = self.expr_span(right);
|
||||
match (left, right) {
|
||||
(None, _) => None,
|
||||
(_, None) => None,
|
||||
(Some((start, _)), Some((_, end))) => Some((start, end)),
|
||||
}
|
||||
}
|
||||
Expr::Conditional(head, _, _, _, tail) => Some((head.clone(), tail.clone())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines, value_required: bool) -> Type {
|
||||
// TODO: Cache and work on demand? Or is this just fine?
|
||||
|
||||
let exr = expr.clone();
|
||||
let expr = match expr.0 {
|
||||
Some(idx) => &self.expressions[idx],
|
||||
None => return Type::Error,
|
||||
};
|
||||
match expr {
|
||||
Expr::Literal(lit, _) => match lit {
|
||||
Literal::Float64(_) => Type::F64,
|
||||
Literal::String(_) => Type::String,
|
||||
Literal::Bool(_) => Type::Bool,
|
||||
},
|
||||
|
||||
// Figure out the main thing. Check for a... trait?
|
||||
Expr::Unary(op, tok, arg) => {
|
||||
let op = op.clone();
|
||||
let arg = arg.clone();
|
||||
let tok = tok.clone();
|
||||
let arg_type = self.expr_type(&arg, lines, true);
|
||||
match (op, arg_type) {
|
||||
(UnaryOp::Negate, Type::F64) => Type::F64,
|
||||
(UnaryOp::Not, Type::Bool) => Type::Bool,
|
||||
|
||||
// This is dumb and should be punished, probably.
|
||||
(_, Type::Unreachable) => {
|
||||
let (line, col) = lines.position(tok.start());
|
||||
self.errors.push(SyntaxError::new(line, col, format!("cannot apply a unary operator to something that doesn't yield a value")));
|
||||
Type::Error
|
||||
}
|
||||
|
||||
// Propagate existing errors without additional complaint.
|
||||
(_, Type::Error) => Type::Error,
|
||||
|
||||
// Missed the whole table, must be an error.
|
||||
(_, arg_type) => {
|
||||
let (line, col) = lines.position(tok.start());
|
||||
self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'")));
|
||||
Type::Error
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Expr::Binary(op, tok, left, right) => {
|
||||
let op = op.clone();
|
||||
let tok = tok.clone();
|
||||
let left = left.clone();
|
||||
let right = right.clone();
|
||||
let left_type = self.expr_type(&left, lines, true);
|
||||
let right_type = self.expr_type(&right, lines, true);
|
||||
|
||||
match (op, left_type, right_type) {
|
||||
(
|
||||
BinaryOp::Add | BinaryOp::Subtract | BinaryOp::Multiply | BinaryOp::Divide,
|
||||
Type::F64,
|
||||
Type::F64,
|
||||
) => Type::F64,
|
||||
|
||||
(BinaryOp::Add, Type::String, Type::String) => Type::String,
|
||||
|
||||
(BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool,
|
||||
|
||||
// This is dumb and should be punished, probably.
|
||||
(_, _, Type::Unreachable) => {
|
||||
let (line, col) = lines.position(tok.start());
|
||||
self.errors.push(SyntaxError::new(
|
||||
line,
|
||||
col,
|
||||
format!(
|
||||
"cannot apply '{tok}' to an argument that doesn't yield a value (on the right)"
|
||||
),
|
||||
));
|
||||
Type::Error
|
||||
}
|
||||
(_, Type::Unreachable, _) => {
|
||||
let (line, col) = lines.position(tok.start());
|
||||
self.errors.push(SyntaxError::new(
|
||||
line,
|
||||
col,
|
||||
format!(
|
||||
"cannot apply '{tok}' to an argument that doesn't yield a value (on the left)"
|
||||
),
|
||||
));
|
||||
Type::Error
|
||||
}
|
||||
|
||||
// Propagate existing errors without additional complaint.
|
||||
(_, Type::Error, _) => Type::Error,
|
||||
(_, _, Type::Error) => Type::Error,
|
||||
|
||||
// Missed the whole table, it must be an error.
|
||||
(_, left_type, right_type) => {
|
||||
let (line, col) = lines.position(tok.start());
|
||||
self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)")));
|
||||
Type::Error
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Expr::Conditional(_, cond, then_exp, else_exp, _) => {
|
||||
let cond = cond.clone();
|
||||
let then_exp = then_exp.clone();
|
||||
let else_exp = else_exp.clone();
|
||||
|
||||
let cond_type = self.expr_type(&cond, lines, true);
|
||||
let then_type = self.expr_type(&then_exp, lines, value_required);
|
||||
let else_type = else_exp.map(|e| self.expr_type(&e, lines, value_required));
|
||||
if !cond_type.compatible_with(&Type::Bool) {
|
||||
if !cond_type.is_error() {
|
||||
let span = self
|
||||
.expr_span(&cond)
|
||||
.expect("If the expression has a type it must have a span");
|
||||
|
||||
let start = lines.position(span.0.start());
|
||||
let end = lines.position(span.1.start());
|
||||
self.errors.push(SyntaxError::new_spanned(
|
||||
start,
|
||||
end,
|
||||
"the condition of an `if` expression must be a boolean",
|
||||
));
|
||||
}
|
||||
return Type::Error;
|
||||
}
|
||||
|
||||
match (then_type, else_type) {
|
||||
(Type::Error, _) => Type::Error,
|
||||
(_, Some(Type::Error)) => Type::Error,
|
||||
|
||||
// It's an error to have a missing else branch if the value is required
|
||||
(_, None) if value_required => {
|
||||
let span = self
|
||||
.expr_span(&exr)
|
||||
.expect("How did I get this far with a broken parse?");
|
||||
let start = lines.position(span.0.start());
|
||||
let end = lines.position(span.1.start());
|
||||
self.errors.push(SyntaxError::new_spanned(
|
||||
start,
|
||||
end,
|
||||
"this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value",
|
||||
));
|
||||
Type::Error
|
||||
}
|
||||
|
||||
// If the value is required then the branches must be
|
||||
// compatible, and the type of the expression is the type
|
||||
// of the `then` branch.
|
||||
(then_type, Some(else_type)) if value_required => {
|
||||
if !then_type.compatible_with(&else_type) {
|
||||
let span = self
|
||||
.expr_span(&exr)
|
||||
.expect("How did I get this far with a broken parse?");
|
||||
let start = lines.position(span.0.start());
|
||||
let end = lines.position(span.1.start());
|
||||
self.errors.push(SyntaxError::new_spanned(
|
||||
start,
|
||||
end,
|
||||
format!("the type of the `then` branch ({then_type}) must match the type of the `else` branch ({else_type})"),
|
||||
));
|
||||
Type::Error
|
||||
} else {
|
||||
then_type
|
||||
}
|
||||
}
|
||||
|
||||
// The value must not be required, just mark this as unreachable.
|
||||
(_, _) => {
|
||||
assert!(!value_required);
|
||||
Type::Unreachable
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// BINDING POWERS. When parsing expressions we only accept expressions that
|
||||
// meet a minimum binding power. (This is like "precedence" but I just super
|
||||
// don't like that terminology.)
|
||||
const ASSIGNMENT_POWER: u8 = 0; // =
|
||||
const OR_POWER: u8 = 1; // or
|
||||
const AND_POWER: u8 = 2; // and
|
||||
const EQUALITY_POWER: u8 = 3; // == !=
|
||||
const COMPARISON_POWER: u8 = 4; // < > <= >=
|
||||
const TERM_POWER: u8 = 5; // + -
|
||||
const FACTOR_POWER: u8 = 6; // * /
|
||||
const UNARY_POWER: u8 = 7; // ! -
|
||||
|
||||
// const CALL_POWER: u8 = 8; // . ()
|
||||
// const PRIMARY_POWER: u8 = 9;
|
||||
|
||||
fn token_power<'a>(token: &Option<Token<'a>>) -> Option<u8> {
|
||||
let token = match token {
|
||||
Some(t) => t,
|
||||
None => return None,
|
||||
};
|
||||
|
||||
match token.kind() {
|
||||
TokenKind::Equal => Some(ASSIGNMENT_POWER),
|
||||
TokenKind::Or => Some(OR_POWER),
|
||||
TokenKind::And => Some(AND_POWER),
|
||||
TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER),
|
||||
TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => {
|
||||
Some(COMPARISON_POWER)
|
||||
}
|
||||
TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER),
|
||||
TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Parser<'a> {
|
||||
tokens: Tokens<'a>,
|
||||
tree: SyntaxTree<'a>,
|
||||
current: Option<Token<'a>>,
|
||||
previous: Option<Token<'a>>,
|
||||
|
||||
panic_mode: bool,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub fn new(source: &'a str) -> Self {
|
||||
let mut parser = Parser {
|
||||
tokens: Tokens::new(source),
|
||||
tree: SyntaxTree::new(),
|
||||
current: None,
|
||||
previous: None,
|
||||
panic_mode: false,
|
||||
};
|
||||
parser.advance();
|
||||
parser
|
||||
}
|
||||
|
||||
pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) {
|
||||
let expr = self.expression();
|
||||
self.consume(None, "expected end of expression");
|
||||
(self.tree, expr, self.tokens.lines())
|
||||
}
|
||||
|
||||
fn expression(&mut self) -> ExprRef {
|
||||
self.expression_with_power(0)
|
||||
}
|
||||
|
||||
fn expression_with_power(&mut self, minimum_power: u8) -> ExprRef {
|
||||
self.trace("expression with power");
|
||||
self.advance();
|
||||
let mut expr = self.prefix_expression();
|
||||
loop {
|
||||
let power = match token_power(&self.current) {
|
||||
Some(p) => p,
|
||||
None => break, // EOF, end of expression?
|
||||
};
|
||||
|
||||
if power < minimum_power {
|
||||
break;
|
||||
}
|
||||
|
||||
self.advance();
|
||||
expr = self.infix_expression(power, expr);
|
||||
}
|
||||
expr
|
||||
}
|
||||
|
||||
fn prefix_expression(&mut self) -> ExprRef {
|
||||
self.trace("prefix");
|
||||
let token = self.previous.as_ref();
|
||||
match token {
|
||||
Some(token) => match token.kind() {
|
||||
TokenKind::Bang => self.unary(),
|
||||
TokenKind::LeftParen => self.grouping(),
|
||||
TokenKind::Number => self.number(),
|
||||
TokenKind::Minus => self.unary(),
|
||||
TokenKind::String => self.string(),
|
||||
|
||||
TokenKind::True => self
|
||||
.tree
|
||||
.add_expr(Expr::Literal(Literal::Bool(true), token.clone())),
|
||||
TokenKind::False => self
|
||||
.tree
|
||||
.add_expr(Expr::Literal(Literal::Bool(false), token.clone())),
|
||||
|
||||
TokenKind::If => self.conditional(),
|
||||
|
||||
_ => {
|
||||
self.error("expected an expression");
|
||||
ExprRef::error()
|
||||
}
|
||||
},
|
||||
None => {
|
||||
self.error("expected an expression");
|
||||
ExprRef::error()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn infix_expression(&mut self, power: u8, left: ExprRef) -> ExprRef {
|
||||
self.trace("infix");
|
||||
let kind = self.previous.as_ref().unwrap().kind();
|
||||
match kind {
|
||||
TokenKind::Plus
|
||||
| TokenKind::Minus
|
||||
| TokenKind::Star
|
||||
| TokenKind::Slash
|
||||
| TokenKind::And
|
||||
| TokenKind::Or => self.binary(power, left),
|
||||
_ => panic!("Unknown infix operator, dispatch error?"),
|
||||
}
|
||||
}
|
||||
|
||||
fn number(&mut self) -> ExprRef {
|
||||
let token = self.previous.as_ref().unwrap();
|
||||
// What kind is it? For now let's just ... make it good.
|
||||
|
||||
let literal = match token.as_str().parse::<f64>() {
|
||||
Ok(v) => Literal::Float64(v),
|
||||
Err(e) => {
|
||||
self.error(format!("invalid f64: {e}"));
|
||||
return ExprRef::error();
|
||||
}
|
||||
};
|
||||
|
||||
self.tree.add_expr(Expr::Literal(literal, token.clone()))
|
||||
}
|
||||
|
||||
fn string(&mut self) -> ExprRef {
|
||||
let token = self.previous.as_ref().unwrap();
|
||||
|
||||
let mut result = String::new();
|
||||
let mut input = token.as_str().chars();
|
||||
|
||||
assert!(input.next().is_some()); // Delimiter
|
||||
while let Some(ch) = input.next() {
|
||||
match ch {
|
||||
'\\' => match input.next().unwrap() {
|
||||
'n' => result.push('\n'),
|
||||
'r' => result.push('\r'),
|
||||
't' => result.push('\t'),
|
||||
ch => result.push(ch),
|
||||
},
|
||||
_ => result.push(ch),
|
||||
}
|
||||
}
|
||||
result.pop(); // We pushed the other delimiter on, whoops.
|
||||
|
||||
let literal = Literal::String(result);
|
||||
self.tree.add_expr(Expr::Literal(literal, token.clone()))
|
||||
}
|
||||
|
||||
fn grouping(&mut self) -> ExprRef {
|
||||
let result = self.expression();
|
||||
self.consume(
|
||||
Some(TokenKind::RightParen),
|
||||
"expected ')' after an expression",
|
||||
);
|
||||
result
|
||||
}
|
||||
|
||||
fn conditional(&mut self) -> ExprRef {
|
||||
let token = self.previous.as_ref().unwrap().clone();
|
||||
let condition_expr = self.expression();
|
||||
self.consume(
|
||||
Some(TokenKind::LeftBrace),
|
||||
"expected '{' to start an 'if' block",
|
||||
);
|
||||
let then_expr = self.expression();
|
||||
self.consume(
|
||||
Some(TokenKind::RightBrace),
|
||||
"expected '}' to end an 'if' block",
|
||||
);
|
||||
let else_expr = match &self.current {
|
||||
Some(token) if token.kind() == TokenKind::Else => {
|
||||
self.advance();
|
||||
match &self.current {
|
||||
// Allow `else if` without another `{`.
|
||||
Some(token) if token.kind() == TokenKind::If => {
|
||||
self.advance();
|
||||
Some(self.conditional())
|
||||
}
|
||||
_ => {
|
||||
self.consume(
|
||||
Some(TokenKind::LeftBrace),
|
||||
"expected '{' to start an 'else' block",
|
||||
);
|
||||
let else_expr = self.expression();
|
||||
self.consume(
|
||||
Some(TokenKind::RightBrace),
|
||||
"Expected '}' to end an 'else' block",
|
||||
);
|
||||
Some(else_expr)
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
let tail = self.previous.as_ref().unwrap().clone();
|
||||
self.tree.add_expr(Expr::Conditional(
|
||||
token,
|
||||
condition_expr,
|
||||
then_expr,
|
||||
else_expr,
|
||||
tail,
|
||||
))
|
||||
}
|
||||
|
||||
fn unary(&mut self) -> ExprRef {
|
||||
let token = self.previous.as_ref().unwrap().clone();
|
||||
let kind = token.kind();
|
||||
let expr = self.expression_with_power(UNARY_POWER);
|
||||
let op = match kind {
|
||||
TokenKind::Minus => UnaryOp::Negate,
|
||||
TokenKind::Bang => UnaryOp::Not,
|
||||
_ => panic!("unsuitable unary: {:?}: no op", kind),
|
||||
};
|
||||
|
||||
self.tree.add_expr(Expr::Unary(op, token, expr))
|
||||
}
|
||||
|
||||
fn binary(&mut self, power: u8, left: ExprRef) -> ExprRef {
|
||||
let token = self.previous.as_ref().unwrap().clone();
|
||||
let op = match token.kind() {
|
||||
TokenKind::Plus => BinaryOp::Add,
|
||||
TokenKind::Minus => BinaryOp::Subtract,
|
||||
TokenKind::Star => BinaryOp::Multiply,
|
||||
TokenKind::Slash => BinaryOp::Divide,
|
||||
TokenKind::And => BinaryOp::And,
|
||||
TokenKind::Or => BinaryOp::Or,
|
||||
_ => panic!("unsuitable binary: {:?}: no op", self.previous),
|
||||
};
|
||||
let right = self.expression_with_power(power + 1);
|
||||
self.tree.add_expr(Expr::Binary(op, token, left, right))
|
||||
}
|
||||
|
||||
fn advance(&mut self) {
|
||||
self.previous = self.current.take();
|
||||
loop {
|
||||
self.current = self.tokens.next();
|
||||
match &self.current {
|
||||
Some(token) if token.kind() == TokenKind::Error => {
|
||||
self.error_at_current(token.to_string())
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn consume(&mut self, kind: Option<TokenKind>, error: &str) {
|
||||
match (&self.current, kind) {
|
||||
(Some(token), Some(kind)) if token.kind() == kind => self.advance(),
|
||||
(None, None) => (),
|
||||
_ => {
|
||||
self.error_at_current(error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn error<T>(&mut self, message: T)
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
self.error_at(self.previous.clone(), message)
|
||||
}
|
||||
|
||||
fn error_at_current<T>(&mut self, message: T)
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
self.error_at(self.current.clone(), message)
|
||||
}
|
||||
|
||||
fn error_at<T>(&mut self, token: Option<Token<'a>>, message: T)
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
if self.panic_mode {
|
||||
return;
|
||||
}
|
||||
self.panic_mode = true;
|
||||
|
||||
let message: String = message.into();
|
||||
let (line, column) = self.tokens.token_position(&token);
|
||||
let mut final_message = "Error ".to_string();
|
||||
match token {
|
||||
None => final_message.push_str("at end"),
|
||||
Some(t) => {
|
||||
if t.kind() != TokenKind::Error {
|
||||
final_message.push_str("at '");
|
||||
final_message.push_str(t.as_str());
|
||||
final_message.push_str("'");
|
||||
}
|
||||
}
|
||||
}
|
||||
final_message.push_str(": ");
|
||||
final_message.push_str(&message);
|
||||
|
||||
self.tree
|
||||
.add_error(SyntaxError::new(line, column, final_message));
|
||||
}
|
||||
|
||||
fn trace(&self, _msg: &str) {
|
||||
// let cpos = self.tokens.token_position(&self.current);
|
||||
// let ppos = self.tokens.token_position(&self.previous);
|
||||
|
||||
// eprintln!(
|
||||
// "[{}:{}:{}] [{}:{}:{}]: {msg}",
|
||||
// ppos.0,
|
||||
// ppos.1,
|
||||
// self.previous
|
||||
// .as_ref()
|
||||
// .map(|t| t.as_str())
|
||||
// .unwrap_or("<eof>"),
|
||||
// cpos.0,
|
||||
// cpos.1,
|
||||
// self.current.as_ref().map(|t| t.as_str()).unwrap_or("<eof>")
|
||||
// );
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Parse `source`, require that no errors were reported, compare the
    /// s-expression dump against `expected`, and check that the computed type
    /// is compatible with `expected_type`.
    fn test_successful_expression_parse(source: &str, expected: &str, expected_type: Type) {
        let (mut tree, root, lines) = Parser::new(source).parse();
        assert_eq!(
            Vec::<SyntaxError>::new(),
            tree.errors,
            "Expected successful parse"
        );

        let dumped = tree.dump_expr(&root);
        assert_eq!(
            expected, dumped,
            "The parse structure of the expressions did not match"
        );

        // TODO: 'assert_eq' is probably wrong here
        let expr_type = tree.expr_type(&root, &lines, true);
        assert!(
            expected_type.compatible_with(&expr_type),
            "The type of the expression did not match. expected: {expected_type}, actual: {expr_type}"
        );
    }

    /// Declare a test that parses `$input` and expects the dump `$expected`
    /// with type `$type`.
    macro_rules! test_expr {
        ($name:ident, $input:expr, $expected:expr, $type:expr) => {
            #[test]
            fn $name() {
                test_successful_expression_parse($input, $expected, $type);
            }
        };
    }

    test_expr!(number_expr, "12", "12", Type::F64);
    test_expr!(add_expr, "1 + 2", "(+ 1 2)", Type::F64);
    test_expr!(
        prec_expr,
        "1 + 2 * 3 - 7 * 7",
        "(- (+ 1 (* 2 3)) (* 7 7))",
        Type::F64
    );
    test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)", Type::F64);
    test_expr!(
        strings,
        r#" "Hello " + 'world!' "#,
        r#"(+ "Hello " 'world!')"#,
        Type::String
    );

    test_expr!(
        booleans,
        "true and false or false and !true",
        "(or (and true false) (and false (! true)))",
        Type::Bool
    );

    test_expr!(
        if_expression,
        "if true { 23 } else { 45 }",
        "(if true 23 45)",
        Type::F64
    );
    // test_expr!(
    //     if_with_return,
    //     "if true { 23 } else { return 'nothing' }",
    //     "",
    //     Type::F64
    // );

    // ========================================================================
    // Type Error Tests
    // ========================================================================

    /// Parse `source` (which must parse cleanly), type-check it, and require
    /// that the result is an error whose messages equal `expected_errors`.
    fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) {
        let (mut tree, root, lines) = Parser::new(source).parse();
        assert_eq!(
            Vec::<SyntaxError>::new(),
            tree.errors,
            "Expected successful parse"
        );

        let expr_type = tree.expr_type(&root, &lines, true);
        assert!(expr_type.is_error());

        let actual_errors: Vec<&str> = tree.errors.iter().map(|e| e.message.as_str()).collect();
        assert_eq!(expected_errors, actual_errors);
    }

    /// Declare a test that type-checks `$input` and expects exactly the
    /// listed error messages, in order.
    macro_rules! test_type_error_expr {
        ($name:ident, $input:expr, $($s:expr),+) => {
            #[test]
            fn $name() {
                let expected_errors: Vec<&str> = vec![$($s),+];
                test_type_error_expression($input, expected_errors);
            }
        }
    }

    test_type_error_expr!(
        negate_string,
        "-('what?')",
        "cannot apply unary operator '-' to expression of type 'string'"
    );

    test_type_error_expr!(
        add_string_number,
        "'what?' + 5",
        "cannot apply binary operator '+' to expressions of type 'string' (on the left) and 'f64' (on the right)"
    );

    test_type_error_expr!(
        add_number_string,
        "5 + 'what?'",
        "cannot apply binary operator '+' to expressions of type 'f64' (on the left) and 'string' (on the right)"
    );

    test_type_error_expr!(
        errors_propagate_do_not_duplicate,
        "!'hello' / 27 * -('what?') + 23",
        "cannot apply unary operator '!' to expression of type 'string'",
        "cannot apply unary operator '-' to expression of type 'string'"
    );

    test_type_error_expr!(
        if_not_bool,
        "if 23 { 1 } else { 2 }",
        "the condition of an `if` expression must be a boolean"
    );

    test_type_error_expr!(
        if_arm_mismatch,
        "if true { 1 } else { '1' }",
        "the type of the `then` branch (f64) must match the type of the `else` branch (string)"
    );

    test_type_error_expr!(
        if_no_else,
        "if true { 1 }",
        "this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value"
    );
}
|
||||
584
fine/src/tokens.rs
Normal file
584
fine/src/tokens.rs
Normal file
|
|
@ -0,0 +1,584 @@
|
|||
/// The category of a token.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum TokenKind {
    // Punctuation.
    LeftBrace,
    RightBrace,
    LeftBracket,
    RightBracket,
    LeftParen,
    RightParen,
    Comma,
    Dot,
    Minus,
    Plus,
    Semicolon,
    Slash,
    Star,

    // Operators, including their `=`-suffixed forms.
    Bang,
    BangEqual,
    Equal,
    EqualEqual,
    Greater,
    GreaterEqual,
    Less,
    LessEqual,

    // Identifiers and literals.
    Identifier,
    String,
    Number,

    // Keywords.
    And,
    Async,
    Await,
    Class,
    Else,
    False,
    For,
    From,
    Fun,
    If,
    Let,
    Or,
    Print,
    Return,
    Select,
    This,
    True,
    While,
    Yield,

    /// Marks a token that carries an error message instead of source text.
    Error,
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub struct Token<'a> {
|
||||
kind: TokenKind,
|
||||
start: usize,
|
||||
value: Result<&'a str, String>,
|
||||
}
|
||||
|
||||
impl<'a> Token<'a> {
|
||||
pub fn new(kind: TokenKind, start: usize, value: &'a str) -> Self {
|
||||
Token {
|
||||
kind,
|
||||
start,
|
||||
value: Ok(value),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn error(start: usize, message: String) -> Self {
|
||||
Token {
|
||||
kind: TokenKind::Error,
|
||||
start,
|
||||
value: Err(message),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start(&self) -> usize {
|
||||
self.start
|
||||
}
|
||||
|
||||
pub fn kind(&self) -> TokenKind {
|
||||
self.kind
|
||||
}
|
||||
|
||||
pub fn as_str<'b>(&'b self) -> &'a str
|
||||
where
|
||||
'b: 'a,
|
||||
{
|
||||
match &self.value {
|
||||
Ok(v) => v,
|
||||
Err(e) => &e,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::fmt::Display for Token<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Lines {
|
||||
newlines: Vec<usize>,
|
||||
eof: usize,
|
||||
}
|
||||
|
||||
impl Lines {
|
||||
fn new(eof: usize) -> Self {
|
||||
Lines {
|
||||
newlines: Vec::new(),
|
||||
eof,
|
||||
}
|
||||
}
|
||||
|
||||
/// Record the position of a newline in the source.
|
||||
pub fn add_line(&mut self, pos: usize) {
|
||||
self.newlines.push(pos)
|
||||
}
|
||||
|
||||
/// Return the position of the given token as a (line, column) pair. By
|
||||
/// convention, lines are 1-based and columns are 0-based. Also, in
|
||||
/// keeping with the iterator-nature of the tokenizer, `None` here
|
||||
/// indicates end-of-file, and will return the position of the end of the
|
||||
/// file.
|
||||
pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
|
||||
let start = match token {
|
||||
Some(t) => t.start,
|
||||
None => self.eof,
|
||||
};
|
||||
self.position(start)
|
||||
}
|
||||
|
||||
/// Return the position of the given character offset as a (line,column)
|
||||
/// pair. By convention, lines are 1-based and columns are 0-based.
|
||||
pub fn position(&self, offset: usize) -> (usize, usize) {
|
||||
let line_end_index = match self.newlines.binary_search(&offset) {
|
||||
Ok(index) => index,
|
||||
Err(index) => index,
|
||||
};
|
||||
let line_start_pos = if line_end_index == 0 {
|
||||
0
|
||||
} else {
|
||||
self.newlines[line_end_index - 1] + 1
|
||||
};
|
||||
let line_number = line_end_index + 1;
|
||||
let column_offset = offset - line_start_pos;
|
||||
(line_number, column_offset)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Tokens<'a> {
|
||||
source: &'a str,
|
||||
chars: std::str::CharIndices<'a>,
|
||||
next_char: Option<(usize, char)>,
|
||||
lines: Lines,
|
||||
}
|
||||
|
||||
impl<'a> Tokens<'a> {
|
||||
pub fn new(source: &'a str) -> Self {
|
||||
let mut result = Tokens {
|
||||
source,
|
||||
chars: source.char_indices(),
|
||||
next_char: None,
|
||||
lines: Lines::new(source.len()),
|
||||
};
|
||||
result.advance(); // Prime the pump
|
||||
result
|
||||
}
|
||||
|
||||
pub fn lines(self) -> Lines {
|
||||
self.lines
|
||||
}
|
||||
|
||||
/// Return the position of the given token as a (line, column) pair. See
|
||||
/// `Lines::token_position` for more information about the range, etc.
|
||||
pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
|
||||
self.lines.token_position(token)
|
||||
}
|
||||
|
||||
fn token(&self, start: usize, kind: TokenKind) -> Token<'a> {
|
||||
let value = &self.source[start..self.pos()];
|
||||
Token::new(kind, start, value)
|
||||
}
|
||||
|
||||
fn number(&mut self, start: usize) -> Token<'a> {
|
||||
// First, the main part.
|
||||
loop {
|
||||
if !self.matches_digit() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Now the fraction part.
|
||||
// The thing that is bad here is that this is speculative...
|
||||
let backup = self.chars.clone();
|
||||
if self.matches('.') {
|
||||
let mut saw_digit = false;
|
||||
loop {
|
||||
if self.matches('_') {
|
||||
} else if self.matches_next(|c| c.is_ascii_digit()) {
|
||||
saw_digit = true;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if saw_digit {
|
||||
// OK we're good to here! Check the scientific notation.
|
||||
if self.matches('e') || self.matches('E') {
|
||||
if self.matches('+') || self.matches('-') {}
|
||||
let mut saw_digit = false;
|
||||
loop {
|
||||
if self.matches('_') {
|
||||
} else if self.matches_next(|c| c.is_ascii_digit()) {
|
||||
saw_digit = true;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !saw_digit {
|
||||
// This is just a broken number.
|
||||
let slice = &self.source[start..self.pos()];
|
||||
return Token::error(
|
||||
start,
|
||||
format!("Invalid floating-point literal: {slice}"),
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Might be accessing a member on an integer.
|
||||
self.chars = backup;
|
||||
}
|
||||
}
|
||||
|
||||
self.token(start, TokenKind::Number)
|
||||
}
|
||||
|
||||
fn string(&mut self, start: usize, delimiter: char) -> Token<'a> {
|
||||
while !self.matches(delimiter) {
|
||||
if self.eof() {
|
||||
return Token::error(start, "Unterminated string constant".to_string());
|
||||
}
|
||||
if self.matches('\\') {
|
||||
self.advance();
|
||||
} else {
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
self.token(start, TokenKind::String)
|
||||
}
|
||||
|
||||
fn identifier_token_kind(ident: &str) -> TokenKind {
|
||||
match ident.chars().nth(0).unwrap() {
|
||||
'a' => {
|
||||
if ident == "and" {
|
||||
return TokenKind::And;
|
||||
}
|
||||
if ident == "async" {
|
||||
return TokenKind::Async;
|
||||
}
|
||||
if ident == "await" {
|
||||
return TokenKind::Await;
|
||||
}
|
||||
}
|
||||
'c' => {
|
||||
if ident == "class" {
|
||||
return TokenKind::Class;
|
||||
}
|
||||
}
|
||||
'e' => {
|
||||
if ident == "else" {
|
||||
return TokenKind::Else;
|
||||
}
|
||||
}
|
||||
'f' => {
|
||||
if ident == "false" {
|
||||
return TokenKind::False;
|
||||
}
|
||||
if ident == "for" {
|
||||
return TokenKind::For;
|
||||
}
|
||||
if ident == "from" {
|
||||
return TokenKind::From;
|
||||
}
|
||||
if ident == "fun" {
|
||||
return TokenKind::Fun;
|
||||
}
|
||||
}
|
||||
'i' => {
|
||||
if ident == "if" {
|
||||
return TokenKind::If;
|
||||
}
|
||||
}
|
||||
'l' => {
|
||||
if ident == "let" {
|
||||
return TokenKind::Let;
|
||||
}
|
||||
}
|
||||
'o' => {
|
||||
if ident == "or" {
|
||||
return TokenKind::Or;
|
||||
}
|
||||
}
|
||||
'p' => {
|
||||
if ident == "print" {
|
||||
return TokenKind::Print;
|
||||
}
|
||||
}
|
||||
'r' => {
|
||||
if ident == "return" {
|
||||
return TokenKind::Return;
|
||||
}
|
||||
}
|
||||
's' => {
|
||||
if ident == "select" {
|
||||
return TokenKind::Select;
|
||||
}
|
||||
}
|
||||
't' => {
|
||||
if ident == "this" {
|
||||
return TokenKind::This;
|
||||
}
|
||||
if ident == "true" {
|
||||
return TokenKind::True;
|
||||
}
|
||||
}
|
||||
'w' => {
|
||||
if ident == "while" {
|
||||
return TokenKind::While;
|
||||
}
|
||||
}
|
||||
'y' => {
|
||||
if ident == "yield" {
|
||||
return TokenKind::Yield;
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
TokenKind::Identifier
|
||||
}
|
||||
|
||||
fn identifier(&mut self, start: usize) -> Token<'a> {
|
||||
loop {
|
||||
// TODO: Use unicode identifier classes instead
|
||||
if !self.matches_next(|c| c.is_ascii_alphanumeric() || c == '_') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let ident = &self.source[start..self.pos()];
|
||||
let kind = Self::identifier_token_kind(ident);
|
||||
Token::new(kind, start, ident)
|
||||
}
|
||||
|
||||
fn matches(&mut self, ch: char) -> bool {
|
||||
if let Some((_, next_ch)) = self.next_char {
|
||||
if next_ch == ch {
|
||||
self.advance();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn matches_next<F>(&mut self, f: F) -> bool
|
||||
where
|
||||
F: FnOnce(char) -> bool,
|
||||
{
|
||||
if let Some((_, next_ch)) = self.next_char {
|
||||
if f(next_ch) {
|
||||
self.advance();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn matches_digit(&mut self) -> bool {
|
||||
self.matches('_') || self.matches_next(|c| c.is_ascii_digit())
|
||||
}
|
||||
|
||||
fn advance(&mut self) -> Option<(usize, char)> {
|
||||
let result = self.next_char;
|
||||
self.next_char = self.chars.next();
|
||||
result
|
||||
}
|
||||
|
||||
fn pos(&self) -> usize {
|
||||
match self.next_char {
|
||||
Some((p, _)) => p,
|
||||
None => self.source.len(),
|
||||
}
|
||||
}
|
||||
|
||||
fn eof(&self) -> bool {
|
||||
self.next_char.is_none()
|
||||
}
|
||||
|
||||
fn skip_whitespace(&mut self) {
|
||||
while let Some((pos, ch)) = self.next_char {
|
||||
if ch == '\n' {
|
||||
self.lines.add_line(pos);
|
||||
} else if !ch.is_whitespace() {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::iter::Iterator for Tokens<'a> {
|
||||
type Item = Token<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.skip_whitespace(); // TODO: Whitespace preserving/comment preserving
|
||||
let (pos, c) = match self.advance() {
|
||||
Some((p, c)) => (p, c),
|
||||
None => return None,
|
||||
};
|
||||
|
||||
let token = match c {
|
||||
'{' => self.token(pos, TokenKind::LeftBrace),
|
||||
'}' => self.token(pos, TokenKind::RightBrace),
|
||||
'[' => self.token(pos, TokenKind::LeftBracket),
|
||||
']' => self.token(pos, TokenKind::RightBracket),
|
||||
'(' => self.token(pos, TokenKind::LeftParen),
|
||||
')' => self.token(pos, TokenKind::RightParen),
|
||||
',' => self.token(pos, TokenKind::Comma),
|
||||
'.' => self.token(pos, TokenKind::Dot),
|
||||
'-' => self.token(pos, TokenKind::Minus),
|
||||
'+' => self.token(pos, TokenKind::Plus),
|
||||
';' => self.token(pos, TokenKind::Semicolon),
|
||||
'/' => self.token(pos, TokenKind::Slash),
|
||||
'*' => self.token(pos, TokenKind::Star),
|
||||
'!' => {
|
||||
if self.matches('=') {
|
||||
self.token(pos, TokenKind::BangEqual)
|
||||
} else {
|
||||
self.token(pos, TokenKind::Bang)
|
||||
}
|
||||
}
|
||||
'=' => {
|
||||
if self.matches('=') {
|
||||
self.token(pos, TokenKind::EqualEqual)
|
||||
} else {
|
||||
self.token(pos, TokenKind::Equal)
|
||||
}
|
||||
}
|
||||
'>' => {
|
||||
if self.matches('=') {
|
||||
self.token(pos, TokenKind::GreaterEqual)
|
||||
} else {
|
||||
self.token(pos, TokenKind::Greater)
|
||||
}
|
||||
}
|
||||
'<' => {
|
||||
if self.matches('=') {
|
||||
self.token(pos, TokenKind::LessEqual)
|
||||
} else {
|
||||
self.token(pos, TokenKind::Less)
|
||||
}
|
||||
}
|
||||
'\'' => self.string(pos, '\''),
|
||||
'"' => self.string(pos, '"'),
|
||||
_ => {
|
||||
if c.is_ascii_digit() {
|
||||
self.number(pos)
|
||||
} else if c.is_ascii_alphabetic() || c == '_' {
|
||||
self.identifier(pos)
|
||||
} else {
|
||||
Token::error(pos, format!("Unexpected character '{c}'"))
|
||||
}
|
||||
}
|
||||
};
|
||||
Some(token)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Declare a test named `$name` that tokenizes `$input` and compares the
    /// result against the listed `(start, kind, text)` triples, in order.
    macro_rules! test_tokens {
        ($name:ident, $input:expr, $($expected:expr),+) => {
            #[test]
            fn $name() {
                use TokenKind::*;
                let tokens: Vec<_> = Tokens::new($input).collect();

                let expected: Vec<Token> = vec![$({
                    let (start, kind, text) = $expected;
                    Token::new(kind, start, text)
                }),+];

                assert_eq!(expected, tokens);
            }
        };
    }

    test_tokens!(
        numbers,
        "1 1.0 1.2e7 2.3e+7 3.3E-06 7_6 8.0e_8",
        (0, Number, "1"),
        (2, Number, "1.0"),
        (6, Number, "1.2e7"),
        (12, Number, "2.3e+7"),
        (19, Number, "3.3E-06"),
        (27, Number, "7_6"),
        (31, Number, "8.0e_8")
    );

    test_tokens!(
        identifiers,
        "asdf x _123 a_23 x3a and or yield async await class else false for from",
        (0, Identifier, "asdf"),
        (5, Identifier, "x"),
        (7, Identifier, "_123"),
        (12, Identifier, "a_23"),
        (17, Identifier, "x3a"),
        (21, And, "and"),
        (25, Or, "or"),
        (28, Yield, "yield"),
        (34, Async, "async"),
        (40, Await, "await"),
        (46, Class, "class"),
        (52, Else, "else"),
        (57, False, "false"),
        (63, For, "for"),
        (67, From, "from")
    );

    test_tokens!(
        more_keywords,
        "fun if let print return select this true while truewhile",
        (0, Fun, "fun"),
        (4, If, "if"),
        (7, Let, "let"),
        (11, Print, "print"),
        (17, Return, "return"),
        (24, Select, "select"),
        (31, This, "this"),
        (36, True, "true"),
        (41, While, "while"),
        (47, Identifier, "truewhile")
    );

    test_tokens!(
        strings,
        r#"'this is a string that\'s great!\r\n' "foo's" 'bar"s' "#,
        (0, String, r#"'this is a string that\'s great!\r\n'"#),
        (38, String, r#""foo's""#),
        (46, String, "'bar\"s'")
    );

    test_tokens!(
        symbols,
        "{ } ( ) [ ] . ! != < <= > >= = == , - + * / ;",
        (0, LeftBrace, "{"),
        (2, RightBrace, "}"),
        (4, LeftParen, "("),
        (6, RightParen, ")"),
        (8, LeftBracket, "["),
        (10, RightBracket, "]"),
        (12, Dot, "."),
        (14, Bang, "!"),
        (16, BangEqual, "!="),
        (19, Less, "<"),
        (21, LessEqual, "<="),
        (24, Greater, ">"),
        (26, GreaterEqual, ">="),
        (29, Equal, "="),
        (31, EqualEqual, "=="),
        (34, Comma, ","),
        (36, Minus, "-"),
        (38, Plus, "+"),
        (40, Star, "*"),
        (42, Slash, "/"),
        (44, Semicolon, ";")
    );
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue