[fine] The name is official

This commit is contained in:
John Doty 2024-01-02 16:50:21 -08:00
parent 652fe18f57
commit 8a867de7e7
7 changed files with 3 additions and 3 deletions

32
fine/Cargo.lock generated Normal file
View file

@ -0,0 +1,32 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "fine"
version = "0.1.0"
dependencies = [
"pretty_assertions",
]
[[package]]
name = "pretty_assertions"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66"
dependencies = [
"diff",
"yansi",
]
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"

7
fine/Cargo.toml Normal file
View file

@ -0,0 +1,7 @@
[package]
name = "fine"
version = "0.1.0"
edition = "2021"
[dev-dependencies]
pretty_assertions = "1.4.0"

2
fine/src/lib.rs Normal file
View file

@ -0,0 +1,2 @@
/// Expression parser, syntax tree storage, and expression type checking.
pub mod parser;
/// Tokenizer and source-position (line/column) bookkeeping.
pub mod tokens;

1
fine/src/main.rs Normal file
View file

@ -0,0 +1 @@
/// Binary entry point. Currently an empty placeholder: it does nothing.
pub fn main() {}

906
fine/src/parser.rs Normal file
View file

@ -0,0 +1,906 @@
use crate::tokens::{Lines, Token, TokenKind, Tokens};
use std::fmt;
// TODO: An error should have:
//
// - a start
// - an end
// - a focus
// - descriptive messages
//
// that will have to wait for now
/// A diagnostic recorded while parsing or type checking.
///
/// `start` and `end` are `(line, column)` pairs; for errors created with
/// [`SyntaxError::new`] both refer to the same position.
#[derive(PartialEq, Eq)]
pub struct SyntaxError {
    /// `(line, column)` at which the error begins.
    pub start: (usize, usize),
    /// `(line, column)` at which the error ends.
    pub end: (usize, usize),
    /// Human-readable description of the problem.
    pub message: String,
}

impl SyntaxError {
    /// Creates an error attached to a single position, so `start == end`.
    pub fn new<T>(line: usize, column: usize, message: T) -> Self
    where
        T: ToString,
    {
        SyntaxError {
            start: (line, column),
            end: (line, column),
            message: message.to_string(),
        }
    }

    /// Creates an error covering the range from `start` to `end`, each given
    /// as a `(line, column)` pair.
    pub fn new_spanned<T>(start: (usize, usize), end: (usize, usize), message: T) -> Self
    where
        T: ToString,
    {
        SyntaxError {
            start,
            end,
            message: message.to_string(),
        }
    }
}

/// `Debug` deliberately mirrors `Display` so that failing assertions print
/// the same compact `line:column: message` form.
impl fmt::Debug for SyntaxError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{self}")
    }
}

/// Formats the error as `line:column: message`, using the start position.
impl fmt::Display for SyntaxError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Report the *column* of the start position; the previous
        // implementation printed the end line here, so the column was never
        // shown and every single-point error rendered as `line:line`.
        write!(f, "{}:{}: {}", self.start.0, self.start.1, self.message)
    }
}
/// The value carried by a literal expression.
#[derive(Clone)]
pub enum Literal {
// A numeric literal, stored as an `f64`.
Float64(f64),
// A string literal with its escape sequences already processed.
String(String),
// `true` or `false`.
Bool(bool),
}
/// Prefix operators the parser produces.
#[derive(Copy, Clone)]
pub enum UnaryOp {
// Produced for a prefix `-` token.
Negate,
// Produced for a prefix `!` token.
Not,
}
/// Infix operators the parser produces.
#[derive(Copy, Clone)]
pub enum BinaryOp {
// `+`
Add,
// `-`
Subtract,
// `*`
Multiply,
// `/`
Divide,
// `and`
And,
// `or`
Or,
}
/// A node of the syntax tree. Child expressions are referenced through
/// `ExprRef` handles rather than owned directly.
#[derive(Clone)]
pub enum Expr<'a> {
// The literal value and the token it was parsed from.
Literal(Literal, Token<'a>),
// Operator, the operator token, and the operand.
Unary(UnaryOp, Token<'a>, ExprRef),
// Operator, the operator token, then the left and right operands.
Binary(BinaryOp, Token<'a>, ExprRef, ExprRef),
// The `if` token, the condition, the `then` branch, the optional `else`
// branch, and the last token consumed while parsing the conditional.
Conditional(Token<'a>, ExprRef, ExprRef, Option<ExprRef>, Token<'a>),
}
/// Handle to an expression stored in a `SyntaxTree`.
///
/// The wrapped value is the index of the expression, or `None` when the
/// expression could not be parsed.
#[derive(Clone)]
pub struct ExprRef(Option<usize>);

impl ExprRef {
    /// Returns the handle that stands in for an expression that failed to
    /// parse.
    pub fn error() -> Self {
        Self(None)
    }
}
// TODO: Eventually we will be unable to use Eq and PartialEq here, and will
// need to do our own thing.
/// The type computed for an expression by the type checker.
#[derive(Copy, Clone)]
pub enum Type {
    // Signals a type error. If you receive this then you know that an error
    // has already been reported; if you produce this be sure to also note
    // the error in the errors collection.
    Error,

    // Signals that the expression has a control-flow side-effect and that no
    // value will ever result from this expression. Usually this means
    // everything's fine.
    Unreachable,

    // TODO: Numeric literals should be implicitly convertible, unlike other
    // types. Maybe just a "numeric literal" type?
    F64,
    String,
    Bool,
}

impl Type {
    /// Returns `true` only for `Type::Error`.
    pub fn is_error(&self) -> bool {
        matches!(self, Type::Error)
    }

    /// Returns `true` when a value of type `other` may be used where `self`
    /// is expected.
    ///
    /// Identical types are compatible. `Type::Error` is compatible with
    /// everything, on either side, so that one mistake does not cascade into
    /// further reports.
    pub fn compatible_with(&self, other: &Type) -> bool {
        // TODO: This is wrong, because of numeric literals etc.

        // Avoid introducing more errors.
        if self.is_error() || other.is_error() {
            return true;
        }

        matches!(
            (self, other),
            (Type::F64, Type::F64)
                | (Type::String, Type::String)
                | (Type::Bool, Type::Bool)
                | (Type::Unreachable, Type::Unreachable)
        )
    }
}

/// `Debug` output is the same as the `Display` output.
impl std::fmt::Debug for Type {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}

/// Writes the name of the type as it appears in error messages.
impl std::fmt::Display for Type {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Type::Error => "<< INTERNAL ERROR >>",
            Type::Unreachable => "<< UNREACHABLE >>",
            Type::F64 => "f64",
            Type::String => "string",
            Type::Bool => "bool",
        };
        f.write_str(name)
    }
}
/// Wrapper around an optional index, shaped like `ExprRef`.
// NOTE(review): nothing in the code visible here constructs or reads this
// type; confirm whether it is still needed.
pub struct TypeRef(Option<usize>);
pub struct SyntaxTree<'a> {
pub errors: Vec<SyntaxError>,
expressions: Vec<Expr<'a>>,
}
impl<'a> SyntaxTree<'a> {
pub fn new() -> Self {
SyntaxTree {
errors: Vec::new(),
expressions: Vec::new(),
}
}
pub fn add_error(&mut self, error: SyntaxError) {
self.errors.push(error);
}
pub fn add_expr(&mut self, expr: Expr<'a>) -> ExprRef {
let index = self.expressions.len();
self.expressions.push(expr);
ExprRef(Some(index))
}
pub fn dump_expr(&self, expr: &ExprRef) -> String {
match expr.0 {
Some(idx) => {
let expr = &self.expressions[idx];
match expr {
Expr::Literal(_, tok) => tok.to_string(),
Expr::Unary(_, tok, e) => {
format!("({tok} {})", self.dump_expr(e))
}
Expr::Binary(_, tok, l, r) => {
format!("({tok} {} {})", self.dump_expr(l), self.dump_expr(r))
}
Expr::Conditional(tok, cond, t, e, _) => {
if let Some(e) = e {
format!(
"({tok} {} {} {})",
self.dump_expr(cond),
self.dump_expr(t),
self.dump_expr(e)
)
} else {
format!("({tok} {} {})", self.dump_expr(cond), self.dump_expr(t))
}
}
}
}
None => "<|EOF|>".to_string(),
}
}
pub fn expr_span(&self, expr: &ExprRef) -> Option<(Token<'a>, Token<'a>)> {
let expr = match expr.0 {
Some(idx) => &self.expressions[idx],
None => return None,
};
match expr {
Expr::Literal(_, tok) => Some((tok.clone(), tok.clone())),
Expr::Unary(_, tok, arg) => {
let arg = self.expr_span(arg);
match arg {
None => None,
Some((_, end)) => Some((tok.clone(), end)),
}
}
Expr::Binary(_, _, left, right) => {
let left = self.expr_span(left);
let right = self.expr_span(right);
match (left, right) {
(None, _) => None,
(_, None) => None,
(Some((start, _)), Some((_, end))) => Some((start, end)),
}
}
Expr::Conditional(head, _, _, _, tail) => Some((head.clone(), tail.clone())),
}
}
pub fn expr_type(&mut self, expr: &ExprRef, lines: &Lines, value_required: bool) -> Type {
// TODO: Cache and work on demand? Or is this just fine?
let exr = expr.clone();
let expr = match expr.0 {
Some(idx) => &self.expressions[idx],
None => return Type::Error,
};
match expr {
Expr::Literal(lit, _) => match lit {
Literal::Float64(_) => Type::F64,
Literal::String(_) => Type::String,
Literal::Bool(_) => Type::Bool,
},
// Figure out the main thing. Check for a... trait?
Expr::Unary(op, tok, arg) => {
let op = op.clone();
let arg = arg.clone();
let tok = tok.clone();
let arg_type = self.expr_type(&arg, lines, true);
match (op, arg_type) {
(UnaryOp::Negate, Type::F64) => Type::F64,
(UnaryOp::Not, Type::Bool) => Type::Bool,
// This is dumb and should be punished, probably.
(_, Type::Unreachable) => {
let (line, col) = lines.position(tok.start());
self.errors.push(SyntaxError::new(line, col, format!("cannot apply a unary operator to something that doesn't yield a value")));
Type::Error
}
// Propagate existing errors without additional complaint.
(_, Type::Error) => Type::Error,
// Missed the whole table, must be an error.
(_, arg_type) => {
let (line, col) = lines.position(tok.start());
self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'")));
Type::Error
}
}
}
Expr::Binary(op, tok, left, right) => {
let op = op.clone();
let tok = tok.clone();
let left = left.clone();
let right = right.clone();
let left_type = self.expr_type(&left, lines, true);
let right_type = self.expr_type(&right, lines, true);
match (op, left_type, right_type) {
(
BinaryOp::Add | BinaryOp::Subtract | BinaryOp::Multiply | BinaryOp::Divide,
Type::F64,
Type::F64,
) => Type::F64,
(BinaryOp::Add, Type::String, Type::String) => Type::String,
(BinaryOp::And | BinaryOp::Or, Type::Bool, Type::Bool) => Type::Bool,
// This is dumb and should be punished, probably.
(_, _, Type::Unreachable) => {
let (line, col) = lines.position(tok.start());
self.errors.push(SyntaxError::new(
line,
col,
format!(
"cannot apply '{tok}' to an argument that doesn't yield a value (on the right)"
),
));
Type::Error
}
(_, Type::Unreachable, _) => {
let (line, col) = lines.position(tok.start());
self.errors.push(SyntaxError::new(
line,
col,
format!(
"cannot apply '{tok}' to an argument that doesn't yield a value (on the left)"
),
));
Type::Error
}
// Propagate existing errors without additional complaint.
(_, Type::Error, _) => Type::Error,
(_, _, Type::Error) => Type::Error,
// Missed the whole table, it must be an error.
(_, left_type, right_type) => {
let (line, col) = lines.position(tok.start());
self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)")));
Type::Error
}
}
}
Expr::Conditional(_, cond, then_exp, else_exp, _) => {
let cond = cond.clone();
let then_exp = then_exp.clone();
let else_exp = else_exp.clone();
let cond_type = self.expr_type(&cond, lines, true);
let then_type = self.expr_type(&then_exp, lines, value_required);
let else_type = else_exp.map(|e| self.expr_type(&e, lines, value_required));
if !cond_type.compatible_with(&Type::Bool) {
if !cond_type.is_error() {
let span = self
.expr_span(&cond)
.expect("If the expression has a type it must have a span");
let start = lines.position(span.0.start());
let end = lines.position(span.1.start());
self.errors.push(SyntaxError::new_spanned(
start,
end,
"the condition of an `if` expression must be a boolean",
));
}
return Type::Error;
}
match (then_type, else_type) {
(Type::Error, _) => Type::Error,
(_, Some(Type::Error)) => Type::Error,
// It's an error to have a missing else branch if the value is required
(_, None) if value_required => {
let span = self
.expr_span(&exr)
.expect("How did I get this far with a broken parse?");
let start = lines.position(span.0.start());
let end = lines.position(span.1.start());
self.errors.push(SyntaxError::new_spanned(
start,
end,
"this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value",
));
Type::Error
}
// If the value is required then the branches must be
// compatible, and the type of the expression is the type
// of the `then` branch.
(then_type, Some(else_type)) if value_required => {
if !then_type.compatible_with(&else_type) {
let span = self
.expr_span(&exr)
.expect("How did I get this far with a broken parse?");
let start = lines.position(span.0.start());
let end = lines.position(span.1.start());
self.errors.push(SyntaxError::new_spanned(
start,
end,
format!("the type of the `then` branch ({then_type}) must match the type of the `else` branch ({else_type})"),
));
Type::Error
} else {
then_type
}
}
// The value must not be required, just mark this as unreachable.
(_, _) => {
assert!(!value_required);
Type::Unreachable
}
}
}
}
}
}
// BINDING POWERS. When parsing expressions we only accept expressions that
// meet a minimum binding power. (This is like "precedence" but I just super
// don't like that terminology.)
//
// Larger numbers bind more tightly. The trailing comment on each constant
// lists the operators that use that power.
const ASSIGNMENT_POWER: u8 = 0; // =
const OR_POWER: u8 = 1; // or
const AND_POWER: u8 = 2; // and
const EQUALITY_POWER: u8 = 3; // == !=
const COMPARISON_POWER: u8 = 4; // < > <= >=
const TERM_POWER: u8 = 5; // + -
const FACTOR_POWER: u8 = 6; // * /
const UNARY_POWER: u8 = 7; // ! -
// const CALL_POWER: u8 = 8; // . ()
// const PRIMARY_POWER: u8 = 9;
/// Returns the binding power of `token` when it is used as an infix
/// operator, or `None` if there is no token (end of input) or the token is
/// not an infix operator.
fn token_power<'a>(token: &Option<Token<'a>>) -> Option<u8> {
    let kind = token.as_ref()?.kind();

    let power = match kind {
        TokenKind::Equal => ASSIGNMENT_POWER,
        TokenKind::Or => OR_POWER,
        TokenKind::And => AND_POWER,
        TokenKind::EqualEqual | TokenKind::BangEqual => EQUALITY_POWER,
        TokenKind::Less
        | TokenKind::Greater
        | TokenKind::GreaterEqual
        | TokenKind::LessEqual => COMPARISON_POWER,
        TokenKind::Plus | TokenKind::Minus => TERM_POWER,
        TokenKind::Star | TokenKind::Slash => FACTOR_POWER,
        _ => return None,
    };
    Some(power)
}
/// Binding-power driven expression parser.
///
/// The parser keeps a one-token lookahead (`current`) plus the token that was
/// consumed most recently (`previous`); prefix and infix handlers always
/// operate on `previous`.
pub struct Parser<'a> {
    /// Source of tokens and of line/column information.
    tokens: Tokens<'a>,
    /// Tree being built, including the errors reported so far.
    tree: SyntaxTree<'a>,
    /// The next token to be consumed; `None` at end of input.
    current: Option<Token<'a>>,
    /// The token consumed most recently.
    previous: Option<Token<'a>>,
    /// Set after the first error; while set, further errors are dropped.
    panic_mode: bool,
}

impl<'a> Parser<'a> {
    /// Creates a parser over `source` and reads the first token.
    pub fn new(source: &'a str) -> Self {
        let mut parser = Parser {
            tokens: Tokens::new(source),
            tree: SyntaxTree::new(),
            current: None,
            previous: None,
            panic_mode: false,
        };
        parser.advance();
        parser
    }

    /// Parses a single expression that must span the whole input.
    ///
    /// Returns the tree (with any errors), the handle of the root expression,
    /// and the line table needed to resolve token positions.
    pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) {
        let expr = self.expression();
        self.consume(None, "expected end of expression");
        (self.tree, expr, self.tokens.lines())
    }

    /// Parses an expression of any binding power.
    fn expression(&mut self) -> ExprRef {
        self.expression_with_power(0)
    }

    /// Parses an expression whose infix operators all have at least
    /// `minimum_power`.
    fn expression_with_power(&mut self, minimum_power: u8) -> ExprRef {
        self.trace("expression with power");
        self.advance();
        let mut expr = self.prefix_expression();

        // `None` means the lookahead is not an infix operator, or is EOF.
        while let Some(power) = token_power(&self.current) {
            if power < minimum_power {
                break;
            }
            self.advance();
            expr = self.infix_expression(power, expr);
        }
        expr
    }

    /// Parses the expression that starts at `previous`.
    fn prefix_expression(&mut self) -> ExprRef {
        self.trace("prefix");
        match self.previous.as_ref().map(|token| token.kind()) {
            Some(TokenKind::Bang) | Some(TokenKind::Minus) => self.unary(),
            Some(TokenKind::LeftParen) => self.grouping(),
            Some(TokenKind::Number) => self.number(),
            Some(TokenKind::String) => self.string(),
            Some(TokenKind::True) => self.boolean(true),
            Some(TokenKind::False) => self.boolean(false),
            Some(TokenKind::If) => self.conditional(),
            // Either end of input or a token that cannot start an expression.
            _ => {
                self.error("expected an expression");
                ExprRef::error()
            }
        }
    }

    /// Parses the remainder of an infix expression whose operator is
    /// `previous` and whose left operand is `left`.
    fn infix_expression(&mut self, power: u8, left: ExprRef) -> ExprRef {
        self.trace("infix");
        let kind = self.previous.as_ref().unwrap().kind();
        match kind {
            TokenKind::Plus
            | TokenKind::Minus
            | TokenKind::Star
            | TokenKind::Slash
            | TokenKind::And
            | TokenKind::Or => self.binary(power, left),
            // `token_power` also assigns a power to `=`, `==`, `!=`, `<`,
            // `>`, `<=` and `>=`, none of which has a parser yet. Those come
            // straight from user input, so report them instead of panicking.
            _ => {
                self.error("this operator is not supported in expressions yet");
                ExprRef::error()
            }
        }
    }

    /// Builds a numeric literal from `previous`.
    fn number(&mut self) -> ExprRef {
        let token = self.previous.as_ref().unwrap().clone();

        // The tokenizer accepts `_` as a digit separator, but the standard
        // `f64` parser rejects it, so drop the separators first.
        let digits: String = token.as_str().chars().filter(|&c| c != '_').collect();

        // What kind is it? For now let's just ... make it good.
        match digits.parse::<f64>() {
            Ok(value) => self
                .tree
                .add_expr(Expr::Literal(Literal::Float64(value), token)),
            Err(e) => {
                self.error(format!("invalid f64: {e}"));
                ExprRef::error()
            }
        }
    }

    /// Builds a string literal from `previous`, decoding `\n`, `\r`, `\t`;
    /// any other escaped character stands for itself.
    fn string(&mut self) -> ExprRef {
        let token = self.previous.as_ref().unwrap();

        let mut result = String::new();
        let mut input = token.as_str().chars();
        assert!(input.next().is_some()); // Delimiter
        while let Some(ch) = input.next() {
            if ch != '\\' {
                result.push(ch);
                continue;
            }
            let escaped = input
                .next()
                .expect("the tokenizer only produces strings where a character follows '\\'");
            result.push(match escaped {
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                other => other,
            });
        }
        result.pop(); // We pushed the other delimiter on, whoops.

        let literal = Literal::String(result);
        self.tree.add_expr(Expr::Literal(literal, token.clone()))
    }

    /// Builds a boolean literal from `previous`.
    fn boolean(&mut self, value: bool) -> ExprRef {
        let token = self.previous.as_ref().unwrap().clone();
        self.tree
            .add_expr(Expr::Literal(Literal::Bool(value), token))
    }

    /// Parses a parenthesized expression; `previous` is the `(`.
    fn grouping(&mut self) -> ExprRef {
        let result = self.expression();
        self.consume(
            Some(TokenKind::RightParen),
            "expected ')' after an expression",
        );
        result
    }

    /// Parses `if <cond> { <expr> }` with an optional `else { <expr> }` or
    /// `else if ...` tail; `previous` is the `if` token.
    fn conditional(&mut self) -> ExprRef {
        let token = self.previous.as_ref().unwrap().clone();
        let condition_expr = self.expression();
        self.consume(
            Some(TokenKind::LeftBrace),
            "expected '{' to start an 'if' block",
        );
        let then_expr = self.expression();
        self.consume(
            Some(TokenKind::RightBrace),
            "expected '}' to end an 'if' block",
        );

        let else_expr = if self.current_is(TokenKind::Else) {
            self.advance();
            if self.current_is(TokenKind::If) {
                // Allow `else if` without another `{`.
                self.advance();
                Some(self.conditional())
            } else {
                self.consume(
                    Some(TokenKind::LeftBrace),
                    "expected '{' to start an 'else' block",
                );
                let else_expr = self.expression();
                self.consume(
                    Some(TokenKind::RightBrace),
                    "Expected '}' to end an 'else' block",
                );
                Some(else_expr)
            }
        } else {
            None
        };

        // The last token consumed marks the end of the conditional.
        let tail = self.previous.as_ref().unwrap().clone();
        self.tree.add_expr(Expr::Conditional(
            token,
            condition_expr,
            then_expr,
            else_expr,
            tail,
        ))
    }

    /// Parses the operand of the prefix operator in `previous`.
    fn unary(&mut self) -> ExprRef {
        let token = self.previous.as_ref().unwrap().clone();
        let kind = token.kind();
        let expr = self.expression_with_power(UNARY_POWER);
        let op = match kind {
            TokenKind::Minus => UnaryOp::Negate,
            TokenKind::Bang => UnaryOp::Not,
            _ => panic!("unsuitable unary: {:?}: no op", kind),
        };

        self.tree.add_expr(Expr::Unary(op, token, expr))
    }

    /// Parses the right operand of the binary operator in `previous`.
    fn binary(&mut self, power: u8, left: ExprRef) -> ExprRef {
        let token = self.previous.as_ref().unwrap().clone();
        let op = match token.kind() {
            TokenKind::Plus => BinaryOp::Add,
            TokenKind::Minus => BinaryOp::Subtract,
            TokenKind::Star => BinaryOp::Multiply,
            TokenKind::Slash => BinaryOp::Divide,
            TokenKind::And => BinaryOp::And,
            TokenKind::Or => BinaryOp::Or,
            // `infix_expression` only dispatches here for the kinds above.
            _ => panic!("unsuitable binary: {:?}: no op", self.previous),
        };
        // Requiring a strictly higher power on the right makes the operator
        // left-associative.
        let right = self.expression_with_power(power + 1);
        self.tree.add_expr(Expr::Binary(op, token, left, right))
    }

    /// Returns `true` when the lookahead token exists and has kind `kind`.
    fn current_is(&self, kind: TokenKind) -> bool {
        matches!(&self.current, Some(token) if token.kind() == kind)
    }

    /// Moves `current` into `previous` and reads the next token, reporting
    /// and skipping any error tokens the tokenizer produces.
    fn advance(&mut self) {
        self.previous = self.current.take();
        loop {
            self.current = self.tokens.next();
            let message = match &self.current {
                Some(token) if token.kind() == TokenKind::Error => token.to_string(),
                _ => break,
            };
            self.error_at_current(message);
        }
    }

    /// Consumes the lookahead if it has the given kind, where `None` means
    /// end of input; otherwise reports `error` at the lookahead.
    fn consume(&mut self, kind: Option<TokenKind>, error: &str) {
        match (&self.current, kind) {
            (Some(token), Some(kind)) if token.kind() == kind => self.advance(),
            (None, None) => (),
            _ => self.error_at_current(error),
        }
    }

    /// Reports `message` at the most recently consumed token.
    fn error<T>(&mut self, message: T)
    where
        T: Into<String>,
    {
        self.error_at(self.previous.clone(), message)
    }

    /// Reports `message` at the lookahead token.
    fn error_at_current<T>(&mut self, message: T)
    where
        T: Into<String>,
    {
        self.error_at(self.current.clone(), message)
    }

    /// Records an error at `token` (`None` meaning end of input). Only the
    /// first error is kept: once `panic_mode` is set, later calls return
    /// without recording anything.
    fn error_at<T>(&mut self, token: Option<Token<'a>>, message: T)
    where
        T: Into<String>,
    {
        if self.panic_mode {
            return;
        }
        self.panic_mode = true;

        let message: String = message.into();
        let (line, column) = self.tokens.token_position(&token);

        let location = match &token {
            None => " at end".to_string(),
            Some(t) if t.kind() != TokenKind::Error => format!(" at '{t}'"),
            // An error token's text is the message itself, so there is no
            // lexeme to quote. (This used to render as "Error : ...".)
            Some(_) => String::new(),
        };
        let final_message = format!("Error{location}: {message}");

        self.tree
            .add_error(SyntaxError::new(line, column, final_message));
    }

    /// Debugging hook, currently disabled. Uncomment the body to print the
    /// previous and current tokens at each parsing step.
    fn trace(&self, _msg: &str) {
        // let cpos = self.tokens.token_position(&self.current);
        // let ppos = self.tokens.token_position(&self.previous);
        // eprintln!(
        //     "[{}:{}:{}] [{}:{}:{}]: {msg}",
        //     ppos.0,
        //     ppos.1,
        //     self.previous
        //         .as_ref()
        //         .map(|t| t.as_str())
        //         .unwrap_or("<eof>"),
        //     cpos.0,
        //     cpos.1,
        //     self.current.as_ref().map(|t| t.as_str()).unwrap_or("<eof>")
        // );
    }
}
// Parser and type-checker tests. Each macro invocation below expands to one
// `#[test]` function.
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
// Parses `source` and checks that there are no parse errors, that the dumped
// tree equals `expected`, and that the computed type is compatible with
// `expected_type`.
fn test_successful_expression_parse(source: &str, expected: &str, expected_type: Type) {
let (mut tree, expr, lines) = Parser::new(source).parse();
assert_eq!(
Vec::<SyntaxError>::new(),
tree.errors,
"Expected successful parse"
);
assert_eq!(
expected,
tree.dump_expr(&expr),
"The parse structure of the expressions did not match"
);
// TODO: 'assert_eq' is probably wrong here
let expr_type = tree.expr_type(&expr, &lines, true);
assert!(
expected_type.compatible_with(&expr_type),
"The type of the expression did not match. expected: {expected_type}, actual: {expr_type}"
);
}
// test_expr!(name, source, expected dump, expected type)
macro_rules! test_expr {
($name:ident, $input:expr, $expected:expr, $type:expr) => {
#[test]
fn $name() {
test_successful_expression_parse($input, $expected, $type);
}
};
}
test_expr!(number_expr, "12", "12", Type::F64);
test_expr!(add_expr, "1 + 2", "(+ 1 2)", Type::F64);
test_expr!(
prec_expr,
"1 + 2 * 3 - 7 * 7",
"(- (+ 1 (* 2 3)) (* 7 7))",
Type::F64
);
test_expr!(unary, "-((23)) * 5", "(* (- 23) 5)", Type::F64);
test_expr!(
strings,
r#" "Hello " + 'world!' "#,
r#"(+ "Hello " 'world!')"#,
Type::String
);
test_expr!(
booleans,
"true and false or false and !true",
"(or (and true false) (and false (! true)))",
Type::Bool
);
test_expr!(
if_expression,
"if true { 23 } else { 45 }",
"(if true 23 45)",
Type::F64
);
// Disabled: `return` is not handled by the parser visible in this file.
// test_expr!(
// if_with_return,
// "if true { 23 } else { return 'nothing' }",
// "",
// Type::F64
// );
// ========================================================================
// Type Error Tests
// ========================================================================
// Parses `source` (which must parse cleanly), type checks it, and checks that
// the result is `Type::Error` and that exactly `expected_errors` were
// reported, in order.
fn test_type_error_expression(source: &str, expected_errors: Vec<&str>) {
let (mut tree, expr, lines) = Parser::new(source).parse();
assert_eq!(
Vec::<SyntaxError>::new(),
tree.errors,
"Expected successful parse"
);
let expr_type = tree.expr_type(&expr, &lines, true);
assert!(expr_type.is_error());
let actual_errors = tree
.errors
.iter()
.map(|e| e.message.as_str())
.collect::<Vec<_>>();
assert_eq!(expected_errors, actual_errors);
}
// test_type_error_expr!(name, source, expected message, ...)
macro_rules! test_type_error_expr {
($name:ident, $input:expr, $($s:expr),+) => {
#[test]
fn $name() {
let expected_errors: Vec<&str> = (vec![$($s),*]);
test_type_error_expression($input, expected_errors);
}
}
}
test_type_error_expr!(
negate_string,
"-('what?')",
"cannot apply unary operator '-' to expression of type 'string'"
);
test_type_error_expr!(
add_string_number,
"'what?' + 5",
"cannot apply binary operator '+' to expressions of type 'string' (on the left) and 'f64' (on the right)"
);
test_type_error_expr!(
add_number_string,
"5 + 'what?'",
"cannot apply binary operator '+' to expressions of type 'f64' (on the left) and 'string' (on the right)"
);
// Only the two innermost mistakes are reported; the enclosing operators see
// `Type::Error` operands and stay quiet.
test_type_error_expr!(
errors_propagate_do_not_duplicate,
"!'hello' / 27 * -('what?') + 23",
"cannot apply unary operator '!' to expression of type 'string'",
"cannot apply unary operator '-' to expression of type 'string'"
);
test_type_error_expr!(
if_not_bool,
"if 23 { 1 } else { 2 }",
"the condition of an `if` expression must be a boolean"
);
test_type_error_expr!(
if_arm_mismatch,
"if true { 1 } else { '1' }",
"the type of the `then` branch (f64) must match the type of the `else` branch (string)"
);
test_type_error_expr!(
if_no_else,
"if true { 1 }",
"this `if` expression must have both a `then` clause and an `else` clause, so it can produce a value"
);
}

584
fine/src/tokens.rs Normal file
View file

@ -0,0 +1,584 @@
/// The category of a token produced by the tokenizer.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum TokenKind {
// Brackets and single-character punctuation.
LeftBrace,
RightBrace,
LeftBracket,
RightBracket,
LeftParen,
RightParen,
Comma,
Dot,
Minus,
Plus,
Semicolon,
Slash,
Star,
// Operators that are one character, or that character followed by `=`.
Bang,
BangEqual,
Equal,
EqualEqual,
Greater,
GreaterEqual,
Less,
LessEqual,
// Identifiers and literals.
Identifier,
String,
Number,
// Keywords.
And,
Async,
Await,
Class,
Else,
False,
For,
From,
Fun,
If,
Let,
Or,
Print,
Return,
Select,
This,
True,
While,
Yield,
// Produced when the tokenizer cannot make a valid token; the token's text is
// the error message.
Error,
}
/// A single token: its kind, the byte offset where it starts, and its text.
///
/// For ordinary tokens the text is a slice of the source; for error tokens it
/// is an owned error message.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token<'a> {
    kind: TokenKind,
    start: usize,
    /// `Ok` holds the lexeme borrowed from the source, `Err` holds the
    /// message of an error token.
    value: Result<&'a str, String>,
}

impl<'a> Token<'a> {
    /// Creates an ordinary token of `kind` starting at offset `start` whose
    /// text is `value`.
    pub fn new(kind: TokenKind, start: usize, value: &'a str) -> Self {
        Self {
            kind,
            start,
            value: Ok(value),
        }
    }

    /// Creates a `TokenKind::Error` token at offset `start` carrying
    /// `message`.
    pub fn error(start: usize, message: String) -> Self {
        Self {
            kind: TokenKind::Error,
            start,
            value: Err(message),
        }
    }

    /// Offset in the source at which the token starts.
    pub fn start(&self) -> usize {
        self.start
    }

    /// The kind of the token.
    pub fn kind(&self) -> TokenKind {
        self.kind
    }

    /// The token's text: the lexeme for ordinary tokens, the error message
    /// for error tokens.
    pub fn as_str<'b>(&'b self) -> &'a str
    where
        'b: 'a,
    {
        match &self.value {
            Ok(lexeme) => *lexeme,
            Err(message) => message.as_str(),
        }
    }
}

/// Displays exactly the text returned by `as_str`.
impl<'a> std::fmt::Display for Token<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
/// Table of line breaks, used to turn offsets into (line, column) pairs.
pub struct Lines {
    /// Offsets of the newline characters recorded so far. `position` uses a
    /// binary search, so they are expected in ascending order.
    newlines: Vec<usize>,
    /// Offset reported for end-of-file.
    eof: usize,
}

impl Lines {
    /// Creates an empty table for a source whose end is at offset `eof`.
    fn new(eof: usize) -> Self {
        Self {
            newlines: Vec::new(),
            eof,
        }
    }

    /// Record the position of a newline in the source.
    pub fn add_line(&mut self, pos: usize) {
        self.newlines.push(pos);
    }

    /// Return the position of the given token as a (line, column) pair. By
    /// convention, lines are 1-based and columns are 0-based. Also, in
    /// keeping with the iterator-nature of the tokenizer, `None` here
    /// indicates end-of-file, and will return the position of the end of the
    /// file.
    pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
        let offset = token.as_ref().map_or(self.eof, |t| t.start);
        self.position(offset)
    }

    /// Return the position of the given character offset as a (line,column)
    /// pair. By convention, lines are 1-based and columns are 0-based.
    pub fn position(&self, offset: usize) -> (usize, usize) {
        // Index of the newline that ends the line containing `offset`. A hit
        // and a miss are treated alike: an offset that *is* a newline belongs
        // to the line that newline terminates.
        let line_index = self
            .newlines
            .binary_search(&offset)
            .unwrap_or_else(|insert_at| insert_at);

        // The line starts just past the previous newline, or at 0 for the
        // first line.
        let line_start = match line_index.checked_sub(1) {
            Some(previous) => self.newlines[previous] + 1,
            None => 0,
        };

        (line_index + 1, offset - line_start)
    }
}
/// Tokenizer over a source string.
///
/// The tokenizer keeps a one-character lookahead in `next_char`; `chars`
/// always points just past that character.
pub struct Tokens<'a> {
    /// The complete source text; token lexemes are slices of it.
    source: &'a str,
    /// Iterator over the characters that follow `next_char`.
    chars: std::str::CharIndices<'a>,
    /// The next unconsumed character and its offset, or `None` at the end.
    next_char: Option<(usize, char)>,
    /// Newline positions seen so far.
    lines: Lines,
}

impl<'a> Tokens<'a> {
    /// Creates a tokenizer positioned at the start of `source`.
    pub fn new(source: &'a str) -> Self {
        let mut result = Tokens {
            source,
            chars: source.char_indices(),
            next_char: None,
            lines: Lines::new(source.len()),
        };
        result.advance(); // Prime the pump
        result
    }

    /// Consumes the tokenizer and returns the line table it has built.
    pub fn lines(self) -> Lines {
        self.lines
    }

    /// Return the position of the given token as a (line, column) pair. See
    /// `Lines::token_position` for more information about the range, etc.
    pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
        self.lines.token_position(token)
    }

    /// Builds a token of `kind` covering the source from `start` up to the
    /// current position.
    fn token(&self, start: usize, kind: TokenKind) -> Token<'a> {
        Token::new(kind, start, &self.source[start..self.pos()])
    }

    /// Scans the rest of a number whose first digit (at `start`) has already
    /// been consumed: digits and `_`, then optionally `.` plus fraction
    /// digits, then optionally an exponent.
    fn number(&mut self, start: usize) -> Token<'a> {
        // First, the main part.
        while self.matches_digit() {}

        // Now the fraction part. This is speculative: a `.` that is not
        // followed by a digit is not part of the number, so remember where we
        // were. The lookahead character must be saved together with the
        // iterator; restoring only the iterator leaves the `.` consumed and
        // makes the character after it get read twice.
        let saved_chars = self.chars.clone();
        let saved_next_char = self.next_char;
        if self.matches('.') {
            if self.matches_digit_run() {
                // OK we're good to here! Check the scientific notation.
                if self.matches('e') || self.matches('E') {
                    // Optional sign; whether one was present does not matter.
                    let _ = self.matches('+') || self.matches('-');
                    if !self.matches_digit_run() {
                        // This is just a broken number.
                        let slice = &self.source[start..self.pos()];
                        return Token::error(
                            start,
                            format!("Invalid floating-point literal: {slice}"),
                        );
                    }
                }
            } else {
                // Might be accessing a member on an integer.
                self.chars = saved_chars;
                self.next_char = saved_next_char;
            }
        }

        self.token(start, TokenKind::Number)
    }

    /// Consumes a run of digits and `_` separators and reports whether at
    /// least one actual digit was seen.
    fn matches_digit_run(&mut self) -> bool {
        let mut saw_digit = false;
        loop {
            if self.matches('_') {
                continue;
            }
            if !self.matches_next(|c| c.is_ascii_digit()) {
                break;
            }
            saw_digit = true;
        }
        saw_digit
    }

    /// Scans the rest of a string whose opening `delimiter` (at `start`) has
    /// already been consumed. A backslash escapes the character after it, so
    /// an escaped delimiter does not end the string.
    fn string(&mut self, start: usize, delimiter: char) -> Token<'a> {
        while !self.matches(delimiter) {
            if self.eof() {
                return Token::error(start, "Unterminated string constant".to_string());
            }
            // Skip the backslash of an escape, if there is one, so that the
            // `advance` below consumes the escaped character.
            self.matches('\\');
            // Newlines inside a literal still end a source line; record them
            // so that positions after a multi-line string stay correct.
            if let Some((pos, '\n')) = self.next_char {
                self.lines.add_line(pos);
            }
            self.advance();
        }
        self.token(start, TokenKind::String)
    }

    /// Maps an identifier to its keyword kind, or to `Identifier` if it is
    /// not a keyword.
    fn identifier_token_kind(ident: &str) -> TokenKind {
        match ident {
            "and" => TokenKind::And,
            "async" => TokenKind::Async,
            "await" => TokenKind::Await,
            "class" => TokenKind::Class,
            "else" => TokenKind::Else,
            "false" => TokenKind::False,
            "for" => TokenKind::For,
            "from" => TokenKind::From,
            "fun" => TokenKind::Fun,
            "if" => TokenKind::If,
            "let" => TokenKind::Let,
            "or" => TokenKind::Or,
            "print" => TokenKind::Print,
            "return" => TokenKind::Return,
            "select" => TokenKind::Select,
            "this" => TokenKind::This,
            "true" => TokenKind::True,
            "while" => TokenKind::While,
            "yield" => TokenKind::Yield,
            _ => TokenKind::Identifier,
        }
    }

    /// Scans the rest of an identifier or keyword whose first character (at
    /// `start`) has already been consumed.
    fn identifier(&mut self, start: usize) -> Token<'a> {
        // TODO: Use unicode identifier classes instead
        while self.matches_next(|c| c.is_ascii_alphanumeric() || c == '_') {}

        let ident = &self.source[start..self.pos()];
        Token::new(Self::identifier_token_kind(ident), start, ident)
    }

    /// Consumes the next character if it is `ch`; returns whether it did.
    fn matches(&mut self, ch: char) -> bool {
        self.matches_next(|next_ch| next_ch == ch)
    }

    /// Consumes the next character if `f` accepts it; returns whether it did.
    fn matches_next<F>(&mut self, f: F) -> bool
    where
        F: FnOnce(char) -> bool,
    {
        let accepted = match self.next_char {
            Some((_, next_ch)) => f(next_ch),
            None => false,
        };
        if accepted {
            self.advance();
        }
        accepted
    }

    /// Consumes the next character if it is a digit or a `_` separator.
    fn matches_digit(&mut self) -> bool {
        self.matches('_') || self.matches_next(|c| c.is_ascii_digit())
    }

    /// Consumes and returns the lookahead character, refilling the lookahead
    /// from the underlying iterator.
    fn advance(&mut self) -> Option<(usize, char)> {
        let result = self.next_char;
        self.next_char = self.chars.next();
        result
    }

    /// Offset of the lookahead character, or the source length at the end.
    fn pos(&self) -> usize {
        match self.next_char {
            Some((offset, _)) => offset,
            None => self.source.len(),
        }
    }

    /// Returns `true` once every character has been consumed.
    fn eof(&self) -> bool {
        self.next_char.is_none()
    }

    /// Consumes whitespace, recording the position of every newline.
    fn skip_whitespace(&mut self) {
        while let Some((pos, ch)) = self.next_char {
            if ch == '\n' {
                self.lines.add_line(pos);
            } else if !ch.is_whitespace() {
                break;
            }
            self.advance();
        }
    }
}
impl<'a> std::iter::Iterator for Tokens<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
self.skip_whitespace(); // TODO: Whitespace preserving/comment preserving
let (pos, c) = match self.advance() {
Some((p, c)) => (p, c),
None => return None,
};
let token = match c {
'{' => self.token(pos, TokenKind::LeftBrace),
'}' => self.token(pos, TokenKind::RightBrace),
'[' => self.token(pos, TokenKind::LeftBracket),
']' => self.token(pos, TokenKind::RightBracket),
'(' => self.token(pos, TokenKind::LeftParen),
')' => self.token(pos, TokenKind::RightParen),
',' => self.token(pos, TokenKind::Comma),
'.' => self.token(pos, TokenKind::Dot),
'-' => self.token(pos, TokenKind::Minus),
'+' => self.token(pos, TokenKind::Plus),
';' => self.token(pos, TokenKind::Semicolon),
'/' => self.token(pos, TokenKind::Slash),
'*' => self.token(pos, TokenKind::Star),
'!' => {
if self.matches('=') {
self.token(pos, TokenKind::BangEqual)
} else {
self.token(pos, TokenKind::Bang)
}
}
'=' => {
if self.matches('=') {
self.token(pos, TokenKind::EqualEqual)
} else {
self.token(pos, TokenKind::Equal)
}
}
'>' => {
if self.matches('=') {
self.token(pos, TokenKind::GreaterEqual)
} else {
self.token(pos, TokenKind::Greater)
}
}
'<' => {
if self.matches('=') {
self.token(pos, TokenKind::LessEqual)
} else {
self.token(pos, TokenKind::Less)
}
}
'\'' => self.string(pos, '\''),
'"' => self.string(pos, '"'),
_ => {
if c.is_ascii_digit() {
self.number(pos)
} else if c.is_ascii_alphabetic() || c == '_' {
self.identifier(pos)
} else {
Token::error(pos, format!("Unexpected character '{c}'"))
}
}
};
Some(token)
}
}
// Tokenizer tests. Each `test_tokens!` invocation expands to one `#[test]`
// function.
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
// test_tokens!(name, source, (start offset, kind, text), ...)
//
// Tokenizes `source` and compares the result with the listed tokens. Kinds
// are written without the `TokenKind::` prefix.
macro_rules! test_tokens {
($name:ident, $input:expr, $($s:expr),+) => {
#[test]
fn $name() {
use TokenKind::*;
let tokens: Vec<_> = Tokens::new($input).collect();
let expected: Vec<Token> = (vec![$($s),*])
.into_iter()
.map(|t| Token::new(t.1, t.0, t.2))
.collect();
assert_eq!(expected, tokens);
}
}
}
// `_` is accepted as a separator inside numbers, including in the exponent.
test_tokens!(
numbers,
"1 1.0 1.2e7 2.3e+7 3.3E-06 7_6 8.0e_8",
(0, Number, "1"),
(2, Number, "1.0"),
(6, Number, "1.2e7"),
(12, Number, "2.3e+7"),
(19, Number, "3.3E-06"),
(27, Number, "7_6"),
(31, Number, "8.0e_8")
);
test_tokens!(
identifiers,
"asdf x _123 a_23 x3a and or yield async await class else false for from",
(0, Identifier, "asdf"),
(5, Identifier, "x"),
(7, Identifier, "_123"),
(12, Identifier, "a_23"),
(17, Identifier, "x3a"),
(21, And, "and"),
(25, Or, "or"),
(28, Yield, "yield"),
(34, Async, "async"),
(40, Await, "await"),
(46, Class, "class"),
(52, Else, "else"),
(57, False, "false"),
(63, For, "for"),
(67, From, "from")
);
// `truewhile` checks that a keyword prefix does not split an identifier.
test_tokens!(
more_keywords,
"fun if let print return select this true while truewhile",
(0, Fun, "fun"),
(4, If, "if"),
(7, Let, "let"),
(11, Print, "print"),
(17, Return, "return"),
(24, Select, "select"),
(31, This, "this"),
(36, True, "true"),
(41, While, "while"),
(47, Identifier, "truewhile")
);
// String tokens keep their delimiters and their escape sequences verbatim.
test_tokens!(
strings,
r#"'this is a string that\'s great!\r\n' "foo's" 'bar"s' "#,
(0, String, r#"'this is a string that\'s great!\r\n'"#),
(38, String, r#""foo's""#),
(46, String, "'bar\"s'")
);
test_tokens!(
symbols,
"{ } ( ) [ ] . ! != < <= > >= = == , - + * / ;",
(0, LeftBrace, "{"),
(2, RightBrace, "}"),
(4, LeftParen, "("),
(6, RightParen, ")"),
(8, LeftBracket, "["),
(10, RightBracket, "]"),
(12, Dot, "."),
(14, Bang, "!"),
(16, BangEqual, "!="),
(19, Less, "<"),
(21, LessEqual, "<="),
(24, Greater, ">"),
(26, GreaterEqual, ">="),
(29, Equal, "="),
(31, EqualEqual, "=="),
(34, Comma, ","),
(36, Minus, "-"),
(38, Plus, "+"),
(40, Star, "*"),
(42, Slash, "/"),
(44, Semicolon, ";")
);
}