[fine] Tokens is not Iterator

It was not pulling its weight
This commit is contained in:
John Doty 2024-01-03 06:18:26 -08:00
parent c4d2b82968
commit d2d144a5ec
2 changed files with 107 additions and 152 deletions

View file

@ -267,7 +267,7 @@ impl<'a> SyntaxTree<'a> {
// This is dumb and should be punished, probably. // This is dumb and should be punished, probably.
(_, Type::Unreachable) => { (_, Type::Unreachable) => {
let (line, col) = lines.position(tok.start()); let (line, col) = lines.position(tok.start);
self.errors.push(SyntaxError::new(line, col, format!("cannot apply a unary operator to something that doesn't yield a value"))); self.errors.push(SyntaxError::new(line, col, format!("cannot apply a unary operator to something that doesn't yield a value")));
Type::Error Type::Error
} }
@ -277,7 +277,7 @@ impl<'a> SyntaxTree<'a> {
// Missed the whole table, must be an error. // Missed the whole table, must be an error.
(_, arg_type) => { (_, arg_type) => {
let (line, col) = lines.position(tok.start()); let (line, col) = lines.position(tok.start);
self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'"))); self.errors.push(SyntaxError::new(line, col, format!("cannot apply unary operator '{tok}' to expression of type '{arg_type}'")));
Type::Error Type::Error
} }
@ -305,7 +305,7 @@ impl<'a> SyntaxTree<'a> {
// This is dumb and should be punished, probably. // This is dumb and should be punished, probably.
(_, _, Type::Unreachable) => { (_, _, Type::Unreachable) => {
let (line, col) = lines.position(tok.start()); let (line, col) = lines.position(tok.start);
self.errors.push(SyntaxError::new( self.errors.push(SyntaxError::new(
line, line,
col, col,
@ -316,7 +316,7 @@ impl<'a> SyntaxTree<'a> {
Type::Error Type::Error
} }
(_, Type::Unreachable, _) => { (_, Type::Unreachable, _) => {
let (line, col) = lines.position(tok.start()); let (line, col) = lines.position(tok.start);
self.errors.push(SyntaxError::new( self.errors.push(SyntaxError::new(
line, line,
col, col,
@ -333,7 +333,7 @@ impl<'a> SyntaxTree<'a> {
// Missed the whole table, it must be an error. // Missed the whole table, it must be an error.
(_, left_type, right_type) => { (_, left_type, right_type) => {
let (line, col) = lines.position(tok.start()); let (line, col) = lines.position(tok.start);
self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)"))); self.errors.push(SyntaxError::new(line, col, format!("cannot apply binary operator '{tok}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)")));
Type::Error Type::Error
} }
@ -354,8 +354,8 @@ impl<'a> SyntaxTree<'a> {
.expr_span(&cond) .expr_span(&cond)
.expect("If the expression has a type it must have a span"); .expect("If the expression has a type it must have a span");
let start = lines.position(span.0.start()); let start = lines.position(span.0.start);
let end = lines.position(span.1.start()); let end = lines.position(span.1.start);
self.errors.push(SyntaxError::new_spanned( self.errors.push(SyntaxError::new_spanned(
start, start,
end, end,
@ -374,8 +374,8 @@ impl<'a> SyntaxTree<'a> {
let span = self let span = self
.expr_span(&exr) .expr_span(&exr)
.expect("How did I get this far with a broken parse?"); .expect("How did I get this far with a broken parse?");
let start = lines.position(span.0.start()); let start = lines.position(span.0.start);
let end = lines.position(span.1.start()); let end = lines.position(span.1.start);
self.errors.push(SyntaxError::new_spanned( self.errors.push(SyntaxError::new_spanned(
start, start,
end, end,
@ -392,8 +392,8 @@ impl<'a> SyntaxTree<'a> {
let span = self let span = self
.expr_span(&exr) .expr_span(&exr)
.expect("How did I get this far with a broken parse?"); .expect("How did I get this far with a broken parse?");
let start = lines.position(span.0.start()); let start = lines.position(span.0.start);
let end = lines.position(span.1.start()); let end = lines.position(span.1.start);
self.errors.push(SyntaxError::new_spanned( self.errors.push(SyntaxError::new_spanned(
start, start,
end, end,
@ -431,13 +431,8 @@ const UNARY_POWER: u8 = 7; // ! -
// const CALL_POWER: u8 = 8; // . () // const CALL_POWER: u8 = 8; // . ()
// const PRIMARY_POWER: u8 = 9; // const PRIMARY_POWER: u8 = 9;
fn token_power<'a>(token: &Option<Token<'a>>) -> Option<u8> { fn token_power<'a>(token: &Token<'a>) -> Option<u8> {
let token = match token { match token.kind {
Some(t) => t,
None => return None,
};
match token.kind() {
TokenKind::Equal => Some(ASSIGNMENT_POWER), TokenKind::Equal => Some(ASSIGNMENT_POWER),
TokenKind::Or => Some(OR_POWER), TokenKind::Or => Some(OR_POWER),
TokenKind::And => Some(AND_POWER), TokenKind::And => Some(AND_POWER),
@ -454,8 +449,8 @@ fn token_power<'a>(token: &Option<Token<'a>>) -> Option<u8> {
pub struct Parser<'a> { pub struct Parser<'a> {
tokens: Tokens<'a>, tokens: Tokens<'a>,
tree: SyntaxTree<'a>, tree: SyntaxTree<'a>,
current: Option<Token<'a>>, current: Token<'a>,
previous: Option<Token<'a>>, previous: Token<'a>,
panic_mode: bool, panic_mode: bool,
} }
@ -465,8 +460,8 @@ impl<'a> Parser<'a> {
let mut parser = Parser { let mut parser = Parser {
tokens: Tokens::new(source), tokens: Tokens::new(source),
tree: SyntaxTree::new(), tree: SyntaxTree::new(),
current: None, current: Token::new(TokenKind::EOF, 0, ""),
previous: None, previous: Token::new(TokenKind::EOF, 0, ""),
panic_mode: false, panic_mode: false,
}; };
parser.advance(); parser.advance();
@ -475,7 +470,7 @@ impl<'a> Parser<'a> {
pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) { pub fn parse(mut self) -> (SyntaxTree<'a>, ExprRef, Lines) {
let expr = self.expression(); let expr = self.expression();
self.consume(None, "expected end of expression"); self.consume(TokenKind::EOF, "expected end of expression");
(self.tree, expr, self.tokens.lines()) (self.tree, expr, self.tokens.lines())
} }
@ -505,30 +500,24 @@ impl<'a> Parser<'a> {
fn prefix_expression(&mut self) -> ExprRef { fn prefix_expression(&mut self) -> ExprRef {
self.trace("prefix"); self.trace("prefix");
let token = self.previous.as_ref(); let token = &self.previous;
match token { match token.kind {
Some(token) => match token.kind() { TokenKind::Bang => self.unary(),
TokenKind::Bang => self.unary(), TokenKind::LeftParen => self.grouping(),
TokenKind::LeftParen => self.grouping(), TokenKind::Number => self.number(),
TokenKind::Number => self.number(), TokenKind::Minus => self.unary(),
TokenKind::Minus => self.unary(), TokenKind::String => self.string(),
TokenKind::String => self.string(),
TokenKind::True => self TokenKind::True => self
.tree .tree
.add_expr(Expr::Literal(Literal::Bool(true), token.clone())), .add_expr(Expr::Literal(Literal::Bool(true), token.clone())),
TokenKind::False => self TokenKind::False => self
.tree .tree
.add_expr(Expr::Literal(Literal::Bool(false), token.clone())), .add_expr(Expr::Literal(Literal::Bool(false), token.clone())),
TokenKind::If => self.conditional(), TokenKind::If => self.conditional(),
_ => { _ => {
self.error("expected an expression");
ExprRef::error()
}
},
None => {
self.error("expected an expression"); self.error("expected an expression");
ExprRef::error() ExprRef::error()
} }
@ -537,8 +526,7 @@ impl<'a> Parser<'a> {
fn infix_expression(&mut self, power: u8, left: ExprRef) -> ExprRef { fn infix_expression(&mut self, power: u8, left: ExprRef) -> ExprRef {
self.trace("infix"); self.trace("infix");
let kind = self.previous.as_ref().unwrap().kind(); match self.previous.kind {
match kind {
TokenKind::Plus TokenKind::Plus
| TokenKind::Minus | TokenKind::Minus
| TokenKind::Star | TokenKind::Star
@ -550,7 +538,7 @@ impl<'a> Parser<'a> {
} }
fn number(&mut self) -> ExprRef { fn number(&mut self) -> ExprRef {
let token = self.previous.as_ref().unwrap(); let token = &self.previous;
// What kind is it? For now let's just ... make it good. // What kind is it? For now let's just ... make it good.
let literal = match token.as_str().parse::<f64>() { let literal = match token.as_str().parse::<f64>() {
@ -565,7 +553,7 @@ impl<'a> Parser<'a> {
} }
fn string(&mut self) -> ExprRef { fn string(&mut self) -> ExprRef {
let token = self.previous.as_ref().unwrap(); let token = &self.previous;
let mut result = String::new(); let mut result = String::new();
let mut input = token.as_str().chars(); let mut input = token.as_str().chars();
@ -590,51 +578,34 @@ impl<'a> Parser<'a> {
fn grouping(&mut self) -> ExprRef { fn grouping(&mut self) -> ExprRef {
let result = self.expression(); let result = self.expression();
self.consume( self.consume(TokenKind::RightParen, "expected ')' after an expression");
Some(TokenKind::RightParen),
"expected ')' after an expression",
);
result result
} }
fn conditional(&mut self) -> ExprRef { fn conditional(&mut self) -> ExprRef {
let token = self.previous.as_ref().unwrap().clone(); let token = self.previous.clone();
let condition_expr = self.expression(); let condition_expr = self.expression();
self.consume( self.consume(TokenKind::LeftBrace, "expected '{' to start an 'if' block");
Some(TokenKind::LeftBrace),
"expected '{' to start an 'if' block",
);
let then_expr = self.expression(); let then_expr = self.expression();
self.consume( self.consume(TokenKind::RightBrace, "expected '}' to end an 'if' block");
Some(TokenKind::RightBrace), let else_expr = if self.current.kind == TokenKind::Else {
"expected '}' to end an 'if' block", self.advance();
); if self.current.kind == TokenKind::If {
let else_expr = match &self.current {
Some(token) if token.kind() == TokenKind::Else => {
self.advance(); self.advance();
match &self.current { Some(self.conditional())
// Allow `else if` without another `{`. } else {
Some(token) if token.kind() == TokenKind::If => { self.consume(
self.advance(); TokenKind::LeftBrace,
Some(self.conditional()) "expected '{' to start an 'else' block",
} );
_ => { let else_expr = self.expression();
self.consume( self.consume(TokenKind::RightBrace, "Expected '}' to end an 'else' block");
Some(TokenKind::LeftBrace), Some(else_expr)
"expected '{' to start an 'else' block",
);
let else_expr = self.expression();
self.consume(
Some(TokenKind::RightBrace),
"Expected '}' to end an 'else' block",
);
Some(else_expr)
}
}
} }
_ => None, } else {
None
}; };
let tail = self.previous.as_ref().unwrap().clone(); let tail = self.previous.clone();
self.tree.add_expr(Expr::Conditional( self.tree.add_expr(Expr::Conditional(
token, token,
condition_expr, condition_expr,
@ -645,8 +616,8 @@ impl<'a> Parser<'a> {
} }
fn unary(&mut self) -> ExprRef { fn unary(&mut self) -> ExprRef {
let token = self.previous.as_ref().unwrap().clone(); let token = self.previous.clone();
let kind = token.kind(); let kind = token.kind;
let expr = self.expression_with_power(UNARY_POWER); let expr = self.expression_with_power(UNARY_POWER);
let op = match kind { let op = match kind {
TokenKind::Minus => UnaryOp::Negate, TokenKind::Minus => UnaryOp::Negate,
@ -658,8 +629,8 @@ impl<'a> Parser<'a> {
} }
fn binary(&mut self, power: u8, left: ExprRef) -> ExprRef { fn binary(&mut self, power: u8, left: ExprRef) -> ExprRef {
let token = self.previous.as_ref().unwrap().clone(); let token = self.previous.clone();
let op = match token.kind() { let op = match token.kind {
TokenKind::Plus => BinaryOp::Add, TokenKind::Plus => BinaryOp::Add,
TokenKind::Minus => BinaryOp::Subtract, TokenKind::Minus => BinaryOp::Subtract,
TokenKind::Star => BinaryOp::Multiply, TokenKind::Star => BinaryOp::Multiply,
@ -673,25 +644,19 @@ impl<'a> Parser<'a> {
} }
fn advance(&mut self) { fn advance(&mut self) {
self.previous = self.current.take(); self.previous = self.current.clone();
loop { self.current = self.tokens.next();
while self.current.kind == TokenKind::Error {
self.error_at_current(self.current.to_string());
self.current = self.tokens.next(); self.current = self.tokens.next();
match &self.current {
Some(token) if token.kind() == TokenKind::Error => {
self.error_at_current(token.to_string())
}
_ => break,
}
} }
} }
fn consume(&mut self, kind: Option<TokenKind>, error: &str) { fn consume(&mut self, kind: TokenKind, error: &str) {
match (&self.current, kind) { if self.current.kind == kind {
(Some(token), Some(kind)) if token.kind() == kind => self.advance(), self.advance();
(None, None) => (), } else {
_ => { self.error_at_current(error);
self.error_at_current(error);
}
} }
} }
@ -709,7 +674,7 @@ impl<'a> Parser<'a> {
self.error_at(self.current.clone(), message) self.error_at(self.current.clone(), message)
} }
fn error_at<T>(&mut self, token: Option<Token<'a>>, message: T) fn error_at<T>(&mut self, token: Token<'a>, message: T)
where where
T: Into<String>, T: Into<String>,
{ {
@ -721,15 +686,13 @@ impl<'a> Parser<'a> {
let message: String = message.into(); let message: String = message.into();
let (line, column) = self.tokens.token_position(&token); let (line, column) = self.tokens.token_position(&token);
let mut final_message = "Error ".to_string(); let mut final_message = "Error ".to_string();
match token {
None => final_message.push_str("at end"), if token.kind == TokenKind::EOF {
Some(t) => { final_message.push_str("at end")
if t.kind() != TokenKind::Error { } else if token.kind != TokenKind::Error {
final_message.push_str("at '"); final_message.push_str("at '");
final_message.push_str(t.as_str()); final_message.push_str(token.as_str());
final_message.push_str("'"); final_message.push_str("'");
}
}
} }
final_message.push_str(": "); final_message.push_str(": ");
final_message.push_str(&message); final_message.push_str(&message);

View file

@ -1,5 +1,8 @@
#[derive(Debug, PartialEq, Eq, Clone, Copy)] #[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum TokenKind { pub enum TokenKind {
EOF,
Error,
LeftBrace, LeftBrace,
RightBrace, RightBrace,
LeftBracket, LeftBracket,
@ -47,14 +50,12 @@ pub enum TokenKind {
Use, Use,
While, While,
Yield, Yield,
Error,
} }
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token<'a> { pub struct Token<'a> {
kind: TokenKind, pub kind: TokenKind,
start: usize, pub start: usize,
value: Result<&'a str, String>, value: Result<&'a str, String>,
} }
@ -75,14 +76,6 @@ impl<'a> Token<'a> {
} }
} }
pub fn start(&self) -> usize {
self.start
}
pub fn kind(&self) -> TokenKind {
self.kind
}
pub fn as_str<'b>(&'b self) -> &'a str pub fn as_str<'b>(&'b self) -> &'a str
where where
'b: 'a, 'b: 'a,
@ -102,14 +95,12 @@ impl<'a> std::fmt::Display for Token<'a> {
pub struct Lines { pub struct Lines {
newlines: Vec<usize>, newlines: Vec<usize>,
eof: usize,
} }
impl Lines { impl Lines {
fn new(eof: usize) -> Self { fn new() -> Self {
Lines { Lines {
newlines: Vec::new(), newlines: Vec::new(),
eof,
} }
} }
@ -119,16 +110,9 @@ impl Lines {
} }
/// Return the position of the given token as a (line, column) pair. By /// Return the position of the given token as a (line, column) pair. By
/// convention, lines are 1-based and columns are 0-based. Also, in /// convention, lines are 1-based and columns are 0-based.
/// keeping with the iterator-nature of the tokenizer, `None` here pub fn token_position(&self, token: &Token) -> (usize, usize) {
/// indicates end-of-file, and will return the position of the end of the self.position(token.start)
/// file.
pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
let start = match token {
Some(t) => t.start,
None => self.eof,
};
self.position(start)
} }
/// Return the position of the given character offset as a (line,column) /// Return the position of the given character offset as a (line,column)
@ -162,7 +146,7 @@ impl<'a> Tokens<'a> {
source, source,
chars: source.char_indices(), chars: source.char_indices(),
next_char: None, next_char: None,
lines: Lines::new(source.len()), lines: Lines::new(),
}; };
result.advance(); // Prime the pump result.advance(); // Prime the pump
result result
@ -174,7 +158,7 @@ impl<'a> Tokens<'a> {
/// Return the position of the given token as a (line, column) pair. See /// Return the position of the given token as a (line, column) pair. See
/// `Lines::token_position` for more information about the range, etc. /// `Lines::token_position` for more information about the range, etc.
pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) { pub fn token_position(&self, token: &Token) -> (usize, usize) {
self.lines.token_position(token) self.lines.token_position(token)
} }
@ -415,19 +399,15 @@ impl<'a> Tokens<'a> {
self.advance(); self.advance();
} }
} }
}
impl<'a> std::iter::Iterator for Tokens<'a> { pub fn next(&mut self) -> Token<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
self.skip_whitespace(); // TODO: Whitespace preserving/comment preserving self.skip_whitespace(); // TODO: Whitespace preserving/comment preserving
let (pos, c) = match self.advance() { let (pos, c) = match self.advance() {
Some((p, c)) => (p, c), Some((p, c)) => (p, c),
None => return None, None => return self.token(self.source.len(), TokenKind::EOF),
}; };
let token = match c { match c {
'{' => self.token(pos, TokenKind::LeftBrace), '{' => self.token(pos, TokenKind::LeftBrace),
'}' => self.token(pos, TokenKind::RightBrace), '}' => self.token(pos, TokenKind::RightBrace),
'[' => self.token(pos, TokenKind::LeftBracket), '[' => self.token(pos, TokenKind::LeftBracket),
@ -480,8 +460,7 @@ impl<'a> std::iter::Iterator for Tokens<'a> {
Token::error(pos, format!("Unexpected character '{c}'")) Token::error(pos, format!("Unexpected character '{c}'"))
} }
} }
}; }
Some(token)
} }
} }
@ -490,19 +469,32 @@ mod tests {
use super::*; use super::*;
use pretty_assertions::assert_eq; use pretty_assertions::assert_eq;
fn test_tokens_impl(input: &str, expected: Vec<Token>) {
let mut result = Vec::new();
let mut tokens = Tokens::new(input);
let mut is_eof = false;
while !is_eof {
let token = tokens.next();
is_eof = token.kind == TokenKind::EOF;
result.push(token);
}
assert_eq!(expected, result);
}
macro_rules! test_tokens { macro_rules! test_tokens {
($name:ident, $input:expr, $($s:expr),+) => { ($name:ident, $input:expr, $($s:expr),+) => {
#[test] #[test]
fn $name() { fn $name() {
use TokenKind::*; use TokenKind::*;
let tokens: Vec<_> = Tokens::new($input).collect();
let expected: Vec<Token> = (vec![$($s),*]) let mut expected: Vec<Token> = (vec![$($s),*])
.into_iter() .into_iter()
.map(|t| Token::new(t.1, t.0, t.2)) .map(|t| Token::new(t.1, t.0, t.2))
.collect(); .collect();
expected.push(Token::new(TokenKind::EOF, $input.len(), ""));
assert_eq!(expected, tokens); test_tokens_impl($input, expected);
} }
} }
} }