[fine] Tokens is not Iterator

The `Iterator` implementation for `Tokens` was not pulling its weight.
This commit is contained in:
John Doty 2024-01-03 06:18:26 -08:00
parent c4d2b82968
commit d2d144a5ec
2 changed files with 107 additions and 152 deletions

View file

@ -1,5 +1,8 @@
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum TokenKind {
EOF,
Error,
LeftBrace,
RightBrace,
LeftBracket,
@ -47,14 +50,12 @@ pub enum TokenKind {
Use,
While,
Yield,
Error,
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token<'a> {
kind: TokenKind,
start: usize,
pub kind: TokenKind,
pub start: usize,
value: Result<&'a str, String>,
}
@ -75,14 +76,6 @@ impl<'a> Token<'a> {
}
}
pub fn start(&self) -> usize {
self.start
}
pub fn kind(&self) -> TokenKind {
self.kind
}
pub fn as_str<'b>(&'b self) -> &'a str
where
'b: 'a,
@ -102,14 +95,12 @@ impl<'a> std::fmt::Display for Token<'a> {
pub struct Lines {
newlines: Vec<usize>,
eof: usize,
}
impl Lines {
fn new(eof: usize) -> Self {
fn new() -> Self {
Lines {
newlines: Vec::new(),
eof,
}
}
@ -119,16 +110,9 @@ impl Lines {
}
/// Return the position of the given token as a (line, column) pair. By
/// convention, lines are 1-based and columns are 0-based. Also, in
/// keeping with the iterator-nature of the tokenizer, `None` here
/// indicates end-of-file, and will return the position of the end of the
/// file.
pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
let start = match token {
Some(t) => t.start,
None => self.eof,
};
self.position(start)
/// convention, lines are 1-based and columns are 0-based.
pub fn token_position(&self, token: &Token) -> (usize, usize) {
self.position(token.start)
}
/// Return the position of the given character offset as a (line,column)
@ -162,7 +146,7 @@ impl<'a> Tokens<'a> {
source,
chars: source.char_indices(),
next_char: None,
lines: Lines::new(source.len()),
lines: Lines::new(),
};
result.advance(); // Prime the pump
result
@ -174,7 +158,7 @@ impl<'a> Tokens<'a> {
/// Return the position of the given token as a (line, column) pair. See
/// `Lines::token_position` for more information about the range, etc.
pub fn token_position(&self, token: &Option<Token>) -> (usize, usize) {
pub fn token_position(&self, token: &Token) -> (usize, usize) {
self.lines.token_position(token)
}
@ -415,19 +399,15 @@ impl<'a> Tokens<'a> {
self.advance();
}
}
}
impl<'a> std::iter::Iterator for Tokens<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
pub fn next(&mut self) -> Token<'a> {
self.skip_whitespace(); // TODO: Whitespace preserving/comment preserving
let (pos, c) = match self.advance() {
Some((p, c)) => (p, c),
None => return None,
None => return self.token(self.source.len(), TokenKind::EOF),
};
let token = match c {
match c {
'{' => self.token(pos, TokenKind::LeftBrace),
'}' => self.token(pos, TokenKind::RightBrace),
'[' => self.token(pos, TokenKind::LeftBracket),
@ -480,8 +460,7 @@ impl<'a> std::iter::Iterator for Tokens<'a> {
Token::error(pos, format!("Unexpected character '{c}'"))
}
}
};
Some(token)
}
}
}
@ -490,19 +469,32 @@ mod tests {
use super::*;
use pretty_assertions::assert_eq;
/// Tokenize `input` to completion and compare the output with `expected`.
///
/// Tokens are pulled from a fresh `Tokens` one at a time and collected in
/// order. Collection stops right after the first token whose kind is
/// `TokenKind::EOF`, so that EOF token is the last element of the collected
/// list. The collected list is then asserted equal to `expected`.
fn test_tokens_impl(input: &str, expected: Vec<Token>) {
    let mut tokens = Tokens::new(input);
    let mut result = Vec::new();
    loop {
        let token = tokens.next();
        // Record the kind check before the token is moved into the vector.
        let reached_eof = token.kind == TokenKind::EOF;
        result.push(token);
        if reached_eof {
            break;
        }
    }
    assert_eq!(expected, result);
}
macro_rules! test_tokens {
($name:ident, $input:expr, $($s:expr),+) => {
#[test]
fn $name() {
use TokenKind::*;
let tokens: Vec<_> = Tokens::new($input).collect();
let expected: Vec<Token> = (vec![$($s),*])
let mut expected: Vec<Token> = (vec![$($s),*])
.into_iter()
.map(|t| Token::new(t.1, t.0, t.2))
.collect();
expected.push(Token::new(TokenKind::EOF, $input.len(), ""));
assert_eq!(expected, tokens);
test_tokens_impl($input, expected);
}
}
}