[fine] Dump CSTs and an example zoo

This commit is contained in:
John Doty 2024-01-04 13:25:17 -08:00
parent 757db0ba3e
commit 1f6d7ec131
9 changed files with 357 additions and 32 deletions

View file

@ -646,8 +646,13 @@ impl<'a> Parser<'a> {
fn advance(&mut self) {
self.previous = self.current.clone();
self.current = self.tokens.next();
while self.current.kind == TokenKind::Error {
self.error_at_current(self.current.to_string());
while self.current.kind == TokenKind::Error
|| self.current.kind == TokenKind::Whitespace
|| self.current.kind == TokenKind::Comment
{
if self.current.kind == TokenKind::Error {
self.error_at_current(self.current.to_string());
}
self.current = self.tokens.next();
}
}

View file

@ -32,6 +32,7 @@ fn token_power<'a>(token: TokenKind) -> Option<u8> {
}
}
#[derive(Debug)]
pub enum TreeKind {
Error,
File,
@ -62,11 +63,53 @@ pub struct Tree<'a> {
pub children: Vec<Child<'a>>,
}
impl<'a> Tree<'a> {
    /// Renders this tree as multi-line text.
    ///
    /// The first line is the `Debug` form of the root's kind; every direct
    /// child is then written by `Child::dump_rec` starting at an indent of
    /// two spaces.
    pub fn dump(&self) -> String {
        // Seed the buffer with the root line, then let each child append
        // its own (recursively indented) lines.
        let mut rendered = format!("{:?}\n", self.kind);
        for node in &self.children {
            node.dump_rec(2, &mut rendered);
        }
        rendered
    }
}
impl<'a> std::fmt::Debug for Tree<'a> {
    /// Writes the tree on a single line as `[Kind child child ...]`.
    ///
    /// Token children are written as `Kind:'text'`; tree children are
    /// written recursively using this same bracketed form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "[{:?}", self.kind)?;
        for node in &self.children {
            match node {
                Child::Token(token) => {
                    write!(f, " {:?}:'{}'", token.kind, token.as_str())?;
                }
                Child::Tree(subtree) => {
                    write!(f, " {subtree:?}")?;
                }
            }
        }
        // The closing bracket is the last thing written, so its result is
        // the result of the whole operation.
        f.write_str("]")
    }
}
/// One entry in a `Tree`'s `children` list: either a single token or a
/// nested subtree.
pub enum Child<'a> {
    /// A leaf: one token.
    Token(Token<'a>),
    /// An interior node: a nested tree with its own children.
    Tree(Tree<'a>),
}
impl<'a> Child<'a> {
    /// Appends this child, and everything beneath it, to `output`.
    ///
    /// Each node occupies one line that starts with `indent` spaces. A token
    /// is written as `Kind:'text'`, where the text is `Debug`-formatted (so
    /// it appears quoted and escaped inside the single quotes). A tree is
    /// written as its kind, followed by its children at `indent + 2`.
    fn dump_rec(&self, indent: usize, output: &mut String) {
        output.extend(std::iter::repeat(' ').take(indent));

        // Tokens are leaves: write the single line and stop.
        let subtree = match self {
            Child::Token(token) => {
                let line = format!("{:?}:'{:?}'\n", token.kind, token.as_str());
                output.push_str(&line);
                return;
            }
            Child::Tree(subtree) => subtree,
        };

        output.push_str(&format!("{:?}\n", subtree.kind));
        let nested_indent = indent + 2;
        for node in &subtree.children {
            node.dump_rec(nested_indent, output);
        }
    }
}
enum ParseEvent<'a> {
Start { kind: TreeKind },
End,
@ -97,6 +140,7 @@ impl<'a> CParser<'a> {
events: Vec::new(),
};
parser.current = parser.tokens.next();
parser.skip_ephemera();
parser
}
@ -135,6 +179,14 @@ impl<'a> CParser<'a> {
token: self.current.clone(),
});
self.current = self.tokens.next();
self.skip_ephemera();
}
/// Advances `self.current` past any run of whitespace and comment tokens,
/// leaving it on the first token of any other kind.
fn skip_ephemera(&mut self) {
    while matches!(
        self.current.kind,
        TokenKind::Whitespace | TokenKind::Comment
    ) {
        self.current = self.tokens.next();
    }
}
fn eof(&self) -> bool {
@ -213,8 +265,14 @@ impl<'a> CParser<'a> {
let mut events = self.events;
let mut stack = Vec::new();
// Special case: pop the last `Close` event to ensure that the stack
// is non-empty inside the loop.
// The first element in our events vector must be a start; the whole
// thing must be bracketed in a tree.
assert!(matches!(events.get(0), Some(ParseEvent::Start { .. })));
// The last element in our events vector must be an end, otherwise
// the parser has failed badly. We'll remove it here so that, after
// processing the entire array, the stack still holds the tree that
// was opened by the very first ::Start.
assert!(matches!(events.pop(), Some(ParseEvent::End)));
for event in events {
@ -240,7 +298,7 @@ impl<'a> CParser<'a> {
}
}
pub fn c_parse(source: &str) -> (Tree, Lines) {
pub fn parse_concrete(source: &str) -> (Tree, Lines) {
let tokens = Tokens::new(source);
let mut parser = CParser::new(tokens);
@ -527,3 +585,34 @@ fn identifier(p: &mut CParser) -> MarkClosed {
p.end(m, TreeKind::Identifier)
}
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Parses `source` as a single expression and asserts that the `Debug`
    /// rendering of the resulting tree is exactly `expected`.
    fn test_successful_expression_parse(source: &str, expected: &str) {
        let mut parser = CParser::new(Tokens::new(source));
        expression(&mut parser);

        let (tree, _) = parser.build_tree();
        let actual = format!("{tree:?}");
        assert_eq!(
            expected, actual,
            "The parse structure of the expressions did not match"
        );
    }

    /// Declares a `#[test]` function called `$name` that checks that parsing
    /// `$input` as an expression yields the tree rendering `$expected`.
    macro_rules! test_expr {
        ($name:ident, $input:expr, $expected:expr) => {
            #[test]
            fn $name() {
                test_successful_expression_parse($input, $expected);
            }
        };
    }

    test_expr!(number_expr, "12", "[LiteralExpression Number:'12']");
}

View file

@ -3,6 +3,9 @@ pub enum TokenKind {
EOF,
Error,
Whitespace,
Comment,
LeftBrace,
RightBrace,
LeftBracket,
@ -390,7 +393,7 @@ impl<'a> Tokens<'a> {
self.next_char.is_none()
}
fn skip_whitespace(&mut self) {
fn whitespace(&mut self, pos: usize) -> Token<'a> {
while let Some((pos, ch)) = self.next_char {
if ch == '\n' {
self.lines.add_line(pos);
@ -399,16 +402,27 @@ impl<'a> Tokens<'a> {
}
self.advance();
}
self.token(pos, TokenKind::Whitespace)
}
/// Consumes characters up to, but not including, the next `'\n'` (or the
/// end of input) and returns a `Comment` token that starts at `pos`.
fn comment(&mut self, pos: usize) -> Token<'a> {
    // Keep advancing while there is a pending character and it is not the
    // line terminator; the newline itself is left unconsumed.
    while matches!(self.next_char, Some((_, ch)) if ch != '\n') {
        self.advance();
    }
    self.token(pos, TokenKind::Comment)
}
pub fn next(&mut self) -> Token<'a> {
self.skip_whitespace(); // TODO: Whitespace preserving/comment preserving
let (pos, c) = match self.advance() {
Some((p, c)) => (p, c),
None => return self.token(self.source.len(), TokenKind::EOF),
};
match c {
' ' | '\t' | '\r' | '\n' => self.whitespace(pos),
'{' => self.token(pos, TokenKind::LeftBrace),
'}' => self.token(pos, TokenKind::RightBrace),
'[' => self.token(pos, TokenKind::LeftBracket),
@ -427,7 +441,13 @@ impl<'a> Tokens<'a> {
'+' => self.token(pos, TokenKind::Plus),
':' => self.token(pos, TokenKind::Colon),
';' => self.token(pos, TokenKind::Semicolon),
'/' => self.token(pos, TokenKind::Slash),
'/' => {
if self.matches('/') {
self.comment(pos)
} else {
self.token(pos, TokenKind::Slash)
}
}
'*' => self.token(pos, TokenKind::Star),
'!' => {
if self.matches('=') {
@ -484,6 +504,9 @@ mod tests {
while !is_eof {
let token = tokens.next();
is_eof = token.kind == TokenKind::EOF;
if token.kind == TokenKind::Whitespace {
continue;
}
result.push(token);
}