[fine] Dump CSTs and an example zoo
This commit is contained in:
parent
757db0ba3e
commit
1f6d7ec131
9 changed files with 357 additions and 32 deletions
|
|
@ -646,8 +646,13 @@ impl<'a> Parser<'a> {
|
|||
/// Moves the parser forward by one token.
///
/// The current token is cloned into `self.previous`, then tokens are pulled
/// from `self.tokens` until `self.current` is something other than an
/// `Error`, `Whitespace` or `Comment` token. Each `Error` token encountered
/// is reported through `error_at_current` before being skipped.
fn advance(&mut self) {
|
||||
self.previous = self.current.clone();
|
||||
self.current = self.tokens.next();
|
||||
// NOTE(review): this listing looks like a diff rendering. The next two code
// lines appear to be the pre-change loop (skipping `Error` tokens only), and
// the loop after them its replacement; the hunk header reports 8 lines
// before and 13 after. Confirm against the real file.
while self.current.kind == TokenKind::Error {
|
||||
self.error_at_current(self.current.to_string());
|
||||
// Skip everything the grammar should never see: lexer errors, whitespace
// and comments.
while self.current.kind == TokenKind::Error
|
||||
|| self.current.kind == TokenKind::Whitespace
|
||||
|| self.current.kind == TokenKind::Comment
|
||||
{
|
||||
// Only error tokens are reported; whitespace and comments are dropped
// silently.
if self.current.kind == TokenKind::Error {
|
||||
self.error_at_current(self.current.to_string());
|
||||
}
|
||||
self.current = self.tokens.next();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ fn token_power<'a>(token: TokenKind) -> Option<u8> {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TreeKind {
|
||||
Error,
|
||||
File,
|
||||
|
|
@ -62,11 +63,53 @@ pub struct Tree<'a> {
|
|||
pub children: Vec<Child<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> Tree<'a> {
|
||||
pub fn dump(&self) -> String {
|
||||
let mut output = String::new();
|
||||
output.push_str(&format!("{:?}\n", self.kind));
|
||||
for child in self.children.iter() {
|
||||
child.dump_rec(2, &mut output);
|
||||
}
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::fmt::Debug for Tree<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "[{:?}", self.kind)?;
|
||||
for child in self.children.iter() {
|
||||
match child {
|
||||
Child::Token(t) => write!(f, " {:?}:'{}'", t.kind, t.as_str())?,
|
||||
Child::Tree(t) => write!(f, " {t:?}")?,
|
||||
}
|
||||
}
|
||||
write!(f, "]")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub enum Child<'a> {
|
||||
Token(Token<'a>),
|
||||
Tree(Tree<'a>),
|
||||
}
|
||||
|
||||
impl<'a> Child<'a> {
|
||||
fn dump_rec(&self, indent: usize, output: &mut String) {
|
||||
for _ in 0..indent {
|
||||
output.push(' ');
|
||||
}
|
||||
match self {
|
||||
Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())),
|
||||
Child::Tree(t) => {
|
||||
output.push_str(&format!("{:?}\n", t.kind));
|
||||
for child in t.children.iter() {
|
||||
child.dump_rec(indent + 2, output);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum ParseEvent<'a> {
|
||||
Start { kind: TreeKind },
|
||||
End,
|
||||
|
|
@ -97,6 +140,7 @@ impl<'a> CParser<'a> {
|
|||
events: Vec::new(),
|
||||
};
|
||||
parser.current = parser.tokens.next();
|
||||
parser.skip_ephemera();
|
||||
parser
|
||||
}
|
||||
|
||||
|
|
@ -135,6 +179,14 @@ impl<'a> CParser<'a> {
|
|||
token: self.current.clone(),
|
||||
});
|
||||
self.current = self.tokens.next();
|
||||
self.skip_ephemera();
|
||||
}
|
||||
|
||||
fn skip_ephemera(&mut self) {
|
||||
while self.current.kind == TokenKind::Whitespace || self.current.kind == TokenKind::Comment
|
||||
{
|
||||
self.current = self.tokens.next();
|
||||
}
|
||||
}
|
||||
|
||||
fn eof(&self) -> bool {
|
||||
|
|
@ -213,8 +265,14 @@ impl<'a> CParser<'a> {
|
|||
let mut events = self.events;
|
||||
let mut stack = Vec::new();
|
||||
|
||||
// Special case: pop the last `Close` event to ensure that the stack
|
||||
// is non-empty inside the loop.
|
||||
// The first element in our events vector must be a start; the whole
|
||||
// thing must be bracketed in a tree.
|
||||
assert!(matches!(events.get(0), Some(ParseEvent::Start { .. })));
|
||||
|
||||
// The last element in our events vector must be an end, otherwise
|
||||
// the parser has failed badly. We'll remove it here so that, after
|
||||
// processing the entire array, the stack retains the tree that was
|
||||
// opened by the very first ::Start.
|
||||
assert!(matches!(events.pop(), Some(ParseEvent::End)));
|
||||
|
||||
for event in events {
|
||||
|
|
@ -240,7 +298,7 @@ impl<'a> CParser<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn c_parse(source: &str) -> (Tree, Lines) {
|
||||
pub fn parse_concrete(source: &str) -> (Tree, Lines) {
|
||||
let tokens = Tokens::new(source);
|
||||
let mut parser = CParser::new(tokens);
|
||||
|
||||
|
|
@ -527,3 +585,34 @@ fn identifier(p: &mut CParser) -> MarkClosed {
|
|||
|
||||
p.end(m, TreeKind::Identifier)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Parses `source` with the `expression` rule and asserts that the
    /// `Debug` rendering of the resulting tree equals `expected`.
    fn test_successful_expression_parse(source: &str, expected: &str) {
        let mut parser = CParser::new(Tokens::new(source));
        expression(&mut parser);

        let (tree, _) = parser.build_tree();
        let actual = format!("{tree:?}");
        assert_eq!(
            expected, actual,
            "The parse structure of the expressions did not match"
        );
    }

    /// Declares a `#[test]` function named `$name` that checks `$input`
    /// parses to the tree rendering `$expected`.
    macro_rules! test_expr {
        ($name:ident, $input:expr, $expected:expr) => {
            #[test]
            fn $name() {
                test_successful_expression_parse($input, $expected);
            }
        };
    }

    test_expr!(number_expr, "12", "[LiteralExpression Number:'12']");
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@ pub enum TokenKind {
|
|||
EOF,
|
||||
Error,
|
||||
|
||||
Whitespace,
|
||||
Comment,
|
||||
|
||||
LeftBrace,
|
||||
RightBrace,
|
||||
LeftBracket,
|
||||
|
|
@ -390,7 +393,7 @@ impl<'a> Tokens<'a> {
|
|||
self.next_char.is_none()
|
||||
}
|
||||
|
||||
fn skip_whitespace(&mut self) {
|
||||
fn whitespace(&mut self, pos: usize) -> Token<'a> {
|
||||
while let Some((pos, ch)) = self.next_char {
|
||||
if ch == '\n' {
|
||||
self.lines.add_line(pos);
|
||||
|
|
@ -399,16 +402,27 @@ impl<'a> Tokens<'a> {
|
|||
}
|
||||
self.advance();
|
||||
}
|
||||
self.token(pos, TokenKind::Whitespace)
|
||||
}
|
||||
|
||||
fn comment(&mut self, pos: usize) -> Token<'a> {
|
||||
while let Some((_, ch)) = self.next_char {
|
||||
if ch == '\n' {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
self.token(pos, TokenKind::Comment)
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> Token<'a> {
|
||||
self.skip_whitespace(); // TODO: Whitespace preserving/comment preserving
|
||||
let (pos, c) = match self.advance() {
|
||||
Some((p, c)) => (p, c),
|
||||
None => return self.token(self.source.len(), TokenKind::EOF),
|
||||
};
|
||||
|
||||
match c {
|
||||
' ' | '\t' | '\r' | '\n' => self.whitespace(pos),
|
||||
'{' => self.token(pos, TokenKind::LeftBrace),
|
||||
'}' => self.token(pos, TokenKind::RightBrace),
|
||||
'[' => self.token(pos, TokenKind::LeftBracket),
|
||||
|
|
@ -427,7 +441,13 @@ impl<'a> Tokens<'a> {
|
|||
'+' => self.token(pos, TokenKind::Plus),
|
||||
':' => self.token(pos, TokenKind::Colon),
|
||||
';' => self.token(pos, TokenKind::Semicolon),
|
||||
'/' => self.token(pos, TokenKind::Slash),
|
||||
'/' => {
|
||||
if self.matches('/') {
|
||||
self.comment(pos)
|
||||
} else {
|
||||
self.token(pos, TokenKind::Slash)
|
||||
}
|
||||
}
|
||||
'*' => self.token(pos, TokenKind::Star),
|
||||
'!' => {
|
||||
if self.matches('=') {
|
||||
|
|
@ -484,6 +504,9 @@ mod tests {
|
|||
while !is_eof {
|
||||
let token = tokens.next();
|
||||
is_eof = token.kind == TokenKind::EOF;
|
||||
if token.kind == TokenKind::Whitespace {
|
||||
continue;
|
||||
}
|
||||
result.push(token);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue