diff --git a/fine/src/parser/concrete.rs b/fine/src/parser/concrete.rs index 4382d43e..3b6b25bf 100644 --- a/fine/src/parser/concrete.rs +++ b/fine/src/parser/concrete.rs @@ -1,34 +1,56 @@ // NOTE: much of this parser structure derived from // https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html use crate::tokens::{Lines, Token, TokenKind, Tokens}; -use std::cell::Cell; +use std::{cell::Cell, num::NonZeroU32}; -// BINDING POWERS. When parsing expressions we only accept expressions that -// meet a minimum binding power. (This is like "precedence" but I just super -// don't like that terminology.) -const ASSIGNMENT_POWER: u8 = 0; // = -const OR_POWER: u8 = 1; // or -const AND_POWER: u8 = 2; // and -const EQUALITY_POWER: u8 = 3; // == != -const COMPARISON_POWER: u8 = 4; // < > <= >= -const TERM_POWER: u8 = 5; // + - -const FACTOR_POWER: u8 = 6; // * / -const UNARY_POWER: u8 = 7; // ! - +pub struct ConcreteTree<'a> { + trees: Vec>, + root: Option, +} -// const PRIMARY_POWER: u8 = 9; - -fn token_power<'a>(token: TokenKind) -> Option { - match token { - TokenKind::Equal => Some(ASSIGNMENT_POWER), - TokenKind::Or => Some(OR_POWER), - TokenKind::And => Some(AND_POWER), - TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER), - TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => { - Some(COMPARISON_POWER) +impl<'a> ConcreteTree<'a> { + pub fn new() -> Self { + ConcreteTree { + trees: vec![], + root: None, } - TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER), - TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER), - _ => None, + } + + pub fn add_tree(&mut self, t: Tree<'a>) -> TreeRef { + assert!(t.parent.is_none()); + let tr = TreeRef::from_index(self.trees.len()); + + // NOTE: Because of the difficulty of holding multiple mutable + // references, this is our best chance to patch up parent + // pointers. 
+ for child in t.children.iter() { + if let Child::Tree(ct) = child { + self[*ct].parent = Some(tr); + } + } + self.trees.push(t); + tr + } + + pub fn dump(&self) -> String { + match self.root { + Some(r) => self[r].dump(self), + None => String::new(), + } + } +} + +impl<'a> std::ops::Index for ConcreteTree<'a> { + type Output = Tree<'a>; + + fn index(&self, index: TreeRef) -> &Self::Output { + &self.trees[index.index()] + } +} + +impl<'a> std::ops::IndexMut for ConcreteTree<'a> { + fn index_mut(&mut self, index: TreeRef) -> &mut Self::Output { + &mut self.trees[index.index()] } } @@ -58,52 +80,53 @@ pub enum TreeKind { pub struct Tree<'a> { pub kind: TreeKind, - // TODO: Indirect reference? Flatness? Using a reference structure will - // make caching and annotation easier if desired. + pub parent: Option, pub children: Vec>, } +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct TreeRef(NonZeroU32); + +impl TreeRef { + pub fn from_index(index: usize) -> TreeRef { + let index: u32 = (index + 1).try_into().unwrap(); + TreeRef(NonZeroU32::new(index).unwrap()) + } + + pub fn index(&self) -> usize { + let index: usize = self.0.get().try_into().unwrap(); + index - 1 + } +} + impl<'a> Tree<'a> { - pub fn dump(&self) -> String { + pub fn dump(&self, tree: &ConcreteTree<'a>) -> String { let mut output = String::new(); output.push_str(&format!("{:?}\n", self.kind)); for child in self.children.iter() { - child.dump_rec(2, &mut output); + child.dump_rec(2, tree, &mut output); } output } } -impl<'a> std::fmt::Debug for Tree<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "[{:?}", self.kind)?; - for child in self.children.iter() { - match child { - Child::Token(t) => write!(f, " {:?}:'{}'", t.kind, t.as_str())?, - Child::Tree(t) => write!(f, " {t:?}")?, - } - } - write!(f, "]")?; - Ok(()) - } -} - pub enum Child<'a> { Token(Token<'a>), - Tree(Tree<'a>), + Tree(TreeRef), } impl<'a> Child<'a> { - fn dump_rec(&self, indent: usize, output: 
&mut String) { + fn dump_rec(&self, indent: usize, tree: &ConcreteTree<'a>, output: &mut String) { for _ in 0..indent { output.push(' '); } match self { Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())), Child::Tree(t) => { + let t = &tree[*t]; output.push_str(&format!("{:?}\n", t.kind)); for child in t.children.iter() { - child.dump_rec(indent + 2, output); + child.dump_rec(indent + 2, tree, output); } } } @@ -261,10 +284,12 @@ impl<'a> CParser<'a> { }); } - fn build_tree(self) -> (Tree<'a>, Lines) { + fn build_tree(self) -> (ConcreteTree<'a>, Lines) { let mut events = self.events; let mut stack = Vec::new(); + let mut result = ConcreteTree::new(); + // The first element in our events vector must be a start; the whole // thing must be bracketed in a tree. assert!(matches!(events.get(0), Some(ParseEvent::Start { .. }))); @@ -279,12 +304,13 @@ impl<'a> CParser<'a> { match event { ParseEvent::Start { kind } => stack.push(Tree { kind, + parent: None, children: Vec::new(), }), ParseEvent::End => { - let tree = stack.pop().unwrap(); - stack.last_mut().unwrap().children.push(Child::Tree(tree)); + let t = result.add_tree(stack.pop().unwrap()); + stack.last_mut().unwrap().children.push(Child::Tree(t)); } ParseEvent::Advance { token } => { @@ -294,11 +320,14 @@ impl<'a> CParser<'a> { } assert!(stack.len() == 1, "Not all trees were ended!"); - (stack.pop().unwrap(), self.tokens.lines()) + let root = result.add_tree(stack.pop().unwrap()); + result.root = Some(root); + + (result, self.tokens.lines()) } } -pub fn parse_concrete(source: &str) -> (Tree, Lines) { +pub fn parse_concrete(source: &str) -> (ConcreteTree, Lines) { let tokens = Tokens::new(source); let mut parser = CParser::new(tokens); @@ -463,6 +492,35 @@ fn expression(p: &mut CParser) { expression_with_power(p, 0) } +// BINDING POWERS. When parsing expressions we only accept expressions that +// meet a minimum binding power. 
(This is like "precedence" but I just super +// don't like that terminology.) +const ASSIGNMENT_POWER: u8 = 0; // = +const OR_POWER: u8 = 1; // or +const AND_POWER: u8 = 2; // and +const EQUALITY_POWER: u8 = 3; // == != +const COMPARISON_POWER: u8 = 4; // < > <= >= +const TERM_POWER: u8 = 5; // + - +const FACTOR_POWER: u8 = 6; // * / +const UNARY_POWER: u8 = 7; // ! - + +// const PRIMARY_POWER: u8 = 9; + +fn token_power<'a>(token: TokenKind) -> Option { + match token { + TokenKind::Equal => Some(ASSIGNMENT_POWER), + TokenKind::Or => Some(OR_POWER), + TokenKind::And => Some(AND_POWER), + TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER), + TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => { + Some(COMPARISON_POWER) + } + TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER), + TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER), + _ => None, + } +} + fn expression_with_power(p: &mut CParser, minimum_power: u8) { let mut expr = prefix_expression(p); while p.at(TokenKind::LeftParen) { @@ -591,3 +649,15 @@ fn identifier(p: &mut CParser) -> MarkClosed { p.end(m, TreeKind::Identifier) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tree_ref_size() { + // What's the point of doing all that work if the tree ref isn't nice + // and "small"? + assert_eq!(4, std::mem::size_of::>()); + } +} diff --git a/fine/src/tokens.rs b/fine/src/tokens.rs index a6ddccdf..dedbf864 100644 --- a/fine/src/tokens.rs +++ b/fine/src/tokens.rs @@ -58,11 +58,18 @@ pub enum TokenKind { Yield, } +// NOTE: Tokens are kinda big (like 40 bytes?) and AFAICT the only way to go +// smaller would be to stop using string pointers and use smaller +// sizes/offsets instead, e.g., 32b for offset and 32b for size, and +// stop tracking the position independently from the start, and then +// require the source text when converting to line/col. 
I'm unwilling to +// give up the ergonomics of &str and String right now, so we're just +// not doing it. #[derive(Debug, PartialEq, Eq, Clone)] pub struct Token<'a> { pub kind: TokenKind, pub start: usize, - value: Result<&'a str, String>, + value: Result<&'a str, Box>, } impl<'a> Token<'a> { @@ -78,7 +85,7 @@ impl<'a> Token<'a> { Token { kind: TokenKind::Error, start, - value: Err(message), + value: Err(message.into()), } } diff --git a/fine/tests/example_tests.rs b/fine/tests/example_tests.rs index 680ab3d5..4526a6b9 100644 --- a/fine/tests/example_tests.rs +++ b/fine/tests/example_tests.rs @@ -1,4 +1,4 @@ -use fine::parser::concrete::Tree; +use fine::parser::concrete::ConcreteTree; use pretty_assertions::assert_eq; fn rebase_concrete(source_path: &str, dump: &str) { @@ -68,7 +68,7 @@ fn rebase_concrete(source_path: &str, dump: &str) { std::fs::write(source_path, result).expect("unable to write the new file!"); } -fn assert_concrete(tree: &Tree, expected: &str, source_path: &str) { +fn assert_concrete(tree: &ConcreteTree, expected: &str, source_path: &str) { let dump = tree.dump(); let rebase = std::env::var("FINE_TEST_REBASE") .unwrap_or(String::new())