3 changed files with 79 additions and 125 deletions
--- a/fine/src/parser/concrete.rs
+++ b/fine/src/parser/concrete.rs
@ -1,56 +1,34 @@
 // NOTE: much of this parser structure derived from
 // https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
 use crate::tokens::{Lines, Token, TokenKind, Tokens};
-use std::{cell::Cell, num::NonZeroU32};
+use std::cell::Cell;
-pub struct ConcreteTree<'a> {
+// BINDING POWERS. When parsing expressions we only accept expressions that
-    trees: Vec<Tree<'a>>,
+// meet a minimum binding power. (This is like "precedence" but I just super
-    root: Option<TreeRef>,
+// don't like that terminology.)
-}
+const ASSIGNMENT_POWER: u8 = 0; // =
 const OR_POWER: u8 = 1; // or
 const AND_POWER: u8 = 2; // and
 const EQUALITY_POWER: u8 = 3; // == !=
 const COMPARISON_POWER: u8 = 4; // < > <= >=
 const TERM_POWER: u8 = 5; // + -
 const FACTOR_POWER: u8 = 6; // * /
 const UNARY_POWER: u8 = 7; // ! -
-impl<'a> ConcreteTree<'a> {
+// const PRIMARY_POWER: u8 = 9;
    pub fn new() -> Self {
        ConcreteTree {
            trees: vec![],
            root: None,
        }
    }
-    pub fn add_tree(&mut self, t: Tree<'a>) -> TreeRef {
+fn token_power<'a>(token: TokenKind) -> Option<u8> {
-        assert!(t.parent.is_none());
+    match token {
-        let tr = TreeRef::from_index(self.trees.len());
+        TokenKind::Equal => Some(ASSIGNMENT_POWER),
-
+        TokenKind::Or => Some(OR_POWER),
-        // NOTE: Because of the difficulty of holding multiple mutable
+        TokenKind::And => Some(AND_POWER),
-        //       references it's this is our best chance to patch up parent
+        TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER),
-        //       pointers.
+        TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => {
-        for child in t.children.iter() {
+            Some(COMPARISON_POWER)
            if let Child::Tree(ct) = child {
                self[*ct].parent = Some(tr);
        }
-        }
+        TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER),
-        self.trees.push(t);
+        TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER),
-        tr
+        _ => None,
    }
    /// Dumps the tree that `root` refers to, or returns an empty string when
    /// no root has been set.
    pub fn dump(&self) -> String {
        self.root
            .map(|root| self[root].dump(self))
            .unwrap_or_default()
    }
 }
 /// Lets a `TreeRef` select its `Tree` with `tree[r]` syntax.
 impl<'a> std::ops::Index<TreeRef> for ConcreteTree<'a> {
    type Output = Tree<'a>;
    /// Borrows the tree stored in the slot that `index` refers to.
    fn index(&self, index: TreeRef) -> &Self::Output {
        let slot = index.index();
        &self.trees[slot]
    }
 }
 /// Lets a `TreeRef` select its `Tree` mutably with `tree[r]` syntax.
 impl<'a> std::ops::IndexMut<TreeRef> for ConcreteTree<'a> {
    /// Mutably borrows the tree stored in the slot that `index` refers to.
    fn index_mut(&mut self, index: TreeRef) -> &mut Self::Output {
        let slot = index.index();
        &mut self.trees[slot]
    }
 }
@ -80,53 +58,52 @@ pub enum TreeKind {
 pub struct Tree<'a> {
    pub kind: TreeKind,
-    pub parent: Option<TreeRef>,
+    // TODO: Indirect reference? Flatness? Using a reference structure will
    // make caching and annotation easier if desired.
    pub children: Vec<Child<'a>>,
 }
 /// A compact handle that names a tree by position. The position is stored
 /// shifted up by one inside a `NonZeroU32`, so position 0 is representable.
 #[derive(Copy, Clone, Eq, PartialEq)]
 pub struct TreeRef(NonZeroU32);
 impl TreeRef {
    /// Builds a handle for the zero-based `index`.
    ///
    /// Panics if `index + 1` does not fit in a `u32`.
    pub fn from_index(index: usize) -> TreeRef {
        let biased = u32::try_from(index + 1).unwrap();
        NonZeroU32::new(biased).map(TreeRef).unwrap()
    }
    /// Recovers the zero-based index this handle was built from.
    pub fn index(&self) -> usize {
        let biased = usize::try_from(self.0.get()).unwrap();
        biased - 1
    }
 }
 impl<'a> Tree<'a> {
-    pub fn dump(&self, tree: &ConcreteTree<'a>) -> String {
+    pub fn dump(&self) -> String {
        let mut output = String::new();
        output.push_str(&format!("{:?}\n", self.kind));
        for child in self.children.iter() {
-            child.dump_rec(2, tree, &mut output);
+            child.dump_rec(2, &mut output);
        }
        output
    }
 }
 /// Single-line bracketed rendering: `[Kind child child ...]`, where a token
 /// child prints as `Kind:'text'` and a tree child prints via its own `Debug`.
 impl<'a> std::fmt::Debug for Tree<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "[{:?}", self.kind)?;
        // Stop at the first formatting error, exactly as `?` inside a loop would.
        self.children.iter().try_for_each(|child| match child {
            Child::Token(token) => write!(f, " {:?}:'{}'", token.kind, token.as_str()),
            Child::Tree(subtree) => write!(f, " {subtree:?}"),
        })?;
        write!(f, "]")
    }
 }
 pub enum Child<'a> {
    Token(Token<'a>),
-    Tree(TreeRef),
+    Tree(Tree<'a>),
 }
 impl<'a> Child<'a> {
-    fn dump_rec(&self, indent: usize, tree: &ConcreteTree<'a>, output: &mut String) {
+    fn dump_rec(&self, indent: usize, output: &mut String) {
        for _ in 0..indent {
            output.push(' ');
        }
        match self {
            Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())),
            Child::Tree(t) => {
                let t = &tree[*t];
                output.push_str(&format!("{:?}\n", t.kind));
                for child in t.children.iter() {
-                    child.dump_rec(indent + 2, tree, output);
+                    child.dump_rec(indent + 2, output);
                }
            }
        }
@ -284,12 +261,10 @@ impl<'a> CParser<'a> {
        });
    }
-    fn build_tree(self) -> (ConcreteTree<'a>, Lines) {
+    fn build_tree(self) -> (Tree<'a>, Lines) {
        let mut events = self.events;
        let mut stack = Vec::new();
        let mut result = ConcreteTree::new();
        // The first element in our events vector must be a start; the whole
        // thing must be bracketed in a tree.
        assert!(matches!(events.get(0), Some(ParseEvent::Start { .. })));
@ -304,13 +279,12 @@ impl<'a> CParser<'a> {
            match event {
                ParseEvent::Start { kind } => stack.push(Tree {
                    kind,
                    parent: None,
                    children: Vec::new(),
                }),
                ParseEvent::End => {
-                    let t = result.add_tree(stack.pop().unwrap());
+                    let tree = stack.pop().unwrap();
-                    stack.last_mut().unwrap().children.push(Child::Tree(t));
+                    stack.last_mut().unwrap().children.push(Child::Tree(tree));
                }
                ParseEvent::Advance { token } => {
@ -320,14 +294,11 @@ impl<'a> CParser<'a> {
        }
        assert!(stack.len() == 1, "Not all trees were ended!");
-        let root = result.add_tree(stack.pop().unwrap());
+        (stack.pop().unwrap(), self.tokens.lines())
        result.root = Some(root);
        (result, self.tokens.lines())
    }
 }
-pub fn parse_concrete(source: &str) -> (ConcreteTree, Lines) {
+pub fn parse_concrete(source: &str) -> (Tree, Lines) {
    let tokens = Tokens::new(source);
    let mut parser = CParser::new(tokens);
@ -492,35 +463,6 @@ fn expression(p: &mut CParser) {
    expression_with_power(p, 0)
 }
 // BINDING POWERS. When parsing expressions we only accept expressions that
 // meet a minimum binding power. (This is like "precedence" but I just super
 // don't like that terminology.)
 // One constant per operator tier; the trailing comment on each line lists
 // the operators in that tier. Values ascend from assignment (0) to unary (7).
 // NOTE(review): presumably a larger value binds more tightly -- confirm
 // against expression_with_power.
 const ASSIGNMENT_POWER: u8 = 0; // =
 const OR_POWER: u8 = 1; // or
 const AND_POWER: u8 = 2; // and
 const EQUALITY_POWER: u8 = 3; // == !=
 const COMPARISON_POWER: u8 = 4; // < > <= >=
 const TERM_POWER: u8 = 5; // + -
 const FACTOR_POWER: u8 = 6; // * /
 // UNARY_POWER is never returned by token_power; presumably it is used when
 // parsing prefix operators -- TODO confirm.
 const UNARY_POWER: u8 = 7; // ! -
 // NOTE(review): PRIMARY_POWER is disabled, and the value 8 is unassigned.
 // const PRIMARY_POWER: u8 = 9;
 /// Looks up the binding power of `token` in the operator table below.
 /// Returns `None` for any token kind that has no entry.
 fn token_power<'a>(token: TokenKind) -> Option<u8> {
    let power = match token {
        TokenKind::Equal => ASSIGNMENT_POWER,
        TokenKind::Or => OR_POWER,
        TokenKind::And => AND_POWER,
        TokenKind::EqualEqual | TokenKind::BangEqual => EQUALITY_POWER,
        TokenKind::Less
        | TokenKind::LessEqual
        | TokenKind::Greater
        | TokenKind::GreaterEqual => COMPARISON_POWER,
        TokenKind::Plus | TokenKind::Minus => TERM_POWER,
        TokenKind::Star | TokenKind::Slash => FACTOR_POWER,
        // Every other token kind has no binding power.
        _ => return None,
    };
    Some(power)
 }
 fn expression_with_power(p: &mut CParser, minimum_power: u8) {
    let mut expr = prefix_expression(p);
    while p.at(TokenKind::LeftParen) {
@ -653,11 +595,30 @@ fn identifier(p: &mut CParser) -> MarkClosed {
 #[cfg(test)]
 mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
-    #[test]
+    fn test_successful_expression_parse(source: &str, expected: &str) {
-    fn tree_ref_size() {
+        let tokens = Tokens::new(source);
-        // What's the point of doing all that work if the tree ref isn't nice
+        let mut parser = CParser::new(tokens);
-        // and "small"?
+
-        assert_eq!(4, std::mem::size_of::<Option<TreeRef>>());
+        expression(&mut parser);
        let (tree, _) = parser.build_tree();
        assert_eq!(
            expected,
            format!("{tree:?}"),
            "The parse structure of the expressions did not match"
        );
    }
    // Declares a `#[test]` function named `$test_name` that passes `$source`
    // and `$want` to `test_successful_expression_parse`.
    macro_rules! test_expr {
        ($test_name:ident, $source:expr, $want:expr) => {
            #[test]
            fn $test_name() {
                test_successful_expression_parse($source, $want);
            }
        };
    }
    test_expr!(number_expr, "12", "[LiteralExpression Number:'12']");
 }
--- a/fine/src/tokens.rs
+++ b/fine/src/tokens.rs
@ -58,18 +58,11 @@ pub enum TokenKind {
    Yield,
 }
 // NOTE: Tokens are kinda big (like 40 bytes?) and AFAICT the only way to go
 //       smaller would be to stop using string pointers and use smaller
 //       sizes/offsets instead, e.g., 32b for offset and 32b for size, and
 //       stop tracking the position independently from the start, and then
 //       require the source text when converting to line/col. I'm unwilling to
 //       give up the ergonomics of &str and String right now, so we're just
 //       not doing it.
 #[derive(Debug, PartialEq, Eq, Clone)]
 pub struct Token<'a> {
    pub kind: TokenKind,
    pub start: usize,
-    value: Result<&'a str, Box<str>>,
+    value: Result<&'a str, String>,
 }
 impl<'a> Token<'a> {
@ -85,7 +78,7 @@ impl<'a> Token<'a> {
        Token {
            kind: TokenKind::Error,
            start,
-            value: Err(message.into()),
+            value: Err(message),
        }
    }
--- a/fine/tests/example_tests.rs
+++ b/fine/tests/example_tests.rs
@ -1,4 +1,4 @@
-use fine::parser::concrete::ConcreteTree;
+use fine::parser::concrete::Tree;
 use pretty_assertions::assert_eq;
 fn rebase_concrete(source_path: &str, dump: &str) {
@ -68,7 +68,7 @@ fn rebase_concrete(source_path: &str, dump: &str) {
    std::fs::write(source_path, result).expect("unable to write the new file!");
 }
-fn assert_concrete(tree: &ConcreteTree, expected: &str, source_path: &str) {
+fn assert_concrete(tree: &Tree, expected: &str, source_path: &str) {
    let dump = tree.dump();
    let rebase = std::env::var("FINE_TEST_REBASE")
        .unwrap_or(String::new())