[fine] Parser Table Generator?

Look I'm just thinking hard about converting to a parser generator
because I want to derive the pretty-printer from the parser without
having to repeat myself all over the place.

This parser.py is derived from my old LRParsers project, and should
go back there eventually, but for now I'm driving the work from here.
This commit is contained in:
John Doty 2024-05-04 16:46:36 -07:00
parent a2b3e8b74d
commit 25f9c3ecaf
6 changed files with 2384 additions and 1 deletions

3
.gitignore vendored
View file

@ -2,3 +2,6 @@
/oden-js/target
/oden-js-sys/target
/fine/target
.venv/
__pycache__/

388
fine/grammar/grammar.py Normal file
View file

@ -0,0 +1,388 @@
from parser import Assoc, Grammar, Nothing, Token, rule, seq
# Terminal symbols of the Fine grammar. The string passed to Token() is the
# token-kind name the lexer produces (and the name emitted into the generated
# Rust TokenKind enum).
#
# Keywords and punctuation.
ARROW = Token("Arrow")
AS = Token("As")
BAR = Token("Bar")
CLASS = Token("Class")
COLON = Token("Colon")
ELSE = Token("Else")
FOR = Token("For")
FUN = Token("Fun")
IDENTIFIER = Token("Identifier")
IF = Token("If")
IMPORT = Token("Import")
IN = Token("In")
LCURLY = Token("LeftBrace")
LET = Token("Let")
RCURLY = Token("RightBrace")
RETURN = Token("Return")
SEMICOLON = Token("Semicolon")
STRING = Token("String")
WHILE = Token("While")
EQUAL = Token("Equal")
LPAREN = Token("LeftParen")
RPAREN = Token("RightParen")
COMMA = Token("Comma")
# NOTE(review): "Selff" (double f) is presumably deliberate, to avoid the
# reserved `Self` identifier in the generated Rust enum -- confirm with the
# lexer before "fixing" the spelling.
SELF = Token("Selff")
# Binary and unary operators.
OR = Token("Or")
IS = Token("Is")
AND = Token("And")
EQUALEQUAL = Token("EqualEqual")
BANGEQUAL = Token("BangEqual")
LESS = Token("Less")
GREATER = Token("Greater")
LESSEQUAL = Token("LessEqual")
GREATEREQUAL = Token("GreaterEqual")
PLUS = Token("Plus")
MINUS = Token("Minus")
STAR = Token("Star")
SLASH = Token("Slash")
# Literals and remaining keywords/punctuation.
NUMBER = Token("Number")
TRUE = Token("True")
FALSE = Token("False")
BANG = Token("Bang")
DOT = Token("Dot")
MATCH = Token("Match")
EXPORT = Token("Export")
UNDERSCORE = Token("Underscore")
NEW = Token("New")
LSQUARE = Token("LeftBracket")
RSQUARE = Token("RightBracket")
class FineGrammar(Grammar):
    """The grammar of the Fine language, expressed as `@rule` methods.

    Each `@rule` method returns an alternation of `seq(...)` productions (or
    bare tokens/rules). The table generator resolves shift/reduce conflicts
    with the precedence list passed to `super().__init__`, lowest precedence
    first.
    """

    def __init__(self):
        super().__init__(
            precedence=[
                (Assoc.RIGHT, [EQUAL]),
                (Assoc.LEFT, [OR]),
                (Assoc.LEFT, [IS]),
                (Assoc.LEFT, [AND]),
                (Assoc.LEFT, [EQUALEQUAL, BANGEQUAL]),
                (Assoc.LEFT, [LESS, GREATER, GREATEREQUAL, LESSEQUAL]),
                (Assoc.LEFT, [PLUS, MINUS]),
                (Assoc.LEFT, [STAR, SLASH]),
                (Assoc.LEFT, [self.primary_expression]),
                (Assoc.LEFT, [LPAREN]),
                (Assoc.LEFT, [DOT]),
                #
                # If there's a confusion about whether to make an IF
                # statement or an expression, prefer the statement.
                #
                (Assoc.NONE, [self.if_statement]),
            ]
        )

    # File structure

    @rule
    def file(self):
        """A file is a non-empty list of file-level statements."""
        return self.file_statement_list

    @rule
    def file_statement_list(self):
        return self.file_statement | (self.file_statement_list + self.file_statement)

    @rule
    def file_statement(self):
        return (
            self.import_statement | self.class_declaration | self.export_statement | self.statement
        )

    @rule
    def import_statement(self):
        """`import "path" as name;`"""
        return seq(IMPORT, STRING, AS, IDENTIFIER, SEMICOLON)

    # Classes

    @rule
    def class_declaration(self):
        return seq(CLASS, IDENTIFIER, self.class_body)

    @rule
    def class_body(self):
        # The empty body is a distinct production rather than an empty
        # member list, which keeps the member list rules non-nullable.
        return seq(LCURLY, RCURLY) | seq(LCURLY, self.class_members, RCURLY)

    @rule
    def class_members(self):
        return self.class_member | seq(self.class_members, self.class_member)

    @rule
    def class_member(self):
        return self.field_declaration | self.function_declaration

    @rule
    def field_declaration(self):
        return seq(IDENTIFIER, COLON, self.type_expression, SEMICOLON)

    # Types

    @rule
    def type_expression(self):
        return self.alternate_type | self.type_identifier

    @rule
    def alternate_type(self):
        """Union types: `A | B | C` (left-recursive on type_expression)."""
        return seq(self.type_expression, BAR, self.type_identifier)

    @rule
    def type_identifier(self):
        return IDENTIFIER

    # Exports

    @rule
    def export_statement(self):
        return (
            seq(EXPORT, self.class_declaration)
            | seq(EXPORT, self.function_declaration)
            | seq(EXPORT, self.let_statement)
            | seq(EXPORT, self.export_list, SEMICOLON)
        )

    @rule
    def export_list(self):
        return Nothing | IDENTIFIER | seq(IDENTIFIER, COMMA, self.export_list)

    # Functions

    @rule
    def function_declaration(self):
        # Return type (`-> T`) is optional.
        return seq(FUN, IDENTIFIER, self.function_parameters, self.block) | seq(
            FUN, IDENTIFIER, self.function_parameters, ARROW, self.type_expression, self.block
        )

    @rule
    def function_parameters(self):
        return (
            seq(LPAREN, RPAREN)
            | seq(LPAREN, self.first_parameter, RPAREN)
            | seq(LPAREN, self.first_parameter, COMMA, self.parameter_list, RPAREN)
        )

    @rule
    def first_parameter(self):
        # Only the first parameter may be `self` (method receiver).
        return SELF | self.parameter

    @rule
    def parameter_list(self):
        return Nothing | self.parameter | seq(self.parameter, COMMA, self.parameter_list)

    @rule
    def parameter(self):
        return seq(IDENTIFIER, COLON, self.type_expression)

    # Block

    @rule
    def block(self):
        # A block may end with a trailing expression, which is the block's
        # value (Rust-style).
        return (
            seq(LCURLY, RCURLY)
            | seq(LCURLY, self.statement_list, RCURLY)
            | seq(LCURLY, self.statement_list, self.expression, RCURLY)
        )

    @rule
    def statement_list(self):
        return self.statement | seq(self.statement_list, self.statement)

    @rule
    def statement(self):
        return (
            self.function_declaration
            | self.let_statement
            | self.return_statement
            | self.for_statement
            | self.if_statement
            | self.while_statement
            | self.expression_statement
        )

    @rule
    def let_statement(self):
        return seq(LET, IDENTIFIER, EQUAL, self.expression, SEMICOLON)

    @rule
    def return_statement(self):
        return seq(RETURN, self.expression, SEMICOLON)

    @rule
    def for_statement(self):
        return seq(FOR, self.iterator_variable, IN, self.expression, self.block)

    @rule
    def iterator_variable(self):
        return IDENTIFIER

    @rule
    def if_statement(self):
        # An if statement is just a conditional expression in statement
        # position; the precedence declaration in __init__ prefers this
        # reading when both parses are possible.
        return self.conditional_expression

    @rule
    def while_statement(self):
        return seq(WHILE, self.expression, self.block)

    @rule
    def expression_statement(self):
        return seq(self.expression, SEMICOLON)

    # Expressions, from lowest to highest precedence.

    @rule
    def expression(self):
        return self.assignment_expression

    @rule
    def assignment_expression(self):
        # Right-associative via right recursion.
        return seq(self.or_expression, EQUAL, self.assignment_expression) | self.or_expression

    @rule
    def or_expression(self):
        return seq(self.or_expression, OR, self.is_expression) | self.is_expression

    @rule
    def is_expression(self):
        return seq(self.is_expression, IS, self.pattern) | self.and_expression

    @rule
    def and_expression(self):
        return seq(self.and_expression, AND, self.equality_expression) | self.equality_expression

    @rule
    def equality_expression(self):
        return (
            seq(self.equality_expression, EQUALEQUAL, self.relation_expression)
            | seq(self.equality_expression, BANGEQUAL, self.relation_expression)
            | self.relation_expression
        )

    @rule
    def relation_expression(self):
        # BUG FIX: the base alternative `self.additive_expression` was
        # missing, so the precedence chain could never pass through a
        # relational context down to additive expressions -- every other
        # level in the chain has this fall-through alternative.
        return (
            seq(self.relation_expression, LESS, self.additive_expression)
            | seq(self.relation_expression, LESSEQUAL, self.additive_expression)
            | seq(self.relation_expression, GREATER, self.additive_expression)
            | seq(self.relation_expression, GREATEREQUAL, self.additive_expression)
            | self.additive_expression
        )

    @rule
    def additive_expression(self):
        return (
            seq(self.additive_expression, PLUS, self.multiplication_expression)
            | seq(self.additive_expression, MINUS, self.multiplication_expression)
            | self.multiplication_expression
        )

    @rule
    def multiplication_expression(self):
        return (
            seq(self.multiplication_expression, STAR, self.primary_expression)
            | seq(self.multiplication_expression, SLASH, self.primary_expression)
            | self.primary_expression
        )

    @rule
    def primary_expression(self):
        return (
            IDENTIFIER
            | SELF
            | NUMBER
            | STRING
            | TRUE
            | FALSE
            | seq(BANG, self.primary_expression)
            | seq(MINUS, self.primary_expression)
            | self.block
            | self.conditional_expression
            | self.list_constructor_expression
            | self.object_constructor_expression
            | self.match_expression
            | seq(self.primary_expression, LPAREN, self.expression_list, RPAREN)
            | seq(self.primary_expression, DOT, IDENTIFIER)
            | seq(LPAREN, self.expression, RPAREN)
        )

    @rule
    def conditional_expression(self):
        # `else` may chain another conditional (else-if) or a plain block.
        return (
            seq(IF, self.expression, self.block)
            | seq(IF, self.expression, self.block, ELSE, self.conditional_expression)
            | seq(IF, self.expression, self.block, ELSE, self.block)
        )

    @rule
    def list_constructor_expression(self):
        return seq(LSQUARE, RSQUARE) | seq(LSQUARE, self.expression_list, RSQUARE)

    @rule
    def expression_list(self):
        # Comma-separated, trailing comma allowed.
        return (
            self.expression
            | seq(self.expression, COMMA)
            | seq(self.expression, COMMA, self.expression_list)
        )

    # Match

    @rule
    def match_expression(self):
        return seq(MATCH, self.match_body)

    @rule
    def match_body(self):
        return seq(LCURLY, RCURLY) | seq(LCURLY, self.match_arms, RCURLY)

    @rule
    def match_arms(self):
        # Comma-separated, trailing comma allowed.
        return (
            self.match_arm
            | seq(self.match_arm, COMMA)
            | seq(self.match_arm, COMMA, self.match_arms)
        )

    @rule
    def match_arm(self):
        return seq(self.pattern, ARROW, self.expression)

    @rule
    def pattern(self):
        # Optional variable binding, required core, optional guard
        # (`and <expr>`).
        return (
            seq(self.variable_binding, self.pattern_core, AND, self.and_expression)
            | seq(self.variable_binding, self.pattern_core)
            | seq(self.pattern_core, AND, self.and_expression)
            | self.pattern_core
        )

    @rule
    def pattern_core(self):
        return self.type_expression | self.wildcard_pattern

    @rule
    def wildcard_pattern(self):
        return UNDERSCORE

    @rule
    def variable_binding(self):
        return seq(IDENTIFIER, COLON)

    # Object construction

    @rule
    def object_constructor_expression(self):
        return seq(NEW, self.type_identifier, self.field_list)

    @rule
    def field_list(self):
        return seq(LCURLY, RCURLY) | seq(LCURLY, self.field_values, RCURLY)

    @rule
    def field_values(self):
        # Comma-separated, trailing comma allowed.
        return (
            self.field_value
            | seq(self.field_value, COMMA)
            | seq(self.field_value, COMMA, self.field_values)
        )

    @rule
    def field_value(self):
        # `name` alone is shorthand for `name: name`.
        return IDENTIFIER | seq(IDENTIFIER, COLON, self.expression)
# Build the parse table starting from the `file` rule and print some
# size/density statistics about it.
grammar = FineGrammar()
table = grammar.build_table(start="file")
print(f"{len(table)} states")
# Each row is one state's action map; report average and worst-case fanout.
average_entries = sum(len(row) for row in table) / len(table)
max_entries = max(len(row) for row in table)
print(f"{average_entries} average, {max_entries} max")
# print(parser_faster.format_table(gen, table))
# print()
# tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"])

422
fine/grammar/guessing.rs Normal file
View file

@ -0,0 +1,422 @@
// NOTE: Utterly Broken Ideas about Parse Tables.
//
// Committing this here so I can back it up.
use std::collections::HashSet;
/// Every grammar rule the parser can reduce by. Used to select a goto
/// target after a reduction (see `ParseThread::reduce`, which searches
/// `ParseTable::tree_rules` for a matching entry).
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ReduceRule {
    // Generated
    AlternateType,
    Argument,
    ArgumentList,
    BinaryExpression,
    Block,
    CallExpression,
    ClassDecl,
    ConditionalExpression,
    ExpressionStatement,
    FieldDecl,
    FieldList,
    FieldValue,
    File,
    ForStatement,
    FunctionDecl,
    GroupingExpression,
    Identifier,
    IfStatement,
    IsExpression,
    IteratorVariable,
    LetStatement,
    ListConstructor,
    ListConstructorElement,
    LiteralExpression,
    MatchArm,
    MatchBody,
    MatchExpression,
    MemberAccess,
    NewObjectExpression,
    ParamList,
    Parameter,
    Pattern,
    ReturnStatement,
    ReturnType,
    SelfParameter,
    SelfReference,
    TypeExpression,
    TypeIdentifier,
    TypeParameter,
    TypeParameterList,
    UnaryExpression,
    VariableBinding,
    WhileStatement,
    WildcardPattern,
    // Added after the generated batch above (hence out of sorted order).
    Import,
    Export,
    ExportList,
}
/// One entry in the action table: what to do when a given token kind is
/// seen in a given state.
#[derive(Eq, PartialEq, Hash, Copy, Clone)]
pub enum TokenAction {
    /// No action for this token in this state; triggers error recovery.
    Error,
    /// Reduce `u16` stack entries by the rule, building a named tree of
    /// the given `TreeKind`.
    Reduce(ReduceRule, TreeKind, u16),
    /// Reduce `u16` stack entries without creating a named tree; the
    /// children are spliced into the parent (see `StackEntry::AnonTree`).
    ReduceAnonymous(ReduceRule, u16),
    /// The parse is complete.
    Accept,
    /// Shift the current token and move to state `u16`.
    Shift(u16),
}
/// One parser state. The fields are half-open index ranges into the flat
/// `ParseTable` arrays: `[action_start, action_end)` indexes
/// `token_kind`/`token_action` in parallel, and `[goto_start, goto_end)`
/// indexes `tree_rules`/`tree_goto` in parallel.
pub struct ParseState {
    action_start: usize,
    action_end: usize,
    goto_start: usize,
    goto_end: usize,
}
/// A complete, borrowed parse table. Actions and gotos for all states are
/// flattened into shared arrays; each `ParseState` holds index ranges into
/// them (see `ParseState`). `token_kind[i]` is the lookahead that selects
/// `token_action[i]`; `tree_rules[i]` is the rule that selects goto state
/// `tree_goto[i]`.
pub struct ParseTable<'a> {
    state: &'a [ParseState],
    start_state: usize,
    token_action: &'a [TokenAction],
    token_kind: &'a [TokenKind],
    tree_goto: &'a [u16],
    tree_rules: &'a [ReduceRule],
}
/// The value half of a parse-stack entry (paired with a state index).
#[derive(Clone)]
enum StackEntry {
    /// Sentinel for the bottom of the stack; popping it is a bug.
    Nothing,
    /// A completed, named subtree.
    Tree(TreeRef),
    /// Children of an anonymous reduction, held loose so they can be
    /// spliced into the eventual named parent.
    AnonTree(Vec<Child>),
    /// A shifted token.
    Token(TokenRef),
    /// An error token; ignored when counting entries during reduction.
    Error(TokenRef),
}
/// One speculative parse attempt. Error recovery forks threads; the thread
/// with the highest `score` that accepts wins (see `table_parse`).
#[derive(Clone)]
struct ParseThread {
    /// LR stack of (state index, value) pairs.
    stack: Vec<(usize, StackEntry)>,
    /// Countdown of shifts remaining before we leave panic-mode recovery.
    panic_count: u8,
    /// Total errors hit; threads stop expanding at THREAD_ERROR_LIMIT.
    error_count: u8,
    /// Quality metric: +1 per successful action, -1 per error/idle tick.
    score: u32,
}
impl ParseThread {
    /// A fresh thread with only the start state (and a `Nothing` sentinel)
    /// on its stack.
    fn initial(start_state: usize) -> ParseThread {
        ParseThread {
            stack: vec![(start_state, StackEntry::Nothing)],
            error_count: 0,
            panic_count: 0,
            score: 0,
        }
    }

    /// Pop `count` real entries off the stack, wrap them as the children of
    /// a new tree (named if `kind` is `Some`, anonymous otherwise), and push
    /// the result in the goto state selected by `rule`.
    ///
    /// `Error` entries are popped but not counted toward `count`, so error
    /// tokens ride along inside whatever reduction encloses them.
    ///
    /// NOTE(review): children are pushed in pop order (rightmost child
    /// first) and never reversed before being stored -- confirm that
    /// consumers of `Tree::children` expect reversed order, or that this is
    /// part of why the file is marked "utterly broken".
    fn reduce(
        &mut self,
        table: &ParseTable,
        syntax: &mut SyntaxTree,
        count: u16,
        rule: ReduceRule,
        kind: Option<TreeKind>,
    ) {
        let mut children = Vec::new();
        let count: usize = count.into();
        let mut consumed = 0;
        while consumed < count {
            // Underflow exits the loop; the assert below turns it into a
            // panic with a useful message.
            let Some((_, value)) = self.stack.pop() else {
                break;
            };
            match value {
                StackEntry::Nothing => panic!("Popped nothing!"),
                StackEntry::Tree(t) => {
                    consumed += 1;
                    children.push(Child::Tree(t));
                }
                StackEntry::AnonTree(mut cs) => {
                    // Splice an anonymous reduction's children directly in.
                    consumed += 1;
                    children.append(&mut cs);
                }
                StackEntry::Token(t) => {
                    consumed += 1;
                    children.push(Child::Token(t));
                }
                StackEntry::Error(t) => {
                    // Do *not* increment consumed; these don't count!
                    children.push(Child::Token(t));
                }
            }
        }
        assert_eq!(consumed, count, "Stack underflow on reduce");
        let value = if let Some(kind) = kind {
            // Named reduction: record a real tree in the syntax arena.
            // self_ref/parent/positions are placeholders here; presumably
            // add_tree fixes them up -- not visible in this file.
            let tr = syntax.add_tree(Tree {
                kind,
                self_ref: TreeRef::from_index(0),
                parent: None,
                start_pos: 0,
                end_pos: 0,
                children,
            });
            StackEntry::Tree(tr)
        } else {
            StackEntry::AnonTree(children)
        };
        // Goto: find the target state for `rule` in the exposed state's
        // goto range (parallel arrays tree_rules/tree_goto).
        let (goto_index, _) = self.stack.last().unwrap();
        let goto_state = &table.state[*goto_index];
        let index: usize = (goto_state.goto_start..goto_state.goto_end)
            .find(|i| table.tree_rules[*i] == rule)
            .expect("Unable to goto target after reduction")
            .into();
        let target_state: usize = table.tree_goto[index].into();
        self.stack.push((target_state, value));
    }

    /// Push the shifted token and enter `state`.
    fn shift(&mut self, state: u16, tr: TokenRef) {
        let target_state: usize = state.into();
        self.stack.push((target_state, StackEntry::Token(tr)));
    }
}
// This is what we set the panic level to when we get an error; we require
// this many successful token shifts to decide we're not lost.
const PANIC_THRESHOLD: u8 = 3;

// This is the maximum number of failed states that we're going to go through
// before we just try to reduce all the way out of the tree.
const THREAD_ERROR_LIMIT: u8 = 20;
/// Parse `source` with the given table, using a multi-thread (GLR-ish)
/// strategy for error recovery: on an error, a thread forks once per
/// possible action in the current state and the fork with the highest score
/// that eventually accepts supplies the syntax tree root.
///
/// Returns the syntax tree (root set only if some thread accepted with a
/// named tree on top) and the line table from the tokenizer.
pub fn table_parse(source: &str, table: &ParseTable) -> (Rc<SyntaxTree>, Rc<Lines>) {
    let mut tokens = Tokens::new(source);
    let mut syntax = SyntaxTree::new();
    let mut threads = vec![ParseThread::initial(table.start_state)];
    let mut next_threads = vec![];
    let mut accepted_threads: Vec<ParseThread> = vec![];
    // Set when error recovery speculatively reduces, which may record trees
    // in `syntax` that the winning thread never uses; see cleanup at the end.
    let mut maybe_pushed_garbage = false;
    // While we still have threads to run....
    while threads.len() > 0 {
        // We've still got live threads running, which means we've still got
        // tokens to consume! Any thread that has accepted "early" should be
        // penalized here.
        for thread in accepted_threads.iter_mut() {
            if thread.score > 0 {
                thread.score -= 1;
            }
        }
        // Grab us the next token from the stream.
        // TODO: Collect ephemera before setting on the token.
        let token = tokens.next();
        let current_token = token.kind;
        let current_token_ref = syntax.add_token(token, vec![]);
        // Go over every thread in the list of threads to run. If a thread
        // needs to keep running on this token it can push itself back onto
        // the stack, and we'll re-consider it next time. (This is necessary
        // for both reduce and for error handling.)
        while let Some(mut thread) = threads.pop() {
            // Look up the action for (current state, current token);
            // a missing entry means Error.
            let (state, _) = thread.stack.last().unwrap();
            let state = &table.state[*state];
            let action = (state.action_start..state.action_end)
                .find(|i| table.token_kind[*i] == current_token)
                .map(|i| &table.token_action[i])
                .unwrap_or(&TokenAction::Error);
            match action {
                TokenAction::Reduce(rule, kind, count) => {
                    thread.reduce(table, &mut syntax, *count, *rule, Some(*kind));
                    thread.score += 1;
                    threads.push(thread); // Run me again, I can still work with this token.
                }
                TokenAction::ReduceAnonymous(rule, count) => {
                    thread.reduce(table, &mut syntax, *count, *rule, None);
                    thread.score += 1;
                    threads.push(thread); // Run me again, I can still work with this token.
                }
                TokenAction::Shift(state) => {
                    thread.shift(*state, current_token_ref);
                    thread.score += 1;
                    if thread.panic_count > 0 {
                        thread.panic_count -= 1;
                    } else if thread.error_count > 0 {
                        // TODO: We shifted a good number of tokens in a row,
                        // maybe we should consider reducing the error count
                        // here too, so that this thread might live for
                        // longer.
                    }
                    next_threads.push(thread);
                }
                TokenAction::Accept => {
                    thread.score += 1;
                    accepted_threads.push(thread);
                }
                // Error handling, the bane of LR parsers!
                //
                // In this parser, we borrow a trick from Tree-Sitter and
                // treat the parse error as if it were an ambiguity: we see a
                // token but don't know what to do with it, so we'll just try
                // to do *everything* with it and see what sticks.
                //
                // The tricky part here is not causing an enormous explosion
                // of threads, so we have certain conditions where we just
                // give up and refuse to consider any more tokens for a given
                // error thread.
                //
                TokenAction::Error => {
                    // First, report the error. (We use a pretty standard
                    // "panic" error recovery mode here to decide when to
                    // start showing new error messages, otherwise we would
                    // just generate *way* too many cascading errors.)
                    //
                    if thread.panic_count == 0 {
                        // TODO: Get a description for this state from the table somehow.
                        // TODO: Describe the error in an error message somehow.
                        let token = &syntax[current_token_ref];
                        let error_token = syntax.add_token(
                            Token::error(token.start(), token.end(), format!("PARSE ERROR")),
                            vec![],
                        );
                        // NOTE: `Error` stack entries are not counted when
                        // reducing, so we know this push here won't mess up
                        // the state machine.
                        thread.stack.push((0, StackEntry::Error(error_token)));
                    }
                    // Now mark the thread as panicing so that we don't
                    // produce too many random errors...
                    thread.panic_count = PANIC_THRESHOLD;
                    // Count the error.
                    // TODO: Check to see if this really does help thread explosion or not.
                    if thread.error_count < THREAD_ERROR_LIMIT {
                        thread.error_count += 1;
                    }
                    // Penalize this thread; this is not a great parse, we can tell.
                    if thread.score > 0 {
                        thread.score -= 1;
                    }
                    // Fork: try every distinct action available in this
                    // state as if it applied to this token.
                    let mut executed = HashSet::new();
                    for index in state.action_start..state.action_end {
                        // Make absolutely sure we don't do the same thing
                        // twice! It can happen, and it is hugely wasteful
                        // because it spawns duplicate threads.
                        let action = &table.token_action[index];
                        if executed.contains(action) {
                            continue;
                        }
                        executed.insert(action.clone());
                        match action {
                            TokenAction::Error => {
                                panic!("Literal error in the table; table is corrupt")
                            }
                            TokenAction::Reduce(rule, kind, count) => {
                                // Let's pretend that we're done with the
                                // current rule and see what happens.
                                let mut new_thread = thread.clone();
                                new_thread.reduce(&table, &mut syntax, *count, *rule, Some(*kind));
                                threads.push(new_thread);
                                // Mark that we might have to trim the syntax
                                // tree because we might not use this
                                // reduction.
                                maybe_pushed_garbage = true;
                            }
                            TokenAction::ReduceAnonymous(rule, count) => {
                                // Let's pretend that we're done with the
                                // current rule and see what happens.
                                let mut new_thread = thread.clone();
                                new_thread.reduce(&table, &mut syntax, *count, *rule, None);
                                threads.push(new_thread);
                            }
                            TokenAction::Shift(state) => {
                                // Let's just pretend the current token
                                // matched this thing that we were looking
                                // for, and shift it anyway, and see what
                                // happens.
                                //
                                // This represents an expansion of the search
                                // space and so we only want to do it if we
                                // haven't reached our error limit yet.
                                if thread.error_count < THREAD_ERROR_LIMIT {
                                    let mut new_thread = thread.clone();
                                    new_thread.shift(*state, current_token_ref);
                                    next_threads.push(new_thread);
                                }
                            }
                            TokenAction::Accept => accepted_threads.push(thread.clone()),
                        }
                    }
                    // Let's try to process the *next* token and see what
                    // happens with this same thread, unless we're giving up
                    // on the thread.
                    if thread.error_count < THREAD_ERROR_LIMIT {
                        next_threads.push(thread);
                    }
                }
            }
        }
        // Drain all the next_threads into the current stack and start again
        // on the next token!
        threads.append(&mut next_threads);
    }
    // OK no more threads, we're done. In theory at this point we should
    // penalize all accepted threads for remaining tokens but if we've got no
    // more threads and there are remaining tokens then they all hit their
    // error limit and are basically equivalent. (Why penalize all threads by
    // the same amount?)
    //
    // Let's just go through all the threads that "accepted" and pick the one
    // with the highest score that also wound up with a named tree at the top.
    let mut best_score = 0;
    for thread in accepted_threads {
        // `>=` plus the `+ 1` below means a later thread must strictly beat
        // the current winner to replace it.
        if thread.score >= best_score {
            if let Some((_, StackEntry::Tree(tr))) = thread.stack.last() {
                syntax.root = Some(*tr);
                best_score = thread.score + 1;
            }
        }
    }
    // Now, our syntax tree might have errors in it, and if it does we might
    // have pushed trees that we have no interest in ever seeing ever again.
    // That means that we need to rewrite the tree starting from the root, to
    // make sure that the trees in the syntax tree are for real for real.
    if maybe_pushed_garbage {
        // Mark: everything reachable from the root is valid...
        let mut valid = HashSet::new();
        let mut stack = Vec::new();
        if let Some(tr) = &syntax.root {
            stack.push(*tr);
        }
        while let Some(tr) = stack.pop() {
            valid.insert(tr);
            for x in syntax[tr].child_trees() {
                stack.push(x);
            }
        }
        // ...sweep: everything else is neutered to Ignore (not removed, so
        // existing TreeRef indices stay valid).
        for tr in syntax.trees.iter_mut() {
            if !valid.contains(&tr.self_ref) {
                tr.kind = TreeKind::Ignore;
            }
        }
    }
    (Rc::new(syntax), Rc::new(tokens.lines()))
}

1528
fine/grammar/parser.py Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,2 @@
[tool.black]
line-length=100

40
fine/grammar/rust.py Normal file
View file

@ -0,0 +1,40 @@
import io
import parser
def generate_rust_parser(output: io.TextIOBase, table: list[dict[str, parser.Action]]):
    """Emit Rust source for the given parse table into `output`.

    NOTE(review): the diff header says this file is 40 lines, but only the
    portion through the trailing `pass` is visible here -- the function
    appears unfinished (table emission is a stub). Confirm against the full
    file before relying on this docstring.
    """
    lines = []
    # Collect the distinct TreeKind names: the second element of every
    # reduce action, excluding anonymous rules (names starting with "_").
    tree_kinds = list(
        sorted(
            {
                action[1]
                for state in table
                for action in state.values()
                if action[0] == "reduce" and action[1][0] != "_"
            }
        )
    )
    # First, generate the treekind enumeration
    lines.extend(
        [
            "#[derive(Debug, Eq, PartialEq)]",
            "pub enum TreeKind {",
            " Error,",
            "",
        ]
    )
    lines.extend(f" {kind}," for kind in tree_kinds)
    lines.extend(
        [
            "}",
            "",
        ]
    )
    # Next generate the parse table
    lines.extend([])  # placeholder; table emission not implemented yet
    pass