Chaos: split tables, interactions, Terminal

- Tables are split into `actions` and `gotos` now to make formatting
  nicer
- Token is renamed Terminal
- Likes are now Florps
- Lexer now loaded dynamically (badly)
This commit is contained in:
John Doty 2024-05-30 08:02:47 -07:00
parent 71078f76b4
commit 56d24c5fb9
3 changed files with 342 additions and 286 deletions

View file

@ -1,56 +1,56 @@
# This is an example grammar. # This is an example grammar.
import re import re
from parser import Assoc, Grammar, Nothing, Token, rule, seq from parser import Assoc, Grammar, Nothing, Terminal, rule, seq, Rule
ARROW = Token("Arrow") ARROW = Terminal("Arrow")
AS = Token("As") AS = Terminal("As")
BAR = Token("Bar") BAR = Terminal("Bar")
CLASS = Token("Class") CLASS = Terminal("Class")
COLON = Token("Colon") COLON = Terminal("Colon")
ELSE = Token("Else") ELSE = Terminal("Else")
FOR = Token("For") FOR = Terminal("For")
FUN = Token("Fun") FUN = Terminal("Fun")
IDENTIFIER = Token("Identifier") IDENTIFIER = Terminal("Identifier")
IF = Token("If") IF = Terminal("If")
IMPORT = Token("Import") IMPORT = Terminal("Import")
IN = Token("In") IN = Terminal("In")
LCURLY = Token("LeftBrace") LCURLY = Terminal("LeftBrace")
LET = Token("Let") LET = Terminal("Let")
RCURLY = Token("RightBrace") RCURLY = Terminal("RightBrace")
RETURN = Token("Return") RETURN = Terminal("Return")
SEMICOLON = Token("Semicolon") SEMICOLON = Terminal("Semicolon")
STRING = Token("String") STRING = Terminal("String")
WHILE = Token("While") WHILE = Terminal("While")
EQUAL = Token("Equal") EQUAL = Terminal("Equal")
LPAREN = Token("LeftParen") LPAREN = Terminal("LeftParen")
RPAREN = Token("RightParen") RPAREN = Terminal("RightParen")
COMMA = Token("Comma") COMMA = Terminal("Comma")
SELF = Token("Selff") SELF = Terminal("Selff")
OR = Token("Or") OR = Terminal("Or")
IS = Token("Is") IS = Terminal("Is")
AND = Token("And") AND = Terminal("And")
EQUALEQUAL = Token("EqualEqual") EQUALEQUAL = Terminal("EqualEqual")
BANGEQUAL = Token("BangEqual") BANGEQUAL = Terminal("BangEqual")
LESS = Token("Less") LESS = Terminal("Less")
GREATER = Token("Greater") GREATER = Terminal("Greater")
LESSEQUAL = Token("LessEqual") LESSEQUAL = Terminal("LessEqual")
GREATEREQUAL = Token("GreaterEqual") GREATEREQUAL = Terminal("GreaterEqual")
PLUS = Token("Plus") PLUS = Terminal("Plus")
MINUS = Token("Minus") MINUS = Terminal("Minus")
STAR = Token("Star") STAR = Terminal("Star")
SLASH = Token("Slash") SLASH = Terminal("Slash")
NUMBER = Token("Number") NUMBER = Terminal("Number")
TRUE = Token("True") TRUE = Terminal("True")
FALSE = Token("False") FALSE = Terminal("False")
BANG = Token("Bang") BANG = Terminal("Bang")
DOT = Token("Dot") DOT = Terminal("Dot")
MATCH = Token("Match") MATCH = Terminal("Match")
EXPORT = Token("Export") EXPORT = Terminal("Export")
UNDERSCORE = Token("Underscore") UNDERSCORE = Terminal("Underscore")
NEW = Token("New") NEW = Terminal("New")
LSQUARE = Token("LeftBracket") LSQUARE = Terminal("LeftBracket")
RSQUARE = Token("RightBracket") RSQUARE = Terminal("RightBracket")
class FineGrammar(Grammar): class FineGrammar(Grammar):
@ -77,58 +77,58 @@ class FineGrammar(Grammar):
) )
@rule @rule
def file(self): def file(self) -> Rule:
return self._file_statement_list return self._file_statement_list
@rule @rule
def _file_statement_list(self): def _file_statement_list(self) -> Rule:
return self._file_statement | (self._file_statement_list + self._file_statement) return self._file_statement | (self._file_statement_list + self._file_statement)
@rule @rule
def _file_statement(self): def _file_statement(self) -> Rule:
return ( return (
self.import_statement | self.class_declaration | self.export_statement | self.statement self.import_statement | self.class_declaration | self.export_statement | self._statement
) )
@rule @rule
def import_statement(self): def import_statement(self) -> Rule:
return seq(IMPORT, STRING, AS, IDENTIFIER, SEMICOLON) return seq(IMPORT, STRING, AS, IDENTIFIER, SEMICOLON)
@rule @rule
def class_declaration(self): def class_declaration(self) -> Rule:
return seq(CLASS, IDENTIFIER, self.class_body) return seq(CLASS, IDENTIFIER, self.class_body)
@rule @rule
def class_body(self): def class_body(self) -> Rule:
return seq(LCURLY, RCURLY) | seq(LCURLY, self._class_members, RCURLY) return seq(LCURLY, RCURLY) | seq(LCURLY, self._class_members, RCURLY)
@rule @rule
def _class_members(self): def _class_members(self) -> Rule:
return self._class_member | seq(self._class_members, self._class_member) return self._class_member | seq(self._class_members, self._class_member)
@rule @rule
def _class_member(self): def _class_member(self) -> Rule:
return self.field_declaration | self.function_declaration return self.field_declaration | self.function_declaration
@rule @rule
def field_declaration(self): def field_declaration(self) -> Rule:
return seq(IDENTIFIER, COLON, self.type_expression, SEMICOLON) return seq(IDENTIFIER, COLON, self.type_expression, SEMICOLON)
# Types # Types
@rule @rule
def type_expression(self): def type_expression(self) -> Rule:
return self.alternate_type | self.type_identifier return self.alternate_type | self.type_identifier
@rule @rule
def alternate_type(self): def alternate_type(self) -> Rule:
return seq(self.type_expression, OR, self.type_identifier) return seq(self.type_expression, OR, self.type_identifier)
@rule @rule
def type_identifier(self): def type_identifier(self) -> Rule:
return IDENTIFIER return IDENTIFIER
@rule @rule
def export_statement(self): def export_statement(self) -> Rule:
return ( return (
seq(EXPORT, self.class_declaration) seq(EXPORT, self.class_declaration)
| seq(EXPORT, self.function_declaration) | seq(EXPORT, self.function_declaration)
@ -137,18 +137,18 @@ class FineGrammar(Grammar):
) )
@rule @rule
def export_list(self): def export_list(self) -> Rule:
return Nothing | IDENTIFIER | seq(IDENTIFIER, COMMA, self.export_list) return Nothing | IDENTIFIER | seq(IDENTIFIER, COMMA, self.export_list)
# Functions # Functions
@rule @rule
def function_declaration(self): def function_declaration(self) -> Rule:
return seq(FUN, IDENTIFIER, self.function_parameters, self.block) | seq( return seq(FUN, IDENTIFIER, self.function_parameters, self.block) | seq(
FUN, IDENTIFIER, self.function_parameters, ARROW, self.type_expression, self.block FUN, IDENTIFIER, self.function_parameters, ARROW, self.type_expression, self.block
) )
@rule @rule
def function_parameters(self): def function_parameters(self) -> Rule:
return ( return (
seq(LPAREN, RPAREN) seq(LPAREN, RPAREN)
| seq(LPAREN, self.first_parameter, RPAREN) | seq(LPAREN, self.first_parameter, RPAREN)
@ -156,33 +156,33 @@ class FineGrammar(Grammar):
) )
@rule @rule
def first_parameter(self): def first_parameter(self) -> Rule:
return SELF | self.parameter return SELF | self.parameter
@rule @rule
def parameter_list(self): def parameter_list(self) -> Rule:
return Nothing | self.parameter | seq(self.parameter, COMMA, self.parameter_list) return Nothing | self.parameter | seq(self.parameter, COMMA, self.parameter_list)
@rule @rule
def parameter(self): def parameter(self) -> Rule:
return seq(IDENTIFIER, COLON, self.type_expression) return seq(IDENTIFIER, COLON, self.type_expression)
# Block # Block
@rule @rule
def block(self): def block(self) -> Rule:
return ( return (
seq(LCURLY, RCURLY) seq(LCURLY, RCURLY)
| seq(LCURLY, self.expression, RCURLY) | seq(LCURLY, self.expression, RCURLY)
| seq(LCURLY, self.statement_list, RCURLY) | seq(LCURLY, self._statement_list, RCURLY)
| seq(LCURLY, self.statement_list, self.expression, RCURLY) | seq(LCURLY, self._statement_list, self.expression, RCURLY)
) )
@rule @rule
def statement_list(self): def _statement_list(self) -> Rule:
return self.statement | seq(self.statement_list, self.statement) return self._statement | seq(self._statement_list, self._statement)
@rule @rule
def statement(self): def _statement(self) -> Rule:
return ( return (
self.function_declaration self.function_declaration
| self.let_statement | self.let_statement
@ -194,56 +194,56 @@ class FineGrammar(Grammar):
) )
@rule @rule
def let_statement(self): def let_statement(self) -> Rule:
return seq(LET, IDENTIFIER, EQUAL, self.expression, SEMICOLON) return seq(LET, IDENTIFIER, EQUAL, self.expression, SEMICOLON)
@rule @rule
def return_statement(self): def return_statement(self) -> Rule:
return seq(RETURN, self.expression, SEMICOLON) | seq(RETURN, SEMICOLON) return seq(RETURN, self.expression, SEMICOLON) | seq(RETURN, SEMICOLON)
@rule @rule
def for_statement(self): def for_statement(self) -> Rule:
return seq(FOR, self.iterator_variable, IN, self.expression, self.block) return seq(FOR, self.iterator_variable, IN, self.expression, self.block)
@rule @rule
def iterator_variable(self): def iterator_variable(self) -> Rule:
return IDENTIFIER return IDENTIFIER
@rule @rule
def if_statement(self): def if_statement(self) -> Rule:
return self.conditional_expression return self.conditional_expression
@rule @rule
def while_statement(self): def while_statement(self) -> Rule:
return seq(WHILE, self.expression, self.block) return seq(WHILE, self.expression, self.block)
@rule @rule
def expression_statement(self): def expression_statement(self) -> Rule:
return seq(self.expression, SEMICOLON) return seq(self.expression, SEMICOLON)
# Expressions # Expressions
@rule @rule
def expression(self): def expression(self) -> Rule:
return self.assignment_expression return self.assignment_expression
@rule @rule
def assignment_expression(self): def assignment_expression(self) -> Rule:
return seq(self.or_expression, EQUAL, self.assignment_expression) | self.or_expression return seq(self.or_expression, EQUAL, self.assignment_expression) | self.or_expression
@rule @rule
def or_expression(self): def or_expression(self) -> Rule:
return seq(self.or_expression, OR, self.is_expression) | self.is_expression return seq(self.or_expression, OR, self.is_expression) | self.is_expression
@rule @rule
def is_expression(self): def is_expression(self) -> Rule:
return seq(self.is_expression, IS, self.pattern) | self.and_expression return seq(self.is_expression, IS, self.pattern) | self.and_expression
@rule @rule
def and_expression(self): def and_expression(self) -> Rule:
return seq(self.and_expression, AND, self.equality_expression) | self.equality_expression return seq(self.and_expression, AND, self.equality_expression) | self.equality_expression
@rule @rule
def equality_expression(self): def equality_expression(self) -> Rule:
return ( return (
seq(self.equality_expression, EQUALEQUAL, self.relation_expression) seq(self.equality_expression, EQUALEQUAL, self.relation_expression)
| seq(self.equality_expression, BANGEQUAL, self.relation_expression) | seq(self.equality_expression, BANGEQUAL, self.relation_expression)
@ -251,7 +251,7 @@ class FineGrammar(Grammar):
) )
@rule @rule
def relation_expression(self): def relation_expression(self) -> Rule:
return ( return (
seq(self.relation_expression, LESS, self.additive_expression) seq(self.relation_expression, LESS, self.additive_expression)
| seq(self.relation_expression, LESSEQUAL, self.additive_expression) | seq(self.relation_expression, LESSEQUAL, self.additive_expression)
@ -261,7 +261,7 @@ class FineGrammar(Grammar):
) )
@rule @rule
def additive_expression(self): def additive_expression(self) -> Rule:
return ( return (
seq(self.additive_expression, PLUS, self.multiplication_expression) seq(self.additive_expression, PLUS, self.multiplication_expression)
| seq(self.additive_expression, MINUS, self.multiplication_expression) | seq(self.additive_expression, MINUS, self.multiplication_expression)
@ -269,7 +269,7 @@ class FineGrammar(Grammar):
) )
@rule @rule
def multiplication_expression(self): def multiplication_expression(self) -> Rule:
return ( return (
seq(self.multiplication_expression, STAR, self.primary_expression) seq(self.multiplication_expression, STAR, self.primary_expression)
| seq(self.multiplication_expression, SLASH, self.primary_expression) | seq(self.multiplication_expression, SLASH, self.primary_expression)
@ -277,7 +277,7 @@ class FineGrammar(Grammar):
) )
@rule @rule
def primary_expression(self): def primary_expression(self) -> Rule:
return ( return (
IDENTIFIER IDENTIFIER
| SELF | SELF
@ -299,7 +299,7 @@ class FineGrammar(Grammar):
) )
@rule @rule
def conditional_expression(self): def conditional_expression(self) -> Rule:
return ( return (
seq(IF, self.expression, self.block) seq(IF, self.expression, self.block)
| seq(IF, self.expression, self.block, ELSE, self.conditional_expression) | seq(IF, self.expression, self.block, ELSE, self.conditional_expression)
@ -307,11 +307,11 @@ class FineGrammar(Grammar):
) )
@rule @rule
def list_constructor_expression(self): def list_constructor_expression(self) -> Rule:
return seq(LSQUARE, RSQUARE) | seq(LSQUARE, self.expression_list, RSQUARE) return seq(LSQUARE, RSQUARE) | seq(LSQUARE, self.expression_list, RSQUARE)
@rule @rule
def expression_list(self): def expression_list(self) -> Rule:
return ( return (
self.expression self.expression
| seq(self.expression, COMMA) | seq(self.expression, COMMA)
@ -319,15 +319,15 @@ class FineGrammar(Grammar):
) )
@rule @rule
def match_expression(self): def match_expression(self) -> Rule:
return seq(MATCH, self.expression, self.match_body) return seq(MATCH, self.expression, self.match_body)
@rule @rule
def match_body(self): def match_body(self) -> Rule:
return seq(LCURLY, RCURLY) | seq(LCURLY, self.match_arms, RCURLY) return seq(LCURLY, RCURLY) | seq(LCURLY, self.match_arms, RCURLY)
@rule @rule
def match_arms(self): def match_arms(self) -> Rule:
return ( return (
self.match_arm self.match_arm
| seq(self.match_arm, COMMA) | seq(self.match_arm, COMMA)
@ -335,11 +335,11 @@ class FineGrammar(Grammar):
) )
@rule @rule
def match_arm(self): def match_arm(self) -> Rule:
return seq(self.pattern, ARROW, self.expression) return seq(self.pattern, ARROW, self.expression)
@rule @rule
def pattern(self): def pattern(self) -> Rule:
return ( return (
seq(self.variable_binding, self.pattern_core, AND, self.and_expression) seq(self.variable_binding, self.pattern_core, AND, self.and_expression)
| seq(self.variable_binding, self.pattern_core) | seq(self.variable_binding, self.pattern_core)
@ -348,27 +348,27 @@ class FineGrammar(Grammar):
) )
@rule @rule
def pattern_core(self): def pattern_core(self) -> Rule:
return self.type_expression | self.wildcard_pattern return self.type_expression | self.wildcard_pattern
@rule @rule
def wildcard_pattern(self): def wildcard_pattern(self) -> Rule:
return UNDERSCORE return UNDERSCORE
@rule @rule
def variable_binding(self): def variable_binding(self) -> Rule:
return seq(IDENTIFIER, COLON) return seq(IDENTIFIER, COLON)
@rule @rule
def object_constructor_expression(self): def object_constructor_expression(self) -> Rule:
return seq(NEW, self.type_identifier, self.field_list) return seq(NEW, self.type_identifier, self.field_list)
@rule @rule
def field_list(self): def field_list(self) -> Rule:
return seq(LCURLY, RCURLY) | seq(LCURLY, self.field_values, RCURLY) return seq(LCURLY, RCURLY) | seq(LCURLY, self.field_values, RCURLY)
@rule @rule
def field_values(self): def field_values(self) -> Rule:
return ( return (
self.field_value self.field_value
| seq(self.field_value, COMMA) | seq(self.field_value, COMMA)
@ -376,7 +376,7 @@ class FineGrammar(Grammar):
) )
@rule @rule
def field_value(self): def field_value(self) -> Rule:
return IDENTIFIER | seq(IDENTIFIER, COLON, self.expression) return IDENTIFIER | seq(IDENTIFIER, COLON, self.expression)
@ -533,16 +533,19 @@ import bisect
class FineTokens: class FineTokens:
def __init__(self, src: str): def __init__(self, src: str):
self.src = src self.src = src
self.tokens = list(tokenize(src)) self._tokens = list(tokenize(src))
self.lines = [m.start() for m in re.finditer("\n", src)] self.lines = [m.start() for m in re.finditer("\n", src)]
def tokens(self):
return self._tokens
def dump(self, *, start=None, end=None): def dump(self, *, start=None, end=None):
if start is None: if start is None:
start = 0 start = 0
if end is None: if end is None:
end = len(self.tokens) end = len(self._tokens)
for token in self.tokens[start:end]: for token in self._tokens[start:end]:
(kind, start, length) = token (kind, start, length) = token
line_index = bisect.bisect_left(self.lines, start) line_index = bisect.bisect_left(self.lines, start)
if line_index == 0: if line_index == 0:
@ -553,14 +556,3 @@ class FineTokens:
print( print(
f"{start:04} {kind.value:12} {self.src[start:start+length]} ({line_index}, {column_index})" f"{start:04} {kind.value:12} {self.src[start:start+length]} ({line_index}, {column_index})"
) )
if __name__ == "__main__":
grammar = FineGrammar()
table = grammar.build_table(start="expression")
print(f"{len(table)} states")
average_entries = sum(len(row) for row in table) / len(table)
max_entries = max(len(row) for row in table)
print(f"{average_entries} average, {max_entries} max")

View file

@ -7,11 +7,12 @@ import select
import sys import sys
import termios import termios
import time import time
import traceback
import tty import tty
import types
import typing import typing
from dataclasses import dataclass from dataclasses import dataclass
import grammar
import parser import parser
# from parser import Token, Grammar, rule, seq # from parser import Token, Grammar, rule, seq
@ -47,7 +48,8 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
This is not a *great* parser, it's really just a demo for what you can This is not a *great* parser, it's really just a demo for what you can
do with the table. do with the table.
""" """
input: list[str] = [t.value for (t, _, _) in tokens.tokens] input_tokens = tokens.tokens()
input: list[str] = [t.value for (t, _, _) in input_tokens]
assert "$" not in input assert "$" not in input
input = input + ["$"] input = input + ["$"]
@ -61,7 +63,7 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
current_state = stack[-1][0] current_state = stack[-1][0]
current_token = input[input_index] current_token = input[input_index]
action = table.states[current_state].get(current_token, parser.Error()) action = table.actions[current_state].get(current_token, parser.Error())
if trace: if trace:
trace(stack, input, input_index, action) trace(stack, input, input_index, action)
@ -84,21 +86,21 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
value = Tree(name=name if not transparent else None, children=tuple(children)) value = Tree(name=name if not transparent else None, children=tuple(children))
stack = stack[:-size] stack = stack[:-size]
goto = table.states[stack[-1][0]].get(name, parser.Error()) goto = table.gotos[stack[-1][0]].get(name)
assert isinstance(goto, parser.Goto) assert goto is not None
stack.append((goto.state, value)) stack.append((goto, value))
case parser.Shift(state): case parser.Shift(state):
stack.append((state, current_token)) stack.append((state, current_token))
input_index += 1 input_index += 1
case parser.Error(): case parser.Error():
if input_index >= len(tokens.tokens): if input_index >= len(input_tokens):
message = "Unexpected end of file" message = "Unexpected end of file"
start = tokens.tokens[-1][1] start = input_tokens[-1][1]
else: else:
message = f"Syntax error: unexpected symbol {current_token}" message = f"Syntax error: unexpected symbol {current_token}"
(_, start, _) = tokens.tokens[input_index] (_, start, _) = input_tokens[input_index]
line_index = bisect.bisect_left(tokens.lines, start) line_index = bisect.bisect_left(tokens.lines, start)
if line_index == 0: if line_index == 0:
@ -147,7 +149,7 @@ def CSI(x: bytes) -> bytes:
return ESC(b"[" + x) return ESC(b"[" + x)
CLEAR = CSI(b"2J") CLEAR = CSI(b"H") + CSI(b"J")
def enter_alt_screen(): def enter_alt_screen():
@ -158,15 +160,108 @@ def leave_alt_screen():
sys.stdout.buffer.write(CSI(b"?1049l")) sys.stdout.buffer.write(CSI(b"?1049l"))
class DynamicModule:
file_name: str
member_name: str | None
last_time: float | None
module: types.ModuleType | None
def __init__(self, file_name, member_name):
self.file_name = file_name
self.member_name = member_name
self.last_time = None
self.module = None
self.value = None
def _predicate(self, member) -> bool:
if not inspect.isclass(member):
return False
assert self.module is not None
if member.__module__ != self.module.__name__:
return False
return True
def _transform(self, value):
return value
def get(self):
st = os.stat(self.file_name)
if self.last_time == st.st_mtime:
assert self.value is not None
return self.value
self.value = None
if self.module is None:
mod_name = inspect.getmodulename(self.file_name)
if mod_name is None:
raise Exception(f"{self.file_name} does not seem to be a module")
self.module = importlib.import_module(mod_name)
else:
importlib.reload(self.module)
if self.member_name is None:
classes = inspect.getmembers(self.module, self._predicate)
if len(classes) == 0:
raise Exception(f"No grammars found in {self.file_name}")
if len(classes) > 1:
raise Exception(
f"{len(classes)} grammars found in {self.file_name}: {', '.join(c[0] for c in classes)}"
)
cls = classes[0][1]
else:
cls = getattr(self.module, self.member_name)
if cls is None:
raise Exception(f"Cannot find {self.member_name} in {self.file_name}")
if not self._predicate(cls):
raise Exception(f"{self.member_name} in {self.file_name} is not suitable")
self.value = self._transform(cls)
self.last_time = st.st_mtime
return self.value
class DynamicGrammarModule(DynamicModule):
def __init__(self, file_name, member_name, start_rule, generator):
super().__init__(file_name, member_name)
self.start_rule = start_rule
self.generator = generator
def _predicate(self, member) -> bool:
if not super()._predicate(member):
return False
if getattr(member, "build_table", None):
return True
return False
def _transform(self, value):
return value().build_table(start=self.start_rule, generator=self.generator)
class DynamicLexerModule(DynamicModule):
def _predicate(self, member) -> bool:
if not super()._predicate(member):
return False
if getattr(member, "tokens", None):
return True
return False
class Harness: class Harness:
source: str | None source: str | None
table: parser.ParseTable | None table: parser.ParseTable | None
tree: Tree | None tree: Tree | None
def __init__(self, lexer_func, start_rule, source_path): def __init__(self, start_rule, source_path):
# self.generator = parser.GenerateLR1
self.generator = parser.GenerateLALR
self.lexer_func = lexer_func
self.start_rule = start_rule self.start_rule = start_rule
self.source_path = source_path self.source_path = source_path
@ -176,10 +271,11 @@ class Harness:
self.tree = None self.tree = None
self.errors = None self.errors = None
self.grammar_file_name = "./grammar.py" self.grammar_module = DynamicGrammarModule(
self.last_grammar_time = None "./grammar.py", None, self.start_rule, generator=parser.GenerateLALR
self.grammar_module = None )
self.grammar_name = None
self.lexer_module = DynamicLexerModule("./grammar.py", None)
def run(self): def run(self):
while True: while True:
@ -191,71 +287,19 @@ class Harness:
self.update() self.update()
# def should_reload_grammar(self):
def load_grammar(self) -> parser.ParseTable: def load_grammar(self) -> parser.ParseTable:
st = os.stat(self.grammar_file_name) return self.grammar_module.get()
if self.last_grammar_time == st.st_mtime:
assert self.table is not None
return self.table
self.table = None
if self.grammar_module is None:
mod_name = inspect.getmodulename(self.grammar_file_name)
if mod_name is None:
raise Exception(f"{self.grammar_file_name} does not seem to be a module")
self.grammar_module = importlib.import_module(mod_name)
else:
importlib.reload(self.grammar_module)
def is_grammar(cls):
if not inspect.isclass(cls):
return False
assert self.grammar_module is not None
if cls.__module__ != self.grammar_module.__name__:
return False
if getattr(cls, "build_table", None):
return True
return False
if self.grammar_name is None:
classes = inspect.getmembers(self.grammar_module, is_grammar)
if len(classes) == 0:
raise Exception(f"No grammars found in {self.grammar_file_name}")
if len(classes) > 1:
raise Exception(
f"{len(classes)} grammars found in {self.grammar_file_name}: {', '.join(c[0] for c in classes)}"
)
grammar_func = classes[0][1]
else:
cls = getattr(self.grammar_module, self.grammar_name)
if cls is None:
raise Exception(f"Cannot find {self.grammar_name} in {self.grammar_file_name}")
if not is_grammar(cls):
raise Exception(
f"{self.grammar_name} in {self.grammar_file_name} does not seem to be a grammar"
)
grammar_func = cls
self.table = grammar_func().build_table(start=self.start_rule, generator=self.generator)
self.last_grammar_time = st.st_mtime
assert self.table is not None
return self.table
def update(self): def update(self):
start_time = time.time() start_time = time.time()
try: try:
table = self.load_grammar() table = self.load_grammar()
lexer_func = self.lexer_module.get()
with open(self.source_path, "r", encoding="utf-8") as f: with open(self.source_path, "r", encoding="utf-8") as f:
self.source = f.read() self.source = f.read()
self.tokens = self.lexer_func(self.source) self.tokens = lexer_func(self.source)
lex_time = time.time() lex_time = time.time()
# print(f"{tokens.lines}") # print(f"{tokens.lines}")
@ -268,7 +312,9 @@ class Harness:
except Exception as e: except Exception as e:
self.tree = None self.tree = None
self.errors = [f"Error loading grammar: {e}"] self.errors = ["Error loading grammar:"] + [
" " + l.rstrip() for fl in traceback.format_exception(e) for l in fl.splitlines()
]
parse_elapsed = time.time() - start_time parse_elapsed = time.time() - start_time
table = None table = None
@ -276,7 +322,7 @@ class Harness:
rows, cols = termios.tcgetwinsize(sys.stdout.fileno()) rows, cols = termios.tcgetwinsize(sys.stdout.fileno())
if table is not None: if table is not None:
states = table.states states = table.actions
average_entries = sum(len(row) for row in states) / len(states) average_entries = sum(len(row) for row in states) / len(states)
max_entries = max(len(row) for row in states) max_entries = max(len(row) for row in states)
print( print(
@ -320,7 +366,6 @@ if __name__ == "__main__":
enter_alt_screen() enter_alt_screen()
h = Harness( h = Harness(
lexer_func=grammar.FineTokens,
start_rule="file", start_rule="file",
source_path=source_path, source_path=source_path,
) )

197
parser.py
View file

@ -21,10 +21,10 @@ To get started, create a grammar that derives from the `Grammar` class. Create
one method per nonterminal, decorated with the `rule` decorator. Here's an one method per nonterminal, decorated with the `rule` decorator. Here's an
example: example:
PLUS = Token('+') PLUS = Terminal('+')
LPAREN = Token('(') LPAREN = Terminal('(')
RPAREN = Token(')') RPAREN = Terminal(')')
ID = Token('id') ID = Terminal('id')
class SimpleGrammar(Grammar): class SimpleGrammar(Grammar):
@rule @rule
@ -410,11 +410,6 @@ class Shift(Action):
state: int state: int
@dataclasses.dataclass
class Goto(Action):
state: int
@dataclasses.dataclass @dataclasses.dataclass
class Accept(Action): class Accept(Action):
pass pass
@ -511,8 +506,7 @@ class ErrorCollection:
case Accept(): case Accept():
action_str = "accept the parse" action_str = "accept the parse"
case _: case _:
assert isinstance(action, Goto) raise Exception(f"unknown action type {action}")
raise Exception("Shouldn't conflict on goto ever")
lines.append( lines.append(
f" - We are in the rule `{name}: {rule}` and we should {action_str}" f" - We are in the rule `{name}: {rule}` and we should {action_str}"
@ -525,7 +519,53 @@ class ErrorCollection:
@dataclasses.dataclass @dataclasses.dataclass
class ParseTable: class ParseTable:
states: list[dict[str, Action]] actions: list[dict[str, Action]]
gotos: list[dict[str, int]]
def format(self):
"""Format a parser table so pretty."""
def format_action(actions: dict[str, Action], terminal: str):
action = actions.get(terminal)
match action:
case Accept():
return "accept"
case Shift(state=state):
return f"s{state}"
case Reduce(count=count):
return f"r{count}"
case _:
return ""
def format_goto(gotos: dict[str, int], nt: str):
index = gotos.get(nt)
if index is None:
return ""
else:
return str(index)
terminals = list(sorted({k for row in self.actions for k in row.keys()}))
nonterminals = list(sorted({k for row in self.gotos for k in row.keys()}))
header = " | {terms} | {nts}".format(
terms=" ".join(f"{terminal: <6}" for terminal in terminals),
nts=" ".join(f"{nt: <5}" for nt in nonterminals),
)
lines = [
header,
"-" * len(header),
] + [
"{index: <4} | {actions} | {gotos}".format(
index=i,
actions=" ".join(
"{0: <6}".format(format_action(actions, terminal)) for terminal in terminals
),
gotos=" ".join("{0: <5}".format(format_goto(gotos, nt)) for nt in nonterminals),
)
for i, (actions, gotos) in enumerate(zip(self.actions, self.gotos))
]
return "\n".join(lines)
class TableBuilder(object): class TableBuilder(object):
@ -536,12 +576,14 @@ class TableBuilder(object):
""" """
errors: ErrorCollection errors: ErrorCollection
table: list[dict[str, Action]] actions: list[dict[str, Action]]
gotos: list[dict[str, int]]
alphabet: list[str] alphabet: list[str]
precedence: typing.Tuple[typing.Tuple[Assoc, int], ...] precedence: typing.Tuple[typing.Tuple[Assoc, int], ...]
transparents: set[str] transparents: set[str]
row: None | list[typing.Tuple[None | Action, None | Configuration]] action_row: None | list[typing.Tuple[None | Action, None | Configuration]]
goto_row: None | list[None | int]
def __init__( def __init__(
self, self,
@ -550,11 +592,14 @@ class TableBuilder(object):
transparents: set[str], transparents: set[str],
): ):
self.errors = ErrorCollection() self.errors = ErrorCollection()
self.table = [] self.actions = []
self.gotos = []
self.alphabet = alphabet self.alphabet = alphabet
self.precedence = precedence self.precedence = precedence
self.transparents = transparents self.transparents = transparents
self.row = None self.action_row = None
self.goto_row = None
def flush(self, all_sets: ConfigurationSetInfo) -> ParseTable: def flush(self, all_sets: ConfigurationSetInfo) -> ParseTable:
"""Finish building the table and return it. """Finish building the table and return it.
@ -565,20 +610,31 @@ class TableBuilder(object):
if self.errors.any(): if self.errors.any():
errors = self.errors.format(self.alphabet, all_sets) errors = self.errors.format(self.alphabet, all_sets)
raise ValueError(f"Errors building the table:\n\n{errors}") raise ValueError(f"Errors building the table:\n\n{errors}")
return ParseTable(states=self.table) return ParseTable(actions=self.actions, gotos=self.gotos)
def new_row(self, config_set: ConfigSet): def new_row(self, config_set: ConfigSet):
"""Start a new row, processing the given config set. Call this before """Start a new row, processing the given config set. Call this before
doing anything else. doing anything else.
""" """
self._flush_row() self._flush_row()
self.row = [(None, None) for _ in self.alphabet] self.action_row = [(None, None) for _ in self.alphabet]
self.goto_row = [None for _ in self.alphabet]
self.current_config_set = config_set self.current_config_set = config_set
def _flush_row(self): def _flush_row(self):
if self.row: if self.action_row:
actions = {self.alphabet[k]: v[0] for k, v in enumerate(self.row) if v[0] is not None} actions = {
self.table.append(actions) self.alphabet[sym]: e[0]
for sym, e in enumerate(self.action_row)
if e[0] is not None
}
self.actions.append(actions)
if self.goto_row:
gotos = {self.alphabet[sym]: e for sym, e in enumerate(self.goto_row) if e is not None}
self.gotos.append(gotos)
def set_table_reduce(self, symbol: int, config: Configuration): def set_table_reduce(self, symbol: int, config: Configuration):
"""Mark a reduce of the given configuration for the given symbol in the """Mark a reduce of the given configuration for the given symbol in the
@ -604,7 +660,9 @@ class TableBuilder(object):
def set_table_goto(self, symbol: int, index: int):
    """Set the goto for the given nonterminal symbol in the current row."""
    row = self.goto_row
    assert row is not None
    # NOTE(review): the original marked this assert with "?" — we assume the
    # generator writes each nonterminal's goto at most once per row; confirm
    # that invariant actually holds.
    assert row[symbol] is None
    row[symbol] = index
def _action_precedence(self, symbol: int, action: Action, config: Configuration): def _action_precedence(self, symbol: int, action: Action, config: Configuration):
if isinstance(action, Shift): if isinstance(action, Shift):
@ -620,8 +678,8 @@ class TableBuilder(object):
""" """
assert isinstance(symbol_id, int) assert isinstance(symbol_id, int)
assert self.row is not None assert self.action_row is not None
existing, existing_config = self.row[symbol_id] existing, existing_config = self.action_row[symbol_id]
if existing is not None and existing != action: if existing is not None and existing != action:
assert existing_config is not None assert existing_config is not None
assert config is not None assert config is not None
@ -675,7 +733,7 @@ class TableBuilder(object):
# action, just allow the overwrite with no change. # action, just allow the overwrite with no change.
pass pass
self.row[symbol_id] = (action, config) self.action_row[symbol_id] = (action, config)
class GenerateLR0: class GenerateLR0:
@ -1036,7 +1094,7 @@ def parse(table: ParseTable, input, trace=False):
current_state = stack[-1][0] current_state = stack[-1][0]
current_token = input[input_index] current_token = input[input_index]
action = table.states[current_state].get(current_token, Error()) action = table.actions[current_state].get(current_token, Error())
if trace: if trace:
print( print(
"{stack: <20} {input: <50} {action: <5}".format( "{stack: <20} {input: <50} {action: <5}".format(
@ -1061,9 +1119,9 @@ def parse(table: ParseTable, input, trace=False):
value = (name if not transparent else None, tuple(children)) value = (name if not transparent else None, tuple(children))
stack = stack[:-size] stack = stack[:-size]
goto = table.states[stack[-1][0]].get(name, Error()) goto = table.gotos[stack[-1][0]].get(name)
assert isinstance(goto, Goto) assert goto is not None
stack.append((goto.state, value)) stack.append((goto, value))
case Shift(state): case Shift(state):
stack.append((state, (current_token, ()))) stack.append((state, (current_token, ())))
@ -1554,7 +1612,7 @@ class Rule:
return SequenceRule(self, other) return SequenceRule(self, other)
@abc.abstractmethod @abc.abstractmethod
def flatten(self) -> typing.Generator[list["str | Token"], None, None]: def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]:
"""Convert this potentially nested and branching set of rules into a """Convert this potentially nested and branching set of rules into a
series of nice, flat symbol lists. series of nice, flat symbol lists.
@ -1574,7 +1632,7 @@ class Rule:
raise NotImplementedError() raise NotImplementedError()
class Token(Rule): class Terminal(Rule):
"""A token, or terminal symbol in the grammar.""" """A token, or terminal symbol in the grammar."""
value: str value: str
@ -1582,7 +1640,7 @@ class Token(Rule):
def __init__(self, value):
    # Intern the terminal's name: terminal names are compared constantly
    # during table construction, and interned strings make equality checks
    # (and dict lookups) hit the identity fast path while deduplicating
    # storage.
    self.value = sys.intern(value)
def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]:
    """A terminal is already flat: yield the single production [self]."""
    yield [self]
@ -1616,7 +1674,7 @@ class NonTerminal(Rule):
self.name = name or fn.__name__ self.name = name or fn.__name__
self.transparent = transparent self.transparent = transparent
def generate_body(self, grammar) -> list[list[str | Token]]: def generate_body(self, grammar) -> list[list[str | Terminal]]:
"""Generate the body of the non-terminal. """Generate the body of the non-terminal.
We do this by first calling the associated function in order to get a We do this by first calling the associated function in order to get a
@ -1625,7 +1683,7 @@ class NonTerminal(Rule):
""" """
return [rule for rule in self.fn(grammar).flatten()] return [rule for rule in self.fn(grammar).flatten()]
def flatten(self) -> typing.Generator[list[str | Token], None, None]: def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
# Although we contain multitudes, when flattened we're being asked in # Although we contain multitudes, when flattened we're being asked in
# the context of some other production. Yield ourselves, and trust that # the context of some other production. Yield ourselves, and trust that
# in time we will be asked to generate our body. # in time we will be asked to generate our body.
@ -1639,7 +1697,7 @@ class AlternativeRule(Rule):
self.left = left self.left = left
self.right = right self.right = right
def flatten(self) -> typing.Generator[list[str | Token], None, None]: def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
# All the things from the left of the alternative, then all the things # All the things from the left of the alternative, then all the things
# from the right, never intermingled. # from the right, never intermingled.
yield from self.left.flatten() yield from self.left.flatten()
@ -1655,7 +1713,7 @@ class SequenceRule(Rule):
self.first = first self.first = first
self.second = second self.second = second
def flatten(self) -> typing.Generator[list[str | Token], None, None]: def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
# All the things in the prefix.... # All the things in the prefix....
for first in self.first.flatten(): for first in self.first.flatten():
# ...potentially followed by all the things in the suffix. # ...potentially followed by all the things in the suffix.
@ -1668,7 +1726,7 @@ class NothingRule(Rule):
these, you're probably better off just using the singleton `Nothing`. these, you're probably better off just using the singleton `Nothing`.
""" """
def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]:
    """The empty rule flattens to exactly one production: the empty one."""
    yield []
@ -1720,10 +1778,10 @@ class Grammar:
Here's an example of a simple grammar: Here's an example of a simple grammar:
PLUS = Token('+') PLUS = Terminal('+')
LPAREN = Token('(') LPAREN = Terminal('(')
RPAREN = Token(')') RPAREN = Terminal(')')
ID = Token('id') ID = Terminal('id')
class SimpleGrammar(Grammar): class SimpleGrammar(Grammar):
@rule @rule
@ -1745,7 +1803,7 @@ class Grammar:
precedence_table = {} precedence_table = {}
for prec, (associativity, symbols) in enumerate(precedence): for prec, (associativity, symbols) in enumerate(precedence):
for symbol in symbols: for symbol in symbols:
if isinstance(symbol, Token): if isinstance(symbol, Terminal):
key = symbol.value key = symbol.value
elif isinstance(symbol, NonTerminal): elif isinstance(symbol, NonTerminal):
key = symbol.name key = symbol.name
@ -1758,7 +1816,7 @@ class Grammar:
def generate_nonterminal_dict( def generate_nonterminal_dict(
self, start: str self, start: str
) -> typing.Tuple[dict[str, list[list[str | Token]]], set[str]]: ) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]:
"""Convert the rules into a dictionary of productions. """Convert the rules into a dictionary of productions.
Our table generators work on a very flat set of productions. This is the Our table generators work on a very flat set of productions. This is the
@ -1785,7 +1843,7 @@ class Grammar:
body = rule.generate_body(self) body = rule.generate_body(self)
for clause in body: for clause in body:
for symbol in clause: for symbol in clause:
if not isinstance(symbol, Token): if not isinstance(symbol, Terminal):
assert isinstance(symbol, str) assert isinstance(symbol, str)
nonterminal = nonterminals.get(symbol) nonterminal = nonterminals.get(symbol)
if nonterminal is None: if nonterminal is None:
@ -1811,7 +1869,7 @@ class Grammar:
for clause in clauses: for clause in clauses:
new_clause = [] new_clause = []
for symbol in clause: for symbol in clause:
if isinstance(symbol, Token): if isinstance(symbol, Terminal):
new_clause.append(symbol.value) new_clause.append(symbol.value)
else: else:
new_clause.append(symbol) new_clause.append(symbol)
@ -1842,45 +1900,6 @@ def format_node(node):
return "\n".join(lines) return "\n".join(lines)
def format_table(generator, table: "ParseTable"):
    """Format a parser table so pretty.

    Renders one line per state: the action cells for every terminal,
    then the goto cells for every nonterminal, under a shared header.
    """

    def format_action(state, terminal):
        # Cells absent from the row dict are errors and render as blanks.
        action = state.get(terminal, ("error",))
        kind = action[0]
        if kind == "accept":
            return "accept"
        if kind == "shift":
            return f"s{action[1]}"
        if kind == "error":
            return ""
        if kind == "reduce":
            return f"r{action[1]}"

    # generator.terminal / generator.nonterminal are boolean masks parallel
    # to generator.alphabet; pull out the names of each class, sorted.
    terminals = sorted(
        name for name, is_term in zip(generator.alphabet, generator.terminal) if is_term
    )
    nonterminals = sorted(
        name for name, is_nt in zip(generator.alphabet, generator.nonterminal) if is_nt
    )

    header = " | {} | {}".format(
        " ".join(f"{terminal: <6}" for terminal in terminals),
        " ".join(f"{nt: <5}" for nt in nonterminals),
    )
    lines = [header, "-" * len(header)]
    for index, row in enumerate(table.states):
        action_cells = " ".join(
            f"{format_action(row, terminal): <6}" for terminal in terminals
        )
        goto_cells = " ".join(
            f"{row.get(nt, ('error', ''))[1]: <5}" for nt in nonterminals
        )
        lines.append(f"{index: <3} | {action_cells} | {goto_cells}")
    return "\n".join(lines)
############################################################################### ###############################################################################
# Examples # Examples
############################################################################### ###############################################################################
@ -1901,7 +1920,7 @@ def examples():
gen = GenerateLR0("E", grammar_simple) gen = GenerateLR0("E", grammar_simple)
table = gen.gen_table() table = gen.gen_table()
print(format_table(gen, table)) print(table.format())
tree = parse(table, ["id", "+", "(", "id", ")"]) tree = parse(table, ["id", "+", "(", "id", ")"])
print(format_node(tree) + "\n") print(format_node(tree) + "\n")
print() print()
@ -1954,7 +1973,7 @@ def examples():
gen = GenerateSLR1("E", grammar_lr0_shift_reduce) gen = GenerateSLR1("E", grammar_lr0_shift_reduce)
print(f"Follow('E'): {str([gen.alphabet[f] for f in gen.gen_follow(gen.symbol_key['E'])])}") print(f"Follow('E'): {str([gen.alphabet[f] for f in gen.gen_follow(gen.symbol_key['E'])])}")
table = gen.gen_table() table = gen.gen_table()
print(format_table(gen, table)) print(table.format())
tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"], trace=True) tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"], trace=True)
print(format_node(tree) + "\n") print(format_node(tree) + "\n")
print() print()
@ -1985,7 +2004,7 @@ def examples():
] ]
gen = GenerateLR1("S", grammar_aho_ullman_2) gen = GenerateLR1("S", grammar_aho_ullman_2)
table = gen.gen_table() table = gen.gen_table()
print(format_table(gen, table)) print(table.format())
parse(table, ["b", "a", "a", "b"], trace=True) parse(table, ["b", "a", "a", "b"], trace=True)
print() print()
@ -1993,7 +2012,7 @@ def examples():
print("grammar_aho_ullman_2 (LALR):") print("grammar_aho_ullman_2 (LALR):")
gen = GenerateLALR("S", grammar_aho_ullman_2) gen = GenerateLALR("S", grammar_aho_ullman_2)
table = gen.gen_table() table = gen.gen_table()
print(format_table(gen, table)) print(table.format())
print() print()
# A fun LALAR grammar. # A fun LALAR grammar.
@ -2009,7 +2028,7 @@ def examples():
] ]
gen = GenerateLALR("S", grammar_lalr) gen = GenerateLALR("S", grammar_lalr)
table = gen.gen_table() table = gen.gen_table()
print(format_table(gen, table)) print(table.format())
print() print()