[parser] Remove LR0 and SLR1

Sorry, when this was educational it was nice to have the other generators but as part of cleaning I'm just getting rid of them.
2024-10-15 07:43:52 -07:00 · 2024-10-15 07:43:52 -07:00 · 2d5c73f0b0
commit 2d5c73f0b0
parent bb94fc6c9c
2 changed files with 443 additions and 591 deletions
--- a/parser/parser.py
+++ b/parser/parser.py
@ -158,7 +158,8 @@ class ConfigurationCore(typing.NamedTuple):

    # TODO: Possible improvement: make `symbols` an index into a production
    #       list. This would not make this smaller but it might make comparisons
-    #       faster.
+    #       faster. This could also just be a production index and a position,
+    #       we could find the name from the production index, etc.
    name: int
    symbols: typing.Tuple[int, ...]
    position: int
@ -273,98 +274,6 @@ class ConfigSet(frozenset[Configuration]):
    pass


-# Here we have a slightly different definition of a ConfigurationSet; we keep the
-# lookaheads outside and use a dictionary to check for containment quickly.
-# ItemSet is used in the GRM/Pager/Chin algorithm.
-@dataclasses.dataclass
-class ItemSet:
-    """An ItemSet is a group of configuration cores together with their
-    "contexts", or lookahead sets.
-
-    An ItemSet is comparable for equality, and also supports this lesser notion
-    of "weakly compatible" which is used to collapse states in the pager
-    algorithm.
-    """
-
-    items: dict[ConfigurationCore, set[int]]
-
-    def __init__(self, items=None):
-        self.items = items or {}
-
-    @classmethod
-    def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
-        return ItemSet({config.core: set(config.lookahead) for config in config_set})
-
-    def weakly_compatible(self, other: "ItemSet") -> bool:
-        a = self.items
-        b = other.items
-
-        if len(a) != len(b):
-            return False
-
-        for acore in a:
-            if acore not in b:
-                return False
-
-        if len(a) == 1:
-            return True
-
-        # DOTY: This loop I do not understand, truly. What the heck is happening here?
-        a_keys = list(a.keys())
-        for i, i_key in enumerate(itertools.islice(a_keys, 0, len(a_keys) - 1)):
-            for j_key in itertools.islice(a_keys, i + 1, None):
-                a_i_key = a[i_key]
-                b_i_key = b[i_key]
-                a_j_key = a[j_key]
-                b_j_key = b[j_key]
-
-                # DOTY: GRMTools written with intersects(); we don't have that we have
-                #       `not disjoint()`. :P There are many double negatives....
-                #
-                #  not (intersect(a_i, b_j) or intersect(a_j, b_i))
-                #  not ((not disjoint(a_i, b_j)) or (not disjoint(a_j, b_i)))
-                #  ((not not disjoint(a_i, b_j)) and (not not disjoint(a_j, b_i)))
-                #  disjoint(a_i, b_j) and disjoint(a_j, b_i)
-                if a_i_key.isdisjoint(b_j_key) and a_j_key.isdisjoint(b_i_key):
-                    continue
-
-                # intersect(a_i, a_j) or intersect(b_i, b_j)
-                # (not disjoint(a_i, a_j)) or (not disjoint(b_i, b_j))
-                # not (disjoint(a_i, a_j) and disjoint(b_i, b_j))
-                if not (a_i_key.isdisjoint(a_j_key) and b_i_key.isdisjoint(b_j_key)):
-                    continue
-
-                return False
-
-        return True
-
-    def weakly_merge(self, other: "ItemSet") -> bool:
-        """Merge b into a, returning True if this lead to any changes."""
-        a = self.items
-        b = other.items
-
-        changed = False
-        for a_key, a_ctx in a.items():
-            start_len = len(a_ctx)
-            a_ctx.update(b[a_key])  # Python doesn't tell us changes
-            changed = changed or (start_len != len(a_ctx))
-
-        return changed
-
-    def goto(self, symbol: int) -> "ItemSet":
-        result = ItemSet()
-        for core, context in self.items.items():
-            if core.next == symbol:
-                next = core.replace_position(core.position + 1)
-                result.items[next] = set(context)
-        return result
-
-    def to_config_set(self) -> ConfigSet:
-        return ConfigSet(
-            {Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()}
-        )
-
-
 class ConfigurationSetInfo:
    """When we build a grammar into a table, the first thing we need to do is
    generate all the configuration sets and their successors.
@ -876,338 +785,6 @@ class TableBuilder(object):
        self.action_row[symbol_id] = (action, config)


-class GenerateLR0:
-    """Generate parser tables for an LR0 parser."""
-
-    # Internally we use integers as symbols, not strings. Mostly this is fine,
-    # but when we need to map back from integer to string we index this list.
-    alphabet: list[str]
-
-    # The grammar we work with. The outer list is indexed by grammar symbol,
-    # terminal *and* non-terminal. The inner list is the list of productions
-    # for the given nonterminal symbol. (If you have a terminal `t` and look it
-    # up you'll just get an empty list.)
-    grammar: list[list[typing.Tuple[int, ...]]]
-
-    # nonterminal[i] is True if alphabet[i] is a nonterminal.
-    nonterminal: typing.Tuple[bool, ...]
-    # The complement of nonterminal. terminal[i] is True if alphabet[i] is a
-    # terminal.
-    terminal: typing.Tuple[bool, ...]
-
-    # The precedence of every symbol. If no precedence was explicitly provided
-    # for a symbol, then its entry in this tuple will be (NONE, 0).
-    precedence: typing.Tuple[typing.Tuple[Assoc, int], ...]
-
-    # The set of symbols for which we should reduce "transparently." This doesn't
-    # affect state generation at all, only the generation of the final table.
-    transparents: set[str]
-
-    # The lookup that maps a particular symbol to an integer. (Only really used
-    # for debugging.)
-    symbol_key: dict[str, int]
-    # The start symbol of the grammar.
-    start_symbol: int
-    # The end symbol of the grammar.
-    end_symbol: int
-
-    def __init__(
-        self,
-        start: str,
-        grammar: list[typing.Tuple[str, list[str]]],
-        precedence: None | dict[str, typing.Tuple[Assoc, int]] = None,
-        transparents: None | set[str] = None,
-    ):
-        """Initialize the parser generator with the specified grammar and
-        start symbol.
-
-        The input grammars are of the form:
-
-          grammar_simple = [
-            ('E', ['E', '+', 'T']),
-            ('E', ['T']),
-            ('T', ['(', 'E', ')']),
-            ('T', ['id']),
-          ]
-
-        Which is to say, they are a list of productions. Each production is a
-        tuple where the first element of the tuple is the name of the
-        non-terminal being added, and the second elment of the tuple is the
-        list of terminals and non-terminals that make up the production.
-
-        There is currently no support for custom actions or alternation or
-        anything like that. If you want alternations that you'll have to lower
-        the grammar by hand into the simpler form first.
-
-        Don't name anything with double-underscores; those are reserved for
-        the generator. Don't add '$' either, as it is reserved to mean
-        end-of-stream. Use an empty list to indicate nullability, that is:
-
-          ('O', []),
-
-        means that O can be matched with nothing.
-
-        This isn't a *great* way to author these things, but it is very simple
-        and flexible. You probably don't want to author this on your own; see
-        the Grammar class for a high-level API.
-
-        The precedence dictionary, if provided, maps a given symbol to an
-        associativity and a precedence. Any symbol not in the dictionary is
-        presumed to have an associativity of NONE and a precedence of zero.
-        """
-
-        # Work out the alphabet.
-        alphabet = set()
-        for name, rule in grammar:
-            alphabet.add(name)
-            alphabet.update(symbol for symbol in rule)
-
-        # Check to make sure they didn't use anything that will give us
-        # heartburn later.
-        reserved = [a for a in alphabet if a.startswith("__") or a == "$"]
-        if reserved:
-            raise ValueError(
-                "Can't use {symbols} in grammars, {what} reserved.".format(
-                    symbols=" or ".join(reserved),
-                    what="it's" if len(reserved) == 1 else "they're",
-                )
-            )
-
-        alphabet.add("__start")
-        alphabet.add("$")
-        self.alphabet = list(sorted(alphabet))
-
-        symbol_key = {symbol: index for index, symbol in enumerate(self.alphabet)}
-
-        start_symbol = symbol_key["__start"]
-        end_symbol = symbol_key["$"]
-
-        assert self.alphabet[start_symbol] == "__start"
-        assert self.alphabet[end_symbol] == "$"
-
-        # Turn the incoming grammar into a dictionary, indexed by nonterminal.
-        #
-        # We count on python dictionaries retaining the insertion order, like
-        # it or not.
-        full_grammar: list[list] = [list() for _ in self.alphabet]
-        terminal: list[bool] = [True for _ in self.alphabet]
-        assert terminal[end_symbol]
-
-        nonterminal = [False for _ in self.alphabet]
-
-        for name, rule in grammar:
-            name_symbol = symbol_key[name]
-
-            terminal[name_symbol] = False
-            nonterminal[name_symbol] = True
-
-            rules = full_grammar[name_symbol]
-            rules.append(tuple(symbol_key[symbol] for symbol in rule))
-
-        self.grammar = full_grammar
-        self.grammar[start_symbol].append((symbol_key[start],))
-        terminal[start_symbol] = False
-        nonterminal[start_symbol] = True
-
-        self.terminal = tuple(terminal)
-        self.nonterminal = tuple(nonterminal)
-
-        assert self.terminal[end_symbol]
-        assert self.nonterminal[start_symbol]
-
-        if precedence is None:
-            precedence = {}
-        self.precedence = tuple(precedence.get(a, (Assoc.NONE, 0)) for a in self.alphabet)
-
-        if transparents is None:
-            transparents = set()
-        self.transparents = transparents
-
-        self.symbol_key = symbol_key
-        self.start_symbol = start_symbol
-        self.end_symbol = end_symbol
-
-    def gen_closure_next(self, config: Configuration):
-        """Return the next set of configurations in the closure for config.
-
-        If the position for config is just before a non-terminal, then the
-        next set of configurations is configurations for all of the
-        productions for that non-terminal, with the position at the
-        beginning. (If the position for config is just before a terminal,
-        or at the end of the production, then the next set is empty.)
-        """
-        next = config.core.next
-        if next is None:
-            return ()
-        else:
-            return tuple(Configuration.from_rule(next, rule) for rule in self.grammar[next])
-
-    def gen_closure(self, seeds: typing.Iterable[Configuration]) -> ConfigSet:
-        """Compute the closure for the specified configs. The closure is all
-        of the configurations we could be in. Specifically, if the position
-        for a config is just before a non-terminal then we must also consider
-        configurations where the rule is the rule for the non-terminal and
-        the position is just before the beginning of the rule.
-
-        (We have replaced a recursive version with an iterative one.)
-        """
-        closure: set[Configuration] = set()
-        pending = list(seeds)
-        pending_next = []
-        while len(pending) > 0:
-            for config in pending:
-                if config in closure:
-                    continue
-
-                closure.add(config)
-                pending_next.extend(self.gen_closure_next(config))
-
-            temp = pending
-            pending = pending_next
-            pending_next = temp
-            pending_next.clear()
-
-        # NOTE: The generation of this closure *might* have generated
-        #       multiple cores with different lookaheads; if that's
-        #       the case we need to merge.
-        merged: dict[ConfigurationCore, set[int]] = {}
-        for c in closure:
-            existing = merged.get(c.core)
-            if existing is not None:
-                existing.update(c.lookahead)
-            else:
-                merged[c.core] = set(c.lookahead)
-
-        return ConfigSet(Configuration(k, tuple(sorted(v))) for k, v in merged.items())
-
-    def gen_all_successors(
-        self, config_set: typing.Iterable[Configuration]
-    ) -> list[typing.Tuple[int, ConfigSet]]:
-        """Return all of the non-empty successors for the given config set.
-
-        (That is, given the config set, pretend we see all the symbols we
-        could possibly see, and figure out which configs sets we get from
-        those symbols. Those are the successors of this set.)
-        """
-        possible = {config.core.next for config in config_set if config.core.next is not None}
-
-        next = []
-        for symbol in possible:
-            seeds = ConfigSet(
-                config.replace_position(config.core.position + 1)
-                for config in config_set
-                if config.core.next == symbol
-            )
-            if len(seeds) > 0:
-                next.append((symbol, seeds))
-
-        return next
-
-    def gen_sets(self, seeds: list[Configuration]) -> ConfigurationSetInfo:
-        """Generate all configuration sets starting from the provided seeds."""
-        result = ConfigurationSetInfo()
-
-        successors = []
-        pending = [ConfigSet(seeds)]
-        pending_next = []
-        while len(pending) > 0:
-            for core in pending:
-                id, is_new = result.register_core(core)
-                if is_new:
-                    config_set = self.gen_closure(core)
-                    result.register_config_closure(id, config_set)
-                    for symbol, successor in self.gen_all_successors(config_set):
-                        successors.append((id, symbol, successor))
-                        pending_next.append(successor)
-
-            temp = pending
-            pending = pending_next
-            pending_next = temp
-            pending_next.clear()
-
-        for id, symbol, successor in successors:
-            result.add_successor(id, symbol, result.core_key[successor])
-
-        return result
-
-    def gen_all_sets(self) -> ConfigurationSetInfo:
-        """Generate all of the configuration sets for the grammar."""
-        seeds = [
-            Configuration.from_rule(self.start_symbol, rule)
-            for rule in self.grammar[self.start_symbol]
-        ]
-        return self.gen_sets(seeds)
-
-    def gen_reduce_set(self, config: Configuration) -> typing.Iterable[int]:
-        """Return the set of symbols that indicate we should reduce the given
-        configuration.
-
-        In an LR0 parser, this is just the set of all terminals.
-        """
-        del config
-        return [index for index, value in enumerate(self.terminal) if value]
-
-    def gen_table(self) -> ParseTable:
-        """Generate the parse table.
-
-        The parse table is a list of states. The first state in the list is
-        the starting state. Each state is a dictionary that maps a symbol to an
-        action. Each action is a tuple. The first element of the tuple is a
-        string describing what to do:
-
-        - 'shift': The second element of the tuple is the state
-          number. Consume the input and push that state onto the stack.
-
-        - 'reduce': The second element is the name of the non-terminal being
-          reduced, and the third element is the number of states to remove
-          from the stack. Don't consume the input; just remove the specified
-          number of things from the stack, and then consult the table again,
-          this time using the new top-of-stack as the current state and the
-          name of the non-terminal to find out what to do.
-
-        - 'goto': The second element is the state number to push onto the
-          stack. In the literature, these entries are treated distinctly from
-          the actions, but we mix them here because they never overlap with the
-          other actions. (These are always associated with non-terminals, and
-          the other actions are always associated with terminals.)
-
-        - 'accept': Accept the result of the parse, it worked.
-
-        Anything missing from the row indicates an error.
-        """
-        config_sets = self.gen_all_sets()
-        # print(config_sets.dump_state(self.alphabet))
-        builder = TableBuilder(self.alphabet, self.precedence, self.transparents)
-
-        for config_set_id, config_set in enumerate(config_sets.closures):
-            assert config_set is not None
-            builder.new_row(config_set)
-            successors = config_sets.successors[config_set_id]
-
-            for config in config_set:
-                config_next = config.core.next
-                if config_next is None:
-                    if config.core.name != self.start_symbol:
-                        for a in self.gen_reduce_set(config):
-                            builder.set_table_reduce(a, config)
-                    else:
-                        builder.set_table_accept(self.end_symbol, config)
-
-                elif self.terminal[config_next]:
-                    index = successors[config_next]
-                    builder.set_table_shift(config_next, index, config)
-
-            # Gotos
-            for symbol, index in successors.items():
-                if self.nonterminal[symbol]:
-                    builder.set_table_goto(symbol, index)
-
-        return builder.flush(config_sets)
-
-
-###############################################################################
-# SLR(1)
-###############################################################################
 def update_changed(items: set[int], other: set[int]) -> bool:
    """Merge the `other` set into the `items` set, and return True if this
    changed the items set.
@ -1430,32 +1007,264 @@ class FollowInfo:
        return FollowInfo(follows=follows)


-class GenerateSLR1(GenerateLR0):
-    """Generate parse tables for SLR1 grammars.
+# Here we have a slightly different definition of a ConfigurationSet; we keep the
+# lookaheads outside and use a dictionary to check for containment quickly.
+# ItemSet is used in the GRM/Pager/Chin algorithm.
+@dataclasses.dataclass
+class ItemSet:
+    """An ItemSet is a group of configuration cores together with their
+    "contexts", or lookahead sets.

-    SLR1 parsers can recognize more than LR0 parsers, because they have a
-    little bit more information: instead of generating reduce actions for a
-    production on all possible inputs, as LR0 parsers do, they generate
-    reduce actions only for inputs that are in the 'follow' set of the
-    non-terminal.
-
-    That means SLR1 parsers need to know how to generate 'follow(A)', which
-    means they need to know how to generate 'first(A)'. See FirstInfo and
-    FollowInfo for the details on how this is computed.
+    An ItemSet is comparable for equality, and also supports this lesser notion
+    of "weakly compatible" which is used to collapse states in the pager
+    algorithm.
    """

+    items: dict[ConfigurationCore, set[int]]
+
+    def __init__(self, items=None):
+        self.items = items or {}
+
+    @classmethod
+    def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
+        return ItemSet({config.core: set(config.lookahead) for config in config_set})
+
+    def weakly_compatible(self, other: "ItemSet") -> bool:
+        a = self.items
+        b = other.items
+
+        if len(a) != len(b):
+            return False
+
+        for acore in a:
+            if acore not in b:
+                return False
+
+        if len(a) == 1:
+            return True
+
+        # DOTY: This loop I do not understand, truly. What the heck is happening here?
+        a_keys = list(a.keys())
+        for i, i_key in enumerate(itertools.islice(a_keys, 0, len(a_keys) - 1)):
+            for j_key in itertools.islice(a_keys, i + 1, None):
+                a_i_key = a[i_key]
+                b_i_key = b[i_key]
+                a_j_key = a[j_key]
+                b_j_key = b[j_key]
+
+                # DOTY: GRMTools written with intersects(); we don't have that we have
+                #       `not disjoint()`. :P There are many double negatives....
+                #
+                #  not (intersect(a_i, b_j) or intersect(a_j, b_i))
+                #  not ((not disjoint(a_i, b_j)) or (not disjoint(a_j, b_i)))
+                #  ((not not disjoint(a_i, b_j)) and (not not disjoint(a_j, b_i)))
+                #  disjoint(a_i, b_j) and disjoint(a_j, b_i)
+                if a_i_key.isdisjoint(b_j_key) and a_j_key.isdisjoint(b_i_key):
+                    continue
+
+                # intersect(a_i, a_j) or intersect(b_i, b_j)
+                # (not disjoint(a_i, a_j)) or (not disjoint(b_i, b_j))
+                # not (disjoint(a_i, a_j) and disjoint(b_i, b_j))
+                if not (a_i_key.isdisjoint(a_j_key) and b_i_key.isdisjoint(b_j_key)):
+                    continue
+
+                return False
+
+        return True
+
+    def weakly_merge(self, other: "ItemSet") -> bool:
+        """Merge b into a, returning True if this lead to any changes."""
+        a = self.items
+        b = other.items
+
+        changed = False
+        for a_key, a_ctx in a.items():
+            start_len = len(a_ctx)
+            a_ctx.update(b[a_key])  # Python doesn't tell us changes
+            changed = changed or (start_len != len(a_ctx))
+
+        return changed
+
+    def goto(self, symbol: int) -> "ItemSet":
+        result = ItemSet()
+        for core, context in self.items.items():
+            if core.next == symbol:
+                next = core.replace_position(core.position + 1)
+                result.items[next] = set(context)
+        return result
+
+    def to_config_set(self) -> ConfigSet:
+        return ConfigSet(
+            {Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()}
+        )
+
+
+class GenerateLR1:
+    """Generate parse tables for LR1, or "canonical LR" grammars.
+
+    LR1 parsers can recognize more than SLR parsers. Like SLR parsers, they
+    are choosier about when they reduce. But unlike SLR parsers, they specify
+    the terminals on which they reduce by carrying a 'lookahead' terminal in
+    the configuration. The lookahead of a configuration is computed as the
+    closure of a configuration set is computed, so see gen_closure_next for
+    details. (Except for the start configuration, which has '$' as its
+    lookahead.)
+    """
+
+    # Internally we use integers as symbols, not strings. Mostly this is fine,
+    # but when we need to map back from integer to string we index this list.
+    alphabet: list[str]
+
+    # The grammar we work with. The outer list is indexed by grammar symbol,
+    # terminal *and* non-terminal. The inner list is the list of productions
+    # for the given nonterminal symbol. (If you have a terminal `t` and look it
+    # up you'll just get an empty list.)
+    grammar: list[list[typing.Tuple[int, ...]]]
+
+    # nonterminal[i] is True if alphabet[i] is a nonterminal.
+    nonterminal: typing.Tuple[bool, ...]
+    # The complement of nonterminal. terminal[i] is True if alphabet[i] is a
+    # terminal.
+    terminal: typing.Tuple[bool, ...]
+
+    # The precedence of every symbol. If no precedence was explicitly provided
+    # for a symbol, then its entry in this tuple will be (NONE, 0).
+    precedence: typing.Tuple[typing.Tuple[Assoc, int], ...]
+
+    # The set of symbols for which we should reduce "transparently." This doesn't
+    # affect state generation at all, only the generation of the final table.
+    transparents: set[str]
+
+    # The lookup that maps a particular symbol to an integer. (Only really used
+    # for debugging.)
+    symbol_key: dict[str, int]
+    # The start symbol of the grammar.
+    start_symbol: int
+    # The end symbol of the grammar.
+    end_symbol: int
+
    _firsts: FirstInfo
+
    _follows: FollowInfo

-    def __init__(self, *args, **kwargs):
-        """See the constructor of GenerateLR0 for an explanation of the
-        parameters to the constructor and what they mean.
-        """
-        super().__init__(*args, **kwargs)
+    def __init__(
+        self,
+        start: str,
+        grammar: list[typing.Tuple[str, list[str]]],
+        precedence: None | dict[str, typing.Tuple[Assoc, int]] = None,
+        transparents: None | set[str] = None,
+    ):
+        """Initialize the parser generator with the specified grammar and
+        start symbol.
+
+        The input grammars are of the form:
+
+          grammar_simple = [
+            ('E', ['E', '+', 'T']),
+            ('E', ['T']),
+            ('T', ['(', 'E', ')']),
+            ('T', ['id']),
+          ]
+
+        Which is to say, they are a list of productions. Each production is a
+        tuple where the first element of the tuple is the name of the
+        non-terminal being added, and the second elment of the tuple is the
+        list of terminals and non-terminals that make up the production.
+
+        There is currently no support for custom actions or alternation or
+        anything like that. If you want alternations that you'll have to lower
+        the grammar by hand into the simpler form first.
+
+        Don't name anything with double-underscores; those are reserved for
+        the generator. Don't add '$' either, as it is reserved to mean
+        end-of-stream. Use an empty list to indicate nullability, that is:
+
+          ('O', []),
+
+        means that O can be matched with nothing.
+
+        This isn't a *great* way to author these things, but it is very simple
+        and flexible. You probably don't want to author this on your own; see
+        the Grammar class for a high-level API.
+
+        The precedence dictionary, if provided, maps a given symbol to an
+        associativity and a precedence. Any symbol not in the dictionary is
+        presumed to have an associativity of NONE and a precedence of zero.
+        """
+
+        # Work out the alphabet.
+        alphabet = set()
+        for name, rule in grammar:
+            alphabet.add(name)
+            alphabet.update(symbol for symbol in rule)
+
+        # Check to make sure they didn't use anything that will give us
+        # heartburn later.
+        reserved = [a for a in alphabet if a.startswith("__") or a == "$"]
+        if reserved:
+            raise ValueError(
+                "Can't use {symbols} in grammars, {what} reserved.".format(
+                    symbols=" or ".join(reserved),
+                    what="it's" if len(reserved) == 1 else "they're",
+                )
+            )
+
+        alphabet.add("__start")
+        alphabet.add("$")
+        self.alphabet = list(sorted(alphabet))
+
+        symbol_key = {symbol: index for index, symbol in enumerate(self.alphabet)}
+
+        start_symbol = symbol_key["__start"]
+        end_symbol = symbol_key["$"]
+
+        assert self.alphabet[start_symbol] == "__start"
+        assert self.alphabet[end_symbol] == "$"
+
+        # Turn the incoming grammar into a dictionary, indexed by nonterminal.
+        #
+        # We count on python dictionaries retaining the insertion order, like
+        # it or not.
+        full_grammar: list[list] = [list() for _ in self.alphabet]
+        terminal: list[bool] = [True for _ in self.alphabet]
+        assert terminal[end_symbol]
+
+        nonterminal = [False for _ in self.alphabet]
+
+        for name, rule in grammar:
+            name_symbol = symbol_key[name]
+
+            terminal[name_symbol] = False
+            nonterminal[name_symbol] = True
+
+            rules = full_grammar[name_symbol]
+            rules.append(tuple(symbol_key[symbol] for symbol in rule))
+
+        self.grammar = full_grammar
+        self.grammar[start_symbol].append((symbol_key[start],))
+        terminal[start_symbol] = False
+        nonterminal[start_symbol] = True
+
+        self.terminal = tuple(terminal)
+        self.nonterminal = tuple(nonterminal)
+
+        assert self.terminal[end_symbol]
+        assert self.nonterminal[start_symbol]
+
+        if precedence is None:
+            precedence = {}
+        self.precedence = tuple(precedence.get(a, (Assoc.NONE, 0)) for a in self.alphabet)
+
+        if transparents is None:
+            transparents = set()
+        self.transparents = transparents
+
+        self.symbol_key = symbol_key
+        self.start_symbol = start_symbol
+        self.end_symbol = end_symbol

-        # We store the firsts not because we need them here, but because LR1
-        # and Pager need them.
        self._firsts = FirstInfo.from_grammar(self.grammar, self.terminal)
+
        self._follows = FollowInfo.from_grammar(
            self.grammar,
            self.terminal,
@ -1464,6 +1273,94 @@ class GenerateSLR1(GenerateLR0):
            self._firsts,
        )

+    def gen_closure(self, seeds: typing.Iterable[Configuration]) -> ConfigSet:
+        """Compute the closure for the specified configs. The closure is all
+        of the configurations we could be in. Specifically, if the position
+        for a config is just before a non-terminal then we must also consider
+        configurations where the rule is the rule for the non-terminal and
+        the position is just before the beginning of the rule.
+
+        (We have replaced a recursive version with an iterative one.)
+        """
+        closure: set[Configuration] = set()
+        pending = list(seeds)
+        pending_next = []
+        while len(pending) > 0:
+            for config in pending:
+                if config in closure:
+                    continue
+
+                closure.add(config)
+                pending_next.extend(self.gen_closure_next(config))
+
+            temp = pending
+            pending = pending_next
+            pending_next = temp
+            pending_next.clear()
+
+        # NOTE: The generation of this closure *might* have generated
+        #       multiple cores with different lookaheads; if that's
+        #       the case we need to merge.
+        merged: dict[ConfigurationCore, set[int]] = {}
+        for c in closure:
+            existing = merged.get(c.core)
+            if existing is not None:
+                existing.update(c.lookahead)
+            else:
+                merged[c.core] = set(c.lookahead)
+
+        return ConfigSet(Configuration(k, tuple(sorted(v))) for k, v in merged.items())
+
+    def gen_all_successors(
+        self, config_set: typing.Iterable[Configuration]
+    ) -> list[typing.Tuple[int, ConfigSet]]:
+        """Return all of the non-empty successors for the given config set.
+
+        (That is, given the config set, pretend we see all the symbols we
+        could possibly see, and figure out which configs sets we get from
+        those symbols. Those are the successors of this set.)
+        """
+        possible = {config.core.next for config in config_set if config.core.next is not None}
+
+        next = []
+        for symbol in possible:
+            seeds = ConfigSet(
+                config.replace_position(config.core.position + 1)
+                for config in config_set
+                if config.core.next == symbol
+            )
+            if len(seeds) > 0:
+                next.append((symbol, seeds))
+
+        return next
+
+    def gen_sets(self, seeds: list[Configuration]) -> ConfigurationSetInfo:
+        """Generate all configuration sets starting from the provided seeds."""
+        result = ConfigurationSetInfo()
+
+        successors = []
+        pending = [ConfigSet(seeds)]
+        pending_next = []
+        while len(pending) > 0:
+            for core in pending:
+                id, is_new = result.register_core(core)
+                if is_new:
+                    config_set = self.gen_closure(core)
+                    result.register_config_closure(id, config_set)
+                    for symbol, successor in self.gen_all_successors(config_set):
+                        successors.append((id, symbol, successor))
+                        pending_next.append(successor)
+
+            temp = pending
+            pending = pending_next
+            pending_next = temp
+            pending_next.clear()
+
+        for id, symbol, successor in successors:
+            result.add_successor(id, symbol, result.core_key[successor])
+
+        return result
+
    def gen_follow(self, symbol: int) -> set[int]:
        """Generate the follow set for the given nonterminal.

@ -1476,27 +1373,6 @@ class GenerateSLR1(GenerateLR0):
        """
        return self._follows.follows[symbol]

-    def gen_reduce_set(self, config: Configuration) -> typing.Iterable[int]:
-        """Return the set of symbols that indicate we should reduce the given
-        config.
-
-        In an SLR1 parser, this is the follow set of the config nonterminal.
-        """
-        return self.gen_follow(config.core.name)
-
-
-class GenerateLR1(GenerateSLR1):
-    """Generate parse tables for LR1, or "canonical LR" grammars.
-
-    LR1 parsers can recognize more than SLR parsers. Like SLR parsers, they
-    are choosier about when they reduce. But unlike SLR parsers, they specify
-    the terminals on which they reduce by carrying a 'lookahead' terminal in
-    the configuration. The lookahead of a configuration is computed as the
-    closure of a configuration set is computed, so see gen_closure_next for
-    details. (Except for the start configuration, which has '$' as its
-    lookahead.)
-    """
-
    def gen_first(self, symbols: typing.Iterable[int]) -> typing.Tuple[set[int], bool]:
        """Return the first set for a *sequence* of symbols.

@ -1551,10 +1427,50 @@ class GenerateLR1(GenerateSLR1):

            next = []
            for rule in self.grammar[config_next]:
-                next.append(Configuration.from_rule(config_next, rule, lookahead=lookahead_tuple))
+                rr = Configuration.from_rule(config_next, rule, lookahead=lookahead_tuple)
+                next.append(rr)

            return tuple(next)

+    def gen_closure_x(self, items: ItemSet) -> ItemSet:
+        closure: dict[ConfigurationCore, set[int]] = {}
+
+        # We're going to maintain a set of things to look at, rules that we
+        # still need to close over. Assume that starts with everything in us.
+        todo = [(core, context) for core, context in items.items.items()]
+        while len(todo) > 0:
+            core, context = todo.pop()
+
+            existing_context = closure.get(core)
+            if existing_context is None or not context <= existing_context:
+                # Either context is none or something in context is not in
+                # existing_context, so we need to process this one.
+                if existing_context is not None:
+                    existing_context.update(context)
+                else:
+                    # NOTE: context in the set is a lookahead and got
+                    #       generated exactly once for all the child rules.
+                    #       we have to copy somewhere, this here seems best.
+                    closure[core] = set(context)
+
+                config_next = core.next
+                if config_next is None:
+                    # No closure for this one, we're at the end.
+                    continue
+
+                rules = self.grammar[config_next]
+                if len(rules) > 0:
+                    lookahead, epsilon = self.gen_first(core.rest)
+                    print(f"    LA {core.rest} -> {lookahead} e:{epsilon}")
+                    if epsilon:
+                        lookahead.update(context)
+
+                    for rule in rules:
+                        new_core = ConfigurationCore.from_rule(config_next, rule)
+                        todo.append((new_core, lookahead))
+
+        return ItemSet(closure)
+
    def gen_all_sets(self):
        """Generate all of the configuration sets for the grammar.

@ -1567,6 +1483,63 @@ class GenerateLR1(GenerateSLR1):
        ]
        return self.gen_sets(seeds)

+    def gen_table(self) -> ParseTable:
+        """Generate the parse table.
+
+        The parse table is a list of states. The first state in the list is
+        the starting state. Each state is a dictionary that maps a symbol to an
+        action. Each action is a tuple. The first element of the tuple is a
+        string describing what to do:
+
+        - 'shift': The second element of the tuple is the state
+          number. Consume the input and push that state onto the stack.
+
+        - 'reduce': The second element is the name of the non-terminal being
+          reduced, and the third element is the number of states to remove
+          from the stack. Don't consume the input; just remove the specified
+          number of things from the stack, and then consult the table again,
+          this time using the new top-of-stack as the current state and the
+          name of the non-terminal to find out what to do.
+
+        - 'goto': The second element is the state number to push onto the
+          stack. In the literature, these entries are treated distinctly from
+          the actions, but we mix them here because they never overlap with the
+          other actions. (These are always associated with non-terminals, and
+          the other actions are always associated with terminals.)
+
+        - 'accept': Accept the result of the parse, it worked.
+
+        Anything missing from the row indicates an error.
+        """
+        config_sets = self.gen_all_sets()
+        # print(config_sets.dump_state(self.alphabet))
+        builder = TableBuilder(self.alphabet, self.precedence, self.transparents)
+
+        for config_set_id, config_set in enumerate(config_sets.closures):
+            assert config_set is not None
+            builder.new_row(config_set)
+            successors = config_sets.successors[config_set_id]
+
+            for config in config_set:
+                config_next = config.core.next
+                if config_next is None:
+                    if config.core.name != self.start_symbol:
+                        for a in self.gen_reduce_set(config):
+                            builder.set_table_reduce(a, config)
+                    else:
+                        builder.set_table_accept(self.end_symbol, config)
+
+                elif self.terminal[config_next]:
+                    index = successors[config_next]
+                    builder.set_table_shift(config_next, index, config)
+
+            # Gotos
+            for symbol, index in successors.items():
+                if self.nonterminal[symbol]:
+                    builder.set_table_goto(symbol, index)
+
+        return builder.flush(config_sets)
+

 class GeneratePager(GenerateLR1):
    """Pager's algorithm.
@ -1654,15 +1627,7 @@ class GeneratePager(GenerateLR1):
            todo_off = state_i + 1
            todo -= 1

-            # DOTY: TODO: We convert here back and forth to Configuration
-            #       objects, but maybe we can make ItemSet our core
-            #       representation throughout this file. (Even in LR0.) So
-            #       never use Configuration, always ItemSet and ConfigCore.
-            #
-            #       Or just rebuild gen_closure inside ItemSet. shrug
-            temp_set = core_states[state_i].to_config_set()
-            closure = self.gen_closure(temp_set)
-            cl_state = ItemSet.from_config_set(closure)
+            cl_state = self.gen_closure_x(core_states[state_i])
            closed_states[state_i] = cl_state

            seen.clear()
@ -3044,7 +3009,7 @@ class Grammar:
    """

    _precedence: dict[str, typing.Tuple[Assoc, int]]
-    _generator: type[GenerateLR0]
+    _generator: type[GenerateLR1]
    _terminals: dict[str, Terminal]
    _nonterminals: dict[str, NonTerminal]
    _trivia: list[Terminal]
@ -3053,7 +3018,7 @@ class Grammar:
        self,
        start: str | NonTerminal | None = None,
        precedence: PrecedenceList | None = None,
-        generator: type[GenerateLR0] | None = None,
+        generator: type[GenerateLR1] | None = None,
        trivia: list[str | Terminal] | None = None,
        name: str | None = None,
    ):
--- a/tests/test_grammar.py
+++ b/tests/test_grammar.py
@ -42,7 +42,7 @@ def test_lr0_lr0():

    class G(Grammar):
        start = "E"
-        generator = parser.GenerateLR0
+        # generator = parser.GenerateLR0

        @rule
        def E(self):
@ -86,7 +86,7 @@ def test_all_generators():
        IDENTIFIER = Terminal("id", name="id")

    GENERATORS = [
-        parser.GenerateLR0,
+        # parser.GenerateLR0,
        parser.GeneratePager,
        parser.GenerateLR1,
    ]
@ -104,121 +104,9 @@ def test_all_generators():
        assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")")))


-def test_lr0_shift_reduce():
-    """This one should not work in LR0- it has a shift/reduce conflict, but works in SLR1."""
-
-    class G(Grammar):
-        start = "E"
-        generator = parser.GenerateLR0
-
-        @rule
-        def E(self):
-            return seq(self.E, self.PLUS, self.T) | self.T
-
-        @rule
-        def T(self):
-            return (
-                seq(self.LPAREN, self.E, self.RPAREN)
-                | self.IDENTIFIER
-                | seq(self.IDENTIFIER, self.LSQUARE, self.E, self.RSQUARE)
-            )
-
-        PLUS = Terminal("+")
-        LPAREN = Terminal("(")
-        RPAREN = Terminal(")")
-        LSQUARE = Terminal("[")
-        RSQUARE = Terminal("]")
-        IDENTIFIER = Terminal("id")
-
-    with pytest.raises(parser.AmbiguityError):
-        G().build_table()
-
-    G().build_table(generator=parser.GenerateSLR1)
-
-
-def test_lr0_reduce_reduce():
-    """This one should not work, it has a reduce-reduce conflict."""
-
-    class G(Grammar):
-        start = "E"
-        generator = parser.GenerateLR0
-
-        @rule
-        def E(self):
-            return seq(self.E, self.PLUS, self.T) | self.T | seq(self.V, self.EQUAL, self.E)
-
-        @rule
-        def T(self):
-            return seq(self.LPAREN, self.E, self.RPAREN) | self.IDENTIFIER
-
-        @rule
-        def V(self):
-            return self.IDENTIFIER
-
-        PLUS = Terminal("+")
-        EQUAL = Terminal("=")
-        LPAREN = Terminal("(")
-        RPAREN = Terminal(")")
-        IDENTIFIER = Terminal("id")
-
-    with pytest.raises(parser.AmbiguityError):
-        G().build_table()
-
-
-def test_lr0_empty():
-    """LR0 can't handle empty productions because it doesn't know when to reduce."""
-
-    class G(Grammar):
-        start = "E"
-        generator = parser.GenerateLR0
-
-        @rule
-        def E(self):
-            return seq(self.F, self.BOOP)
-
-        @rule
-        def F(self):
-            return self.BEEP | parser.Nothing
-
-        BOOP = Terminal("boop")
-        BEEP = Terminal("beep")
-
-    with pytest.raises(parser.AmbiguityError):
-        G().build_table()
-
-
-def test_grammar_aho_ullman_1():
-    class G(Grammar):
-        start = "S"
-        generator = parser.GenerateSLR1
-
-        @rule
-        def S(self):
-            return seq(self.L, self.EQUAL, self.R) | self.R
-
-        @rule
-        def L(self):
-            return seq(self.STAR, self.R) | self.ID
-
-        @rule
-        def R(self):
-            return self.L
-
-        EQUAL = Terminal("=")
-        STAR = Terminal("*")
-        ID = Terminal("id")
-
-    with pytest.raises(parser.AmbiguityError):
-        G().build_table()
-
-    G().build_table(generator=parser.GenerateLR1)
-    G().build_table(generator=parser.GeneratePager)
-
-
 def test_grammar_aho_ullman_2():
    class TestGrammar(Grammar):
        start = "S"
-        generator = parser.GenerateSLR1

        @rule
        def S(self):
@ -231,7 +119,6 @@ def test_grammar_aho_ullman_2():
        A = Terminal("a")
        B = Terminal("b")

-    TestGrammar().build_table()
    TestGrammar().build_table(generator=parser.GenerateLR1)
    TestGrammar().build_table(generator=parser.GeneratePager)