2 changed files with 523 additions and 338 deletions
--- a/parser/parser.py
+++ b/parser/parser.py
@ -135,6 +135,7 @@ import bisect
 import collections
 import dataclasses
 import enum
 import functools
 import inspect
 import itertools
 import json
@ -273,100 +274,7 @@ class ConfigSet(frozenset[Configuration]):
    pass
-# Here we have a slightly different definition of a ConfigurationSet; we keep
+class ConfigurationSetInfo:
 # the lookaheads outside and use a dictionary to check for containment
 # quickly. ItemSet is used in the GRM/Pager/Chin algorithm.
@dataclasses.dataclass
 class ItemSet:
    """An ItemSet is a group of configuration cores together with their
    "contexts", or lookahead sets.
    An ItemSet is comparable for equality, and also supports this lesser notion
    of "weakly compatible" which is used to collapse states in the pager
    algorithm.
    """
    items: dict[ConfigurationCore, set[int]]
    def __init__(self, items=None):
        """Create an item set around `items` (core -> lookahead set).

        When `items` is omitted or empty, a new empty dict is used instead.
        """
        self.items = items if items else {}
    @classmethod
    def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
        """Build an item set from the configurations in `config_set`.

        Each configuration contributes its core as a key and a new mutable
        set holding its lookahead as the value. The instance is created
        through `cls`, so calling this on a subclass yields that subclass
        rather than a plain ItemSet.
        """
        return cls({config.core: set(config.lookahead) for config in config_set})
    def weakly_compatible(self, other: "ItemSet") -> bool:
        """Report whether `other` is weakly compatible with this item set.

        Both sets must hold exactly the same cores. Beyond that, every
        unordered pair of cores (i, j) has to pass at least one of these
        checks on the lookahead contexts:

          * the "crossed" contexts do not overlap: mine[i] shares nothing
            with theirs[j], and mine[j] shares nothing with theirs[i]; or
          * two contexts already overlap inside one of the sets: mine[i]
            meets mine[j], or theirs[i] meets theirs[j].

        A pair that fails both checks makes the sets incompatible.
        """
        mine = self.items
        theirs = other.items

        # Equal sizes plus "each of my cores is also theirs" means the two
        # key sets are identical.
        if len(mine) != len(theirs):
            return False
        if any(core not in theirs for core in mine):
            return False

        # With a single shared core there are no pairs to examine.
        if len(mine) == 1:
            return True

        # combinations() yields the pairs in the same order as a nested
        # "i < j" walk over the keys.
        for first, second in itertools.combinations(mine, 2):
            mine_first, mine_second = mine[first], mine[second]
            theirs_first, theirs_second = theirs[first], theirs[second]

            crossed_clear = mine_first.isdisjoint(theirs_second) and mine_second.isdisjoint(
                theirs_first
            )
            if crossed_clear:
                continue

            overlap_within = (not mine_first.isdisjoint(mine_second)) or (
                not theirs_first.isdisjoint(theirs_second)
            )
            if overlap_within:
                continue

            return False

        return True
    def weakly_merge(self, other: "ItemSet") -> bool:
        """Fold the lookahead contexts of `other` into this item set.

        Every core of this set is looked up in `other`, so a core missing
        there raises KeyError. Returns True if any context of this set grew.
        """
        theirs = other.items
        grew = False
        for core, context in self.items.items():
            before = len(context)
            # set.update() returns None, so growth is detected by size.
            context.update(theirs[core])
            if len(context) != before:
                grew = True
        return grew
    def goto(self, symbol: int) -> "ItemSet":
        """Return the item set obtained from this one by consuming `symbol`.

        Only cores whose next symbol equals `symbol` are kept; each is
        advanced by one position and paired with a copy of its context.
        """
        advanced = ItemSet()
        shifted = advanced.items
        for core, context in self.items.items():
            if core.next != symbol:
                continue
            # Copy the context so the result shares no set objects with self.
            shifted[core.replace_position(core.position + 1)] = set(context)
        return advanced
    def to_config_set(self) -> ConfigSet:
        """Convert to a ConfigSet: one Configuration per core, with the
        core's lookahead context stored as an ascending tuple.
        """
        configs = set()
        for core, context in self.items.items():
            configs.add(Configuration(core, tuple(sorted(context))))
        return ConfigSet(configs)
@dataclasses.dataclass
 class StateGraph:
    """When we build a grammar into a table, the first thing we need to do is
    generate all the configuration sets and their successors.
@ -381,23 +289,65 @@ class StateGraph:
    structure, but they all compute this information.)
    """
-    closures: list[ConfigSet]
+    core_key: dict[ConfigSet, int]  # Map a ConfigSet into am index
    config_set_key: dict[ConfigSet, int]  # Map a ConfigSet into am index
    sets: list[ConfigSet]  # Map the index back into a set
    closures: list[ConfigSet | None]  # Track closures
    # All the successors for all of the sets. `successors[i]` is the mapping
    # from grammar symbol to the index of the set you get by processing that
    # symbol.
    successors: list[dict[int, int]]
    def __init__(self):
        """Start with nothing registered: every lookup and table is empty."""
        # Lookups from a set to its index.
        self.core_key, self.config_set_key = {}, {}
        # Tables addressed by set index.
        self.sets, self.closures, self.successors = [], [], []
    def register_core(self, c: ConfigSet) -> typing.Tuple[int, bool]:
        """Look up `c`, registering it under a new index if it is unknown.

        Returns `(index, is_new)`: the index of `c` within this structure
        and whether this call created it. A newly created index gets a
        `None` closure placeholder and an empty successor mapping, so the
        index can be used with the other tables straight away.
        """
        if c in self.core_key:
            return self.core_key[c], False

        new_index = len(self.sets)
        self.core_key[c] = new_index
        self.sets.append(c)
        self.closures.append(None)
        self.successors.append({})
        return new_index, True
    def register_config_closure(self, c_id: int, closure: ConfigSet):
        """Record `closure` as the closure of set `c_id` and index it by value.

        The closure slot for `c_id` must still be empty (None).
        """
        slots = self.closures
        assert slots[c_id] is None
        slots[c_id] = closure
        self.config_set_key[closure] = c_id
    def add_successor(self, c_id: int, symbol: int, successor: int):
        """Record that set `c_id` leads to set `successor` on `symbol`.

        `c_id` and `successor` are set indices in this structure; `symbol`
        is the id of a symbol in the grammar's alphabet. An existing entry
        for the same `(c_id, symbol)` is overwritten.
        """
        transitions = self.successors[c_id]
        transitions[symbol] = successor
    def dump_state(self, alphabet: list[str]) -> str:
        return json.dumps(
            {
                str(set_index): {
-                    "closures": [c.format(alphabet) for c in closure],
+                    "configs": [c.format(alphabet) for c in config_set],
-                    "successors": {alphabet[k]: str(v) for k, v in successors.items()},
+                    "closures": [c.format(alphabet) for c in self.closures[set_index] or []],
                    "successors": {
                        alphabet[k]: str(v) for k, v in self.successors[set_index].items()
                    },
                }
-                for set_index, (closure, successors) in enumerate(
+                for set_index, config_set in enumerate(self.sets)
                    zip(self.closures, self.successors)
                )
            },
            indent=4,
            sort_keys=True,
@ -414,8 +364,7 @@ class StateGraph:
        This function raises KeyError if no path is found.
        """
-        # TODO: This should be tested.
+        target_index = self.config_set_key[target_set]
        target_index = self.closures.index(target_set)
        visited = set()
        queue: collections.deque = collections.deque()
@ -558,7 +507,7 @@ class ErrorCollection:
    def gen_exception(
        self,
        alphabet: list[str],
-        all_sets: StateGraph,
+        all_sets: ConfigurationSetInfo,
    ) -> AmbiguityError | None:
        """Format all the errors into an error, or return None if there are no
        errors.
@ -695,7 +644,7 @@ class TableBuilder(object):
        self.action_row = None
        self.goto_row = None
-    def flush(self, all_sets: StateGraph) -> ParseTable:
+    def flush(self, all_sets: ConfigurationSetInfo) -> ParseTable:
        """Finish building the table and return it.
        Raises ValueError if there were any conflicts during construction.
@ -1058,36 +1007,108 @@ class FollowInfo:
        return FollowInfo(follows=follows)
-class ParserGenerator:
+# Here we have a slightly different definition of a ConfigurationSet; we keep the
-    """Generate parse tables for LR1 grammars.
+# lookaheads outside and use a dictionary to check for containment quickly.
 # ItemSet is used in the GRM/Pager/Chin algorithm.
@dataclasses.dataclass
 class ItemSet:
    """An ItemSet is a group of configuration cores together with their
    "contexts", or lookahead sets.
-    This class implements a variant of pager's algorithm to generate the parse
+    An ItemSet is comparable for equality, and also supports this lesser notion
-    tables, which support the same set of languages as Canonical LR1 but with
+    of "weakly compatible" which is used to collapse states in the pager
-    much smaller resulting parse tables.
+    algorithm.
    """
-    I'll be honest, I don't understnd this one as well as the pure LR1
+    items: dict[ConfigurationCore, set[int]]
    algorithm. It proceeds as LR1, generating successor states, but every
    time it makes a new state it searches the states it has already made for
    one that is "weakly compatible;" if it finds one it merges the new state
    with the old state and marks the old state to be re-visited.
-    The implementation here follows from the implementation in
+    def __init__(self, items=None):
-    `GRMTools<https://github.com/softdevteam/grmtools/blob/master/lrtable/src/lib/pager.rs>`_.
+        self.items = items or {}
-    As they explain there:
+    @classmethod
    def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
        return ItemSet({config.core: set(config.lookahead) for config in config_set})
-    > The general algorithms that form the basis of what's used in this file
+    def weakly_compatible(self, other: "ItemSet") -> bool:
-    > can be found in:
+        a = self.items
-    >
+        b = other.items
-    >      A Practical General Method for Constructing LR(k) Parsers
+
-    >         David Pager, Acta Informatica 7, 249--268, 1977
+        if len(a) != len(b):
-    >
+            return False
-    > However Pager's paper is dense, and doesn't name sub-parts of the
+
-    > algorithm. We mostly reference the (still incomplete, but less
+        for acore in a:
-    > incomplete) version of the algorithm found in:
+            if acore not in b:
-    >
+                return False
-    >      Measuring and extending LR(1) parser generation
+
-    >         Xin Chen, PhD thesis, University of Hawaii, 2009
+        if len(a) == 1:
            return True
        # DOTY: This loop I do not understand, truly. What the heck is happening here?
        a_keys = list(a.keys())
        for i, i_key in enumerate(itertools.islice(a_keys, 0, len(a_keys) - 1)):
            for j_key in itertools.islice(a_keys, i + 1, None):
                a_i_key = a[i_key]
                b_i_key = b[i_key]
                a_j_key = a[j_key]
                b_j_key = b[j_key]
                # DOTY: GRMTools written with intersects(); we don't have that we have
                #       `not disjoint()`. :P There are many double negatives....
                #
                #  not (intersect(a_i, b_j) or intersect(a_j, b_i))
                #  not ((not disjoint(a_i, b_j)) or (not disjoint(a_j, b_i)))
                #  ((not not disjoint(a_i, b_j)) and (not not disjoint(a_j, b_i)))
                #  disjoint(a_i, b_j) and disjoint(a_j, b_i)
                if a_i_key.isdisjoint(b_j_key) and a_j_key.isdisjoint(b_i_key):
                    continue
                # intersect(a_i, a_j) or intersect(b_i, b_j)
                # (not disjoint(a_i, a_j)) or (not disjoint(b_i, b_j))
                # not (disjoint(a_i, a_j) and disjoint(b_i, b_j))
                if not (a_i_key.isdisjoint(a_j_key) and b_i_key.isdisjoint(b_j_key)):
                    continue
                return False
        return True
    def weakly_merge(self, other: "ItemSet") -> bool:
        """Fold the lookahead contexts of `other` into this item set.

        Every core of this set is looked up in `other`, so a core missing
        there raises KeyError. Returns True if any context of this set grew.
        """
        theirs = other.items
        grew = False
        for core, context in self.items.items():
            before = len(context)
            # set.update() returns None, so growth is detected by size.
            context.update(theirs[core])
            if len(context) != before:
                grew = True
        return grew
    def goto(self, symbol: int) -> "ItemSet":
        """Return the item set obtained from this one by consuming `symbol`.

        Only cores whose next symbol equals `symbol` are kept; each is
        advanced by one position and paired with a copy of its context.
        """
        advanced = ItemSet()
        shifted = advanced.items
        for core, context in self.items.items():
            if core.next != symbol:
                continue
            # Copy the context so the result shares no set objects with self.
            shifted[core.replace_position(core.position + 1)] = set(context)
        return advanced
    def to_config_set(self) -> ConfigSet:
        """Convert to a ConfigSet: one Configuration per core, with the
        core's lookahead context stored as an ascending tuple.
        """
        configs = set()
        for core, context in self.items.items():
            configs.add(Configuration(core, tuple(sorted(context))))
        return ConfigSet(configs)
 class GenerateLR1:
    """Generate parse tables for LR1, or "canonical LR" grammars.
    LR1 parsers can recognize more than SLR parsers. Like SLR parsers, they
    are choosier about when they reduce. But unlike SLR parsers, they specify
    the terminals on which they reduce by carrying a 'lookahead' terminal in
    the configuration. The lookahead of a configuration is computed as the
    closure of a configuration set is computed, so see gen_closure_next for
    details. (Except for the start configuration, which has '$' as its
    lookahead.)
    """
    # Internally we use integers as symbols, not strings. Mostly this is fine,
@ -1150,9 +1171,9 @@ class ParserGenerator:
        non-terminal being added, and the second element of the tuple is the
        list of terminals and non-terminals that make up the production.
-        There is no support for alternation. If you want alternations that
+        There is currently no support for custom actions or alternation or
-        you'll have to lower the grammar by hand into the simpler form first,
+        anything like that. If you want alternations that you'll have to lower
-        but that's what the Grammar and NonTerminal classes are for.
+        the grammar by hand into the simpler form first.
        Don't name anything with double-underscores; those are reserved for
        the generator. Don't add '$' either, as it is reserved to mean
@ -1252,215 +1273,105 @@ class ParserGenerator:
            self._firsts,
        )
-    def gen_sets(self, seeds: list[Configuration]) -> StateGraph:
+    def gen_closure(self, seeds: typing.Iterable[Configuration]) -> ConfigSet:
-        # This function can be seen as a modified version of items() from
+        """Compute the closure for the specified configs. The closure is all
-        # Chen's dissertation.
+        of the configurations we could be in. Specifically, if the position
-        #
+        for a config is just before a non-terminal then we must also consider
-        # DOTY: It is also (practically) a converted version from grmtools
+        configurations where the rule is the rule for the non-terminal and
-        #       into python, more or less verbatim at this point. I have some
+        the position is just before the beginning of the rule.
        #       sense of what is going on, and attempt to elaborate with
        #       these comments.
-        # closed_states and core_states are both equally sized vectors of
+        (We have replaced a recursive version with an iterative one.)
-        # states. Core states are smaller, and used for the weakly compatible
+        """
-        # checks, but we ultimately need to return closed states. Closed
+        closure: set[Configuration] = set()
-        # states which are None are those which require processing; thus
+        pending = list(seeds)
-        # closed_states also implicitly serves as a todo list.
+        pending_next = []
-        closed_states: list[ItemSet | None] = []
+        while len(pending) > 0:
-        core_states: list[ItemSet] = []
+            for config in pending:
-        edges: list[dict[int, int]] = []
+                if config in closure:
        # Convert the incoming seed configurations into item sets.
        # TODO: Convert everything to ItemSet natively.
        state0 = ItemSet({seed.core: set(seed.lookahead) for seed in seeds})
        core_states.append(state0)
        closed_states.append(None)
        edges.append({})
        # We maintain a set of which rules and tokens we've seen; when
        # processing a given state there's no point processing a rule or
        # token more than once.
        seen: set[int] = set()
        # cnd_[rule|token]_weaklies represent which states are possible weakly
        # compatible matches for a given symbol.
        #
        # DOTY: As with `seen`, we have a uniform space so we can have a
        #       uniform one of these too.
        cnd_weaklies: list[list[int]] = [[] for _ in range(len(self.alphabet))]
        todo = 1  # How many None values are there in closed_states?
        todo_off = 0  # Offset in closed states to start searching for the next todo.
        while todo > 0:
            assert len(core_states) == len(closed_states)
            assert len(core_states) == len(edges)
            # state_i is the next item to process. We don't want to
            # continually search for the next None from the beginning, so we
            # remember where we last saw a None (todo_off) and search from
            # that point onwards, wrapping as necessary. Since processing a
            # state x disproportionately causes state x + 1 to require
            # processing, this prevents the search from becoming horribly
            # non-linear.
            try:
                state_i = closed_states.index(None, todo_off)
            except ValueError:
                state_i = closed_states.index(None)  # DOTY: Will not raise, given todo > 0
            todo_off = state_i + 1
            todo -= 1
            cl_state = self.gen_closure(core_states[state_i])
            closed_states[state_i] = cl_state
            seen.clear()
            for core in cl_state.items.keys():
                sym = core.next
                if sym is None or sym in seen:
                    continue
                seen.add(sym)
                nstate = cl_state.goto(sym)
                # Try and find a compatible match for this state.
                cnd_states = cnd_weaklies[sym]
                # First of all see if any of the candidate states are exactly
                # the same as the new state, in which case we only need to
                # add an edge to the candidate state. This isn't just an
                # optimisation (though it does avoid the expense of change
                # propagation), but has a correctness aspect: there's no
                # guarantee that the weakly compatible check is reflexive
                # (i.e. a state may not be weakly compatible with itself).
                found = False
                for cnd in cnd_states:
                    if core_states[cnd] == nstate:
                        edges[state_i][sym] = cnd
                        found = True
                        break
                if found:
                    continue
-                # No candidate states were equal to the new state, so we need
+                closure.add(config)
-                # to look for a candidate state which is weakly compatible.
+                pending_next.extend(self.gen_closure_next(config))
                m: int | None = None
                for cnd in cnd_states:
                    if core_states[cnd].weakly_compatible(nstate):
                        m = cnd
                        break
-                if m is not None:
+            temp = pending
-                    # A weakly compatible match has been found.
+            pending = pending_next
-                    edges[state_i][sym] = m
+            pending_next = temp
-                    assert core_states[m].weakly_compatible(nstate)  # TODO: REMOVE, TOO SLOW
+            pending_next.clear()
                    if core_states[m].weakly_merge(nstate):
                        # We only do the simplest change propagation, forcing possibly
                        # affected sets to be entirely reprocessed (which will recursively
                        # force propagation too). Even though this does unnecessary
                        # computation, it is still pretty fast.
                        #
                        # Note also that edges[k] will be completely regenerated, overwriting
                        # all existing entries and possibly adding new ones. We thus don't
                        # need to clear it manually.
                        if closed_states[m] is not None:
                            closed_states[m] = None
                            todo += 1
        # NOTE: The generation of this closure *might* have generated
        #       multiple cores with different lookaheads; if that's
        #       the case we need to merge.
        merged: dict[ConfigurationCore, set[int]] = {}
        for c in closure:
            existing = merged.get(c.core)
            if existing is not None:
                existing.update(c.lookahead)
            else:
-                    stidx = len(core_states)
+                merged[c.core] = set(c.lookahead)
-                    cnd_weaklies[sym].append(stidx)
+        return ConfigSet(Configuration(k, tuple(sorted(v))) for k, v in merged.items())
                    edges[state_i][sym] = stidx
-                    edges.append({})
+    def gen_all_successors(
-                    closed_states.append(None)
+        self, config_set: typing.Iterable[Configuration]
-                    core_states.append(nstate)
+    ) -> list[typing.Tuple[int, ConfigSet]]:
-                    todo += 1
+        """Return all of the non-empty successors for the given config set.
-        # Although the Pager paper doesn't talk about it, the algorithm above
+        (That is, given the config set, pretend we see all the symbols we
-        # can create unreachable states due to the non-determinism inherent
+        could possibly see, and figure out which configs sets we get from
-        # in working with hashsets. Indeed, this can even happen with the
+        those symbols. Those are the successors of this set.)
-        # example from Pager's paper (on perhaps 1 out of 100 runs, 24 or 25
+        """
-        # states will be created instead of 23). We thus need to weed out
+        possible = {config.core.next for config in config_set if config.core.next is not None}
        # unreachable states and update edges accordingly.
        assert len(core_states) == len(closed_states)
-        all_states = []
+        next = []
-        for core_state, closed_state in zip(core_states, closed_states):
+        for symbol in possible:
-            assert closed_state is not None
+            seeds = ConfigSet(
-            all_states.append((core_state, closed_state))
+                config.replace_position(config.core.position + 1)
-        gc_states, gc_edges = self.gc(all_states, edges)
+                for config in config_set
-
+                if config.core.next == symbol
        # DOTY: UGH this is so bad, we should rewrite to use ItemSet everywhere
        #       probably, which actually means getting rid of the pluggable
        #       generator because who actually needs that?
        # Register all the actually merged, final config sets. I should *not*
        # have to do all this work. Really really garbage.
        return StateGraph(
            closures=[closed_state.to_config_set() for _, closed_state in gc_states],
            successors=gc_edges,
            )
            if len(seeds) > 0:
                next.append((symbol, seeds))
-    def gc(
+        return next
        self,
        states: list[tuple[ItemSet, ItemSet]],
        edges: list[dict[int, int]],
    ) -> tuple[list[tuple[ItemSet, ItemSet]], list[dict[int, int]]]:
        # First of all, do a simple pass over all states. All state indexes
        # reachable from the start state will be inserted into the 'seen'
        # set.
        todo = [0]
        seen = set()
        while len(todo) > 0:
            item = todo.pop()
            if item in seen:
                continue
            seen.add(item)
            todo.extend(e for e in edges[item].values() if e not in seen)
-        if len(seen) == len(states):
+    def gen_sets(self, seeds: list[Configuration]) -> ConfigurationSetInfo:
-            # Every state is reachable.
+        """Generate all configuration sets starting from the provided seeds."""
-            return states, edges
+        result = ConfigurationSetInfo()
-        # Imagine we started with 3 states and their edges:
+        successors = []
-        #   states: [0, 1, 2]
+        pending = [ConfigSet(seeds)]
-        #   edges : [[_ => 2]]
+        pending_next = []
-        #
+        while len(pending) > 0:
-        # At this point, 'seen' will be the set {0, 2}. What we need to do is
+            for core in pending:
-        # to create a new list of states that doesn't have state 1 in it.
+                id, is_new = result.register_core(core)
-        # That will cause state 2 to become to state 1, meaning that we need
+                if is_new:
-        # to adjust edges so that the pointer to state 2 is updated to state
+                    config_set = self.gen_closure(core)
-        # 1. In other words we want to achieve this output:
+                    result.register_config_closure(id, config_set)
-        #
+                    for symbol, successor in self.gen_all_successors(config_set):
-        #   states: [0, 2]
+                        successors.append((id, symbol, successor))
-        #   edges : [_ => 1]
+                        pending_next.append(successor)
        #
        # The way we do this is to first iterate over all states, working out
        # what the mapping from seen states to their new offsets is.
        gc_states: list[tuple[ItemSet, ItemSet]] = []
        offsets: list[int] = []
        offset = 0
        for state_i, zstate in enumerate(states):
            offsets.append(state_i - offset)
            if state_i not in seen:
                offset += 1
                continue
-            gc_states.append(zstate)
+            temp = pending
            pending = pending_next
            pending_next = temp
            pending_next.clear()
-        # At this point the offsets list will be [0, 1, 1]. We now create new
+        for id, symbol, successor in successors:
-        # edges where each offset is corrected by looking it up in the
+            result.add_successor(id, symbol, result.core_key[successor])
        # offsets list.
        gc_edges: list[dict[int, int]] = []
        for st_edge_i, st_edges in enumerate(edges):
            if st_edge_i not in seen:
                continue
-            gc_edges.append({k: offsets[v] for k, v in st_edges.items()})
+        return result
-        return (gc_states, gc_edges)
+    def gen_follow(self, symbol: int) -> set[int]:
        """Generate the follow set for the given nonterminal.
        The follow set for a nonterminal is the set of terminals that can
        follow the nonterminal in a valid sentence. The resulting set never
        contains epsilon and is never empty, since we should always at least
        ground out at '$', which is the end-of-stream marker.
        See FollowInfo for more information on how this is determined.
        """
        return self._follows.follows[symbol]
    def gen_first(self, symbols: typing.Iterable[int]) -> typing.Tuple[set[int], bool]:
        """Return the first set for a *sequence* of symbols.
@ -1483,15 +1394,45 @@ class ParserGenerator:
        return (result, True)
-    def gen_closure(self, items: ItemSet) -> ItemSet:
+    def gen_reduce_set(self, config: Configuration) -> typing.Iterable[int]:
-        """Generate the closure of the given ItemSet.
+        """Return the set of symbols that indicate we should reduce the given
        config.
-        Some of the configurations the ItemSet might be positioned right before
+        In an LR1 parser, this is the lookahead of the configuration.
        nonterminals. In that case, obviously, we should *also* behave as if we
        were right at the beginning of each production for that nonterminal. The
        set of all those productions combined with all the incoming productions
        is the closure.
        """
        return config.lookahead
    def gen_closure_next(self, config: Configuration):
        """Return the configurations that `config` adds to a closure.

        When `config.core.next` is None there is nothing to add and the
        result is an empty tuple. Otherwise the result is a tuple with one
        configuration per entry of `self.grammar[config.core.next]`, each
        built with `Configuration.from_rule`.

        All returned configurations carry the same lookahead: the first set
        of `config.rest`, extended with the lookahead of `config` itself
        when gen_first reports epsilon. It is stored as an ascending tuple.
        """
        upcoming = config.core.next
        if upcoming is None:
            return ()

        terminals, has_epsilon = self.gen_first(config.rest)
        if has_epsilon:
            # NOTE(review): this updates the set returned by gen_first in
            # place -- confirm gen_first always hands back a fresh set.
            terminals.update(config.lookahead)
        shared_lookahead = tuple(sorted(terminals))

        return tuple(
            Configuration.from_rule(upcoming, rule, lookahead=shared_lookahead)
            for rule in self.grammar[upcoming]
        )
    def gen_closure_x(self, items: ItemSet) -> ItemSet:
        closure: dict[ConfigurationCore, set[int]] = {}
        # We're going to maintain a set of things to look at, rules that we
@ -1583,7 +1524,7 @@ class ParserGenerator:
                config_next = config.core.next
                if config_next is None:
                    if config.core.name != self.start_symbol:
-                        for a in config.lookahead:
+                        for a in self.gen_reduce_set(config):
                            builder.set_table_reduce(a, config)
                    else:
                        builder.set_table_accept(self.end_symbol, config)
@ -1600,6 +1541,249 @@ class ParserGenerator:
        return builder.flush(config_sets)
 class GeneratePager(GenerateLR1):
    """Pager's algorithm.

    I'll be honest, I don't understand this one as well as the pure LR1
    algorithm. It proceeds as LR1, generating successor states, but every
    time it makes a new state it searches the states it has already made for
    one that is "weakly compatible;" if it finds one it merges the new state
    with the old state and marks the old state to be re-visited.

    The implementation here follows from the implementation in
    `GRMTools <https://github.com/softdevteam/grmtools/blob/master/lrtable/src/lib/pager.rs>`_.
    As they explain there:

    > The general algorithms that form the basis of what's used in this file
    > can be found in:
    >
    >      A Practical General Method for Constructing LR(k) Parsers
    >         David Pager, Acta Informatica 7, 249--268, 1977
    >
    > However Pager's paper is dense, and doesn't name sub-parts of the
    > algorithm. We mostly reference the (still incomplete, but less
    > incomplete) version of the algorithm found in:
    >
    >      Measuring and extending LR(1) parser generation
    >         Xin Chen, PhD thesis, University of Hawaii, 2009
    """

    def gen_sets(self, seeds: list[Configuration]) -> ConfigurationSetInfo:
        """Build the state graph, merging weakly compatible states as we go.

        `seeds` are the configurations that make up the start state (state
        0). The result is a ConfigurationSetInfo whose `sets` are the final
        core states, whose `closures` are the matching closed states, and
        whose `successors` are the per-state edge maps (symbol -> state
        index), all with unreachable states already removed by `gc`.
        """
        # This function can be seen as a modified version of items() from
        # Chen's dissertation.
        #
        # DOTY: It is also (practically) a converted version from grmtools
        #       into python, more or less verbatim at this point. I have some
        #       sense of what is going on, and attempt to elaborate with
        #       these comments.

        # closed_states and core_states are both equally sized vectors of
        # states. Core states are smaller, and used for the weakly compatible
        # checks, but we ultimately need to return closed states. Closed
        # states which are None are those which require processing; thus
        # closed_states also implicitly serves as a todo list.
        closed_states: list[ItemSet | None] = []
        core_states: list[ItemSet] = []
        edges: list[dict[int, int]] = []

        # Convert the incoming seed configurations into item sets.
        # TODO: Convert everything to ItemSet natively.
        state0 = ItemSet({seed.core: set(seed.lookahead) for seed in seeds})
        core_states.append(state0)
        closed_states.append(None)
        edges.append({})

        # We maintain a set of which symbols we've seen; when processing a
        # given state there's no point processing a symbol more than once.
        seen: set[int] = set()

        # cnd_weaklies[sym] lists the states which are possible weakly
        # compatible matches for a state reached over the symbol `sym`.
        #
        # DOTY: As with `seen`, we have a uniform space so we can have a
        #       uniform one of these too.
        cnd_weaklies: list[list[int]] = [[] for _ in range(len(self.alphabet))]

        todo = 1  # How many None values are there in closed_states?
        todo_off = 0  # Offset in closed states to start searching for the next todo.
        while todo > 0:
            assert len(core_states) == len(closed_states)
            assert len(core_states) == len(edges)

            # state_i is the next item to process. We don't want to
            # continually search for the next None from the beginning, so we
            # remember where we last saw a None (todo_off) and search from
            # that point onwards, wrapping as necessary. Since processing a
            # state x disproportionately causes state x + 1 to require
            # processing, this prevents the search from becoming horribly
            # non-linear.
            try:
                state_i = closed_states.index(None, todo_off)
            except ValueError:
                state_i = closed_states.index(None)  # DOTY: Will not raise, given todo > 0
            todo_off = state_i + 1
            todo -= 1

            cl_state = self.gen_closure_x(core_states[state_i])
            closed_states[state_i] = cl_state

            seen.clear()
            for core in cl_state.items:
                sym = core.next
                if sym is None or sym in seen:
                    continue
                seen.add(sym)

                nstate = cl_state.goto(sym)

                # Try and find a compatible match for this state.
                cnd_states = cnd_weaklies[sym]

                # First of all see if any of the candidate states are exactly
                # the same as the new state, in which case we only need to
                # add an edge to the candidate state. This isn't just an
                # optimisation (though it does avoid the expense of change
                # propagation), but has a correctness aspect: there's no
                # guarantee that the weakly compatible check is reflexive
                # (i.e. a state may not be weakly compatible with itself).
                exact: int | None = None
                for cnd in cnd_states:
                    if core_states[cnd] == nstate:
                        exact = cnd
                        break
                if exact is not None:
                    edges[state_i][sym] = exact
                    continue

                # No candidate states were equal to the new state, so we need
                # to look for a candidate state which is weakly compatible.
                m: int | None = None
                for cnd in cnd_states:
                    if core_states[cnd].weakly_compatible(nstate):
                        m = cnd
                        break

                if m is None:
                    # Nothing to merge with: this is a brand new state, and
                    # it becomes a candidate for later states reached over
                    # the same symbol.
                    stidx = len(core_states)
                    cnd_states.append(stidx)
                    edges[state_i][sym] = stidx
                    edges.append({})
                    closed_states.append(None)
                    core_states.append(nstate)
                    todo += 1
                    continue

                # A weakly compatible match has been found. (The loop above
                # has only just established compatibility, so it is not
                # checked a second time here.)
                edges[state_i][sym] = m
                if core_states[m].weakly_merge(nstate):
                    # We only do the simplest change propagation, forcing possibly
                    # affected sets to be entirely reprocessed (which will recursively
                    # force propagation too). Even though this does unnecessary
                    # computation, it is still pretty fast.
                    #
                    # Note also that edges[m] will be completely regenerated, overwriting
                    # all existing entries and possibly adding new ones. We thus don't
                    # need to clear it manually.
                    if closed_states[m] is not None:
                        closed_states[m] = None
                        todo += 1

        # Although the Pager paper doesn't talk about it, the algorithm above
        # can create unreachable states due to the non-determinism inherent
        # in working with hashsets. Indeed, this can even happen with the
        # example from Pager's paper (on perhaps 1 out of 100 runs, 24 or 25
        # states will be created instead of 23). We thus need to weed out
        # unreachable states and update edges accordingly.
        assert len(core_states) == len(closed_states)

        all_states: list[tuple[ItemSet, ItemSet]] = []
        for core_state, closed_state in zip(core_states, closed_states):
            # todo reached zero, so every state has been closed.
            assert closed_state is not None
            all_states.append((core_state, closed_state))
        gc_states, gc_edges = self.gc(all_states, edges)

        # DOTY: UGH this is so bad, we should rewrite to use ItemSet everywhere
        #       probably, which actually means getting rid of the pluggable
        #       generator because who actually needs that?

        # Register all the actually merged, final config sets. I should *not*
        # have to do all this work. Really really garbage.
        result = ConfigurationSetInfo()
        result.sets = [core_state.to_config_set() for core_state, _ in gc_states]
        result.core_key = {s: i for i, s in enumerate(result.sets)}
        result.closures = [closed_state.to_config_set() for _, closed_state in gc_states]
        result.config_set_key = {s: i for i, s in enumerate(result.closures) if s is not None}
        result.successors = gc_edges
        return result

    def gc(
        self,
        states: list[tuple[ItemSet, ItemSet]],
        edges: list[dict[int, int]],
    ) -> tuple[list[tuple[ItemSet, ItemSet]], list[dict[int, int]]]:
        """Drop the states that cannot be reached from state 0.

        `states` and `edges` are parallel lists; `edges[i]` maps a symbol to
        the index of the state reached from state `i`. Returns a
        `(states, edges)` pair containing only the reachable states, with
        every edge target renumbered to the state's new index. When every
        state is reachable the two input lists are returned unchanged (the
        same objects, not copies).
        """
        # First of all, do a simple pass over all states. All state indexes
        # reachable from the start state will be inserted into the 'seen'
        # set.
        todo = [0]
        seen: set[int] = set()
        while todo:
            item = todo.pop()
            if item in seen:
                continue
            seen.add(item)
            todo.extend(e for e in edges[item].values() if e not in seen)

        if len(seen) == len(states):
            # Every state is reachable.
            return states, edges

        # Imagine we started with 3 states and their edges:
        #   states: [0, 1, 2]
        #   edges : [_ => 2]
        #
        # At this point, 'seen' will be the set {0, 2}. What we need to do is
        # to create a new list of states that doesn't have state 1 in it.
        # That will cause state 2 to become state 1, meaning that we need
        # to adjust edges so that the pointer to state 2 is updated to state
        # 1. In other words we want to achieve this output:
        #
        #   states: [0, 2]
        #   edges : [_ => 1]
        #
        # The way we do this is to first iterate over all states, working out
        # what the mapping from seen states to their new offsets is.
        gc_states: list[tuple[ItemSet, ItemSet]] = []
        offsets: list[int] = []
        offset = 0  # Number of unreachable states skipped so far.
        for state_i, zstate in enumerate(states):
            offsets.append(state_i - offset)
            if state_i in seen:
                gc_states.append(zstate)
            else:
                offset += 1

        # At this point the offsets list will be [0, 1, 1]. We now create new
        # edges where each offset is corrected by looking it up in the
        # offsets list.
        gc_edges: list[dict[int, int]] = [
            {k: offsets[v] for k, v in st_edges.items()}
            for st_edge_i, st_edges in enumerate(edges)
            if st_edge_i in seen
        ]

        return (gc_states, gc_edges)
 # Recursive type alias: a list whose elements are each a `str`, a
 # `Terminal`, or a 2-tuple of a `dict[str, typing.Any]` and a nested
 # FlattenedWithMetadata list. The element type is written as a string so the
 # alias can refer to itself before it is defined.
 # NOTE(review): the dict is presumably the "metadata" the name refers to --
 # confirm against the code that builds these values.
 FlattenedWithMetadata = list["str|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"]
@ -2825,7 +3009,7 @@ class Grammar:
    """
    _precedence: dict[str, typing.Tuple[Assoc, int]]
-    _generator: type[ParserGenerator]
+    _generator: type[GenerateLR1]
    _terminals: dict[str, Terminal]
    _nonterminals: dict[str, NonTerminal]
    _trivia: list[Terminal]
@ -2834,7 +3018,7 @@ class Grammar:
        self,
        start: str | NonTerminal | None = None,
        precedence: PrecedenceList | None = None,
-        generator: type[ParserGenerator] | None = None,
+        generator: type[GenerateLR1] | None = None,
        trivia: list[str | Terminal] | None = None,
        name: str | None = None,
    ):
@ -2853,7 +3037,7 @@ class Grammar:
        assert precedence is not None
        if generator is None:
-            generator = getattr(self, "generator", ParserGenerator)
+            generator = getattr(self, "generator", GeneratePager)
        assert generator is not None
        if trivia is None:
--- a/tests/test_grammar.py
+++ b/tests/test_grammar.py
@ -87,8 +87,8 @@ def test_all_generators():
    GENERATORS = [
        # parser.GenerateLR0,
-        # parser.GeneratePager,
+        parser.GeneratePager,
-        parser.ParserGenerator,
+        parser.GenerateLR1,
    ]
    for generator in GENERATORS:
        table = G().build_table(generator=generator)
@ -119,14 +119,15 @@ def test_grammar_aho_ullman_2():
        A = Terminal("a")
        B = Terminal("b")
-    TestGrammar().build_table(generator=parser.ParserGenerator)
+    TestGrammar().build_table(generator=parser.GenerateLR1)
-    # TestGrammar().build_table(generator=parser.GeneratePager)
+    TestGrammar().build_table(generator=parser.GeneratePager)
 def test_fun_lalr():
    class TestGrammar(Grammar):
        start = "S"
        generator = parser.GeneratePager
        @rule
        def S(self):