From bb94fc6c9ceef82b71542090cb410a424e6a90ff Mon Sep 17 00:00:00 2001
From: John Doty <john@d0ty.me>
Date: Fri, 11 Oct 2024 07:52:48 -0700
Subject: [PATCH] [parser] clean clean clean

---
 parser/parser.py | 190 +++++++++++++++++++++++------------------------
 1 file changed, 92 insertions(+), 98 deletions(-)

diff --git a/parser/parser.py b/parser/parser.py
index c686796..10da061 100644
--- a/parser/parser.py
+++ b/parser/parser.py
@@ -269,14 +269,102 @@ class Configuration(typing.NamedTuple):
         return f"{self.core.format(alphabet)}{la}"
 
 
-class CoreSet(frozenset[ConfigurationCore]):
-    pass
-
-
 class ConfigSet(frozenset[Configuration]):
     pass
 
 
+# Here we have a slightly different definition of a ConfigurationSet; we keep the
+# lookaheads outside and use a dictionary to check for containment quickly.
+# ItemSet is used in the GRM/Pager/Chin algorithm.
+@dataclasses.dataclass
+class ItemSet:
+    """An ItemSet is a group of configuration cores together with their
+    "contexts", or lookahead sets.
+
+    An ItemSet is comparable for equality, and also supports the weaker notion
+    of "weakly compatible", which is used to collapse states in Pager's
+    algorithm.
+    """
+
+    items: dict[ConfigurationCore, set[int]]
+
+    def __init__(self, items=None):
+        self.items = items or {}
+
+    @classmethod
+    def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
+        return ItemSet({config.core: set(config.lookahead) for config in config_set})
+
+    def weakly_compatible(self, other: "ItemSet") -> bool:
+        a = self.items
+        b = other.items
+
+        if len(a) != len(b):
+            return False
+
+        for acore in a:
+            if acore not in b:
+                return False
+
+        if len(a) == 1:
+            return True
+
+        # DOTY: This loop I do not understand, truly. What the heck is happening here?
+        a_keys = list(a.keys())
+        for i, i_key in enumerate(itertools.islice(a_keys, 0, len(a_keys) - 1)):
+            for j_key in itertools.islice(a_keys, i + 1, None):
+                a_i_key = a[i_key]
+                b_i_key = b[i_key]
+                a_j_key = a[j_key]
+                b_j_key = b[j_key]
+
+                # DOTY: GRMTools is written with intersects(); we don't have that, we
+                #       have `not disjoint()`. :P There are many double negatives....
+                #
+                #  not (intersect(a_i, b_j) or intersect(a_j, b_i))
+                #  not ((not disjoint(a_i, b_j)) or (not disjoint(a_j, b_i)))
+                #  ((not not disjoint(a_i, b_j)) and (not not disjoint(a_j, b_i)))
+                #  disjoint(a_i, b_j) and disjoint(a_j, b_i)
+                if a_i_key.isdisjoint(b_j_key) and a_j_key.isdisjoint(b_i_key):
+                    continue
+
+                # intersect(a_i, a_j) or intersect(b_i, b_j)
+                # (not disjoint(a_i, a_j)) or (not disjoint(b_i, b_j))
+                # not (disjoint(a_i, a_j) and disjoint(b_i, b_j))
+                if not (a_i_key.isdisjoint(a_j_key) and b_i_key.isdisjoint(b_j_key)):
+                    continue
+
+                return False
+
+        return True
+
+    def weakly_merge(self, other: "ItemSet") -> bool:
+        """Merge other into self, returning True if this led to any changes."""
+        a = self.items
+        b = other.items
+
+        changed = False
+        for a_key, a_ctx in a.items():
+            start_len = len(a_ctx)
+            a_ctx.update(b[a_key])  # Python doesn't tell us changes
+            changed = changed or (start_len != len(a_ctx))
+
+        return changed
+
+    def goto(self, symbol: int) -> "ItemSet":
+        result = ItemSet()
+        for core, context in self.items.items():
+            if core.next == symbol:
+                next = core.replace_position(core.position + 1)
+                result.items[next] = set(context)
+        return result
+
+    def to_config_set(self) -> ConfigSet:
+        return ConfigSet(
+            {Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()}
+        )
+
+
 class ConfigurationSetInfo:
     """When we build a grammar into a table, the first thing we need to do is
     generate all the configuration sets and their successors.
@@ -823,9 +911,6 @@ class GenerateLR0:
     # The end symbol of the grammar.
     end_symbol: int
 
-    config_sets_key: dict[ConfigSet, int]
-    successors: list[set[int]]
-
     def __init__(
         self,
         start: str,
@@ -1483,90 +1568,6 @@ class GenerateLR1(GenerateSLR1):
         return self.gen_sets(seeds)
 
 
-# Here we have a slightly different definition of a ConfigurationSet; we keep the
-# lookaheads outside and use a dictionary to check for containment quickly.
-# ItemSet is used in the GRM/Pager/Chin algorithm.
-@dataclasses.dataclass
-class ItemSet:
-    items: dict[ConfigurationCore, set[int]]
-
-    def __init__(self, items=None):
-        self.items = items or {}
-
-    @classmethod
-    def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
-        return ItemSet({config.core: set(config.lookahead) for config in config_set})
-
-    def weakly_compatible(self, other: "ItemSet") -> bool:
-        a = self.items
-        b = other.items
-
-        if len(a) != len(b):
-            return False
-
-        for acore in a:
-            if acore not in b:
-                return False
-
-        if len(a) == 1:
-            return True
-
-        # DOTY: This loop I do not understand, truly. What the heck is happening here?
-        a_keys = list(a.keys())
-        for i, i_key in enumerate(itertools.islice(a_keys, 0, len(a_keys) - 1)):
-            for j_key in itertools.islice(a_keys, i + 1, None):
-                a_i_key = a[i_key]
-                b_i_key = b[i_key]
-                a_j_key = a[j_key]
-                b_j_key = b[j_key]
-
-                # DOTY: GRMTools written with intersects(); we don't have that we have
-                #       `not disjoint()`. :P There are many double negatives....
-                #
-                #  not (intersect(a_i, b_j) or intersect(a_j, b_i))
-                #  not ((not disjoint(a_i, b_j)) or (not disjoint(a_j, b_i)))
-                #  ((not not disjoint(a_i, b_j)) and (not not disjoint(a_j, b_i)))
-                #  disjoint(a_i, b_j) and disjoint(a_j, b_i)
-                if a_i_key.isdisjoint(b_j_key) and a_j_key.isdisjoint(b_i_key):
-                    continue
-
-                # intersect(a_i, a_j) or intersect(b_i, b_j)
-                # (not disjoint(a_i, a_j)) or (not disjoint(b_i, b_j))
-                # not (disjoint(a_i, a_j) and disjoint(b_i, b_j))
-                if not (a_i_key.isdisjoint(a_j_key) and b_i_key.isdisjoint(b_j_key)):
-                    continue
-
-                return False
-
-        return True
-
-    def weakly_merge(self, other: "ItemSet") -> bool:
-        """Merge b into a, returning True if this lead to any changes."""
-        a = self.items
-        b = other.items
-
-        changed = False
-        for a_key, a_ctx in a.items():
-            start_len = len(a_ctx)
-            a_ctx.update(b[a_key])  # Python doesn't tell us changes
-            changed = changed or (start_len != len(a_ctx))
-
-        return changed
-
-    def goto(self, symbol: int) -> "ItemSet":
-        result = ItemSet()
-        for core, context in self.items.items():
-            if core.next == symbol:
-                next = core.replace_position(core.position + 1)
-                result.items[next] = set(context)
-        return result
-
-    def to_config_set(self) -> ConfigSet:
-        return ConfigSet(
-            {Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()}
-        )
-
-
 class GeneratePager(GenerateLR1):
     """Pager's algorithm.
 
@@ -1625,10 +1626,6 @@ class GeneratePager(GenerateLR1):
         # token more than once.
         seen: set[int] = set()
 
-        # new_states is used to separate out iterating over states vs.
-        # mutating it
-        new_states: list[tuple[int, ItemSet]] = []
-
         # cnd_[rule|token]_weaklies represent which states are possible weakly
         # compatible matches for a given symbol.
         #
@@ -1669,7 +1666,6 @@ class GeneratePager(GenerateLR1):
             closed_states[state_i] = cl_state
 
             seen.clear()
-            new_states.clear()
             for core in cl_state.items.keys():
                 sym = core.next
                 if sym is None or sym in seen:
@@ -1677,9 +1673,7 @@ class GeneratePager(GenerateLR1):
                 seen.add(sym)
 
                 nstate = cl_state.goto(sym)
-                new_states.append((sym, nstate))
 
-            for sym, nstate in new_states:
                 # Try and find a compatible match for this state.
                 cnd_states = cnd_weaklies[sym]