[parser] clean clean clean

2024-10-11 07:52:48 -07:00 · 2024-10-11 07:52:48 -07:00 · bb94fc6c9c
commit bb94fc6c9c
parent 2656a1d328
1 changed files with 92 additions and 98 deletions
--- a/parser/parser.py
+++ b/parser/parser.py
@ -269,14 +269,102 @@ class Configuration(typing.NamedTuple):
        return f"{self.core.format(alphabet)}{la}"
 class CoreSet(frozenset[ConfigurationCore]):
    pass
 class ConfigSet(frozenset[Configuration]):
    pass
 # Here we have a slightly different definition of a ConfigurationSet; we keep the
 # lookaheads outside and use a dictionary to check for containment quickly.
 # ItemSet is used in the GRM/Pager/Chin algorithm.
@dataclasses.dataclass
 class ItemSet:
    """An ItemSet is a group of configuration cores together with their
    "contexts", or lookahead sets.
    An ItemSet is comparable for equality, and also supports this lesser notion
    of "weakly compatible" which is used to collapse states in the pager
    algorithm.
    """
    items: dict[ConfigurationCore, set[int]]
    def __init__(self, items=None):
        self.items = items or {}
    @classmethod
    def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
        return ItemSet({config.core: set(config.lookahead) for config in config_set})
    def weakly_compatible(self, other: "ItemSet") -> bool:
        a = self.items
        b = other.items
        if len(a) != len(b):
            return False
        for acore in a:
            if acore not in b:
                return False
        if len(a) == 1:
            return True
        # DOTY: This loop I do not understand, truly. What the heck is happening here?
        a_keys = list(a.keys())
        for i, i_key in enumerate(itertools.islice(a_keys, 0, len(a_keys) - 1)):
            for j_key in itertools.islice(a_keys, i + 1, None):
                a_i_key = a[i_key]
                b_i_key = b[i_key]
                a_j_key = a[j_key]
                b_j_key = b[j_key]
                # DOTY: GRMTools written with intersects(); we don't have that we have
                #       `not disjoint()`. :P There are many double negatives....
                #
                #  not (intersect(a_i, b_j) or intersect(a_j, b_i))
                #  not ((not disjoint(a_i, b_j)) or (not disjoint(a_j, b_i)))
                #  ((not not disjoint(a_i, b_j)) and (not not disjoint(a_j, b_i)))
                #  disjoint(a_i, b_j) and disjoint(a_j, b_i)
                if a_i_key.isdisjoint(b_j_key) and a_j_key.isdisjoint(b_i_key):
                    continue
                # intersect(a_i, a_j) or intersect(b_i, b_j)
                # (not disjoint(a_i, a_j)) or (not disjoint(b_i, b_j))
                # not (disjoint(a_i, a_j) and disjoint(b_i, b_j))
                if not (a_i_key.isdisjoint(a_j_key) and b_i_key.isdisjoint(b_j_key)):
                    continue
                return False
        return True
    def weakly_merge(self, other: "ItemSet") -> bool:
        """Merge b into a, returning True if this lead to any changes."""
        a = self.items
        b = other.items
        changed = False
        for a_key, a_ctx in a.items():
            start_len = len(a_ctx)
            a_ctx.update(b[a_key])  # Python doesn't tell us changes
            changed = changed or (start_len != len(a_ctx))
        return changed
    def goto(self, symbol: int) -> "ItemSet":
        result = ItemSet()
        for core, context in self.items.items():
            if core.next == symbol:
                next = core.replace_position(core.position + 1)
                result.items[next] = set(context)
        return result
    def to_config_set(self) -> ConfigSet:
        return ConfigSet(
            {Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()}
        )
 class ConfigurationSetInfo:
    """When we build a grammar into a table, the first thing we need to do is
    generate all the configuration sets and their successors.
@ -823,9 +911,6 @@ class GenerateLR0:
    # The end symbol of the grammar.
    end_symbol: int
    config_sets_key: dict[ConfigSet, int]
    successors: list[set[int]]
    def __init__(
        self,
        start: str,
@ -1483,90 +1568,6 @@ class GenerateLR1(GenerateSLR1):
        return self.gen_sets(seeds)
 # Here we have a slightly different definition of a ConfigurationSet; we keep the
 # lookaheads outside and use a dictionary to check for containment quickly.
 # ItemSet is used in the GRM/Pager/Chin algorithm.
@dataclasses.dataclass
 class ItemSet:
    items: dict[ConfigurationCore, set[int]]
    def __init__(self, items=None):
        self.items = items or {}
    @classmethod
    def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
        return ItemSet({config.core: set(config.lookahead) for config in config_set})
    def weakly_compatible(self, other: "ItemSet") -> bool:
        a = self.items
        b = other.items
        if len(a) != len(b):
            return False
        for acore in a:
            if acore not in b:
                return False
        if len(a) == 1:
            return True
        # DOTY: This loop I do not understand, truly. What the heck is happening here?
        a_keys = list(a.keys())
        for i, i_key in enumerate(itertools.islice(a_keys, 0, len(a_keys) - 1)):
            for j_key in itertools.islice(a_keys, i + 1, None):
                a_i_key = a[i_key]
                b_i_key = b[i_key]
                a_j_key = a[j_key]
                b_j_key = b[j_key]
                # DOTY: GRMTools written with intersects(); we don't have that we have
                #       `not disjoint()`. :P There are many double negatives....
                #
                #  not (intersect(a_i, b_j) or intersect(a_j, b_i))
                #  not ((not disjoint(a_i, b_j)) or (not disjoint(a_j, b_i)))
                #  ((not not disjoint(a_i, b_j)) and (not not disjoint(a_j, b_i)))
                #  disjoint(a_i, b_j) and disjoint(a_j, b_i)
                if a_i_key.isdisjoint(b_j_key) and a_j_key.isdisjoint(b_i_key):
                    continue
                # intersect(a_i, a_j) or intersect(b_i, b_j)
                # (not disjoint(a_i, a_j)) or (not disjoint(b_i, b_j))
                # not (disjoint(a_i, a_j) and disjoint(b_i, b_j))
                if not (a_i_key.isdisjoint(a_j_key) and b_i_key.isdisjoint(b_j_key)):
                    continue
                return False
        return True
    def weakly_merge(self, other: "ItemSet") -> bool:
        """Merge b into a, returning True if this lead to any changes."""
        a = self.items
        b = other.items
        changed = False
        for a_key, a_ctx in a.items():
            start_len = len(a_ctx)
            a_ctx.update(b[a_key])  # Python doesn't tell us changes
            changed = changed or (start_len != len(a_ctx))
        return changed
    def goto(self, symbol: int) -> "ItemSet":
        result = ItemSet()
        for core, context in self.items.items():
            if core.next == symbol:
                next = core.replace_position(core.position + 1)
                result.items[next] = set(context)
        return result
    def to_config_set(self) -> ConfigSet:
        return ConfigSet(
            {Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()}
        )
 class GeneratePager(GenerateLR1):
    """Pager's algorithm.
@ -1625,10 +1626,6 @@ class GeneratePager(GenerateLR1):
        # token more than once.
        seen: set[int] = set()
        # new_states is used to separate out iterating over states vs.
        # mutating it
        new_states: list[tuple[int, ItemSet]] = []
        # cnd_[rule|token]_weaklies represent which states are possible weakly
        # compatible matches for a given symbol.
        #
@ -1669,7 +1666,6 @@ class GeneratePager(GenerateLR1):
            closed_states[state_i] = cl_state
            seen.clear()
            new_states.clear()
            for core in cl_state.items.keys():
                sym = core.next
                if sym is None or sym in seen:
@ -1677,9 +1673,7 @@ class GeneratePager(GenerateLR1):
                seen.add(sym)
                nstate = cl_state.goto(sym)
                new_states.append((sym, nstate))
            for sym, nstate in new_states:
                # Try and find a compatible match for this state.
                cnd_states = cnd_weaklies[sym]