From bb94fc6c9ceef82b71542090cb410a424e6a90ff Mon Sep 17 00:00:00 2001 From: John Doty Date: Fri, 11 Oct 2024 07:52:48 -0700 Subject: [PATCH] [parser] clean clean clean --- parser/parser.py | 190 +++++++++++++++++++++++------------------------ 1 file changed, 92 insertions(+), 98 deletions(-) diff --git a/parser/parser.py b/parser/parser.py index c686796..10da061 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -269,14 +269,102 @@ class Configuration(typing.NamedTuple): return f"{self.core.format(alphabet)}{la}" -class CoreSet(frozenset[ConfigurationCore]): - pass - - class ConfigSet(frozenset[Configuration]): pass +# Here we have a slightly different definition of a ConfigurationSet; we keep the +# lookaheads outside and use a dictionary to check for containment quickly. +# ItemSet is used in the GRM/Pager/Chin algorithm. +@dataclasses.dataclass +class ItemSet: + """An ItemSet is a group of configuration cores together with their + "contexts", or lookahead sets. + + An ItemSet is comparable for equality, and also supports this lesser notion + of "weakly compatible" which is used to collapse states in the pager + algorithm. + """ + + items: dict[ConfigurationCore, set[int]] + + def __init__(self, items=None): + self.items = items or {} + + @classmethod + def from_config_set(cls, config_set: ConfigSet) -> "ItemSet": + return ItemSet({config.core: set(config.lookahead) for config in config_set}) + + def weakly_compatible(self, other: "ItemSet") -> bool: + a = self.items + b = other.items + + if len(a) != len(b): + return False + + for acore in a: + if acore not in b: + return False + + if len(a) == 1: + return True + + # DOTY: This loop I do not understand, truly. What the heck is happening here? + a_keys = list(a.keys()) + for i, i_key in enumerate(itertools.islice(a_keys, 0, len(a_keys) - 1)): + for j_key in itertools.islice(a_keys, i + 1, None): + a_i_key = a[i_key] + b_i_key = b[i_key] + a_j_key = a[j_key] + b_j_key = b[j_key] + + # DOTY: GRMTools written with intersects(); we don't have that we have + # `not disjoint()`. :P There are many double negatives.... + # + # not (intersect(a_i, b_j) or intersect(a_j, b_i)) + # not ((not disjoint(a_i, b_j)) or (not disjoint(a_j, b_i))) + # ((not not disjoint(a_i, b_j)) and (not not disjoint(a_j, b_i))) + # disjoint(a_i, b_j) and disjoint(a_j, b_i) + if a_i_key.isdisjoint(b_j_key) and a_j_key.isdisjoint(b_i_key): + continue + + # intersect(a_i, a_j) or intersect(b_i, b_j) + # (not disjoint(a_i, a_j)) or (not disjoint(b_i, b_j)) + # not (disjoint(a_i, a_j) and disjoint(b_i, b_j)) + if not (a_i_key.isdisjoint(a_j_key) and b_i_key.isdisjoint(b_j_key)): + continue + + return False + + return True + + def weakly_merge(self, other: "ItemSet") -> bool: + """Merge b into a, returning True if this lead to any changes.""" + a = self.items + b = other.items + + changed = False + for a_key, a_ctx in a.items(): + start_len = len(a_ctx) + a_ctx.update(b[a_key]) # Python doesn't tell us changes + changed = changed or (start_len != len(a_ctx)) + + return changed + + def goto(self, symbol: int) -> "ItemSet": + result = ItemSet() + for core, context in self.items.items(): + if core.next == symbol: + next = core.replace_position(core.position + 1) + result.items[next] = set(context) + return result + + def to_config_set(self) -> ConfigSet: + return ConfigSet( + {Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()} + ) + + class ConfigurationSetInfo: """When we build a grammar into a table, the first thing we need to do is generate all the configuration sets and their successors. @@ -823,9 +911,6 @@ class GenerateLR0: # The end symbol of the grammar. end_symbol: int - config_sets_key: dict[ConfigSet, int] - successors: list[set[int]] - def __init__( self, start: str, @@ -1483,90 +1568,6 @@ class GenerateLR1(GenerateSLR1): return self.gen_sets(seeds) -# Here we have a slightly different definition of a ConfigurationSet; we keep the -# lookaheads outside and use a dictionary to check for containment quickly. -# ItemSet is used in the GRM/Pager/Chin algorithm. -@dataclasses.dataclass -class ItemSet: - items: dict[ConfigurationCore, set[int]] - - def __init__(self, items=None): - self.items = items or {} - - @classmethod - def from_config_set(cls, config_set: ConfigSet) -> "ItemSet": - return ItemSet({config.core: set(config.lookahead) for config in config_set}) - - def weakly_compatible(self, other: "ItemSet") -> bool: - a = self.items - b = other.items - - if len(a) != len(b): - return False - - for acore in a: - if acore not in b: - return False - - if len(a) == 1: - return True - - # DOTY: This loop I do not understand, truly. What the heck is happening here? - a_keys = list(a.keys()) - for i, i_key in enumerate(itertools.islice(a_keys, 0, len(a_keys) - 1)): - for j_key in itertools.islice(a_keys, i + 1, None): - a_i_key = a[i_key] - b_i_key = b[i_key] - a_j_key = a[j_key] - b_j_key = b[j_key] - - # DOTY: GRMTools written with intersects(); we don't have that we have - # `not disjoint()`. :P There are many double negatives.... - # - # not (intersect(a_i, b_j) or intersect(a_j, b_i)) - # not ((not disjoint(a_i, b_j)) or (not disjoint(a_j, b_i))) - # ((not not disjoint(a_i, b_j)) and (not not disjoint(a_j, b_i))) - # disjoint(a_i, b_j) and disjoint(a_j, b_i) - if a_i_key.isdisjoint(b_j_key) and a_j_key.isdisjoint(b_i_key): - continue - - # intersect(a_i, a_j) or intersect(b_i, b_j) - # (not disjoint(a_i, a_j)) or (not disjoint(b_i, b_j)) - # not (disjoint(a_i, a_j) and disjoint(b_i, b_j)) - if not (a_i_key.isdisjoint(a_j_key) and b_i_key.isdisjoint(b_j_key)): - continue - - return False - - return True - - def weakly_merge(self, other: "ItemSet") -> bool: - """Merge b into a, returning True if this lead to any changes.""" - a = self.items - b = other.items - - changed = False - for a_key, a_ctx in a.items(): - start_len = len(a_ctx) - a_ctx.update(b[a_key]) # Python doesn't tell us changes - changed = changed or (start_len != len(a_ctx)) - - return changed - - def goto(self, symbol: int) -> "ItemSet": - result = ItemSet() - for core, context in self.items.items(): - if core.next == symbol: - next = core.replace_position(core.position + 1) - result.items[next] = set(context) - return result - - def to_config_set(self) -> ConfigSet: - return ConfigSet( - {Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()} - ) - - class GeneratePager(GenerateLR1): """Pager's algorithm. @@ -1625,10 +1626,6 @@ class GeneratePager(GenerateLR1): # token more than once. seen: set[int] = set() - # new_states is used to separate out iterating over states vs. - # mutating it - new_states: list[tuple[int, ItemSet]] = [] - # cnd_[rule|token]_weaklies represent which states are possible weakly # compatible matches for a given symbol. # @@ -1669,7 +1666,6 @@ class GeneratePager(GenerateLR1): closed_states[state_i] = cl_state seen.clear() - new_states.clear() for core in cl_state.items.keys(): sym = core.next if sym is None or sym in seen: @@ -1677,9 +1673,7 @@ class GeneratePager(GenerateLR1): seen.add(sym) nstate = cl_state.goto(sym) - new_states.append((sym, nstate)) - for sym, nstate in new_states: # Try and find a compatible match for this state. cnd_states = cnd_weaklies[sym]