[parser] clean clean clean

This commit is contained in:
John Doty 2024-10-11 07:52:48 -07:00
parent 2656a1d328
commit bb94fc6c9c

View file

@ -269,14 +269,102 @@ class Configuration(typing.NamedTuple):
return f"{self.core.format(alphabet)}{la}" return f"{self.core.format(alphabet)}{la}"
class CoreSet(frozenset[ConfigurationCore]):
pass
class ConfigSet(frozenset[Configuration]): class ConfigSet(frozenset[Configuration]):
pass pass
# Here we have a slightly different definition of a ConfigurationSet; we keep the
# lookaheads outside and use a dictionary to check for containment quickly.
# ItemSet is used in the GRM/Pager/Chin algorithm.
@dataclasses.dataclass
class ItemSet:
"""An ItemSet is a group of configuration cores together with their
"contexts", or lookahead sets.
An ItemSet is comparable for equality, and also supports this lesser notion
of "weakly compatible" which is used to collapse states in the pager
algorithm.
"""
items: dict[ConfigurationCore, set[int]]
def __init__(self, items=None):
self.items = items or {}
@classmethod
def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
return ItemSet({config.core: set(config.lookahead) for config in config_set})
def weakly_compatible(self, other: "ItemSet") -> bool:
a = self.items
b = other.items
if len(a) != len(b):
return False
for acore in a:
if acore not in b:
return False
if len(a) == 1:
return True
# DOTY: This loop I do not understand, truly. What the heck is happening here?
a_keys = list(a.keys())
for i, i_key in enumerate(itertools.islice(a_keys, 0, len(a_keys) - 1)):
for j_key in itertools.islice(a_keys, i + 1, None):
a_i_key = a[i_key]
b_i_key = b[i_key]
a_j_key = a[j_key]
b_j_key = b[j_key]
# DOTY: GRMTools written with intersects(); we don't have that we have
# `not disjoint()`. :P There are many double negatives....
#
# not (intersect(a_i, b_j) or intersect(a_j, b_i))
# not ((not disjoint(a_i, b_j)) or (not disjoint(a_j, b_i)))
# ((not not disjoint(a_i, b_j)) and (not not disjoint(a_j, b_i)))
# disjoint(a_i, b_j) and disjoint(a_j, b_i)
if a_i_key.isdisjoint(b_j_key) and a_j_key.isdisjoint(b_i_key):
continue
# intersect(a_i, a_j) or intersect(b_i, b_j)
# (not disjoint(a_i, a_j)) or (not disjoint(b_i, b_j))
# not (disjoint(a_i, a_j) and disjoint(b_i, b_j))
if not (a_i_key.isdisjoint(a_j_key) and b_i_key.isdisjoint(b_j_key)):
continue
return False
return True
def weakly_merge(self, other: "ItemSet") -> bool:
"""Merge b into a, returning True if this lead to any changes."""
a = self.items
b = other.items
changed = False
for a_key, a_ctx in a.items():
start_len = len(a_ctx)
a_ctx.update(b[a_key]) # Python doesn't tell us changes
changed = changed or (start_len != len(a_ctx))
return changed
def goto(self, symbol: int) -> "ItemSet":
result = ItemSet()
for core, context in self.items.items():
if core.next == symbol:
next = core.replace_position(core.position + 1)
result.items[next] = set(context)
return result
def to_config_set(self) -> ConfigSet:
return ConfigSet(
{Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()}
)
class ConfigurationSetInfo: class ConfigurationSetInfo:
"""When we build a grammar into a table, the first thing we need to do is """When we build a grammar into a table, the first thing we need to do is
generate all the configuration sets and their successors. generate all the configuration sets and their successors.
@ -823,9 +911,6 @@ class GenerateLR0:
# The end symbol of the grammar. # The end symbol of the grammar.
end_symbol: int end_symbol: int
config_sets_key: dict[ConfigSet, int]
successors: list[set[int]]
def __init__( def __init__(
self, self,
start: str, start: str,
@ -1483,90 +1568,6 @@ class GenerateLR1(GenerateSLR1):
return self.gen_sets(seeds) return self.gen_sets(seeds)
# Here we have a slightly different definition of a ConfigurationSet; we keep the
# lookaheads outside and use a dictionary to check for containment quickly.
# ItemSet is used in the GRM/Pager/Chin algorithm.
@dataclasses.dataclass
class ItemSet:
items: dict[ConfigurationCore, set[int]]
def __init__(self, items=None):
self.items = items or {}
@classmethod
def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
return ItemSet({config.core: set(config.lookahead) for config in config_set})
def weakly_compatible(self, other: "ItemSet") -> bool:
a = self.items
b = other.items
if len(a) != len(b):
return False
for acore in a:
if acore not in b:
return False
if len(a) == 1:
return True
# DOTY: This loop I do not understand, truly. What the heck is happening here?
a_keys = list(a.keys())
for i, i_key in enumerate(itertools.islice(a_keys, 0, len(a_keys) - 1)):
for j_key in itertools.islice(a_keys, i + 1, None):
a_i_key = a[i_key]
b_i_key = b[i_key]
a_j_key = a[j_key]
b_j_key = b[j_key]
# DOTY: GRMTools written with intersects(); we don't have that we have
# `not disjoint()`. :P There are many double negatives....
#
# not (intersect(a_i, b_j) or intersect(a_j, b_i))
# not ((not disjoint(a_i, b_j)) or (not disjoint(a_j, b_i)))
# ((not not disjoint(a_i, b_j)) and (not not disjoint(a_j, b_i)))
# disjoint(a_i, b_j) and disjoint(a_j, b_i)
if a_i_key.isdisjoint(b_j_key) and a_j_key.isdisjoint(b_i_key):
continue
# intersect(a_i, a_j) or intersect(b_i, b_j)
# (not disjoint(a_i, a_j)) or (not disjoint(b_i, b_j))
# not (disjoint(a_i, a_j) and disjoint(b_i, b_j))
if not (a_i_key.isdisjoint(a_j_key) and b_i_key.isdisjoint(b_j_key)):
continue
return False
return True
def weakly_merge(self, other: "ItemSet") -> bool:
"""Merge b into a, returning True if this lead to any changes."""
a = self.items
b = other.items
changed = False
for a_key, a_ctx in a.items():
start_len = len(a_ctx)
a_ctx.update(b[a_key]) # Python doesn't tell us changes
changed = changed or (start_len != len(a_ctx))
return changed
def goto(self, symbol: int) -> "ItemSet":
result = ItemSet()
for core, context in self.items.items():
if core.next == symbol:
next = core.replace_position(core.position + 1)
result.items[next] = set(context)
return result
def to_config_set(self) -> ConfigSet:
return ConfigSet(
{Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()}
)
class GeneratePager(GenerateLR1): class GeneratePager(GenerateLR1):
"""Pager's algorithm. """Pager's algorithm.
@ -1625,10 +1626,6 @@ class GeneratePager(GenerateLR1):
# token more than once. # token more than once.
seen: set[int] = set() seen: set[int] = set()
# new_states is used to separate out iterating over states vs.
# mutating it
new_states: list[tuple[int, ItemSet]] = []
# cnd_[rule|token]_weaklies represent which states are possible weakly # cnd_[rule|token]_weaklies represent which states are possible weakly
# compatible matches for a given symbol. # compatible matches for a given symbol.
# #
@ -1669,7 +1666,6 @@ class GeneratePager(GenerateLR1):
closed_states[state_i] = cl_state closed_states[state_i] = cl_state
seen.clear() seen.clear()
new_states.clear()
for core in cl_state.items.keys(): for core in cl_state.items.keys():
sym = core.next sym = core.next
if sym is None or sym in seen: if sym is None or sym in seen:
@ -1677,9 +1673,7 @@ class GeneratePager(GenerateLR1):
seen.add(sym) seen.add(sym)
nstate = cl_state.goto(sym) nstate = cl_state.goto(sym)
new_states.append((sym, nstate))
for sym, nstate in new_states:
# Try and find a compatible match for this state. # Try and find a compatible match for this state.
cnd_states = cnd_weaklies[sym] cnd_states = cnd_weaklies[sym]