[parser] clean clean clean
This commit is contained in:
parent
2656a1d328
commit
bb94fc6c9c
1 changed file with 92 additions and 98 deletions
190
parser/parser.py
190
parser/parser.py
|
|
@ -269,14 +269,102 @@ class Configuration(typing.NamedTuple):
|
|||
return f"{self.core.format(alphabet)}{la}"
|
||||
|
||||
|
||||
class CoreSet(frozenset[ConfigurationCore]):
    """An immutable, hashable set of ConfigurationCore values."""
|
||||
|
||||
|
||||
class ConfigSet(frozenset[Configuration]):
    """An immutable, hashable set of Configuration values."""
|
||||
|
||||
|
||||
# Here we have a slightly different definition of a ConfigurationSet; we keep the
|
||||
# lookaheads outside and use a dictionary to check for containment quickly.
|
||||
# ItemSet is used in the GRM/Pager/Chin algorithm.
|
||||
@dataclasses.dataclass
class ItemSet:
    """An ItemSet is a group of configuration cores together with their
    "contexts", or lookahead sets.

    An ItemSet is comparable for equality, and also supports this lesser notion
    of "weakly compatible" which is used to collapse states in the pager
    algorithm.
    """

    # Maps each configuration core to its (mutable) set of lookahead symbols.
    items: dict[ConfigurationCore, set[int]]

    def __init__(self, items=None):
        """Wrap `items`, or start with an empty mapping when it is None.

        The mapping is stored as-is (not copied), so later mutation through
        this ItemSet is visible to whoever supplied the dict.
        """
        # Test against None rather than truthiness so that a caller-supplied
        # empty dict is kept instead of being silently swapped for a new one.
        self.items = {} if items is None else items

    @classmethod
    def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
        """Build an ItemSet from a ConfigSet.

        Each configuration's core becomes a key and its lookahead is copied
        into a fresh set.
        """
        return cls({config.core: set(config.lookahead) for config in config_set})

    def weakly_compatible(self, other: "ItemSet") -> bool:
        """Return True if `self` and `other` are weakly compatible.

        The two item sets must contain exactly the same cores. Beyond that,
        for every unordered pair of distinct cores (i, j) at least one of the
        following must hold, where a_x / b_x are the contexts of core x in
        `self` / `other`:

          1. a_i is disjoint from b_j, and a_j is disjoint from b_i;
          2. a_i intersects a_j;
          3. b_i intersects b_j.

        Item sets with fewer than two cores have no pairs to check and are
        therefore compatible whenever their cores match.
        """
        a = self.items
        b = other.items

        # Dict key views compare like sets: same size and same members.
        if a.keys() != b.keys():
            return False

        # The original GRMTools formulation is written with intersects(); we
        # only have isdisjoint(), so the conditions below are its negations:
        #
        #   not (intersect(a_i, b_j) or intersect(a_j, b_i))
        #     == disjoint(a_i, b_j) and disjoint(a_j, b_i)
        #
        #   intersect(a_i, a_j) or intersect(b_i, b_j)
        #     == not (disjoint(a_i, a_j) and disjoint(b_i, b_j))
        for i_key, j_key in itertools.combinations(a, 2):
            a_i, b_i = a[i_key], b[i_key]
            a_j, b_j = a[j_key], b[j_key]

            # Condition 1: the contexts do not overlap "crosswise".
            if a_i.isdisjoint(b_j) and a_j.isdisjoint(b_i):
                continue

            # Conditions 2 and 3: the two contexts already overlap inside one
            # of the item sets.
            if not (a_i.isdisjoint(a_j) and b_i.isdisjoint(b_j)):
                continue

            return False

        return True

    def weakly_merge(self, other: "ItemSet") -> bool:
        """Merge other's contexts into self, returning True if this led to
        any changes.

        Every core of `self` must also be present in `other`; otherwise a
        KeyError is raised. Only `self` is mutated.
        """
        theirs = other.items

        changed = False
        for core, ctx in self.items.items():
            size_before = len(ctx)
            ctx.update(theirs[core])  # set.update() doesn't report changes
            if len(ctx) != size_before:
                changed = True

        return changed

    def goto(self, symbol: int) -> "ItemSet":
        """Return the ItemSet reached by advancing over `symbol`.

        Every core whose `next` equals `symbol` is moved one position forward
        and carries a copy of its context; all other cores are dropped.
        """
        result = ItemSet()
        for core, context in self.items.items():
            if core.next == symbol:
                advanced = core.replace_position(core.position + 1)
                result.items[advanced] = set(context)
        return result

    def to_config_set(self) -> ConfigSet:
        """Convert back to a ConfigSet, with each context stored as a sorted
        tuple of lookahead symbols.
        """
        return ConfigSet(
            Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()
        )
|
||||
|
||||
|
||||
class ConfigurationSetInfo:
|
||||
"""When we build a grammar into a table, the first thing we need to do is
|
||||
generate all the configuration sets and their successors.
|
||||
|
|
@ -823,9 +911,6 @@ class GenerateLR0:
|
|||
# The end symbol of the grammar.
|
||||
end_symbol: int
|
||||
|
||||
config_sets_key: dict[ConfigSet, int]
|
||||
successors: list[set[int]]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
start: str,
|
||||
|
|
@ -1483,90 +1568,6 @@ class GenerateLR1(GenerateSLR1):
|
|||
return self.gen_sets(seeds)
|
||||
|
||||
|
||||
# Here we have a slightly different definition of a ConfigurationSet; we keep the
|
||||
# lookaheads outside and use a dictionary to check for containment quickly.
|
||||
# ItemSet is used in the GRM/Pager/Chin algorithm.
|
||||
@dataclasses.dataclass
class ItemSet:
    """A group of configuration cores together with their "contexts", or
    lookahead sets.

    An ItemSet is comparable for equality, and also supports a lesser notion
    of "weakly compatible" (see `weakly_compatible`).
    """

    # Maps each configuration core to its (mutable) set of lookahead symbols.
    items: dict[ConfigurationCore, set[int]]

    def __init__(self, items=None):
        """Wrap `items`, or start with an empty mapping when it is None.

        The mapping is stored as-is (not copied), so later mutation through
        this ItemSet is visible to whoever supplied the dict.
        """
        # Test against None rather than truthiness so that a caller-supplied
        # empty dict is kept instead of being silently swapped for a new one.
        self.items = {} if items is None else items

    @classmethod
    def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
        """Build an ItemSet from a ConfigSet.

        Each configuration's core becomes a key and its lookahead is copied
        into a fresh set.
        """
        return cls({config.core: set(config.lookahead) for config in config_set})

    def weakly_compatible(self, other: "ItemSet") -> bool:
        """Return True if `self` and `other` are weakly compatible.

        The two item sets must contain exactly the same cores. Beyond that,
        for every unordered pair of distinct cores (i, j) at least one of the
        following must hold, where a_x / b_x are the contexts of core x in
        `self` / `other`:

          1. a_i is disjoint from b_j, and a_j is disjoint from b_i;
          2. a_i intersects a_j;
          3. b_i intersects b_j.

        Item sets with fewer than two cores have no pairs to check and are
        therefore compatible whenever their cores match.
        """
        a = self.items
        b = other.items

        # Dict key views compare like sets: same size and same members.
        if a.keys() != b.keys():
            return False

        # The original GRMTools formulation is written with intersects(); we
        # only have isdisjoint(), so the conditions below are its negations:
        #
        #   not (intersect(a_i, b_j) or intersect(a_j, b_i))
        #     == disjoint(a_i, b_j) and disjoint(a_j, b_i)
        #
        #   intersect(a_i, a_j) or intersect(b_i, b_j)
        #     == not (disjoint(a_i, a_j) and disjoint(b_i, b_j))
        for i_key, j_key in itertools.combinations(a, 2):
            a_i, b_i = a[i_key], b[i_key]
            a_j, b_j = a[j_key], b[j_key]

            # Condition 1: the contexts do not overlap "crosswise".
            if a_i.isdisjoint(b_j) and a_j.isdisjoint(b_i):
                continue

            # Conditions 2 and 3: the two contexts already overlap inside one
            # of the item sets.
            if not (a_i.isdisjoint(a_j) and b_i.isdisjoint(b_j)):
                continue

            return False

        return True

    def weakly_merge(self, other: "ItemSet") -> bool:
        """Merge other's contexts into self, returning True if this led to
        any changes.

        Every core of `self` must also be present in `other`; otherwise a
        KeyError is raised. Only `self` is mutated.
        """
        theirs = other.items

        changed = False
        for core, ctx in self.items.items():
            size_before = len(ctx)
            ctx.update(theirs[core])  # set.update() doesn't report changes
            if len(ctx) != size_before:
                changed = True

        return changed

    def goto(self, symbol: int) -> "ItemSet":
        """Return the ItemSet reached by advancing over `symbol`.

        Every core whose `next` equals `symbol` is moved one position forward
        and carries a copy of its context; all other cores are dropped.
        """
        result = ItemSet()
        for core, context in self.items.items():
            if core.next == symbol:
                advanced = core.replace_position(core.position + 1)
                result.items[advanced] = set(context)
        return result

    def to_config_set(self) -> ConfigSet:
        """Convert back to a ConfigSet, with each context stored as a sorted
        tuple of lookahead symbols.
        """
        return ConfigSet(
            Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()
        )
|
||||
|
||||
|
||||
class GeneratePager(GenerateLR1):
|
||||
"""Pager's algorithm.
|
||||
|
||||
|
|
@ -1625,10 +1626,6 @@ class GeneratePager(GenerateLR1):
|
|||
# token more than once.
|
||||
seen: set[int] = set()
|
||||
|
||||
# new_states is used to separate out iterating over states vs.
|
||||
# mutating it
|
||||
new_states: list[tuple[int, ItemSet]] = []
|
||||
|
||||
# cnd_[rule|token]_weaklies represent which states are possible weakly
|
||||
# compatible matches for a given symbol.
|
||||
#
|
||||
|
|
@ -1669,7 +1666,6 @@ class GeneratePager(GenerateLR1):
|
|||
closed_states[state_i] = cl_state
|
||||
|
||||
seen.clear()
|
||||
new_states.clear()
|
||||
for core in cl_state.items.keys():
|
||||
sym = core.next
|
||||
if sym is None or sym in seen:
|
||||
|
|
@ -1677,9 +1673,7 @@ class GeneratePager(GenerateLR1):
|
|||
seen.add(sym)
|
||||
|
||||
nstate = cl_state.goto(sym)
|
||||
new_states.append((sym, nstate))
|
||||
|
||||
for sym, nstate in new_states:
|
||||
# Try and find a compatible match for this state.
|
||||
cnd_states = cnd_weaklies[sym]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue