[parser] clean clean clean
This commit is contained in:
parent
2656a1d328
commit
bb94fc6c9c
1 changed files with 92 additions and 98 deletions
190
parser/parser.py
190
parser/parser.py
|
|
@ -269,14 +269,102 @@ class Configuration(typing.NamedTuple):
|
||||||
return f"{self.core.format(alphabet)}{la}"
|
return f"{self.core.format(alphabet)}{la}"
|
||||||
|
|
||||||
|
|
||||||
class CoreSet(frozenset[ConfigurationCore]):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class ConfigSet(frozenset[Configuration]):
    """An immutable, hashable set of Configuration objects.

    This is a plain frozenset subclass: it adds no attributes or methods and
    exists so that configuration sets have their own named type.
    """
|
||||||
|
|
||||||
|
|
||||||
|
# Here we have a slightly different definition of a ConfigurationSet; we keep the
|
||||||
|
# lookaheads outside and use a dictionary to check for containment quickly.
|
||||||
|
# ItemSet is used in the GRM/Pager/Chin algorithm.
|
||||||
|
@dataclasses.dataclass
class ItemSet:
    """An ItemSet is a group of configuration cores together with their
    "contexts", or lookahead sets.

    An ItemSet is comparable for equality, and also supports this lesser notion
    of "weakly compatible" which is used to collapse states in the pager
    algorithm.
    """

    # Maps each configuration core to the (mutable) set of lookahead symbols
    # that forms its context.
    items: "dict[ConfigurationCore, set[int]]"

    # dataclass leaves a hand-written __init__ in place; it still supplies
    # __eq__ and __repr__ based on `items`.
    def __init__(self, items=None):
        """Create an item set.

        Args:
            items: Mapping from core to lookahead set. The mapping is stored
                as-is (not copied). When omitted, a fresh empty dict is used.
        """
        # Test for None explicitly so that a caller-supplied empty dict is
        # kept rather than silently replaced by a different dict object.
        self.items = {} if items is None else items

    @classmethod
    def from_config_set(cls, config_set: "ConfigSet") -> "ItemSet":
        """Build an item set from an iterable of configurations.

        Each configuration contributes its `lookahead` symbols to the context
        of its `core`. Configurations that share a core have their lookaheads
        unioned, so no lookahead is lost regardless of iteration order.
        """
        items: dict = {}
        for config in config_set:
            items.setdefault(config.core, set()).update(config.lookahead)
        return cls(items)

    def weakly_compatible(self, other: "ItemSet") -> bool:
        """Return True if `other` can be merged into this set.

        Both sets must contain exactly the same cores. In addition, for every
        pair of distinct cores (i, j), with a = self and b = other, at least
        one of the following must hold:

          * a[i] is disjoint from b[j] and a[j] is disjoint from b[i]
            (merging creates no new overlap between the contexts of i and j);
          * a[i] intersects a[j], or b[i] intersects b[j]
            (one of the sets already has such an overlap on its own).

        If some pair satisfies neither, the sets are not weakly compatible.
        """
        a = self.items
        b = other.items

        # Equal sizes plus "every core of a is in b" means the key sets match.
        if len(a) != len(b) or any(core not in b for core in a):
            return False

        # With zero or one core there are no pairs to examine, so the loop
        # body never runs and the result is True.
        for i_key, j_key in itertools.combinations(a, 2):
            a_i, b_i = a[i_key], b[i_key]
            a_j, b_j = a[j_key], b[j_key]

            # No cross-overlap between the two sets for this pair.
            if a_i.isdisjoint(b_j) and a_j.isdisjoint(b_i):
                continue

            # The overlap already exists inside one of the sets.
            if not a_i.isdisjoint(a_j) or not b_i.isdisjoint(b_j):
                continue

            return False

        return True

    def weakly_merge(self, other: "ItemSet") -> bool:
        """Merge other's contexts into this set, in place.

        Every core of this set must also be present in `other` (as is the case
        when the two are weakly compatible).

        Returns:
            True if this led to any changes, i.e. at least one context grew.

        Raises:
            KeyError: If `other` lacks one of this set's cores.
        """
        b = other.items

        changed = False
        for core, ctx in self.items.items():
            # set.update() does not report whether anything was added, so
            # compare sizes before and after.
            before = len(ctx)
            ctx.update(b[core])
            if len(ctx) != before:
                changed = True

        return changed

    def goto(self, symbol: int) -> "ItemSet":
        """Return the item set reached by advancing over `symbol`.

        Every core whose `next` equals `symbol` is moved one position forward;
        its context is copied so the result does not share lookahead sets with
        this item set. Cores that do not match are left out.
        """
        result = ItemSet()
        for core, context in self.items.items():
            if core.next == symbol:
                advanced = core.replace_position(core.position + 1)
                result.items[advanced] = set(context)
        return result

    def to_config_set(self) -> "ConfigSet":
        """Convert to a ConfigSet with one Configuration per core.

        Each context is emitted as a sorted tuple of its lookahead symbols.
        """
        return ConfigSet(
            Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()
        )
|
||||||
|
|
||||||
|
|
||||||
class ConfigurationSetInfo:
|
class ConfigurationSetInfo:
|
||||||
"""When we build a grammar into a table, the first thing we need to do is
|
"""When we build a grammar into a table, the first thing we need to do is
|
||||||
generate all the configuration sets and their successors.
|
generate all the configuration sets and their successors.
|
||||||
|
|
@ -823,9 +911,6 @@ class GenerateLR0:
|
||||||
# The end symbol of the grammar.
|
# The end symbol of the grammar.
|
||||||
end_symbol: int
|
end_symbol: int
|
||||||
|
|
||||||
config_sets_key: dict[ConfigSet, int]
|
|
||||||
successors: list[set[int]]
|
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
start: str,
|
start: str,
|
||||||
|
|
@ -1483,90 +1568,6 @@ class GenerateLR1(GenerateSLR1):
|
||||||
return self.gen_sets(seeds)
|
return self.gen_sets(seeds)
|
||||||
|
|
||||||
|
|
||||||
# Here we have a slightly different definition of a ConfigurationSet; we keep the
|
|
||||||
# lookaheads outside and use a dictionary to check for containment quickly.
|
|
||||||
# ItemSet is used in the GRM/Pager/Chin algorithm.
|
|
||||||
@dataclasses.dataclass
|
|
||||||
class ItemSet:
|
|
||||||
items: dict[ConfigurationCore, set[int]]
|
|
||||||
|
|
||||||
def __init__(self, items=None):
|
|
||||||
self.items = items or {}
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
|
|
||||||
return ItemSet({config.core: set(config.lookahead) for config in config_set})
|
|
||||||
|
|
||||||
def weakly_compatible(self, other: "ItemSet") -> bool:
|
|
||||||
a = self.items
|
|
||||||
b = other.items
|
|
||||||
|
|
||||||
if len(a) != len(b):
|
|
||||||
return False
|
|
||||||
|
|
||||||
for acore in a:
|
|
||||||
if acore not in b:
|
|
||||||
return False
|
|
||||||
|
|
||||||
if len(a) == 1:
|
|
||||||
return True
|
|
||||||
|
|
||||||
# DOTY: This loop I do not understand, truly. What the heck is happening here?
|
|
||||||
a_keys = list(a.keys())
|
|
||||||
for i, i_key in enumerate(itertools.islice(a_keys, 0, len(a_keys) - 1)):
|
|
||||||
for j_key in itertools.islice(a_keys, i + 1, None):
|
|
||||||
a_i_key = a[i_key]
|
|
||||||
b_i_key = b[i_key]
|
|
||||||
a_j_key = a[j_key]
|
|
||||||
b_j_key = b[j_key]
|
|
||||||
|
|
||||||
# DOTY: GRMTools written with intersects(); we don't have that we have
|
|
||||||
# `not disjoint()`. :P There are many double negatives....
|
|
||||||
#
|
|
||||||
# not (intersect(a_i, b_j) or intersect(a_j, b_i))
|
|
||||||
# not ((not disjoint(a_i, b_j)) or (not disjoint(a_j, b_i)))
|
|
||||||
# ((not not disjoint(a_i, b_j)) and (not not disjoint(a_j, b_i)))
|
|
||||||
# disjoint(a_i, b_j) and disjoint(a_j, b_i)
|
|
||||||
if a_i_key.isdisjoint(b_j_key) and a_j_key.isdisjoint(b_i_key):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# intersect(a_i, a_j) or intersect(b_i, b_j)
|
|
||||||
# (not disjoint(a_i, a_j)) or (not disjoint(b_i, b_j))
|
|
||||||
# not (disjoint(a_i, a_j) and disjoint(b_i, b_j))
|
|
||||||
if not (a_i_key.isdisjoint(a_j_key) and b_i_key.isdisjoint(b_j_key)):
|
|
||||||
continue
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
def weakly_merge(self, other: "ItemSet") -> bool:
|
|
||||||
"""Merge b into a, returning True if this lead to any changes."""
|
|
||||||
a = self.items
|
|
||||||
b = other.items
|
|
||||||
|
|
||||||
changed = False
|
|
||||||
for a_key, a_ctx in a.items():
|
|
||||||
start_len = len(a_ctx)
|
|
||||||
a_ctx.update(b[a_key]) # Python doesn't tell us changes
|
|
||||||
changed = changed or (start_len != len(a_ctx))
|
|
||||||
|
|
||||||
return changed
|
|
||||||
|
|
||||||
def goto(self, symbol: int) -> "ItemSet":
|
|
||||||
result = ItemSet()
|
|
||||||
for core, context in self.items.items():
|
|
||||||
if core.next == symbol:
|
|
||||||
next = core.replace_position(core.position + 1)
|
|
||||||
result.items[next] = set(context)
|
|
||||||
return result
|
|
||||||
|
|
||||||
def to_config_set(self) -> ConfigSet:
|
|
||||||
return ConfigSet(
|
|
||||||
{Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()}
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class GeneratePager(GenerateLR1):
|
class GeneratePager(GenerateLR1):
|
||||||
"""Pager's algorithm.
|
"""Pager's algorithm.
|
||||||
|
|
||||||
|
|
@ -1625,10 +1626,6 @@ class GeneratePager(GenerateLR1):
|
||||||
# token more than once.
|
# token more than once.
|
||||||
seen: set[int] = set()
|
seen: set[int] = set()
|
||||||
|
|
||||||
# new_states is used to separate out iterating over states vs.
|
|
||||||
# mutating it
|
|
||||||
new_states: list[tuple[int, ItemSet]] = []
|
|
||||||
|
|
||||||
# cnd_[rule|token]_weaklies represent which states are possible weakly
|
# cnd_[rule|token]_weaklies represent which states are possible weakly
|
||||||
# compatible matches for a given symbol.
|
# compatible matches for a given symbol.
|
||||||
#
|
#
|
||||||
|
|
@ -1669,7 +1666,6 @@ class GeneratePager(GenerateLR1):
|
||||||
closed_states[state_i] = cl_state
|
closed_states[state_i] = cl_state
|
||||||
|
|
||||||
seen.clear()
|
seen.clear()
|
||||||
new_states.clear()
|
|
||||||
for core in cl_state.items.keys():
|
for core in cl_state.items.keys():
|
||||||
sym = core.next
|
sym = core.next
|
||||||
if sym is None or sym in seen:
|
if sym is None or sym in seen:
|
||||||
|
|
@ -1677,9 +1673,7 @@ class GeneratePager(GenerateLR1):
|
||||||
seen.add(sym)
|
seen.add(sym)
|
||||||
|
|
||||||
nstate = cl_state.goto(sym)
|
nstate = cl_state.goto(sym)
|
||||||
new_states.append((sym, nstate))
|
|
||||||
|
|
||||||
for sym, nstate in new_states:
|
|
||||||
# Try and find a compatible match for this state.
|
# Try and find a compatible match for this state.
|
||||||
cnd_states = cnd_weaklies[sym]
|
cnd_states = cnd_weaklies[sym]
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue