[parser] Move ItemSet
This commit is contained in:
parent
2d5c73f0b0
commit
e55bc140f9
1 changed files with 92 additions and 92 deletions
184
parser/parser.py
184
parser/parser.py
|
|
@ -274,6 +274,98 @@ class ConfigSet(frozenset[Configuration]):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# Here we have a slightly different definition of a ConfigurationSet; we keep
|
||||||
|
# the lookaheads outside and use a dictionary to check for containment
|
||||||
|
# quickly. ItemSet is used in the GRM/Pager/Chin algorithm.
|
||||||
|
@dataclasses.dataclass
class ItemSet:
    """A group of configuration cores together with their "contexts", or
    lookahead sets.

    The lookaheads are kept outside the cores, in a dictionary keyed by core,
    so that containment can be checked quickly.

    An ItemSet is comparable for equality (the dataclass-generated ``__eq__``
    compares ``items``), and also supports the lesser notion of "weakly
    compatible", which is used to collapse states in the pager algorithm.
    """

    # Maps each configuration core to its (mutable) set of lookahead symbols.
    items: dict[ConfigurationCore, set[int]]

    def __init__(self, items=None):
        """Create an item set.

        ``items`` is stored by reference, not copied. When it is omitted the
        item set starts out empty.
        """
        # Test against None instead of truthiness so that a caller-supplied
        # empty dict is kept rather than silently replaced by a new one.
        self.items = {} if items is None else items

    @classmethod
    def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
        """Build an item set from ``config_set``.

        Every configuration's core becomes a key whose value is a fresh set
        holding that configuration's lookaheads.
        """
        return cls({config.core: set(config.lookahead) for config in config_set})

    def weakly_compatible(self, other: "ItemSet") -> bool:
        """Return True if this item set and ``other`` are weakly compatible.

        Two item sets are weakly compatible when they contain exactly the same
        cores and, for every pair of distinct cores ``i`` and ``j``, at least
        one of the following holds:

        * the "crossed" contexts are disjoint: ``a[i]`` shares nothing with
          ``b[j]`` and ``a[j]`` shares nothing with ``b[i]``; or
        * two contexts already overlap inside one of the sets: ``a[i]``
          intersects ``a[j]``, or ``b[i]`` intersects ``b[j]``.

        Item sets with zero or one core are compatible whenever their cores
        match, because there are no pairs to check.

        NOTE(review): this appears to be Pager's weak-compatibility test as
        written in grmtools -- merging is allowed when it cannot introduce a
        lookahead clash that neither set had on its own. Confirm against the
        reference before relying on this description.
        """
        a = self.items
        b = other.items

        # Both item sets must be made of exactly the same cores.
        if a.keys() != b.keys():
            return False

        # combinations() yields every unordered pair of distinct cores exactly
        # once, and yields nothing for fewer than two cores.
        for i_key, j_key in itertools.combinations(a, 2):
            a_i, b_i = a[i_key], b[i_key]
            a_j, b_j = a[j_key], b[j_key]

            # grmtools phrases this with intersects(); Python only offers
            # isdisjoint(), so each intersects(x, y) is `not x.isdisjoint(y)`.
            #
            # not (intersect(a_i, b_j) or intersect(a_j, b_i))
            if a_i.isdisjoint(b_j) and a_j.isdisjoint(b_i):
                continue

            # intersect(a_i, a_j) or intersect(b_i, b_j)
            if not a_i.isdisjoint(a_j) or not b_i.isdisjoint(b_j):
                continue

            return False

        return True

    def weakly_merge(self, other: "ItemSet") -> bool:
        """Merge the contexts of ``other`` into this item set, in place.

        For every core in this item set, the matching context from ``other``
        is added to this item set's context. Cores that appear only in
        ``other`` are ignored.

        Returns True if this led to any changes. Raises ``KeyError`` if
        ``other`` lacks one of this item set's cores.
        """
        theirs = other.items

        changed = False
        for core, context in self.items.items():
            size_before = len(context)
            context.update(theirs[core])
            # set.update() does not report whether it added anything, so
            # detect growth by comparing sizes.
            if len(context) != size_before:
                changed = True

        return changed

    def goto(self, symbol: int) -> "ItemSet":
        """Return the item set reached from this one over ``symbol``.

        Every core whose ``next`` equals ``symbol`` is included with its
        position advanced by one and with a copy of its context.
        """
        return ItemSet(
            {
                core.replace_position(core.position + 1): set(context)
                for core, context in self.items.items()
                if core.next == symbol
            }
        )

    def to_config_set(self) -> ConfigSet:
        """Convert back to a ``ConfigSet``, with each context stored as a
        sorted tuple of lookaheads."""
        return ConfigSet(
            {Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()}
        )
|
||||||
|
|
||||||
|
|
||||||
class ConfigurationSetInfo:
|
class ConfigurationSetInfo:
|
||||||
"""When we build a grammar into a table, the first thing we need to do is
|
"""When we build a grammar into a table, the first thing we need to do is
|
||||||
generate all the configuration sets and their successors.
|
generate all the configuration sets and their successors.
|
||||||
|
|
@ -1007,98 +1099,6 @@ class FollowInfo:
|
||||||
return FollowInfo(follows=follows)
|
return FollowInfo(follows=follows)
|
||||||
|
|
||||||
|
|
||||||
# Here we have a slightly different definition of a ConfigurationSet; we keep the
|
|
||||||
# lookaheads outside and use a dictionary to check for containment quickly.
|
|
||||||
# ItemSet is used in the GRM/Pager/Chin algorithm.
|
|
||||||
@dataclasses.dataclass
class ItemSet:
    """A group of configuration cores together with their "contexts", or
    lookahead sets.

    The lookaheads are kept outside the cores, in a dictionary keyed by core,
    so that containment can be checked quickly.

    An ItemSet is comparable for equality (the dataclass-generated ``__eq__``
    compares ``items``), and also supports the lesser notion of "weakly
    compatible", which is used to collapse states in the pager algorithm.
    """

    # Maps each configuration core to its (mutable) set of lookahead symbols.
    items: dict[ConfigurationCore, set[int]]

    def __init__(self, items=None):
        """Create an item set.

        ``items`` is stored by reference, not copied. When it is omitted the
        item set starts out empty.
        """
        # Test against None instead of truthiness so that a caller-supplied
        # empty dict is kept rather than silently replaced by a new one.
        self.items = {} if items is None else items

    @classmethod
    def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
        """Build an item set from ``config_set``.

        Every configuration's core becomes a key whose value is a fresh set
        holding that configuration's lookaheads.
        """
        return cls({config.core: set(config.lookahead) for config in config_set})

    def weakly_compatible(self, other: "ItemSet") -> bool:
        """Return True if this item set and ``other`` are weakly compatible.

        Two item sets are weakly compatible when they contain exactly the same
        cores and, for every pair of distinct cores ``i`` and ``j``, at least
        one of the following holds:

        * the "crossed" contexts are disjoint: ``a[i]`` shares nothing with
          ``b[j]`` and ``a[j]`` shares nothing with ``b[i]``; or
        * two contexts already overlap inside one of the sets: ``a[i]``
          intersects ``a[j]``, or ``b[i]`` intersects ``b[j]``.

        Item sets with zero or one core are compatible whenever their cores
        match, because there are no pairs to check.

        NOTE(review): this appears to be Pager's weak-compatibility test as
        written in grmtools -- merging is allowed when it cannot introduce a
        lookahead clash that neither set had on its own. Confirm against the
        reference before relying on this description.
        """
        a = self.items
        b = other.items

        # Both item sets must be made of exactly the same cores.
        if a.keys() != b.keys():
            return False

        # combinations() yields every unordered pair of distinct cores exactly
        # once, and yields nothing for fewer than two cores.
        for i_key, j_key in itertools.combinations(a, 2):
            a_i, b_i = a[i_key], b[i_key]
            a_j, b_j = a[j_key], b[j_key]

            # grmtools phrases this with intersects(); Python only offers
            # isdisjoint(), so each intersects(x, y) is `not x.isdisjoint(y)`.
            #
            # not (intersect(a_i, b_j) or intersect(a_j, b_i))
            if a_i.isdisjoint(b_j) and a_j.isdisjoint(b_i):
                continue

            # intersect(a_i, a_j) or intersect(b_i, b_j)
            if not a_i.isdisjoint(a_j) or not b_i.isdisjoint(b_j):
                continue

            return False

        return True

    def weakly_merge(self, other: "ItemSet") -> bool:
        """Merge the contexts of ``other`` into this item set, in place.

        For every core in this item set, the matching context from ``other``
        is added to this item set's context. Cores that appear only in
        ``other`` are ignored.

        Returns True if this led to any changes. Raises ``KeyError`` if
        ``other`` lacks one of this item set's cores.
        """
        theirs = other.items

        changed = False
        for core, context in self.items.items():
            size_before = len(context)
            context.update(theirs[core])
            # set.update() does not report whether it added anything, so
            # detect growth by comparing sizes.
            if len(context) != size_before:
                changed = True

        return changed

    def goto(self, symbol: int) -> "ItemSet":
        """Return the item set reached from this one over ``symbol``.

        Every core whose ``next`` equals ``symbol`` is included with its
        position advanced by one and with a copy of its context.
        """
        return ItemSet(
            {
                core.replace_position(core.position + 1): set(context)
                for core, context in self.items.items()
                if core.next == symbol
            }
        )

    def to_config_set(self) -> ConfigSet:
        """Convert back to a ``ConfigSet``, with each context stored as a
        sorted tuple of lookaheads."""
        return ConfigSet(
            {Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()}
        )
|
|
||||||
|
|
||||||
|
|
||||||
class GenerateLR1:
|
class GenerateLR1:
|
||||||
"""Generate parse tables for LR1, or "canonical LR" grammars.
|
"""Generate parse tables for LR1, or "canonical LR" grammars.
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue