Faster still.
Also, I was somehow not merging things correctly for LALR; this merges more completely and winds up with 215 states for the fine grammar, which I think is about half of what it used to be.
This commit is contained in:
parent
28634677bf
commit
7c4705714e
2 changed files with 44 additions and 29 deletions
10
harness.py
10
harness.py
|
|
@ -89,8 +89,12 @@ def parse(table, tokens, trace=None):
|
||||||
|
|
||||||
|
|
||||||
def harness(lexer_func, grammar_func, start_rule, source_path):
|
def harness(lexer_func, grammar_func, start_rule, source_path):
|
||||||
generator = parser.GenerateLR1
|
# generator = parser.GenerateLR1
|
||||||
# generator = parser.GenerateLALR
|
generator = parser.GenerateLALR
|
||||||
|
|
||||||
|
trace = None
|
||||||
|
# trace = trace_state
|
||||||
|
|
||||||
table = grammar_func().build_table(start=start_rule, generator=generator)
|
table = grammar_func().build_table(start=start_rule, generator=generator)
|
||||||
print(f"{len(table)} states")
|
print(f"{len(table)} states")
|
||||||
|
|
||||||
|
|
@ -104,7 +108,7 @@ def harness(lexer_func, grammar_func, start_rule, source_path):
|
||||||
tokens = lexer_func(src)
|
tokens = lexer_func(src)
|
||||||
# print(f"{tokens.lines}")
|
# print(f"{tokens.lines}")
|
||||||
# tokens.dump(end=5)
|
# tokens.dump(end=5)
|
||||||
(_, errors) = parse(table, tokens)
|
(_, errors) = parse(table, tokens, trace=trace)
|
||||||
if len(errors) > 0:
|
if len(errors) > 0:
|
||||||
print(f"{len(errors)} errors:")
|
print(f"{len(errors)} errors:")
|
||||||
for error in errors:
|
for error in errors:
|
||||||
|
|
|
||||||
63
parser.py
63
parser.py
|
|
@ -267,6 +267,20 @@ class Configuration:
|
||||||
def rest(self):
|
def rest(self):
|
||||||
return self.symbols[(self.position + 1) :]
|
return self.symbols[(self.position + 1) :]
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
la = ", " + str(self.lookahead) if self.lookahead != () else ""
|
||||||
|
return "{name} -> {bits}{lookahead}".format(
|
||||||
|
name=self.name,
|
||||||
|
bits=" ".join(
|
||||||
|
[
|
||||||
|
("* " + str(sym)) if i == self.position else str(sym)
|
||||||
|
for i, sym in enumerate(self.symbols)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
+ (" *" if self.at_end else ""),
|
||||||
|
lookahead=la,
|
||||||
|
)
|
||||||
|
|
||||||
def format(self, alphabet: list[str]) -> str:
|
def format(self, alphabet: list[str]) -> str:
|
||||||
la = ", " + str(tuple(alphabet[i] for i in self.lookahead)) if self.lookahead != () else ""
|
la = ", " + str(tuple(alphabet[i] for i in self.lookahead)) if self.lookahead != () else ""
|
||||||
return "{name} -> {bits}{lookahead}".format(
|
return "{name} -> {bits}{lookahead}".format(
|
||||||
|
|
@ -282,7 +296,9 @@ class Configuration:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
ConfigSet = typing.Tuple[Configuration, ...]
|
# ConfigSet = typing.Tuple[Configuration, ...]
|
||||||
|
class ConfigSet(frozenset):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class ConfigurationSetInfo:
|
class ConfigurationSetInfo:
|
||||||
|
|
@ -807,7 +823,7 @@ class GenerateLR0(object):
|
||||||
pending_next = temp
|
pending_next = temp
|
||||||
pending_next.clear()
|
pending_next.clear()
|
||||||
|
|
||||||
return tuple(sorted(closure)) # TODO: Why tuple?
|
return ConfigSet(closure) # TODO: Why tuple?
|
||||||
|
|
||||||
def gen_successor(self, config_set: typing.Iterable[Configuration], symbol: int) -> ConfigSet:
|
def gen_successor(self, config_set: typing.Iterable[Configuration], symbol: int) -> ConfigSet:
|
||||||
"""Compute the successor state for the given config set and the
|
"""Compute the successor state for the given config set and the
|
||||||
|
|
@ -834,7 +850,7 @@ class GenerateLR0(object):
|
||||||
could possibly see, and figure out which configs sets we get from
|
could possibly see, and figure out which configs sets we get from
|
||||||
those symbols. Those are the successors of this set.)
|
those symbols. Those are the successors of this set.)
|
||||||
"""
|
"""
|
||||||
possible = tuple(sorted({config.next for config in config_set if config.next is not None}))
|
possible = {config.next for config in config_set if config.next is not None}
|
||||||
|
|
||||||
next = []
|
next = []
|
||||||
for symbol in possible:
|
for symbol in possible:
|
||||||
|
|
@ -1400,9 +1416,9 @@ class GenerateLALR(GenerateLR1):
|
||||||
# First, do the actual walk. Don't merge yet: just keep track of all
|
# First, do the actual walk. Don't merge yet: just keep track of all
|
||||||
# the config sets that need to be merged.
|
# the config sets that need to be merged.
|
||||||
#
|
#
|
||||||
F = {}
|
F: dict[ConfigSet, list[ConfigSet]] = {}
|
||||||
seen = set()
|
seen: set[ConfigSet] = set()
|
||||||
successors = []
|
successors: list[typing.Tuple[ConfigSet, int, ConfigSet]] = []
|
||||||
pending = [config_set]
|
pending = [config_set]
|
||||||
while len(pending) > 0:
|
while len(pending) > 0:
|
||||||
config_set = pending.pop()
|
config_set = pending.pop()
|
||||||
|
|
@ -1410,7 +1426,7 @@ class GenerateLALR(GenerateLR1):
|
||||||
continue
|
continue
|
||||||
seen.add(config_set)
|
seen.add(config_set)
|
||||||
|
|
||||||
config_set_no_la = tuple(s.clear_lookahead() for s in config_set)
|
config_set_no_la = ConfigSet(s.clear_lookahead() for s in config_set)
|
||||||
|
|
||||||
existing = F.get(config_set_no_la)
|
existing = F.get(config_set_no_la)
|
||||||
if existing is not None:
|
if existing is not None:
|
||||||
|
|
@ -1419,32 +1435,27 @@ class GenerateLALR(GenerateLR1):
|
||||||
F[config_set_no_la] = [config_set]
|
F[config_set_no_la] = [config_set]
|
||||||
|
|
||||||
for symbol, successor in self.gen_all_successors(config_set):
|
for symbol, successor in self.gen_all_successors(config_set):
|
||||||
successor_no_la = tuple(s.clear_lookahead() for s in successor)
|
successor_no_la = ConfigSet(s.clear_lookahead() for s in successor)
|
||||||
successors.append((config_set_no_la, symbol, successor_no_la))
|
successors.append((config_set_no_la, symbol, successor_no_la))
|
||||||
pending.append(successor)
|
pending.append(successor)
|
||||||
|
|
||||||
# Now we gathered the sets, merge them all.
|
# Now we gathered the sets, merge them all.
|
||||||
final_sets = {}
|
final_sets: dict[ConfigSet, ConfigSet] = {}
|
||||||
for key, config_sets in F.items():
|
for key, config_sets in F.items():
|
||||||
new_config_set = []
|
la_merge: dict[Configuration, set[int]] = {}
|
||||||
config_groupings = [[] for _ in range(len(config_sets[0]))]
|
|
||||||
for config_set in config_sets:
|
for config_set in config_sets:
|
||||||
for i, config in enumerate(config_set):
|
for config in config_set:
|
||||||
config_groupings[i].append(config)
|
la_key = config.clear_lookahead()
|
||||||
|
la_set = la_merge.get(la_key)
|
||||||
|
if la_set is None:
|
||||||
|
la_merge[la_key] = set(config.lookahead)
|
||||||
|
else:
|
||||||
|
la_set.update(config.lookahead)
|
||||||
|
|
||||||
for config_group in config_groupings:
|
final_set = ConfigSet(
|
||||||
new_lookahead = [l for config in config_group for l in config.lookahead]
|
config.replace_lookahead(tuple(sorted(la))) for config, la in la_merge.items()
|
||||||
new_lookahead = tuple(sorted(set(new_lookahead)))
|
)
|
||||||
new_config_set.append(
|
final_sets[key] = final_set
|
||||||
Configuration(
|
|
||||||
name=config_group[0].name,
|
|
||||||
symbols=config_group[0].symbols,
|
|
||||||
position=config_group[0].position,
|
|
||||||
lookahead=new_lookahead,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
final_sets[key] = tuple(new_config_set)
|
|
||||||
|
|
||||||
# Register all the actually merged, final config sets.
|
# Register all the actually merged, final config sets.
|
||||||
result = ConfigurationSetInfo()
|
result = ConfigurationSetInfo()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue