Fix LALR. A small change, but very, very slow.
This commit is contained in:
parent
0fc04cf11e
commit
8d58c64040
2 changed files with 45 additions and 19 deletions
30
harness.py
30
harness.py
|
|
@@ -2,10 +2,22 @@ import bisect
|
|||
import typing
|
||||
|
||||
import grammar
|
||||
from parser import Token, Grammar, rule, seq
|
||||
import parser
|
||||
|
||||
# from parser import Token, Grammar, rule, seq
|
||||
|
||||
|
||||
def parse(table, tokens, trace=False):
|
||||
def trace_state(stack, input, input_index, action):
|
||||
print(
|
||||
"{stack: <20} {input: <50} {action: <5}".format(
|
||||
stack=repr([s[0] for s in stack]),
|
||||
input=repr(input[input_index : input_index + 4]),
|
||||
action=repr(action),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def parse(table, tokens, trace=None):
|
||||
"""Parse the input with the generated parsing table and return the
|
||||
concrete syntax tree.
|
||||
|
||||
|
|
@@ -35,13 +47,7 @@ def parse(table, tokens, trace=False):
|
|||
|
||||
action = table[current_state].get(current_token, ("error",))
|
||||
if trace:
|
||||
print(
|
||||
"{stack: <20} {input: <50} {action: <5}".format(
|
||||
stack=repr([s[0] for s in stack]),
|
||||
input=repr(input[input_index : input_index + 4]),
|
||||
action=repr(action),
|
||||
)
|
||||
)
|
||||
trace(stack, input, input_index, action)
|
||||
|
||||
if action[0] == "accept":
|
||||
return (stack[-1][1], [])
|
||||
|
|
@@ -83,7 +89,9 @@ def parse(table, tokens, trace=False):
|
|||
|
||||
|
||||
def harness(lexer_func, grammar_func, start_rule, source_path):
|
||||
table = grammar_func().build_table(start=start_rule)
|
||||
# generator = parser.GenerateLR1
|
||||
generator = parser.GenerateLALR
|
||||
table = grammar_func().build_table(start=start_rule, generator=generator)
|
||||
print(f"{len(table)} states")
|
||||
|
||||
average_entries = sum(len(row) for row in table) / len(table)
|
||||
|
|
@@ -96,7 +104,7 @@ def harness(lexer_func, grammar_func, start_rule, source_path):
|
|||
tokens = lexer_func(src)
|
||||
# print(f"{tokens.lines}")
|
||||
# tokens.dump(end=5)
|
||||
(_, errors) = parse(table, tokens, trace=True)
|
||||
(_, errors) = parse(table, tokens)
|
||||
if len(errors) > 0:
|
||||
print(f"{len(errors)} errors:")
|
||||
for error in errors:
|
||||
|
|
|
|||
34
parser.py
34
parser.py
|
|
@@ -257,6 +257,14 @@ class Configuration:
|
|||
lookahead=(),
|
||||
)
|
||||
|
||||
def replace_lookahead(self, lookahead: typing.Tuple[int, ...]):
|
||||
return Configuration(
|
||||
name=self.name,
|
||||
symbols=self.symbols,
|
||||
position=self.position,
|
||||
lookahead=lookahead,
|
||||
)
|
||||
|
||||
@property
|
||||
def rest(self):
|
||||
return self.symbols[(self.position + 1) :]
|
||||
|
|
@@ -1382,7 +1390,11 @@ class GenerateLALR(GenerateLR1):
|
|||
use a bunch of improvement, probably.)
|
||||
"""
|
||||
|
||||
def merge_sets(self, config_set_a, config_set_b):
|
||||
def merge_sets(
|
||||
self,
|
||||
config_set_a: typing.Tuple[Configuration, ...],
|
||||
config_set_b: typing.Tuple[Configuration, ...],
|
||||
):
|
||||
"""Merge the two config sets, by keeping the item cores but merging
|
||||
the lookahead sets for each item.
|
||||
"""
|
||||
|
|
@@ -1394,7 +1406,7 @@ class GenerateLALR(GenerateLR1):
|
|||
|
||||
new_lookahead = a.lookahead + b.lookahead
|
||||
new_lookahead = tuple(sorted(set(new_lookahead)))
|
||||
merged.append(a.clear_lookahead())
|
||||
merged.append(a.replace_lookahead(new_lookahead))
|
||||
|
||||
return tuple(merged)
|
||||
|
||||
|
|
@@ -1403,7 +1415,7 @@ class GenerateLALR(GenerateLR1):
|
|||
b_no_la = tuple(s.clear_lookahead() for s in b)
|
||||
return a_no_la == b_no_la
|
||||
|
||||
def gen_sets(self, config_set) -> ConfigurationSetInfo:
|
||||
def gen_sets(self, config_set: typing.Tuple[Configuration, ...]) -> ConfigurationSetInfo:
|
||||
"""Recursively generate all configuration sets starting from the
|
||||
provided set, and merge them with the provided set 'F'.
|
||||
|
||||
|
|
@@ -1414,10 +1426,15 @@ class GenerateLALR(GenerateLR1):
|
|||
and replace the set in F, returning the modified set.
|
||||
"""
|
||||
F = {}
|
||||
seen = set()
|
||||
successors = []
|
||||
pending = [config_set]
|
||||
while len(pending) > 0:
|
||||
config_set = pending.pop()
|
||||
if config_set in seen:
|
||||
continue
|
||||
seen.add(config_set)
|
||||
|
||||
config_set_no_la = tuple(s.clear_lookahead() for s in config_set)
|
||||
|
||||
existing = F.get(config_set_no_la)
|
||||
|
|
@@ -1425,10 +1442,11 @@ class GenerateLALR(GenerateLR1):
|
|||
F[config_set_no_la] = self.merge_sets(config_set, existing)
|
||||
else:
|
||||
F[config_set_no_la] = config_set
|
||||
for symbol, successor in self.gen_all_successors(config_set):
|
||||
successor_no_la = tuple(s.clear_lookahead() for s in successor)
|
||||
successors.append((config_set_no_la, symbol, successor_no_la))
|
||||
pending.append(successor)
|
||||
|
||||
for symbol, successor in self.gen_all_successors(config_set):
|
||||
successor_no_la = tuple(s.clear_lookahead() for s in successor)
|
||||
successors.append((config_set_no_la, symbol, successor_no_la))
|
||||
pending.append(successor)
|
||||
|
||||
# Register all the actually merged, final config sets.
|
||||
result = ConfigurationSetInfo()
|
||||
|
|
@@ -1723,7 +1741,7 @@ class Grammar:
|
|||
|
||||
return grammar
|
||||
|
||||
def build_table(self, start: str, generator=GenerateLR1):
|
||||
def build_table(self, start: str, generator=GenerateLALR):
|
||||
"""Construct a parse table for this grammar, starting at the named
|
||||
nonterminal rule.
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue