faster: Significant performance boost on large grammars

Run time on the large grammar drops from 15s to 5s.
This commit is contained in:
John Doty 2024-04-17 23:25:15 -07:00
parent 6fa89a9757
commit f656dbd8f3
2 changed files with 31 additions and 17 deletions

View file

@ -419,7 +419,7 @@ grammar = {
# dump_yacc(grammar) # dump_yacc(grammar)
grammar, precedence = desugar(grammar, precedence) grammar, precedence = desugar(grammar, precedence)
gen = parser_faster.GenerateLR1("File", grammar, precedence=precedence) gen = parser_faster.GenerateLR1("File", grammar, precedence=precedence)
table = gen.gen_table() gen.gen_table()
print(parser_faster.format_table(gen, table)) # print(parser_faster.format_table(gen, table))
print() # print()
# tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"]) # tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"])

View file

@ -10,6 +10,7 @@ It also supports precedence.
import collections import collections
import dataclasses import dataclasses
import enum import enum
import functools
import typing import typing
@ -516,7 +517,7 @@ class GenerateLR0(object):
self.start_symbol = start_symbol self.start_symbol = start_symbol
self.end_symbol = end_symbol self.end_symbol = end_symbol
@functools.cache
def gen_closure_next(self, config: Configuration): def gen_closure_next(self, config: Configuration):
"""Return the next set of configurations in the closure for """Return the next set of configurations in the closure for
config. config.
@ -547,14 +548,20 @@ class GenerateLR0(object):
""" """
closure = set() closure = set()
pending = list(seeds) pending = list(seeds)
pending_next = []
while len(pending) > 0: while len(pending) > 0:
config = pending.pop() for config in pending:
if config in closure: if config in closure:
continue continue
closure.add(config) closure.add(config)
for next_config in self.gen_closure_next(config): for next_config in self.gen_closure_next(config):
pending.append(next_config) pending_next.append(next_config)
temp = pending
pending = pending_next
pending_next = temp
pending_next.clear()
return tuple(sorted(closure)) # TODO: Why tuple? return tuple(sorted(closure)) # TODO: Why tuple?
@ -596,14 +603,20 @@ class GenerateLR0(object):
successors = [] successors = []
pending = [config_set] pending = [config_set]
pending_next = []
while len(pending) > 0: while len(pending) > 0:
config_set = pending.pop() for config_set in pending:
id, is_new = result.register_config_set(config_set)
if is_new:
for symbol, successor in self.gen_all_successors(config_set):
successors.append((id,symbol,successor))
pending_next.append(successor)
id, is_new = result.register_config_set(config_set)
if is_new: temp = pending
for symbol, successor in self.gen_all_successors(config_set): pending = pending_next
successors.append((id,symbol,successor)) pending_next = temp
pending.append(successor) pending_next.clear()
for id,symbol,successor in successors: for id,symbol,successor in successors:
result.add_successor(id, symbol, result.config_set_key[successor]) result.add_successor(id, symbol, result.config_set_key[successor])
@ -940,6 +953,7 @@ class GenerateLR1(GenerateSLR1):
In an LR1 parser, this is the lookahead of the configuration.""" In an LR1 parser, this is the lookahead of the configuration."""
return config.lookahead return config.lookahead
@functools.cache
def gen_closure_next(self, config: Configuration): def gen_closure_next(self, config: Configuration):
"""Return the next set of configurations in the closure for """Return the next set of configurations in the closure for
config. config.
@ -967,7 +981,7 @@ class GenerateLR1(GenerateSLR1):
lookahead = tuple(sorted(lookahead)) lookahead = tuple(sorted(lookahead))
next.append(Configuration.from_rule(config_next, rule, lookahead=lookahead)) next.append(Configuration.from_rule(config_next, rule, lookahead=lookahead))
return tuple(next) return tuple(sorted(next))
def gen_all_sets(self): def gen_all_sets(self):
"""Generate all of the configuration sets for the grammar. """Generate all of the configuration sets for the grammar.