Improve performance by de-duping before closing, not after

You close *after* you've determined that a successor is new, not
before.
This commit is contained in:
John Doty 2024-06-06 05:31:15 -07:00
parent 8c3b1b784c
commit bd70315935
2 changed files with 65 additions and 57 deletions

View file

@ -256,11 +256,8 @@ class FineGrammar(Grammar):
def primary_expression(self) -> Rule: def primary_expression(self) -> Rule:
return ( return (
self.identifier_expression self.identifier_expression
| self.literal_expression
| SELF | SELF
| NUMBER
| STRING
| TRUE
| FALSE
| seq(BANG, self.primary_expression) | seq(BANG, self.primary_expression)
| seq(MINUS, self.primary_expression) | seq(MINUS, self.primary_expression)
| self.block | self.block
@ -278,6 +275,10 @@ class FineGrammar(Grammar):
def identifier_expression(self): def identifier_expression(self):
return IDENTIFIER return IDENTIFIER
@rule("Literal")
def literal_expression(self):
return NUMBER | STRING | TRUE | FALSE
@rule("ConditionalExpression") @rule("ConditionalExpression")
def conditional_expression(self) -> Rule: def conditional_expression(self) -> Rule:
return ( return (

113
parser.py
View file

@ -287,8 +287,10 @@ class ConfigurationSetInfo:
structure, but they all compute this information.) structure, but they all compute this information.)
""" """
core_key: dict[ConfigSet, int] # Map a ConfigSet into an index
config_set_key: dict[ConfigSet, int] # Map a ConfigSet into an index config_set_key: dict[ConfigSet, int] # Map a ConfigSet into an index
sets: list[ConfigSet] # Map the index back into a set sets: list[ConfigSet] # Map the index back into a set
closures: list[ConfigSet | None] # Track closures
# All the successors for all of the sets. `successors[i]` is the mapping # All the successors for all of the sets. `successors[i]` is the mapping
# from grammar symbol to the index of the set you get by processing that # from grammar symbol to the index of the set you get by processing that
@ -296,11 +298,13 @@ class ConfigurationSetInfo:
successors: list[dict[int, int]] successors: list[dict[int, int]]
def __init__(self): def __init__(self):
self.core_key = {}
self.config_set_key = {} self.config_set_key = {}
self.sets = [] self.sets = []
self.closures = []
self.successors = [] self.successors = []
def register_config_set(self, c: ConfigSet) -> typing.Tuple[int, bool]: def register_core(self, c: ConfigSet) -> typing.Tuple[int, bool]:
"""Potentially add a new config set to the set of sets. Returns the """Potentially add a new config set to the set of sets. Returns the
canonical ID of the set within this structure, along with a boolean canonical ID of the set within this structure, along with a boolean
indicating whether the set was just added or not. indicating whether the set was just added or not.
@ -308,16 +312,22 @@ class ConfigurationSetInfo:
(You can use this integer to get the set back, if you need it, and (You can use this integer to get the set back, if you need it, and
also access the successors table.) also access the successors table.)
""" """
existing = self.config_set_key.get(c) existing = self.core_key.get(c)
if existing is not None: if existing is not None:
return existing, False return existing, False
index = len(self.sets) index = len(self.sets)
self.sets.append(c) self.sets.append(c)
self.closures.append(None)
self.successors.append({}) self.successors.append({})
self.config_set_key[c] = index self.core_key[c] = index
return index, True return index, True
def register_config_closure(self, c_id: int, closure: ConfigSet):
assert self.closures[c_id] is None
self.closures[c_id] = closure
self.config_set_key[closure] = c_id
def add_successor(self, c_id: int, symbol: int, successor: int): def add_successor(self, c_id: int, symbol: int, successor: int):
"""Register successor(`c_id`, `symbol`) -> `successor`, where c_id """Register successor(`c_id`, `symbol`) -> `successor`, where c_id
is the id of the set in this structure, and symbol is the id of a is the id of the set in this structure, and symbol is the id of a
@ -960,22 +970,6 @@ class GenerateLR0:
return ConfigSet(closure) return ConfigSet(closure)
def gen_successor(self, config_set: typing.Iterable[Configuration], symbol: int) -> ConfigSet:
"""Compute the successor state for the given config set and the
given symbol.
The successor represents the next state of the parser after seeing
the symbol.
"""
seeds = tuple(
config.replace_position(config.core.position + 1)
for config in config_set
if config.core.next == symbol
)
closure = self.gen_closure(seeds)
return closure
def gen_all_successors( def gen_all_successors(
self, config_set: typing.Iterable[Configuration] self, config_set: typing.Iterable[Configuration]
) -> list[typing.Tuple[int, ConfigSet]]: ) -> list[typing.Tuple[int, ConfigSet]]:
@ -989,23 +983,29 @@ class GenerateLR0:
next = [] next = []
for symbol in possible: for symbol in possible:
successor = self.gen_successor(config_set, symbol) seeds = ConfigSet(
if len(successor) > 0: config.replace_position(config.core.position + 1)
next.append((symbol, successor)) for config in config_set
if config.core.next == symbol
)
if len(seeds) > 0:
next.append((symbol, seeds))
return next return next
def gen_sets(self, config_set: ConfigSet) -> ConfigurationSetInfo: def gen_sets(self, seeds: list[Configuration]) -> ConfigurationSetInfo:
"""Generate all configuration sets starting from the provided set.""" """Generate all configuration sets starting from the provided seeds."""
result = ConfigurationSetInfo() result = ConfigurationSetInfo()
successors = [] successors = []
pending = [config_set] pending = [ConfigSet(seeds)]
pending_next = [] pending_next = []
while len(pending) > 0: while len(pending) > 0:
for config_set in pending: for core in pending:
id, is_new = result.register_config_set(config_set) id, is_new = result.register_core(core)
if is_new: if is_new:
config_set = self.gen_closure(core)
result.register_config_closure(id, config_set)
for symbol, successor in self.gen_all_successors(config_set): for symbol, successor in self.gen_all_successors(config_set):
successors.append((id, symbol, successor)) successors.append((id, symbol, successor))
pending_next.append(successor) pending_next.append(successor)
@ -1016,18 +1016,17 @@ class GenerateLR0:
pending_next.clear() pending_next.clear()
for id, symbol, successor in successors: for id, symbol, successor in successors:
result.add_successor(id, symbol, result.config_set_key[successor]) result.add_successor(id, symbol, result.core_key[successor])
return result return result
def gen_all_sets(self) -> ConfigurationSetInfo: def gen_all_sets(self) -> ConfigurationSetInfo:
"""Generate all of the configuration sets for the grammar.""" """Generate all of the configuration sets for the grammar."""
seeds = tuple( seeds = [
Configuration.from_rule(self.start_symbol, rule) Configuration.from_rule(self.start_symbol, rule)
for rule in self.grammar[self.start_symbol] for rule in self.grammar[self.start_symbol]
) ]
initial_set = self.gen_closure(seeds) return self.gen_sets(seeds)
return self.gen_sets(initial_set)
def gen_reduce_set(self, config: Configuration) -> typing.Iterable[int]: def gen_reduce_set(self, config: Configuration) -> typing.Iterable[int]:
"""Return the set of symbols that indicate we should reduce the given """Return the set of symbols that indicate we should reduce the given
@ -1069,7 +1068,8 @@ class GenerateLR0:
config_sets = self.gen_all_sets() config_sets = self.gen_all_sets()
builder = TableBuilder(self.alphabet, self.precedence, self.transparents) builder = TableBuilder(self.alphabet, self.precedence, self.transparents)
for config_set_id, config_set in enumerate(config_sets.sets): for config_set_id, config_set in enumerate(config_sets.closures):
assert config_set is not None
builder.new_row(config_set) builder.new_row(config_set)
successors = config_sets.successors[config_set_id] successors = config_sets.successors[config_set_id]
@ -1517,12 +1517,11 @@ class GenerateLR1(GenerateSLR1):
In LR1 parsers, we must remember to set the lookahead of the start In LR1 parsers, we must remember to set the lookahead of the start
symbol to '$'. symbol to '$'.
""" """
seeds = tuple( seeds = [
Configuration.from_rule(self.start_symbol, rule, lookahead=(self.end_symbol,)) Configuration.from_rule(self.start_symbol, rule, lookahead=(self.end_symbol,))
for rule in self.grammar[self.start_symbol] for rule in self.grammar[self.start_symbol]
) ]
initial_set = self.gen_closure(seeds) return self.gen_sets(seeds)
return self.gen_sets(initial_set)
class GenerateLALR(GenerateLR1): class GenerateLALR(GenerateLR1):
@ -1544,7 +1543,7 @@ class GenerateLALR(GenerateLR1):
use a bunch of improvement, probably.) use a bunch of improvement, probably.)
""" """
def gen_sets(self, config_set: ConfigSet) -> ConfigurationSetInfo: def gen_sets(self, seeds: list[Configuration]) -> ConfigurationSetInfo:
"""Recursively generate all configuration sets starting from the """Recursively generate all configuration sets starting from the
provided set. provided set.
@ -1558,26 +1557,30 @@ class GenerateLALR(GenerateLR1):
# #
F: dict[CoreSet, list[ConfigSet]] = {} F: dict[CoreSet, list[ConfigSet]] = {}
seen: set[ConfigSet] = set() seen: set[ConfigSet] = set()
closed_cores: dict[CoreSet, CoreSet] = {}
successors: list[typing.Tuple[CoreSet, int, CoreSet]] = [] successors: list[typing.Tuple[CoreSet, int, CoreSet]] = []
pending = [config_set]
pending = [(ConfigSet(seeds), CoreSet(s.core for s in seeds))]
while len(pending) > 0: while len(pending) > 0:
config_set = pending.pop() seed_set, seed_core = pending.pop()
if config_set in seen: if seed_set in seen:
continue continue
seen.add(config_set) seen.add(seed_set)
config_set_no_la = CoreSet(s.core for s in config_set) closure = self.gen_closure(seed_set)
closure_core = CoreSet(s.core for s in closure)
closed_cores[seed_core] = closure_core
existing = F.get(config_set_no_la) existing = F.get(closure_core)
if existing is not None: if existing is not None:
existing.append(config_set) existing.append(closure)
else: else:
F[config_set_no_la] = [config_set] F[closure_core] = [closure]
for symbol, successor in self.gen_all_successors(config_set): for symbol, successor in self.gen_all_successors(closure):
successor_no_la = CoreSet(s.core for s in successor) successor_seed_core = CoreSet(s.core for s in successor)
successors.append((config_set_no_la, symbol, successor_no_la)) successors.append((closure_core, symbol, successor_seed_core))
pending.append(successor) pending.append((successor, successor_seed_core))
# Now we gathered the sets, merge them all. # Now we gathered the sets, merge them all.
final_sets: dict[CoreSet, ConfigSet] = {} final_sets: dict[CoreSet, ConfigSet] = {}
@ -1601,7 +1604,10 @@ class GenerateLALR(GenerateLR1):
# Register all the actually merged, final config sets. # Register all the actually merged, final config sets.
result = ConfigurationSetInfo() result = ConfigurationSetInfo()
for config_set in final_sets.values(): for config_set in final_sets.values():
result.register_config_set(config_set) # Because we're building this so late we don't distinguish.
# This is probably a hack, and a sign the tracker should be better.
id, _ = result.register_core(config_set)
result.register_config_closure(id, config_set)
# Now record all the successors that we found. Of course, the actual # Now record all the successors that we found. Of course, the actual
# sets that wound up in the ConfigurationSetInfo don't match anything # sets that wound up in the ConfigurationSetInfo don't match anything
@ -1610,10 +1616,11 @@ class GenerateLALR(GenerateLR1):
# *Fortunately* we recorded the no-lookahead keys in the successors # *Fortunately* we recorded the no-lookahead keys in the successors
# so we can find the final sets, then look them up in the registered # so we can find the final sets, then look them up in the registered
# sets, and actually register the successor. # sets, and actually register the successor.
for config_set_no_la, symbol, successor_no_la in successors: for config_core, symbol, successor_seed_core in successors:
actual_config_set = final_sets[config_set_no_la] actual_config_set = final_sets[config_core]
from_index = result.config_set_key[actual_config_set] from_index = result.config_set_key[actual_config_set]
successor_no_la = closed_cores[successor_seed_core]
actual_successor = final_sets[successor_no_la] actual_successor = final_sets[successor_no_la]
to_index = result.config_set_key[actual_successor] to_index = result.config_set_key[actual_successor]