From 7c4705714eed7ea62322fe6941dd081e504e46bc Mon Sep 17 00:00:00 2001 From: John Doty Date: Tue, 28 May 2024 08:07:11 -0700 Subject: [PATCH] Faster still. Also somehow I was not merging things correctly for LALR; this merges more completely and winds up with 215 states for the fine grammar, which is like half of what it used to be? --- harness.py | 10 ++++++--- parser.py | 63 ++++++++++++++++++++++++++++++++---------------------- 2 files changed, 44 insertions(+), 29 deletions(-) diff --git a/harness.py b/harness.py index 050c87a..8255a41 100644 --- a/harness.py +++ b/harness.py @@ -89,8 +89,12 @@ def parse(table, tokens, trace=None): def harness(lexer_func, grammar_func, start_rule, source_path): - generator = parser.GenerateLR1 - # generator = parser.GenerateLALR + # generator = parser.GenerateLR1 + generator = parser.GenerateLALR + + trace = None + # trace = trace_state + table = grammar_func().build_table(start=start_rule, generator=generator) print(f"{len(table)} states") @@ -104,7 +108,7 @@ def harness(lexer_func, grammar_func, start_rule, source_path): tokens = lexer_func(src) # print(f"{tokens.lines}") # tokens.dump(end=5) - (_, errors) = parse(table, tokens) + (_, errors) = parse(table, tokens, trace=trace) if len(errors) > 0: print(f"{len(errors)} errors:") for error in errors: diff --git a/parser.py b/parser.py index 8ace978..8e7e753 100644 --- a/parser.py +++ b/parser.py @@ -267,6 +267,20 @@ class Configuration: def rest(self): return self.symbols[(self.position + 1) :] + def __repr__(self) -> str: + la = ", " + str(self.lookahead) if self.lookahead != () else "" + return "{name} -> {bits}{lookahead}".format( + name=self.name, + bits=" ".join( + [ + ("* " + str(sym)) if i == self.position else str(sym) + for i, sym in enumerate(self.symbols) + ] + ) + + (" *" if self.at_end else ""), + lookahead=la, + ) + def format(self, alphabet: list[str]) -> str: la = ", " + str(tuple(alphabet[i] for i in self.lookahead)) if self.lookahead != () else "" return "{name} -> {bits}{lookahead}".format( @@ -282,7 +296,9 @@ class Configuration: ) -ConfigSet = typing.Tuple[Configuration, ...] +# ConfigSet = typing.Tuple[Configuration, ...] +class ConfigSet(frozenset): + pass class ConfigurationSetInfo: @@ -807,7 +823,7 @@ class GenerateLR0(object): pending_next = temp pending_next.clear() - return tuple(sorted(closure)) # TODO: Why tuple? + return ConfigSet(closure) # TODO: Why tuple? def gen_successor(self, config_set: typing.Iterable[Configuration], symbol: int) -> ConfigSet: """Compute the successor state for the given config set and the @@ -834,7 +850,7 @@ class GenerateLR0(object): could possibly see, and figure out which configs sets we get from those symbols. Those are the successors of this set.) """ - possible = tuple(sorted({config.next for config in config_set if config.next is not None})) + possible = {config.next for config in config_set if config.next is not None} next = [] for symbol in possible: @@ -1400,9 +1416,9 @@ class GenerateLALR(GenerateLR1): # First, do the actual walk. Don't merge yet: just keep track of all # the config sets that need to be merged. # - F = {} - seen = set() - successors = [] + F: dict[ConfigSet, list[ConfigSet]] = {} + seen: set[ConfigSet] = set() + successors: list[typing.Tuple[ConfigSet, int, ConfigSet]] = [] pending = [config_set] while len(pending) > 0: config_set = pending.pop() @@ -1410,7 +1426,7 @@ class GenerateLALR(GenerateLR1): continue seen.add(config_set) - config_set_no_la = tuple(s.clear_lookahead() for s in config_set) + config_set_no_la = ConfigSet(s.clear_lookahead() for s in config_set) existing = F.get(config_set_no_la) if existing is not None: @@ -1419,32 +1435,27 @@ class GenerateLALR(GenerateLR1): F[config_set_no_la] = [config_set] for symbol, successor in self.gen_all_successors(config_set): - successor_no_la = tuple(s.clear_lookahead() for s in successor) + successor_no_la = ConfigSet(s.clear_lookahead() for s in successor) successors.append((config_set_no_la, symbol, successor_no_la)) pending.append(successor) # Now we gathered the sets, merge them all. - final_sets = {} + final_sets: dict[ConfigSet, ConfigSet] = {} for key, config_sets in F.items(): - new_config_set = [] - config_groupings = [[] for _ in range(len(config_sets[0]))] + la_merge: dict[Configuration, set[int]] = {} for config_set in config_sets: - for i, config in enumerate(config_set): - config_groupings[i].append(config) + for config in config_set: + la_key = config.clear_lookahead() + la_set = la_merge.get(la_key) + if la_set is None: + la_merge[la_key] = set(config.lookahead) + else: + la_set.update(config.lookahead) - for config_group in config_groupings: - new_lookahead = [l for config in config_group for l in config.lookahead] - new_lookahead = tuple(sorted(set(new_lookahead))) - new_config_set.append( - Configuration( - name=config_group[0].name, - symbols=config_group[0].symbols, - position=config_group[0].position, - lookahead=new_lookahead, - ) - ) - - final_sets[key] = tuple(new_config_set) + final_set = ConfigSet( + config.replace_lookahead(tuple(sorted(la))) for config, la in la_merge.items() + ) + final_sets[key] = final_set # Register all the actually merged, final config sets. result = ConfigurationSetInfo()