From 8c3b1b784c5fdbdc6911be14c8ffde688ac1a962 Mon Sep 17 00:00:00 2001 From: John Doty Date: Wed, 5 Jun 2024 09:55:12 -0700 Subject: [PATCH] Performance --- parser.py | 257 ++++++++++++++++++++++++------------------------------ 1 file changed, 112 insertions(+), 145 deletions(-) diff --git a/parser.py b/parser.py index f85d4be..e70368e 100644 --- a/parser.py +++ b/parser.py @@ -145,7 +145,71 @@ import typing # # We start with LR0 parsers, because they form the basis of everything else. ############################################################################### -class Configuration: +class ConfigurationCore(typing.NamedTuple): + name: int + symbols: typing.Tuple[int, ...] + position: int + next: int | None + + @classmethod + def from_rule(cls, name: int, symbols: typing.Tuple[int, ...]): + if len(symbols) == 0: + next = None + else: + next = symbols[0] + return ConfigurationCore( + name=name, + symbols=symbols, + position=0, + next=next, + ) + + @property + def at_end(self) -> bool: + return self.position == len(self.symbols) + + def replace_position(self, new_position): + if new_position == len(self.symbols): + next = None + else: + next = self.symbols[new_position] + return ConfigurationCore( + name=self.name, + symbols=self.symbols, + position=new_position, + next=next, + ) + + @property + def rest(self) -> typing.Tuple[int, ...]: + return self.symbols[(self.position + 1) :] + + def __repr__(self) -> str: + return "{name} -> {bits}".format( + name=self.name, + bits=" ".join( + [ + ("* " + str(sym)) if i == self.position else str(sym) + for i, sym in enumerate(self.symbols) + ] + ) + + (" *" if self.at_end else ""), + ) + + def format(self, alphabet: list[str]) -> str: + return "{name} -> {bits}".format( + name=alphabet[self.name], + bits=" ".join( + [ + "* " + alphabet[sym] if i == self.position else alphabet[sym] + for i, sym in enumerate(self.symbols) + ] + ) + + (" *" if self.at_end else ""), + ) + + +class Configuration(typing.NamedTuple): """A rule being tracked in a state. That is, a specific position within a specific rule, with an associated lookahead state. @@ -162,125 +226,34 @@ class Configuration: the part about LR(1).) """ - __slots__ = ( - "name", - "symbols", - "position", - "lookahead", - "next", - "at_end", - "_vals", - "_hash", - ) - - name: int - symbols: typing.Tuple[int, ...] - position: int + core: ConfigurationCore lookahead: typing.Tuple[int, ...] - next: int | None - at_end: bool - - _vals: typing.Tuple - _hash: int - - def __init__(self, name, symbols, position, lookahead) -> None: - self.name = name - self.symbols = symbols - self.position = position - self.lookahead = lookahead - - at_end = position == len(symbols) - self.at_end = at_end - self.next = symbols[position] if not at_end else None - - self._vals = (name, symbols, position, lookahead) - self._hash = hash(self._vals) @classmethod def from_rule(cls, name: int, symbols: typing.Tuple[int, ...], lookahead=()): + # Consider adding at_end and next to the namedtuple. return Configuration( - name=name, - symbols=symbols, - position=0, - lookahead=lookahead, - ) - - def __hash__(self) -> int: - return self._hash - - def __eq__(self, value: typing.Any, /) -> bool: - if value is self: - return True - - return ( - value._hash == self._hash - and value.name == self.name - and value.position == self.position - and value.symbols == self.symbols - and value.lookahead == self.lookahead - ) - - def __lt__(self, value) -> bool: - if not isinstance(value, Configuration): - return NotImplemented - return self._vals < value._vals - - def __gt__(self, value) -> bool: - if not isinstance(value, Configuration): - return NotImplemented - return self._vals > value._vals - - def __le__(self, value) -> bool: - if not isinstance(value, Configuration): - return NotImplemented - return self._vals <= value._vals - - def __ge__(self, value) -> bool: - if not isinstance(value, Configuration): - return NotImplemented - return self._vals >= value._vals - - def replace_position(self, new_position): - return Configuration( - name=self.name, - symbols=self.symbols, - position=new_position, - lookahead=self.lookahead, - ) - - def clear_lookahead(self): - return Configuration( - name=self.name, - symbols=self.symbols, - position=self.position, - lookahead=(), - ) - - def replace_lookahead(self, lookahead: typing.Tuple[int, ...]): - return Configuration( - name=self.name, - symbols=self.symbols, - position=self.position, + core=ConfigurationCore.from_rule(name, symbols), lookahead=lookahead, ) + @property + def at_end(self) -> bool: + return self.core.next is None + + def replace_position(self, new_position): + return Configuration( + core=self.core.replace_position(new_position), + lookahead=self.lookahead, + ) + @property def rest(self): - return self.symbols[(self.position + 1) :] + return self.core.symbols[(self.core.position + 1) :] def __repr__(self) -> str: la = ", " + str(self.lookahead) if self.lookahead != () else "" - return "{name} -> {bits}{lookahead}".format( - name=self.name, - bits=" ".join( - [ - ("* " + str(sym)) if i == self.position else str(sym) - for i, sym in enumerate(self.symbols) - ] - ) - + (" *" if self.at_end else ""), - lookahead=la, - ) + return f"{repr(self.core)}{la}" def format(self, alphabet: list[str]) -> str: if self.lookahead != (): @@ -288,20 +261,13 @@ class Configuration: else: la = "" - return "{name} -> {bits}{lookahead}".format( - name=alphabet[self.name], - bits=" ".join( - [ - "* " + alphabet[sym] if i == self.position else alphabet[sym] - for i, sym in enumerate(self.symbols) - ] - ) - + (" *" if self.at_end else ""), - lookahead=la, - ) + return f"{self.core.format(alphabet)}{la}" + + +class CoreSet(frozenset[ConfigurationCore]): + pass -# ConfigSet = typing.Tuple[Configuration, ...] class ConfigSet(frozenset[Configuration]): pass @@ -548,12 +514,13 @@ class ErrorCollection: for symbol, symbol_errors in set_errors.items(): actions = [] for config, action in symbol_errors.items(): - name = alphabet[config.name] + core = config.core + name = alphabet[core.name] rule = " ".join( - f"{'* ' if config.position == i else ''}{alphabet[s]}" - for i, s in enumerate(config.symbols) + f"{'* ' if core.position == i else ''}{alphabet[s]}" + for i, s in enumerate(core.symbols) ) - if config.next is None: + if config.at_end: rule += " *" match action: @@ -700,9 +667,9 @@ class TableBuilder(object): """Mark a reduce of the given configuration for the given symbol in the current row. """ - name = self.alphabet[config.name] + name = self.alphabet[config.core.name] transparent = name in self.transparents - action = Reduce(name, len(config.symbols), transparent) + action = Reduce(name, len(config.core.symbols), transparent) self._set_table_action(symbol, action, config) def set_table_accept(self, symbol: int, config: Configuration): @@ -728,7 +695,7 @@ class TableBuilder(object): if isinstance(action, Shift): return self.precedence[symbol] else: - return self.precedence[config.name] + return self.precedence[config.core.name] def _set_table_action(self, symbol_id: int, action: Action, config: Configuration | None): """Set the action for 'symbol' in the table row to 'action'. @@ -960,7 +927,7 @@ class GenerateLR0: beginning. (If the position for config is just before a terminal, or at the end of the production, then the next set is empty.) """ - next = config.next + next = config.core.next if next is None: return () else: @@ -984,15 +951,14 @@ class GenerateLR0: continue closure.add(config) - for next_config in self.gen_closure_next(config): - pending_next.append(next_config) + pending_next.extend(self.gen_closure_next(config)) temp = pending pending = pending_next pending_next = temp pending_next.clear() - return ConfigSet(closure) # TODO: Why tuple? + return ConfigSet(closure) def gen_successor(self, config_set: typing.Iterable[Configuration], symbol: int) -> ConfigSet: """Compute the successor state for the given config set and the @@ -1002,9 +968,9 @@ class GenerateLR0: the symbol. """ seeds = tuple( - config.replace_position(config.position + 1) + config.replace_position(config.core.position + 1) for config in config_set - if config.next == symbol + if config.core.next == symbol ) closure = self.gen_closure(seeds) @@ -1019,7 +985,7 @@ class GenerateLR0: could possibly see, and figure out which configs sets we get from those symbols. Those are the successors of this set.) """ - possible = {config.next for config in config_set if config.next is not None} + possible = {config.core.next for config in config_set if config.core.next is not None} next = [] for symbol in possible: @@ -1108,9 +1074,9 @@ class GenerateLR0: successors = config_sets.successors[config_set_id] for config in config_set: - config_next = config.next + config_next = config.core.next if config_next is None: - if config.name != self.start_symbol: + if config.core.name != self.start_symbol: for a in self.gen_reduce_set(config): builder.set_table_reduce(a, config) else: @@ -1472,7 +1438,7 @@ class GenerateSLR1(GenerateLR0): In an SLR1 parser, this is the follow set of the config nonterminal. """ - return self.gen_follow(config.name) + return self.gen_follow(config.core.name) class GenerateLR1(GenerateSLR1): @@ -1531,7 +1497,7 @@ class GenerateLR1(GenerateSLR1): (See the documentation in GenerateLR0 for more information on how this function fits into the whole process, specifically `gen_closure`.) """ - config_next = config.next + config_next = config.core.next if config_next is None: return () else: @@ -1590,9 +1556,9 @@ class GenerateLALR(GenerateLR1): # First, do the actual walk. Don't merge yet: just keep track of all # the config sets that need to be merged. # - F: dict[ConfigSet, list[ConfigSet]] = {} + F: dict[CoreSet, list[ConfigSet]] = {} seen: set[ConfigSet] = set() - successors: list[typing.Tuple[ConfigSet, int, ConfigSet]] = [] + successors: list[typing.Tuple[CoreSet, int, CoreSet]] = [] pending = [config_set] while len(pending) > 0: config_set = pending.pop() @@ -1600,7 +1566,7 @@ class GenerateLALR(GenerateLR1): continue seen.add(config_set) - config_set_no_la = ConfigSet(s.clear_lookahead() for s in config_set) + config_set_no_la = CoreSet(s.core for s in config_set) existing = F.get(config_set_no_la) if existing is not None: @@ -1609,17 +1575,17 @@ class GenerateLALR(GenerateLR1): F[config_set_no_la] = [config_set] for symbol, successor in self.gen_all_successors(config_set): - successor_no_la = ConfigSet(s.clear_lookahead() for s in successor) + successor_no_la = CoreSet(s.core for s in successor) successors.append((config_set_no_la, symbol, successor_no_la)) pending.append(successor) # Now we gathered the sets, merge them all. - final_sets: dict[ConfigSet, ConfigSet] = {} + final_sets: dict[CoreSet, ConfigSet] = {} for key, config_sets in F.items(): - la_merge: dict[Configuration, set[int]] = {} + la_merge: dict[ConfigurationCore, set[int]] = {} for config_set in config_sets: for config in config_set: - la_key = config.clear_lookahead() + la_key = config.core la_set = la_merge.get(la_key) if la_set is None: la_merge[la_key] = set(config.lookahead) @@ -1627,7 +1593,8 @@ class GenerateLALR(GenerateLR1): la_set.update(config.lookahead) final_set = ConfigSet( - config.replace_lookahead(tuple(sorted(la))) for config, la in la_merge.items() + Configuration(core=core, lookahead=tuple(sorted(la))) + for core, la in la_merge.items() ) final_sets[key] = final_set