diff --git a/parser_faster.py b/parser_faster.py index 6420b06..fad7d70 100644 --- a/parser_faster.py +++ b/parser_faster.py @@ -3,10 +3,13 @@ might expect the code did NOT work acceptibly. This version has some performance work done. +It also supports precedence. + 2023 """ import collections import dataclasses +import enum import typing @@ -196,6 +199,14 @@ class ConfigurationSetInfo: raise KeyError("Unable to find a path to the target set!") +class Assoc(enum.Enum): + """Associativity of a rule.""" + NONE = 0 + LEFT = 1 + RIGHT = 2 + + + class ErrorCollection: errors: dict[ConfigSet, dict[int, dict[Configuration, typing.Tuple]]] @@ -259,15 +270,17 @@ class ErrorCollection: class TableBuilder(object): - row: None | list[typing.Tuple[None | typing.Tuple, None | Configuration]] - table: list[dict[str, typing.Tuple]] - config_sets: dict[ConfigSet, int] # Map config sets to rows. errors: ErrorCollection + table: list[dict[str, typing.Tuple]] + alphabet: list[str] + precedence: typing.Tuple[typing.Tuple[Assoc, int], ...] + row: None | list[typing.Tuple[None | typing.Tuple, None | Configuration]] - def __init__(self, alphabet: list[str]): + def __init__(self, alphabet: list[str], precedence: typing.Tuple[typing.Tuple[Assoc, int], ...]): self.errors = ErrorCollection() self.table = [] self.alphabet = alphabet + self.precedence = precedence self.row = None def flush(self, all_sets: ConfigurationSetInfo): @@ -322,13 +335,56 @@ class TableBuilder(object): assert existing_config is not None assert config is not None - # Record the conflicts. - self.errors.add_error(self.current_config_set, symbol_id, existing_config, existing) - self.errors.add_error(self.current_config_set, symbol_id, config, action) + # Maybe we can resolve the conflict with precedence? + existing_assoc, existing_prec = self.precedence[existing_config.name] + new_assoc, new_prec = self.precedence[config.name] + + if existing_prec > new_prec: + # Precedence of the action in the table already wins, do nothing. + return + + elif existing_prec == new_prec: + # It's an actual conflict, use associativity if we can. + # If there's a conflict in associativity then it's a real conflict! + assoc = Assoc.NONE + if existing_assoc == Assoc.NONE: + assoc = new_assoc + elif new_assoc == Assoc.NONE: + assoc = existing_assoc + elif new_assoc == existing_assoc: + assoc = new_assoc + + resolved = False + if assoc == Assoc.LEFT: + # Prefer reduce over shift + if action[0] == 'shift' and existing[0] == 'reduce': + action = existing + resolved = True + elif action[0] == 'reduce' and existing[0] == 'shift': + resolved = True + + elif assoc == Assoc.RIGHT: + # Prefer shift over reduce + if action[0] == 'shift' and existing[0] == 'reduce': + resolved = True + elif action[0] == 'reduce' and existing[0] == 'shift': + action = existing + resolved = True + + if not resolved: + # Record the conflicts. + self.errors.add_error(self.current_config_set, symbol_id, existing_config, existing) + self.errors.add_error(self.current_config_set, symbol_id, config, action) + + else: + # Precedence of the new action is greater than the existing + # action, just allow the overwrite with no change. + pass self.row[symbol_id] = (action, config) + class GenerateLR0(object): """Generate parser tables for an LR0 parser. @@ -357,24 +413,13 @@ class GenerateLR0(object): ('O', []), means that O can be matched with nothing. - - Implementation notes: - - This is implemented in the dumbest way possible, in order to be the - most understandable it can be. I built this to learn, and I want to - make sure I can keep learning with it. - - - We tend to use tuples everywhere. This is because tuples can be - compared for equality and put into tables and all that jazz. They might - be a little bit slower in places but like I said, this is for - learning. (Also, if we need this to run faster we can probably go a - long way by memoizing results, which is much easier if we have tuples - everywhere.) """ alphabet: list[str] grammar: list[list[typing.Tuple[int, ...]]] - nonterminals: typing.Tuple[bool, ...] - terminals: typing.Tuple[bool, ...] + nonterminal: typing.Tuple[bool, ...] + terminal: typing.Tuple[bool, ...] + precedence: typing.Tuple[typing.Tuple[Assoc, int], ...] symbol_key: dict[str, int] start_symbol: int @@ -384,7 +429,12 @@ class GenerateLR0(object): successors: list[set[int]] - def __init__(self, start: str, grammar: list[typing.Tuple[str, list[str]]]): + def __init__( + self, + start: str, + grammar: list[typing.Tuple[str, list[str]]], + precedence: None | dict[str, typing.Tuple[Assoc, int]] = None, + ): """Initialize the parser generator with the specified grammar and start symbol. """ @@ -426,30 +476,34 @@ class GenerateLR0(object): # We count on python dictionaries retaining the insertion order, like # it or not. full_grammar = [list() for _ in self.alphabet] - terminals = [True for _ in self.alphabet] - assert terminals[end_symbol] + terminal = [True for _ in self.alphabet] + assert terminal[end_symbol] - nonterminals = [False for _ in self.alphabet] + nonterminal = [False for _ in self.alphabet] for name, rule in grammar: name_symbol = symbol_key[name] - terminals[name_symbol] = False - nonterminals[name_symbol] = True + terminal[name_symbol] = False + nonterminal[name_symbol] = True rules = full_grammar[name_symbol] rules.append(tuple(symbol_key[symbol] for symbol in rule)) self.grammar = full_grammar self.grammar[start_symbol].append((symbol_key[start],)) - terminals[start_symbol] = False - nonterminals[start_symbol] = True + terminal[start_symbol] = False + nonterminal[start_symbol] = True - self.terminals = tuple(terminals) - self.nonterminals = tuple(nonterminals) + self.terminal = tuple(terminal) + self.nonterminal = tuple(nonterminal) - assert self.terminals[end_symbol] - assert self.nonterminals[start_symbol] + assert self.terminal[end_symbol] + assert self.nonterminal[start_symbol] + + if precedence is None: + precedence = {} + self.precedence = tuple(precedence.get(a, (Assoc.NONE, 0)) for a in self.alphabet) self.symbol_key = symbol_key self.start_symbol = start_symbol @@ -497,7 +551,7 @@ class GenerateLR0(object): return tuple(sorted(closure)) # TODO: Why tuple? - def gen_successor(self, config_set: typing.Iterable[Configuration], symbol: str) -> ConfigSet: + def gen_successor(self, config_set: typing.Iterable[Configuration], symbol: int) -> ConfigSet: """Compute the successor state for the given config set and the given symbol. @@ -564,7 +618,7 @@ class GenerateLR0(object): In an LR0 parser, this is just the set of all terminals.""" del(config) - return [index for index, value in enumerate(self.terminals) if value] + return [index for index, value in enumerate(self.terminal) if value] def gen_table(self): """Generate the parse table. @@ -595,7 +649,7 @@ class GenerateLR0(object): Anything missing from the row indicates an error. """ config_sets = self.gen_all_sets() - builder = TableBuilder(self.alphabet) + builder = TableBuilder(self.alphabet, self.precedence) for config_set_id, config_set in enumerate(config_sets.sets): builder.new_row(config_set) @@ -610,13 +664,13 @@ class GenerateLR0(object): else: builder.set_table_accept(self.end_symbol, config) - elif self.terminals[config_next]: + elif self.terminal[config_next]: index = successors[config_next] builder.set_table_shift(config_next, index, config) # Gotos for symbol, index in successors.items(): - if self.nonterminals[symbol]: + if self.nonterminal[symbol]: builder.set_table_goto(symbol, index) return builder.flush(config_sets) @@ -700,27 +754,22 @@ class FirstInfo: @classmethod def from_grammar( cls, - alphabet: list[str], grammar: list[list[typing.Tuple[int,...]]], - terminals: typing.Tuple[bool, ...], + terminal: typing.Tuple[bool, ...], ): - # print("******* GENERATING FIRSTS ********") - # Add all terminals to their own firsts firsts = [] - for index, is_terminal in enumerate(terminals): + for index, is_terminal in enumerate(terminal): firsts.append(set()) if is_terminal: firsts[index].add(index) - epsilons = [False for _ in terminals] + epsilons = [False for _ in terminal] changed = True while changed: - # print("========= ITERATION") changed = False for name, rules in enumerate(grammar): f = firsts[name] - # print(f" {alphabet[name]} -> {[alphabet[s] for s in f]}") for rule in rules: if len(rule) == 0: changed = changed or not epsilons[name] @@ -728,11 +777,7 @@ class FirstInfo: continue for index, symbol in enumerate(rule): - # if terminals[symbol]: - # changed = add_changed(f, symbol) or changed - # else: other_firsts = firsts[symbol] - # print(f" adding {alphabet[symbol]} -> {[alphabet[s] for s in other_firsts]}") changed = update_changed(f, other_firsts) or changed is_last = index == len(rule) - 1 @@ -750,7 +795,6 @@ class FirstInfo: # looping through the symbols in this rule. break - # print("******* DONE GENERATING FIRSTS ********") return FirstInfo(firsts=firsts, is_epsilon=epsilons) @dataclasses.dataclass(frozen=True) @@ -761,7 +805,7 @@ class FollowInfo: def from_grammar( cls, grammar: list[list[typing.Tuple[int,...]]], - terminals: typing.Tuple[bool, ...], + terminal: typing.Tuple[bool, ...], start_symbol: int, end_symbol: int, firsts: FirstInfo, @@ -778,7 +822,7 @@ class FollowInfo: prev_symbol = None for symbol in reversed(rule): f = follows[symbol] - if terminals[symbol]: + if terminal[symbol]: # This particular rule can't produce epsilon. epsilon = False prev_symbol = symbol @@ -826,10 +870,10 @@ class GenerateSLR1(GenerateLR0): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self._firsts = FirstInfo.from_grammar(self.alphabet, self.grammar, self.terminals) + self._firsts = FirstInfo.from_grammar(self.grammar, self.terminal) self._follows = FollowInfo.from_grammar( self.grammar, - self.terminals, + self.terminal, self.start_symbol, self.end_symbol, self._firsts, @@ -1049,24 +1093,24 @@ def format_table(generator, table): elif action[0] == 'reduce': return 'r' + str(action[1]) - terminals = [ + terminals = list(sorted( generator.alphabet[i] - for i,v in enumerate(generator.terminals) + for i,v in enumerate(generator.terminal) if v - ] - nonterminals = [ + )) + nonterminals = list(sorted( generator.alphabet[i] - for i,v in enumerate(generator.nonterminals) + for i,v in enumerate(generator.nonterminal) if v - ] + )) header = " | {terms} | {nts}".format( terms=' '.join( '{0: <6}'.format(terminal) - for terminal in sorted(terminals) + for terminal in terminals ), nts=' '.join( '{0: <5}'.format(nt) - for nt in sorted(nonterminals) + for nt in nonterminals ), ) @@ -1078,11 +1122,11 @@ def format_table(generator, table): index=i, actions=' '.join( '{0: <6}'.format(format_action(row, terminal)) - for terminal in sorted(terminals) + for terminal in terminals ), gotos=' '.join( '{0: <5}'.format(row.get(nt, ('error', ''))[1]) - for nt in sorted(nonterminals) + for nt in nonterminals ), ) for i, row in enumerate(table)