diff --git a/grammar.py b/grammar.py index 754f2e9..f57c85b 100644 --- a/grammar.py +++ b/grammar.py @@ -1,8 +1,7 @@ # This is an example grammar. import re -import parser -from parser import Assoc, Grammar, Nothing, Terminal, rule, seq, Rule +from parser import Assoc, GenerateLALR, GenerateLR1, Grammar, Nothing, Terminal, rule, seq, Rule ARROW = Terminal("Arrow") AS = Terminal("As") @@ -55,7 +54,7 @@ RSQUARE = Terminal("RightBracket") class FineGrammar(Grammar): - # generator = parser.GenerateLR1 + generator = GenerateLALR start = "File" def __init__(self): @@ -255,7 +254,7 @@ class FineGrammar(Grammar): @rule def primary_expression(self) -> Rule: return ( - self.identifier_expression + IDENTIFIER | SELF | NUMBER | STRING @@ -274,10 +273,6 @@ class FineGrammar(Grammar): | seq(LPAREN, self.expression, RPAREN) ) - @rule("IdentifierExpression") - def identifier_expression(self): - return IDENTIFIER - @rule("ConditionalExpression") def conditional_expression(self) -> Rule: return ( @@ -323,6 +318,7 @@ class FineGrammar(Grammar): return ( seq(self.variable_binding, self._pattern_core, self.pattern_predicate) | seq(self.variable_binding, self._pattern_core) + | seq(self._pattern_core, self.pattern_predicate) | self._pattern_core ) @@ -538,7 +534,3 @@ class FineTokens: column_index = start - col_start value = self.src[start : start + length] print(f"{start:04} {kind.value:12} {value} ({line_index}, {column_index})") - - -if __name__ == "__main__": - FineGrammar().build_table() diff --git a/harness.py b/harness.py index 8e87501..acf48ff 100644 --- a/harness.py +++ b/harness.py @@ -390,7 +390,7 @@ class Harness: for line in lines[: rows - 3]: print(line[:cols] + "\r") else: - wrapper = textwrap.TextWrapper(width=cols, drop_whitespace=False) + wrapper = textwrap.TextWrapper(width=cols) lines = [line for error in self.errors for line in wrapper.wrap(error)] for line in lines[: rows - 3]: print(line + "\r") diff --git a/parser.py b/parser.py index f85d4be..4ad5683 100644 --- a/parser.py +++ b/parser.py @@ -135,7 +135,6 @@ import dataclasses import enum import functools import inspect -import json import sys import typing @@ -283,11 +282,7 @@ class Configuration: ) def format(self, alphabet: list[str]) -> str: - if self.lookahead != (): - la = " {" + ",".join(alphabet[i] for i in self.lookahead) + "}" - else: - la = "" - + la = ", " + str(tuple(alphabet[i] for i in self.lookahead)) if self.lookahead != () else "" return "{name} -> {bits}{lookahead}".format( name=alphabet[self.name], bits=" ".join( @@ -302,7 +297,7 @@ class Configuration: # ConfigSet = typing.Tuple[Configuration, ...] -class ConfigSet(frozenset[Configuration]): +class ConfigSet(frozenset): pass @@ -359,21 +354,6 @@ class ConfigurationSetInfo: """ self.successors[c_id][symbol] = successor - def dump_state(self, alphabet: list[str]) -> str: - return json.dumps( - { - str(set_index): { - "configs": [c.format(alphabet) for c in config_set], - "successors": { - alphabet[k]: str(v) for k, v in self.successors[set_index].items() - }, - } - for set_index, config_set in enumerate(self.sets) - }, - indent=4, - sort_keys=True, - ) - def find_path_to_set(self, target_set: ConfigSet) -> list[int]: """Trace the path of grammar symbols from the first set (which always set 0) to the target set. This is useful in conflict reporting, @@ -389,8 +369,6 @@ class ConfigurationSetInfo: visited = set() queue: collections.deque = collections.deque() - # NOTE: Set 0 is always the first set, the one that contains the - # start symbol. queue.appendleft((0, [])) while len(queue) > 0: set_index, path = queue.pop() @@ -474,7 +452,7 @@ class AmbiguityError(Exception): self.ambiguities = ambiguities def __str__(self): - return f"{len(self.ambiguities)} ambiguities:\n\n" + "\n\n".join( + return "The grammar is ambiguous:\n\n" + "\n\n".join( str(ambiguity) for ambiguity in self.ambiguities ) @@ -527,7 +505,7 @@ class ErrorCollection: alphabet: list[str], all_sets: ConfigurationSetInfo, ) -> AmbiguityError | None: - """Format all the errors into an error, or return None if there are no + """Format all the errors into a string, or return None if there are no errors. We need the alphabet to turn all these integers into something human @@ -537,9 +515,6 @@ class ErrorCollection: if len(self.errors) == 0: return None - # with open("ambiguity.json", mode="w", encoding="utf-8") as aj: - # aj.write(all_sets.dump_state(alphabet)) - errors = [] for config_set, set_errors in self.errors.items(): path = all_sets.find_path_to_set(config_set) @@ -1543,7 +1518,7 @@ class GenerateLR1(GenerateSLR1): lookahead_tuple = tuple(sorted(lookahead)) next.append(Configuration.from_rule(config_next, rule, lookahead=lookahead_tuple)) - return tuple(next) + return tuple(sorted(next)) def gen_all_sets(self): """Generate all of the configuration sets for the grammar. @@ -1976,10 +1951,6 @@ class Grammar: new_clause = [] for symbol in clause: if isinstance(symbol, Terminal): - if symbol.value in temp_grammar: - raise ValueError( - f"'{symbol.value}' is the name of both a Terminal and a NonTerminal rule. This will cause problems." - ) new_clause.append(symbol.value) else: new_clause.append(symbol) @@ -1988,7 +1959,7 @@ class Grammar: return grammar, transparents - def build_table(self, start: str | None = None, generator=None): + def build_table(self, start: str | None, generator=None): """Construct a parse table for this grammar, starting at the named nonterminal rule. """