From d6131ad53e0ed7701df797dd6e38da7c6f001afd Mon Sep 17 00:00:00 2001 From: John Doty Date: Sun, 2 Jun 2024 08:09:32 -0700 Subject: [PATCH 1/3] Stop deleting blank space in errors --- harness.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harness.py b/harness.py index acf48ff..8e87501 100644 --- a/harness.py +++ b/harness.py @@ -390,7 +390,7 @@ class Harness: for line in lines[: rows - 3]: print(line[:cols] + "\r") else: - wrapper = textwrap.TextWrapper(width=cols) + wrapper = textwrap.TextWrapper(width=cols, drop_whitespace=False) lines = [line for error in self.errors for line in wrapper.wrap(error)] for line in lines[: rows - 3]: print(line + "\r") From e57a827e5c240ed1aa474f77226a7a6cd1037361 Mon Sep 17 00:00:00 2001 From: John Doty Date: Sun, 2 Jun 2024 08:09:47 -0700 Subject: [PATCH 2/3] Better formatting, dump full graph, disallow conflicts --- parser.py | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/parser.py b/parser.py index 4ad5683..f85d4be 100644 --- a/parser.py +++ b/parser.py @@ -135,6 +135,7 @@ import dataclasses import enum import functools import inspect +import json import sys import typing @@ -282,7 +283,11 @@ class Configuration: ) def format(self, alphabet: list[str]) -> str: - la = ", " + str(tuple(alphabet[i] for i in self.lookahead)) if self.lookahead != () else "" + if self.lookahead != (): + la = " {" + ",".join(alphabet[i] for i in self.lookahead) + "}" + else: + la = "" + return "{name} -> {bits}{lookahead}".format( name=alphabet[self.name], bits=" ".join( @@ -297,7 +302,7 @@ class Configuration: # ConfigSet = typing.Tuple[Configuration, ...] -class ConfigSet(frozenset): +class ConfigSet(frozenset[Configuration]): pass @@ -354,6 +359,21 @@ class ConfigurationSetInfo: """ self.successors[c_id][symbol] = successor + def dump_state(self, alphabet: list[str]) -> str: + return json.dumps( + { + str(set_index): { + "configs": [c.format(alphabet) for c in config_set], + "successors": { + alphabet[k]: str(v) for k, v in self.successors[set_index].items() + }, + } + for set_index, config_set in enumerate(self.sets) + }, + indent=4, + sort_keys=True, + ) + def find_path_to_set(self, target_set: ConfigSet) -> list[int]: """Trace the path of grammar symbols from the first set (which always set 0) to the target set. This is useful in conflict reporting, @@ -369,6 +389,8 @@ class ConfigurationSetInfo: visited = set() queue: collections.deque = collections.deque() + # NOTE: Set 0 is always the first set, the one that contains the + # start symbol. queue.appendleft((0, [])) while len(queue) > 0: set_index, path = queue.pop() @@ -452,7 +474,7 @@ class AmbiguityError(Exception): self.ambiguities = ambiguities def __str__(self): - return "The grammar is ambiguous:\n\n" + "\n\n".join( + return f"{len(self.ambiguities)} ambiguities:\n\n" + "\n\n".join( str(ambiguity) for ambiguity in self.ambiguities ) @@ -505,7 +527,7 @@ class ErrorCollection: alphabet: list[str], all_sets: ConfigurationSetInfo, ) -> AmbiguityError | None: - """Format all the errors into a string, or return None if there are no + """Format all the errors into an error, or return None if there are no errors. We need the alphabet to turn all these integers into something human @@ -515,6 +537,9 @@ class ErrorCollection: if len(self.errors) == 0: return None + # with open("ambiguity.json", mode="w", encoding="utf-8") as aj: + # aj.write(all_sets.dump_state(alphabet)) + errors = [] for config_set, set_errors in self.errors.items(): path = all_sets.find_path_to_set(config_set) @@ -1518,7 +1543,7 @@ class GenerateLR1(GenerateSLR1): lookahead_tuple = tuple(sorted(lookahead)) next.append(Configuration.from_rule(config_next, rule, lookahead=lookahead_tuple)) - return tuple(sorted(next)) + return tuple(next) def gen_all_sets(self): """Generate all of the configuration sets for the grammar. @@ -1951,6 +1976,10 @@ class Grammar: new_clause = [] for symbol in clause: if isinstance(symbol, Terminal): + if symbol.value in temp_grammar: + raise ValueError( + f"'{symbol.value}' is the name of both a Terminal and a NonTerminal rule. This will cause problems." + ) new_clause.append(symbol.value) else: new_clause.append(symbol) @@ -1959,7 +1988,7 @@ class Grammar: return grammar, transparents - def build_table(self, start: str | None, generator=None): + def build_table(self, start: str | None = None, generator=None): """Construct a parse table for this grammar, starting at the named nonterminal rule. """ From cabc091ee43d0203525f1e437221a14407aa0049 Mon Sep 17 00:00:00 2001 From: John Doty Date: Sun, 2 Jun 2024 08:10:18 -0700 Subject: [PATCH 3/3] Start extracting better expressions --- grammar.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/grammar.py b/grammar.py index f57c85b..754f2e9 100644 --- a/grammar.py +++ b/grammar.py @@ -1,7 +1,8 @@ # This is an example grammar. import re -from parser import Assoc, GenerateLALR, GenerateLR1, Grammar, Nothing, Terminal, rule, seq, Rule +import parser +from parser import Assoc, Grammar, Nothing, Terminal, rule, seq, Rule ARROW = Terminal("Arrow") AS = Terminal("As") @@ -54,7 +55,7 @@ RSQUARE = Terminal("RightBracket") class FineGrammar(Grammar): - generator = GenerateLALR + # generator = parser.GenerateLR1 start = "File" def __init__(self): @@ -254,7 +255,7 @@ class FineGrammar(Grammar): @rule def primary_expression(self) -> Rule: return ( - IDENTIFIER + self.identifier_expression | SELF | NUMBER | STRING @@ -273,6 +274,10 @@ class FineGrammar(Grammar): | seq(LPAREN, self.expression, RPAREN) ) + @rule("IdentifierExpression") + def identifier_expression(self): + return IDENTIFIER + @rule("ConditionalExpression") def conditional_expression(self) -> Rule: return ( @@ -318,7 +323,6 @@ class FineGrammar(Grammar): return ( seq(self.variable_binding, self._pattern_core, self.pattern_predicate) | seq(self.variable_binding, self._pattern_core) - | seq(self._pattern_core, self.pattern_predicate) | self._pattern_core ) @@ -534,3 +538,7 @@ class FineTokens: column_index = start - col_start value = self.src[start : start + length] print(f"{start:04} {kind.value:12} {value} ({line_index}, {column_index})") + + +if __name__ == "__main__": + FineGrammar().build_table()