Start extracting better expressions

Better formatting, dump full graph, disallow conflicts
Stop deleting blank space in errors
2024-06-02 08:10:18 -07:00 · 2024-06-02 08:09:47 -07:00 · 2024-06-02 08:09:32 -07:00
3 changed files with 48 additions and 11 deletions
--- a/grammar.py
+++ b/grammar.py
@ -1,7 +1,8 @@
 # This is an example grammar.
 import re
-from parser import Assoc, GenerateLALR, GenerateLR1, Grammar, Nothing, Terminal, rule, seq, Rule
+import parser
 from parser import Assoc, Grammar, Nothing, Terminal, rule, seq, Rule
 ARROW = Terminal("Arrow")
 AS = Terminal("As")
@ -54,7 +55,7 @@ RSQUARE = Terminal("RightBracket")
 class FineGrammar(Grammar):
-    generator = GenerateLALR
+    # generator = parser.GenerateLR1
    start = "File"
    def __init__(self):
@ -254,7 +255,7 @@ class FineGrammar(Grammar):
    @rule
    def primary_expression(self) -> Rule:
        return (
-            IDENTIFIER
+            self.identifier_expression
            | SELF
            | NUMBER
            | STRING
@ -273,6 +274,10 @@ class FineGrammar(Grammar):
            | seq(LPAREN, self.expression, RPAREN)
        )
    @rule("IdentifierExpression")
    def identifier_expression(self):
        # Gives the bare IDENTIFIER terminal its own named rule
        # ("IdentifierExpression"). primary_expression refers to this rule
        # instead of using the IDENTIFIER terminal directly.
        return IDENTIFIER
    @rule("ConditionalExpression")
    def conditional_expression(self) -> Rule:
        return (
@ -318,7 +323,6 @@ class FineGrammar(Grammar):
        return (
            seq(self.variable_binding, self._pattern_core, self.pattern_predicate)
            | seq(self.variable_binding, self._pattern_core)
            | seq(self._pattern_core, self.pattern_predicate)
            | self._pattern_core
        )
@ -534,3 +538,7 @@ class FineTokens:
            column_index = start - col_start
            value = self.src[start : start + length]
            print(f"{start:04} {kind.value:12} {value} ({line_index}, {column_index})")
 if __name__ == "__main__":
    FineGrammar().build_table()
--- a/harness.py
+++ b/harness.py
@ -390,7 +390,7 @@ class Harness:
            for line in lines[: rows - 3]:
                print(line[:cols] + "\r")
        else:
-            wrapper = textwrap.TextWrapper(width=cols)
+            wrapper = textwrap.TextWrapper(width=cols, drop_whitespace=False)
            lines = [line for error in self.errors for line in wrapper.wrap(error)]
            for line in lines[: rows - 3]:
                print(line + "\r")
--- a/parser.py
+++ b/parser.py
@ -135,6 +135,7 @@ import dataclasses
 import enum
 import functools
 import inspect
 import json
 import sys
 import typing
@ -282,7 +283,11 @@ class Configuration:
        )
    def format(self, alphabet: list[str]) -> str:
-        la = ", " + str(tuple(alphabet[i] for i in self.lookahead)) if self.lookahead != () else ""
+        if self.lookahead != ():
            la = " {" + ",".join(alphabet[i] for i in self.lookahead) + "}"
        else:
            la = ""
        return "{name} -> {bits}{lookahead}".format(
            name=alphabet[self.name],
            bits=" ".join(
@ -297,7 +302,7 @@ class Configuration:
 # ConfigSet = typing.Tuple[Configuration, ...]
-class ConfigSet(frozenset):
+class ConfigSet(frozenset[Configuration]):
    pass
@ -354,6 +359,21 @@ class ConfigurationSetInfo:
        """
        self.successors[c_id][symbol] = successor
    def dump_state(self, alphabet: list[str]) -> str:
        """Serialize every configuration set and its transitions to JSON.

        The result is a JSON object keyed by the set's index (as a string).
        Each entry holds:

        - "configs": the set's configurations, each rendered with
          ``format(alphabet)``.
        - "successors": a mapping from the symbol's name (looked up in
          `alphabet`) to the index of the set it leads to, as a string.

        Keys are emitted with ``sort_keys=True``; because the set indices are
        strings they sort lexicographically ("10" comes before "2").
        """
        state: dict[str, dict] = {}
        for set_index, config_set in enumerate(self.sets):
            transitions = self.successors[set_index]
            state[str(set_index)] = {
                "configs": [config.format(alphabet) for config in config_set],
                "successors": {
                    alphabet[symbol]: str(target) for symbol, target in transitions.items()
                },
            }

        return json.dumps(state, indent=4, sort_keys=True)
    def find_path_to_set(self, target_set: ConfigSet) -> list[int]:
        """Trace the path of grammar symbols from the first set (which is always
        set 0) to the target set. This is useful in conflict reporting,
@ -369,6 +389,8 @@ class ConfigurationSetInfo:
        visited = set()
        queue: collections.deque = collections.deque()
        # NOTE: Set 0 is always the first set, the one that contains the
        #       start symbol.
        queue.appendleft((0, []))
        while len(queue) > 0:
            set_index, path = queue.pop()
@ -452,7 +474,7 @@ class AmbiguityError(Exception):
        self.ambiguities = ambiguities
    def __str__(self):
-        return "The grammar is ambiguous:\n\n" + "\n\n".join(
+        return f"{len(self.ambiguities)} ambiguities:\n\n" + "\n\n".join(
            str(ambiguity) for ambiguity in self.ambiguities
        )
@ -505,7 +527,7 @@ class ErrorCollection:
        alphabet: list[str],
        all_sets: ConfigurationSetInfo,
    ) -> AmbiguityError | None:
-        """Format all the errors into a string, or return None if there are no
+        """Format all the errors into an error, or return None if there are no
        errors.
        We need the alphabet to turn all these integers into something human
@ -515,6 +537,9 @@ class ErrorCollection:
        if len(self.errors) == 0:
            return None
        # with open("ambiguity.json", mode="w", encoding="utf-8") as aj:
        #     aj.write(all_sets.dump_state(alphabet))
        errors = []
        for config_set, set_errors in self.errors.items():
            path = all_sets.find_path_to_set(config_set)
@ -1518,7 +1543,7 @@ class GenerateLR1(GenerateSLR1):
                lookahead_tuple = tuple(sorted(lookahead))
                next.append(Configuration.from_rule(config_next, rule, lookahead=lookahead_tuple))
-            return tuple(sorted(next))
+            return tuple(next)
    def gen_all_sets(self):
        """Generate all of the configuration sets for the grammar.
@ -1951,6 +1976,10 @@ class Grammar:
                new_clause = []
                for symbol in clause:
                    if isinstance(symbol, Terminal):
                        if symbol.value in temp_grammar:
                            raise ValueError(
                                f"'{symbol.value}' is the name of both a Terminal and a NonTerminal rule. This will cause problems."
                            )
                        new_clause.append(symbol.value)
                    else:
                        new_clause.append(symbol)
@ -1959,7 +1988,7 @@ class Grammar:
        return grammar, transparents
-    def build_table(self, start: str | None, generator=None):
+    def build_table(self, start: str | None = None, generator=None):
        """Construct a parse table for this grammar, starting at the named
        nonterminal rule.
        """
Author	SHA1	Message	Date
John Doty	cabc091ee4	Start extracting better expressions	2024-06-02 08:10:18 -07:00
John Doty	e57a827e5c	Better formatting, dump full graph, disallow conflicts	2024-06-02 08:09:47 -07:00
John Doty	d6131ad53e	Stop deleting blank space in errors	2024-06-02 08:09:32 -07:00