Support prefix newlines, stop generating empty productions

This required rebuilding the matcher compiler significantly, and it was a lot a lot of work. But now we don't generate so many spurious newlines and the document we produce is a lot a lot better.
2024-09-13 22:41:33 -07:00 · 2024-09-13 22:41:33 -07:00 · c8fef52c0c
commit c8fef52c0c
parent 709ba060b4
1 changed files with 84 additions and 23 deletions
--- a/parser/wadler.py
+++ b/parser/wadler.py
@ -314,11 +314,15 @@ class Matcher:
                        child = cons(child, NewLine(replace))
                    elif name[0] == "p":
-                        child = cons(NewLine(""), child)
+                        replace = self.newline_replace[name]
                        child = cons(NewLine(replace), child)
                    elif name[0] == "f":
                        child = cons(child, ForceBreak())
                    elif name[0] == "d":
                        child = cons(ForceBreak(), child)
                    else:
                        pass  # Reducing a transparent rule probably.
@ -360,10 +364,23 @@ class Printer:
    def compile_rule(self, rule: parser.NonTerminal) -> Matcher:
        generated_grammar: list[typing.Tuple[str, list[str]]] = []
        visited: set[str] = set()
        # In order to generate groups, indents, and newlines we need to
        # synthesize new productions. And it happens sometimes that we get
        # duplicates, repeated synthetic productions. It's important to
        # de-duplicate productions, otherwise we'll wind up with ambiguities
        # in the parser.
        #
        # These dictionaries track the synthetic rules: the keys are
        # production and also the parameter (if any), and the values are the
        # names of the productions that produce the effect.
        #
        groups: dict[tuple[str, ...], str] = {}
-        indent_amounts: dict[str, int] = {}
+        indents: dict[tuple[tuple[str, ...], int], str] = {}
-        newline_map: dict[str, str] = {}
+        newlines: dict[tuple[tuple[str, ...], str], str] = {}
-        done_forced_break = False
+        prefix_count: int = 0
        final_newlines: dict[str, str] = {}
        def compile_nonterminal(name: str, rule: parser.NonTerminal):
            if name not in visited:
@ -374,8 +391,12 @@ class Printer:
        def compile_production(production: parser.FlattenedWithMetadata) -> list[str]:
            nonlocal groups
-            nonlocal indent_amounts
+            nonlocal indents
-            nonlocal done_forced_break
+            nonlocal newlines
            nonlocal prefix_count
            nonlocal final_newlines
            prefix_stack: list[str] = []
            result = []
            for item in production:
@ -416,26 +437,55 @@ class Printer:
                            tx_children = [rule_name]
                        if pretty.indent:
-                            rule_name = f"i_{len(indent_amounts)}"
+                            child_key = (tuple(tx_children), pretty.indent)
-                            indent_amounts[rule_name] = pretty.indent
+                            rule_name = indents.get(child_key)
                            if rule_name is None:
                                rule_name = f"i_{len(indents)}"
                                indents[child_key] = rule_name
                                generated_grammar.append((rule_name, tx_children))
                            tx_children = [rule_name]
                        if pretty.newline is not None:
-                            newline_rule_name = newline_map.get(pretty.newline)
+                            if len(tx_children) == 0:
-                            if newline_rule_name is None:
+                                tx_children = result
-                                newline_rule_name = f"n{len(newline_map)}"
+                                result = []
                                newline_map[pretty.newline] = newline_rule_name
                                generated_grammar.append((newline_rule_name, []))
-                            tx_children.append(newline_rule_name)
+                            if len(tx_children) > 0:
                                # n == postfix newline
                                child_key = (tuple(tx_children), pretty.newline)
                                rule_name = newlines.get(child_key)
                                if rule_name is None:
                                    rule_name = f"n_{len(newlines)}"
                                    newlines[child_key] = rule_name
                                    generated_grammar.append((rule_name, tx_children))
                                tx_children = [rule_name]
                            else:
                                # p == prefix newline
                                rule_name = f"p_{prefix_count}"
                                prefix_count += 1
                                final_newlines[rule_name] = pretty.newline
                                prefix_stack.append(rule_name)
                        if pretty.forced_break:
-                            if not done_forced_break:
+                            if len(tx_children) == 0:
-                                generated_grammar.append(("forced_break", []))
+                                tx_children = result
-                                done_forced_break = True
+                                result = []
-                            tx_children.append("forced_break")
+                            if len(tx_children) > 0:
                                # f == postfix forced break
                                rule_name = f"f_{prefix_count}"
                                prefix_count += 1
                                generated_grammar.append((rule_name, tx_children))
                                tx_children = [rule_name]
                            else:
                                # d == prefix forced break (to the right of 'f' on my kbd)
                                rule_name = f"d_{prefix_count}"
                                prefix_count += 1
                                prefix_stack.append(rule_name)
                    # If it turned out to have formatting meta then we will
                    # have replaced or augmented the translated children
@ -444,17 +494,28 @@ class Printer:
                    # translated children should just be inserted inline.
                    result.extend(tx_children)
            # OK so we might have some prefix newlines. They should contain... things.
            while len(prefix_stack) > 0:
                rule_name = prefix_stack.pop()
                generated_grammar.append((rule_name, result))
                result = [rule_name]
            return result
-        compile_nonterminal(rule.name, rule)
+        start_name = f"yyy_{rule.name}"
-        gen = self.grammar._generator(rule.name, generated_grammar)
+        compile_nonterminal(start_name, rule)
        gen = self.grammar._generator(start_name, generated_grammar)
        parse_table = gen.gen_table()
-        newline_replace = {v: k for k, v in newline_map.items()}
+        for (_, replacement), rule_name in newlines.items():
            final_newlines[rule_name] = replacement
        indent_amounts = {rule_name: amount for ((_, amount), rule_name) in indents.items()}
        return Matcher(
            parse_table,
            indent_amounts,
-            newline_replace,
+            final_newlines,
        )
    def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher: