Support prefix newlines, stop generating empty productions
This required rebuilding the matcher compiler significantly, and it was a lot a lot of work. But now we don't generate so many spurious newlines and the document we produce is a lot a lot better.
This commit is contained in:
parent
709ba060b4
commit
c8fef52c0c
1 changed files with 84 additions and 23 deletions
105
parser/wadler.py
105
parser/wadler.py
|
|
@ -314,11 +314,15 @@ class Matcher:
|
||||||
child = cons(child, NewLine(replace))
|
child = cons(child, NewLine(replace))
|
||||||
|
|
||||||
elif name[0] == "p":
|
elif name[0] == "p":
|
||||||
child = cons(NewLine(""), child)
|
replace = self.newline_replace[name]
|
||||||
|
child = cons(NewLine(replace), child)
|
||||||
|
|
||||||
elif name[0] == "f":
|
elif name[0] == "f":
|
||||||
child = cons(child, ForceBreak())
|
child = cons(child, ForceBreak())
|
||||||
|
|
||||||
|
elif name[0] == "d":
|
||||||
|
child = cons(ForceBreak(), child)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
pass # Reducing a transparent rule probably.
|
pass # Reducing a transparent rule probably.
|
||||||
|
|
||||||
|
|
@ -360,10 +364,23 @@ class Printer:
|
||||||
def compile_rule(self, rule: parser.NonTerminal) -> Matcher:
|
def compile_rule(self, rule: parser.NonTerminal) -> Matcher:
|
||||||
generated_grammar: list[typing.Tuple[str, list[str]]] = []
|
generated_grammar: list[typing.Tuple[str, list[str]]] = []
|
||||||
visited: set[str] = set()
|
visited: set[str] = set()
|
||||||
|
|
||||||
|
# In order to generate groups, indents, and newlines we need to
|
||||||
|
# synthesize new productions. And it happens sometimes that we get
|
||||||
|
# duplicates, repeated synthetic productions. It's important to
|
||||||
|
# de-duplicate productions, otherwise we'll wind up with ambiguities
|
||||||
|
# in the parser.
|
||||||
|
#
|
||||||
|
# These dictionaries track the synthetic rules: the keys are
|
||||||
|
# production and also the parameter (if any), and the values are the
|
||||||
|
# names of the productions that produce the effect.
|
||||||
|
#
|
||||||
groups: dict[tuple[str, ...], str] = {}
|
groups: dict[tuple[str, ...], str] = {}
|
||||||
indent_amounts: dict[str, int] = {}
|
indents: dict[tuple[tuple[str, ...], int], str] = {}
|
||||||
newline_map: dict[str, str] = {}
|
newlines: dict[tuple[tuple[str, ...], str], str] = {}
|
||||||
done_forced_break = False
|
prefix_count: int = 0
|
||||||
|
|
||||||
|
final_newlines: dict[str, str] = {}
|
||||||
|
|
||||||
def compile_nonterminal(name: str, rule: parser.NonTerminal):
|
def compile_nonterminal(name: str, rule: parser.NonTerminal):
|
||||||
if name not in visited:
|
if name not in visited:
|
||||||
|
|
@ -374,8 +391,12 @@ class Printer:
|
||||||
|
|
||||||
def compile_production(production: parser.FlattenedWithMetadata) -> list[str]:
|
def compile_production(production: parser.FlattenedWithMetadata) -> list[str]:
|
||||||
nonlocal groups
|
nonlocal groups
|
||||||
nonlocal indent_amounts
|
nonlocal indents
|
||||||
nonlocal done_forced_break
|
nonlocal newlines
|
||||||
|
nonlocal prefix_count
|
||||||
|
nonlocal final_newlines
|
||||||
|
|
||||||
|
prefix_stack: list[str] = []
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
for item in production:
|
for item in production:
|
||||||
|
|
@ -416,26 +437,55 @@ class Printer:
|
||||||
tx_children = [rule_name]
|
tx_children = [rule_name]
|
||||||
|
|
||||||
if pretty.indent:
|
if pretty.indent:
|
||||||
rule_name = f"i_{len(indent_amounts)}"
|
child_key = (tuple(tx_children), pretty.indent)
|
||||||
indent_amounts[rule_name] = pretty.indent
|
rule_name = indents.get(child_key)
|
||||||
|
if rule_name is None:
|
||||||
|
rule_name = f"i_{len(indents)}"
|
||||||
|
indents[child_key] = rule_name
|
||||||
generated_grammar.append((rule_name, tx_children))
|
generated_grammar.append((rule_name, tx_children))
|
||||||
|
|
||||||
tx_children = [rule_name]
|
tx_children = [rule_name]
|
||||||
|
|
||||||
if pretty.newline is not None:
|
if pretty.newline is not None:
|
||||||
newline_rule_name = newline_map.get(pretty.newline)
|
if len(tx_children) == 0:
|
||||||
if newline_rule_name is None:
|
tx_children = result
|
||||||
newline_rule_name = f"n{len(newline_map)}"
|
result = []
|
||||||
newline_map[pretty.newline] = newline_rule_name
|
|
||||||
generated_grammar.append((newline_rule_name, []))
|
|
||||||
|
|
||||||
tx_children.append(newline_rule_name)
|
if len(tx_children) > 0:
|
||||||
|
# n == postfix newline
|
||||||
|
child_key = (tuple(tx_children), pretty.newline)
|
||||||
|
rule_name = newlines.get(child_key)
|
||||||
|
if rule_name is None:
|
||||||
|
rule_name = f"n_{len(newlines)}"
|
||||||
|
newlines[child_key] = rule_name
|
||||||
|
generated_grammar.append((rule_name, tx_children))
|
||||||
|
|
||||||
|
tx_children = [rule_name]
|
||||||
|
|
||||||
|
else:
|
||||||
|
# p == prefix newline
|
||||||
|
rule_name = f"p_{prefix_count}"
|
||||||
|
prefix_count += 1
|
||||||
|
final_newlines[rule_name] = pretty.newline
|
||||||
|
prefix_stack.append(rule_name)
|
||||||
|
|
||||||
if pretty.forced_break:
|
if pretty.forced_break:
|
||||||
if not done_forced_break:
|
if len(tx_children) == 0:
|
||||||
generated_grammar.append(("forced_break", []))
|
tx_children = result
|
||||||
done_forced_break = True
|
result = []
|
||||||
|
|
||||||
tx_children.append("forced_break")
|
if len(tx_children) > 0:
|
||||||
|
# f == postfix forced break
|
||||||
|
rule_name = f"f_{prefix_count}"
|
||||||
|
prefix_count += 1
|
||||||
|
|
||||||
|
generated_grammar.append((rule_name, tx_children))
|
||||||
|
tx_children = [rule_name]
|
||||||
|
else:
|
||||||
|
# d == prefix forced break (to the right of 'f' on my kbd)
|
||||||
|
rule_name = f"d_{prefix_count}"
|
||||||
|
prefix_count += 1
|
||||||
|
prefix_stack.append(rule_name)
|
||||||
|
|
||||||
# If it turned out to have formatting meta then we will
|
# If it turned out to have formatting meta then we will
|
||||||
# have replaced or augmented the translated children
|
# have replaced or augmented the translated children
|
||||||
|
|
@ -444,17 +494,28 @@ class Printer:
|
||||||
# translated children should just be inserted inline.
|
# translated children should just be inserted inline.
|
||||||
result.extend(tx_children)
|
result.extend(tx_children)
|
||||||
|
|
||||||
|
# OK so we might have some prefix newlines. They should contain... things.
|
||||||
|
while len(prefix_stack) > 0:
|
||||||
|
rule_name = prefix_stack.pop()
|
||||||
|
generated_grammar.append((rule_name, result))
|
||||||
|
result = [rule_name]
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
compile_nonterminal(rule.name, rule)
|
start_name = f"yyy_{rule.name}"
|
||||||
gen = self.grammar._generator(rule.name, generated_grammar)
|
compile_nonterminal(start_name, rule)
|
||||||
|
gen = self.grammar._generator(start_name, generated_grammar)
|
||||||
parse_table = gen.gen_table()
|
parse_table = gen.gen_table()
|
||||||
|
|
||||||
newline_replace = {v: k for k, v in newline_map.items()}
|
for (_, replacement), rule_name in newlines.items():
|
||||||
|
final_newlines[rule_name] = replacement
|
||||||
|
|
||||||
|
indent_amounts = {rule_name: amount for ((_, amount), rule_name) in indents.items()}
|
||||||
|
|
||||||
return Matcher(
|
return Matcher(
|
||||||
parse_table,
|
parse_table,
|
||||||
indent_amounts,
|
indent_amounts,
|
||||||
newline_replace,
|
final_newlines,
|
||||||
)
|
)
|
||||||
|
|
||||||
def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher:
|
def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue