# A prettier printer. import dataclasses import typing from . import parser from . import runtime # TODO: I think I want a *force break*, i.e., a document which forces things # to not fit on one line. @dataclasses.dataclass(frozen=True) class Cons: left: "Document" right: "Document" def cons(left: "Document", right: "Document") -> "Document": if left and right: return Cons(left, right) else: return left or right @dataclasses.dataclass(frozen=True) class NewLine: replace: str @dataclasses.dataclass(frozen=True) class ForceBreak: pass @dataclasses.dataclass(frozen=True) class Indent: amount: int doc: "Document" @dataclasses.dataclass(frozen=True) class Text: start: int end: int @dataclasses.dataclass(frozen=True) class Literal: text: str @dataclasses.dataclass(frozen=True) class Group: child: "Document" @dataclasses.dataclass class Lazy: value: typing.Callable[[], "Document"] | "Document" def resolve(self) -> "Document": if callable(self.value): self.value = self.value() return self.value @classmethod def from_tree(cls, tree: runtime.Tree, printer: "Printer") -> "Lazy": return Lazy(lambda: printer.convert_tree_to_document(tree)) Document = None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Lazy class DocumentLayout: segments: list[str | tuple[int, int]] def __init__(self, segments): self.segments = segments def apply_to_source(self, original: str) -> str: result = "" for segment in self.segments: if isinstance(segment, str): result += segment else: start, end = segment result += original[start:end] return result def layout_document(doc: Document, width: int) -> DocumentLayout: """Lay out a document to fit within the given width. The result of this function is a layout which can trivially be converted into a string given the original document. """ @dataclasses.dataclass class Chunk: doc: Document indent: int flat: bool def with_document(self, doc: Document, and_indent: int = 0) -> "Chunk": return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat) column = 0 chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)] def fits(chunk: Chunk) -> bool: remaining = width - column if remaining <= 0: return False stack = list(chunks) stack.append(chunk) while len(stack) > 0: chunk = stack.pop() match chunk.doc: case None: pass case Text(start, end): remaining -= end - start case Literal(text): remaining -= len(text) case NewLine(replace): if chunk.flat: remaining -= len(replace) else: # These are newlines that are real, so it must have # all fit. return True case ForceBreak(): # If we're in a flattened chunk then force it to break by # returning false here, otherwise we're at the end of the # line and yes, whatever you were asking about has fit. return not chunk.flat case Cons(left, right): stack.append(chunk.with_document(right)) stack.append(chunk.with_document(left)) case Lazy(): stack.append(chunk.with_document(chunk.doc.resolve())) case Indent(amount, child): stack.append(chunk.with_document(child, and_indent=amount)) case Group(child): # The difference between this approach and Justin's twist # is that we consider the flat variable in Newline(), # above, rather than here in Group. This makes us more # like Wadler's original formulation, I guess. The # grouping is an implicit transform over alternatives # represented by newline. (If we have other kinds of # alternatives we'll have to work those out elsewhere as # well.) stack.append(chunk.with_document(child)) case _: typing.assert_never(chunk.doc) if remaining < 0: return False return True # Everything must fit, so great! output: list[str | tuple[int, int]] = [] while len(chunks) > 0: chunk = chunks.pop() match chunk.doc: case None: pass case Text(start, end): output.append((start, end)) column += end - start case Literal(text): output.append(text) column += len(text) case NewLine(replace): if chunk.flat: output.append(replace) column += len(replace) else: # TODO: Custom newline expansion, custom indent segments. output.append("\n" + (chunk.indent * " ")) column = chunk.indent case ForceBreak(): # TODO: Custom newline expansion, custom indent segments. output.append("\n" + (chunk.indent * " ")) column = chunk.indent case Cons(left, right): chunks.append(chunk.with_document(right)) chunks.append(chunk.with_document(left)) case Indent(amount, doc): chunks.append(chunk.with_document(doc, and_indent=amount)) case Lazy(): chunks.append(chunk.with_document(chunk.doc.resolve())) case Group(child): candidate = Chunk(doc=child, indent=chunk.indent, flat=True) if chunk.flat or fits(candidate): chunks.append(candidate) else: chunks.append(Chunk(doc=child, indent=chunk.indent, flat=False)) case _: typing.assert_never(chunk) return DocumentLayout(output) def resolve_document(doc: Document) -> Document: match doc: case Cons(left, right): lr = resolve_document(left) rr = resolve_document(right) if lr is not left or rr is not right: return cons(lr, rr) else: return doc case Lazy(_): return resolve_document(doc.resolve()) case _: return doc def child_to_name(child: runtime.Tree | runtime.TokenValue) -> str: # TODO: RECONSIDER THE EXISTENCE OF THIS FUNCTION # The naming condition is important but if isinstance(child, runtime.Tree): return f"tree_{child.name}" else: return f"token_{child.kind}" @dataclasses.dataclass class Matcher: table: parser.ParseTable indent_amounts: dict[str, int] newline_replace: dict[str, str] def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document: stack: list[tuple[int, Document]] = [(0, None)] table = self.table input = [(child_to_name(i), i) for i in items] + [ ( "$", runtime.TokenValue( kind="$", start=0, end=0, pre_trivia=[], post_trivia=[], ), ) ] input_index = 0 while True: current_token = input[input_index] current_state = stack[-1][0] action = table.actions[current_state].get(current_token[0], parser.Error()) match action: case parser.Accept(): return stack[-1][1] case parser.Reduce(name=name, count=size): child: Document = None if size > 0: for _, c in stack[-size:]: if c is None: continue child = cons(child, c) del stack[-size:] if name[0] == "g": child = Group(child) elif name[0] == "i": amount = self.indent_amounts[name] child = Indent(amount, child) elif name[0] == "n": replace = self.newline_replace[name] child = cons(child, NewLine(replace)) elif name[0] == "p": child = cons(NewLine(""), child) elif name[0] == "f": child = cons(child, ForceBreak()) else: pass # Reducing a transparent rule probably. goto = table.gotos[stack[-1][0]].get(name) assert goto is not None stack.append((goto, child)) case parser.Shift(): value = current_token[1] if isinstance(value, runtime.Tree): child = Lazy.from_tree(value, printer) else: # TODO: Consider trivia and preserve comments! child = Text(value.start, value.end) stack.append((action.state, child)) input_index += 1 case parser.Error(): raise Exception("How did I get a parse error here??") class Printer: # TODO: Pre-generate the matcher tables for a grammar, to make it # possible to do codegen in other languages. grammar: parser.Grammar _matchers: dict[str, Matcher] _nonterminals: dict[str, parser.NonTerminal] def __init__(self, grammar: parser.Grammar): self.grammar = grammar self._nonterminals = {nt.name: nt for nt in grammar.non_terminals()} self._matchers = {} def lookup_nonterminal(self, name: str) -> parser.NonTerminal: return self._nonterminals[name] def compile_rule(self, rule: parser.NonTerminal) -> Matcher: generated_grammar: list[typing.Tuple[str, list[str]]] = [] visited: set[str] = set() groups: dict[tuple[str, ...], str] = {} indent_amounts: dict[str, int] = {} newline_map: dict[str, str] = {} done_forced_break = False def compile_nonterminal(name: str, rule: parser.NonTerminal): if name not in visited: visited.add(name) for production in rule.fn(self.grammar).flatten(with_metadata=True): trans_prod = compile_production(production) generated_grammar.append((name, trans_prod)) def compile_production(production: parser.FlattenedWithMetadata) -> list[str]: nonlocal groups nonlocal indent_amounts nonlocal done_forced_break result = [] for item in production: if isinstance(item, str): nt = self._nonterminals[item] if nt.transparent: # If it's transparent then we make a new set of # productions that covers the contents of the # transparent nonterminal. name = "xxx_" + nt.name compile_nonterminal(name, nt) result.append(name) else: # Otherwise it's a "token" in our input, named # "tree_{whatever}". result.append(f"tree_{item}") elif isinstance(item, parser.Terminal): # If it's a terminal it will appear in our input as # "token_{whatever}". result.append(f"token_{item.name}") else: meta, children = item tx_children = compile_production(children) pretty = meta.get("format") if isinstance(pretty, parser.FormatMeta): if pretty.group: # Make a fake rule. child_key = tuple(tx_children) rule_name = groups.get(child_key) if rule_name is None: rule_name = f"g_{len(groups)}" groups[child_key] = rule_name generated_grammar.append((rule_name, tx_children)) tx_children = [rule_name] if pretty.indent: rule_name = f"i_{len(indent_amounts)}" indent_amounts[rule_name] = pretty.indent generated_grammar.append((rule_name, tx_children)) tx_children = [rule_name] if pretty.newline is not None: newline_rule_name = newline_map.get(pretty.newline) if newline_rule_name is None: newline_rule_name = f"n{len(newline_map)}" newline_map[pretty.newline] = newline_rule_name generated_grammar.append((newline_rule_name, [])) tx_children.append(newline_rule_name) if pretty.forced_break: if not done_forced_break: generated_grammar.append(("forced_break", [])) done_forced_break = True tx_children.append("forced_break") # If it turned out to have formatting meta then we will # have replaced or augmented the translated children # appropriately. Otherwise, if it's highlighting meta or # something else, we'll have ignored it and the # translated children should just be inserted inline. result.extend(tx_children) return result compile_nonterminal(rule.name, rule) gen = self.grammar._generator(rule.name, generated_grammar) parse_table = gen.gen_table() newline_replace = {v: k for k, v in newline_map.items()} return Matcher( parse_table, indent_amounts, newline_replace, ) def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher: result = self._matchers.get(rule.name) if result is None: result = self.compile_rule(rule) self._matchers[rule.name] = result return result def convert_tree_to_document(self, tree: runtime.Tree) -> Document: name = tree.name assert name is not None, "Cannot format a tree if it still has transparent nodes inside" rule = self.lookup_nonterminal(name) matcher = self.rule_to_matcher(rule) m = matcher.match(self, list(tree.children)) if m is None: raise ValueError( f"Could not match a valid tree for {tree.name} with {len(tree.children)} children:\n{tree.format()}" ) return resolve_document(m) def format_tree(self, tree: runtime.Tree, width: int) -> DocumentLayout: doc = self.convert_tree_to_document(tree) return layout_document(doc, width)