lrparsers/parser/wadler/runtime.py

import dataclasses
import typing

from . import builder
from .. import parser
from .. import runtime


############################################################################
# Documents
############################################################################


@dataclasses.dataclass(frozen=True)
class Cons:
    docs: list["Document"]


@dataclasses.dataclass(frozen=True)
class NewLine:
    replace: str


@dataclasses.dataclass(frozen=True)
class ForceBreak:
    silent: bool


@dataclasses.dataclass(frozen=True)
class Indent:
    amount: int
    doc: "Document"


@dataclasses.dataclass(frozen=True)
class Literal:
    text: str


@dataclasses.dataclass(frozen=True)
class Group:
    child: "Document"


@dataclasses.dataclass(frozen=True)
class Marker:
    child: "Document"
    meta: dict


@dataclasses.dataclass(frozen=True)
class Trivia:
    child: "Document"


@dataclasses.dataclass
class Lazy:
    value: typing.Callable[[], "Document"] | "Document"

    def resolve(self) -> "Document":
        if callable(self.value):
            self.value = self.value()
        return self.value

    @classmethod
    def from_tree(cls, tree: runtime.Tree, src: str, printer: "Printer") -> "Lazy":
        return Lazy(lambda: printer.convert_tree_to_document(tree, src))


Document = None | Literal | NewLine | ForceBreak | Cons | Indent | Group | Trivia | Marker | Lazy


def cons(*documents: Document) -> Document:
    if len(documents) == 0:
        return None

    # TODO: Merge adjacent trivia together?

    result = []
    for document in documents:
        if isinstance(document, Cons):
            result.extend(document.docs)
        elif document is not None:
            result.append(document)

    if len(result) == 0:
        return None
    if len(result) == 1:
        return result[0]

    return Cons(result)


def group(document: Document) -> Document:
    if document is None:
        return None

    if isinstance(document, Cons):
        children = list(document.docs)
    else:
        children = [document]

    # Split the trivia off the left and right of the incoming group: trivia
    # at the edges shouldn't affect the inside of the group.
    right_trivia: list[Document] = []
    while len(children) > 0 and isinstance(children[-1], Trivia):
        right_trivia.append(children.pop())

    children.reverse()
    left_trivia: list[Document] = []
    while len(children) > 0 and isinstance(children[-1], Trivia):
        left_trivia.append(children.pop())

    # IF we still have more than one child, *then* we can actually make a
    # group. (A group with one child is a waste. A group with no children
    # doubly so.)
    children.reverse()
    if len(children) > 1:
        children = [Group(cons(*children))]

    results = left_trivia + children + right_trivia
    return cons(*results)


def trivia(document: Document) -> Document:
    if document is None:
        return None

    if isinstance(document, Trivia):
        return document

    return Trivia(document)


############################################################################
# Layouts
############################################################################


class DocumentLayout:
    """A structure that is trivially convertable to a string; the result of
    layout out a document."""

    segments: list[str | tuple[int, int]]

    def __init__(self, segments):
        self.segments = segments

    def apply_to_source(self, original: str) -> str:
        """Convert this layout to a string by copying chunks of the source
        text into the right place.
        """
        result = ""
        for segment in self.segments:
            if isinstance(segment, str):
                result += segment
            else:
                start, end = segment
                result += original[start:end]

        return result


def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
    """Lay out a document to fit within the given width.

    The result of this function is a DocumentLayout which can trivially be
    converted into a string given the original document.
    """

    @dataclasses.dataclass
    class Chunk:
        doc: Document
        indent: int
        flat: bool

        def with_document(self, doc: Document, and_indent: int = 0) -> "Chunk":
            return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)

    column = 0
    chunks: list[Chunk] = [
        Chunk(
            doc=doc,
            indent=0,
            flat=False,  # NOTE: Assume flat until we know how to break.
        )
    ]

    def fits(chunk: Chunk) -> bool:
        remaining = width - column
        if remaining <= 0:
            return False

        stack = list(chunks)
        stack.append(chunk)
        while len(stack) > 0:
            chunk = stack.pop()
            match chunk.doc:
                case None:
                    pass

                case Literal(text):
                    remaining -= len(text)

                case NewLine(replace):
                    if chunk.flat:
                        remaining -= len(replace)
                    else:
                        # These are newlines that are real, so it must have
                        # all fit.
                        return True

                case ForceBreak():
                    # If we're in a flattened chunk then force it to break by
                    # returning false here, otherwise we're at the end of the
                    # line and yes, whatever you were asking about has fit.
                    return not chunk.flat

                case Cons(docs):
                    stack.extend(chunk.with_document(doc) for doc in reversed(docs))

                case Lazy():
                    stack.append(chunk.with_document(chunk.doc.resolve()))

                case Indent(amount, child):
                    stack.append(chunk.with_document(child, and_indent=amount))

                case Group(child):
                    # The difference between this approach and Justin's twist
                    # is that we consider the flat variable in Newline(),
                    # above, rather than here in Group. This makes us more
                    # like Wadler's original formulation, I guess. The
                    # grouping is an implicit transform over alternatives
                    # represented by newline. (If we have other kinds of
                    # alternatives we'll have to work those out elsewhere as
                    # well.)
                    stack.append(chunk.with_document(child))

                case Marker():
                    stack.append(chunk.with_document(chunk.doc.child))

                case Trivia(child):
                    stack.append(chunk.with_document(child))

                case _:
                    typing.assert_never(chunk.doc)

            if remaining < 0:
                return False

        return True  # Everything must fit, so great!

    output: list[str | tuple[int, int]] = []
    while len(chunks) > 0:
        chunk = chunks.pop()
        match chunk.doc:
            case None:
                pass

            case Literal(text):
                output.append(text)
                column += len(text)

            case NewLine(replace):
                if chunk.flat:
                    output.append(replace)
                    column += len(replace)
                else:
                    # TODO: Custom newline expansion, custom indent segments.
                    output.append("\n" + (chunk.indent * indent))
                    column = chunk.indent * len(indent)

            case ForceBreak(silent):
                # TODO: Custom newline expansion, custom indent segments.
                if not silent:
                    output.append("\n" + (chunk.indent * indent))
                    column = chunk.indent * len(indent)

            case Cons(docs):
                chunks.extend(chunk.with_document(doc) for doc in reversed(docs))

            case Indent(amount, doc):
                chunks.append(chunk.with_document(doc, and_indent=amount))

            case Lazy():
                chunks.append(chunk.with_document(chunk.doc.resolve()))

            case Group(child):
                candidate = Chunk(doc=child, indent=chunk.indent, flat=True)
                if chunk.flat or fits(candidate):
                    chunks.append(candidate)
                else:
                    chunks.append(Chunk(doc=child, indent=chunk.indent, flat=False))

            case Marker():
                chunks.append(chunk.with_document(chunk.doc.child))

            case Trivia(child):
                chunks.append(chunk.with_document(child))

            case _:
                typing.assert_never(chunk)

    return DocumentLayout(output)


def resolve_document(doc: Document) -> Document:
    match doc:
        case Cons(docs):
            docs = [resolve_document(d) for d in docs]
            return cons(*docs)

        case Lazy(_):
            return resolve_document(doc.resolve())

        case Group(doc):
            return group(resolve_document(doc))

        case Marker(child, meta):
            return Marker(resolve_document(child), meta)

        case Trivia(child):
            return Trivia(resolve_document(child))

        case Literal() | NewLine() | ForceBreak() | Indent() | None:
            return doc

        case _:
            typing.assert_never(doc)


def child_to_name(child: runtime.Tree | runtime.TokenValue) -> str:
    if isinstance(child, runtime.Tree):
        return f"tree_{child.name}"
    else:
        return f"token_{child.kind}"


def slice_pre_post_trivia(
    trivia_mode: dict[str, parser.TriviaMode],
    trivia_tokens: list[runtime.TokenValue],
) -> tuple[
    list[tuple[parser.TriviaMode, runtime.TokenValue]],
    list[tuple[parser.TriviaMode, runtime.TokenValue]],
]:
    tokens = [
        (trivia_mode.get(token.kind, parser.TriviaMode.Blank), token) for token in trivia_tokens
    ]

    for index, (mode, token) in enumerate(tokens):
        if token.start == 0:
            # Everything is pre-trivia if we're at the start of the file.
            return (tokens, [])

        if mode == parser.TriviaMode.NewLine:
            # This is the first newline; it belongs with the pre-trivia.
            return (tokens[index:], tokens[:index])

    # If we never found a new line then it's all post-trivia.
    return ([], tokens)


############################################################################
# The Actual Pretty Printer
############################################################################


class Matcher:
    table: builder.MatcherTable
    trivia_mode: dict[str, parser.TriviaMode]

    def __init__(self, table: builder.MatcherTable, trivia_mode: dict[str, parser.TriviaMode]):
        self.table = table
        self.trivia_mode = trivia_mode

    def match(
        self,
        printer: "Printer",
        items: list[runtime.Tree | runtime.TokenValue],
        src: str,
    ) -> Document:
        stack: list[tuple[int, Document]] = [(0, None)]
        table = self.table.table

        # eof_trivia = []
        # if len(items) > 0:
        #     item = items[-1]
        #     if isinstance(item, runtime.TokenValue):
        #         eof_trivia = item.post_trivia

        input = [(child_to_name(i), i) for i in items] + [
            (
                "$",
                runtime.TokenValue(
                    kind="$",
                    start=0,
                    end=0,
                    pre_trivia=[],
                    post_trivia=[],
                ),
            )
        ]
        input_index = 0

        while True:
            current_token = input[input_index]
            current_state = stack[-1][0]
            action = table.actions[current_state].get(current_token[0], parser.Error())

            match action:
                case parser.Accept():
                    result = stack[-1][1]
                    # result = cons(result, self.apply_trivia(eof_trivia))
                    return result

                case parser.Reduce(name=name, count=size):
                    child: Document = None
                    if size > 0:
                        for _, c in stack[-size:]:
                            if c is None:
                                continue
                            child = cons(child, c)
                        del stack[-size:]

                    if name[0] == "g":
                        child = group(child)

                    elif name[0] == "i":
                        amount = self.table.indent_amounts[name]
                        child = Indent(amount, child)

                    elif name[0] == "n":
                        replace = self.table.newline_replace[name]
                        child = cons(child, NewLine(replace))

                    elif name[0] == "p":
                        replace = self.table.newline_replace[name]
                        child = cons(NewLine(replace), child)

                    elif name[0] == "f":
                        child = cons(child, ForceBreak(False))

                    elif name[0] == "d":
                        child = cons(ForceBreak(False), child)

                    else:
                        pass  # Reducing a transparent rule probably.

                    goto = table.gotos[stack[-1][0]].get(name)
                    assert goto is not None
                    stack.append((goto, child))

                case parser.Shift():
                    value = current_token[1]

                    if isinstance(value, runtime.Tree):
                        child = Lazy.from_tree(value, src, printer)
                    else:
                        child = cons(
                            trivia(self.apply_pre_trivia(value.pre_trivia, src)),
                            Literal(src[value.start : value.end]),
                            trivia(self.apply_post_trivia(value.post_trivia, src)),
                        )

                    stack.append((action.state, child))
                    input_index += 1

                case parser.Error():
                    raise Exception("How did I get a parse error here??")

    def apply_pre_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
        pre_trivia, _ = slice_pre_post_trivia(self.trivia_mode, trivia_tokens)
        if len(pre_trivia) == 0:
            return None

        at_start_of_file = pre_trivia[0][1].start == 0

        trivia_doc = None
        new_line_count = 0
        for mode, token in pre_trivia:
            match mode:
                case parser.TriviaMode.LineComment:
                    trivia_doc = cons(
                        trivia_doc,
                        Literal(src[token.start : token.end]),
                        ForceBreak(False),
                    )
                    new_line_count = 0  # There will be a newline after this.
                    at_start_of_file = False

                case parser.TriviaMode.Blank:
                    pass

                case parser.TriviaMode.NewLine:
                    new_line_count += 1
                    if new_line_count == 2 and not at_start_of_file:
                        trivia_doc = cons(
                            trivia_doc,
                            ForceBreak(False),
                        )

                case _:
                    typing.assert_never(mode)

        return trivia_doc

    def apply_post_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
        if len(trivia_tokens) > 0 and trivia_tokens[-1].end == len(src):
            return self.apply_eof_trivia(trivia_tokens, src)

        _, post_trivia = slice_pre_post_trivia(self.trivia_mode, trivia_tokens)

        trivia_doc = None
        for mode, token in post_trivia:
            match mode:
                case parser.TriviaMode.Blank:
                    pass

                case parser.TriviaMode.NewLine:
                    # Anything after a line break is not processed as post
                    # trivia.
                    break

                case parser.TriviaMode.LineComment:
                    # Because this is post-trivia, we know there's something
                    # to our left, and we can force the space.
                    trivia_doc = cons(
                        Literal(" "),
                        Literal(src[token.start : token.end]),
                        ForceBreak(True),  # And the line needs to end.
                    )
                    break

                case _:
                    typing.assert_never(mode)

        return trivia_doc

    def apply_eof_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
        # EOF trivia has weird rules, namely, it's like pre and post joined together but.
        tokens = [
            (self.trivia_mode.get(token.kind, parser.TriviaMode.Blank), token)
            for token in trivia_tokens
        ]

        at_start = True
        newline_count = 0
        trivia_doc = None
        for mode, token in tokens:
            match mode:
                case parser.TriviaMode.Blank:
                    pass

                case parser.TriviaMode.NewLine:
                    at_start = False
                    newline_count += 1
                    if newline_count <= 2:
                        trivia_doc = cons(trivia_doc, ForceBreak(False))

                case parser.TriviaMode.LineComment:
                    # Because this is post-trivia, we know there's something
                    # to our left, and we can force the space.
                    trivia_doc = cons(
                        trivia_doc,
                        Literal(" ") if at_start else None,
                        Literal(src[token.start : token.end]),
                    )
                    newline_count = 0
                    at_start = False

                case _:
                    typing.assert_never(mode)

        return trivia_doc


class Printer:
    table: builder.PrettyTable
    matchers: dict[str, Matcher]

    def __init__(self, table: builder.PrettyTable):
        self.table = table
        self.matchers = {
            name: Matcher(value, self.table.trivia_modes) for name, value in table.matchers.items()
        }

    def indent(self) -> str:
        return self.table.indent

    def convert_tree_to_document(self, tree: runtime.Tree, src: str) -> Document:
        name = tree.name
        assert name is not None, "Cannot format a tree if it still has transparent nodes inside"

        matcher = self.matchers[name]
        m = matcher.match(self, list(tree.children), src)
        if m is None:
            raise ValueError(
                f"Could not match a valid tree for {tree.name} with {len(tree.children)} children:\n{tree.format()}"
            )
        return resolve_document(m)

    def format_tree(self, tree: runtime.Tree, src: str, width: int) -> DocumentLayout:
        doc = self.convert_tree_to_document(tree, src)
        return layout_document(doc, width, self.table.indent)