lrparsers/tests/test_wadler.py
John Doty 276449287d Allow for text to follow tokens in pretty-printing
It's weird that it counts against the line length though; if you
were going to break, you could ignore it, right? At least, for the
grammar I'm working on here....
2024-09-11 11:22:41 -07:00

175 lines
4.2 KiB
Python

import typing
from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt, indent
import parser.runtime as runtime
import parser.wadler as wadler
class JsonGrammar(Grammar):
    """JSON grammar that the tests in this file parse and pretty-print.

    The rule bodies are wrapped in the formatting combinators imported from
    ``parser.parser`` (``group``, ``indent``, ``newline``); the tests below feed
    the resulting trees to ``wadler.Printer``.
    """

    # NOTE(review): presumably the name of the start rule (there is a `root` rule
    # below) and the terminals treated as skippable trivia -- confirm against
    # the Grammar base class.
    start = "root"
    trivia = ["BLANKS"]

    # A document is exactly one value.
    @rule
    def root(self):
        return self.value

    # Any one of the JSON value forms.
    # NOTE(review): `transparent=True` presumably keeps this rule from producing
    # its own tree node -- confirm against `rule`.
    @rule(transparent=True)
    def value(self):
        return (
            self.object
            | self.array
            | self.NUMBER
            | self.TRUE
            | self.FALSE
            | self.NULL
            | self.STRING
        )

    # `{`, an optional indented list of pairs, a newline marker, then `}`,
    # all wrapped in a single group.
    @rule
    def object(self):
        return group(self.LCURLY + opt(indent(self._object_pairs)) + newline() + self.RCURLY)

    # Right-recursive list of pairs: every pair is preceded by a newline marker
    # and pairs are separated by COMMA (no trailing comma alternative).
    @rule
    def _object_pairs(self):
        return alt(
            newline() + self.object_pair,
            newline() + self.object_pair + self.COMMA + self._object_pairs,
        )

    # A single `STRING COLON value` member, grouped on its own.
    @rule
    def object_pair(self):
        return group(self.STRING + self.COLON + self.value)

    # Same shape as `object`, with square brackets and plain values as items.
    @rule
    def array(self):
        return group(self.LSQUARE + opt(indent(self._array_items)) + newline() + self.RSQUARE)

    # Right-recursive list of values, mirroring `_object_pairs`.
    @rule
    def _array_items(self):
        return alt(
            newline() + self.value,
            newline() + self.value + self.COMMA + self._array_items,
        )

    # One or more of space, tab, CR, LF; listed in `trivia` above.
    BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())

    LCURLY = Terminal("{")
    RCURLY = Terminal("}")
    COMMA = Terminal(",")
    # NOTE(review): `format_follow=" "` presumably makes the printer emit a space
    # after the colon (test_layout_basic expects `"a": true`) -- confirm
    # against Terminal.
    COLON = Terminal(":", format_follow=" ")
    LSQUARE = Terminal("[")
    RSQUARE = Terminal("]")
    TRUE = Terminal("true")
    FALSE = Terminal("false")
    NULL = Terminal("null")

    # Reads as: digits, then an optional `.digits` fraction, then an optional
    # exponent (`e`/`E`, optional sign, digits). No leading sign is part of
    # this terminal.
    NUMBER = Terminal(
        Re.seq(
            Re.set(("0", "9")).plus(),
            Re.seq(
                Re.literal("."),
                Re.set(("0", "9")).plus(),
            ).question(),
            Re.seq(
                Re.set("e", "E"),
                Re.set("+", "-").question(),
                Re.set(("0", "9")).plus(),
            ).question(),
        ),
    )

    # Reads as: a double quote, then any run of either a character that is
    # neither `"` nor `\` or a backslash followed by any character, then a
    # closing double quote.
    STRING = Terminal(
        Re.seq(
            Re.literal('"'),
            (~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(),
            Re.literal('"'),
        )
    )
# Module-level fixtures shared by the tests below: the grammar instance, the
# parse table and lexer built from it, and a parser constructed from the table.
JSON = JsonGrammar()
JSON_TABLE = JSON.build_table()
JSON_LEXER = JSON.compile_lexer()
JSON_PARSER = runtime.Parser(JSON_TABLE)
def flatten_document(doc: wadler.Document, src: str) -> list:
match doc:
case wadler.NewLine():
return ["<newline>"]
case wadler.Indent():
return [f"<indent {doc.amount}>", flatten_document(doc.doc, src)]
case wadler.Text(start, end):
return [src[start:end]]
case wadler.Group():
return [flatten_document(doc.child, src)]
case wadler.Lazy():
return flatten_document(doc.resolve(), src)
case wadler.Cons():
return flatten_document(doc.left, src) + flatten_document(doc.right, src)
case None:
return []
case _:
typing.assert_never(doc)
def test_convert_tree_to_document():
    """Parse a small JSON object and check the flattened document structure."""
    source = '{"a": true, "b":[1,2,3]}'

    token_stream = runtime.GenericTokenStream(source, JSON_LEXER)
    tree, errors = JSON_PARSER.parse(token_stream)
    assert [] == errors
    assert tree is not None

    document = wadler.Printer(JSON).convert_tree_to_document(tree)
    flattened = flatten_document(document, source)

    # Each nested list is one group as rendered by flatten_document.
    expected_array = ["[", "1", ",", "<newline>", "2", ",", "<newline>", "3", "<newline>", "]"]
    expected = [
        [
            "{",
            ['"a"', ":", "true"],
            ",",
            "<newline>",
            ['"b"', ":", expected_array],
            "<newline>",
            "}",
        ]
    ]
    assert flattened == expected
def test_layout_basic():
    """Format a small JSON object and compare the printed text."""
    source = '{"a": true, "b":[1,2,3]}'

    token_stream = runtime.GenericTokenStream(source, JSON_LEXER)
    tree, errors = JSON_PARSER.parse(token_stream)
    assert [] == errors
    assert tree is not None

    # NOTE(review): 10 is presumably the target line width passed to the
    # printer -- confirm against Printer.format_tree.
    layout = wadler.Printer(JSON).format_tree(tree, 10)
    formatted = layout.apply_to_source(source)

    # NOTE(review): the lines of this literal carry no leading spaces here; if
    # the printer indents nested items, confirm the expected text against the
    # repository copy before relying on it.
    expected = """
{
"a": true,
"b": [
1,
2,
3
]
}
""".strip()
    assert formatted == expected