lrparsers/tests/test_wadler.py
John Doty c31d527077 [wadler] Trivia escapes groups
This means that forced breaks from comments don't screw up the
following single-line things. But this still isn't right; we need to
fine tune how we represent trivia.
2024-09-15 08:51:18 -07:00

288 lines
7.2 KiB
Python

import typing
from parser.parser import (
Grammar,
Re,
Terminal,
rule,
opt,
group,
newline,
alt,
indent,
seq,
sp,
nl,
br,
)
import parser.runtime as runtime
import parser.wadler as wadler
class JsonGrammar(Grammar):
start = "root"
trivia = ["BLANKS"]
@rule
def root(self):
return self.value
@rule(transparent=True)
def value(self):
return (
self.object
| self.array
| self.NUMBER
| self.TRUE
| self.FALSE
| self.NULL
| self.STRING
)
@rule
def object(self):
return group(
self.LCURLY + opt(indent(newline() + self._object_pairs)) + newline() + self.RCURLY
)
@rule
def _object_pairs(self):
return alt(
self.object_pair,
self.object_pair + self.COMMA + newline(" ") + self._object_pairs,
)
@rule
def object_pair(self):
return group(self.STRING + self.COLON + indent(newline(" ") + self.value))
@rule
def array(self):
return group(
self.LSQUARE + opt(indent(newline() + self._array_items)) + newline() + self.RSQUARE
)
@rule
def _array_items(self):
return alt(
self.value,
self.value + self.COMMA + newline(" ") + self._array_items,
)
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
LCURLY = Terminal("{")
RCURLY = Terminal("}")
COMMA = Terminal(",")
COLON = Terminal(":")
LSQUARE = Terminal("[")
RSQUARE = Terminal("]")
TRUE = Terminal("true")
FALSE = Terminal("false")
NULL = Terminal("null")
NUMBER = Terminal(
Re.seq(
Re.set(("0", "9")).plus(),
Re.seq(
Re.literal("."),
Re.set(("0", "9")).plus(),
).question(),
Re.seq(
Re.set("e", "E"),
Re.set("+", "-").question(),
Re.set(("0", "9")).plus(),
).question(),
),
)
STRING = Terminal(
Re.seq(
Re.literal('"'),
(~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(),
Re.literal('"'),
)
)
JSON = JsonGrammar()
JSON_TABLE = JSON.build_table()
JSON_LEXER = JSON.compile_lexer()
JSON_PARSER = runtime.Parser(JSON_TABLE)
def flatten_document(doc: wadler.Document, src: str) -> list:
match doc:
case wadler.NewLine(replace):
return [f"<newline {repr(replace)}>"]
case wadler.ForceBreak():
return ["<forced break>"]
case wadler.Indent():
return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]]
case wadler.Text(start, end):
return [src[start:end]]
case wadler.Literal(text):
return [text]
case wadler.Group():
return [flatten_document(doc.child, src)]
case wadler.Lazy():
return flatten_document(doc.resolve(), src)
case wadler.Cons():
result = []
for d in doc.docs:
result += flatten_document(d, src)
return result
case None:
return []
case wadler.Marker():
return [f"<marker {repr(doc.meta)}>", flatten_document(doc.child, src)]
case wadler.Trivia():
return [f"<trivia>", flatten_document(doc.child, src)]
case _:
typing.assert_never(doc)
def test_convert_tree_to_document():
text = '{"a": true, "b":[1,2,3]}'
tokens = runtime.GenericTokenStream(text, JSON_LEXER)
tree, errors = JSON_PARSER.parse(tokens)
assert [] == errors
assert tree is not None
printer = wadler.Printer(JSON)
doc = flatten_document(printer.convert_tree_to_document(tree), text)
assert doc == [
[
"{",
[
"<indent 1>",
[
"<newline ''>",
[
'"a"',
":",
[
"<indent 1>",
["<newline ' '>", "true"],
],
],
",",
"<newline ' '>",
[
'"b"',
":",
[
"<indent 1>",
[
"<newline ' '>",
[
"[",
[
"<indent 1>",
[
"<newline ''>",
"1",
",",
"<newline ' '>",
"2",
",",
"<newline ' '>",
"3",
],
],
"<newline ''>",
"]",
],
],
],
],
],
],
"<newline ''>",
"}",
]
]
def test_layout_basic():
text = '{"a": true, "b":[1,2,3], "c":[1,2,3,4,5,6,7]}'
tokens = runtime.GenericTokenStream(text, JSON_LEXER)
tree, errors = JSON_PARSER.parse(tokens)
assert [] == errors
assert tree is not None
printer = wadler.Printer(JSON)
result = printer.format_tree(tree, 50).apply_to_source(text)
assert (
result
== """
{
"a": true,
"b": [1, 2, 3],
"c": [1, 2, 3, 4, 5, 6, 7]
}
""".strip()
)
def test_forced_break():
class TG(Grammar):
start = "root"
trivia = ["BLANKS"]
@rule
def root(self):
return self._expression
@rule
def _expression(self):
return self.word | self.list
@rule
def list(self):
return group(self.LPAREN, indent(nl, self._expressions), nl, self.RPAREN)
@rule
def _expressions(self):
return self._expression | seq(self._expressions, sp, self._expression)
@rule
def word(self):
return self.OK | seq(self.BREAK, br, self.BREAK)
LPAREN = Terminal("(")
RPAREN = Terminal(")")
OK = Terminal("ok")
BREAK = Terminal("break")
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
g = TG()
g_lexer = g.compile_lexer()
g_parser = runtime.Parser(g.build_table())
text = "((ok ok) (ok break break ok) (ok ok ok ok))"
tree, errors = g_parser.parse(runtime.GenericTokenStream(text, g_lexer))
assert errors == []
assert tree is not None
printer = wadler.Printer(g)
result = printer.format_tree(tree, 200).apply_to_source(text)
assert (
result
== """
(
(ok ok)
(
ok
break
break
ok
)
(ok ok ok ok)
)
""".strip()
)