Really messing around with trivia, it's not good yet
It's really not clear how to track it and how to compose it with groups yet. Really very difficult.
This commit is contained in:
parent
71b59302fa
commit
d5ccd5b147
4 changed files with 131 additions and 30 deletions
21
grammar.py
21
grammar.py
|
|
@ -5,6 +5,7 @@ from parser import (
|
|||
Re,
|
||||
Rule,
|
||||
Terminal,
|
||||
TriviaMode,
|
||||
alt,
|
||||
br,
|
||||
group,
|
||||
|
|
@ -23,7 +24,7 @@ class FineGrammar(Grammar):
|
|||
# generator = parser.GenerateLR1
|
||||
start = "File"
|
||||
|
||||
trivia = ["BLANKS", "COMMENT"]
|
||||
trivia = ["BLANKS", "LINE_BREAKS", "COMMENT"]
|
||||
|
||||
pretty_indent = " "
|
||||
|
||||
|
|
@ -76,12 +77,10 @@ class FineGrammar(Grammar):
|
|||
def class_declaration(self) -> Rule:
|
||||
return seq(
|
||||
group(
|
||||
group(
|
||||
self.CLASS,
|
||||
sp,
|
||||
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
|
||||
sp,
|
||||
),
|
||||
self.CLASS,
|
||||
sp,
|
||||
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
|
||||
sp,
|
||||
self.LCURLY,
|
||||
),
|
||||
indent(nl, mark(opt(self.class_body), field="body")),
|
||||
|
|
@ -193,7 +192,7 @@ class FineGrammar(Grammar):
|
|||
def block(self) -> Rule:
|
||||
return alt(
|
||||
group(self.LCURLY, nl, self.RCURLY),
|
||||
seq(self.LCURLY, indent(nl, self.block_body), nl, self.RCURLY),
|
||||
group(self.LCURLY, indent(sp, self.block_body), sp, self.RCURLY),
|
||||
)
|
||||
|
||||
@rule("BlockBody")
|
||||
|
|
@ -201,7 +200,7 @@ class FineGrammar(Grammar):
|
|||
return alt(
|
||||
self.expression,
|
||||
self._statement_list,
|
||||
seq(self._statement_list, nl, self.expression),
|
||||
seq(self._statement_list, br, self.expression),
|
||||
)
|
||||
|
||||
@rule
|
||||
|
|
@ -420,10 +419,12 @@ class FineGrammar(Grammar):
|
|||
def field_value(self) -> Rule:
|
||||
return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
BLANKS = Terminal(Re.set(" ", "\t").plus())
|
||||
LINE_BREAKS = Terminal(Re.set("\r", "\n").plus(), trivia_mode=TriviaMode.NewLine)
|
||||
COMMENT = Terminal(
|
||||
Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
|
||||
highlight=highlight.comment.line,
|
||||
trivia_mode=TriviaMode.LineComment,
|
||||
)
|
||||
|
||||
ARROW = Terminal("->", highlight=highlight.keyword.operator)
|
||||
|
|
|
|||
|
|
@ -543,12 +543,12 @@ class Harness:
|
|||
|
||||
case wadler.Text(start, end):
|
||||
if self.source is not None:
|
||||
append(f"< {self.source[start:end]}")
|
||||
append(f"< {repr(self.source[start:end])}")
|
||||
else:
|
||||
append(f"< ??? {start}:{end}")
|
||||
|
||||
case wadler.Literal(text):
|
||||
append(f"' {text}")
|
||||
append(f"literal {repr(text)}")
|
||||
|
||||
case wadler.Group():
|
||||
append("group")
|
||||
|
|
|
|||
|
|
@ -2724,11 +2724,35 @@ sp = newline(" ")
|
|||
|
||||
|
||||
def forced_break() -> Rule:
|
||||
"""Indicate that the line MUST break right here, for whatever reason."""
|
||||
return mark(Nothing, format=FormatMeta(forced_break=True))
|
||||
|
||||
|
||||
br = forced_break()
|
||||
|
||||
|
||||
class TriviaMode(enum.Enum):
|
||||
"""Indicate how a particular bit of trivia is to be handled during
|
||||
pretty-printing. Attach this to a "trivia_mode" property on a Terminal
|
||||
definition.
|
||||
|
||||
- Ignore means that the trivia should be ignored. (This is the default.)
|
||||
|
||||
- NewLine means that the trivia is a line break. This is important for
|
||||
other modes, specifically...
|
||||
|
||||
- LineComment means that the trivia is a line comment. If a line comment
|
||||
is alone on a line, then a forced break is inserted so that it remains
|
||||
alone on its line after formatting, otherwise it is attached to whatever
|
||||
is to its left by a single space. A LineComment is *always* followed by
|
||||
a forced break.
|
||||
"""
|
||||
|
||||
Ignore = 0
|
||||
NewLine = 1
|
||||
LineComment = 2
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Finally, the base class for grammars
|
||||
###############################################################################
|
||||
|
|
|
|||
112
parser/wadler.py
112
parser/wadler.py
|
|
@ -17,13 +17,6 @@ class Cons:
|
|||
right: "Document"
|
||||
|
||||
|
||||
def cons(left: "Document", right: "Document") -> "Document":
|
||||
if left and right:
|
||||
return Cons(left, right)
|
||||
else:
|
||||
return left or right
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class NewLine:
|
||||
replace: str
|
||||
|
|
@ -31,7 +24,7 @@ class NewLine:
|
|||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class ForceBreak:
|
||||
pass
|
||||
silent: bool
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
|
|
@ -79,6 +72,21 @@ class Lazy:
|
|||
Document = None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Marker | Lazy
|
||||
|
||||
|
||||
def cons(*documents: Document) -> Document:
|
||||
result = None
|
||||
for doc in documents:
|
||||
if result is None:
|
||||
result = doc
|
||||
elif doc is not None:
|
||||
result = Cons(result, doc)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def group(document: Document) -> Document:
|
||||
return Group(document)
|
||||
|
||||
|
||||
############################################################################
|
||||
# Layouts
|
||||
############################################################################
|
||||
|
|
@ -125,7 +133,13 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
|
|||
return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)
|
||||
|
||||
column = 0
|
||||
chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)]
|
||||
chunks: list[Chunk] = [
|
||||
Chunk(
|
||||
doc=doc,
|
||||
indent=0,
|
||||
flat=False, # NOTE: Assume flat until we know how to break.
|
||||
)
|
||||
]
|
||||
|
||||
def fits(chunk: Chunk) -> bool:
|
||||
remaining = width - column
|
||||
|
|
@ -216,10 +230,11 @@ def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
|
|||
output.append("\n" + (chunk.indent * indent))
|
||||
column = chunk.indent * len(indent)
|
||||
|
||||
case ForceBreak():
|
||||
case ForceBreak(silent):
|
||||
# TODO: Custom newline expansion, custom indent segments.
|
||||
output.append("\n" + (chunk.indent * indent))
|
||||
column = chunk.indent * len(indent)
|
||||
if not silent:
|
||||
output.append("\n" + (chunk.indent * indent))
|
||||
column = chunk.indent * len(indent)
|
||||
|
||||
case Cons(left, right):
|
||||
chunks.append(chunk.with_document(right))
|
||||
|
|
@ -276,11 +291,18 @@ class Matcher:
|
|||
table: parser.ParseTable
|
||||
indent_amounts: dict[str, int]
|
||||
newline_replace: dict[str, str]
|
||||
trivia_mode: dict[str, parser.TriviaMode]
|
||||
|
||||
def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
|
||||
stack: list[tuple[int, Document]] = [(0, None)]
|
||||
table = self.table
|
||||
|
||||
# eof_trivia = []
|
||||
# if len(items) > 0:
|
||||
# item = items[-1]
|
||||
# if isinstance(item, runtime.TokenValue):
|
||||
# eof_trivia = item.post_trivia
|
||||
|
||||
input = [(child_to_name(i), i) for i in items] + [
|
||||
(
|
||||
"$",
|
||||
|
|
@ -302,7 +324,9 @@ class Matcher:
|
|||
|
||||
match action:
|
||||
case parser.Accept():
|
||||
return stack[-1][1]
|
||||
result = stack[-1][1]
|
||||
# result = cons(result, self.apply_trivia(eof_trivia))
|
||||
return result
|
||||
|
||||
case parser.Reduce(name=name, count=size):
|
||||
child: Document = None
|
||||
|
|
@ -314,7 +338,7 @@ class Matcher:
|
|||
del stack[-size:]
|
||||
|
||||
if name[0] == "g":
|
||||
child = Group(child)
|
||||
child = group(child)
|
||||
|
||||
elif name[0] == "i":
|
||||
amount = self.indent_amounts[name]
|
||||
|
|
@ -329,10 +353,10 @@ class Matcher:
|
|||
child = cons(NewLine(replace), child)
|
||||
|
||||
elif name[0] == "f":
|
||||
child = cons(child, ForceBreak())
|
||||
child = cons(child, ForceBreak(False))
|
||||
|
||||
elif name[0] == "d":
|
||||
child = cons(ForceBreak(), child)
|
||||
child = cons(ForceBreak(False), child)
|
||||
|
||||
else:
|
||||
pass # Reducing a transparent rule probably.
|
||||
|
|
@ -347,8 +371,8 @@ class Matcher:
|
|||
if isinstance(value, runtime.Tree):
|
||||
child = Lazy.from_tree(value, printer)
|
||||
else:
|
||||
# TODO: Consider trivia and preserve comments!
|
||||
child = Text(value.start, value.end)
|
||||
child = cons(child, self.apply_trivia(value.post_trivia))
|
||||
|
||||
stack.append((action.state, child))
|
||||
input_index += 1
|
||||
|
|
@ -356,6 +380,47 @@ class Matcher:
|
|||
case parser.Error():
|
||||
raise Exception("How did I get a parse error here??")
|
||||
|
||||
def apply_trivia(self, trivia: list[runtime.TokenValue]) -> Document:
|
||||
had_newline = False
|
||||
trivia_doc = None
|
||||
for token in trivia:
|
||||
mode = self.trivia_mode.get(token.kind, parser.TriviaMode.Ignore)
|
||||
match mode:
|
||||
case parser.TriviaMode.Ignore:
|
||||
pass
|
||||
|
||||
case parser.TriviaMode.NewLine:
|
||||
# We ignore line breaks because obviously
|
||||
# we expect the pretty-printer to put the
|
||||
# line breaks in where they belong *but*
|
||||
# we track if they happened to influence
|
||||
# the layout.
|
||||
had_newline = True
|
||||
|
||||
case parser.TriviaMode.LineComment:
|
||||
if had_newline:
|
||||
# This line comment is all alone on
|
||||
# its line, so we need to maintain
|
||||
# that.
|
||||
line_break = NewLine("")
|
||||
else:
|
||||
# This line comment is attached to
|
||||
# something to the left, reduce it to
|
||||
# a space.
|
||||
line_break = Literal(" ")
|
||||
|
||||
trivia_doc = cons(
|
||||
trivia_doc,
|
||||
line_break,
|
||||
Text(token.start, token.end),
|
||||
ForceBreak(True), # This is probably the wrong place for this!
|
||||
)
|
||||
|
||||
case _:
|
||||
typing.assert_never(mode)
|
||||
|
||||
return trivia_doc
|
||||
|
||||
|
||||
class Printer:
|
||||
# TODO: Pre-generate the matcher tables for a grammar, to make it
|
||||
|
|
@ -364,6 +429,7 @@ class Printer:
|
|||
_matchers: dict[str, Matcher]
|
||||
_nonterminals: dict[str, parser.NonTerminal]
|
||||
_indent: str
|
||||
_trivia_mode: dict[str, parser.TriviaMode]
|
||||
|
||||
def __init__(self, grammar: parser.Grammar, indent: str | None = None):
|
||||
self.grammar = grammar
|
||||
|
|
@ -371,9 +437,18 @@ class Printer:
|
|||
self._matchers = {}
|
||||
|
||||
if indent is None:
|
||||
indent = getattr(self.grammar, "pretty_indent", " ")
|
||||
indent = getattr(self.grammar, "pretty_indent", None)
|
||||
if indent is None:
|
||||
indent = " "
|
||||
self._indent = indent
|
||||
|
||||
trivia_mode = {}
|
||||
for t in grammar.terminals():
|
||||
mode = t.meta.get("trivia_mode")
|
||||
if t.name is not None and isinstance(mode, parser.TriviaMode):
|
||||
trivia_mode[t.name] = mode
|
||||
self._trivia_mode = trivia_mode
|
||||
|
||||
def indent(self) -> str:
|
||||
return self._indent
|
||||
|
||||
|
|
@ -535,6 +610,7 @@ class Printer:
|
|||
parse_table,
|
||||
indent_amounts,
|
||||
final_newlines,
|
||||
self._trivia_mode,
|
||||
)
|
||||
|
||||
def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue