Specify and honor trivia tokens

Trivia tokens are, e.g., how machine-generated parsers know to skip blanks
and comments.

The runtime implementation could be better: we don't really want to
just discard trivia, because it is useful for things like doc comments.
But for now this is fine.
This commit is contained in:
John Doty 2024-08-24 10:01:40 -07:00
parent 8e22c59aa8
commit 7a5f17f74b
3 changed files with 43 additions and 7 deletions

View file

@ -18,6 +18,8 @@ class FineGrammar(Grammar):
# generator = parser.GenerateLR1
start = "File"
trivia = ["BLANKS", "COMMENT"]
def __init__(self):
super().__init__(
precedence=[
@ -330,14 +332,14 @@ class FineGrammar(Grammar):
def field_value(self) -> Rule:
return self.IDENTIFIER | seq(self.IDENTIFIER, self.COLON, self.expression)
BLANK = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
COMMENT = Terminal(Re.seq(Re.literal("//"), Re.set("\n").invert().star()))
ARROW = Terminal("->")
AS = Terminal("as")
BAR = Terminal("bar")
CLASS = Terminal("class")
COLON = Terminal("colon")
COMMENT = Terminal("comment")
ELSE = Terminal("else")
FOR = Terminal("for")
FUN = Terminal("fun")

View file

@ -561,6 +561,7 @@ class ErrorCollection:
class ParseTable:
actions: list[dict[str, ParseAction]]
gotos: list[dict[str, int]]
trivia: set[str]
def format(self):
"""Format a parser table so pretty."""
@ -651,7 +652,7 @@ class TableBuilder(object):
if error is not None:
raise error
return ParseTable(actions=self.actions, gotos=self.gotos)
return ParseTable(actions=self.actions, gotos=self.gotos, trivia=set())
def new_row(self, config_set: ConfigSet):
"""Start a new row, processing the given config set. Call this before
@ -1794,12 +1795,14 @@ class Grammar:
_start: str
_generator: type[GenerateLR0]
_terminals: list[Terminal]
_trivia: list[Terminal]
def __init__(
self,
start: str | None = None,
precedence: PrecedenceList | None = None,
generator: type[GenerateLR0] | None = None,
trivia: list[str | Terminal] | None = None,
):
if start is None:
start = getattr(self, "start", None)
@ -1817,12 +1820,30 @@ class Grammar:
generator = getattr(self, "generator", GenerateLALR)
assert generator is not None
if trivia is None:
trivia = getattr(self, "trivia", [])
assert trivia is not None
# Fixup terminal names with the name of the member that declared it.
terminals = []
terminals = {}
for n, t in inspect.getmembers(self, lambda x: isinstance(x, Terminal)):
if t.value is None:
t.value = n
terminals.append(t)
if n in terminals:
raise ValueError(f"More than one terminal has the name '{n}'")
terminals[n] = t
# Resolve the trivia declarations correctly.
resolved_trivia: list[Terminal] = []
for t in trivia:
if isinstance(t, str):
resolved = terminals.get(t)
if resolved is None:
raise ValueError(f"The trivia '{t}' is not a terminal name")
resolved_trivia.append(resolved)
else:
resolved_trivia.append(t)
# Fix up the precedence table.
precedence_table = {}
@ -1840,12 +1861,17 @@ class Grammar:
self._precedence = precedence_table
self._start = start
self._generator = generator
self._terminals = terminals
self._terminals = list(terminals.values())
self._trivia = resolved_trivia
@property
def terminals(self) -> list[Terminal]:
return self._terminals
@property
def resolved_trivia(self) -> list[Terminal]:
return self._trivia
def generate_nonterminal_dict(
self, start: str | None = None
) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]:
@ -1919,7 +1945,7 @@ class Grammar:
return grammar, transparents
def build_table(self, start: str | None = None, generator=None):
def build_table(self, start: str | None = None, generator=None) -> ParseTable:
"""Construct a parse table for this grammar, starting at the named
nonterminal rule.
"""
@ -1931,6 +1957,11 @@ class Grammar:
generator = self._generator
gen = generator(start, desugared, precedence=self._precedence, transparents=transparents)
table = gen.gen_table()
for t in self._trivia:
assert t.value is not None
table.trivia.add(t.value)
return table

View file

@ -288,10 +288,13 @@ class Parser:
self.table = table
def parse(self, tokens: TokenStream) -> typing.Tuple[Tree | None, list[str]]:
# TODO: If this were a for reals for reals parser we would keep the trivia
# accessible in the tree.
input_tokens = tokens.tokens()
input: list[TokenValue] = [
TokenValue(kind=kind.value, start=start, end=start + length)
for (kind, start, length) in input_tokens
if kind.value is not None and kind.value not in self.table.trivia
]
eof = 0 if len(input) == 0 else input[-1].end