Specify and honor trivia tokens
e.g. "this is how machine-generated parsers know to skip blanks and comments". The runtime implementation could be better: we don't really want to just discard trivia, because it is useful for things like doc comments. But for now this is fine.
This commit is contained in:
parent
8e22c59aa8
commit
7a5f17f74b
3 changed files with 43 additions and 7 deletions
|
|
@ -18,6 +18,8 @@ class FineGrammar(Grammar):
|
|||
# generator = parser.GenerateLR1
|
||||
start = "File"
|
||||
|
||||
trivia = ["BLANKS", "COMMENT"]
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
precedence=[
|
||||
|
|
@ -330,14 +332,14 @@ class FineGrammar(Grammar):
|
|||
def field_value(self) -> Rule:
|
||||
return self.IDENTIFIER | seq(self.IDENTIFIER, self.COLON, self.expression)
|
||||
|
||||
BLANK = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
COMMENT = Terminal(Re.seq(Re.literal("//"), Re.set("\n").invert().star()))
|
||||
|
||||
ARROW = Terminal("->")
|
||||
AS = Terminal("as")
|
||||
BAR = Terminal("bar")
|
||||
CLASS = Terminal("class")
|
||||
COLON = Terminal("colon")
|
||||
COMMENT = Terminal("comment")
|
||||
ELSE = Terminal("else")
|
||||
FOR = Terminal("for")
|
||||
FUN = Terminal("fun")
|
||||
|
|
|
|||
|
|
@ -561,6 +561,7 @@ class ErrorCollection:
|
|||
class ParseTable:
|
||||
actions: list[dict[str, ParseAction]]
|
||||
gotos: list[dict[str, int]]
|
||||
trivia: set[str]
|
||||
|
||||
def format(self):
|
||||
"""Format a parser table so pretty."""
|
||||
|
|
@ -651,7 +652,7 @@ class TableBuilder(object):
|
|||
if error is not None:
|
||||
raise error
|
||||
|
||||
return ParseTable(actions=self.actions, gotos=self.gotos)
|
||||
return ParseTable(actions=self.actions, gotos=self.gotos, trivia=set())
|
||||
|
||||
def new_row(self, config_set: ConfigSet):
|
||||
"""Start a new row, processing the given config set. Call this before
|
||||
|
|
@ -1794,12 +1795,14 @@ class Grammar:
|
|||
_start: str
|
||||
_generator: type[GenerateLR0]
|
||||
_terminals: list[Terminal]
|
||||
_trivia: list[Terminal]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
start: str | None = None,
|
||||
precedence: PrecedenceList | None = None,
|
||||
generator: type[GenerateLR0] | None = None,
|
||||
trivia: list[str | Terminal] | None = None,
|
||||
):
|
||||
if start is None:
|
||||
start = getattr(self, "start", None)
|
||||
|
|
@ -1817,12 +1820,30 @@ class Grammar:
|
|||
generator = getattr(self, "generator", GenerateLALR)
|
||||
assert generator is not None
|
||||
|
||||
if trivia is None:
|
||||
trivia = getattr(self, "trivia", [])
|
||||
assert trivia is not None
|
||||
|
||||
# Fixup terminal names with the name of the member that declared it.
|
||||
terminals = []
|
||||
terminals = {}
|
||||
for n, t in inspect.getmembers(self, lambda x: isinstance(x, Terminal)):
|
||||
if t.value is None:
|
||||
t.value = n
|
||||
terminals.append(t)
|
||||
|
||||
if n in terminals:
|
||||
raise ValueError(f"More than one terminal has the name '{n}'")
|
||||
terminals[n] = t
|
||||
|
||||
# Resolve the trivia declarations correctly.
|
||||
resolved_trivia: list[Terminal] = []
|
||||
for t in trivia:
|
||||
if isinstance(t, str):
|
||||
resolved = terminals.get(t)
|
||||
if resolved is None:
|
||||
raise ValueError(f"The trivia '{t}' is not a terminal name")
|
||||
resolved_trivia.append(resolved)
|
||||
else:
|
||||
resolved_trivia.append(t)
|
||||
|
||||
# Fix up the precedence table.
|
||||
precedence_table = {}
|
||||
|
|
@ -1840,12 +1861,17 @@ class Grammar:
|
|||
self._precedence = precedence_table
|
||||
self._start = start
|
||||
self._generator = generator
|
||||
self._terminals = terminals
|
||||
self._terminals = list(terminals.values())
|
||||
self._trivia = resolved_trivia
|
||||
|
||||
@property
|
||||
def terminals(self) -> list[Terminal]:
|
||||
return self._terminals
|
||||
|
||||
@property
|
||||
def resolved_trivia(self) -> list[Terminal]:
|
||||
return self._trivia
|
||||
|
||||
def generate_nonterminal_dict(
|
||||
self, start: str | None = None
|
||||
) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]:
|
||||
|
|
@ -1919,7 +1945,7 @@ class Grammar:
|
|||
|
||||
return grammar, transparents
|
||||
|
||||
def build_table(self, start: str | None = None, generator=None):
|
||||
def build_table(self, start: str | None = None, generator=None) -> ParseTable:
|
||||
"""Construct a parse table for this grammar, starting at the named
|
||||
nonterminal rule.
|
||||
"""
|
||||
|
|
@ -1931,6 +1957,11 @@ class Grammar:
|
|||
generator = self._generator
|
||||
gen = generator(start, desugared, precedence=self._precedence, transparents=transparents)
|
||||
table = gen.gen_table()
|
||||
|
||||
for t in self._trivia:
|
||||
assert t.value is not None
|
||||
table.trivia.add(t.value)
|
||||
|
||||
return table
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -288,10 +288,13 @@ class Parser:
|
|||
self.table = table
|
||||
|
||||
def parse(self, tokens: TokenStream) -> typing.Tuple[Tree | None, list[str]]:
|
||||
# TODO: If this were a for reals for reals parser we would keep the trivia
|
||||
# accessible in the tree.
|
||||
input_tokens = tokens.tokens()
|
||||
input: list[TokenValue] = [
|
||||
TokenValue(kind=kind.value, start=start, end=start + length)
|
||||
for (kind, start, length) in input_tokens
|
||||
if kind.value is not None and kind.value not in self.table.trivia
|
||||
]
|
||||
|
||||
eof = 0 if len(input) == 0 else input[-1].end
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue