Specify and honor trivia tokens
E.g., "this is how machine-generated parsers know to skip blanks and comments." The runtime implementation could be better: we don't really want to just discard trivia, because it's useful for things like doc comments. But for now this is fine.
This commit is contained in:
parent
8e22c59aa8
commit
7a5f17f74b
3 changed files with 43 additions and 7 deletions
|
|
@ -18,6 +18,8 @@ class FineGrammar(Grammar):
|
||||||
# generator = parser.GenerateLR1
|
# generator = parser.GenerateLR1
|
||||||
start = "File"
|
start = "File"
|
||||||
|
|
||||||
|
trivia = ["BLANKS", "COMMENT"]
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
precedence=[
|
precedence=[
|
||||||
|
|
@ -330,14 +332,14 @@ class FineGrammar(Grammar):
|
||||||
def field_value(self) -> Rule:
|
def field_value(self) -> Rule:
|
||||||
return self.IDENTIFIER | seq(self.IDENTIFIER, self.COLON, self.expression)
|
return self.IDENTIFIER | seq(self.IDENTIFIER, self.COLON, self.expression)
|
||||||
|
|
||||||
BLANK = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||||
|
COMMENT = Terminal(Re.seq(Re.literal("//"), Re.set("\n").invert().star()))
|
||||||
|
|
||||||
ARROW = Terminal("->")
|
ARROW = Terminal("->")
|
||||||
AS = Terminal("as")
|
AS = Terminal("as")
|
||||||
BAR = Terminal("bar")
|
BAR = Terminal("bar")
|
||||||
CLASS = Terminal("class")
|
CLASS = Terminal("class")
|
||||||
COLON = Terminal("colon")
|
COLON = Terminal("colon")
|
||||||
COMMENT = Terminal("comment")
|
|
||||||
ELSE = Terminal("else")
|
ELSE = Terminal("else")
|
||||||
FOR = Terminal("for")
|
FOR = Terminal("for")
|
||||||
FUN = Terminal("fun")
|
FUN = Terminal("fun")
|
||||||
|
|
|
||||||
|
|
@ -561,6 +561,7 @@ class ErrorCollection:
|
||||||
class ParseTable:
|
class ParseTable:
|
||||||
actions: list[dict[str, ParseAction]]
|
actions: list[dict[str, ParseAction]]
|
||||||
gotos: list[dict[str, int]]
|
gotos: list[dict[str, int]]
|
||||||
|
trivia: set[str]
|
||||||
|
|
||||||
def format(self):
|
def format(self):
|
||||||
"""Format a parser table so pretty."""
|
"""Format a parser table so pretty."""
|
||||||
|
|
@ -651,7 +652,7 @@ class TableBuilder(object):
|
||||||
if error is not None:
|
if error is not None:
|
||||||
raise error
|
raise error
|
||||||
|
|
||||||
return ParseTable(actions=self.actions, gotos=self.gotos)
|
return ParseTable(actions=self.actions, gotos=self.gotos, trivia=set())
|
||||||
|
|
||||||
def new_row(self, config_set: ConfigSet):
|
def new_row(self, config_set: ConfigSet):
|
||||||
"""Start a new row, processing the given config set. Call this before
|
"""Start a new row, processing the given config set. Call this before
|
||||||
|
|
@ -1794,12 +1795,14 @@ class Grammar:
|
||||||
_start: str
|
_start: str
|
||||||
_generator: type[GenerateLR0]
|
_generator: type[GenerateLR0]
|
||||||
_terminals: list[Terminal]
|
_terminals: list[Terminal]
|
||||||
|
_trivia: list[Terminal]
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
start: str | None = None,
|
start: str | None = None,
|
||||||
precedence: PrecedenceList | None = None,
|
precedence: PrecedenceList | None = None,
|
||||||
generator: type[GenerateLR0] | None = None,
|
generator: type[GenerateLR0] | None = None,
|
||||||
|
trivia: list[str | Terminal] | None = None,
|
||||||
):
|
):
|
||||||
if start is None:
|
if start is None:
|
||||||
start = getattr(self, "start", None)
|
start = getattr(self, "start", None)
|
||||||
|
|
@ -1817,12 +1820,30 @@ class Grammar:
|
||||||
generator = getattr(self, "generator", GenerateLALR)
|
generator = getattr(self, "generator", GenerateLALR)
|
||||||
assert generator is not None
|
assert generator is not None
|
||||||
|
|
||||||
|
if trivia is None:
|
||||||
|
trivia = getattr(self, "trivia", [])
|
||||||
|
assert trivia is not None
|
||||||
|
|
||||||
# Fixup terminal names with the name of the member that declared it.
|
# Fixup terminal names with the name of the member that declared it.
|
||||||
terminals = []
|
terminals = {}
|
||||||
for n, t in inspect.getmembers(self, lambda x: isinstance(x, Terminal)):
|
for n, t in inspect.getmembers(self, lambda x: isinstance(x, Terminal)):
|
||||||
if t.value is None:
|
if t.value is None:
|
||||||
t.value = n
|
t.value = n
|
||||||
terminals.append(t)
|
|
||||||
|
if n in terminals:
|
||||||
|
raise ValueError(f"More than one terminal has the name '{n}'")
|
||||||
|
terminals[n] = t
|
||||||
|
|
||||||
|
# Resolve the trivia declarations correctly.
|
||||||
|
resolved_trivia: list[Terminal] = []
|
||||||
|
for t in trivia:
|
||||||
|
if isinstance(t, str):
|
||||||
|
resolved = terminals.get(t)
|
||||||
|
if resolved is None:
|
||||||
|
raise ValueError(f"The trivia '{t}' is not a terminal name")
|
||||||
|
resolved_trivia.append(resolved)
|
||||||
|
else:
|
||||||
|
resolved_trivia.append(t)
|
||||||
|
|
||||||
# Fix up the precedence table.
|
# Fix up the precedence table.
|
||||||
precedence_table = {}
|
precedence_table = {}
|
||||||
|
|
@ -1840,12 +1861,17 @@ class Grammar:
|
||||||
self._precedence = precedence_table
|
self._precedence = precedence_table
|
||||||
self._start = start
|
self._start = start
|
||||||
self._generator = generator
|
self._generator = generator
|
||||||
self._terminals = terminals
|
self._terminals = list(terminals.values())
|
||||||
|
self._trivia = resolved_trivia
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def terminals(self) -> list[Terminal]:
|
def terminals(self) -> list[Terminal]:
|
||||||
return self._terminals
|
return self._terminals
|
||||||
|
|
||||||
|
@property
|
||||||
|
def resolved_trivia(self) -> list[Terminal]:
|
||||||
|
return self._trivia
|
||||||
|
|
||||||
def generate_nonterminal_dict(
|
def generate_nonterminal_dict(
|
||||||
self, start: str | None = None
|
self, start: str | None = None
|
||||||
) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]:
|
) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]:
|
||||||
|
|
@ -1919,7 +1945,7 @@ class Grammar:
|
||||||
|
|
||||||
return grammar, transparents
|
return grammar, transparents
|
||||||
|
|
||||||
def build_table(self, start: str | None = None, generator=None):
|
def build_table(self, start: str | None = None, generator=None) -> ParseTable:
|
||||||
"""Construct a parse table for this grammar, starting at the named
|
"""Construct a parse table for this grammar, starting at the named
|
||||||
nonterminal rule.
|
nonterminal rule.
|
||||||
"""
|
"""
|
||||||
|
|
@ -1931,6 +1957,11 @@ class Grammar:
|
||||||
generator = self._generator
|
generator = self._generator
|
||||||
gen = generator(start, desugared, precedence=self._precedence, transparents=transparents)
|
gen = generator(start, desugared, precedence=self._precedence, transparents=transparents)
|
||||||
table = gen.gen_table()
|
table = gen.gen_table()
|
||||||
|
|
||||||
|
for t in self._trivia:
|
||||||
|
assert t.value is not None
|
||||||
|
table.trivia.add(t.value)
|
||||||
|
|
||||||
return table
|
return table
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -288,10 +288,13 @@ class Parser:
|
||||||
self.table = table
|
self.table = table
|
||||||
|
|
||||||
def parse(self, tokens: TokenStream) -> typing.Tuple[Tree | None, list[str]]:
|
def parse(self, tokens: TokenStream) -> typing.Tuple[Tree | None, list[str]]:
|
||||||
|
# TODO: If this were a for reals for reals parser we would keep the trivia
|
||||||
|
# accessible in the tree.
|
||||||
input_tokens = tokens.tokens()
|
input_tokens = tokens.tokens()
|
||||||
input: list[TokenValue] = [
|
input: list[TokenValue] = [
|
||||||
TokenValue(kind=kind.value, start=start, end=start + length)
|
TokenValue(kind=kind.value, start=start, end=start + length)
|
||||||
for (kind, start, length) in input_tokens
|
for (kind, start, length) in input_tokens
|
||||||
|
if kind.value is not None and kind.value not in self.table.trivia
|
||||||
]
|
]
|
||||||
|
|
||||||
eof = 0 if len(input) == 0 else input[-1].end
|
eof = 0 if len(input) == 0 else input[-1].end
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue