diff --git a/grammar.py b/grammar.py index c6438dd..aba6259 100644 --- a/grammar.py +++ b/grammar.py @@ -18,6 +18,8 @@ class FineGrammar(Grammar): # generator = parser.GenerateLR1 start = "File" + trivia = ["BLANKS", "COMMENT"] + def __init__(self): super().__init__( precedence=[ @@ -330,14 +332,14 @@ class FineGrammar(Grammar): def field_value(self) -> Rule: return self.IDENTIFIER | seq(self.IDENTIFIER, self.COLON, self.expression) - BLANK = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) + BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) + COMMENT = Terminal(Re.seq(Re.literal("//"), Re.set("\n").invert().star())) ARROW = Terminal("->") AS = Terminal("as") BAR = Terminal("bar") CLASS = Terminal("class") COLON = Terminal("colon") - COMMENT = Terminal("comment") ELSE = Terminal("else") FOR = Terminal("for") FUN = Terminal("fun") diff --git a/parser/parser.py b/parser/parser.py index 8b8d6e0..51cbe69 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -561,6 +561,7 @@ class ErrorCollection: class ParseTable: actions: list[dict[str, ParseAction]] gotos: list[dict[str, int]] + trivia: set[str] def format(self): """Format a parser table so pretty.""" @@ -651,7 +652,7 @@ class TableBuilder(object): if error is not None: raise error - return ParseTable(actions=self.actions, gotos=self.gotos) + return ParseTable(actions=self.actions, gotos=self.gotos, trivia=set()) def new_row(self, config_set: ConfigSet): """Start a new row, processing the given config set. Call this before @@ -1794,12 +1795,14 @@ class Grammar: _start: str _generator: type[GenerateLR0] _terminals: list[Terminal] + _trivia: list[Terminal] def __init__( self, start: str | None = None, precedence: PrecedenceList | None = None, generator: type[GenerateLR0] | None = None, + trivia: list[str | Terminal] | None = None, ): if start is None: start = getattr(self, "start", None) @@ -1817,12 +1820,30 @@ class Grammar: generator = getattr(self, "generator", GenerateLALR) assert generator is not None + if trivia is None: + trivia = getattr(self, "trivia", []) + assert trivia is not None + # Fixup terminal names with the name of the member that declared it. - terminals = [] + terminals = {} for n, t in inspect.getmembers(self, lambda x: isinstance(x, Terminal)): if t.value is None: t.value = n - terminals.append(t) + + if n in terminals: + raise ValueError(f"More than one terminal has the name '{n}'") + terminals[n] = t + + # Resolve the trivia declarations correctly. + resolved_trivia: list[Terminal] = [] + for t in trivia: + if isinstance(t, str): + resolved = terminals.get(t) + if resolved is None: + raise ValueError(f"The trivia '{t}' is not a terminal name") + resolved_trivia.append(resolved) + else: + resolved_trivia.append(t) # Fix up the precedence table. precedence_table = {} @@ -1840,12 +1861,17 @@ class Grammar: self._precedence = precedence_table self._start = start self._generator = generator - self._terminals = terminals + self._terminals = list(terminals.values()) + self._trivia = resolved_trivia @property def terminals(self) -> list[Terminal]: return self._terminals + @property + def resolved_trivia(self) -> list[Terminal]: + return self._trivia + def generate_nonterminal_dict( self, start: str | None = None ) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]: @@ -1919,7 +1945,7 @@ class Grammar: return grammar, transparents - def build_table(self, start: str | None = None, generator=None): + def build_table(self, start: str | None = None, generator=None) -> ParseTable: """Construct a parse table for this grammar, starting at the named nonterminal rule. """ @@ -1931,6 +1957,11 @@ class Grammar: generator = self._generator gen = generator(start, desugared, precedence=self._precedence, transparents=transparents) table = gen.gen_table() + + for t in self._trivia: + assert t.value is not None + table.trivia.add(t.value) + return table diff --git a/parser/runtime.py b/parser/runtime.py index 732016a..2aefd7b 100644 --- a/parser/runtime.py +++ b/parser/runtime.py @@ -288,10 +288,13 @@ class Parser: self.table = table def parse(self, tokens: TokenStream) -> typing.Tuple[Tree | None, list[str]]: + # TODO: If this were a for reals for reals parser we would keep the trivia + # accessible in the tree. input_tokens = tokens.tokens() input: list[TokenValue] = [ TokenValue(kind=kind.value, start=start, end=start + length) for (kind, start, length) in input_tokens + if kind.value is not None and kind.value not in self.table.trivia ] eof = 0 if len(input) == 0 else input[-1].end