From 3bffe98df03e3cfabc3f30e45d99e631ea35bfca Mon Sep 17 00:00:00 2001 From: John Doty Date: Sun, 10 Nov 2024 18:47:24 -0800 Subject: [PATCH] [parser] one_or_more Finally making lists easier --- parser/parser.py | 49 +++++++++++++++++++++++++++++++++++++++++-- tests/test_grammar.py | 39 +++++++++++++++++++++++++++++++++- 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/parser/parser.py b/parser/parser.py index a54da0f..a6ff2d0 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -1116,7 +1116,7 @@ class ParserGenerator: # Check to make sure they didn't use anything that will give us # heartburn later. - reserved = [a for a in alphabet if a.startswith("__") or a == "$"] + reserved = [a for a in alphabet if (a.startswith("__") and not a.startswith("__gen_")) or a == "$"] if reserved: raise ValueError( "Can't use {symbols} in grammars, {what} reserved.".format( @@ -1616,6 +1616,9 @@ class Terminal(Rule): return self.name or "" +_CURRENT_DEFINITION: str = "__global" +_CURRENT_GEN_INDEX: int = 0 + class NonTerminal(Rule): """A non-terminal, or a production, in the grammar. @@ -1676,6 +1679,8 @@ class NonTerminal(Rule): """The flattened body of the nonterminal: a list of productions where each production is a sequence of Terminals and NonTerminals. """ + global _CURRENT_DEFINITION + global _CURRENT_GEN_INDEX def without_metadata(result: FlattenedWithMetadata) -> list[NonTerminal | Terminal]: for item in result: @@ -1683,7 +1688,17 @@ class NonTerminal(Rule): return typing.cast(list[NonTerminal | Terminal], result) if self._body is None: - self._body = [without_metadata(rule) for rule in self.fn().flatten(with_metadata=False)] + prev_defn = _CURRENT_DEFINITION + prev_idx = _CURRENT_GEN_INDEX + try: + _CURRENT_DEFINITION = self.name + _CURRENT_GEN_INDEX = 0 + body = self.fn() + self._body = [without_metadata(rule) for rule in body.flatten(with_metadata=False)] + finally: + _CURRENT_DEFINITION = prev_defn + _CURRENT_GEN_INDEX = prev_idx + return self._body @@ -1799,6 +1814,28 @@ def mark(rule: Rule, **kwargs) -> Rule: return MetadataRule(rule, kwargs) +def one_or_more(r: Rule) -> Rule: + global _CURRENT_DEFINITION + global _CURRENT_GEN_INDEX + + tail : NonTerminal | None = None + def impl() -> Rule: + nonlocal tail + assert(tail is not None) + return opt(tail) + r + + tail = NonTerminal( + fn=impl, + name=f"__gen_{_CURRENT_DEFINITION}_{_CURRENT_GEN_INDEX}", + transparent=True, + ) + _CURRENT_GEN_INDEX = _CURRENT_GEN_INDEX + 1 + + return tail + +def zero_or_more(r:Rule) -> Rule: + return opt(one_or_more(r)) + @typing.overload def rule(f: typing.Callable, /) -> NonTerminal: ... @@ -2083,6 +2120,13 @@ class Re: def __str__(self) -> str: raise NotImplementedError() + @classmethod + def alt(cls, *values: "Re") -> "Re": + result = values[0] + for v in values[1:]: + result = ReAlt(result, v) + return result + @classmethod def seq(cls, *values: "Re") -> "Re": result = values[0] @@ -2615,6 +2659,7 @@ class LanguageVariableHighlight(VariableHighlight): class _Highlight: class _Comment(CommentHighlight): line = LineCommentHighlight() + block = BlockCommentHighlight() class _Constant(ConstantHighlight): language = LanguageConstantHighlight() diff --git a/tests/test_grammar.py b/tests/test_grammar.py index 398b416..b81dd86 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -2,7 +2,7 @@ import pytest import parser.runtime as runtime -from parser import Grammar, seq, rule, Terminal +from parser import Grammar, seq, rule, Terminal, one_or_more class Tokens: @@ -169,3 +169,40 @@ def test_grammar_ignore_trivia(): ), ), ) + + +def test_one_or_more(): + @rule + def sentence(): + return one_or_more(WORD) + + WORD = Terminal("WORD", "blah") + BLANK = Terminal("BLANK", " ") + + table = Grammar(start=sentence, trivia=[BLANK]).build_table() + assert "BLANK" in table.trivia + + tree, errors = runtime.Parser(table).parse(Tokens(WORD, BLANK, WORD, BLANK)) + + assert errors == [] + assert tree == runtime.Tree( + "sentence", + 0, + 3, + ( + runtime.TokenValue( + "WORD", + 0, + 1, + [], + [runtime.TokenValue("BLANK", 1, 2, [], [])], + ), + runtime.TokenValue( + "WORD", + 2, + 3, + [runtime.TokenValue("BLANK", 1, 2, [], [])], + [runtime.TokenValue("BLANK", 3, 4, [], [])], + ), + ), + )