Rework the documentation examples

This commit is contained in:
John Doty 2025-02-15 15:06:42 -08:00
parent ed5baefd5d
commit 5f19b1e73e
2 changed files with 189 additions and 130 deletions

149
README.md
View file

@ -17,26 +17,31 @@ class. Create one method per non-terminal, decorated with the `rule`
decorator. Here's an example:
```python
class SimpleGrammar(Grammar):
start = "expression"
from parser import *
@rule
def expression(self):
return seq(self.expression, self.PLUS, self.term) | self.term
@rule
def expression():
return seq(expression, PLUS, term) | term
@rule
def term(self):
return seq(self.LPAREN, self.expression, self.RPAREN) | self.ID
@rule
def term():
return seq(LPAREN, expression, RPAREN) | ID
PLUS = Terminal('+')
LPAREN = Terminal('(')
RPAREN = Terminal(')')
ID = Terminal(
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
),
)
PLUS = Terminal('PLUS', '+')
LPAREN = Terminal('LPAREN', '(')
RPAREN = Terminal('RPAREN', ')')
ID = Terminal(
'ID',
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
),
)
SimpleGrammar = Grammar(
name="Simple",
start=expression,
)
```
Terminals can be plain strings or regular expressions constructed with
@ -54,15 +59,17 @@ There are no helpers (yet!) for consuming lists, so they need to be
constructed in the classic context-free grammar way:
```python
class NumberList(Grammar):
start = "list"
@rule
def list():
return NUMBER | (list + COMMA + NUMBER)
@rule
def list(self):
return self.NUMBER | (self.list + self.COMMA + self.NUMBER)
NUMBER = Terminal(Re.set(("0", "9")).plus())
COMMA = Terminal(',')
NUMBER = Terminal(Re.set(("0", "9")).plus())
COMMA = Terminal(',')
NumberList = Grammar(
name="NumberList",
start=list,
)
```
(Unlike with PEGs, you can write grammars with left or right-recursion,
@ -88,21 +95,23 @@ which means they don't generate nodes in the tree and just dump their
contents into the parent node instead.
```python
class NumberList(Grammar):
start = "list"
@rule
def list():
# The starting rule can't be transparent: there has to be something to
# hold on to!
return transparent_list
@rule
def list(self):
# The starting rule can't be transparent: there has to be something to
# hold on to!
return self.transparent_list
@rule(transparent=True)
def transparent_list() -> Rule:
return NUMBER | (transparent_list + COMMA + NUMBER)
@rule(transparent=True)
def transparent_list(self) -> Rule:
return self.NUMBER | (self.transparent_list + self.COMMA + self.NUMBER)
NUMBER = Terminal(Re.set(("0", "9")).plus())
COMMA = Terminal(',')
NUMBER = Terminal(Re.set(("0", "9")).plus())
COMMA = Terminal(',')
NumberList = Grammar(
name="NumberList",
start=list,
)
```
This grammar will generate the far more useful tree:
@ -121,23 +130,46 @@ following the lead set by tree-sitter, and so the grammar above is
probably better-written as:
```python
class NumberList(Grammar):
start = "list"
@rule
def list():
# The starting rule can't be transparent: there has to be something to
# hold on to!
return transparent_list
@rule
def list(self):
return self._list
@rule
def _list() -> Rule:
return NUMBER | (_list + COMMA + NUMBER)
@rule
def _list(self):
return self.NUMBER | (self._list + self.COMMA + self.NUMBER)
NUMBER = Terminal(Re.set(("0", "9")).plus())
COMMA = Terminal(',')
NUMBER = Terminal(Re.set(("0", "9")).plus())
COMMA = Terminal(',')
NumberList = Grammar(
name="NumberList",
start=list,
)
```
That will generate the same tree, but a little more succinctly.
Of course, it's a lot of work to write these transparent recursive
rules by hand all the time, so there are helpers that do it for you:
```python
@rule
def list():
return zero_or_more(NUMBER, COMMA) + NUMBER
NUMBER = Terminal(Re.set(("0", "9")).plus())
COMMA = Terminal(',')
NumberList = Grammar(
name="NumberList",
start=list,
)
```
Much better.
### Trivia
Most folks that want to parse something want to skip blanks when they
@ -148,23 +180,20 @@ To allow (and ignore) spaces, newlines, tabs, and carriage-returns in
our number lists, we would modify the grammar as follows:
```python
class NumberList(Grammar):
start = "list"
trivia = ["BLANKS"] # <- Add a `trivia` member
@rule
def list():
return zero_or_more(NUMBER, COMMA) + NUMBER
@rule
def list(self):
return self._list
NUMBER = Terminal(Re.set(("0", "9")).plus())
COMMA = Terminal(',')
@rule
def _list(self):
return self.NUMBER | (self._list + self.COMMA + self.NUMBER)
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
NUMBER = Terminal(Re.set(("0", "9")).plus())
COMMA = Terminal(',')
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
# ^ and add a new terminal to describe it
NumberList = Grammar(
name="NumberList",
start=list,
trivia=[BLANKS],
)
```
Now we can parse a list with spaces! "1 , 2, 3" will parse happily