Compare commits
No commits in common. "5f19b1e73ec42ba353ef4a4cf185ca19b01b2915" and "1aa85cc295f32e253a9803501f8ab1a4a4dd10e1" have entirely different histories.
5f19b1e73e
...
1aa85cc295
4 changed files with 131 additions and 204 deletions
149
README.md
149
README.md
|
|
@@ -17,31 +17,26 @@ class. Create one method per non-terminal, decorated with the `rule`
|
||||||
decorator. Here's an example:
|
decorator. Here's an example:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from parser import *
|
class SimpleGrammar(Grammar):
|
||||||
|
start = "expression"
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def expression():
|
def expression(self):
|
||||||
return seq(expression, PLUS, term) | term
|
return seq(self.expression, self.PLUS, self.term) | self.term
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def term():
|
def term(self):
|
||||||
return seq(LPAREN, expression, RPAREN) | ID
|
return seq(self.LPAREN, self.expression, self.RPAREN) | self.ID
|
||||||
|
|
||||||
PLUS = Terminal('PLUS', '+')
|
PLUS = Terminal('+')
|
||||||
LPAREN = Terminal('LPAREN', '(')
|
LPAREN = Terminal('(')
|
||||||
RPAREN = Terminal('RPAREN', ')')
|
RPAREN = Terminal(')')
|
||||||
ID = Terminal(
|
ID = Terminal(
|
||||||
'ID',
|
Re.seq(
|
||||||
Re.seq(
|
Re.set(("a", "z"), ("A", "Z"), "_"),
|
||||||
Re.set(("a", "z"), ("A", "Z"), "_"),
|
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
|
||||||
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
|
),
|
||||||
),
|
)
|
||||||
)
|
|
||||||
|
|
||||||
SimpleGrammar = Grammar(
|
|
||||||
name="Simple",
|
|
||||||
start=expression,
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Terminals can be plain strings or regular expressions constructed with
|
Terminals can be plain strings or regular expressions constructed with
|
||||||
|
|
@@ -59,17 +54,15 @@ There are no helpers (yet!) for consuming lists, so they need to be
|
||||||
constructed in the classic context-free grammar way:
|
constructed in the classic context-free grammar way:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
@rule
|
class NumberList(Grammar):
|
||||||
def list():
|
start = "list"
|
||||||
return NUMBER | (list + COMMA + NUMBER)
|
|
||||||
|
|
||||||
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
@rule
|
||||||
COMMA = Terminal(',')
|
def list(self):
|
||||||
|
return self.NUMBER | (self.list + self.COMMA + self.NUMBER)
|
||||||
|
|
||||||
NumberList = Grammar(
|
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
||||||
name="NumberList",
|
COMMA = Terminal(',')
|
||||||
start=list,
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
(Unlike with PEGs, you can write grammars with left or right-recursion,
|
(Unlike with PEGs, you can write grammars with left or right-recursion,
|
||||||
|
|
@@ -95,23 +88,21 @@ which means they don't generate nodes in the tree and just dump their
|
||||||
contents into the parent node instead.
|
contents into the parent node instead.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
@rule
|
class NumberList(Grammar):
|
||||||
def list():
|
start = "list"
|
||||||
# The starting rule can't be transparent: there has to be something to
|
|
||||||
# hold on to!
|
|
||||||
return transparent_list
|
|
||||||
|
|
||||||
@rule(transparent=True)
|
@rule
|
||||||
def transparent_list() -> Rule:
|
def list(self):
|
||||||
return NUMBER | (transparent_list + COMMA + NUMBER)
|
# The starting rule can't be transparent: there has to be something to
|
||||||
|
# hold on to!
|
||||||
|
return self.transparent_list
|
||||||
|
|
||||||
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
@rule(transparent=True)
|
||||||
COMMA = Terminal(',')
|
def transparent_list(self) -> Rule:
|
||||||
|
return self.NUMBER | (self.transparent_list + self.COMMA + self.NUMBER)
|
||||||
|
|
||||||
NumberList = Grammar(
|
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
||||||
name="NumberList",
|
COMMA = Terminal(',')
|
||||||
start=list,
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
This grammar will generate the far more useful tree:
|
This grammar will generate the far more useful tree:
|
||||||
|
|
@@ -130,46 +121,23 @@ following the lead set by tree-sitter, and so the grammar above is
|
||||||
probably better-written as:
|
probably better-written as:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
@rule
|
class NumberList(Grammar):
|
||||||
def list():
|
start = "list"
|
||||||
# The starting rule can't be transparent: there has to be something to
|
|
||||||
# hold on to!
|
|
||||||
return transparent_list
|
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def _list() -> Rule:
|
def list(self):
|
||||||
return NUMBER | (_list + COMMA + NUMBER)
|
return self._list
|
||||||
|
|
||||||
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
@rule
|
||||||
COMMA = Terminal(',')
|
def _list(self):
|
||||||
|
return self.NUMBER | (self._list + self.COMMA + self.NUMBER)
|
||||||
|
|
||||||
NumberList = Grammar(
|
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
||||||
name="NumberList",
|
COMMA = Terminal(',')
|
||||||
start=list,
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
That will generate the same tree, but a little more succinctly.
|
That will generate the same tree, but a little more succinctly.
|
||||||
|
|
||||||
Of course, it's a lot of work to write these transparent recursive
|
|
||||||
rules by hand all the time, so there are helpers that do it for you:
|
|
||||||
|
|
||||||
```python
|
|
||||||
@rule
|
|
||||||
def list():
|
|
||||||
return zero_or_more(NUMBER, COMMA) + NUMBER
|
|
||||||
|
|
||||||
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
|
||||||
COMMA = Terminal(',')
|
|
||||||
|
|
||||||
NumberList = Grammar(
|
|
||||||
name="NumberList",
|
|
||||||
start=list,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
Much better.
|
|
||||||
|
|
||||||
### Trivia
|
### Trivia
|
||||||
|
|
||||||
Most folks that want to parse something want to skip blanks when they
|
Most folks that want to parse something want to skip blanks when they
|
||||||
|
|
@@ -180,20 +148,23 @@ To allow (and ignore) spaces, newlines, tabs, and carriage-returns in
|
||||||
our number lists, we would modify the grammar as follows:
|
our number lists, we would modify the grammar as follows:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
@rule
|
class NumberList(Grammar):
|
||||||
def list():
|
start = "list"
|
||||||
return zero_or_more(NUMBER, COMMA) + NUMBER
|
trivia = ["BLANKS"] # <- Add a `trivia` member
|
||||||
|
|
||||||
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
@rule
|
||||||
COMMA = Terminal(',')
|
def list(self):
|
||||||
|
return self._list
|
||||||
|
|
||||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
@rule
|
||||||
|
def _list(self):
|
||||||
|
return self.NUMBER | (self._list + self.COMMA + self.NUMBER)
|
||||||
|
|
||||||
NumberList = Grammar(
|
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
||||||
name="NumberList",
|
COMMA = Terminal(',')
|
||||||
start=list,
|
|
||||||
trivia=[BLANKS],
|
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||||
)
|
# ^ and add a new terminal to describe it
|
||||||
```
|
```
|
||||||
|
|
||||||
Now we can parse a list with spaces! "1 , 2, 3" will parse happily
|
Now we can parse a list with spaces! "1 , 2, 3" will parse happily
|
||||||
|
|
|
||||||
170
dingus/about.md
170
dingus/about.md
|
|
@@ -12,66 +12,59 @@ about doing fun things with grammars.
|
||||||
|
|
||||||
## Making Grammars
|
## Making Grammars
|
||||||
|
|
||||||
To get started, create one function per non-terminal, decorated with
|
To get started, create a grammar that derives from the `Grammar`
|
||||||
the `rule` decorator, and one instance of a `Terminal` object for each
|
class. Create one method per non-terminal, decorated with the `rule`
|
||||||
terminal. Then tie it all together with an instance of a Grammar
|
decorator. Here's an example:
|
||||||
object.
|
|
||||||
|
|
||||||
Here's an example:
|
|
||||||
|
|
||||||
```python {.numberLines}
|
```python {.numberLines}
|
||||||
from parser import *
|
from parser import *
|
||||||
|
|
||||||
@rule
|
class SimpleGrammar(Grammar):
|
||||||
def expression():
|
start = "expression"
|
||||||
return seq(expression, PLUS, term) | term
|
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def term():
|
def expression(self):
|
||||||
return seq(LPAREN, expression, RPAREN) | ID
|
return seq(self.expression, self.PLUS, self.term) | self.term
|
||||||
|
|
||||||
PLUS = Terminal('PLUS', '+')
|
@rule
|
||||||
LPAREN = Terminal('LPAREN', '(')
|
def term(self):
|
||||||
RPAREN = Terminal('RPAREN', ')')
|
return seq(self.LPAREN, self.expression, self.RPAREN) | self.ID
|
||||||
ID = Terminal(
|
|
||||||
'ID',
|
|
||||||
Re.seq(
|
|
||||||
Re.set(("a", "z"), ("A", "Z"), "_"),
|
|
||||||
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
SimpleGrammar = Grammar(
|
PLUS = Terminal('+')
|
||||||
name="Simple",
|
LPAREN = Terminal('(')
|
||||||
start=expression,
|
RPAREN = Terminal(')')
|
||||||
)
|
ID = Terminal(
|
||||||
|
Re.seq(
|
||||||
|
Re.set(("a", "z"), ("A", "Z"), "_"),
|
||||||
|
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
|
||||||
|
),
|
||||||
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
Terminal patterns can be plain strings or regular expressions
|
Terminals can be plain strings or regular expressions constructed with
|
||||||
constructed with the `Re` object. (Ironically, I guess this library is
|
the `Re` object. (Ironically, I guess this library is not clever
|
||||||
not clever enough to parse a regular expression string into one of
|
enough to parse a regular expression string into one of these
|
||||||
these structures. If you want to build one, go nuts! It's just Python,
|
structures. If you want to build one, go nuts! It's just Python, you
|
||||||
you can do whatever you want so long as the result is an `Re` object.)
|
can do whatever you want so long as the result is an `Re` object.)
|
||||||
|
|
||||||
Productions can be built out of terminals and non-terminals,
|
Productions can be built out of terminals and non-terminals,
|
||||||
concatenated with the `seq` function or the `+` operator. Alternatives
|
concatenated with the `seq` function or the `+` operator. Alternatives
|
||||||
can be expressed with the `alt` function or the `|` operator. These
|
can be expressed with the `alt` function or the `|` operator. These
|
||||||
things can be freely nested, as desired.
|
things can be freely nested, as desired.
|
||||||
|
|
||||||
You can make lists in the classic context-free grammar way:
|
There are no helpers (yet!) for consuming lists, so they need to be
|
||||||
|
constructed in the classic context-free grammar way:
|
||||||
|
|
||||||
```python {.numberLines}
|
```python {.numberLines}
|
||||||
@rule
|
class NumberList(Grammar):
|
||||||
def list():
|
start = "list"
|
||||||
return NUMBER | (list + COMMA + NUMBER)
|
|
||||||
|
|
||||||
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
@rule
|
||||||
COMMA = Terminal(',')
|
def list(self):
|
||||||
|
return self.NUMBER | (self.list + self.COMMA + self.NUMBER)
|
||||||
|
|
||||||
NumberList = Grammar(
|
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
||||||
name="NumberList",
|
COMMA = Terminal(',')
|
||||||
start=list,
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
(Unlike with PEGs, you can write grammars with left or right-recursion,
|
(Unlike with PEGs, you can write grammars with left or right-recursion,
|
||||||
|
|
@@ -97,23 +90,21 @@ which means they don't generate nodes in the tree and just dump their
|
||||||
contents into the parent node instead.
|
contents into the parent node instead.
|
||||||
|
|
||||||
```python {.numberLines}
|
```python {.numberLines}
|
||||||
@rule
|
class NumberList(Grammar):
|
||||||
def list():
|
start = "list"
|
||||||
# The starting rule can't be transparent: there has to be something to
|
|
||||||
# hold on to!
|
|
||||||
return transparent_list
|
|
||||||
|
|
||||||
@rule(transparent=True)
|
@rule
|
||||||
def transparent_list() -> Rule:
|
def list(self):
|
||||||
return NUMBER | (transparent_list + COMMA + NUMBER)
|
# The starting rule can't be transparent: there has to be something to
|
||||||
|
# hold on to!
|
||||||
|
return self.transparent_list
|
||||||
|
|
||||||
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
@rule(transparent=True)
|
||||||
COMMA = Terminal(',')
|
def transparent_list(self) -> Rule:
|
||||||
|
return self.NUMBER | (self.transparent_list + self.COMMA + self.NUMBER)
|
||||||
|
|
||||||
NumberList = Grammar(
|
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
||||||
name="NumberList",
|
COMMA = Terminal(',')
|
||||||
start=list,
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
This grammar will generate the far more useful tree:
|
This grammar will generate the far more useful tree:
|
||||||
|
|
@@ -132,46 +123,23 @@ following the lead set by tree-sitter, and so the grammar above is
|
||||||
probably better-written as:
|
probably better-written as:
|
||||||
|
|
||||||
```python {.numberLines}
|
```python {.numberLines}
|
||||||
@rule
|
class NumberList(Grammar):
|
||||||
def list():
|
start = "list"
|
||||||
# The starting rule can't be transparent: there has to be something to
|
|
||||||
# hold on to!
|
|
||||||
return transparent_list
|
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def _list() -> Rule:
|
def list(self):
|
||||||
return NUMBER | (_list + COMMA + NUMBER)
|
return self._list
|
||||||
|
|
||||||
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
@rule
|
||||||
COMMA = Terminal(',')
|
def _list(self):
|
||||||
|
return self.NUMBER | (self._list + self.COMMA + self.NUMBER)
|
||||||
|
|
||||||
NumberList = Grammar(
|
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
||||||
name="NumberList",
|
COMMA = Terminal(',')
|
||||||
start=list,
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
That will generate the same tree, but a little more succinctly.
|
That will generate the same tree, but a little more succinctly.
|
||||||
|
|
||||||
Of course, it's a lot of work to write these transparent recursive
|
|
||||||
rules by hand all the time, so there are helpers that do it for you:
|
|
||||||
|
|
||||||
```python {.numberLines}
|
|
||||||
@rule
|
|
||||||
def list():
|
|
||||||
return zero_or_more(NUMBER, COMMA) + NUMBER
|
|
||||||
|
|
||||||
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
|
||||||
COMMA = Terminal(',')
|
|
||||||
|
|
||||||
NumberList = Grammar(
|
|
||||||
name="NumberList",
|
|
||||||
start=list,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
Much better.
|
|
||||||
|
|
||||||
### Trivia
|
### Trivia
|
||||||
|
|
||||||
Most folks that want to parse something want to skip blanks when they
|
Most folks that want to parse something want to skip blanks when they
|
||||||
|
|
@@ -182,21 +150,23 @@ To allow (and ignore) spaces, newlines, tabs, and carriage-returns in
|
||||||
our number lists, we would modify the grammar as follows:
|
our number lists, we would modify the grammar as follows:
|
||||||
|
|
||||||
```python {.numberLines}
|
```python {.numberLines}
|
||||||
@rule
|
class NumberList(Grammar):
|
||||||
def list():
|
start = "list"
|
||||||
return zero_or_more(NUMBER, COMMA) + NUMBER
|
trivia = ["BLANKS"] # <- Add a `trivia` member
|
||||||
|
|
||||||
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
@rule
|
||||||
COMMA = Terminal(',')
|
def list(self):
|
||||||
|
return self._list
|
||||||
|
|
||||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
@rule
|
||||||
# ^ and add a new terminal to describe what we're ignoring...
|
def _list(self):
|
||||||
|
return self.NUMBER | (self._list + self.COMMA + self.NUMBER)
|
||||||
|
|
||||||
NumberList = Grammar(
|
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
||||||
name="NumberList",
|
COMMA = Terminal(',')
|
||||||
start=list,
|
|
||||||
trivia=[BLANKS],
|
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||||
)
|
# ^ and add a new terminal to describe it
|
||||||
```
|
```
|
||||||
|
|
||||||
Now we can parse a list with spaces! "1 , 2, 3" will parse happily
|
Now we can parse a list with spaces! "1 , 2, 3" will parse happily
|
||||||
|
|
|
||||||
2
makefile
2
makefile
|
|
@@ -18,7 +18,7 @@ include lrparser.mk
|
||||||
wheel: dist/lrparsers-$(VERSION)-py3-none-any.whl
|
wheel: dist/lrparsers-$(VERSION)-py3-none-any.whl
|
||||||
|
|
||||||
dist/lrparsers-$(VERSION).tar.gz dist/lrparsers-$(VERSION)-py3-none-any.whl: pyproject.toml $(PYTHON_SOURCES)
|
dist/lrparsers-$(VERSION).tar.gz dist/lrparsers-$(VERSION)-py3-none-any.whl: pyproject.toml $(PYTHON_SOURCES)
|
||||||
uv build --offline #--no-clean
|
uv build --no-clean
|
||||||
|
|
||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean:
|
clean:
|
||||||
|
|
|
||||||
|
|
@@ -236,20 +236,6 @@ class ItemSet:
|
||||||
|
|
||||||
def __init__(self, items=None):
|
def __init__(self, items=None):
|
||||||
self.items = items or {}
|
self.items = items or {}
|
||||||
self._hash = None
|
|
||||||
|
|
||||||
def __hash__(self):
|
|
||||||
# TODO: FREEZE
|
|
||||||
if self._hash is None:
|
|
||||||
self._hash = hash(tuple((key, frozenset(value)) for key, value in self.items.items()))
|
|
||||||
|
|
||||||
return self._hash
|
|
||||||
|
|
||||||
def __eq__(self, other):
|
|
||||||
if not isinstance(other, ItemSet):
|
|
||||||
return False
|
|
||||||
|
|
||||||
return self.items == other.items
|
|
||||||
|
|
||||||
def weakly_compatible(self, other: "ItemSet") -> bool:
|
def weakly_compatible(self, other: "ItemSet") -> bool:
|
||||||
a = self.items
|
a = self.items
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue