Compare commits
4 commits
72052645d6
...
30f7798719
| Author | SHA1 | Date | |
|---|---|---|---|
| | 30f7798719 | | |
| | c0b623bd6d | | |
| | 454e6fd6fd | | |
| | 6d6aabdeb3 | | |
4 changed files with 87 additions and 24 deletions
33
grammar.py
33
grammar.py
|
|
@ -12,7 +12,6 @@ from parser import (
|
||||||
Terminal,
|
Terminal,
|
||||||
Re,
|
Re,
|
||||||
)
|
)
|
||||||
from parser.parser import compile_lexer, dump_lexer_table
|
|
||||||
|
|
||||||
|
|
||||||
class FineGrammar(Grammar):
|
class FineGrammar(Grammar):
|
||||||
|
|
@ -356,7 +355,20 @@ class FineGrammar(Grammar):
|
||||||
RCURLY = Terminal("}")
|
RCURLY = Terminal("}")
|
||||||
RETURN = Terminal("return")
|
RETURN = Terminal("return")
|
||||||
SEMICOLON = Terminal(";")
|
SEMICOLON = Terminal(";")
|
||||||
STRING = Terminal('""') # TODO
|
STRING = Terminal(
|
||||||
|
# Double-quoted string.
|
||||||
|
Re.seq(
|
||||||
|
Re.literal('"'),
|
||||||
|
(~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(),
|
||||||
|
Re.literal('"'),
|
||||||
|
)
|
||||||
|
# Single-quoted string.
|
||||||
|
| Re.seq(
|
||||||
|
Re.literal("'"),
|
||||||
|
(~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
|
||||||
|
Re.literal("'"),
|
||||||
|
)
|
||||||
|
)
|
||||||
WHILE = Terminal("while")
|
WHILE = Terminal("while")
|
||||||
EQUAL = Terminal("=")
|
EQUAL = Terminal("=")
|
||||||
LPAREN = Terminal("(")
|
LPAREN = Terminal("(")
|
||||||
|
|
@ -376,7 +388,20 @@ class FineGrammar(Grammar):
|
||||||
MINUS = Terminal("-")
|
MINUS = Terminal("-")
|
||||||
STAR = Terminal("*")
|
STAR = Terminal("*")
|
||||||
SLASH = Terminal("/")
|
SLASH = Terminal("/")
|
||||||
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
NUMBER = Terminal(
|
||||||
|
Re.seq(
|
||||||
|
Re.set(("0", "9")).plus(),
|
||||||
|
Re.seq(
|
||||||
|
Re.literal("."),
|
||||||
|
Re.set(("0", "9")),
|
||||||
|
Re.seq(
|
||||||
|
Re.set("e", "E"),
|
||||||
|
Re.set("+", "-").question(),
|
||||||
|
Re.set(("0", "9")).plus(),
|
||||||
|
).question(),
|
||||||
|
).question(),
|
||||||
|
)
|
||||||
|
)
|
||||||
TRUE = Terminal("true")
|
TRUE = Terminal("true")
|
||||||
FALSE = Terminal("false")
|
FALSE = Terminal("false")
|
||||||
BANG = Terminal("!")
|
BANG = Terminal("!")
|
||||||
|
|
@ -570,6 +595,8 @@ class FineTokens:
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
from parser.parser import compile_lexer, dump_lexer_table
|
||||||
|
|
||||||
grammar = FineGrammar()
|
grammar = FineGrammar()
|
||||||
grammar.build_table()
|
grammar.build_table()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1609,7 +1609,7 @@ class Terminal(Rule):
|
||||||
value: str | None
|
value: str | None
|
||||||
pattern: "str | Re"
|
pattern: "str | Re"
|
||||||
|
|
||||||
def __init__(self, pattern, name=None):
|
def __init__(self, pattern, *, name=None):
|
||||||
self.value = name
|
self.value = name
|
||||||
self.pattern = pattern
|
self.pattern = pattern
|
||||||
|
|
||||||
|
|
@ -2180,36 +2180,46 @@ class Re:
|
||||||
def seq(cls, *values: "Re") -> "Re":
|
def seq(cls, *values: "Re") -> "Re":
|
||||||
result = values[0]
|
result = values[0]
|
||||||
for v in values[1:]:
|
for v in values[1:]:
|
||||||
result = RegexSequence(result, v)
|
result = ReSeq(result, v)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def literal(cls, value: str) -> "Re":
|
def literal(cls, value: str) -> "Re":
|
||||||
return cls.seq(*[RegexLiteral.from_ranges(c) for c in value])
|
return cls.seq(*[ReSet.from_ranges(c) for c in value])
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def set(cls, *args: str | tuple[str, str]) -> "Re":
|
def set(cls, *args: str | tuple[str, str]) -> "ReSet":
|
||||||
return RegexLiteral.from_ranges(*args)
|
return ReSet.from_ranges(*args)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def any(cls) -> "ReSet":
|
||||||
|
return ReSet.any()
|
||||||
|
|
||||||
def plus(self) -> "Re":
|
def plus(self) -> "Re":
|
||||||
return RegexPlus(self)
|
return RePlus(self)
|
||||||
|
|
||||||
def star(self) -> "Re":
|
def star(self) -> "Re":
|
||||||
return RegexStar(self)
|
return ReStar(self)
|
||||||
|
|
||||||
def question(self) -> "Re":
|
def question(self) -> "Re":
|
||||||
return RegexQuestion(self)
|
return ReQuestion(self)
|
||||||
|
|
||||||
def __or__(self, value: "Re", /) -> "Re":
|
def __or__(self, value: "Re", /) -> "Re":
|
||||||
return RegexAlternation(self, value)
|
return ReAlt(self, value)
|
||||||
|
|
||||||
|
def __add__(self, value: "Re") -> "Re":
|
||||||
|
return ReSeq(self, value)
|
||||||
|
|
||||||
|
|
||||||
|
UNICODE_MAX_CP = 1114112
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class RegexLiteral(Re):
|
class ReSet(Re):
|
||||||
values: list[Span]
|
values: list[Span]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_ranges(cls, *args: str | tuple[str, str]) -> "RegexLiteral":
|
def from_ranges(cls, *args: str | tuple[str, str]) -> "ReSet":
|
||||||
values = []
|
values = []
|
||||||
for a in args:
|
for a in args:
|
||||||
if isinstance(a, str):
|
if isinstance(a, str):
|
||||||
|
|
@ -2217,7 +2227,36 @@ class RegexLiteral(Re):
|
||||||
else:
|
else:
|
||||||
values.append(Span.from_str(a[0], a[1]))
|
values.append(Span.from_str(a[0], a[1]))
|
||||||
|
|
||||||
return RegexLiteral(values)
|
return ReSet(values)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def any(cls) -> "ReSet":
|
||||||
|
return ReSet(values=[Span(0, UNICODE_MAX_CP)])
|
||||||
|
|
||||||
|
def invert(self) -> "ReSet":
|
||||||
|
spans = []
|
||||||
|
lower = 0
|
||||||
|
for span in self.values:
|
||||||
|
upper = span.lower
|
||||||
|
if upper != lower:
|
||||||
|
assert lower < upper
|
||||||
|
spans.append(Span(lower, upper))
|
||||||
|
lower = span.upper
|
||||||
|
|
||||||
|
# What... is.... the top end here? Are we dealing with bytes? Are we
|
||||||
|
# dealing with unicode character ranges? In python we're dealing with
|
||||||
|
# "ord". I feel like this... here... is correct but might need to
|
||||||
|
# change when the state machine is converted for other languages.
|
||||||
|
#
|
||||||
|
upper = UNICODE_MAX_CP
|
||||||
|
if upper != lower:
|
||||||
|
assert lower < upper
|
||||||
|
spans.append(Span(lower, upper))
|
||||||
|
|
||||||
|
return ReSet(spans)
|
||||||
|
|
||||||
|
def __invert__(self) -> "ReSet":
|
||||||
|
return self.invert()
|
||||||
|
|
||||||
def to_nfa(self, start: NFAState) -> NFAState:
|
def to_nfa(self, start: NFAState) -> NFAState:
|
||||||
end = NFAState()
|
end = NFAState()
|
||||||
|
|
@ -2243,7 +2282,7 @@ class RegexLiteral(Re):
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class RegexPlus(Re):
|
class RePlus(Re):
|
||||||
child: Re
|
child: Re
|
||||||
|
|
||||||
def to_nfa(self, start: NFAState) -> NFAState:
|
def to_nfa(self, start: NFAState) -> NFAState:
|
||||||
|
|
@ -2256,7 +2295,7 @@ class RegexPlus(Re):
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class RegexStar(Re):
|
class ReStar(Re):
|
||||||
child: Re
|
child: Re
|
||||||
|
|
||||||
def to_nfa(self, start: NFAState) -> NFAState:
|
def to_nfa(self, start: NFAState) -> NFAState:
|
||||||
|
|
@ -2270,7 +2309,7 @@ class RegexStar(Re):
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class RegexQuestion(Re):
|
class ReQuestion(Re):
|
||||||
child: Re
|
child: Re
|
||||||
|
|
||||||
def to_nfa(self, start: NFAState) -> NFAState:
|
def to_nfa(self, start: NFAState) -> NFAState:
|
||||||
|
|
@ -2283,7 +2322,7 @@ class RegexQuestion(Re):
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class RegexSequence(Re):
|
class ReSeq(Re):
|
||||||
left: Re
|
left: Re
|
||||||
right: Re
|
right: Re
|
||||||
|
|
||||||
|
|
@ -2296,7 +2335,7 @@ class RegexSequence(Re):
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class RegexAlternation(Re):
|
class ReAlt(Re):
|
||||||
left: Re
|
left: Re
|
||||||
right: Re
|
right: Re
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,3 @@
|
||||||
import typing
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import parser
|
import parser
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,8 @@
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
from hypothesis import assume, example, given
|
from hypothesis import assume, example, given
|
||||||
from hypothesis.strategies import integers, lists, tuples
|
from hypothesis.strategies import integers, lists
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from parser import (
|
from parser import (
|
||||||
EdgeList,
|
EdgeList,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue