Compare commits
No commits in common. "30f7798719d6767edca3325ced3a0a73c480f28e" and "72052645d6088c5d70358546f55d4e694ee78913" have entirely different histories.
30f7798719
...
72052645d6
4 changed files with 24 additions and 87 deletions
33
grammar.py
33
grammar.py
|
|
@ -12,6 +12,7 @@ from parser import (
|
|||
Terminal,
|
||||
Re,
|
||||
)
|
||||
from parser.parser import compile_lexer, dump_lexer_table
|
||||
|
||||
|
||||
class FineGrammar(Grammar):
|
||||
|
|
@ -355,20 +356,7 @@ class FineGrammar(Grammar):
|
|||
RCURLY = Terminal("}")
|
||||
RETURN = Terminal("return")
|
||||
SEMICOLON = Terminal(";")
|
||||
STRING = Terminal(
|
||||
# Double-quoted string.
|
||||
Re.seq(
|
||||
Re.literal('"'),
|
||||
(~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(),
|
||||
Re.literal('"'),
|
||||
)
|
||||
# Single-quoted string.
|
||||
| Re.seq(
|
||||
Re.literal("'"),
|
||||
(~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
|
||||
Re.literal("'"),
|
||||
)
|
||||
)
|
||||
STRING = Terminal('""') # TODO
|
||||
WHILE = Terminal("while")
|
||||
EQUAL = Terminal("=")
|
||||
LPAREN = Terminal("(")
|
||||
|
|
@ -388,20 +376,7 @@ class FineGrammar(Grammar):
|
|||
MINUS = Terminal("-")
|
||||
STAR = Terminal("*")
|
||||
SLASH = Terminal("/")
|
||||
NUMBER = Terminal(
|
||||
Re.seq(
|
||||
Re.set(("0", "9")).plus(),
|
||||
Re.seq(
|
||||
Re.literal("."),
|
||||
Re.set(("0", "9")),
|
||||
Re.seq(
|
||||
Re.set("e", "E"),
|
||||
Re.set("+", "-").question(),
|
||||
Re.set(("0", "9")).plus(),
|
||||
).question(),
|
||||
).question(),
|
||||
)
|
||||
)
|
||||
NUMBER = Terminal(Re.set(("0", "9")).plus())
|
||||
TRUE = Terminal("true")
|
||||
FALSE = Terminal("false")
|
||||
BANG = Terminal("!")
|
||||
|
|
@ -595,8 +570,6 @@ class FineTokens:
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from parser.parser import compile_lexer, dump_lexer_table
|
||||
|
||||
grammar = FineGrammar()
|
||||
grammar.build_table()
|
||||
|
||||
|
|
|
|||
|
|
@ -1609,7 +1609,7 @@ class Terminal(Rule):
|
|||
value: str | None
|
||||
pattern: "str | Re"
|
||||
|
||||
def __init__(self, pattern, *, name=None):
|
||||
def __init__(self, pattern, name=None):
|
||||
self.value = name
|
||||
self.pattern = pattern
|
||||
|
||||
|
|
@ -2180,46 +2180,36 @@ class Re:
|
|||
def seq(cls, *values: "Re") -> "Re":
|
||||
result = values[0]
|
||||
for v in values[1:]:
|
||||
result = ReSeq(result, v)
|
||||
result = RegexSequence(result, v)
|
||||
return result
|
||||
|
||||
@classmethod
|
||||
def literal(cls, value: str) -> "Re":
|
||||
return cls.seq(*[ReSet.from_ranges(c) for c in value])
|
||||
return cls.seq(*[RegexLiteral.from_ranges(c) for c in value])
|
||||
|
||||
@classmethod
|
||||
def set(cls, *args: str | tuple[str, str]) -> "ReSet":
|
||||
return ReSet.from_ranges(*args)
|
||||
|
||||
@classmethod
|
||||
def any(cls) -> "ReSet":
|
||||
return ReSet.any()
|
||||
def set(cls, *args: str | tuple[str, str]) -> "Re":
|
||||
return RegexLiteral.from_ranges(*args)
|
||||
|
||||
def plus(self) -> "Re":
|
||||
return RePlus(self)
|
||||
return RegexPlus(self)
|
||||
|
||||
def star(self) -> "Re":
|
||||
return ReStar(self)
|
||||
return RegexStar(self)
|
||||
|
||||
def question(self) -> "Re":
|
||||
return ReQuestion(self)
|
||||
return RegexQuestion(self)
|
||||
|
||||
def __or__(self, value: "Re", /) -> "Re":
|
||||
return ReAlt(self, value)
|
||||
|
||||
def __add__(self, value: "Re") -> "Re":
|
||||
return ReSeq(self, value)
|
||||
|
||||
|
||||
UNICODE_MAX_CP = 1114112
|
||||
return RegexAlternation(self, value)
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ReSet(Re):
|
||||
class RegexLiteral(Re):
|
||||
values: list[Span]
|
||||
|
||||
@classmethod
|
||||
def from_ranges(cls, *args: str | tuple[str, str]) -> "ReSet":
|
||||
def from_ranges(cls, *args: str | tuple[str, str]) -> "RegexLiteral":
|
||||
values = []
|
||||
for a in args:
|
||||
if isinstance(a, str):
|
||||
|
|
@ -2227,36 +2217,7 @@ class ReSet(Re):
|
|||
else:
|
||||
values.append(Span.from_str(a[0], a[1]))
|
||||
|
||||
return ReSet(values)
|
||||
|
||||
@classmethod
|
||||
def any(cls) -> "ReSet":
|
||||
return ReSet(values=[Span(0, UNICODE_MAX_CP)])
|
||||
|
||||
def invert(self) -> "ReSet":
|
||||
spans = []
|
||||
lower = 0
|
||||
for span in self.values:
|
||||
upper = span.lower
|
||||
if upper != lower:
|
||||
assert lower < upper
|
||||
spans.append(Span(lower, upper))
|
||||
lower = span.upper
|
||||
|
||||
# What... is.... the top end here? Are we dealing with bytes? Are we
|
||||
# dealing with unicode character ranges? In python we're dealing with
|
||||
# "ord". I feel like this... here... is correct but might need to
|
||||
# change when the state machine is converted for other languages.
|
||||
#
|
||||
upper = UNICODE_MAX_CP
|
||||
if upper != lower:
|
||||
assert lower < upper
|
||||
spans.append(Span(lower, upper))
|
||||
|
||||
return ReSet(spans)
|
||||
|
||||
def __invert__(self) -> "ReSet":
|
||||
return self.invert()
|
||||
return RegexLiteral(values)
|
||||
|
||||
def to_nfa(self, start: NFAState) -> NFAState:
|
||||
end = NFAState()
|
||||
|
|
@ -2282,7 +2243,7 @@ class ReSet(Re):
|
|||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class RePlus(Re):
|
||||
class RegexPlus(Re):
|
||||
child: Re
|
||||
|
||||
def to_nfa(self, start: NFAState) -> NFAState:
|
||||
|
|
@ -2295,7 +2256,7 @@ class RePlus(Re):
|
|||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ReStar(Re):
|
||||
class RegexStar(Re):
|
||||
child: Re
|
||||
|
||||
def to_nfa(self, start: NFAState) -> NFAState:
|
||||
|
|
@ -2309,7 +2270,7 @@ class ReStar(Re):
|
|||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ReQuestion(Re):
|
||||
class RegexQuestion(Re):
|
||||
child: Re
|
||||
|
||||
def to_nfa(self, start: NFAState) -> NFAState:
|
||||
|
|
@ -2322,7 +2283,7 @@ class ReQuestion(Re):
|
|||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ReSeq(Re):
|
||||
class RegexSequence(Re):
|
||||
left: Re
|
||||
right: Re
|
||||
|
||||
|
|
@ -2335,7 +2296,7 @@ class ReSeq(Re):
|
|||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ReAlt(Re):
|
||||
class RegexAlternation(Re):
|
||||
left: Re
|
||||
right: Re
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import typing
|
||||
|
||||
import pytest
|
||||
|
||||
import parser
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
import collections
|
||||
|
||||
from hypothesis import assume, example, given
|
||||
from hypothesis.strategies import integers, lists
|
||||
from hypothesis.strategies import integers, lists, tuples
|
||||
|
||||
import pytest
|
||||
|
||||
from parser import (
|
||||
EdgeList,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue