Compare commits

..

No commits in common. "30f7798719d6767edca3325ced3a0a73c480f28e" and "72052645d6088c5d70358546f55d4e694ee78913" have entirely different histories.

4 changed files with 24 additions and 87 deletions

View file

@ -12,6 +12,7 @@ from parser import (
Terminal, Terminal,
Re, Re,
) )
from parser.parser import compile_lexer, dump_lexer_table
class FineGrammar(Grammar): class FineGrammar(Grammar):
@ -355,20 +356,7 @@ class FineGrammar(Grammar):
RCURLY = Terminal("}") RCURLY = Terminal("}")
RETURN = Terminal("return") RETURN = Terminal("return")
SEMICOLON = Terminal(";") SEMICOLON = Terminal(";")
STRING = Terminal( STRING = Terminal('""') # TODO
# Double-quoted string.
Re.seq(
Re.literal('"'),
(~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(),
Re.literal('"'),
)
# Single-quoted string.
| Re.seq(
Re.literal("'"),
(~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
Re.literal("'"),
)
)
WHILE = Terminal("while") WHILE = Terminal("while")
EQUAL = Terminal("=") EQUAL = Terminal("=")
LPAREN = Terminal("(") LPAREN = Terminal("(")
@ -388,20 +376,7 @@ class FineGrammar(Grammar):
MINUS = Terminal("-") MINUS = Terminal("-")
STAR = Terminal("*") STAR = Terminal("*")
SLASH = Terminal("/") SLASH = Terminal("/")
NUMBER = Terminal( NUMBER = Terminal(Re.set(("0", "9")).plus())
Re.seq(
Re.set(("0", "9")).plus(),
Re.seq(
Re.literal("."),
Re.set(("0", "9")),
Re.seq(
Re.set("e", "E"),
Re.set("+", "-").question(),
Re.set(("0", "9")).plus(),
).question(),
).question(),
)
)
TRUE = Terminal("true") TRUE = Terminal("true")
FALSE = Terminal("false") FALSE = Terminal("false")
BANG = Terminal("!") BANG = Terminal("!")
@ -595,8 +570,6 @@ class FineTokens:
if __name__ == "__main__": if __name__ == "__main__":
from parser.parser import compile_lexer, dump_lexer_table
grammar = FineGrammar() grammar = FineGrammar()
grammar.build_table() grammar.build_table()

View file

@ -1609,7 +1609,7 @@ class Terminal(Rule):
value: str | None value: str | None
pattern: "str | Re" pattern: "str | Re"
def __init__(self, pattern, *, name=None): def __init__(self, pattern, name=None):
self.value = name self.value = name
self.pattern = pattern self.pattern = pattern
@ -2180,46 +2180,36 @@ class Re:
def seq(cls, *values: "Re") -> "Re": def seq(cls, *values: "Re") -> "Re":
result = values[0] result = values[0]
for v in values[1:]: for v in values[1:]:
result = ReSeq(result, v) result = RegexSequence(result, v)
return result return result
@classmethod @classmethod
def literal(cls, value: str) -> "Re": def literal(cls, value: str) -> "Re":
return cls.seq(*[ReSet.from_ranges(c) for c in value]) return cls.seq(*[RegexLiteral.from_ranges(c) for c in value])
@classmethod @classmethod
def set(cls, *args: str | tuple[str, str]) -> "ReSet": def set(cls, *args: str | tuple[str, str]) -> "Re":
return ReSet.from_ranges(*args) return RegexLiteral.from_ranges(*args)
@classmethod
def any(cls) -> "ReSet":
return ReSet.any()
def plus(self) -> "Re": def plus(self) -> "Re":
return RePlus(self) return RegexPlus(self)
def star(self) -> "Re": def star(self) -> "Re":
return ReStar(self) return RegexStar(self)
def question(self) -> "Re": def question(self) -> "Re":
return ReQuestion(self) return RegexQuestion(self)
def __or__(self, value: "Re", /) -> "Re": def __or__(self, value: "Re", /) -> "Re":
return ReAlt(self, value) return RegexAlternation(self, value)
def __add__(self, value: "Re") -> "Re":
return ReSeq(self, value)
UNICODE_MAX_CP = 1114112
@dataclasses.dataclass @dataclasses.dataclass
class ReSet(Re): class RegexLiteral(Re):
values: list[Span] values: list[Span]
@classmethod @classmethod
def from_ranges(cls, *args: str | tuple[str, str]) -> "ReSet": def from_ranges(cls, *args: str | tuple[str, str]) -> "RegexLiteral":
values = [] values = []
for a in args: for a in args:
if isinstance(a, str): if isinstance(a, str):
@ -2227,36 +2217,7 @@ class ReSet(Re):
else: else:
values.append(Span.from_str(a[0], a[1])) values.append(Span.from_str(a[0], a[1]))
return ReSet(values) return RegexLiteral(values)
@classmethod
def any(cls) -> "ReSet":
return ReSet(values=[Span(0, UNICODE_MAX_CP)])
def invert(self) -> "ReSet":
spans = []
lower = 0
for span in self.values:
upper = span.lower
if upper != lower:
assert lower < upper
spans.append(Span(lower, upper))
lower = span.upper
# What... is.... the top end here? Are we dealing with bytes? Are we
# dealing with unicode character ranges? In python we're dealing with
# "ord". I feel like this... here... is correct but might need to
# change when the state machine is converted for other languages.
#
upper = UNICODE_MAX_CP
if upper != lower:
assert lower < upper
spans.append(Span(lower, upper))
return ReSet(spans)
def __invert__(self) -> "ReSet":
return self.invert()
def to_nfa(self, start: NFAState) -> NFAState: def to_nfa(self, start: NFAState) -> NFAState:
end = NFAState() end = NFAState()
@ -2282,7 +2243,7 @@ class ReSet(Re):
@dataclasses.dataclass @dataclasses.dataclass
class RePlus(Re): class RegexPlus(Re):
child: Re child: Re
def to_nfa(self, start: NFAState) -> NFAState: def to_nfa(self, start: NFAState) -> NFAState:
@ -2295,7 +2256,7 @@ class RePlus(Re):
@dataclasses.dataclass @dataclasses.dataclass
class ReStar(Re): class RegexStar(Re):
child: Re child: Re
def to_nfa(self, start: NFAState) -> NFAState: def to_nfa(self, start: NFAState) -> NFAState:
@ -2309,7 +2270,7 @@ class ReStar(Re):
@dataclasses.dataclass @dataclasses.dataclass
class ReQuestion(Re): class RegexQuestion(Re):
child: Re child: Re
def to_nfa(self, start: NFAState) -> NFAState: def to_nfa(self, start: NFAState) -> NFAState:
@ -2322,7 +2283,7 @@ class ReQuestion(Re):
@dataclasses.dataclass @dataclasses.dataclass
class ReSeq(Re): class RegexSequence(Re):
left: Re left: Re
right: Re right: Re
@ -2335,7 +2296,7 @@ class ReSeq(Re):
@dataclasses.dataclass @dataclasses.dataclass
class ReAlt(Re): class RegexAlternation(Re):
left: Re left: Re
right: Re right: Re

View file

@ -1,3 +1,5 @@
import typing
import pytest import pytest
import parser import parser

View file

@ -1,8 +1,9 @@
import collections import collections
from hypothesis import assume, example, given from hypothesis import assume, example, given
from hypothesis.strategies import integers, lists from hypothesis.strategies import integers, lists, tuples
import pytest
from parser import ( from parser import (
EdgeList, EdgeList,