Augment number pattern, tests

More robust testing. Error messages would be nice, but those are left for a later change.
This commit is contained in:
John Doty 2024-08-24 09:38:21 -07:00
parent 0c952e4905
commit f29ec5072f
2 changed files with 23 additions and 17 deletions

View file

@@ -441,7 +441,7 @@ def generic_tokenize(
last_accept = None
last_accept_pos = 0
print(f"LEXING: {src} ({len(src)})")
# print(f"LEXING: {src} ({len(src)})")
while pos < len(src):
while state is not None:
@@ -450,37 +450,39 @@
last_accept = accept
last_accept_pos = pos
print(f" @ {pos} state: {state} ({accept})")
# print(f" @ {pos} state: {state} ({accept})")
if pos >= len(src):
break
char = ord(src[pos])
print(f" -> char: {char} ({repr(src[pos])})")
# print(f" -> char: {char} ({repr(src[pos])})")
# Find the index of the span where the upper value is the tightest
# bound on the character.
state = None
index = bisect.bisect_right(edges, char, key=lambda x: x[0].upper)
print(f" -> {index}")
# print(f" -> {index}")
if index < len(edges):
span, target = edges[index]
print(f" -> {span}, {target}")
# print(f" -> {span}, {target}")
if char >= span.lower:
print(f" -> target: {target}")
# print(f" -> target: {target}")
state = target
pos += 1
else:
print(f" Nope (outside range)")
# print(f" Nope (outside range)")
pass
else:
print(f" Nope (at end)")
# print(f" Nope (at end)")
pass
if last_accept is None:
raise Exception(f"Token error at {pos}")
yield (last_accept, start, last_accept_pos - start)
print(f" Yield: {last_accept}, reset to {last_accept_pos}")
# print(f" Yield: {last_accept}, reset to {last_accept_pos}")
last_accept = None
pos = last_accept_pos
start = pos

View file

@@ -1,7 +1,8 @@
import collections
import math
from hypothesis import assume, example, given
from hypothesis.strategies import integers, lists
from hypothesis.strategies import integers, lists, floats
from parser import (
@@ -383,7 +384,10 @@ def test_lexer_compile():
]
def test_lexer_numbers():
@given(floats().map(abs))
def test_lexer_numbers(n: float):
assume(math.isfinite(n))
class LexTest(Grammar):
@rule
def number(self):
@@ -397,11 +401,11 @@ def test_lexer_numbers():
Re.seq(
Re.literal("."),
Re.set(("0", "9")).plus(),
Re.seq(
Re.set("e", "E"),
Re.set("+", "-").question(),
Re.set(("0", "9")).plus(),
).question(),
).question(),
Re.seq(
Re.set("e", "E"),
Re.set("+", "-").question(),
Re.set(("0", "9")).plus(),
).question(),
)
)
@@ -409,7 +413,7 @@ def test_lexer_numbers():
lexer = compile_lexer(LexTest())
dump_lexer_table(lexer)
number_string = "1234.12"
number_string = str(n)
tokens = list(generic_tokenize(number_string, lexer))
assert tokens == [