Augment number pattern, tests

More robust testing. Error messages would be nice, but are left for a future change.
This commit is contained in:
John Doty 2024-08-24 09:38:21 -07:00
parent 0c952e4905
commit f29ec5072f
2 changed files with 23 additions and 17 deletions

View file

@@ -441,7 +441,7 @@ def generic_tokenize(
last_accept = None last_accept = None
last_accept_pos = 0 last_accept_pos = 0
print(f"LEXING: {src} ({len(src)})") # print(f"LEXING: {src} ({len(src)})")
while pos < len(src): while pos < len(src):
while state is not None: while state is not None:
@@ -450,37 +450,39 @@ def generic_tokenize(
last_accept = accept last_accept = accept
last_accept_pos = pos last_accept_pos = pos
print(f" @ {pos} state: {state} ({accept})") # print(f" @ {pos} state: {state} ({accept})")
if pos >= len(src): if pos >= len(src):
break break
char = ord(src[pos]) char = ord(src[pos])
print(f" -> char: {char} ({repr(src[pos])})") # print(f" -> char: {char} ({repr(src[pos])})")
# Find the index of the span where the upper value is the tightest # Find the index of the span where the upper value is the tightest
# bound on the character. # bound on the character.
state = None state = None
index = bisect.bisect_right(edges, char, key=lambda x: x[0].upper) index = bisect.bisect_right(edges, char, key=lambda x: x[0].upper)
print(f" -> {index}") # print(f" -> {index}")
if index < len(edges): if index < len(edges):
span, target = edges[index] span, target = edges[index]
print(f" -> {span}, {target}") # print(f" -> {span}, {target}")
if char >= span.lower: if char >= span.lower:
print(f" -> target: {target}") # print(f" -> target: {target}")
state = target state = target
pos += 1 pos += 1
else: else:
print(f" Nope (outside range)") # print(f" Nope (outside range)")
pass
else: else:
print(f" Nope (at end)") # print(f" Nope (at end)")
pass
if last_accept is None: if last_accept is None:
raise Exception(f"Token error at {pos}") raise Exception(f"Token error at {pos}")
yield (last_accept, start, last_accept_pos - start) yield (last_accept, start, last_accept_pos - start)
print(f" Yield: {last_accept}, reset to {last_accept_pos}") # print(f" Yield: {last_accept}, reset to {last_accept_pos}")
last_accept = None last_accept = None
pos = last_accept_pos pos = last_accept_pos
start = pos start = pos

View file

@@ -1,7 +1,8 @@
import collections import collections
import math
from hypothesis import assume, example, given from hypothesis import assume, example, given
from hypothesis.strategies import integers, lists from hypothesis.strategies import integers, lists, floats
from parser import ( from parser import (
@@ -383,7 +384,10 @@ def test_lexer_compile():
] ]
def test_lexer_numbers(): @given(floats().map(abs))
def test_lexer_numbers(n: float):
assume(math.isfinite(n))
class LexTest(Grammar): class LexTest(Grammar):
@rule @rule
def number(self): def number(self):
@@ -397,11 +401,11 @@ def test_lexer_numbers():
Re.seq( Re.seq(
Re.literal("."), Re.literal("."),
Re.set(("0", "9")).plus(), Re.set(("0", "9")).plus(),
Re.seq( ).question(),
Re.set("e", "E"), Re.seq(
Re.set("+", "-").question(), Re.set("e", "E"),
Re.set(("0", "9")).plus(), Re.set("+", "-").question(),
).question(), Re.set(("0", "9")).plus(),
).question(), ).question(),
) )
) )
@@ -409,7 +413,7 @@ def test_lexer_numbers():
lexer = compile_lexer(LexTest()) lexer = compile_lexer(LexTest())
dump_lexer_table(lexer) dump_lexer_table(lexer)
number_string = "1234.12" number_string = str(n)
tokens = list(generic_tokenize(number_string, lexer)) tokens = list(generic_tokenize(number_string, lexer))
assert tokens == [ assert tokens == [