lrparsers/tests/test_lexer.py

420 lines
9.4 KiB
Python

import collections
import math
from hypothesis import assume, example, given
from hypothesis.strategies import integers, lists, floats
from parser import (
EdgeList,
Span,
Grammar,
rule,
Terminal,
dump_lexer_table,
Re,
)
from parser.runtime import generic_tokenize
def test_span_intersection():
pairs = [
((1, 3), (2, 4)),
((1, 3), (2, 3)),
((1, 3), (1, 2)),
((1, 3), (0, 2)),
((1, 3), (0, 4)),
]
for a, b in pairs:
left = Span(*a)
right = Span(*b)
assert left.intersects(right)
assert right.intersects(left)
def test_span_no_intersection():
pairs = [
((1, 2), (3, 4)),
]
for a, b in pairs:
left = Span(*a)
right = Span(*b)
assert not left.intersects(right)
assert not right.intersects(left)
def test_span_split():
TC = collections.namedtuple("TC", ["left", "right", "expected"])
cases = [
TC(
left=Span(1, 4),
right=Span(2, 3),
expected=(Span(1, 2), Span(2, 3), Span(3, 4)),
),
TC(
left=Span(1, 4),
right=Span(1, 2),
expected=(None, Span(1, 2), Span(2, 4)),
),
TC(
left=Span(1, 4),
right=Span(3, 4),
expected=(Span(1, 3), Span(3, 4), None),
),
TC(
left=Span(1, 4),
right=Span(1, 4),
expected=(None, Span(1, 4), None),
),
]
for left, right, expected in cases:
result = left.split(right)
assert result == expected
result = right.split(left)
assert result == expected
@given(integers(), integers())
def test_equal_span_mid_only(x, y):
"""Splitting spans against themselves results in an empty lo and hi bound."""
assume(x < y)
span = Span(x, y)
lo, mid, hi = span.split(span)
assert lo is None
assert hi is None
assert mid == span
three_distinct_points = lists(
integers(),
min_size=3,
max_size=3,
unique=True,
).map(sorted)
@given(three_distinct_points)
def test_span_low_align_lo_none(vals):
"""Splitting spans with aligned lower bounds results in an empty lo bound."""
# x y z
# [ a )
# [ b )
x, y, z = vals
a = Span(x, y)
b = Span(x, z)
lo, _, _ = a.split(b)
assert lo is None
@given(three_distinct_points)
def test_span_high_align_hi_none(vals):
"""Splitting spans with aligned lower bounds results in an empty lo bound."""
# x y z
# [ a )
# [ b )
x, y, z = vals
a = Span(y, z)
b = Span(x, z)
_, _, hi = a.split(b)
assert hi is None
four_distinct_points = lists(
integers(),
min_size=4,
max_size=4,
unique=True,
).map(sorted)
@given(four_distinct_points)
def test_span_split_overlapping_lo_left(vals):
"""Splitting two overlapping spans results in lo overlapping left."""
a, b, c, d = vals
left = Span(a, c)
right = Span(b, d)
lo, _, _ = left.split(right)
assert lo is not None
assert lo.intersects(left)
@given(four_distinct_points)
def test_span_split_overlapping_lo_not_right(vals):
"""Splitting two overlapping spans results in lo NOT overlapping right."""
a, b, c, d = vals
left = Span(a, c)
right = Span(b, d)
lo, _, _ = left.split(right)
assert lo is not None
assert not lo.intersects(right)
@given(four_distinct_points)
def test_span_split_overlapping_mid_left(vals):
"""Splitting two overlapping spans results in mid overlapping left."""
a, b, c, d = vals
left = Span(a, c)
right = Span(b, d)
_, mid, _ = left.split(right)
assert mid is not None
assert mid.intersects(left)
@given(four_distinct_points)
def test_span_split_overlapping_mid_right(vals):
"""Splitting two overlapping spans results in mid overlapping right."""
a, b, c, d = vals
left = Span(a, c)
right = Span(b, d)
_, mid, _ = left.split(right)
assert mid is not None
assert mid.intersects(right)
@given(four_distinct_points)
def test_span_split_overlapping_hi_right(vals):
"""Splitting two overlapping spans results in hi overlapping right."""
a, b, c, d = vals
left = Span(a, c)
right = Span(b, d)
_, _, hi = left.split(right)
assert hi is not None
assert hi.intersects(right)
@given(four_distinct_points)
def test_span_split_overlapping_hi_not_left(vals):
"""Splitting two overlapping spans results in hi NOT overlapping left."""
a, b, c, d = vals
left = Span(a, c)
right = Span(b, d)
_, _, hi = left.split(right)
assert hi is not None
assert not hi.intersects(left)
@given(four_distinct_points)
def test_span_split_embedded(vals):
"""Splitting two spans where one overlaps the other."""
a, b, c, d = vals
outer = Span(a, d)
inner = Span(b, c)
lo, mid, hi = outer.split(inner)
assert lo is not None
assert mid is not None
assert hi is not None
assert lo.intersects(outer)
assert not lo.intersects(inner)
assert mid.intersects(outer)
assert mid.intersects(inner)
assert hi.intersects(outer)
assert not hi.intersects(inner)
def test_edge_list_single():
el: EdgeList[str] = EdgeList()
el.add_edge(Span(1, 4), "A")
edges = list(el)
assert edges == [
(Span(1, 4), ["A"]),
]
def test_edge_list_fully_enclosed():
el: EdgeList[str] = EdgeList()
el.add_edge(Span(1, 4), "A")
el.add_edge(Span(2, 3), "B")
edges = list(el)
assert edges == [
(Span(1, 2), ["A"]),
(Span(2, 3), ["A", "B"]),
(Span(3, 4), ["A"]),
]
def test_edge_list_overlap():
el: EdgeList[str] = EdgeList()
el.add_edge(Span(1, 4), "A")
el.add_edge(Span(2, 5), "B")
edges = list(el)
assert edges == [
(Span(1, 2), ["A"]),
(Span(2, 4), ["A", "B"]),
(Span(4, 5), ["B"]),
]
def test_edge_list_no_overlap():
el: EdgeList[str] = EdgeList()
el.add_edge(Span(1, 4), "A")
el.add_edge(Span(5, 8), "B")
edges = list(el)
assert edges == [
(Span(1, 4), ["A"]),
(Span(5, 8), ["B"]),
]
def test_edge_list_no_overlap_ordered():
el: EdgeList[str] = EdgeList()
el.add_edge(Span(5, 8), "B")
el.add_edge(Span(1, 4), "A")
edges = list(el)
assert edges == [
(Span(1, 4), ["A"]),
(Span(5, 8), ["B"]),
]
def test_edge_list_overlap_span():
el: EdgeList[str] = EdgeList()
el.add_edge(Span(1, 3), "A")
el.add_edge(Span(4, 6), "B")
el.add_edge(Span(2, 5), "C")
edges = list(el)
assert edges == [
(Span(1, 2), ["A"]),
(Span(2, 3), ["A", "C"]),
(Span(3, 4), ["C"]),
(Span(4, 5), ["B", "C"]),
(Span(5, 6), ["B"]),
]
def test_edge_list_overlap_span_big():
el: EdgeList[str] = EdgeList()
el.add_edge(Span(2, 3), "A")
el.add_edge(Span(4, 5), "B")
el.add_edge(Span(6, 7), "C")
el.add_edge(Span(1, 8), "D")
edges = list(el)
assert edges == [
(Span(1, 2), ["D"]),
(Span(2, 3), ["A", "D"]),
(Span(3, 4), ["D"]),
(Span(4, 5), ["B", "D"]),
(Span(5, 6), ["D"]),
(Span(6, 7), ["C", "D"]),
(Span(7, 8), ["D"]),
]
@given(lists(lists(integers(), min_size=2, max_size=2, unique=True), min_size=1))
@example(points=[[0, 1], [1, 2]])
def test_edge_list_always_sorted(points: list[tuple[int, int]]):
# OK this is weird but stick with me.
el: EdgeList[str] = EdgeList()
for i, (a, b) in enumerate(points):
lower = min(a, b)
upper = max(a, b)
span = Span(lower, upper)
el.add_edge(span, str(i))
last_upper = None
for span, _ in el:
if last_upper is not None:
assert last_upper <= span.lower, "Edges from list are not sorted"
last_upper = span.upper
def test_lexer_compile():
class LexTest(Grammar):
@rule
def foo(self):
return self.IS
start = foo
IS = Terminal("is")
AS = Terminal("as")
IDENTIFIER = Terminal(
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
)
)
BLANKS = Terminal(Re.set("\r", "\n", "\t", " ").plus())
lexer = LexTest().compile_lexer()
dump_lexer_table(lexer)
tokens = list(generic_tokenize("xy is ass", lexer))
assert tokens == [
(LexTest.IDENTIFIER, 0, 2),
(LexTest.BLANKS, 2, 1),
(LexTest.IS, 3, 2),
(LexTest.BLANKS, 5, 1),
(LexTest.IDENTIFIER, 6, 3),
]
@given(floats().map(abs))
def test_lexer_numbers(n: float):
assume(math.isfinite(n))
class LexTest(Grammar):
@rule
def number(self):
return self.NUMBER
start = number
NUMBER = Terminal(
Re.seq(
Re.set(("0", "9")).plus(),
Re.seq(
Re.literal("."),
Re.set(("0", "9")).plus(),
).question(),
Re.seq(
Re.set("e", "E"),
Re.set("+", "-").question(),
Re.set(("0", "9")).plus(),
).question(),
)
)
lexer = LexTest().compile_lexer()
dump_lexer_table(lexer)
number_string = str(n)
tokens = list(generic_tokenize(number_string, lexer))
assert tokens == [
(LexTest.NUMBER, 0, len(number_string)),
]