Vendor things
This commit is contained in:
parent
5deceec006
commit
977e3c17e5
19434 changed files with 10682014 additions and 0 deletions
22
third-party/vendor/regex/testdata/README.md
vendored
Normal file
22
third-party/vendor/regex/testdata/README.md
vendored
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
This directory contains a large suite of regex tests defined in a TOML format.
|
||||
They are used to drive tests in `tests/lib.rs`, `regex-automata/tests/lib.rs`
|
||||
and `regex-lite/tests/lib.rs`.
|
||||
|
||||
See the [`regex-test`][regex-test] crate documentation for an explanation of
|
||||
the format and how it generates tests.
|
||||
|
||||
The basic idea here is that we have many different regex engines but generally
|
||||
one set of tests. We want to be able to run those tests (or most of them) on
|
||||
every engine. Prior to `regex 1.9`, we used to do this with a hodge podge soup
|
||||
of macros and a different test executable for each engine. It overall took a
|
||||
longer time to compile, was harder to maintain and it made the test definitions
|
||||
themselves less clear.
|
||||
|
||||
In `regex 1.9`, when we moved over to `regex-automata`, the situation got a lot
|
||||
worse because of an increase in the number of engines. So I devised an engine
|
||||
independent format for testing regex patterns and their semantics.
|
||||
|
||||
Note: the naming scheme used in these tests isn't terribly consistent. It would
|
||||
be great to fix that.
|
||||
|
||||
[regex-test]: https://docs.rs/regex-test
|
||||
127
third-party/vendor/regex/testdata/anchored.toml
vendored
Normal file
127
third-party/vendor/regex/testdata/anchored.toml
vendored
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
# These tests are specifically geared toward searches with 'anchored = true'.
|
||||
# While they are interesting in their own right, they are particularly
|
||||
# important for testing the one-pass DFA since the one-pass DFA can't work in
|
||||
# unanchored contexts.
|
||||
#
|
||||
# Note that "anchored" in this context does not mean "^". Anchored searches are
|
||||
# searches whose matches must begin at the start of the search, which may not
|
||||
# be at the start of the haystack. That's why anchored searches---and there are
|
||||
# some examples below---can still report multiple matches. This occurs when the
|
||||
# matches are adjacent to one another.
|
||||
|
||||
[[test]]
|
||||
name = "greedy"
|
||||
regex = '(abc)+'
|
||||
haystack = "abcabcabc"
|
||||
matches = [
|
||||
[[0, 9], [6, 9]],
|
||||
]
|
||||
anchored = true
|
||||
|
||||
# When an "earliest" search is used, greediness doesn't really exist because
|
||||
# matches are reported as soon as they are known.
|
||||
[[test]]
|
||||
name = "greedy-earliest"
|
||||
regex = '(abc)+'
|
||||
haystack = "abcabcabc"
|
||||
matches = [
|
||||
[[0, 3], [0, 3]],
|
||||
[[3, 6], [3, 6]],
|
||||
[[6, 9], [6, 9]],
|
||||
]
|
||||
anchored = true
|
||||
search-kind = "earliest"
|
||||
|
||||
[[test]]
|
||||
name = "nongreedy"
|
||||
regex = '(abc)+?'
|
||||
haystack = "abcabcabc"
|
||||
matches = [
|
||||
[[0, 3], [0, 3]],
|
||||
[[3, 6], [3, 6]],
|
||||
[[6, 9], [6, 9]],
|
||||
]
|
||||
anchored = true
|
||||
|
||||
# When "all" semantics are used, non-greediness doesn't exist since the longest
|
||||
# possible match is always taken.
|
||||
[[test]]
|
||||
name = "nongreedy-all"
|
||||
regex = '(abc)+?'
|
||||
haystack = "abcabcabc"
|
||||
matches = [
|
||||
[[0, 9], [6, 9]],
|
||||
]
|
||||
anchored = true
|
||||
match-kind = "all"
|
||||
|
||||
[[test]]
|
||||
name = "word-boundary-unicode-01"
|
||||
regex = '\b\w+\b'
|
||||
haystack = 'βββ☃'
|
||||
matches = [[0, 6]]
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "word-boundary-nounicode-01"
|
||||
regex = '\b\w+\b'
|
||||
haystack = 'abcβ'
|
||||
matches = [[0, 3]]
|
||||
anchored = true
|
||||
unicode = false
|
||||
|
||||
# Tests that '.c' doesn't match 'abc' when performing an anchored search from
|
||||
# the beginning of the haystack. This test found two different bugs in the
|
||||
# PikeVM and the meta engine.
|
||||
[[test]]
|
||||
name = "no-match-at-start"
|
||||
regex = '.c'
|
||||
haystack = 'abc'
|
||||
matches = []
|
||||
anchored = true
|
||||
|
||||
# Like above, but at a non-zero start offset.
|
||||
[[test]]
|
||||
name = "no-match-at-start-bounds"
|
||||
regex = '.c'
|
||||
haystack = 'aabc'
|
||||
bounds = [1, 4]
|
||||
matches = []
|
||||
anchored = true
|
||||
|
||||
# This is like no-match-at-start, but hits the "reverse inner" optimization
|
||||
# inside the meta engine. (no-match-at-start hits the "reverse suffix"
|
||||
# optimization.)
|
||||
[[test]]
|
||||
name = "no-match-at-start-reverse-inner"
|
||||
regex = '.c[a-z]'
|
||||
haystack = 'abcz'
|
||||
matches = []
|
||||
anchored = true
|
||||
|
||||
# Like above, but at a non-zero start offset.
|
||||
[[test]]
|
||||
name = "no-match-at-start-reverse-inner-bounds"
|
||||
regex = '.c[a-z]'
|
||||
haystack = 'aabcz'
|
||||
bounds = [1, 5]
|
||||
matches = []
|
||||
anchored = true
|
||||
|
||||
# Same as no-match-at-start, but applies to the meta engine's "reverse
|
||||
# anchored" optimization.
|
||||
[[test]]
|
||||
name = "no-match-at-start-reverse-anchored"
|
||||
regex = '.c[a-z]$'
|
||||
haystack = 'abcz'
|
||||
matches = []
|
||||
anchored = true
|
||||
|
||||
# Like above, but at a non-zero start offset.
|
||||
[[test]]
|
||||
name = "no-match-at-start-reverse-anchored-bounds"
|
||||
regex = '.c[a-z]$'
|
||||
haystack = 'aabcz'
|
||||
bounds = [1, 5]
|
||||
matches = []
|
||||
anchored = true
|
||||
235
third-party/vendor/regex/testdata/bytes.toml
vendored
Normal file
235
third-party/vendor/regex/testdata/bytes.toml
vendored
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
# These are tests specifically crafted for regexes that can match arbitrary
|
||||
# bytes. In some cases, we also test the Unicode variant as well, just because
|
||||
# it's good sense to do so. But also, these tests aren't really about Unicode,
|
||||
# but whether matches are only reported at valid UTF-8 boundaries. For most
|
||||
# tests in this entire collection, utf8 = true. But for these tests, we use
|
||||
# utf8 = false.
|
||||
|
||||
[[test]]
|
||||
name = "word-boundary-ascii"
|
||||
regex = ' \b'
|
||||
haystack = " δ"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "word-boundary-unicode"
|
||||
regex = ' \b'
|
||||
haystack = " δ"
|
||||
matches = [[0, 1]]
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "word-boundary-ascii-not"
|
||||
regex = ' \B'
|
||||
haystack = " δ"
|
||||
matches = [[0, 1]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "word-boundary-unicode-not"
|
||||
regex = ' \B'
|
||||
haystack = " δ"
|
||||
matches = []
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "perl-word-ascii"
|
||||
regex = '\w+'
|
||||
haystack = "aδ"
|
||||
matches = [[0, 1]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "perl-word-unicode"
|
||||
regex = '\w+'
|
||||
haystack = "aδ"
|
||||
matches = [[0, 3]]
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "perl-decimal-ascii"
|
||||
regex = '\d+'
|
||||
haystack = "1२३9"
|
||||
matches = [[0, 1], [7, 8]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "perl-decimal-unicode"
|
||||
regex = '\d+'
|
||||
haystack = "1२३9"
|
||||
matches = [[0, 8]]
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "perl-whitespace-ascii"
|
||||
regex = '\s+'
|
||||
haystack = " \u1680"
|
||||
matches = [[0, 1]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "perl-whitespace-unicode"
|
||||
regex = '\s+'
|
||||
haystack = " \u1680"
|
||||
matches = [[0, 4]]
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
# The first `(.+)` matches two Unicode codepoints, but can't match the 5th
|
||||
# byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and
|
||||
# matches.
|
||||
[[test]]
|
||||
name = "mixed-dot"
|
||||
regex = '(.+)(?-u)(.+)'
|
||||
haystack = '\xCE\x93\xCE\x94\xFF'
|
||||
matches = [
|
||||
[[0, 5], [0, 4], [4, 5]],
|
||||
]
|
||||
unescape = true
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "case-one-ascii"
|
||||
regex = 'a'
|
||||
haystack = "A"
|
||||
matches = [[0, 1]]
|
||||
case-insensitive = true
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "case-one-unicode"
|
||||
regex = 'a'
|
||||
haystack = "A"
|
||||
matches = [[0, 1]]
|
||||
case-insensitive = true
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "case-class-simple-ascii"
|
||||
regex = '[a-z]+'
|
||||
haystack = "AaAaA"
|
||||
matches = [[0, 5]]
|
||||
case-insensitive = true
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "case-class-ascii"
|
||||
regex = '[a-z]+'
|
||||
haystack = "aA\u212AaA"
|
||||
matches = [[0, 2], [5, 7]]
|
||||
case-insensitive = true
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "case-class-unicode"
|
||||
regex = '[a-z]+'
|
||||
haystack = "aA\u212AaA"
|
||||
matches = [[0, 7]]
|
||||
case-insensitive = true
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "negate-ascii"
|
||||
regex = '[^a]'
|
||||
haystack = "δ"
|
||||
matches = [[0, 1], [1, 2]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "negate-unicode"
|
||||
regex = '[^a]'
|
||||
haystack = "δ"
|
||||
matches = [[0, 2]]
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
# When utf8=true, this won't match, because the implicit '.*?' prefix is
|
||||
# Unicode aware and will refuse to match through invalid UTF-8 bytes.
|
||||
[[test]]
|
||||
name = "dotstar-prefix-ascii"
|
||||
regex = 'a'
|
||||
haystack = '\xFFa'
|
||||
matches = [[1, 2]]
|
||||
unescape = true
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "dotstar-prefix-unicode"
|
||||
regex = 'a'
|
||||
haystack = '\xFFa'
|
||||
matches = [[1, 2]]
|
||||
unescape = true
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "null-bytes"
|
||||
regex = '(?P<cstr>[^\x00]+)\x00'
|
||||
haystack = 'foo\x00'
|
||||
matches = [
|
||||
[[0, 4], [0, 3]],
|
||||
]
|
||||
unescape = true
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "invalid-utf8-anchor-100"
|
||||
regex = '\xCC?^'
|
||||
haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
|
||||
matches = [[0, 0]]
|
||||
unescape = true
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "invalid-utf8-anchor-200"
|
||||
regex = '^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$'
|
||||
haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
|
||||
matches = [[22, 22]]
|
||||
unescape = true
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "invalid-utf8-anchor-300"
|
||||
regex = '^|ddp\xff\xffdddddlQd@\x80'
|
||||
haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
|
||||
matches = [[0, 0]]
|
||||
unescape = true
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "word-boundary-ascii-100"
|
||||
regex = '\Bx\B'
|
||||
haystack = "áxβ"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "word-boundary-ascii-200"
|
||||
regex = '\B'
|
||||
haystack = "0\U0007EF5E"
|
||||
matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
315
third-party/vendor/regex/testdata/crazy.toml
vendored
Normal file
315
third-party/vendor/regex/testdata/crazy.toml
vendored
Normal file
|
|
@ -0,0 +1,315 @@
|
|||
[[test]]
|
||||
name = "nothing-empty"
|
||||
regex = []
|
||||
haystack = ""
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "nothing-something"
|
||||
regex = []
|
||||
haystack = "wat"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "ranges"
|
||||
regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b'
|
||||
haystack = "num: 255"
|
||||
matches = [[5, 8]]
|
||||
|
||||
[[test]]
|
||||
name = "ranges-not"
|
||||
regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b'
|
||||
haystack = "num: 256"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "float1"
|
||||
regex = '[-+]?[0-9]*\.?[0-9]+'
|
||||
haystack = "0.1"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "float2"
|
||||
regex = '[-+]?[0-9]*\.?[0-9]+'
|
||||
haystack = "0.1.2"
|
||||
matches = [[0, 3]]
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "float3"
|
||||
regex = '[-+]?[0-9]*\.?[0-9]+'
|
||||
haystack = "a1.2"
|
||||
matches = [[1, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "float4"
|
||||
regex = '[-+]?[0-9]*\.?[0-9]+'
|
||||
haystack = "1.a"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "float5"
|
||||
regex = '^[-+]?[0-9]*\.?[0-9]+$'
|
||||
haystack = "1.a"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "email"
|
||||
regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b'
|
||||
haystack = "mine is jam.slam@gmail.com "
|
||||
matches = [[8, 26]]
|
||||
|
||||
[[test]]
|
||||
name = "email-not"
|
||||
regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b'
|
||||
haystack = "mine is jam.slam@gmail "
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "email-big"
|
||||
regex = '''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?'''
|
||||
haystack = "mine is jam.slam@gmail.com "
|
||||
matches = [[8, 26]]
|
||||
|
||||
[[test]]
|
||||
name = "date1"
|
||||
regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$'
|
||||
haystack = "1900-01-01"
|
||||
matches = [[0, 10]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "date2"
|
||||
regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$'
|
||||
haystack = "1900-00-01"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "date3"
|
||||
regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$'
|
||||
haystack = "1900-13-01"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "start-end-empty"
|
||||
regex = '^$'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
|
||||
[[test]]
|
||||
name = "start-end-empty-rev"
|
||||
regex = '$^'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
|
||||
[[test]]
|
||||
name = "start-end-empty-many-1"
|
||||
regex = '^$^$^$'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
|
||||
[[test]]
|
||||
name = "start-end-empty-many-2"
|
||||
regex = '^^^$$$'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
|
||||
[[test]]
|
||||
name = "start-end-empty-rep"
|
||||
regex = '(?:^$)*'
|
||||
haystack = "a\nb\nc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "start-end-empty-rep-rev"
|
||||
regex = '(?:$^)*'
|
||||
haystack = "a\nb\nc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "neg-class-letter"
|
||||
regex = '[^ac]'
|
||||
haystack = "acx"
|
||||
matches = [[2, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "neg-class-letter-comma"
|
||||
regex = '[^a,]'
|
||||
haystack = "a,x"
|
||||
matches = [[2, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "neg-class-letter-space"
|
||||
regex = '[^a[:space:]]'
|
||||
haystack = "a x"
|
||||
matches = [[2, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "neg-class-comma"
|
||||
regex = '[^,]'
|
||||
haystack = ",,x"
|
||||
matches = [[2, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "neg-class-space"
|
||||
regex = '[^[:space:]]'
|
||||
haystack = " a"
|
||||
matches = [[1, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "neg-class-space-comma"
|
||||
regex = '[^,[:space:]]'
|
||||
haystack = ", a"
|
||||
matches = [[2, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "neg-class-comma-space"
|
||||
regex = '[^[:space:],]'
|
||||
haystack = " ,a"
|
||||
matches = [[2, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "neg-class-ascii"
|
||||
regex = '[^[:alpha:]Z]'
|
||||
haystack = "A1"
|
||||
matches = [[1, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "lazy-many-many"
|
||||
regex = '(?:(?:.*)*?)='
|
||||
haystack = "a=b"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "lazy-many-optional"
|
||||
regex = '(?:(?:.?)*?)='
|
||||
haystack = "a=b"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "lazy-one-many-many"
|
||||
regex = '(?:(?:.*)+?)='
|
||||
haystack = "a=b"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "lazy-one-many-optional"
|
||||
regex = '(?:(?:.?)+?)='
|
||||
haystack = "a=b"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "lazy-range-min-many"
|
||||
regex = '(?:(?:.*){1,}?)='
|
||||
haystack = "a=b"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "lazy-range-many"
|
||||
regex = '(?:(?:.*){1,2}?)='
|
||||
haystack = "a=b"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "greedy-many-many"
|
||||
regex = '(?:(?:.*)*)='
|
||||
haystack = "a=b"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "greedy-many-optional"
|
||||
regex = '(?:(?:.?)*)='
|
||||
haystack = "a=b"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "greedy-one-many-many"
|
||||
regex = '(?:(?:.*)+)='
|
||||
haystack = "a=b"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "greedy-one-many-optional"
|
||||
regex = '(?:(?:.?)+)='
|
||||
haystack = "a=b"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "greedy-range-min-many"
|
||||
regex = '(?:(?:.*){1,})='
|
||||
haystack = "a=b"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "greedy-range-many"
|
||||
regex = '(?:(?:.*){1,2})='
|
||||
haystack = "a=b"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "empty1"
|
||||
regex = ''
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
|
||||
[[test]]
|
||||
name = "empty2"
|
||||
regex = ''
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty3"
|
||||
regex = '(?:)'
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty4"
|
||||
regex = '(?:)*'
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty5"
|
||||
regex = '(?:)+'
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty6"
|
||||
regex = '(?:)?'
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty7"
|
||||
regex = '(?:)(?:)'
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty8"
|
||||
regex = '(?:)+|z'
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty9"
|
||||
regex = 'z|(?:)+'
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty10"
|
||||
regex = '(?:)+|b'
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty11"
|
||||
regex = 'b|(?:)+'
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 2], [3, 3]]
|
||||
117
third-party/vendor/regex/testdata/crlf.toml
vendored
Normal file
117
third-party/vendor/regex/testdata/crlf.toml
vendored
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
# This is a basic test that checks ^ and $ treat \r\n as a single line
|
||||
# terminator. If ^ and $ only treated \n as a line terminator, then this would
|
||||
# only match 'xyz' at the end of the haystack.
|
||||
[[test]]
|
||||
name = "basic"
|
||||
regex = '(?mR)^[a-z]+$'
|
||||
haystack = "abc\r\ndef\r\nxyz"
|
||||
matches = [[0, 3], [5, 8], [10, 13]]
|
||||
|
||||
# Tests that a CRLF-aware '^$' assertion does not match between CR and LF.
|
||||
[[test]]
|
||||
name = "start-end-non-empty"
|
||||
regex = '(?mR)^$'
|
||||
haystack = "abc\r\ndef\r\nxyz"
|
||||
matches = []
|
||||
|
||||
# Tests that a CRLF-aware '^$' assertion matches the empty string, just like
|
||||
# a non-CRLF-aware '^$' assertion.
|
||||
[[test]]
|
||||
name = "start-end-empty"
|
||||
regex = '(?mR)^$'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
|
||||
# Tests that a CRLF-aware '^$' assertion matches the empty string preceding
|
||||
# and following a line terminator.
|
||||
[[test]]
|
||||
name = "start-end-before-after"
|
||||
regex = '(?mR)^$'
|
||||
haystack = "\r\n"
|
||||
matches = [[0, 0], [2, 2]]
|
||||
|
||||
# Tests that a CRLF-aware '^' assertion does not split a line terminator.
|
||||
[[test]]
|
||||
name = "start-no-split"
|
||||
regex = '(?mR)^'
|
||||
haystack = "abc\r\ndef\r\nxyz"
|
||||
matches = [[0, 0], [5, 5], [10, 10]]
|
||||
|
||||
# Same as above, but with adjacent runs of line terminators.
|
||||
[[test]]
|
||||
name = "start-no-split-adjacent"
|
||||
regex = '(?mR)^'
|
||||
haystack = "\r\n\r\n\r\n"
|
||||
matches = [[0, 0], [2, 2], [4, 4], [6, 6]]
|
||||
|
||||
# Same as above, but with adjacent runs of just carriage returns.
|
||||
[[test]]
|
||||
name = "start-no-split-adjacent-cr"
|
||||
regex = '(?mR)^'
|
||||
haystack = "\r\r\r"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
# Same as above, but with adjacent runs of just line feeds.
|
||||
[[test]]
|
||||
name = "start-no-split-adjacent-lf"
|
||||
regex = '(?mR)^'
|
||||
haystack = "\n\n\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
# Tests that a CRLF-aware '$' assertion does not split a line terminator.
|
||||
[[test]]
|
||||
name = "end-no-split"
|
||||
regex = '(?mR)$'
|
||||
haystack = "abc\r\ndef\r\nxyz"
|
||||
matches = [[3, 3], [8, 8], [13, 13]]
|
||||
|
||||
# Same as above, but with adjacent runs of line terminators.
|
||||
[[test]]
|
||||
name = "end-no-split-adjacent"
|
||||
regex = '(?mR)$'
|
||||
haystack = "\r\n\r\n\r\n"
|
||||
matches = [[0, 0], [2, 2], [4, 4], [6, 6]]
|
||||
|
||||
# Same as above, but with adjacent runs of just carriage returns.
|
||||
[[test]]
|
||||
name = "end-no-split-adjacent-cr"
|
||||
regex = '(?mR)$'
|
||||
haystack = "\r\r\r"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
# Same as above, but with adjacent runs of just line feeds.
|
||||
[[test]]
|
||||
name = "end-no-split-adjacent-lf"
|
||||
regex = '(?mR)$'
|
||||
haystack = "\n\n\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
# Tests that '.' does not match either \r or \n when CRLF mode is enabled. Note
|
||||
# that this doesn't require multi-line mode to be enabled.
|
||||
[[test]]
|
||||
name = "dot-no-crlf"
|
||||
regex = '(?R).'
|
||||
haystack = "\r\n\r\n\r\n"
|
||||
matches = []
|
||||
|
||||
# This is a test that caught a bug in the one-pass DFA where it (amazingly) was
|
||||
# using 'is_end_lf' instead of 'is_end_crlf' here. It was probably a copy &
|
||||
# paste bug. We insert an empty capture group here because it provokes the meta
|
||||
# regex engine to first find a match and then trip over a panic because the
|
||||
# one-pass DFA erroneously says there is no match.
|
||||
[[test]]
|
||||
name = "onepass-wrong-crlf-with-capture"
|
||||
regex = '(?Rm:().$)'
|
||||
haystack = "ZZ\r"
|
||||
matches = [[[1, 2], [1, 1]]]
|
||||
|
||||
# This is like onepass-wrong-crlf-with-capture above, except it sets up the
|
||||
# test so that it can be run by the one-pass DFA directly. (i.e., Make it
|
||||
# anchored and start the search at the right place.)
|
||||
[[test]]
|
||||
name = "onepass-wrong-crlf-anchored"
|
||||
regex = '(?Rm:.$)'
|
||||
haystack = "ZZ\r"
|
||||
matches = [[1, 2]]
|
||||
anchored = true
|
||||
bounds = [1, 3]
|
||||
52
third-party/vendor/regex/testdata/earliest.toml
vendored
Normal file
52
third-party/vendor/regex/testdata/earliest.toml
vendored
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
[[test]]
|
||||
name = "no-greedy-100"
|
||||
regex = 'a+'
|
||||
haystack = "aaa"
|
||||
matches = [[0, 1], [1, 2], [2, 3]]
|
||||
search-kind = "earliest"
|
||||
|
||||
[[test]]
|
||||
name = "no-greedy-200"
|
||||
regex = 'abc+'
|
||||
haystack = "zzzabccc"
|
||||
matches = [[3, 6]]
|
||||
search-kind = "earliest"
|
||||
|
||||
[[test]]
|
||||
name = "is-ungreedy"
|
||||
regex = 'a+?'
|
||||
haystack = "aaa"
|
||||
matches = [[0, 1], [1, 2], [2, 3]]
|
||||
search-kind = "earliest"
|
||||
|
||||
[[test]]
|
||||
name = "look-start-test"
|
||||
regex = '^(abc|a)'
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
[[0, 1], [0, 1]],
|
||||
]
|
||||
search-kind = "earliest"
|
||||
|
||||
[[test]]
|
||||
name = "look-end-test"
|
||||
regex = '(abc|a)$'
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
[[0, 3], [0, 3]],
|
||||
]
|
||||
search-kind = "earliest"
|
||||
|
||||
[[test]]
|
||||
name = "no-leftmost-first-100"
|
||||
regex = 'abc|a'
|
||||
haystack = "abc"
|
||||
matches = [[0, 1]]
|
||||
search-kind = "earliest"
|
||||
|
||||
[[test]]
|
||||
name = "no-leftmost-first-200"
|
||||
regex = 'aba|a'
|
||||
haystack = "aba"
|
||||
matches = [[0, 1], [2, 3]]
|
||||
search-kind = "earliest"
|
||||
113
third-party/vendor/regex/testdata/empty.toml
vendored
Normal file
113
third-party/vendor/regex/testdata/empty.toml
vendored
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
[[test]]
|
||||
name = "100"
|
||||
regex = "|b"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "110"
|
||||
regex = "b|"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "120"
|
||||
regex = "|z"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "130"
|
||||
regex = "z|"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "200"
|
||||
regex = "|"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "210"
|
||||
regex = "||"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "220"
|
||||
regex = "||b"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "230"
|
||||
regex = "b||"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "240"
|
||||
regex = "||z"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "300"
|
||||
regex = "(?:)|b"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "310"
|
||||
regex = "b|(?:)"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "320"
|
||||
regex = "(?:|)"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "330"
|
||||
regex = "(?:|)|z"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "400"
|
||||
regex = "a(?:)|b"
|
||||
haystack = "abc"
|
||||
matches = [[0, 1], [1, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "500"
|
||||
regex = ""
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
|
||||
[[test]]
|
||||
name = "510"
|
||||
regex = ""
|
||||
haystack = "a"
|
||||
matches = [[0, 0], [1, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "520"
|
||||
regex = ""
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "600"
|
||||
regex = '(?:|a)*'
|
||||
haystack = "aaa"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "610"
|
||||
regex = '(?:|a)+'
|
||||
haystack = "aaa"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
23
third-party/vendor/regex/testdata/expensive.toml
vendored
Normal file
23
third-party/vendor/regex/testdata/expensive.toml
vendored
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# This file represents tests that may be expensive to run on some regex engines.
|
||||
# For example, tests that build a full DFA ahead of time and minimize it can
|
||||
# take a horrendously long time on regexes that are large (or result in an
|
||||
# explosion in the number of states). We group these tests together so that
|
||||
# such engines can simply skip these tests.
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/98
|
||||
[[test]]
|
||||
name = "regression-many-repeat-no-stack-overflow"
|
||||
regex = '^.{1,2500}'
|
||||
haystack = "a"
|
||||
matches = [[0, 1]]
|
||||
|
||||
# This test is meant to blow the bounded backtracker's visited capacity. In
|
||||
# order to do that, we need a somewhat sizeable regex. The purpose of this
|
||||
# is to make sure there's at least one test that exercises this path in the
|
||||
# backtracker. All other tests (at time of writing) are small enough that the
|
||||
# backtracker can handle them fine.
|
||||
[[test]]
|
||||
name = "backtrack-blow-visited-capacity"
|
||||
regex = '\pL{50}'
|
||||
haystack = "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyZZ"
|
||||
matches = [[0, 50], [50, 100], [100, 150]]
|
||||
68
third-party/vendor/regex/testdata/flags.toml
vendored
Normal file
68
third-party/vendor/regex/testdata/flags.toml
vendored
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
[[test]]
|
||||
name = "1"
|
||||
regex = "(?i)abc"
|
||||
haystack = "ABC"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "2"
|
||||
regex = "(?i)a(?-i)bc"
|
||||
haystack = "Abc"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "3"
|
||||
regex = "(?i)a(?-i)bc"
|
||||
haystack = "ABC"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "4"
|
||||
regex = "(?is)a."
|
||||
haystack = "A\n"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "5"
|
||||
regex = "(?is)a.(?-is)a."
|
||||
haystack = "A\nab"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "6"
|
||||
regex = "(?is)a.(?-is)a."
|
||||
haystack = "A\na\n"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "7"
|
||||
regex = "(?is)a.(?-is:a.)?"
|
||||
haystack = "A\na\n"
|
||||
matches = [[0, 2]]
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "8"
|
||||
regex = "(?U)a+"
|
||||
haystack = "aa"
|
||||
matches = [[0, 1]]
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "9"
|
||||
regex = "(?U)a+?"
|
||||
haystack = "aa"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "10"
|
||||
regex = "(?U)(?-U)a+"
|
||||
haystack = "aa"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "11"
|
||||
regex = '(?m)(?:^\d+$\n?)+'
|
||||
haystack = "123\n456\n789"
|
||||
matches = [[0, 11]]
|
||||
unicode = false
|
||||
1611
third-party/vendor/regex/testdata/fowler/basic.toml
vendored
Normal file
1611
third-party/vendor/regex/testdata/fowler/basic.toml
vendored
Normal file
File diff suppressed because it is too large
Load diff
25
third-party/vendor/regex/testdata/fowler/dat/README
vendored
Normal file
25
third-party/vendor/regex/testdata/fowler/dat/README
vendored
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
Test data was taken from the Go distribution, which was in turn taken from the
|
||||
testregex test suite:
|
||||
|
||||
http://web.archive.org/web/20150925124103/http://www2.research.att.com/~astopen/testregex/testregex.html
|
||||
|
||||
Unfortunately, the original web site now appears dead, but the test data lives
|
||||
on.
|
||||
|
||||
The LICENSE in this directory corresponds to the LICENSE that the data was
|
||||
originally released under.
|
||||
|
||||
The tests themselves were modified for RE2/Go (and marked as such). A
|
||||
couple were modified further by me (Andrew Gallant) and marked with 'Rust'.
|
||||
|
||||
After some number of years, these tests were transformed into a TOML format
|
||||
using the 'regex-cli generate fowler' command. To re-generate the
|
||||
TOML files, run the following from the root of this repository:
|
||||
|
||||
regex-cli generate fowler testdata/fowler testdata/fowler/dat/*.dat
|
||||
|
||||
This assumes that you have 'regex-cli' installed. See 'regex-cli/README.md'
|
||||
from the root of the repository for more information.
|
||||
|
||||
This brings the Fowler tests into a more "sensible" structured format in which
|
||||
other tests can be written such that they aren't write-only.
|
||||
223
third-party/vendor/regex/testdata/fowler/dat/basic.dat
vendored
Normal file
223
third-party/vendor/regex/testdata/fowler/dat/basic.dat
vendored
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
NOTE all standard compliant implementations should pass these : 2002-05-31
|
||||
|
||||
BE abracadabra$ abracadabracadabra (7,18)
|
||||
BE a...b abababbb (2,7)
|
||||
BE XXXXXX ..XXXXXX (2,8)
|
||||
E \) () (1,2)
|
||||
BE a] a]a (0,2)
|
||||
B } } (0,1)
|
||||
E \} } (0,1)
|
||||
BE \] ] (0,1)
|
||||
B ] ] (0,1)
|
||||
E ] ] (0,1)
|
||||
B { { (0,1)
|
||||
B } } (0,1)
|
||||
BE ^a ax (0,1)
|
||||
BE \^a a^a (1,3)
|
||||
BE a\^ a^ (0,2)
|
||||
BE a$ aa (1,2)
|
||||
BE a\$ a$ (0,2)
|
||||
BE ^$ NULL (0,0)
|
||||
E $^ NULL (0,0)
|
||||
E a($) aa (1,2)(2,2)
|
||||
E a*(^a) aa (0,1)(0,1)
|
||||
E (..)*(...)* a (0,0)
|
||||
E (..)*(...)* abcd (0,4)(2,4)
|
||||
E (ab|a)(bc|c) abc (0,3)(0,2)(2,3)
|
||||
E (ab)c|abc abc (0,3)(0,2)
|
||||
E a{0}b ab (1,2)
|
||||
E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
|
||||
E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
|
||||
E a{9876543210} NULL BADBR
|
||||
E ((a|a)|a) a (0,1)(0,1)(0,1)
|
||||
E (a*)(a|aa) aaaa (0,4)(0,3)(3,4)
|
||||
E a*(a.|aa) aaaa (0,4)(2,4)
|
||||
E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2)
|
||||
E (a|b)?.* b (0,1)(0,1)
|
||||
E (a|b)c|a(b|c) ac (0,2)(0,1)
|
||||
E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2)
|
||||
E (a|b)*c|(a|ab)*c abc (0,3)(1,2)
|
||||
E (a|b)*c|(a|ab)*c xc (1,2)
|
||||
E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)
|
||||
E a?(ab|ba)ab abab (0,4)(0,2)
|
||||
E a?(ac{0}b|ba)ab abab (0,4)(0,2)
|
||||
E ab|abab abbabab (0,2)
|
||||
E aba|bab|bba baaabbbaba (5,8)
|
||||
E aba|bab baaabbbaba (6,9)
|
||||
E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)
|
||||
E (a.|.a.)*|(a|.a...) aa (0,2)(0,2)
|
||||
E ab|a xabc (1,3)
|
||||
E ab|a xxabc (2,4)
|
||||
Ei (Ab|cD)* aBcD (0,4)(2,4)
|
||||
BE [^-] --a (2,3)
|
||||
BE [a-]* --a (0,3)
|
||||
BE [a-m-]* --amoma-- (0,4)
|
||||
E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17)
|
||||
E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17)
|
||||
{E [[:upper:]] A (0,1) [[<element>]] not supported
|
||||
E [[:lower:]]+ `az{ (1,3)
|
||||
E [[:upper:]]+ @AZ[ (1,3)
|
||||
# No collation in Go
|
||||
#BE [[-]] [[-]] (2,4)
|
||||
#BE [[.NIL.]] NULL ECOLLATE
|
||||
#BE [[=aleph=]] NULL ECOLLATE
|
||||
}
|
||||
BE$ \n \n (0,1)
|
||||
BEn$ \n \n (0,1)
|
||||
BE$ [^a] \n (0,1)
|
||||
BE$ \na \na (0,2)
|
||||
E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3)
|
||||
BE xxx xxx (0,3)
|
||||
#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6)
|
||||
E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) feb 6, (0,6) Rust
|
||||
#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3)
|
||||
E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) 2/7 (0,3) Rust
|
||||
#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11)
|
||||
E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) feb 1,Feb 6 (5,11) Rust
|
||||
#E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)
|
||||
E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) Rust
|
||||
#E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)
|
||||
E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) Rust
|
||||
E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81)
|
||||
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25)
|
||||
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22)
|
||||
E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11)
|
||||
#BE$ .* \x01\xff (0,2)
|
||||
BE$ .* \x01\x7f (0,2) Rust
|
||||
E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57)
|
||||
L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH
|
||||
E a*a*a*a*a*b aaaaaaaaab (0,10)
|
||||
BE ^ NULL (0,0)
|
||||
BE $ NULL (0,0)
|
||||
BE ^$ NULL (0,0)
|
||||
BE ^a$ a (0,1)
|
||||
BE abc abc (0,3)
|
||||
BE abc xabcy (1,4)
|
||||
BE abc ababc (2,5)
|
||||
BE ab*c abc (0,3)
|
||||
BE ab*bc abc (0,3)
|
||||
BE ab*bc abbc (0,4)
|
||||
BE ab*bc abbbbc (0,6)
|
||||
E ab+bc abbc (0,4)
|
||||
E ab+bc abbbbc (0,6)
|
||||
E ab?bc abbc (0,4)
|
||||
E ab?bc abc (0,3)
|
||||
E ab?c abc (0,3)
|
||||
BE ^abc$ abc (0,3)
|
||||
BE ^abc abcc (0,3)
|
||||
BE abc$ aabc (1,4)
|
||||
BE ^ abc (0,0)
|
||||
BE $ abc (3,3)
|
||||
BE a.c abc (0,3)
|
||||
BE a.c axc (0,3)
|
||||
BE a.*c axyzc (0,5)
|
||||
BE a[bc]d abd (0,3)
|
||||
BE a[b-d]e ace (0,3)
|
||||
BE a[b-d] aac (1,3)
|
||||
BE a[-b] a- (0,2)
|
||||
BE a[b-] a- (0,2)
|
||||
BE a] a] (0,2)
|
||||
BE a[]]b a]b (0,3)
|
||||
BE a[^bc]d aed (0,3)
|
||||
BE a[^-b]c adc (0,3)
|
||||
BE a[^]b]c adc (0,3)
|
||||
E ab|cd abc (0,2)
|
||||
E ab|cd abcd (0,2)
|
||||
E a\(b a(b (0,3)
|
||||
E a\(*b ab (0,2)
|
||||
E a\(*b a((b (0,4)
|
||||
E ((a)) abc (0,1)(0,1)(0,1)
|
||||
E (a)b(c) abc (0,3)(0,1)(2,3)
|
||||
E a+b+c aabbabc (4,7)
|
||||
E a* aaa (0,3)
|
||||
E (a*)* - (0,0)(0,0)
|
||||
E (a*)+ - (0,0)(0,0)
|
||||
E (a*|b)* - (0,0)(0,0)
|
||||
E (a+|b)* ab (0,2)(1,2)
|
||||
E (a+|b)+ ab (0,2)(1,2)
|
||||
E (a+|b)? ab (0,1)(0,1)
|
||||
BE [^ab]* cde (0,3)
|
||||
E (^)* - (0,0)(0,0)
|
||||
BE a* NULL (0,0)
|
||||
E ([abc])*d abbbcd (0,6)(4,5)
|
||||
E ([abc])*bcd abcd (0,4)(0,1)
|
||||
E a|b|c|d|e e (0,1)
|
||||
E (a|b|c|d|e)f ef (0,2)(0,1)
|
||||
E ((a*|b))* - (0,0)(0,0)(0,0)
|
||||
BE abcd*efg abcdefg (0,7)
|
||||
BE ab* xabyabbbz (1,3)
|
||||
BE ab* xayabbbz (1,2)
|
||||
E (ab|cd)e abcde (2,5)(2,4)
|
||||
BE [abhgefdc]ij hij (0,3)
|
||||
E (a|b)c*d abcd (1,4)(1,2)
|
||||
E (ab|ab*)bc abc (0,3)(0,1)
|
||||
E a([bc]*)c* abc (0,3)(1,3)
|
||||
E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4)
|
||||
E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4)
|
||||
E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4)
|
||||
E a[bcd]*dcdcde adcdcde (0,7)
|
||||
E (ab|a)b*c abc (0,3)(0,2)
|
||||
E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
|
||||
BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
|
||||
E ^a(bc+|b[eh])g|.h$ abh (1,3)
|
||||
E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
|
||||
E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
|
||||
E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)
|
||||
E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
|
||||
BE multiple words multiple words yeah (0,14)
|
||||
E (.*)c(.*) abcde (0,5)(0,2)(3,5)
|
||||
BE abcd abcd (0,4)
|
||||
E a(bc)d abcd (0,4)(1,3)
|
||||
E a[-]?c ac (0,3)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12)
|
||||
E a+(b|c)*d+ aabcdd (0,6)(3,4)
|
||||
E ^.+$ vivi (0,4)
|
||||
E ^(.+)$ vivi (0,4)(0,4)
|
||||
E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19)
|
||||
E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3)
|
||||
E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7)
|
||||
E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7)
|
||||
E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11)
|
||||
E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3)
|
||||
E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7)
|
||||
E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)
|
||||
E ((foo)|bar)!bas bar!bas (0,7)(0,3)
|
||||
E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)
|
||||
E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3)
|
||||
E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3)
|
||||
E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7)
|
||||
E (foo|(bar))!bas foo!bas (0,7)(0,3)
|
||||
E (foo|bar)!bas bar!bas (0,7)(0,3)
|
||||
E (foo|bar)!bas foo!bar!bas (4,11)(4,7)
|
||||
E (foo|bar)!bas foo!bas (0,7)(0,3)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
|
||||
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)
|
||||
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)
|
||||
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11)
|
||||
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)
|
||||
E .*(/XXX).* /XXX (0,4)(0,4)
|
||||
E .*(\\XXX).* \XXX (0,4)(0,4)
|
||||
E \\XXX \XXX (0,4)
|
||||
E .*(/000).* /000 (0,4)(0,4)
|
||||
E .*(\\000).* \000 (0,4)(0,4)
|
||||
E \\000 \000 (0,4)
|
||||
74
third-party/vendor/regex/testdata/fowler/dat/nullsubexpr.dat
vendored
Normal file
74
third-party/vendor/regex/testdata/fowler/dat/nullsubexpr.dat
vendored
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
NOTE null subexpression matches : 2002-06-06
|
||||
|
||||
E (a*)* a (0,1)(0,1)
|
||||
E SAME x (0,0)(0,0)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E (a*)+ a (0,1)(0,1)
|
||||
E SAME x (0,0)(0,0)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E (a+)* a (0,1)(0,1)
|
||||
E SAME x (0,0)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E (a+)+ a (0,1)(0,1)
|
||||
E SAME x NOMATCH
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
|
||||
E ([a]*)* a (0,1)(0,1)
|
||||
E SAME x (0,0)(0,0)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E ([a]*)+ a (0,1)(0,1)
|
||||
E SAME x (0,0)(0,0)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E ([^b]*)* a (0,1)(0,1)
|
||||
E SAME b (0,0)(0,0)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaab (0,6)(0,6)
|
||||
E ([ab]*)* a (0,1)(0,1)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME ababab (0,6)(0,6)
|
||||
E SAME bababa (0,6)(0,6)
|
||||
E SAME b (0,1)(0,1)
|
||||
E SAME bbbbbb (0,6)(0,6)
|
||||
E SAME aaaabcde (0,5)(0,5)
|
||||
E ([^a]*)* b (0,1)(0,1)
|
||||
E SAME bbbbbb (0,6)(0,6)
|
||||
E SAME aaaaaa (0,0)(0,0)
|
||||
E ([^ab]*)* ccccxx (0,6)(0,6)
|
||||
E SAME ababab (0,0)(0,0)
|
||||
|
||||
#E ((z)+|a)* zabcde (0,2)(1,2)
|
||||
E ((z)+|a)* zabcde (0,2)(1,2)(0,1) Rust
|
||||
|
||||
#{E a+? aaaaaa (0,1) no *? +? minimal match ops
|
||||
#E (a) aaa (0,1)(0,1)
|
||||
#E (a*?) aaa (0,0)(0,0)
|
||||
#E (a)*? aaa (0,0)
|
||||
#E (a*?)*? aaa (0,0)
|
||||
#}
|
||||
|
||||
B \(a*\)*\(x\) x (0,1)(0,0)(0,1)
|
||||
B \(a*\)*\(x\) ax (0,2)(0,1)(1,2)
|
||||
B \(a*\)*\(x\) axa (0,2)(0,1)(1,2)
|
||||
B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1)
|
||||
B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2)
|
||||
B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
|
||||
B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
|
||||
B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)
|
||||
|
||||
E (a*)*(x) x (0,1)(0,0)(0,1)
|
||||
E (a*)*(x) ax (0,2)(0,1)(1,2)
|
||||
E (a*)*(x) axa (0,2)(0,1)(1,2)
|
||||
|
||||
E (a*)+(x) x (0,1)(0,0)(0,1)
|
||||
E (a*)+(x) ax (0,2)(0,1)(1,2)
|
||||
E (a*)+(x) axa (0,2)(0,1)(1,2)
|
||||
|
||||
E (a*){2}(x) x (0,1)(0,0)(0,1)
|
||||
E (a*){2}(x) ax (0,2)(1,1)(1,2)
|
||||
E (a*){2}(x) axa (0,2)(1,1)(1,2)
|
||||
169
third-party/vendor/regex/testdata/fowler/dat/repetition.dat
vendored
Normal file
169
third-party/vendor/regex/testdata/fowler/dat/repetition.dat
vendored
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
NOTE implicit vs. explicit repetitions : 2009-02-02
|
||||
|
||||
# Glenn Fowler <gsf@research.att.com>
|
||||
# conforming matches (column 4) must match one of the following BREs
|
||||
# NOMATCH
|
||||
# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
|
||||
# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
|
||||
# i.e., each 3-tuple has two identical elements and one (?,?)
|
||||
|
||||
E ((..)|(.)) NULL NOMATCH
|
||||
E ((..)|(.))((..)|(.)) NULL NOMATCH
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH
|
||||
|
||||
E ((..)|(.)){1} NULL NOMATCH
|
||||
E ((..)|(.)){2} NULL NOMATCH
|
||||
E ((..)|(.)){3} NULL NOMATCH
|
||||
|
||||
E ((..)|(.))* NULL (0,0)
|
||||
|
||||
E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1)
|
||||
E ((..)|(.))((..)|(.)) a NOMATCH
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH
|
||||
|
||||
E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1)
|
||||
E ((..)|(.)){2} a NOMATCH
|
||||
E ((..)|(.)){3} a NOMATCH
|
||||
|
||||
E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1)
|
||||
|
||||
E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH
|
||||
|
||||
E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2)
|
||||
E ((..)|(.)){3} aa NOMATCH
|
||||
|
||||
E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?)
|
||||
|
||||
E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
|
||||
|
||||
E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?)
|
||||
#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3)
|
||||
E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
|
||||
E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3)
|
||||
|
||||
#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3)
|
||||
E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
|
||||
|
||||
E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
|
||||
|
||||
E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?)
|
||||
#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4)
|
||||
E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go
|
||||
|
||||
E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?)
|
||||
|
||||
E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
|
||||
|
||||
E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?)
|
||||
#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5)
|
||||
E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
|
||||
|
||||
#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5)
|
||||
E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
|
||||
|
||||
E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
|
||||
|
||||
E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?)
|
||||
E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?)
|
||||
|
||||
E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?)
|
||||
|
||||
NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
|
||||
|
||||
# These test a bug in OS X / FreeBSD / NetBSD, and libtree.
|
||||
# Linux/GLIBC gets the {8,} and {8,8} wrong.
|
||||
|
||||
:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8)
|
||||
#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8)
|
||||
|
||||
# These test a fixed bug in my regex-tdfa that did not keep the expanded
|
||||
# form properly grouped, so right association did the wrong thing with
|
||||
# these ambiguous patterns (crafted just to test my code when I became
|
||||
# suspicious of my implementation). The first subexpression should use
|
||||
# "ab" then "a" then "bcd".
|
||||
|
||||
# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
|
||||
# results like (0,6)(4,5)(6,6).
|
||||
|
||||
#:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1) Rust
|
||||
#:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1) Rust
|
||||
:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH
|
||||
#:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1) Rust
|
||||
#:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1) Rust
|
||||
:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH
|
||||
#:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1) Rust
|
||||
#:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1) Rust
|
||||
|
||||
# The above worked on Linux/GLIBC but the following often fail.
|
||||
# They also trip up OS X / FreeBSD / NetBSD:
|
||||
|
||||
#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH
|
||||
#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH
|
||||
#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
405
third-party/vendor/regex/testdata/fowler/nullsubexpr.toml
vendored
Normal file
405
third-party/vendor/regex/testdata/fowler/nullsubexpr.toml
vendored
Normal file
|
|
@ -0,0 +1,405 @@
|
|||
# !!! DO NOT EDIT !!!
|
||||
# Automatically generated by 'regex-cli generate fowler'.
|
||||
# Numbers in the test names correspond to the line number of the test from
|
||||
# the original dat file.
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr3"
|
||||
regex = '''(a*)*'''
|
||||
haystack = '''a'''
|
||||
matches = [[[0, 1], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr4"
|
||||
regex = '''(a*)*'''
|
||||
haystack = '''x'''
|
||||
matches = [[[0, 0], [0, 0]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr5"
|
||||
regex = '''(a*)*'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr6"
|
||||
regex = '''(a*)*'''
|
||||
haystack = '''aaaaaax'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr7"
|
||||
regex = '''(a*)+'''
|
||||
haystack = '''a'''
|
||||
matches = [[[0, 1], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr8"
|
||||
regex = '''(a*)+'''
|
||||
haystack = '''x'''
|
||||
matches = [[[0, 0], [0, 0]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr9"
|
||||
regex = '''(a*)+'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr10"
|
||||
regex = '''(a*)+'''
|
||||
haystack = '''aaaaaax'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr11"
|
||||
regex = '''(a+)*'''
|
||||
haystack = '''a'''
|
||||
matches = [[[0, 1], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr12"
|
||||
regex = '''(a+)*'''
|
||||
haystack = '''x'''
|
||||
matches = [[[0, 0], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr13"
|
||||
regex = '''(a+)*'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr14"
|
||||
regex = '''(a+)*'''
|
||||
haystack = '''aaaaaax'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr15"
|
||||
regex = '''(a+)+'''
|
||||
haystack = '''a'''
|
||||
matches = [[[0, 1], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr16"
|
||||
regex = '''(a+)+'''
|
||||
haystack = '''x'''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr17"
|
||||
regex = '''(a+)+'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr18"
|
||||
regex = '''(a+)+'''
|
||||
haystack = '''aaaaaax'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr20"
|
||||
regex = '''([a]*)*'''
|
||||
haystack = '''a'''
|
||||
matches = [[[0, 1], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr21"
|
||||
regex = '''([a]*)*'''
|
||||
haystack = '''x'''
|
||||
matches = [[[0, 0], [0, 0]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr22"
|
||||
regex = '''([a]*)*'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr23"
|
||||
regex = '''([a]*)*'''
|
||||
haystack = '''aaaaaax'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr24"
|
||||
regex = '''([a]*)+'''
|
||||
haystack = '''a'''
|
||||
matches = [[[0, 1], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr25"
|
||||
regex = '''([a]*)+'''
|
||||
haystack = '''x'''
|
||||
matches = [[[0, 0], [0, 0]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr26"
|
||||
regex = '''([a]*)+'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr27"
|
||||
regex = '''([a]*)+'''
|
||||
haystack = '''aaaaaax'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr28"
|
||||
regex = '''([^b]*)*'''
|
||||
haystack = '''a'''
|
||||
matches = [[[0, 1], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr29"
|
||||
regex = '''([^b]*)*'''
|
||||
haystack = '''b'''
|
||||
matches = [[[0, 0], [0, 0]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr30"
|
||||
regex = '''([^b]*)*'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr31"
|
||||
regex = '''([^b]*)*'''
|
||||
haystack = '''aaaaaab'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr32"
|
||||
regex = '''([ab]*)*'''
|
||||
haystack = '''a'''
|
||||
matches = [[[0, 1], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr33"
|
||||
regex = '''([ab]*)*'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr34"
|
||||
regex = '''([ab]*)*'''
|
||||
haystack = '''ababab'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr35"
|
||||
regex = '''([ab]*)*'''
|
||||
haystack = '''bababa'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr36"
|
||||
regex = '''([ab]*)*'''
|
||||
haystack = '''b'''
|
||||
matches = [[[0, 1], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr37"
|
||||
regex = '''([ab]*)*'''
|
||||
haystack = '''bbbbbb'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr38"
|
||||
regex = '''([ab]*)*'''
|
||||
haystack = '''aaaabcde'''
|
||||
matches = [[[0, 5], [0, 5]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr39"
|
||||
regex = '''([^a]*)*'''
|
||||
haystack = '''b'''
|
||||
matches = [[[0, 1], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr40"
|
||||
regex = '''([^a]*)*'''
|
||||
haystack = '''bbbbbb'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr41"
|
||||
regex = '''([^a]*)*'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 0], [0, 0]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr42"
|
||||
regex = '''([^ab]*)*'''
|
||||
haystack = '''ccccxx'''
|
||||
matches = [[[0, 6], [0, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr43"
|
||||
regex = '''([^ab]*)*'''
|
||||
haystack = '''ababab'''
|
||||
matches = [[[0, 0], [0, 0]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by Rust regex project.
|
||||
[[test]]
|
||||
name = "nullsubexpr46"
|
||||
regex = '''((z)+|a)*'''
|
||||
haystack = '''zabcde'''
|
||||
matches = [[[0, 2], [1, 2], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr64"
|
||||
regex = '''(a*)*(x)'''
|
||||
haystack = '''x'''
|
||||
matches = [[[0, 1], [0, 0], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr65"
|
||||
regex = '''(a*)*(x)'''
|
||||
haystack = '''ax'''
|
||||
matches = [[[0, 2], [0, 1], [1, 2]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr66"
|
||||
regex = '''(a*)*(x)'''
|
||||
haystack = '''axa'''
|
||||
matches = [[[0, 2], [0, 1], [1, 2]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr68"
|
||||
regex = '''(a*)+(x)'''
|
||||
haystack = '''x'''
|
||||
matches = [[[0, 1], [0, 0], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr69"
|
||||
regex = '''(a*)+(x)'''
|
||||
haystack = '''ax'''
|
||||
matches = [[[0, 2], [0, 1], [1, 2]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr70"
|
||||
regex = '''(a*)+(x)'''
|
||||
haystack = '''axa'''
|
||||
matches = [[[0, 2], [0, 1], [1, 2]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr72"
|
||||
regex = '''(a*){2}(x)'''
|
||||
haystack = '''x'''
|
||||
matches = [[[0, 1], [0, 0], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr73"
|
||||
regex = '''(a*){2}(x)'''
|
||||
haystack = '''ax'''
|
||||
matches = [[[0, 2], [1, 1], [1, 2]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nullsubexpr74"
|
||||
regex = '''(a*){2}(x)'''
|
||||
haystack = '''axa'''
|
||||
matches = [[[0, 2], [1, 1], [1, 2]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
746
third-party/vendor/regex/testdata/fowler/repetition.toml
vendored
Normal file
746
third-party/vendor/regex/testdata/fowler/repetition.toml
vendored
Normal file
|
|
@ -0,0 +1,746 @@
|
|||
# !!! DO NOT EDIT !!!
|
||||
# Automatically generated by 'regex-cli generate fowler'.
|
||||
# Numbers in the test names correspond to the line number of the test from
|
||||
# the original dat file.
|
||||
|
||||
[[test]]
|
||||
name = "repetition10"
|
||||
regex = '''((..)|(.))'''
|
||||
haystack = ''''''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "repetition11"
|
||||
regex = '''((..)|(.))((..)|(.))'''
|
||||
haystack = ''''''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "repetition12"
|
||||
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
|
||||
haystack = ''''''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "repetition14"
|
||||
regex = '''((..)|(.)){1}'''
|
||||
haystack = ''''''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "repetition15"
|
||||
regex = '''((..)|(.)){2}'''
|
||||
haystack = ''''''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "repetition16"
|
||||
regex = '''((..)|(.)){3}'''
|
||||
haystack = ''''''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "repetition18"
|
||||
regex = '''((..)|(.))*'''
|
||||
haystack = ''''''
|
||||
matches = [[[0, 0], [], [], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition20"
|
||||
regex = '''((..)|(.))'''
|
||||
haystack = '''a'''
|
||||
matches = [[[0, 1], [0, 1], [], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition21"
|
||||
regex = '''((..)|(.))((..)|(.))'''
|
||||
haystack = '''a'''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "repetition22"
|
||||
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
|
||||
haystack = '''a'''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "repetition24"
|
||||
regex = '''((..)|(.)){1}'''
|
||||
haystack = '''a'''
|
||||
matches = [[[0, 1], [0, 1], [], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition25"
|
||||
regex = '''((..)|(.)){2}'''
|
||||
haystack = '''a'''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "repetition26"
|
||||
regex = '''((..)|(.)){3}'''
|
||||
haystack = '''a'''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "repetition28"
|
||||
regex = '''((..)|(.))*'''
|
||||
haystack = '''a'''
|
||||
matches = [[[0, 1], [0, 1], [], [0, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition30"
|
||||
regex = '''((..)|(.))'''
|
||||
haystack = '''aa'''
|
||||
matches = [[[0, 2], [0, 2], [0, 2], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition31"
|
||||
regex = '''((..)|(.))((..)|(.))'''
|
||||
haystack = '''aa'''
|
||||
matches = [[[0, 2], [0, 1], [], [0, 1], [1, 2], [], [1, 2]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition32"
|
||||
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
|
||||
haystack = '''aa'''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "repetition34"
|
||||
regex = '''((..)|(.)){1}'''
|
||||
haystack = '''aa'''
|
||||
matches = [[[0, 2], [0, 2], [0, 2], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition35"
|
||||
regex = '''((..)|(.)){2}'''
|
||||
haystack = '''aa'''
|
||||
matches = [[[0, 2], [1, 2], [], [1, 2]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition36"
|
||||
regex = '''((..)|(.)){3}'''
|
||||
haystack = '''aa'''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
[[test]]
|
||||
name = "repetition38"
|
||||
regex = '''((..)|(.))*'''
|
||||
haystack = '''aa'''
|
||||
matches = [[[0, 2], [0, 2], [0, 2], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition40"
|
||||
regex = '''((..)|(.))'''
|
||||
haystack = '''aaa'''
|
||||
matches = [[[0, 2], [0, 2], [0, 2], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition41"
|
||||
regex = '''((..)|(.))((..)|(.))'''
|
||||
haystack = '''aaa'''
|
||||
matches = [[[0, 3], [0, 2], [0, 2], [], [2, 3], [], [2, 3]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition42"
|
||||
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
|
||||
haystack = '''aaa'''
|
||||
matches = [[[0, 3], [0, 1], [], [0, 1], [1, 2], [], [1, 2], [2, 3], [], [2, 3]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition44"
|
||||
regex = '''((..)|(.)){1}'''
|
||||
haystack = '''aaa'''
|
||||
matches = [[[0, 2], [0, 2], [0, 2], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition46"
|
||||
regex = '''((..)|(.)){2}'''
|
||||
haystack = '''aaa'''
|
||||
matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition47"
|
||||
regex = '''((..)|(.)){3}'''
|
||||
haystack = '''aaa'''
|
||||
matches = [[[0, 3], [2, 3], [], [2, 3]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition50"
|
||||
regex = '''((..)|(.))*'''
|
||||
haystack = '''aaa'''
|
||||
matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition52"
|
||||
regex = '''((..)|(.))'''
|
||||
haystack = '''aaaa'''
|
||||
matches = [[[0, 2], [0, 2], [0, 2], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition53"
|
||||
regex = '''((..)|(.))((..)|(.))'''
|
||||
haystack = '''aaaa'''
|
||||
matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition54"
|
||||
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
|
||||
haystack = '''aaaa'''
|
||||
matches = [[[0, 4], [0, 2], [0, 2], [], [2, 3], [], [2, 3], [3, 4], [], [3, 4]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition56"
|
||||
regex = '''((..)|(.)){1}'''
|
||||
haystack = '''aaaa'''
|
||||
matches = [[[0, 2], [0, 2], [0, 2], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition57"
|
||||
regex = '''((..)|(.)){2}'''
|
||||
haystack = '''aaaa'''
|
||||
matches = [[[0, 4], [2, 4], [2, 4], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition59"
|
||||
regex = '''((..)|(.)){3}'''
|
||||
haystack = '''aaaa'''
|
||||
matches = [[[0, 4], [3, 4], [0, 2], [3, 4]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition61"
|
||||
regex = '''((..)|(.))*'''
|
||||
haystack = '''aaaa'''
|
||||
matches = [[[0, 4], [2, 4], [2, 4], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition63"
|
||||
regex = '''((..)|(.))'''
|
||||
haystack = '''aaaaa'''
|
||||
matches = [[[0, 2], [0, 2], [0, 2], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition64"
|
||||
regex = '''((..)|(.))((..)|(.))'''
|
||||
haystack = '''aaaaa'''
|
||||
matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition65"
|
||||
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
|
||||
haystack = '''aaaaa'''
|
||||
matches = [[[0, 5], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 5], [], [4, 5]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition67"
|
||||
regex = '''((..)|(.)){1}'''
|
||||
haystack = '''aaaaa'''
|
||||
matches = [[[0, 2], [0, 2], [0, 2], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition68"
|
||||
regex = '''((..)|(.)){2}'''
|
||||
haystack = '''aaaaa'''
|
||||
matches = [[[0, 4], [2, 4], [2, 4], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition70"
|
||||
regex = '''((..)|(.)){3}'''
|
||||
haystack = '''aaaaa'''
|
||||
matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition73"
|
||||
regex = '''((..)|(.))*'''
|
||||
haystack = '''aaaaa'''
|
||||
matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition75"
|
||||
regex = '''((..)|(.))'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 2], [0, 2], [0, 2], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition76"
|
||||
regex = '''((..)|(.))((..)|(.))'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition77"
|
||||
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 6], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 6], [4, 6], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition79"
|
||||
regex = '''((..)|(.)){1}'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 2], [0, 2], [0, 2], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition80"
|
||||
regex = '''((..)|(.)){2}'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 4], [2, 4], [2, 4], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition81"
|
||||
regex = '''((..)|(.)){3}'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 6], [4, 6], [4, 6], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition83"
|
||||
regex = '''((..)|(.))*'''
|
||||
haystack = '''aaaaaa'''
|
||||
matches = [[[0, 6], [4, 6], [4, 6], []]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive90"
|
||||
regex = '''X(.?){0,}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [7, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive91"
|
||||
regex = '''X(.?){1,}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [7, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive92"
|
||||
regex = '''X(.?){2,}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [7, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive93"
|
||||
regex = '''X(.?){3,}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [7, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive94"
|
||||
regex = '''X(.?){4,}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [7, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive95"
|
||||
regex = '''X(.?){5,}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [7, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive96"
|
||||
regex = '''X(.?){6,}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [7, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive97"
|
||||
regex = '''X(.?){7,}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [7, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive98"
|
||||
regex = '''X(.?){8,}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [8, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive100"
|
||||
regex = '''X(.?){0,8}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [8, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive102"
|
||||
regex = '''X(.?){1,8}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [8, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive104"
|
||||
regex = '''X(.?){2,8}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [8, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive106"
|
||||
regex = '''X(.?){3,8}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [8, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive108"
|
||||
regex = '''X(.?){4,8}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [8, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive110"
|
||||
regex = '''X(.?){5,8}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [8, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive112"
|
||||
regex = '''X(.?){6,8}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [8, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive114"
|
||||
regex = '''X(.?){7,8}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [8, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive115"
|
||||
regex = '''X(.?){8,8}Y'''
|
||||
haystack = '''X1234567Y'''
|
||||
matches = [[[0, 9], [8, 8]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by Rust regex project.
|
||||
[[test]]
|
||||
name = "repetition-expensive127"
|
||||
regex = '''(a|ab|c|bcd){0,}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 1], [0, 1], [1, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by Rust regex project.
|
||||
[[test]]
|
||||
name = "repetition-expensive129"
|
||||
regex = '''(a|ab|c|bcd){1,}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 1], [0, 1], [1, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive130"
|
||||
regex = '''(a|ab|c|bcd){2,}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [3, 6], [6, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive131"
|
||||
regex = '''(a|ab|c|bcd){3,}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [3, 6], [6, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive132"
|
||||
regex = '''(a|ab|c|bcd){4,}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
# Test added by Rust regex project.
|
||||
[[test]]
|
||||
name = "repetition-expensive134"
|
||||
regex = '''(a|ab|c|bcd){0,10}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 1], [0, 1], [1, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by Rust regex project.
|
||||
[[test]]
|
||||
name = "repetition-expensive136"
|
||||
regex = '''(a|ab|c|bcd){1,10}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 1], [0, 1], [1, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive137"
|
||||
regex = '''(a|ab|c|bcd){2,10}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [3, 6], [6, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive138"
|
||||
regex = '''(a|ab|c|bcd){3,10}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [3, 6], [6, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive139"
|
||||
regex = '''(a|ab|c|bcd){4,10}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
# Test added by Rust regex project.
|
||||
[[test]]
|
||||
name = "repetition-expensive141"
|
||||
regex = '''(a|ab|c|bcd)*(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 1], [0, 1], [1, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by Rust regex project.
|
||||
[[test]]
|
||||
name = "repetition-expensive143"
|
||||
regex = '''(a|ab|c|bcd)+(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 1], [0, 1], [1, 1]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive149"
|
||||
regex = '''(ab|a|c|bcd){0,}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [4, 5], [5, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive151"
|
||||
regex = '''(ab|a|c|bcd){1,}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [4, 5], [5, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive153"
|
||||
regex = '''(ab|a|c|bcd){2,}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [4, 5], [5, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive155"
|
||||
regex = '''(ab|a|c|bcd){3,}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [4, 5], [5, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive156"
|
||||
regex = '''(ab|a|c|bcd){4,}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive158"
|
||||
regex = '''(ab|a|c|bcd){0,10}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [4, 5], [5, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive160"
|
||||
regex = '''(ab|a|c|bcd){1,10}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [4, 5], [5, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive162"
|
||||
regex = '''(ab|a|c|bcd){2,10}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [4, 5], [5, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive164"
|
||||
regex = '''(ab|a|c|bcd){3,10}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [4, 5], [5, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "repetition-expensive165"
|
||||
regex = '''(ab|a|c|bcd){4,10}(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = []
|
||||
match-limit = 1
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive167"
|
||||
regex = '''(ab|a|c|bcd)*(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [4, 5], [5, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
# Test added by RE2/Go project.
|
||||
[[test]]
|
||||
name = "repetition-expensive169"
|
||||
regex = '''(ab|a|c|bcd)+(d*)'''
|
||||
haystack = '''ababcd'''
|
||||
matches = [[[0, 6], [4, 5], [5, 6]]]
|
||||
match-limit = 1
|
||||
anchored = true
|
||||
|
||||
143
third-party/vendor/regex/testdata/iter.toml
vendored
Normal file
143
third-party/vendor/regex/testdata/iter.toml
vendored
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
[[test]]
|
||||
name = "1"
|
||||
regex = "a"
|
||||
haystack = "aaa"
|
||||
matches = [[0, 1], [1, 2], [2, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "2"
|
||||
regex = "a"
|
||||
haystack = "aba"
|
||||
matches = [[0, 1], [2, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty1"
|
||||
regex = ''
|
||||
haystack = ''
|
||||
matches = [[0, 0]]
|
||||
|
||||
[[test]]
|
||||
name = "empty2"
|
||||
regex = ''
|
||||
haystack = 'abc'
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty3"
|
||||
regex = '(?:)'
|
||||
haystack = 'abc'
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty4"
|
||||
regex = '(?:)*'
|
||||
haystack = 'abc'
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty5"
|
||||
regex = '(?:)+'
|
||||
haystack = 'abc'
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty6"
|
||||
regex = '(?:)?'
|
||||
haystack = 'abc'
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty7"
|
||||
regex = '(?:)(?:)'
|
||||
haystack = 'abc'
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty8"
|
||||
regex = '(?:)+|z'
|
||||
haystack = 'abc'
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty9"
|
||||
regex = 'z|(?:)+'
|
||||
haystack = 'abc'
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty10"
|
||||
regex = '(?:)+|b'
|
||||
haystack = 'abc'
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "empty11"
|
||||
regex = 'b|(?:)+'
|
||||
haystack = 'abc'
|
||||
matches = [[0, 0], [1, 2], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "start1"
|
||||
regex = "^a"
|
||||
haystack = "a"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "start2"
|
||||
regex = "^a"
|
||||
haystack = "aa"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "anchored1"
|
||||
regex = "a"
|
||||
haystack = "a"
|
||||
matches = [[0, 1]]
|
||||
anchored = true
|
||||
|
||||
# This test is pretty subtle. It demonstrates the crucial difference between
|
||||
# '^a' and 'a' compiled in 'anchored' mode. The former regex exclusively
|
||||
# matches at the start of a haystack and nowhere else. The latter regex has
|
||||
# no such restriction, but its automaton is constructed such that it lacks a
|
||||
# `.*?` prefix. So it can actually produce matches at multiple locations.
|
||||
# The anchored3 test drives this point home.
|
||||
[[test]]
|
||||
name = "anchored2"
|
||||
regex = "a"
|
||||
haystack = "aa"
|
||||
matches = [[0, 1], [1, 2]]
|
||||
anchored = true
|
||||
|
||||
# Unlike anchored2, this test stops matching anything after it sees `b`
|
||||
# since it lacks a `.*?` prefix. Since it is looking for 'a' but sees 'b', it
|
||||
# determines that there are no remaining matches.
|
||||
[[test]]
|
||||
name = "anchored3"
|
||||
regex = "a"
|
||||
haystack = "aaba"
|
||||
matches = [[0, 1], [1, 2]]
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "nonempty-followedby-empty"
|
||||
regex = 'abc|.*?'
|
||||
haystack = "abczzz"
|
||||
matches = [[0, 3], [4, 4], [5, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "nonempty-followedby-oneempty"
|
||||
regex = 'abc|.*?'
|
||||
haystack = "abcz"
|
||||
matches = [[0, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "nonempty-followedby-onemixed"
|
||||
regex = 'abc|.*?'
|
||||
haystack = "abczabc"
|
||||
matches = [[0, 3], [4, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "nonempty-followedby-twomixed"
|
||||
regex = 'abc|.*?'
|
||||
haystack = "abczzabc"
|
||||
matches = [[0, 3], [4, 4], [5, 8]]
|
||||
25
third-party/vendor/regex/testdata/leftmost-all.toml
vendored
Normal file
25
third-party/vendor/regex/testdata/leftmost-all.toml
vendored
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
[[test]]
|
||||
name = "alt"
|
||||
regex = 'foo|foobar'
|
||||
haystack = "foobar"
|
||||
matches = [[0, 6]]
|
||||
match-kind = "all"
|
||||
search-kind = "leftmost"
|
||||
|
||||
[[test]]
|
||||
name = "multi"
|
||||
regex = ['foo', 'foobar']
|
||||
haystack = "foobar"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 6] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "leftmost"
|
||||
|
||||
[[test]]
|
||||
name = "dotall"
|
||||
regex = '(?s:.)'
|
||||
haystack = "foobar"
|
||||
matches = [[5, 6]]
|
||||
match-kind = "all"
|
||||
search-kind = "leftmost"
|
||||
109
third-party/vendor/regex/testdata/line-terminator.toml
vendored
Normal file
109
third-party/vendor/regex/testdata/line-terminator.toml
vendored
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
# This tests that we can switch the line terminator to the NUL byte.
|
||||
[[test]]
|
||||
name = "nul"
|
||||
regex = '(?m)^[a-z]+$'
|
||||
haystack = '\x00abc\x00'
|
||||
matches = [[1, 4]]
|
||||
unescape = true
|
||||
line-terminator = '\x00'
|
||||
|
||||
# This tests that '.' will not match the configured line terminator, but will
|
||||
# match \n.
|
||||
[[test]]
|
||||
name = "dot-changes-with-line-terminator"
|
||||
regex = '.'
|
||||
haystack = '\x00\n'
|
||||
matches = [[1, 2]]
|
||||
unescape = true
|
||||
line-terminator = '\x00'
|
||||
|
||||
# This tests that when we switch the line terminator, \n is no longer
|
||||
# recognized as the terminator.
|
||||
[[test]]
|
||||
name = "not-line-feed"
|
||||
regex = '(?m)^[a-z]+$'
|
||||
haystack = '\nabc\n'
|
||||
matches = []
|
||||
unescape = true
|
||||
line-terminator = '\x00'
|
||||
|
||||
# This tests that we can set the line terminator to a non-ASCII byte and have
|
||||
# it behave as expected.
|
||||
[[test]]
|
||||
name = "non-ascii"
|
||||
regex = '(?m)^[a-z]+$'
|
||||
haystack = '\xFFabc\xFF'
|
||||
matches = [[1, 4]]
|
||||
unescape = true
|
||||
line-terminator = '\xFF'
|
||||
utf8 = false
|
||||
|
||||
# This tests a tricky case where the line terminator is set to \r. This ensures
|
||||
# that the StartLF look-behind assertion is tracked when computing the start
|
||||
# state.
|
||||
[[test]]
|
||||
name = "carriage"
|
||||
regex = '(?m)^[a-z]+'
|
||||
haystack = 'ABC\rabc'
|
||||
matches = [[4, 7]]
|
||||
bounds = [4, 7]
|
||||
unescape = true
|
||||
line-terminator = '\r'
|
||||
|
||||
# This tests that we can set the line terminator to a byte corresponding to a
|
||||
# word character, and things work as expected.
|
||||
[[test]]
|
||||
name = "word-byte"
|
||||
regex = '(?m)^[a-z]+$'
|
||||
haystack = 'ZabcZ'
|
||||
matches = [[1, 4]]
|
||||
unescape = true
|
||||
line-terminator = 'Z'
|
||||
|
||||
# This tests that we can set the line terminator to a byte corresponding to a
|
||||
# non-word character, and things work as expected.
|
||||
[[test]]
|
||||
name = "non-word-byte"
|
||||
regex = '(?m)^[a-z]+$'
|
||||
haystack = '%abc%'
|
||||
matches = [[1, 4]]
|
||||
unescape = true
|
||||
line-terminator = '%'
|
||||
|
||||
# This combines "set line terminator to a word byte" with a word boundary
|
||||
# assertion, which should result in no match even though ^/$ matches.
|
||||
[[test]]
|
||||
name = "word-boundary"
|
||||
regex = '(?m)^\b[a-z]+\b$'
|
||||
haystack = 'ZabcZ'
|
||||
matches = []
|
||||
unescape = true
|
||||
line-terminator = 'Z'
|
||||
|
||||
# Like 'word-boundary', but does an anchored search at the point where ^
|
||||
# matches, but where \b should not.
|
||||
[[test]]
|
||||
name = "word-boundary-at"
|
||||
regex = '(?m)^\b[a-z]+\b$'
|
||||
haystack = 'ZabcZ'
|
||||
matches = []
|
||||
bounds = [1, 4]
|
||||
anchored = true
|
||||
unescape = true
|
||||
line-terminator = 'Z'
|
||||
|
||||
# Like 'word-boundary-at', but flips the word boundary to a negation. This
|
||||
# in particular tests a tricky case in DFA engines, where they must consider
|
||||
# explicitly that a starting configuration from a custom line terminator may
|
||||
# also require setting the "is from word byte" flag on a state. Otherwise,
|
||||
# it's treated as "not from a word byte," which would result in \B not matching
|
||||
# here when it should.
|
||||
[[test]]
|
||||
name = "not-word-boundary-at"
|
||||
regex = '(?m)^\B[a-z]+\B$'
|
||||
haystack = 'ZabcZ'
|
||||
matches = [[1, 4]]
|
||||
bounds = [1, 4]
|
||||
anchored = true
|
||||
unescape = true
|
||||
line-terminator = 'Z'
|
||||
99
third-party/vendor/regex/testdata/misc.toml
vendored
Normal file
99
third-party/vendor/regex/testdata/misc.toml
vendored
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
[[test]]
|
||||
name = "ascii-literal"
|
||||
regex = "a"
|
||||
haystack = "a"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "ascii-literal-not"
|
||||
regex = "a"
|
||||
haystack = "z"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "ascii-literal-anchored"
|
||||
regex = "a"
|
||||
haystack = "a"
|
||||
matches = [[0, 1]]
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "ascii-literal-anchored-not"
|
||||
regex = "a"
|
||||
haystack = "z"
|
||||
matches = []
|
||||
anchored = true
|
||||
|
||||
[[test]]
|
||||
name = "anchor-start-end-line"
|
||||
regex = '(?m)^bar$'
|
||||
haystack = "foo\nbar\nbaz"
|
||||
matches = [[4, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "prefix-literal-match"
|
||||
regex = '^abc'
|
||||
haystack = "abc"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "prefix-literal-match-ascii"
|
||||
regex = '^abc'
|
||||
haystack = "abc"
|
||||
matches = [[0, 3]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "prefix-literal-no-match"
|
||||
regex = '^abc'
|
||||
haystack = "zabc"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "one-literal-edge"
|
||||
regex = 'abc'
|
||||
haystack = "xxxxxab"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "terminates"
|
||||
regex = 'a$'
|
||||
haystack = "a"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "suffix-100"
|
||||
regex = '.*abcd'
|
||||
haystack = "abcd"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "suffix-200"
|
||||
regex = '.*(?:abcd)+'
|
||||
haystack = "abcd"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "suffix-300"
|
||||
regex = '.*(?:abcd)+'
|
||||
haystack = "abcdabcd"
|
||||
matches = [[0, 8]]
|
||||
|
||||
[[test]]
|
||||
name = "suffix-400"
|
||||
regex = '.*(?:abcd)+'
|
||||
haystack = "abcdxabcd"
|
||||
matches = [[0, 9]]
|
||||
|
||||
[[test]]
|
||||
name = "suffix-500"
|
||||
regex = '.*x(?:abcd)+'
|
||||
haystack = "abcdxabcd"
|
||||
matches = [[0, 9]]
|
||||
|
||||
[[test]]
|
||||
name = "suffix-600"
|
||||
regex = '[^abcd]*x(?:abcd)+'
|
||||
haystack = "abcdxabcd"
|
||||
matches = [[4, 9]]
|
||||
845
third-party/vendor/regex/testdata/multiline.toml
vendored
Normal file
845
third-party/vendor/regex/testdata/multiline.toml
vendored
Normal file
|
|
@ -0,0 +1,845 @@
|
|||
[[test]]
|
||||
name = "basic1"
|
||||
regex = '(?m)^[a-z]+$'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 3], [4, 7], [8, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "basic1-crlf"
|
||||
regex = '(?Rm)^[a-z]+$'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 3], [4, 7], [8, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "basic1-crlf-cr"
|
||||
regex = '(?Rm)^[a-z]+$'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[0, 3], [4, 7], [8, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "basic2"
|
||||
regex = '(?m)^$'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "basic2-crlf"
|
||||
regex = '(?Rm)^$'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "basic2-crlf-cr"
|
||||
regex = '(?Rm)^$'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "basic3"
|
||||
regex = '(?m)^'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 0], [4, 4], [8, 8]]
|
||||
|
||||
[[test]]
|
||||
name = "basic3-crlf"
|
||||
regex = '(?Rm)^'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 0], [4, 4], [8, 8]]
|
||||
|
||||
[[test]]
|
||||
name = "basic3-crlf-cr"
|
||||
regex = '(?Rm)^'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[0, 0], [4, 4], [8, 8]]
|
||||
|
||||
[[test]]
|
||||
name = "basic4"
|
||||
regex = '(?m)$'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[3, 3], [7, 7], [11, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "basic4-crlf"
|
||||
regex = '(?Rm)$'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[3, 3], [7, 7], [11, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "basic4-crlf-cr"
|
||||
regex = '(?Rm)$'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[3, 3], [7, 7], [11, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "basic5"
|
||||
regex = '(?m)^[a-z]'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 1], [4, 5], [8, 9]]
|
||||
|
||||
[[test]]
|
||||
name = "basic5-crlf"
|
||||
regex = '(?Rm)^[a-z]'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 1], [4, 5], [8, 9]]
|
||||
|
||||
[[test]]
|
||||
name = "basic5-crlf-cr"
|
||||
regex = '(?Rm)^[a-z]'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[0, 1], [4, 5], [8, 9]]
|
||||
|
||||
[[test]]
|
||||
name = "basic6"
|
||||
regex = '(?m)[a-z]^'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "basic6-crlf"
|
||||
regex = '(?Rm)[a-z]^'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "basic6-crlf-cr"
|
||||
regex = '(?Rm)[a-z]^'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "basic7"
|
||||
regex = '(?m)[a-z]$'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[2, 3], [6, 7], [10, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "basic7-crlf"
|
||||
regex = '(?Rm)[a-z]$'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[2, 3], [6, 7], [10, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "basic7-crlf-cr"
|
||||
regex = '(?Rm)[a-z]$'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[2, 3], [6, 7], [10, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "basic8"
|
||||
regex = '(?m)$[a-z]'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "basic8-crlf"
|
||||
regex = '(?Rm)$[a-z]'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "basic8-crlf-cr"
|
||||
regex = '(?Rm)$[a-z]'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "basic9"
|
||||
regex = '(?m)^$'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
|
||||
[[test]]
|
||||
name = "basic9-crlf"
|
||||
regex = '(?Rm)^$'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat1"
|
||||
regex = '(?m)(?:^$)*'
|
||||
haystack = "a\nb\nc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat1-crlf"
|
||||
regex = '(?Rm)(?:^$)*'
|
||||
haystack = "a\nb\nc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat1-crlf-cr"
|
||||
regex = '(?Rm)(?:^$)*'
|
||||
haystack = "a\rb\rc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat1-no-multi"
|
||||
regex = '(?:^$)*'
|
||||
haystack = "a\nb\nc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat1-no-multi-crlf"
|
||||
regex = '(?R)(?:^$)*'
|
||||
haystack = "a\nb\nc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat1-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:^$)*'
|
||||
haystack = "a\rb\rc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat2"
|
||||
regex = '(?m)(?:^|a)+'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat2-crlf"
|
||||
regex = '(?Rm)(?:^|a)+'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat2-crlf-cr"
|
||||
regex = '(?Rm)(?:^|a)+'
|
||||
haystack = "a\raaa\r"
|
||||
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat2-no-multi"
|
||||
regex = '(?:^|a)+'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [2, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat2-no-multi-crlf"
|
||||
regex = '(?R)(?:^|a)+'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [2, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat2-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:^|a)+'
|
||||
haystack = "a\raaa\r"
|
||||
matches = [[0, 0], [2, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat3"
|
||||
regex = '(?m)(?:^|a)*'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat3-crlf"
|
||||
regex = '(?Rm)(?:^|a)*'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat3-crlf-cr"
|
||||
regex = '(?Rm)(?:^|a)*'
|
||||
haystack = "a\raaa\r"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat3-no-multi"
|
||||
regex = '(?:^|a)*'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat3-no-multi-crlf"
|
||||
regex = '(?R)(?:^|a)*'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat3-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:^|a)*'
|
||||
haystack = "a\raaa\r"
|
||||
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat4"
|
||||
regex = '(?m)(?:^|a+)'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat4-crlf"
|
||||
regex = '(?Rm)(?:^|a+)'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat4-crlf-cr"
|
||||
regex = '(?Rm)(?:^|a+)'
|
||||
haystack = "a\raaa\r"
|
||||
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat4-no-multi"
|
||||
regex = '(?:^|a+)'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [2, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat4-no-multi-crlf"
|
||||
regex = '(?R)(?:^|a+)'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [2, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat4-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:^|a+)'
|
||||
haystack = "a\raaa\r"
|
||||
matches = [[0, 0], [2, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat5"
|
||||
regex = '(?m)(?:^|a*)'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat5-crlf"
|
||||
regex = '(?Rm)(?:^|a*)'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat5-crlf-cr"
|
||||
regex = '(?Rm)(?:^|a*)'
|
||||
haystack = "a\raaa\r"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat5-no-multi"
|
||||
regex = '(?:^|a*)'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat5-no-multi-crlf"
|
||||
regex = '(?R)(?:^|a*)'
|
||||
haystack = "a\naaa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat5-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:^|a*)'
|
||||
haystack = "a\raaa\r"
|
||||
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat6"
|
||||
regex = '(?m)(?:^[a-z])+'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 1], [4, 5], [8, 9]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat6-crlf"
|
||||
regex = '(?Rm)(?:^[a-z])+'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 1], [4, 5], [8, 9]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat6-crlf-cr"
|
||||
regex = '(?Rm)(?:^[a-z])+'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[0, 1], [4, 5], [8, 9]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat6-no-multi"
|
||||
regex = '(?:^[a-z])+'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat6-no-multi-crlf"
|
||||
regex = '(?R)(?:^[a-z])+'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat6-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:^[a-z])+'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat7"
|
||||
regex = '(?m)(?:^[a-z]{3}\n?)+'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat7-crlf"
|
||||
regex = '(?Rm)(?:^[a-z]{3}\n?)+'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat7-crlf-cr"
|
||||
regex = '(?Rm)(?:^[a-z]{3}\r?)+'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[0, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat7-no-multi"
|
||||
regex = '(?:^[a-z]{3}\n?)+'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat7-no-multi-crlf"
|
||||
regex = '(?R)(?:^[a-z]{3}\n?)+'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat7-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:^[a-z]{3}\r?)+'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat8"
|
||||
regex = '(?m)(?:^[a-z]{3}\n?)*'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat8-crlf"
|
||||
regex = '(?Rm)(?:^[a-z]{3}\n?)*'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat8-crlf-cr"
|
||||
regex = '(?Rm)(?:^[a-z]{3}\r?)*'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[0, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat8-no-multi"
|
||||
regex = '(?:^[a-z]{3}\n?)*'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat8-no-multi-crlf"
|
||||
regex = '(?R)(?:^[a-z]{3}\n?)*'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat8-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:^[a-z]{3}\r?)*'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat9"
|
||||
regex = '(?m)(?:\n?[a-z]{3}$)+'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat9-crlf"
|
||||
regex = '(?Rm)(?:\n?[a-z]{3}$)+'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat9-crlf-cr"
|
||||
regex = '(?Rm)(?:\r?[a-z]{3}$)+'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[0, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat9-no-multi"
|
||||
regex = '(?:\n?[a-z]{3}$)+'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[7, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat9-no-multi-crlf"
|
||||
regex = '(?R)(?:\n?[a-z]{3}$)+'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[7, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat9-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:\r?[a-z]{3}$)+'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[7, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat10"
|
||||
regex = '(?m)(?:\n?[a-z]{3}$)*'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat10-crlf"
|
||||
regex = '(?Rm)(?:\n?[a-z]{3}$)*'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat10-crlf-cr"
|
||||
regex = '(?Rm)(?:\r?[a-z]{3}$)*'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[0, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat10-no-multi"
|
||||
regex = '(?:\n?[a-z]{3}$)*'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat10-no-multi-crlf"
|
||||
regex = '(?R)(?:\n?[a-z]{3}$)*'
|
||||
haystack = "abc\ndef\nxyz"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat10-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:\r?[a-z]{3}$)*'
|
||||
haystack = "abc\rdef\rxyz"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat11"
|
||||
regex = '(?m)^*'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat11-crlf"
|
||||
regex = '(?Rm)^*'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat11-crlf-cr"
|
||||
regex = '(?Rm)^*'
|
||||
haystack = "\raa\r"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat11-no-multi"
|
||||
regex = '^*'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat11-no-multi-crlf"
|
||||
regex = '(?R)^*'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat11-no-multi-crlf-cr"
|
||||
regex = '(?R)^*'
|
||||
haystack = "\raa\r"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat12"
|
||||
regex = '(?m)^+'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0], [1, 1], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat12-crlf"
|
||||
regex = '(?Rm)^+'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0], [1, 1], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat12-crlf-cr"
|
||||
regex = '(?Rm)^+'
|
||||
haystack = "\raa\r"
|
||||
matches = [[0, 0], [1, 1], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat12-no-multi"
|
||||
regex = '^+'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat12-no-multi-crlf"
|
||||
regex = '(?R)^+'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat12-no-multi-crlf-cr"
|
||||
regex = '(?R)^+'
|
||||
haystack = "\raa\r"
|
||||
matches = [[0, 0]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat13"
|
||||
regex = '(?m)$*'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat13-crlf"
|
||||
regex = '(?Rm)$*'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat13-crlf-cr"
|
||||
regex = '(?Rm)$*'
|
||||
haystack = "\raa\r"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat13-no-multi"
|
||||
regex = '$*'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat13-no-multi-crlf"
|
||||
regex = '(?R)$*'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat13-no-multi-crlf-cr"
|
||||
regex = '(?R)$*'
|
||||
haystack = "\raa\r"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat14"
|
||||
regex = '(?m)$+'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat14-crlf"
|
||||
regex = '(?Rm)$+'
|
||||
haystack = "\naa\n"
|
||||
matches = [[0, 0], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat14-crlf-cr"
|
||||
regex = '(?Rm)$+'
|
||||
haystack = "\raa\r"
|
||||
matches = [[0, 0], [3, 3], [4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat14-no-multi"
|
||||
regex = '$+'
|
||||
haystack = "\naa\n"
|
||||
matches = [[4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat14-no-multi-crlf"
|
||||
regex = '(?R)$+'
|
||||
haystack = "\naa\n"
|
||||
matches = [[4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat14-no-multi-crlf-cr"
|
||||
regex = '(?R)$+'
|
||||
haystack = "\raa\r"
|
||||
matches = [[4, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat15"
|
||||
regex = '(?m)(?:$\n)+'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = [[0, 2], [5, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat15-crlf"
|
||||
regex = '(?Rm)(?:$\n)+'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = [[0, 2], [5, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat15-crlf-cr"
|
||||
regex = '(?Rm)(?:$\r)+'
|
||||
haystack = "\r\raaa\r\r"
|
||||
matches = [[0, 2], [5, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat15-no-multi"
|
||||
regex = '(?:$\n)+'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "repeat15-no-multi-crlf"
|
||||
regex = '(?R)(?:$\n)+'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "repeat15-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:$\r)+'
|
||||
haystack = "\r\raaa\r\r"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "repeat16"
|
||||
regex = '(?m)(?:$\n)*'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = [[0, 2], [3, 3], [4, 4], [5, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat16-crlf"
|
||||
regex = '(?Rm)(?:$\n)*'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = [[0, 2], [3, 3], [4, 4], [5, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat16-crlf-cr"
|
||||
regex = '(?Rm)(?:$\r)*'
|
||||
haystack = "\r\raaa\r\r"
|
||||
matches = [[0, 2], [3, 3], [4, 4], [5, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat16-no-multi"
|
||||
regex = '(?:$\n)*'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat16-no-multi-crlf"
|
||||
regex = '(?R)(?:$\n)*'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat16-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:$\r)*'
|
||||
haystack = "\r\raaa\r\r"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat17"
|
||||
regex = '(?m)(?:$\n^)+'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = [[0, 2], [5, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat17-crlf"
|
||||
regex = '(?Rm)(?:$\n^)+'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = [[0, 2], [5, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat17-crlf-cr"
|
||||
regex = '(?Rm)(?:$\r^)+'
|
||||
haystack = "\r\raaa\r\r"
|
||||
matches = [[0, 2], [5, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat17-no-multi"
|
||||
regex = '(?:$\n^)+'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "repeat17-no-multi-crlf"
|
||||
regex = '(?R)(?:$\n^)+'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "repeat17-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:$\r^)+'
|
||||
haystack = "\r\raaa\r\r"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "repeat18"
|
||||
regex = '(?m)(?:^|$)+'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat18-crlf"
|
||||
regex = '(?Rm)(?:^|$)+'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat18-crlf-cr"
|
||||
regex = '(?Rm)(?:^|$)+'
|
||||
haystack = "\r\raaa\r\r"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat18-no-multi"
|
||||
regex = '(?:^|$)+'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = [[0, 0], [7, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat18-no-multi-crlf"
|
||||
regex = '(?R)(?:^|$)+'
|
||||
haystack = "\n\naaa\n\n"
|
||||
matches = [[0, 0], [7, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "repeat18-no-multi-crlf-cr"
|
||||
regex = '(?R)(?:^|$)+'
|
||||
haystack = "\r\raaa\r\r"
|
||||
matches = [[0, 0], [7, 7]]
|
||||
|
||||
[[test]]
|
||||
name = "match-line-100"
|
||||
regex = '(?m)^.+$'
|
||||
haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
|
||||
matches = [[0, 2], [3, 22]]
|
||||
|
||||
[[test]]
|
||||
name = "match-line-100-crlf"
|
||||
regex = '(?Rm)^.+$'
|
||||
haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
|
||||
matches = [[0, 2], [3, 22]]
|
||||
|
||||
[[test]]
|
||||
name = "match-line-100-crlf-cr"
|
||||
regex = '(?Rm)^.+$'
|
||||
haystack = "aa\raaaaaaaaaaaaaaaaaaa\r"
|
||||
matches = [[0, 2], [3, 22]]
|
||||
|
||||
[[test]]
|
||||
name = "match-line-200"
|
||||
regex = '(?m)^.+$'
|
||||
haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
|
||||
matches = [[0, 2], [3, 22]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "match-line-200-crlf"
|
||||
regex = '(?Rm)^.+$'
|
||||
haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
|
||||
matches = [[0, 2], [3, 22]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "match-line-200-crlf-cr"
|
||||
regex = '(?Rm)^.+$'
|
||||
haystack = "aa\raaaaaaaaaaaaaaaaaaa\r"
|
||||
matches = [[0, 2], [3, 22]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
222
third-party/vendor/regex/testdata/no-unicode.toml
vendored
Normal file
222
third-party/vendor/regex/testdata/no-unicode.toml
vendored
Normal file
|
|
@ -0,0 +1,222 @@
|
|||
[[test]]
|
||||
name = "invalid-utf8-literal1"
|
||||
regex = '\xFF'
|
||||
haystack = '\xFF'
|
||||
matches = [[0, 1]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
unescape = true
|
||||
|
||||
|
||||
[[test]]
|
||||
name = "mixed"
|
||||
regex = '(?:.+)(?-u)(?:.+)'
|
||||
haystack = '\xCE\x93\xCE\x94\xFF'
|
||||
matches = [[0, 5]]
|
||||
utf8 = false
|
||||
unescape = true
|
||||
|
||||
|
||||
[[test]]
|
||||
name = "case1"
|
||||
regex = "a"
|
||||
haystack = "A"
|
||||
matches = [[0, 1]]
|
||||
case-insensitive = true
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "case2"
|
||||
regex = "[a-z]+"
|
||||
haystack = "AaAaA"
|
||||
matches = [[0, 5]]
|
||||
case-insensitive = true
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "case3"
|
||||
regex = "[a-z]+"
|
||||
haystack = "aA\u212AaA"
|
||||
matches = [[0, 7]]
|
||||
case-insensitive = true
|
||||
|
||||
[[test]]
|
||||
name = "case4"
|
||||
regex = "[a-z]+"
|
||||
haystack = "aA\u212AaA"
|
||||
matches = [[0, 2], [5, 7]]
|
||||
case-insensitive = true
|
||||
unicode = false
|
||||
|
||||
|
||||
[[test]]
|
||||
name = "negate1"
|
||||
regex = "[^a]"
|
||||
haystack = "δ"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "negate2"
|
||||
regex = "[^a]"
|
||||
haystack = "δ"
|
||||
matches = [[0, 1], [1, 2]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
|
||||
[[test]]
|
||||
name = "dotstar-prefix1"
|
||||
regex = "a"
|
||||
haystack = '\xFFa'
|
||||
matches = [[1, 2]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
unescape = true
|
||||
|
||||
[[test]]
|
||||
name = "dotstar-prefix2"
|
||||
regex = "a"
|
||||
haystack = '\xFFa'
|
||||
matches = [[1, 2]]
|
||||
utf8 = false
|
||||
unescape = true
|
||||
|
||||
|
||||
[[test]]
|
||||
name = "null-bytes1"
|
||||
regex = '[^\x00]+\x00'
|
||||
haystack = 'foo\x00'
|
||||
matches = [[0, 4]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
unescape = true
|
||||
|
||||
|
||||
[[test]]
|
||||
name = "word-ascii"
|
||||
regex = '\w+'
|
||||
haystack = "aδ"
|
||||
matches = [[0, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-unicode"
|
||||
regex = '\w+'
|
||||
haystack = "aδ"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "decimal-ascii"
|
||||
regex = '\d+'
|
||||
haystack = "1२३9"
|
||||
matches = [[0, 1], [7, 8]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "decimal-unicode"
|
||||
regex = '\d+'
|
||||
haystack = "1२३9"
|
||||
matches = [[0, 8]]
|
||||
|
||||
[[test]]
|
||||
name = "space-ascii"
|
||||
regex = '\s+'
|
||||
haystack = " \u1680"
|
||||
matches = [[0, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "space-unicode"
|
||||
regex = '\s+'
|
||||
haystack = " \u1680"
|
||||
matches = [[0, 4]]
|
||||
|
||||
|
||||
[[test]]
|
||||
# See: https://github.com/rust-lang/regex/issues/484
|
||||
name = "iter1-bytes"
|
||||
regex = ''
|
||||
haystack = "☃"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
# See: https://github.com/rust-lang/regex/issues/484
|
||||
name = "iter1-utf8"
|
||||
regex = ''
|
||||
haystack = "☃"
|
||||
matches = [[0, 0], [3, 3]]
|
||||
|
||||
[[test]]
|
||||
# See: https://github.com/rust-lang/regex/issues/484
|
||||
# Note that iter2-utf8 doesn't make sense here, since the input isn't UTF-8.
|
||||
name = "iter2-bytes"
|
||||
regex = ''
|
||||
haystack = 'b\xFFr'
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
unescape = true
|
||||
utf8 = false
|
||||
|
||||
|
||||
# These test that unanchored prefixes can munch through invalid UTF-8 even when
|
||||
# utf8 is enabled.
|
||||
#
|
||||
# This test actually reflects an interesting simplification in how the Thompson
|
||||
# NFA is constructed. It used to be that the NFA could be built with an
|
||||
# unanchored prefix that either matched any byte or _only_ matched valid UTF-8.
|
||||
# But the latter turns out to be pretty precarious when it comes to prefilters,
|
||||
# because if you search a haystack that contains invalid UTF-8 but have an
|
||||
# unanchored prefix that requires UTF-8, then prefilters are no longer a valid
|
||||
# optimization because you actually have to check that everything is valid
|
||||
# UTF-8.
|
||||
#
|
||||
# Originally, I had thought that we needed a valid UTF-8 unanchored prefix in
|
||||
# order to guarantee that we only match at valid UTF-8 boundaries. But this
|
||||
# isn't actually true! There are really only two things to consider here:
|
||||
#
|
||||
# 1) Will a regex match split an encoded codepoint? No. Because by construction,
|
||||
# we ensure that a MATCH state can only be reached by following valid UTF-8 (assuming
|
||||
# all of the UTF-8 modes are enabled).
|
||||
#
|
||||
# 2) Will a regex match arbitrary bytes that aren't valid UTF-8? Again, no,
|
||||
# assuming all of the UTF-8 modes are enabled.
|
||||
[[test]]
|
||||
name = "unanchored-invalid-utf8-match-100"
|
||||
regex = '[a-z]'
|
||||
haystack = '\xFFa\xFF'
|
||||
matches = [[1, 2]]
|
||||
unescape = true
|
||||
utf8 = false
|
||||
|
||||
# This test shows that we can still prevent a match from occurring by inserting
|
||||
# our own unanchored prefix that requires valid UTF-8 to match. Thus, if the
|
||||
# behavior of not munching through invalid UTF-8 anywhere is needed, then it
|
||||
# can be achieved thusly.
|
||||
[[test]]
|
||||
name = "unanchored-invalid-utf8-nomatch"
|
||||
regex = '^(?s:.)*?[a-z]'
|
||||
haystack = '\xFFa\xFF'
|
||||
matches = []
|
||||
unescape = true
|
||||
utf8 = false
|
||||
|
||||
# This is a tricky test that makes sure we don't accidentally do a kind of
|
||||
# unanchored search when we've requested that a regex engine not report
|
||||
# empty matches that split a codepoint. This test caught a regression during
|
||||
# development where the code for skipping over bad empty matches would do so
|
||||
# even if the search should have been anchored. This is ultimately what led to
|
||||
# making 'anchored' an 'Input' option, so that it was always clear what kind
|
||||
# of search was being performed. (Before that, whether a search was anchored
|
||||
# or not was a config knob on the regex engine.) This did wind up making DFAs
|
||||
# a little more complex to configure (with their 'StartKind' knob), but it
|
||||
# generally smoothed out everything else.
|
||||
#
|
||||
# Great example of a test whose failure motivated a sweeping API refactoring.
|
||||
[[test]]
|
||||
name = "anchored-iter-empty-utf8"
|
||||
regex = ''
|
||||
haystack = 'a☃z'
|
||||
matches = [[0, 0], [1, 1]]
|
||||
unescape = false
|
||||
utf8 = true
|
||||
anchored = true
|
||||
280
third-party/vendor/regex/testdata/overlapping.toml
vendored
Normal file
280
third-party/vendor/regex/testdata/overlapping.toml
vendored
Normal file
|
|
@ -0,0 +1,280 @@
|
|||
# NOTE: We define a number of tests where the *match* kind is 'leftmost-first'
|
||||
# but the *search* kind is 'overlapping'. This is a somewhat nonsensical
|
||||
# combination and can produce odd results. Nevertheless, those results should
|
||||
# be consistent so we test them here. (At the time of writing this note, I
|
||||
# hadn't yet decided whether to make 'leftmost-first' with 'overlapping' result
|
||||
# in unspecified behavior.)
|
||||
|
||||
# This demonstrates how a full overlapping search is obviously quadratic. This
|
||||
# regex reports a match for every substring in the haystack.
|
||||
[[test]]
|
||||
name = "ungreedy-dotstar-matches-everything-100"
|
||||
regex = [".*?"]
|
||||
haystack = "zzz"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 0] },
|
||||
{ id = 0, span = [1, 1] },
|
||||
{ id = 0, span = [0, 1] },
|
||||
{ id = 0, span = [2, 2] },
|
||||
{ id = 0, span = [1, 2] },
|
||||
{ id = 0, span = [0, 2] },
|
||||
{ id = 0, span = [3, 3] },
|
||||
{ id = 0, span = [2, 3] },
|
||||
{ id = 0, span = [1, 3] },
|
||||
{ id = 0, span = [0, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "greedy-dotstar-matches-everything-100"
|
||||
regex = [".*"]
|
||||
haystack = "zzz"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 0] },
|
||||
{ id = 0, span = [1, 1] },
|
||||
{ id = 0, span = [0, 1] },
|
||||
{ id = 0, span = [2, 2] },
|
||||
{ id = 0, span = [1, 2] },
|
||||
{ id = 0, span = [0, 2] },
|
||||
{ id = 0, span = [3, 3] },
|
||||
{ id = 0, span = [2, 3] },
|
||||
{ id = 0, span = [1, 3] },
|
||||
{ id = 0, span = [0, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "repetition-plus-leftmost-first-100"
|
||||
regex = 'a+'
|
||||
haystack = "aaa"
|
||||
matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "repetition-plus-leftmost-first-110"
|
||||
regex = '☃+'
|
||||
haystack = "☃☃☃"
|
||||
matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "repetition-plus-all-100"
|
||||
regex = 'a+'
|
||||
haystack = "aaa"
|
||||
matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "repetition-plus-all-110"
|
||||
regex = '☃+'
|
||||
haystack = "☃☃☃"
|
||||
matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "repetition-plus-leftmost-first-200"
|
||||
regex = '(abc)+'
|
||||
haystack = "zzabcabczzabc"
|
||||
matches = [
|
||||
[[2, 5], [2, 5]],
|
||||
[[5, 8], [5, 8]],
|
||||
[[2, 8], [5, 8]],
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "repetition-plus-all-200"
|
||||
regex = '(abc)+'
|
||||
haystack = "zzabcabczzabc"
|
||||
matches = [
|
||||
[[2, 5], [2, 5]],
|
||||
[[5, 8], [5, 8]],
|
||||
[[2, 8], [5, 8]],
|
||||
[[10, 13], [10, 13]],
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "repetition-star-leftmost-first-100"
|
||||
regex = 'a*'
|
||||
haystack = "aaa"
|
||||
matches = [
|
||||
[0, 0],
|
||||
[1, 1],
|
||||
[0, 1],
|
||||
[2, 2],
|
||||
[1, 2],
|
||||
[0, 2],
|
||||
[3, 3],
|
||||
[2, 3],
|
||||
[1, 3],
|
||||
[0, 3],
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "repetition-star-all-100"
|
||||
regex = 'a*'
|
||||
haystack = "aaa"
|
||||
matches = [
|
||||
[0, 0],
|
||||
[1, 1],
|
||||
[0, 1],
|
||||
[2, 2],
|
||||
[1, 2],
|
||||
[0, 2],
|
||||
[3, 3],
|
||||
[2, 3],
|
||||
[1, 3],
|
||||
[0, 3],
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "repetition-star-leftmost-first-200"
|
||||
regex = '(abc)*'
|
||||
haystack = "zzabcabczzabc"
|
||||
matches = [
|
||||
[[0, 0], []],
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "repetition-star-all-200"
|
||||
regex = '(abc)*'
|
||||
haystack = "zzabcabczzabc"
|
||||
matches = [
|
||||
[[0, 0], []],
|
||||
[[1, 1], []],
|
||||
[[2, 2], []],
|
||||
[[3, 3], []],
|
||||
[[4, 4], []],
|
||||
[[5, 5], []],
|
||||
[[2, 5], [2, 5]],
|
||||
[[6, 6], []],
|
||||
[[7, 7], []],
|
||||
[[8, 8], []],
|
||||
[[5, 8], [5, 8]],
|
||||
[[2, 8], [5, 8]],
|
||||
[[9, 9], []],
|
||||
[[10, 10], []],
|
||||
[[11, 11], []],
|
||||
[[12, 12], []],
|
||||
[[13, 13], []],
|
||||
[[10, 13], [10, 13]],
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "start-end-rep-leftmost-first"
|
||||
regex = '(^$)*'
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
[[0, 0], []],
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "start-end-rep-all"
|
||||
regex = '(^$)*'
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
[[0, 0], []],
|
||||
[[1, 1], []],
|
||||
[[2, 2], []],
|
||||
[[3, 3], []],
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "alt-leftmost-first-100"
|
||||
regex = 'abc|a'
|
||||
haystack = "zzabcazzaabc"
|
||||
matches = [[2, 3], [2, 5]]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "alt-all-100"
|
||||
regex = 'abc|a'
|
||||
haystack = "zzabcazzaabc"
|
||||
matches = [[2, 3], [2, 5], [5, 6], [8, 9], [9, 10], [9, 12]]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "empty-000"
|
||||
regex = ""
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "empty-alt-000"
|
||||
regex = "|b"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "empty-alt-010"
|
||||
regex = "b|"
|
||||
haystack = "abc"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
# See: https://github.com/rust-lang/regex/issues/484
|
||||
name = "iter1-bytes"
|
||||
regex = ''
|
||||
haystack = "☃"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
utf8 = false
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
# See: https://github.com/rust-lang/regex/issues/484
|
||||
name = "iter1-utf8"
|
||||
regex = ''
|
||||
haystack = "☃"
|
||||
matches = [[0, 0], [3, 3]]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "iter1-incomplete-utf8"
|
||||
regex = ''
|
||||
haystack = '\xE2\x98' # incomplete snowman
|
||||
matches = [[0, 0], [1, 1], [2, 2]]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
unescape = true
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "scratch"
|
||||
regex = ['sam', 'samwise']
|
||||
haystack = "samwise"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 3] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "overlapping"
|
||||
98
third-party/vendor/regex/testdata/regex-lite.toml
vendored
Normal file
98
third-party/vendor/regex/testdata/regex-lite.toml
vendored
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
# These tests are specifically written to test the regex-lite crate. While it
|
||||
# largely has the same semantics as the regex crate, there are some differences
|
||||
# around Unicode support and UTF-8.
|
||||
#
|
||||
# To be clear, regex-lite supports far fewer patterns because of its lack of
|
||||
# Unicode support, nested character classes and character class set operations.
|
||||
# What we're talking about here are the patterns that both crates support but
|
||||
# where the semantics might differ.
|
||||
|
||||
# regex-lite uses ASCII definitions for Perl character classes.
|
||||
[[test]]
|
||||
name = "perl-class-decimal"
|
||||
regex = '\d'
|
||||
haystack = '᠕'
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
# regex-lite uses ASCII definitions for Perl character classes.
|
||||
[[test]]
|
||||
name = "perl-class-space"
|
||||
regex = '\s'
|
||||
haystack = "\u2000"
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
# regex-lite uses ASCII definitions for Perl character classes.
|
||||
[[test]]
|
||||
name = "perl-class-word"
|
||||
regex = '\w'
|
||||
haystack = 'δ'
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
# regex-lite uses the ASCII definition of word for word boundary assertions.
|
||||
[[test]]
|
||||
name = "word-boundary"
|
||||
regex = '\b'
|
||||
haystack = 'δ'
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
# regex-lite uses the ASCII definition of word for negated word boundary
|
||||
# assertions. But note that it should still not split codepoints!
|
||||
[[test]]
|
||||
name = "word-boundary-negated"
|
||||
regex = '\B'
|
||||
haystack = 'δ'
|
||||
matches = [[0, 0], [2, 2]]
|
||||
unicode = true
|
||||
|
||||
# While we're here, the empty regex---which matches at every
|
||||
# position---shouldn't split a codepoint either.
|
||||
[[test]]
|
||||
name = "empty-no-split-codepoint"
|
||||
regex = ''
|
||||
haystack = '💩'
|
||||
matches = [[0, 0], [4, 4]]
|
||||
unicode = true
|
||||
|
||||
# A dot always matches a full codepoint.
|
||||
[[test]]
|
||||
name = "dot-always-matches-codepoint"
|
||||
regex = '.'
|
||||
haystack = '💩'
|
||||
matches = [[0, 4]]
|
||||
unicode = false
|
||||
|
||||
# A negated character class also always matches a full codepoint.
|
||||
[[test]]
|
||||
name = "negated-class-always-matches-codepoint"
|
||||
regex = '[^a]'
|
||||
haystack = '💩'
|
||||
matches = [[0, 4]]
|
||||
unicode = false
|
||||
|
||||
# regex-lite only supports ASCII-aware case insensitive matching.
|
||||
[[test]]
|
||||
name = "case-insensitive-is-ascii-only"
|
||||
regex = 's'
|
||||
haystack = 'ſ'
|
||||
matches = []
|
||||
unicode = true
|
||||
case-insensitive = true
|
||||
|
||||
# Negated word boundaries shouldn't split a codepoint, but they will match
|
||||
# between invalid UTF-8.
|
||||
#
|
||||
# This test is only valid for a 'bytes' API, but that doesn't (yet) exist in
|
||||
# regex-lite. This can't happen in the main API because &str can't contain
|
||||
# invalid UTF-8.
|
||||
# [[test]]
|
||||
# name = "word-boundary-invalid-utf8"
|
||||
# regex = '\B'
|
||||
# haystack = '\xFF\xFF\xFF\xFF'
|
||||
# unescape = true
|
||||
# matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
# unicode = true
|
||||
# utf8 = false
|
||||
830
third-party/vendor/regex/testdata/regression.toml
vendored
Normal file
830
third-party/vendor/regex/testdata/regression.toml
vendored
Normal file
|
|
@ -0,0 +1,830 @@
|
|||
# See: https://github.com/rust-lang/regex/issues/48
|
||||
[[test]]
|
||||
name = "invalid-regex-no-crash-100"
|
||||
regex = '(*)'
|
||||
haystack = ""
|
||||
matches = []
|
||||
compiles = false
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/48
|
||||
[[test]]
|
||||
name = "invalid-regex-no-crash-200"
|
||||
regex = '(?:?)'
|
||||
haystack = ""
|
||||
matches = []
|
||||
compiles = false
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/48
|
||||
[[test]]
|
||||
name = "invalid-regex-no-crash-300"
|
||||
regex = '(?)'
|
||||
haystack = ""
|
||||
matches = []
|
||||
compiles = false
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/48
|
||||
[[test]]
|
||||
name = "invalid-regex-no-crash-400"
|
||||
regex = '*'
|
||||
haystack = ""
|
||||
matches = []
|
||||
compiles = false
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/75
|
||||
[[test]]
|
||||
name = "unsorted-binary-search-100"
|
||||
regex = '(?i-u)[a_]+'
|
||||
haystack = "A_"
|
||||
matches = [[0, 2]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/75
|
||||
[[test]]
|
||||
name = "unsorted-binary-search-200"
|
||||
regex = '(?i-u)[A_]+'
|
||||
haystack = "a_"
|
||||
matches = [[0, 2]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/76
|
||||
[[test]]
|
||||
name = "unicode-case-lower-nocase-flag"
|
||||
regex = '(?i)\p{Ll}+'
|
||||
haystack = "ΛΘΓΔα"
|
||||
matches = [[0, 10]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/99
|
||||
[[test]]
|
||||
name = "negated-char-class-100"
|
||||
regex = '(?i)[^x]'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/99
|
||||
[[test]]
|
||||
name = "negated-char-class-200"
|
||||
regex = '(?i)[^x]'
|
||||
haystack = "X"
|
||||
matches = []
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/101
|
||||
[[test]]
|
||||
name = "ascii-word-underscore"
|
||||
regex = '[[:word:]]'
|
||||
haystack = "_"
|
||||
matches = [[0, 1]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/129
|
||||
[[test]]
|
||||
name = "captures-repeat"
|
||||
regex = '([a-f]){2}(?P<foo>[x-z])'
|
||||
haystack = "abx"
|
||||
matches = [
|
||||
[[0, 3], [1, 2], [2, 3]],
|
||||
]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/153
|
||||
[[test]]
|
||||
name = "alt-in-alt-100"
|
||||
regex = 'ab?|$'
|
||||
haystack = "az"
|
||||
matches = [[0, 1], [2, 2]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/153
|
||||
[[test]]
|
||||
name = "alt-in-alt-200"
|
||||
regex = '^(?:.*?)(?:\n|\r\n?|$)'
|
||||
haystack = "ab\rcd"
|
||||
matches = [[0, 3]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/169
|
||||
[[test]]
|
||||
name = "leftmost-first-prefix"
|
||||
regex = 'z*azb'
|
||||
haystack = "azb"
|
||||
matches = [[0, 3]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/191
|
||||
[[test]]
|
||||
name = "many-alternates"
|
||||
regex = '1|2|3|4|5|6|7|8|9|10|int'
|
||||
haystack = "int"
|
||||
matches = [[0, 3]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/204
|
||||
[[test]]
|
||||
name = "word-boundary-alone-100"
|
||||
regex = '\b'
|
||||
haystack = "Should this (work?)"
|
||||
matches = [[0, 0], [6, 6], [7, 7], [11, 11], [13, 13], [17, 17]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/204
|
||||
[[test]]
|
||||
name = "word-boundary-alone-200"
|
||||
regex = '\b'
|
||||
haystack = "a b c"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/264
|
||||
[[test]]
|
||||
name = "word-boundary-ascii-no-capture"
|
||||
regex = '\B'
|
||||
haystack = "\U00028F3E"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/264
|
||||
[[test]]
|
||||
name = "word-boundary-ascii-capture"
|
||||
regex = '(?:\B)'
|
||||
haystack = "\U00028F3E"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/268
|
||||
[[test]]
|
||||
name = "partial-anchor"
|
||||
regex = '^a|b'
|
||||
haystack = "ba"
|
||||
matches = [[0, 1]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/271
|
||||
[[test]]
|
||||
name = "endl-or-word-boundary"
|
||||
regex = '(?m:$)|(?-u:\b)'
|
||||
haystack = "\U0006084E"
|
||||
matches = [[4, 4]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/271
|
||||
[[test]]
|
||||
name = "zero-or-end"
|
||||
regex = '(?i-u:\x00)|$'
|
||||
haystack = "\U000E682F"
|
||||
matches = [[4, 4]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/271
|
||||
[[test]]
|
||||
name = "y-or-endl"
|
||||
regex = '(?i-u:y)|(?m:$)'
|
||||
haystack = "\U000B4331"
|
||||
matches = [[4, 4]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/271
|
||||
[[test]]
|
||||
name = "word-boundary-start-x"
|
||||
regex = '(?u:\b)^(?-u:X)'
|
||||
haystack = "X"
|
||||
matches = [[0, 1]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/271
|
||||
[[test]]
|
||||
name = "word-boundary-ascii-start-x"
|
||||
regex = '(?-u:\b)^(?-u:X)'
|
||||
haystack = "X"
|
||||
matches = [[0, 1]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/271
|
||||
[[test]]
|
||||
name = "end-not-word-boundary"
|
||||
regex = '$\B'
|
||||
haystack = "\U0005C124\U000B576C"
|
||||
matches = [[8, 8]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/280
|
||||
[[test]]
|
||||
name = "partial-anchor-alternate-begin"
|
||||
regex = '^a|z'
|
||||
haystack = "yyyyya"
|
||||
matches = []
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/280
|
||||
[[test]]
|
||||
name = "partial-anchor-alternate-end"
|
||||
regex = 'a$|z'
|
||||
haystack = "ayyyyy"
|
||||
matches = []
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/289
|
||||
[[test]]
|
||||
name = "lits-unambiguous-100"
|
||||
regex = '(?:ABC|CDA|BC)X'
|
||||
haystack = "CDAX"
|
||||
matches = [[0, 4]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/291
|
||||
[[test]]
|
||||
name = "lits-unambiguous-200"
|
||||
regex = '((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$'
|
||||
haystack = "CIMG2341"
|
||||
matches = [
|
||||
[[0, 8], [0, 4], [], [0, 4], [4, 8]],
|
||||
]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/303
|
||||
#
|
||||
# 2022-09-19: This has now been "properly" fixed in that empty character
|
||||
# classes are fully supported as something that can never match. This test
|
||||
# used to be marked as 'compiles = false', but now it works.
|
||||
[[test]]
|
||||
name = "negated-full-byte-range"
|
||||
regex = '[^\x00-\xFF]'
|
||||
haystack = ""
|
||||
matches = []
|
||||
compiles = true
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/321
|
||||
[[test]]
|
||||
name = "strange-anchor-non-complete-prefix"
|
||||
regex = 'a^{2}'
|
||||
haystack = ""
|
||||
matches = []
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/321
|
||||
[[test]]
|
||||
name = "strange-anchor-non-complete-suffix"
|
||||
regex = '${2}a'
|
||||
haystack = ""
|
||||
matches = []
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/334
|
||||
# See: https://github.com/rust-lang/regex/issues/557
|
||||
[[test]]
|
||||
name = "captures-after-dfa-premature-end-100"
|
||||
regex = 'a(b*(X|$))?'
|
||||
haystack = "abcbX"
|
||||
matches = [
|
||||
[[0, 1], [], []],
|
||||
]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/334
|
||||
# See: https://github.com/rust-lang/regex/issues/557
|
||||
[[test]]
|
||||
name = "captures-after-dfa-premature-end-200"
|
||||
regex = 'a(bc*(X|$))?'
|
||||
haystack = "abcbX"
|
||||
matches = [
|
||||
[[0, 1], [], []],
|
||||
]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/334
|
||||
# See: https://github.com/rust-lang/regex/issues/557
|
||||
[[test]]
|
||||
name = "captures-after-dfa-premature-end-300"
|
||||
regex = '(aa$)?'
|
||||
haystack = "aaz"
|
||||
matches = [
|
||||
[[0, 0], []],
|
||||
[[1, 1], []],
|
||||
[[2, 2], []],
|
||||
[[3, 3], []],
|
||||
]
|
||||
|
||||
# Plucked from "Why aren’t regular expressions a lingua franca? an empirical
|
||||
# study on the re-use and portability of regular expressions", The ACM Joint
|
||||
# European Software Engineering Conference and Symposium on the Foundations of
|
||||
# Software Engineering (ESEC/FSE), 2019.
|
||||
#
|
||||
# Link: https://dl.acm.org/doi/pdf/10.1145/3338906.3338909
|
||||
[[test]]
|
||||
name = "captures-after-dfa-premature-end-400"
|
||||
regex = '(a)\d*\.?\d+\b'
|
||||
haystack = "a0.0c"
|
||||
matches = [
|
||||
[[0, 2], [0, 1]],
|
||||
]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/437
|
||||
[[test]]
|
||||
name = "literal-panic"
|
||||
regex = 'typename type\-parameter\-[0-9]+\-[0-9]+::.+'
|
||||
haystack = "test"
|
||||
matches = []
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/527
|
||||
[[test]]
|
||||
name = "empty-flag-expr"
|
||||
regex = '(?:(?:(?x)))'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/533
|
||||
#[[tests]]
|
||||
#name = "blank-matches-nothing-between-space-and-tab"
|
||||
#regex = '[[:blank:]]'
|
||||
#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'
|
||||
#match = false
|
||||
#unescape = true
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/533
|
||||
#[[tests]]
|
||||
#name = "blank-matches-nothing-between-space-and-tab-inverted"
|
||||
#regex = '^[[:^blank:]]+$'
|
||||
#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'
|
||||
#match = true
|
||||
#unescape = true
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/555
|
||||
[[test]]
|
||||
name = "invalid-repetition"
|
||||
regex = '(?m){1,1}'
|
||||
haystack = ""
|
||||
matches = []
|
||||
compiles = false
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/640
|
||||
[[test]]
|
||||
name = "flags-are-unset"
|
||||
regex = '(?:(?i)foo)|Bar'
|
||||
haystack = "foo Foo bar Bar"
|
||||
matches = [[0, 3], [4, 7], [12, 15]]
|
||||
|
||||
# Note that 'Ј' is not 'j', but Cyrillic Je
|
||||
# https://en.wikipedia.org/wiki/Je_(Cyrillic)
|
||||
#
|
||||
# See: https://github.com/rust-lang/regex/issues/659
|
||||
[[test]]
|
||||
name = "empty-group-with-unicode"
|
||||
regex = '(?:)Ј01'
|
||||
haystack = 'zЈ01'
|
||||
matches = [[1, 5]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/579
|
||||
[[test]]
|
||||
name = "word-boundary-weird"
|
||||
regex = '\b..\b'
|
||||
haystack = "I have 12, he has 2!"
|
||||
matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/579
|
||||
[[test]]
|
||||
name = "word-boundary-weird-ascii"
|
||||
regex = '\b..\b'
|
||||
haystack = "I have 12, he has 2!"
|
||||
matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/579
|
||||
[[test]]
|
||||
name = "word-boundary-weird-minimal-ascii"
|
||||
regex = '\b..\b'
|
||||
haystack = "az,,b"
|
||||
matches = [[0, 2], [2, 4]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
# See: https://github.com/BurntSushi/ripgrep/issues/1203
|
||||
[[test]]
|
||||
name = "reverse-suffix-100"
|
||||
regex = '[0-4][0-4][0-4]000'
|
||||
haystack = "153.230000"
|
||||
matches = [[4, 10]]
|
||||
|
||||
# See: https://github.com/BurntSushi/ripgrep/issues/1203
|
||||
[[test]]
|
||||
name = "reverse-suffix-200"
|
||||
regex = '[0-9][0-9][0-9]000'
|
||||
haystack = "153.230000\n"
|
||||
matches = [[4, 10]]
|
||||
|
||||
# This is a tricky case for the reverse suffix optimization, because it
|
||||
# finds the 'foobar' match but the reverse scan must fail to find a match by
|
||||
# correctly dealing with the word boundary following the 'foobar' literal when
|
||||
# computing the start state.
|
||||
#
|
||||
# This test exists because I tried to break the following assumption that
|
||||
# is currently in the code: that if a suffix is found and the reverse scan
|
||||
# succeeds, then it's guaranteed that there is an overall match. Namely, the
|
||||
# 'is_match' routine does *not* do another forward scan in this case because of
|
||||
# this assumption.
|
||||
[[test]]
|
||||
name = "reverse-suffix-300"
|
||||
regex = '\w+foobar\b'
|
||||
haystack = "xyzfoobarZ"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
# See: https://github.com/BurntSushi/ripgrep/issues/1247
|
||||
[[test]]
|
||||
name = "stops"
|
||||
regex = '\bs(?:[ab])'
|
||||
haystack = 's\xE4'
|
||||
matches = []
|
||||
unescape = true
|
||||
utf8 = false
|
||||
|
||||
# See: https://github.com/BurntSushi/ripgrep/issues/1247
|
||||
[[test]]
|
||||
name = "stops-ascii"
|
||||
regex = '(?-u:\b)s(?:[ab])'
|
||||
haystack = 's\xE4'
|
||||
matches = []
|
||||
unescape = true
|
||||
utf8 = false
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/850
|
||||
[[test]]
|
||||
name = "adjacent-line-boundary-100"
|
||||
regex = '(?m)^(?:[^ ]+?)$'
|
||||
haystack = "line1\nline2"
|
||||
matches = [[0, 5], [6, 11]]
|
||||
|
||||
# Continued.
|
||||
[[test]]
|
||||
name = "adjacent-line-boundary-200"
|
||||
regex = '(?m)^(?:[^ ]+?)$'
|
||||
haystack = "A\nB"
|
||||
matches = [[0, 1], [2, 3]]
|
||||
|
||||
# There is no issue for this bug.
|
||||
[[test]]
|
||||
name = "anchored-prefix-100"
|
||||
regex = '^a[[:^space:]]'
|
||||
haystack = "a "
|
||||
matches = []
|
||||
|
||||
# There is no issue for this bug.
|
||||
[[test]]
|
||||
name = "anchored-prefix-200"
|
||||
regex = '^a[[:^space:]]'
|
||||
haystack = "foo boo a"
|
||||
matches = []
|
||||
|
||||
# There is no issue for this bug.
|
||||
[[test]]
|
||||
name = "anchored-prefix-300"
|
||||
regex = '^-[a-z]'
|
||||
haystack = "r-f"
|
||||
matches = []
|
||||
|
||||
# Tests that a possible Aho-Corasick optimization works correctly. It only
|
||||
# kicks in when we have a lot of literals. By "works correctly," we mean that
|
||||
# leftmost-first match semantics are properly respected. That is, samwise
|
||||
# should match, not sam.
|
||||
#
|
||||
# There is no issue for this bug.
|
||||
[[test]]
|
||||
name = "aho-corasick-100"
|
||||
regex = 'samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z'
|
||||
haystack = "samwise"
|
||||
matches = [[0, 7]]
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/921
|
||||
[[test]]
|
||||
name = "interior-anchor-capture"
|
||||
regex = '(a$)b$'
|
||||
haystack = 'ab'
|
||||
matches = []
|
||||
|
||||
# I found this bug in the course of adding some of the regexes that Ruff uses
|
||||
# to rebar. It turns out that the lazy DFA was finding a match that was being
|
||||
# rejected by the one-pass DFA. Yikes. I then minimized the regex and haystack.
|
||||
#
|
||||
# Source: https://github.com/charliermarsh/ruff/blob/a919041ddaa64cdf6f216f90dd0480dab69fd3ba/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs#L52
|
||||
[[test]]
|
||||
name = "ruff-whitespace-around-keywords"
|
||||
regex = '^(a|ab)$'
|
||||
haystack = "ab"
|
||||
anchored = true
|
||||
unicode = false
|
||||
utf8 = true
|
||||
matches = [[[0, 2], [0, 2]]]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-0"
|
||||
regex = '(?:(?-u:\b)|(?u:h))+'
|
||||
haystack = "h"
|
||||
unicode = true
|
||||
utf8 = false
|
||||
matches = [[0, 0], [1, 1]]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-1"
|
||||
regex = '(?u:\B)'
|
||||
haystack = "鋸"
|
||||
unicode = true
|
||||
utf8 = false
|
||||
matches = []
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-2"
|
||||
regex = '(?:(?u:\b)|(?s-u:.))+'
|
||||
haystack = "oB"
|
||||
unicode = true
|
||||
utf8 = false
|
||||
matches = [[0, 0], [1, 2]]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-3"
|
||||
regex = '(?:(?-u:\B)|(?su:.))+'
|
||||
haystack = "\U000FEF80"
|
||||
unicode = true
|
||||
utf8 = false
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-3-utf8"
|
||||
regex = '(?:(?-u:\B)|(?su:.))+'
|
||||
haystack = "\U000FEF80"
|
||||
unicode = true
|
||||
utf8 = true
|
||||
matches = [[0, 0], [4, 4]]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-4"
|
||||
regex = '(?m:$)(?m:^)(?su:.)'
|
||||
haystack = "\n‣"
|
||||
unicode = true
|
||||
utf8 = false
|
||||
matches = [[0, 1]]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-5"
|
||||
regex = '(?m:$)^(?m:^)'
|
||||
haystack = "\n"
|
||||
unicode = true
|
||||
utf8 = false
|
||||
matches = [[0, 0]]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-6"
|
||||
regex = '(?P<kp>(?iu:do)(?m:$))*'
|
||||
haystack = "dodo"
|
||||
unicode = true
|
||||
utf8 = false
|
||||
matches = [
|
||||
[[0, 0], []],
|
||||
[[1, 1], []],
|
||||
[[2, 4], [2, 4]],
|
||||
]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-7"
|
||||
regex = '(?u:\B)'
|
||||
haystack = "䡁"
|
||||
unicode = true
|
||||
utf8 = false
|
||||
matches = []
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-8"
|
||||
regex = '(?:(?-u:\b)|(?u:[\u{0}-W]))+'
|
||||
haystack = "0"
|
||||
unicode = true
|
||||
utf8 = false
|
||||
matches = [[0, 0], [1, 1]]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-9"
|
||||
regex = '((?m:$)(?-u:\B)(?s-u:.)(?-u:\B)$)'
|
||||
haystack = "\n\n"
|
||||
unicode = true
|
||||
utf8 = false
|
||||
matches = [
|
||||
[[1, 2], [1, 2]],
|
||||
]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-10"
|
||||
regex = '(?m:$)(?m:$)^(?su:.)'
|
||||
haystack = "\n\u0081¨\u200a"
|
||||
unicode = true
|
||||
utf8 = false
|
||||
matches = [[0, 1]]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-11"
|
||||
regex = '(?-u:\B)(?m:^)'
|
||||
haystack = "0\n"
|
||||
unicode = true
|
||||
utf8 = false
|
||||
matches = [[2, 2]]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/429
|
||||
[[test]]
|
||||
name = "i429-12"
|
||||
regex = '(?:(?u:\b)|(?-u:.))+'
|
||||
haystack = "0"
|
||||
unicode = true
|
||||
utf8 = false
|
||||
matches = [[0, 0], [1, 1]]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/969
|
||||
[[test]]
|
||||
name = "i969"
|
||||
regex = 'c.*d\z'
|
||||
haystack = "ababcd"
|
||||
bounds = [4, 6]
|
||||
search-kind = "earliest"
|
||||
matches = [[4, 6]]
|
||||
|
||||
# I found this during the regex-automata migration. This is the fowler basic
|
||||
# 154 test, but without anchored = true and without a match limit.
|
||||
#
|
||||
# This test caught a subtle bug in the hybrid reverse DFA search, where it
|
||||
# would skip over the termination condition if it entered a start state. This
|
||||
# was a double bug. Firstly, the reverse DFA shouldn't have had start states
|
||||
# specialized in the first place, and thus it shouldn't have been possible to detect
|
||||
# that the DFA had entered a start state. The second bug was that the start
|
||||
# state handling was incorrect by jumping over the termination condition.
|
||||
[[test]]
|
||||
name = "fowler-basic154-unanchored"
|
||||
regex = '''a([bc]*)c*'''
|
||||
haystack = '''abc'''
|
||||
matches = [[[0, 3], [1, 3]]]
|
||||
|
||||
# From: https://github.com/rust-lang/regex/issues/981
|
||||
#
|
||||
# This was never really a problem in the new architecture because the
|
||||
# regex-automata engines are far more principled about how they deal with
|
||||
# look-around. (This was one of the many reasons I wanted to re-work the
|
||||
# original regex crate engines.)
|
||||
[[test]]
|
||||
name = "word-boundary-interact-poorly-with-literal-optimizations"
|
||||
regex = '(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))'
|
||||
haystack = 'ubi-Darwin-x86_64.tar.gz'
|
||||
matches = []
|
||||
|
||||
# This was found during fuzz testing of regex. It provoked a panic in the meta
|
||||
# engine as a result of the reverse suffix optimization. Namely, it hit a case
|
||||
# where a suffix match was found, a corresponding reverse match was found, but
|
||||
# the forward search turned up no match. The forward search should always match
|
||||
# if the suffix and reverse search match.
|
||||
#
|
||||
# This in turn uncovered an inconsistency between the PikeVM and the DFA (lazy
|
||||
# and fully compiled) engines. It was caused by a mishandling of the collection
|
||||
# of NFA state IDs in the generic determinization code (which is why both types
|
||||
# of DFA were impacted). Namely, when a fail state was encountered (that's the
|
||||
# `[^\s\S]` in the pattern below), then it would just stop collecting states.
|
||||
# But that's not correct since a later state could lead to a match.
|
||||
[[test]]
|
||||
name = "impossible-branch"
|
||||
regex = '.*[^\s\S]A|B'
|
||||
haystack = "B"
|
||||
matches = [[0, 1]]
|
||||
|
||||
# This was found during fuzz testing in regex-lite. The regex crate never
|
||||
# suffered from this bug, but it causes regex-lite to incorrectly compile
|
||||
# captures.
|
||||
[[test]]
|
||||
name = "captures-wrong-order"
|
||||
regex = '(a){0}(a)'
|
||||
haystack = 'a'
|
||||
matches = [[[0, 1], [], [0, 1]]]
|
||||
|
||||
# This tests a bug in how quit states are handled in the DFA. At some point
|
||||
# during development, the DFAs were tweaked slightly such that if they hit
|
||||
# a quit state (which means, they hit a byte that the caller configured should
|
||||
# stop the search), then it might not return an error necessarily. Namely, if a
|
||||
# match had already been found, then it would be returned instead of an error.
|
||||
#
|
||||
# But this is actually wrong! Why? Because even though a match had been found,
|
||||
# it wouldn't be fully correct to return it once a quit state has been seen
|
||||
# because you can't determine whether the match offset returned is the correct
|
||||
# greedy/leftmost-first match. Since you can't complete the search as requested
|
||||
# by the caller, the DFA should just stop and return an error.
|
||||
#
|
||||
# Interestingly, this does seem to produce an unavoidable difference between
|
||||
# 'try_is_match().unwrap()' and 'try_find().unwrap().is_some()' for the DFAs.
|
||||
# The former will stop immediately once a match is known to occur and return
|
||||
# 'Ok(true)', whereas the latter could find the match but quit with an
|
||||
# 'Err(..)' first.
|
||||
#
|
||||
# Thankfully, I believe this inconsistency between 'is_match()' and 'find()'
|
||||
# cannot be observed in the higher level meta regex API because it specifically
|
||||
# will try another engine that won't fail in the case of a DFA failing.
|
||||
#
|
||||
# This regression happened in the regex crate rewrite, but before anything got
|
||||
# released.
|
||||
[[test]]
|
||||
name = "negated-unicode-word-boundary-dfa-fail"
|
||||
regex = '\B.*'
|
||||
haystack = "!\u02D7"
|
||||
matches = [[0, 3]]
|
||||
|
||||
# This failure was found in the *old* regex crate (prior to regex 1.9), but
|
||||
# I didn't investigate why. My best guess is that it's a literal optimization
|
||||
# bug. It didn't occur in the rewrite.
|
||||
[[test]]
|
||||
name = "missed-match"
|
||||
regex = 'e..+e.ee>'
|
||||
haystack = 'Zeee.eZZZZZZZZeee>eeeeeee>'
|
||||
matches = [[1, 26]]
|
||||
|
||||
# This test came from the 'ignore' crate and tripped a bug in how accelerated
|
||||
# DFA states were handled in an overlapping search.
|
||||
[[test]]
|
||||
name = "regex-to-glob"
|
||||
regex = ['(?-u)^path1/[^/]*$']
|
||||
haystack = "path1/foo"
|
||||
matches = [[0, 9]]
|
||||
utf8 = false
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# See: https://github.com/rust-lang/regex/issues/1060
|
||||
[[test]]
|
||||
name = "reverse-inner-plus-shorter-than-expected"
|
||||
regex = '(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})'
|
||||
haystack = '102:12:39'
|
||||
matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]]
|
||||
|
||||
# Like reverse-inner-plus-shorter-than-expected, but using a far simpler regex
|
||||
# to demonstrate the extent of the rot. Sigh.
|
||||
#
|
||||
# See: https://github.com/rust-lang/regex/issues/1060
|
||||
[[test]]
|
||||
name = "reverse-inner-short"
|
||||
regex = '(?:([0-9][0-9][0-9]):)?([0-9][0-9]):([0-9][0-9])'
|
||||
haystack = '102:12:39'
|
||||
matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]]
|
||||
|
||||
# This regression test was found via the RegexSet APIs. It triggered a
|
||||
# particular code path where a regex was compiled with 'All' match semantics
|
||||
# (to support overlapping search), but got funneled down into a standard
|
||||
# leftmost search when calling 'is_match'. This is fine on its own, but the
|
||||
# leftmost search will use a prefilter and that's where this went awry.
|
||||
#
|
||||
# Namely, since 'All' semantics were used, the aho-corasick prefilter was
|
||||
# incorrectly compiled with 'Standard' semantics. This was wrong because
|
||||
# 'Standard' immediately attempts to report a match at every position, even if
|
||||
# that would mean reporting a match past the leftmost match before reporting
|
||||
# the leftmost match. This breaks the prefilter contract of never having false
|
||||
# negatives and leads overall to the engine not finding a match.
|
||||
#
|
||||
# See: https://github.com/rust-lang/regex/issues/1070
|
||||
[[test]]
|
||||
name = "prefilter-with-aho-corasick-standard-semantics"
|
||||
regex = '(?m)^ *v [0-9]'
|
||||
haystack = 'v 0'
|
||||
matches = [
|
||||
{ id = 0, spans = [[0, 3]] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
unicode = true
|
||||
utf8 = true
|
||||
|
||||
# This tests that the PikeVM and the meta regex agree on a particular regex.
|
||||
# This test previously failed when the ad hoc engines inside the meta engine
|
||||
# did not handle quit states correctly. Namely, the Unicode word boundary here
|
||||
# combined with a non-ASCII codepoint provokes the quit state. The ad hoc
|
||||
# engines were previously returning a match even after entering the quit state
|
||||
# if a match had been previously detected, but this is incorrect. The reason
|
||||
# is that if a quit state is found, then the search must give up *immediately*
|
||||
# because it prevents the search from finding the "proper" leftmost-first
|
||||
# match. If it instead returns a match that has been found, it risks reporting
|
||||
# an improper match, as it did in this case.
|
||||
#
|
||||
# See: https://github.com/rust-lang/regex/issues/1046
|
||||
[[test]]
|
||||
name = "non-prefix-literal-quit-state"
|
||||
regex = '.+\b\n'
|
||||
haystack = "β77\n"
|
||||
matches = [[0, 5]]
|
||||
|
||||
# This is a regression test for some errant HIR interval set operations that
|
||||
# were made in the regex-syntax 0.8.0 release and then reverted in 0.8.1. The
|
||||
# issue here is that the HIR produced from the regex had out-of-order ranges.
|
||||
#
|
||||
# See: https://github.com/rust-lang/regex/issues/1103
|
||||
# Ref: https://github.com/rust-lang/regex/pull/1051
|
||||
# Ref: https://github.com/rust-lang/regex/pull/1102
|
||||
[[test]]
|
||||
name = "hir-optimization-out-of-order-class"
|
||||
regex = '^[[:alnum:]./-]+$'
|
||||
haystack = "a-b"
|
||||
matches = [[0, 3]]
|
||||
|
||||
# This is a regression test for an improper reverse suffix optimization. This
|
||||
# occurred when I "broadened" the applicability of the optimization to include
|
||||
# multiple possible literal suffixes instead of only sticking to a non-empty
|
||||
# longest common suffix. It turns out that, at least given how the reverse
|
||||
# suffix optimization works, we need to stick to the longest common suffix for
|
||||
# now.
|
||||
#
|
||||
# See: https://github.com/rust-lang/regex/issues/1110
|
||||
# See also: https://github.com/astral-sh/ruff/pull/7980
|
||||
[[test]]
|
||||
name = 'improper-reverse-suffix-optimization'
|
||||
regex = '(\\N\{[^}]+})|([{}])'
|
||||
haystack = 'hiya \N{snowman} bye'
|
||||
matches = [[[5, 16], [5, 16], []]]
|
||||
641
third-party/vendor/regex/testdata/set.toml
vendored
Normal file
641
third-party/vendor/regex/testdata/set.toml
vendored
Normal file
|
|
@ -0,0 +1,641 @@
|
|||
# Basic multi-regex tests.
|
||||
|
||||
[[test]]
|
||||
name = "basic10"
|
||||
regex = ["a", "a"]
|
||||
haystack = "a"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 1] },
|
||||
{ id = 1, span = [0, 1] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic10-leftmost-first"
|
||||
regex = ["a", "a"]
|
||||
haystack = "a"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 1] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
|
||||
[[test]]
|
||||
name = "basic20"
|
||||
regex = ["a", "a"]
|
||||
haystack = "ba"
|
||||
matches = [
|
||||
{ id = 0, span = [1, 2] },
|
||||
{ id = 1, span = [1, 2] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic30"
|
||||
regex = ["a", "b"]
|
||||
haystack = "a"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 1] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic40"
|
||||
regex = ["a", "b"]
|
||||
haystack = "b"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 1] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic50"
|
||||
regex = ["a|b", "b|a"]
|
||||
haystack = "b"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 1] },
|
||||
{ id = 1, span = [0, 1] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic60"
|
||||
regex = ["foo", "oo"]
|
||||
haystack = "foo"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 3] },
|
||||
{ id = 1, span = [1, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic60-leftmost-first"
|
||||
regex = ["foo", "oo"]
|
||||
haystack = "foo"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 3] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
|
||||
[[test]]
|
||||
name = "basic61"
|
||||
regex = ["oo", "foo"]
|
||||
haystack = "foo"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 3] },
|
||||
{ id = 0, span = [1, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic61-leftmost-first"
|
||||
regex = ["oo", "foo"]
|
||||
haystack = "foo"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 3] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
|
||||
[[test]]
|
||||
name = "basic70"
|
||||
regex = ["abcd", "bcd", "cd", "d"]
|
||||
haystack = "abcd"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 4] },
|
||||
{ id = 1, span = [1, 4] },
|
||||
{ id = 2, span = [2, 4] },
|
||||
{ id = 3, span = [3, 4] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic71"
|
||||
regex = ["bcd", "cd", "d", "abcd"]
|
||||
haystack = "abcd"
|
||||
matches = [
|
||||
{ id = 3, span = [0, 4] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
|
||||
[[test]]
|
||||
name = "basic80"
|
||||
regex = ["^foo", "bar$"]
|
||||
haystack = "foo"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic81"
|
||||
regex = ["^foo", "bar$"]
|
||||
haystack = "foo bar"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 3] },
|
||||
{ id = 1, span = [4, 7] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic82"
|
||||
regex = ["^foo", "bar$"]
|
||||
haystack = "bar"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic90"
|
||||
regex = ["[a-z]+$", "foo"]
|
||||
haystack = "01234 foo"
|
||||
matches = [
|
||||
{ id = 0, span = [8, 9] },
|
||||
{ id = 0, span = [7, 9] },
|
||||
{ id = 0, span = [6, 9] },
|
||||
{ id = 1, span = [6, 9] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic91"
|
||||
regex = ["[a-z]+$", "foo"]
|
||||
haystack = "foo 01234"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic100"
|
||||
regex = [".*?", "a"]
|
||||
haystack = "zzza"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 0] },
|
||||
{ id = 0, span = [1, 1] },
|
||||
{ id = 0, span = [0, 1] },
|
||||
{ id = 0, span = [2, 2] },
|
||||
{ id = 0, span = [1, 2] },
|
||||
{ id = 0, span = [0, 2] },
|
||||
{ id = 0, span = [3, 3] },
|
||||
{ id = 0, span = [2, 3] },
|
||||
{ id = 0, span = [1, 3] },
|
||||
{ id = 0, span = [0, 3] },
|
||||
{ id = 0, span = [4, 4] },
|
||||
{ id = 0, span = [3, 4] },
|
||||
{ id = 0, span = [2, 4] },
|
||||
{ id = 0, span = [1, 4] },
|
||||
{ id = 0, span = [0, 4] },
|
||||
{ id = 1, span = [3, 4] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic101"
|
||||
regex = [".*", "a"]
|
||||
haystack = "zzza"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 0] },
|
||||
{ id = 0, span = [1, 1] },
|
||||
{ id = 0, span = [0, 1] },
|
||||
{ id = 0, span = [2, 2] },
|
||||
{ id = 0, span = [1, 2] },
|
||||
{ id = 0, span = [0, 2] },
|
||||
{ id = 0, span = [3, 3] },
|
||||
{ id = 0, span = [2, 3] },
|
||||
{ id = 0, span = [1, 3] },
|
||||
{ id = 0, span = [0, 3] },
|
||||
{ id = 0, span = [4, 4] },
|
||||
{ id = 0, span = [3, 4] },
|
||||
{ id = 0, span = [2, 4] },
|
||||
{ id = 0, span = [1, 4] },
|
||||
{ id = 0, span = [0, 4] },
|
||||
{ id = 1, span = [3, 4] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic102"
|
||||
regex = [".*", "a"]
|
||||
haystack = "zzz"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 0] },
|
||||
{ id = 0, span = [1, 1] },
|
||||
{ id = 0, span = [0, 1] },
|
||||
{ id = 0, span = [2, 2] },
|
||||
{ id = 0, span = [1, 2] },
|
||||
{ id = 0, span = [0, 2] },
|
||||
{ id = 0, span = [3, 3] },
|
||||
{ id = 0, span = [2, 3] },
|
||||
{ id = 0, span = [1, 3] },
|
||||
{ id = 0, span = [0, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic110"
|
||||
regex = ['\ba\b']
|
||||
haystack = "hello a bye"
|
||||
matches = [
|
||||
{ id = 0, span = [6, 7] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic111"
|
||||
regex = ['\ba\b', '\be\b']
|
||||
haystack = "hello a bye e"
|
||||
matches = [
|
||||
{ id = 0, span = [6, 7] },
|
||||
{ id = 1, span = [12, 13] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic120"
|
||||
regex = ["a"]
|
||||
haystack = "a"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 1] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic121"
|
||||
regex = [".*a"]
|
||||
haystack = "a"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 1] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic122"
|
||||
regex = [".*a", "β"]
|
||||
haystack = "β"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 2] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "basic130"
|
||||
regex = ["ab", "b"]
|
||||
haystack = "ba"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 1] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# These test cases where one of the regexes matches the empty string.
|
||||
|
||||
[[test]]
|
||||
name = "empty10"
|
||||
regex = ["", "a"]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 0] },
|
||||
{ id = 1, span = [0, 1] },
|
||||
{ id = 0, span = [1, 1] },
|
||||
{ id = 0, span = [2, 2] },
|
||||
{ id = 0, span = [3, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "empty10-leftmost-first"
|
||||
regex = ["", "a"]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 0] },
|
||||
{ id = 0, span = [1, 1] },
|
||||
{ id = 0, span = [2, 2] },
|
||||
{ id = 0, span = [3, 3] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
|
||||
[[test]]
|
||||
name = "empty11"
|
||||
regex = ["a", ""]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 0] },
|
||||
{ id = 0, span = [0, 1] },
|
||||
{ id = 1, span = [1, 1] },
|
||||
{ id = 1, span = [2, 2] },
|
||||
{ id = 1, span = [3, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "empty11-leftmost-first"
|
||||
regex = ["a", ""]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 1] },
|
||||
{ id = 1, span = [2, 2] },
|
||||
{ id = 1, span = [3, 3] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
|
||||
[[test]]
|
||||
name = "empty20"
|
||||
regex = ["", "b"]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 0] },
|
||||
{ id = 0, span = [1, 1] },
|
||||
{ id = 1, span = [1, 2] },
|
||||
{ id = 0, span = [2, 2] },
|
||||
{ id = 0, span = [3, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "empty20-leftmost-first"
|
||||
regex = ["", "b"]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 0] },
|
||||
{ id = 0, span = [1, 1] },
|
||||
{ id = 0, span = [2, 2] },
|
||||
{ id = 0, span = [3, 3] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
|
||||
[[test]]
|
||||
name = "empty21"
|
||||
regex = ["b", ""]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 0] },
|
||||
{ id = 1, span = [1, 1] },
|
||||
{ id = 0, span = [1, 2] },
|
||||
{ id = 1, span = [2, 2] },
|
||||
{ id = 1, span = [3, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "empty21-leftmost-first"
|
||||
regex = ["b", ""]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 0] },
|
||||
{ id = 0, span = [1, 2] },
|
||||
{ id = 1, span = [3, 3] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
|
||||
[[test]]
|
||||
name = "empty22"
|
||||
regex = ["(?:)", "b"]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 0] },
|
||||
{ id = 0, span = [1, 1] },
|
||||
{ id = 1, span = [1, 2] },
|
||||
{ id = 0, span = [2, 2] },
|
||||
{ id = 0, span = [3, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "empty23"
|
||||
regex = ["b", "(?:)"]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 0] },
|
||||
{ id = 1, span = [1, 1] },
|
||||
{ id = 0, span = [1, 2] },
|
||||
{ id = 1, span = [2, 2] },
|
||||
{ id = 1, span = [3, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "empty30"
|
||||
regex = ["", "z"]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 0] },
|
||||
{ id = 0, span = [1, 1] },
|
||||
{ id = 0, span = [2, 2] },
|
||||
{ id = 0, span = [3, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "empty30-leftmost-first"
|
||||
regex = ["", "z"]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 0, span = [0, 0] },
|
||||
{ id = 0, span = [1, 1] },
|
||||
{ id = 0, span = [2, 2] },
|
||||
{ id = 0, span = [3, 3] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
|
||||
[[test]]
|
||||
name = "empty31"
|
||||
regex = ["z", ""]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 0] },
|
||||
{ id = 1, span = [1, 1] },
|
||||
{ id = 1, span = [2, 2] },
|
||||
{ id = 1, span = [3, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "empty31-leftmost-first"
|
||||
regex = ["z", ""]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 1, span = [0, 0] },
|
||||
{ id = 1, span = [1, 1] },
|
||||
{ id = 1, span = [2, 2] },
|
||||
{ id = 1, span = [3, 3] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
|
||||
[[test]]
|
||||
name = "empty40"
|
||||
regex = ["c(?:)", "b"]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 1, span = [1, 2] },
|
||||
{ id = 0, span = [2, 3] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "empty40-leftmost-first"
|
||||
regex = ["c(?:)", "b"]
|
||||
haystack = "abc"
|
||||
matches = [
|
||||
{ id = 1, span = [1, 2] },
|
||||
{ id = 0, span = [2, 3] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
|
||||
# These test cases where there are no matches.
|
||||
|
||||
[[test]]
|
||||
name = "nomatch10"
|
||||
regex = ["a", "a"]
|
||||
haystack = "b"
|
||||
matches = []
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "nomatch20"
|
||||
regex = ["^foo", "bar$"]
|
||||
haystack = "bar foo"
|
||||
matches = []
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "nomatch30"
|
||||
regex = []
|
||||
haystack = "a"
|
||||
matches = []
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
[[test]]
|
||||
name = "nomatch40"
|
||||
regex = ["^rooted$", '\.log$']
|
||||
haystack = "notrooted"
|
||||
matches = []
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# These test multi-regex searches with capture groups.
|
||||
#
|
||||
# NOTE: I wrote these tests in the course of developing a first class API for
|
||||
# overlapping capturing group matches, but ultimately removed that API because
|
||||
# the semantics for overlapping matches aren't totally clear. However, I've
|
||||
# left the tests because I believe the semantics for these patterns are clear
|
||||
# and because we can still test our "which patterns matched" APIs with them.
|
||||
|
||||
[[test]]
|
||||
name = "caps-010"
|
||||
regex = ['^(\w+) (\w+)$', '^(\S+) (\S+)$']
|
||||
haystack = "Bruce Springsteen"
|
||||
matches = [
|
||||
{ id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
|
||||
{ id = 1, spans = [[0, 17], [0, 5], [6, 17]] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "caps-020"
|
||||
regex = ['^(\w+) (\w+)$', '^[A-Z](\S+) [A-Z](\S+)$']
|
||||
haystack = "Bruce Springsteen"
|
||||
matches = [
|
||||
{ id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
|
||||
{ id = 1, spans = [[0, 17], [1, 5], [7, 17]] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "caps-030"
|
||||
regex = ['^(\w+) (\w+)$', '^([A-Z])(\S+) ([A-Z])(\S+)$']
|
||||
haystack = "Bruce Springsteen"
|
||||
matches = [
|
||||
{ id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
|
||||
{ id = 1, spans = [[0, 17], [0, 1], [1, 5], [6, 7], [7, 17]] },
|
||||
]
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "caps-110"
|
||||
regex = ['(\w+) (\w+)', '(\S+) (\S+)']
|
||||
haystack = "Bruce Springsteen"
|
||||
matches = [
|
||||
{ id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "caps-120"
|
||||
regex = ['(\w+) (\w+)', '(\S+) (\S+)']
|
||||
haystack = "&ruce $pringsteen"
|
||||
matches = [
|
||||
{ id = 1, spans = [[0, 17], [0, 5], [6, 17]] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "caps-121"
|
||||
regex = ['(\w+) (\w+)', '(\S+) (\S+)']
|
||||
haystack = "&ruce $pringsteen Foo Bar"
|
||||
matches = [
|
||||
{ id = 1, spans = [[0, 17], [0, 5], [6, 17]] },
|
||||
{ id = 0, spans = [[18, 25], [18, 21], [22, 25]] },
|
||||
]
|
||||
match-kind = "leftmost-first"
|
||||
search-kind = "leftmost"
|
||||
unicode = false
|
||||
utf8 = false
|
||||
36
third-party/vendor/regex/testdata/substring.toml
vendored
Normal file
36
third-party/vendor/regex/testdata/substring.toml
vendored
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
# These tests check that regex engines perform as expected when the search is
|
||||
# instructed to only search a substring of a haystack instead of the entire
|
||||
# haystack. This tends to exercise interesting edge cases that are otherwise
|
||||
# difficult to provoke. (But not necessarily impossible. Regex search iterators
|
||||
# for example, make use of the "search just a substring" APIs by changing the
|
||||
# starting position of a search to the end position of the previous match.)
|
||||
|
||||
[[test]]
|
||||
name = "unicode-word-start"
|
||||
regex = '\b[0-9]+\b'
|
||||
haystack = "β123"
|
||||
bounds = { start = 2, end = 5 }
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "unicode-word-end"
|
||||
regex = '\b[0-9]+\b'
|
||||
haystack = "123β"
|
||||
bounds = { start = 0, end = 3 }
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "ascii-word-start"
|
||||
regex = '\b[0-9]+\b'
|
||||
haystack = "β123"
|
||||
bounds = { start = 2, end = 5 }
|
||||
matches = [[2, 5]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "ascii-word-end"
|
||||
regex = '\b[0-9]+\b'
|
||||
haystack = "123β"
|
||||
bounds = { start = 0, end = 3 }
|
||||
matches = [[0, 3]]
|
||||
unicode = false
|
||||
517
third-party/vendor/regex/testdata/unicode.toml
vendored
Normal file
517
third-party/vendor/regex/testdata/unicode.toml
vendored
Normal file
|
|
@ -0,0 +1,517 @@
|
|||
# Basic Unicode literal support.
|
||||
[[test]]
|
||||
name = "literal1"
|
||||
regex = '☃'
|
||||
haystack = "☃"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "literal2"
|
||||
regex = '☃+'
|
||||
haystack = "☃"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "literal3"
|
||||
regex = '☃+'
|
||||
haystack = "☃"
|
||||
matches = [[0, 3]]
|
||||
case-insensitive = true
|
||||
|
||||
[[test]]
|
||||
name = "literal4"
|
||||
regex = 'Δ'
|
||||
haystack = "δ"
|
||||
matches = [[0, 2]]
|
||||
case-insensitive = true
|
||||
|
||||
# Unicode word boundaries.
|
||||
[[test]]
|
||||
name = "wb-100"
|
||||
regex = '\d\b'
|
||||
haystack = "6δ"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "wb-200"
|
||||
regex = '\d\b'
|
||||
haystack = "6 "
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "wb-300"
|
||||
regex = '\d\B'
|
||||
haystack = "6δ"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "wb-400"
|
||||
regex = '\d\B'
|
||||
haystack = "6 "
|
||||
matches = []
|
||||
|
||||
# Unicode character class support.
|
||||
[[test]]
|
||||
name = "class1"
|
||||
regex = '[☃Ⅰ]+'
|
||||
haystack = "☃"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class2"
|
||||
regex = '\pN'
|
||||
haystack = "Ⅰ"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class3"
|
||||
regex = '\pN+'
|
||||
haystack = "Ⅰ1Ⅱ2"
|
||||
matches = [[0, 8]]
|
||||
|
||||
[[test]]
|
||||
name = "class4"
|
||||
regex = '\PN+'
|
||||
haystack = "abⅠ"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "class5"
|
||||
regex = '[\PN]+'
|
||||
haystack = "abⅠ"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "class6"
|
||||
regex = '[^\PN]+'
|
||||
haystack = "abⅠ"
|
||||
matches = [[2, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "class7"
|
||||
regex = '\p{Lu}+'
|
||||
haystack = "ΛΘΓΔα"
|
||||
matches = [[0, 8]]
|
||||
|
||||
[[test]]
|
||||
name = "class8"
|
||||
regex = '\p{Lu}+'
|
||||
haystack = "ΛΘΓΔα"
|
||||
matches = [[0, 10]]
|
||||
case-insensitive = true
|
||||
|
||||
[[test]]
|
||||
name = "class9"
|
||||
regex = '\pL+'
|
||||
haystack = "ΛΘΓΔα"
|
||||
matches = [[0, 10]]
|
||||
|
||||
[[test]]
|
||||
name = "class10"
|
||||
regex = '\p{Ll}+'
|
||||
haystack = "ΛΘΓΔα"
|
||||
matches = [[8, 10]]
|
||||
|
||||
# Unicode aware "Perl" character classes.
|
||||
[[test]]
|
||||
name = "perl1"
|
||||
regex = '\w+'
|
||||
haystack = "dδd"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "perl2"
|
||||
regex = '\w+'
|
||||
haystack = "⥡"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "perl3"
|
||||
regex = '\W+'
|
||||
haystack = "⥡"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "perl4"
|
||||
regex = '\d+'
|
||||
haystack = "1२३9"
|
||||
matches = [[0, 8]]
|
||||
|
||||
[[test]]
|
||||
name = "perl5"
|
||||
regex = '\d+'
|
||||
haystack = "Ⅱ"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "perl6"
|
||||
regex = '\D+'
|
||||
haystack = "Ⅱ"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "perl7"
|
||||
regex = '\s+'
|
||||
haystack = "\u1680"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "perl8"
|
||||
regex = '\s+'
|
||||
haystack = "☃"
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "perl9"
|
||||
regex = '\S+'
|
||||
haystack = "☃"
|
||||
matches = [[0, 3]]
|
||||
|
||||
# Specific tests for Unicode general category classes.
|
||||
[[test]]
|
||||
name = "class-gencat1"
|
||||
regex = '\p{Cased_Letter}'
|
||||
haystack = "\uFF21"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat2"
|
||||
regex = '\p{Close_Punctuation}'
|
||||
haystack = "❯"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat3"
|
||||
regex = '\p{Connector_Punctuation}'
|
||||
haystack = "⁀"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat4"
|
||||
regex = '\p{Control}'
|
||||
haystack = "\u009F"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat5"
|
||||
regex = '\p{Currency_Symbol}'
|
||||
haystack = "\uFFE1"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat6"
|
||||
regex = '\p{Dash_Punctuation}'
|
||||
haystack = "〰"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat7"
|
||||
regex = '\p{Decimal_Number}'
|
||||
haystack = "𑓙"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat8"
|
||||
regex = '\p{Enclosing_Mark}'
|
||||
haystack = "\uA672"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat9"
|
||||
regex = '\p{Final_Punctuation}'
|
||||
haystack = "⸡"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat10"
|
||||
regex = '\p{Format}'
|
||||
haystack = "\U000E007F"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat11"
|
||||
regex = '\p{Initial_Punctuation}'
|
||||
haystack = "⸜"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat12"
|
||||
regex = '\p{Letter}'
|
||||
haystack = "Έ"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat13"
|
||||
regex = '\p{Letter_Number}'
|
||||
haystack = "ↂ"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat14"
|
||||
regex = '\p{Line_Separator}'
|
||||
haystack = "\u2028"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat15"
|
||||
regex = '\p{Lowercase_Letter}'
|
||||
haystack = "ϛ"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat16"
|
||||
regex = '\p{Mark}'
|
||||
haystack = "\U000E01EF"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat17"
|
||||
regex = '\p{Math}'
|
||||
haystack = "⋿"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat18"
|
||||
regex = '\p{Modifier_Letter}'
|
||||
haystack = "𖭃"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat19"
|
||||
regex = '\p{Modifier_Symbol}'
|
||||
haystack = "🏿"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat20"
|
||||
regex = '\p{Nonspacing_Mark}'
|
||||
haystack = "\U0001E94A"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat21"
|
||||
regex = '\p{Number}'
|
||||
haystack = "⓿"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat22"
|
||||
regex = '\p{Open_Punctuation}'
|
||||
haystack = "⦅"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat23"
|
||||
regex = '\p{Other}'
|
||||
haystack = "\u0BC9"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat24"
|
||||
regex = '\p{Other_Letter}'
|
||||
haystack = "ꓷ"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat25"
|
||||
regex = '\p{Other_Number}'
|
||||
haystack = "㉏"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat26"
|
||||
regex = '\p{Other_Punctuation}'
|
||||
haystack = "𞥞"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat27"
|
||||
regex = '\p{Other_Symbol}'
|
||||
haystack = "⅌"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat28"
|
||||
regex = '\p{Paragraph_Separator}'
|
||||
haystack = "\u2029"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat29"
|
||||
regex = '\p{Private_Use}'
|
||||
haystack = "\U0010FFFD"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat30"
|
||||
regex = '\p{Punctuation}'
|
||||
haystack = "𑁍"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat31"
|
||||
regex = '\p{Separator}'
|
||||
haystack = "\u3000"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat32"
|
||||
regex = '\p{Space_Separator}'
|
||||
haystack = "\u205F"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat33"
|
||||
regex = '\p{Spacing_Mark}'
|
||||
haystack = "\U00016F7E"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat34"
|
||||
regex = '\p{Symbol}'
|
||||
haystack = "⯈"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat35"
|
||||
regex = '\p{Titlecase_Letter}'
|
||||
haystack = "ῼ"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat36"
|
||||
regex = '\p{Unassigned}'
|
||||
haystack = "\U0010FFFF"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gencat37"
|
||||
regex = '\p{Uppercase_Letter}'
|
||||
haystack = "Ꝋ"
|
||||
matches = [[0, 3]]
|
||||
|
||||
|
||||
# Tests for Unicode emoji properties.
|
||||
[[test]]
|
||||
name = "class-emoji1"
|
||||
regex = '\p{Emoji}'
|
||||
haystack = "\u23E9"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-emoji2"
|
||||
regex = '\p{emoji}'
|
||||
haystack = "\U0001F21A"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-emoji3"
|
||||
regex = '\p{extendedpictographic}'
|
||||
haystack = "\U0001FA6E"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-emoji4"
|
||||
regex = '\p{extendedpictographic}'
|
||||
haystack = "\U0001FFFD"
|
||||
matches = [[0, 4]]
|
||||
|
||||
|
||||
# Tests for Unicode grapheme cluster properties.
|
||||
[[test]]
|
||||
name = "class-gcb1"
|
||||
regex = '\p{grapheme_cluster_break=prepend}'
|
||||
haystack = "\U00011D46"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gcb2"
|
||||
regex = '\p{gcb=regional_indicator}'
|
||||
haystack = "\U0001F1E6"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gcb3"
|
||||
regex = '\p{gcb=ri}'
|
||||
haystack = "\U0001F1E7"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gcb4"
|
||||
regex = '\p{regionalindicator}'
|
||||
haystack = "\U0001F1FF"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gcb5"
|
||||
regex = '\p{gcb=lvt}'
|
||||
haystack = "\uC989"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-gcb6"
|
||||
regex = '\p{gcb=zwj}'
|
||||
haystack = "\u200D"
|
||||
matches = [[0, 3]]
|
||||
|
||||
# Tests for Unicode word boundary properties.
|
||||
[[test]]
|
||||
name = "class-word-break1"
|
||||
regex = '\p{word_break=Hebrew_Letter}'
|
||||
haystack = "\uFB46"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-word-break2"
|
||||
regex = '\p{wb=hebrewletter}'
|
||||
haystack = "\uFB46"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-word-break3"
|
||||
regex = '\p{wb=ExtendNumLet}'
|
||||
haystack = "\uFF3F"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-word-break4"
|
||||
regex = '\p{wb=WSegSpace}'
|
||||
haystack = "\u3000"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-word-break5"
|
||||
regex = '\p{wb=numeric}'
|
||||
haystack = "\U0001E950"
|
||||
matches = [[0, 4]]
|
||||
|
||||
# Tests for Unicode sentence boundary properties.
|
||||
[[test]]
|
||||
name = "class-sentence-break1"
|
||||
regex = '\p{sentence_break=Lower}'
|
||||
haystack = "\u0469"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "class-sentence-break2"
|
||||
regex = '\p{sb=lower}'
|
||||
haystack = "\u0469"
|
||||
matches = [[0, 2]]
|
||||
|
||||
[[test]]
|
||||
name = "class-sentence-break3"
|
||||
regex = '\p{sb=Close}'
|
||||
haystack = "\uFF60"
|
||||
matches = [[0, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "class-sentence-break4"
|
||||
regex = '\p{sb=Close}'
|
||||
haystack = "\U0001F677"
|
||||
matches = [[0, 4]]
|
||||
|
||||
[[test]]
|
||||
name = "class-sentence-break5"
|
||||
regex = '\p{sb=SContinue}'
|
||||
haystack = "\uFF64"
|
||||
matches = [[0, 3]]
|
||||
399
third-party/vendor/regex/testdata/utf8.toml
vendored
Normal file
399
third-party/vendor/regex/testdata/utf8.toml
vendored
Normal file
|
|
@ -0,0 +1,399 @@
|
|||
# These test the UTF-8 modes exposed by regex-automata. Namely, when utf8 is
|
||||
# true, then we promise that the haystack is valid UTF-8. (Otherwise behavior
|
||||
# is unspecified.) This also corresponds to building the regex engine with the
|
||||
# following two guarantees:
|
||||
#
|
||||
# 1) For any non-empty match reported, its span is guaranteed to correspond to
|
||||
# valid UTF-8.
|
||||
# 2) All empty or zero-width matches reported must never split a UTF-8
|
||||
# encoded codepoint. If the haystack has invalid UTF-8, then this results in
|
||||
# unspecified behavior.
|
||||
#
|
||||
# The (2) is in particular what we focus our testing on since (1) is generally
|
||||
# guaranteed by regex-syntax's AST-to-HIR translator and is well tested there.
|
||||
# The thing with (2) is that it can't be described in the HIR, so the regex
|
||||
# engines have to handle that case. Thus, we test it here.
|
||||
#
|
||||
# Note that it is possible to build a regex that has property (1) but not
|
||||
# (2), and vice versa. This is done by building the HIR with 'utf8=true' but
|
||||
# building the Thompson NFA with 'utf8=false'. We don't test that here because
|
||||
# the harness doesn't expose a way to enable or disable UTF-8 mode with that
|
||||
# granularity. Instead, those combinations are lightly tested via doc examples.
|
||||
# That's not to say that (1) without (2) is uncommon. Indeed, ripgrep uses it
|
||||
# because it cannot guarantee that its haystack is valid UTF-8.
|
||||
|
||||
# This tests that an empty regex doesn't split a codepoint.
|
||||
[[test]]
|
||||
name = "empty-utf8yes"
|
||||
regex = ''
|
||||
haystack = '☃'
|
||||
matches = [[0, 0], [3, 3]]
|
||||
unicode = true
|
||||
utf8 = true
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
[[test]]
|
||||
name = "empty-utf8yes-overlapping"
|
||||
regex = ''
|
||||
haystack = '☃'
|
||||
matches = [[0, 0], [3, 3]]
|
||||
unicode = true
|
||||
utf8 = true
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# This tests that an empty regex DOES split a codepoint when utf8=false.
|
||||
[[test]]
|
||||
name = "empty-utf8no"
|
||||
regex = ''
|
||||
haystack = '☃'
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
[[test]]
|
||||
name = "empty-utf8no-overlapping"
|
||||
regex = ''
|
||||
haystack = '☃'
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
|
||||
unicode = true
|
||||
utf8 = false
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# This tests that an empty regex doesn't split a codepoint, even if we give
|
||||
# it bounds entirely within the codepoint.
|
||||
#
|
||||
# This is one of the trickier cases and is what motivated the current UTF-8
|
||||
# mode design. In particular, at one point, this test failed the 'is_match'
|
||||
# variant of the test but not 'find'. This is because the 'is_match' code path
|
||||
# is specifically optimized for "was a match found" rather than "where is the
|
||||
# match." In the former case, you don't really care about the empty-vs-non-empty
|
||||
# matches, and thus, the codepoint splitting filtering logic wasn't getting
|
||||
# applied. (In multiple ways across multiple regex engines.) In this way, you
|
||||
# can wind up with a situation where 'is_match' says "yes," but 'find' says,
|
||||
# "I didn't find anything." Which is... not great.
|
||||
#
|
||||
# I could have decided to say that providing boundaries that themselves split
|
||||
# a codepoint would have unspecified behavior. But I couldn't quite convince
|
||||
# myself that such boundaries were the only way to get an inconsistency between
|
||||
# 'is_match' and 'find'.
|
||||
#
|
||||
# Note that I also tried to come up with a test like this that fails without
|
||||
# using `bounds`. Specifically, a test where 'is_match' and 'find' disagree.
|
||||
# But I couldn't do it, and I'm tempted to conclude it is impossible. The
|
||||
# fundamental problem is that you need to simultaneously produce an empty match
|
||||
# that splits a codepoint while *not* matching before or after the codepoint.
|
||||
[[test]]
|
||||
name = "empty-utf8yes-bounds"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 3]
|
||||
matches = []
|
||||
unicode = true
|
||||
utf8 = true
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
[[test]]
|
||||
name = "empty-utf8yes-bounds-overlapping"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 3]
|
||||
matches = []
|
||||
unicode = true
|
||||
utf8 = true
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# This tests that an empty regex splits a codepoint when the bounds are
|
||||
# entirely within the codepoint.
|
||||
[[test]]
|
||||
name = "empty-utf8no-bounds"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 3]
|
||||
matches = [[1, 1], [2, 2], [3, 3]]
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
[[test]]
|
||||
name = "empty-utf8no-bounds-overlapping"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 3]
|
||||
matches = [[1, 1], [2, 2], [3, 3]]
|
||||
unicode = true
|
||||
utf8 = false
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# In this test, we anchor the search. Since the start position is also a UTF-8
|
||||
# boundary, we get a match.
|
||||
[[test]]
|
||||
name = "empty-utf8yes-anchored"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
matches = [[0, 0]]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = true
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
[[test]]
|
||||
name = "empty-utf8yes-anchored-overlapping"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
matches = [[0, 0]]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = true
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# Same as above, except with UTF-8 mode disabled. It almost doesn't change the
|
||||
# result, except for the fact that since this is an anchored search and we
|
||||
# always find all matches, the test harness will keep reporting matches until
|
||||
# none are found. Because it's anchored, matches will be reported so long as
|
||||
# they are directly adjacent. Since with UTF-8 mode the next anchored search
|
||||
# after the match at [0, 0] fails, iteration stops (and doesn't find the last
|
||||
# match at [4, 4]).
|
||||
[[test]]
|
||||
name = "empty-utf8no-anchored"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
#
|
||||
# Note that overlapping anchored searches are a little weird, and it's not
|
||||
# totally clear what their semantics ought to be. For now, we just test the
|
||||
# current behavior of our test shim that implements overlapping search. (This
|
||||
# is one of the reasons why we don't really expose regex-level overlapping
|
||||
# searches.)
|
||||
[[test]]
|
||||
name = "empty-utf8no-anchored-overlapping"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
matches = [[0, 0]]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = false
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# In this test, we anchor the search, but also set bounds. The bounds start the
|
||||
# search in the middle of a codepoint, so there should never be a match.
|
||||
[[test]]
|
||||
name = "empty-utf8yes-anchored-bounds"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
matches = []
|
||||
bounds = [1, 3]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = true
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
[[test]]
|
||||
name = "empty-utf8yes-anchored-bounds-overlapping"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
matches = []
|
||||
bounds = [1, 3]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = true
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# Same as above, except with UTF-8 mode disabled. Without UTF-8 mode enabled,
|
||||
# matching within a codepoint is allowed. And remember, as in the anchored test
|
||||
# above with UTF-8 mode disabled, iteration will report all adjacent matches.
|
||||
# The matches at [0, 0] and [4, 4] are not included because of the bounds of
|
||||
# the search.
|
||||
[[test]]
|
||||
name = "empty-utf8no-anchored-bounds"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 3]
|
||||
matches = [[1, 1], [2, 2], [3, 3]]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
#
|
||||
# Note that overlapping anchored searches are a little weird, and it's not
|
||||
# totally clear what their semantics ought to be. For now, we just test the
|
||||
# current behavior of our test shim that implements overlapping search. (This
|
||||
# is one of the reasons why we don't really expose regex-level overlapping
|
||||
# searches.)
|
||||
[[test]]
|
||||
name = "empty-utf8no-anchored-bounds-overlapping"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 3]
|
||||
matches = [[1, 1]]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = false
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# This tests that we find the match at the end of the string when the bounds
|
||||
# exclude the first match.
|
||||
[[test]]
|
||||
name = "empty-utf8yes-startbound"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 4]
|
||||
matches = [[4, 4]]
|
||||
unicode = true
|
||||
utf8 = true
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
[[test]]
|
||||
name = "empty-utf8yes-startbound-overlapping"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 4]
|
||||
matches = [[4, 4]]
|
||||
unicode = true
|
||||
utf8 = true
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# Same as above, except since UTF-8 mode is disabled, we also find the matches
|
||||
# in between that split the codepoint.
|
||||
[[test]]
|
||||
name = "empty-utf8no-startbound"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 4]
|
||||
matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
[[test]]
|
||||
name = "empty-utf8no-startbound-overlapping"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 4]
|
||||
matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
unicode = true
|
||||
utf8 = false
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# This tests that we don't find any matches in an anchored search, even when
|
||||
# the bounds include a match (at the end).
|
||||
[[test]]
|
||||
name = "empty-utf8yes-anchored-startbound"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 4]
|
||||
matches = []
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = true
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
[[test]]
|
||||
name = "empty-utf8yes-anchored-startbound-overlapping"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 4]
|
||||
matches = []
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = true
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# Same as above, except since UTF-8 mode is disabled, we also find the matches
|
||||
# in between that split the codepoint. Even though this is an anchored search,
|
||||
# since the matches are adjacent, we find all of them.
|
||||
[[test]]
|
||||
name = "empty-utf8no-anchored-startbound"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 4]
|
||||
matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
#
|
||||
# Note that overlapping anchored searches are a little weird, and it's not
|
||||
# totally clear what their semantics ought to be. For now, we just test the
|
||||
# current behavior of our test shim that implements overlapping search. (This
|
||||
# is one of the reasons why we don't really expose regex-level overlapping
|
||||
# searches.)
|
||||
[[test]]
|
||||
name = "empty-utf8no-anchored-startbound-overlapping"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [1, 4]
|
||||
matches = [[1, 1]]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = false
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# This tests that we find the match at the end of the haystack in UTF-8 mode
|
||||
# when our bounds only include the empty string at the end of the haystack.
|
||||
[[test]]
|
||||
name = "empty-utf8yes-anchored-endbound"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [4, 4]
|
||||
matches = [[4, 4]]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = true
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
[[test]]
|
||||
name = "empty-utf8yes-anchored-endbound-overlapping"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [4, 4]
|
||||
matches = [[4, 4]]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = true
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
|
||||
# Same as above, but with UTF-8 mode disabled. Results remain the same since
|
||||
# the only possible match does not split a codepoint.
|
||||
[[test]]
|
||||
name = "empty-utf8no-anchored-endbound"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [4, 4]
|
||||
matches = [[4, 4]]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = false
|
||||
|
||||
# Tests the overlapping case of the above.
|
||||
[[test]]
|
||||
name = "empty-utf8no-anchored-endbound-overlapping"
|
||||
regex = ''
|
||||
haystack = '𝛃'
|
||||
bounds = [4, 4]
|
||||
matches = [[4, 4]]
|
||||
anchored = true
|
||||
unicode = true
|
||||
utf8 = false
|
||||
match-kind = "all"
|
||||
search-kind = "overlapping"
|
||||
687
third-party/vendor/regex/testdata/word-boundary-special.toml
vendored
Normal file
687
third-party/vendor/regex/testdata/word-boundary-special.toml
vendored
Normal file
|
|
@ -0,0 +1,687 @@
|
|||
# These tests are for the "special" word boundary assertions. That is,
|
||||
# \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty
|
||||
# assertions for more niche use cases, but hitting those cases without these
|
||||
# assertions is difficult. For example, \b{start-half} and \b{end-half} are
|
||||
# used to implement the -w/--word-regexp flag in a grep program.
|
||||
|
||||
# Tests for (?-u:\b{start})
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-010"
|
||||
regex = '\b{start}'
|
||||
haystack = "a"
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-020"
|
||||
regex = '\b{start}'
|
||||
haystack = "a "
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-030"
|
||||
regex = '\b{start}'
|
||||
haystack = " a "
|
||||
matches = [[1, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-040"
|
||||
regex = '\b{start}'
|
||||
haystack = ""
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-050"
|
||||
regex = '\b{start}'
|
||||
haystack = "ab"
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-060"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-060-bounds"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-070"
|
||||
regex = '\b{start}'
|
||||
haystack = " 𝛃 "
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-080"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-090"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[4, 4]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-110"
|
||||
regex = '\b{start}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
# Tests for (?-u:\b{end})
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-010"
|
||||
regex = '\b{end}'
|
||||
haystack = "a"
|
||||
matches = [[1, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-020"
|
||||
regex = '\b{end}'
|
||||
haystack = "a "
|
||||
matches = [[1, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-030"
|
||||
regex = '\b{end}'
|
||||
haystack = " a "
|
||||
matches = [[2, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-040"
|
||||
regex = '\b{end}'
|
||||
haystack = ""
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-050"
|
||||
regex = '\b{end}'
|
||||
haystack = "ab"
|
||||
matches = [[2, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-060"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-060-bounds"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-070"
|
||||
regex = '\b{end}'
|
||||
haystack = " 𝛃 "
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-080"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-090"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[5, 5]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-110"
|
||||
regex = '\b{end}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[1, 1]]
|
||||
unicode = false
|
||||
|
||||
# Tests for \b{start}
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-010"
|
||||
regex = '\b{start}'
|
||||
haystack = "a"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-020"
|
||||
regex = '\b{start}'
|
||||
haystack = "a "
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-030"
|
||||
regex = '\b{start}'
|
||||
haystack = " a "
|
||||
matches = [[1, 1]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-040"
|
||||
regex = '\b{start}'
|
||||
haystack = ""
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-050"
|
||||
regex = '\b{start}'
|
||||
haystack = "ab"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-060"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-060-bounds"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-070"
|
||||
regex = '\b{start}'
|
||||
haystack = " 𝛃 "
|
||||
matches = [[1, 1]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-080"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-090"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-110"
|
||||
regex = '\b{start}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
# Tests for \b{end}
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-010"
|
||||
regex = '\b{end}'
|
||||
haystack = "a"
|
||||
matches = [[1, 1]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-020"
|
||||
regex = '\b{end}'
|
||||
haystack = "a "
|
||||
matches = [[1, 1]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-030"
|
||||
regex = '\b{end}'
|
||||
haystack = " a "
|
||||
matches = [[2, 2]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-040"
|
||||
regex = '\b{end}'
|
||||
haystack = ""
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-050"
|
||||
regex = '\b{end}'
|
||||
haystack = "ab"
|
||||
matches = [[2, 2]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-060"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃"
|
||||
matches = [[4, 4]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-060-bounds"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-070"
|
||||
regex = '\b{end}'
|
||||
haystack = " 𝛃 "
|
||||
matches = [[5, 5]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-080"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = [[4, 4]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-090"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[5, 5]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-110"
|
||||
regex = '\b{end}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[5, 5]]
|
||||
unicode = true
|
||||
|
||||
# Tests for (?-u:\b{start-half})
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-010"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "a"
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-020"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "a "
|
||||
matches = [[0, 0], [2, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-030"
|
||||
regex = '\b{start-half}'
|
||||
haystack = " a "
|
||||
matches = [[0, 0], [1, 1], [3, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-040"
|
||||
regex = '\b{start-half}'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-050"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "ab"
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-060"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃"
|
||||
matches = [[0, 0], [4, 4]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-060-noutf8"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-060-bounds"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-070"
|
||||
regex = '\b{start-half}'
|
||||
haystack = " 𝛃 "
|
||||
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-080"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = [[0, 0], [4, 4], [8, 8]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-090"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[0, 0], [4, 4]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-110"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[0, 0], [5, 5]]
|
||||
unicode = false
|
||||
|
||||
# Tests for (?-u:\b{end-half})
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-010"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "a"
|
||||
matches = [[1, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-020"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "a "
|
||||
matches = [[1, 1], [2, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-030"
|
||||
regex = '\b{end-half}'
|
||||
haystack = " a "
|
||||
matches = [[0, 0], [2, 2], [3, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-040"
|
||||
regex = '\b{end-half}'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-050"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "ab"
|
||||
matches = [[2, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-060"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃"
|
||||
matches = [[0, 0], [4, 4]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-060-bounds"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-070"
|
||||
regex = '\b{end-half}'
|
||||
haystack = " 𝛃 "
|
||||
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-080"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = [[0, 0], [4, 4], [8, 8]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-090"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[0, 0], [5, 5]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-110"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[1, 1], [5, 5]]
|
||||
unicode = false
|
||||
|
||||
# Tests for \b{start-half}
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-010"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "a"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-020"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "a "
|
||||
matches = [[0, 0], [2, 2]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-030"
|
||||
regex = '\b{start-half}'
|
||||
haystack = " a "
|
||||
matches = [[0, 0], [1, 1], [3, 3]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-040"
|
||||
regex = '\b{start-half}'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-050"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "ab"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-060"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-060-bounds"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-070"
|
||||
regex = '\b{start-half}'
|
||||
haystack = " 𝛃 "
|
||||
matches = [[0, 0], [1, 1], [6, 6]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-080"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = [[0, 0], [8, 8]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-090"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-110"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
# Tests for \b{end-half}
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-010"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "a"
|
||||
matches = [[1, 1]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-020"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "a "
|
||||
matches = [[1, 1], [2, 2]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-030"
|
||||
regex = '\b{end-half}'
|
||||
haystack = " a "
|
||||
matches = [[0, 0], [2, 2], [3, 3]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-040"
|
||||
regex = '\b{end-half}'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-050"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "ab"
|
||||
matches = [[2, 2]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-060"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃"
|
||||
matches = [[4, 4]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-060-bounds"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-070"
|
||||
regex = '\b{end-half}'
|
||||
haystack = " 𝛃 "
|
||||
matches = [[0, 0], [5, 5], [6, 6]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-080"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = [[4, 4], [8, 8]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-090"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[5, 5]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-110"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[5, 5]]
|
||||
unicode = true
|
||||
|
||||
# Specialty tests.
|
||||
|
||||
# Since \r is special cased in the start state computation (to deal with CRLF
|
||||
# mode), this test ensures that the correct start state is computed when the
|
||||
# pattern starts with a half word boundary assertion.
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-carriage"
|
||||
regex = '\b{start-half}[a-z]+'
|
||||
haystack = 'ABC\rabc'
|
||||
matches = [[4, 7]]
|
||||
bounds = [4, 7]
|
||||
unescape = true
|
||||
|
||||
# Since \n is also special cased in the start state computation, this test
|
||||
# ensures that the correct start state is computed when the pattern starts with
|
||||
# a half word boundary assertion.
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-linefeed"
|
||||
regex = '\b{start-half}[a-z]+'
|
||||
haystack = 'ABC\nabc'
|
||||
matches = [[4, 7]]
|
||||
bounds = [4, 7]
|
||||
unescape = true
|
||||
|
||||
# Like the carriage return test above, but with a custom line terminator.
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-customlineterm"
|
||||
regex = '\b{start-half}[a-z]+'
|
||||
haystack = 'ABC!abc'
|
||||
matches = [[4, 7]]
|
||||
bounds = [4, 7]
|
||||
unescape = true
|
||||
line-terminator = '!'
|
||||
781
third-party/vendor/regex/testdata/word-boundary.toml
vendored
Normal file
781
third-party/vendor/regex/testdata/word-boundary.toml
vendored
Normal file
|
|
@ -0,0 +1,781 @@
|
|||
# Some of these are cribbed from RE2's test suite.
|
||||
|
||||
# These test \b. Below are tests for \B.
|
||||
[[test]]
|
||||
name = "wb1"
|
||||
regex = '\b'
|
||||
haystack = ""
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb2"
|
||||
regex = '\b'
|
||||
haystack = "a"
|
||||
matches = [[0, 0], [1, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb3"
|
||||
regex = '\b'
|
||||
haystack = "ab"
|
||||
matches = [[0, 0], [2, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb4"
|
||||
regex = '^\b'
|
||||
haystack = "ab"
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb5"
|
||||
regex = '\b$'
|
||||
haystack = "ab"
|
||||
matches = [[2, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb6"
|
||||
regex = '^\b$'
|
||||
haystack = "ab"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb7"
|
||||
regex = '\bbar\b'
|
||||
haystack = "nobar bar foo bar"
|
||||
matches = [[6, 9], [14, 17]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb8"
|
||||
regex = 'a\b'
|
||||
haystack = "faoa x"
|
||||
matches = [[3, 4]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb9"
|
||||
regex = '\bbar'
|
||||
haystack = "bar x"
|
||||
matches = [[0, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb10"
|
||||
regex = '\bbar'
|
||||
haystack = "foo\nbar x"
|
||||
matches = [[4, 7]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb11"
|
||||
regex = 'bar\b'
|
||||
haystack = "foobar"
|
||||
matches = [[3, 6]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb12"
|
||||
regex = 'bar\b'
|
||||
haystack = "foobar\nxxx"
|
||||
matches = [[3, 6]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb13"
|
||||
regex = '(?:foo|bar|[A-Z])\b'
|
||||
haystack = "foo"
|
||||
matches = [[0, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb14"
|
||||
regex = '(?:foo|bar|[A-Z])\b'
|
||||
haystack = "foo\n"
|
||||
matches = [[0, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb15"
|
||||
regex = '\b(?:foo|bar|[A-Z])'
|
||||
haystack = "foo"
|
||||
matches = [[0, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb16"
|
||||
regex = '\b(?:foo|bar|[A-Z])\b'
|
||||
haystack = "X"
|
||||
matches = [[0, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb17"
|
||||
regex = '\b(?:foo|bar|[A-Z])\b'
|
||||
haystack = "XY"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb18"
|
||||
regex = '\b(?:foo|bar|[A-Z])\b'
|
||||
haystack = "bar"
|
||||
matches = [[0, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb19"
|
||||
regex = '\b(?:foo|bar|[A-Z])\b'
|
||||
haystack = "foo"
|
||||
matches = [[0, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb20"
|
||||
regex = '\b(?:foo|bar|[A-Z])\b'
|
||||
haystack = "foo\n"
|
||||
matches = [[0, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb21"
|
||||
regex = '\b(?:foo|bar|[A-Z])\b'
|
||||
haystack = "ffoo bbar N x"
|
||||
matches = [[10, 11]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb22"
|
||||
regex = '\b(?:fo|foo)\b'
|
||||
haystack = "fo"
|
||||
matches = [[0, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb23"
|
||||
regex = '\b(?:fo|foo)\b'
|
||||
haystack = "foo"
|
||||
matches = [[0, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb24"
|
||||
regex = '\b\b'
|
||||
haystack = ""
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb25"
|
||||
regex = '\b\b'
|
||||
haystack = "a"
|
||||
matches = [[0, 0], [1, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb26"
|
||||
regex = '\b$'
|
||||
haystack = ""
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb27"
|
||||
regex = '\b$'
|
||||
haystack = "x"
|
||||
matches = [[1, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb28"
|
||||
regex = '\b$'
|
||||
haystack = "y x"
|
||||
matches = [[3, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb29"
|
||||
regex = '(?-u:\b).$'
|
||||
haystack = "x"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "wb30"
|
||||
regex = '^\b(?:fo|foo)\b'
|
||||
haystack = "fo"
|
||||
matches = [[0, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb31"
|
||||
regex = '^\b(?:fo|foo)\b'
|
||||
haystack = "foo"
|
||||
matches = [[0, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb32"
|
||||
regex = '^\b$'
|
||||
haystack = ""
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb33"
|
||||
regex = '^\b$'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb34"
|
||||
regex = '^(?-u:\b).$'
|
||||
haystack = "x"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "wb35"
|
||||
regex = '^(?-u:\b).(?-u:\b)$'
|
||||
haystack = "x"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "wb36"
|
||||
regex = '^^^^^\b$$$$$'
|
||||
haystack = ""
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb37"
|
||||
regex = '^^^^^(?-u:\b).$$$$$'
|
||||
haystack = "x"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "wb38"
|
||||
regex = '^^^^^\b$$$$$'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb39"
|
||||
regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$'
|
||||
haystack = "x"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "wb40"
|
||||
regex = '(?-u:\b).+(?-u:\b)'
|
||||
haystack = "$$abc$$"
|
||||
matches = [[2, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "wb41"
|
||||
regex = '\b'
|
||||
haystack = "a b c"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb42"
|
||||
regex = '\bfoo\b'
|
||||
haystack = "zzz foo zzz"
|
||||
matches = [[4, 7]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb43"
|
||||
regex = '\b^'
|
||||
haystack = "ab"
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "wb44"
|
||||
regex = '$\b'
|
||||
haystack = "ab"
|
||||
matches = [[2, 2]]
|
||||
unicode = false
|
||||
|
||||
|
||||
# Tests for \B. Note that \B is not allowed if UTF-8 mode is enabled, so we
|
||||
# have to disable it for most of these tests. This is because \B can match at
|
||||
# non-UTF-8 boundaries.
|
||||
[[test]]
|
||||
name = "nb1"
|
||||
regex = '\Bfoo\B'
|
||||
haystack = "n foo xfoox that"
|
||||
matches = [[7, 10]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb2"
|
||||
regex = 'a\B'
|
||||
haystack = "faoa x"
|
||||
matches = [[1, 2]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb3"
|
||||
regex = '\Bbar'
|
||||
haystack = "bar x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb4"
|
||||
regex = '\Bbar'
|
||||
haystack = "foo\nbar x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb5"
|
||||
regex = 'bar\B'
|
||||
haystack = "foobar"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb6"
|
||||
regex = 'bar\B'
|
||||
haystack = "foobar\nxxx"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb7"
|
||||
regex = '(?:foo|bar|[A-Z])\B'
|
||||
haystack = "foox"
|
||||
matches = [[0, 3]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb8"
|
||||
regex = '(?:foo|bar|[A-Z])\B'
|
||||
haystack = "foo\n"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb9"
|
||||
regex = '\B'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb10"
|
||||
regex = '\B'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb11"
|
||||
regex = '\B(?:foo|bar|[A-Z])'
|
||||
haystack = "foo"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb12"
|
||||
regex = '\B(?:foo|bar|[A-Z])\B'
|
||||
haystack = "xXy"
|
||||
matches = [[1, 2]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb13"
|
||||
regex = '\B(?:foo|bar|[A-Z])\B'
|
||||
haystack = "XY"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb14"
|
||||
regex = '\B(?:foo|bar|[A-Z])\B'
|
||||
haystack = "XYZ"
|
||||
matches = [[1, 2]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb15"
|
||||
regex = '\B(?:foo|bar|[A-Z])\B'
|
||||
haystack = "abara"
|
||||
matches = [[1, 4]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb16"
|
||||
regex = '\B(?:foo|bar|[A-Z])\B'
|
||||
haystack = "xfoo_"
|
||||
matches = [[1, 4]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb17"
|
||||
regex = '\B(?:foo|bar|[A-Z])\B'
|
||||
haystack = "xfoo\n"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb18"
|
||||
regex = '\B(?:foo|bar|[A-Z])\B'
|
||||
haystack = "foo bar vNX"
|
||||
matches = [[9, 10]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb19"
|
||||
regex = '\B(?:fo|foo)\B'
|
||||
haystack = "xfoo"
|
||||
matches = [[1, 3]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb20"
|
||||
regex = '\B(?:foo|fo)\B'
|
||||
haystack = "xfooo"
|
||||
matches = [[1, 4]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb21"
|
||||
regex = '\B\B'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb22"
|
||||
regex = '\B\B'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb23"
|
||||
regex = '\B$'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb24"
|
||||
regex = '\B$'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb25"
|
||||
regex = '\B$'
|
||||
haystack = "y x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb26"
|
||||
regex = '\B.$'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb27"
|
||||
regex = '^\B(?:fo|foo)\B'
|
||||
haystack = "fo"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb28"
|
||||
regex = '^\B(?:fo|foo)\B'
|
||||
haystack = "fo"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb29"
|
||||
regex = '^\B'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb30"
|
||||
regex = '^\B'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb31"
|
||||
regex = '^\B\B'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb32"
|
||||
regex = '^\B\B'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb33"
|
||||
regex = '^\B$'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb34"
|
||||
regex = '^\B$'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb35"
|
||||
regex = '^\B.$'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb36"
|
||||
regex = '^\B.\B$'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb37"
|
||||
regex = '^^^^^\B$$$$$'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb38"
|
||||
regex = '^^^^^\B.$$$$$'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "nb39"
|
||||
regex = '^^^^^\B$$$$$'
|
||||
haystack = "x"
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
|
||||
# unicode1* and unicode2* work for both Unicode and ASCII because all matches
|
||||
# are reported as byte offsets, and « and » do not correspond to word
|
||||
# boundaries at either the character or byte level.
|
||||
[[test]]
|
||||
name = "unicode1"
|
||||
regex = '\bx\b'
|
||||
haystack = "«x"
|
||||
matches = [[2, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "unicode1-only-ascii"
|
||||
regex = '\bx\b'
|
||||
haystack = "«x"
|
||||
matches = [[2, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "unicode2"
|
||||
regex = '\bx\b'
|
||||
haystack = "x»"
|
||||
matches = [[0, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "unicode2-only-ascii"
|
||||
regex = '\bx\b'
|
||||
haystack = "x»"
|
||||
matches = [[0, 1]]
|
||||
unicode = false
|
||||
|
||||
# ASCII word boundaries are completely oblivious to Unicode characters, so
|
||||
# even though β is a character, an ASCII \b treats it as a word boundary
|
||||
# when it is adjacent to another ASCII character. (The ASCII \b only looks
|
||||
# at the leading byte of β.) For Unicode \b, the tests are precisely inverted.
|
||||
[[test]]
|
||||
name = "unicode3"
|
||||
regex = '\bx\b'
|
||||
haystack = 'áxβ'
|
||||
matches = []
|
||||
|
||||
[[test]]
|
||||
name = "unicode3-only-ascii"
|
||||
regex = '\bx\b'
|
||||
haystack = 'áxβ'
|
||||
matches = [[2, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "unicode4"
|
||||
regex = '\Bx\B'
|
||||
haystack = 'áxβ'
|
||||
matches = [[2, 3]]
|
||||
|
||||
[[test]]
|
||||
name = "unicode4-only-ascii"
|
||||
regex = '\Bx\B'
|
||||
haystack = 'áxβ'
|
||||
matches = []
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
# The same as above, but with \b instead of \B as a sanity check.
|
||||
[[test]]
|
||||
name = "unicode5"
|
||||
regex = '\b'
|
||||
haystack = "0\U0007EF5E"
|
||||
matches = [[0, 0], [1, 1]]
|
||||
|
||||
[[test]]
|
||||
name = "unicode5-only-ascii"
|
||||
regex = '\b'
|
||||
haystack = "0\U0007EF5E"
|
||||
matches = [[0, 0], [1, 1]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "unicode5-noutf8"
|
||||
regex = '\b'
|
||||
haystack = '0\xFF\xFF\xFF\xFF'
|
||||
matches = [[0, 0], [1, 1]]
|
||||
unescape = true
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "unicode5-noutf8-only-ascii"
|
||||
regex = '\b'
|
||||
haystack = '0\xFF\xFF\xFF\xFF'
|
||||
matches = [[0, 0], [1, 1]]
|
||||
unescape = true
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
# Weird special case to ensure that ASCII \B treats each individual code unit
|
||||
# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary
|
||||
# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the
|
||||
# \w character class.)
|
||||
[[test]]
|
||||
name = "unicode5-not"
|
||||
regex = '\B'
|
||||
haystack = "0\U0007EF5E"
|
||||
matches = [[5, 5]]
|
||||
|
||||
[[test]]
|
||||
name = "unicode5-not-only-ascii"
|
||||
regex = '\B'
|
||||
haystack = "0\U0007EF5E"
|
||||
matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
# This gets no matches since \B only matches in the presence of valid UTF-8
|
||||
# when Unicode is enabled, even when UTF-8 mode is disabled.
|
||||
[[test]]
|
||||
name = "unicode5-not-noutf8"
|
||||
regex = '\B'
|
||||
haystack = '0\xFF\xFF\xFF\xFF'
|
||||
matches = []
|
||||
unescape = true
|
||||
utf8 = false
|
||||
|
||||
# But this DOES get matches since \B in ASCII mode only looks at individual
|
||||
# bytes.
|
||||
[[test]]
|
||||
name = "unicode5-not-noutf8-only-ascii"
|
||||
regex = '\B'
|
||||
haystack = '0\xFF\xFF\xFF\xFF'
|
||||
matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
|
||||
unescape = true
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
# Some tests of no particular significance.
|
||||
[[test]]
|
||||
name = "unicode6"
|
||||
regex = '\b[0-9]+\b'
|
||||
haystack = "foo 123 bar 456 quux 789"
|
||||
matches = [[4, 7], [12, 15], [21, 24]]
|
||||
|
||||
[[test]]
|
||||
name = "unicode7"
|
||||
regex = '\b[0-9]+\b'
|
||||
haystack = "foo 123 bar a456 quux 789"
|
||||
matches = [[4, 7], [22, 25]]
|
||||
|
||||
[[test]]
|
||||
name = "unicode8"
|
||||
regex = '\b[0-9]+\b'
|
||||
haystack = "foo 123 bar 456a quux 789"
|
||||
matches = [[4, 7], [22, 25]]
|
||||
|
||||
# A variant of the problem described here:
|
||||
# https://github.com/google/re2/blob/89567f5de5b23bb5ad0c26cbafc10bdc7389d1fa/re2/dfa.cc#L658-L667
|
||||
[[test]]
|
||||
name = "alt-with-assertion-repetition"
|
||||
regex = '(?:\b|%)+'
|
||||
haystack = "z%"
|
||||
bounds = [1, 2]
|
||||
anchored = true
|
||||
matches = [[1, 1]]
|
||||
Loading…
Add table
Add a link
Reference in a new issue