Vendor things
This commit is contained in:
parent
5deceec006
commit
977e3c17e5
19434 changed files with 10682014 additions and 0 deletions
687
third-party/vendor/regex/testdata/word-boundary-special.toml
vendored
Normal file
687
third-party/vendor/regex/testdata/word-boundary-special.toml
vendored
Normal file
|
|
@ -0,0 +1,687 @@
|
|||
# These tests are for the "special" word boundary assertions. That is,
|
||||
# \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty
|
||||
# assertions for more niche use cases, but hitting those cases without these
|
||||
# assertions is difficult. For example, \b{start-half} and \b{end-half} are
|
||||
# used to implement the -w/--word-regexp flag in a grep program.
|
||||
|
||||
# Tests for (?-u:\b{start})
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-010"
|
||||
regex = '\b{start}'
|
||||
haystack = "a"
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-020"
|
||||
regex = '\b{start}'
|
||||
haystack = "a "
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-030"
|
||||
regex = '\b{start}'
|
||||
haystack = " a "
|
||||
matches = [[1, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-040"
|
||||
regex = '\b{start}'
|
||||
haystack = ""
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-050"
|
||||
regex = '\b{start}'
|
||||
haystack = "ab"
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-060"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-060-bounds"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-070"
|
||||
regex = '\b{start}'
|
||||
haystack = " 𝛃 "
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-080"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-090"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[4, 4]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-ascii-110"
|
||||
regex = '\b{start}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
# Tests for (?-u:\b{end})
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-010"
|
||||
regex = '\b{end}'
|
||||
haystack = "a"
|
||||
matches = [[1, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-020"
|
||||
regex = '\b{end}'
|
||||
haystack = "a "
|
||||
matches = [[1, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-030"
|
||||
regex = '\b{end}'
|
||||
haystack = " a "
|
||||
matches = [[2, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-040"
|
||||
regex = '\b{end}'
|
||||
haystack = ""
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-050"
|
||||
regex = '\b{end}'
|
||||
haystack = "ab"
|
||||
matches = [[2, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-060"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-060-bounds"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-070"
|
||||
regex = '\b{end}'
|
||||
haystack = " 𝛃 "
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-080"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-090"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[5, 5]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-ascii-110"
|
||||
regex = '\b{end}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[1, 1]]
|
||||
unicode = false
|
||||
|
||||
# Tests for \b{start}
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-010"
|
||||
regex = '\b{start}'
|
||||
haystack = "a"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-020"
|
||||
regex = '\b{start}'
|
||||
haystack = "a "
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-030"
|
||||
regex = '\b{start}'
|
||||
haystack = " a "
|
||||
matches = [[1, 1]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-040"
|
||||
regex = '\b{start}'
|
||||
haystack = ""
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-050"
|
||||
regex = '\b{start}'
|
||||
haystack = "ab"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-060"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-060-bounds"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-070"
|
||||
regex = '\b{start}'
|
||||
haystack = " 𝛃 "
|
||||
matches = [[1, 1]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-080"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-090"
|
||||
regex = '\b{start}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-unicode-110"
|
||||
regex = '\b{start}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
# Tests for \b{end}
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-010"
|
||||
regex = '\b{end}'
|
||||
haystack = "a"
|
||||
matches = [[1, 1]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-020"
|
||||
regex = '\b{end}'
|
||||
haystack = "a "
|
||||
matches = [[1, 1]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-030"
|
||||
regex = '\b{end}'
|
||||
haystack = " a "
|
||||
matches = [[2, 2]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-040"
|
||||
regex = '\b{end}'
|
||||
haystack = ""
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-050"
|
||||
regex = '\b{end}'
|
||||
haystack = "ab"
|
||||
matches = [[2, 2]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-060"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃"
|
||||
matches = [[4, 4]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-060-bounds"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-070"
|
||||
regex = '\b{end}'
|
||||
haystack = " 𝛃 "
|
||||
matches = [[5, 5]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-080"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = [[4, 4]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-090"
|
||||
regex = '\b{end}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[5, 5]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-unicode-110"
|
||||
regex = '\b{end}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[5, 5]]
|
||||
unicode = true
|
||||
|
||||
# Tests for (?-u:\b{start-half})
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-010"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "a"
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-020"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "a "
|
||||
matches = [[0, 0], [2, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-030"
|
||||
regex = '\b{start-half}'
|
||||
haystack = " a "
|
||||
matches = [[0, 0], [1, 1], [3, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-040"
|
||||
regex = '\b{start-half}'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-050"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "ab"
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-060"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃"
|
||||
matches = [[0, 0], [4, 4]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-060-noutf8"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃"
|
||||
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
unicode = false
|
||||
utf8 = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-060-bounds"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-070"
|
||||
regex = '\b{start-half}'
|
||||
haystack = " 𝛃 "
|
||||
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-080"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = [[0, 0], [4, 4], [8, 8]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-090"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[0, 0], [4, 4]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-110"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[0, 0], [5, 5]]
|
||||
unicode = false
|
||||
|
||||
# Tests for (?-u:\b{end-half})
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-010"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "a"
|
||||
matches = [[1, 1]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-020"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "a "
|
||||
matches = [[1, 1], [2, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-030"
|
||||
regex = '\b{end-half}'
|
||||
haystack = " a "
|
||||
matches = [[0, 0], [2, 2], [3, 3]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-040"
|
||||
regex = '\b{end-half}'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-050"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "ab"
|
||||
matches = [[2, 2]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-060"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃"
|
||||
matches = [[0, 0], [4, 4]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-060-bounds"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-070"
|
||||
regex = '\b{end-half}'
|
||||
haystack = " 𝛃 "
|
||||
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-080"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = [[0, 0], [4, 4], [8, 8]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-090"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[0, 0], [5, 5]]
|
||||
unicode = false
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-ascii-110"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[1, 1], [5, 5]]
|
||||
unicode = false
|
||||
|
||||
# Tests for \b{start-half}
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-010"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "a"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-020"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "a "
|
||||
matches = [[0, 0], [2, 2]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-030"
|
||||
regex = '\b{start-half}'
|
||||
haystack = " a "
|
||||
matches = [[0, 0], [1, 1], [3, 3]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-040"
|
||||
regex = '\b{start-half}'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-050"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "ab"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-060"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-060-bounds"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-070"
|
||||
regex = '\b{start-half}'
|
||||
haystack = " 𝛃 "
|
||||
matches = [[0, 0], [1, 1], [6, 6]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-080"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = [[0, 0], [8, 8]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-090"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-start-half-unicode-110"
|
||||
regex = '\b{start-half}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
# Tests for \b{end-half}
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-010"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "a"
|
||||
matches = [[1, 1]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-020"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "a "
|
||||
matches = [[1, 1], [2, 2]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-030"
|
||||
regex = '\b{end-half}'
|
||||
haystack = " a "
|
||||
matches = [[0, 0], [2, 2], [3, 3]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-040"
|
||||
regex = '\b{end-half}'
|
||||
haystack = ""
|
||||
matches = [[0, 0]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-050"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "ab"
|
||||
matches = [[2, 2]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-060"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃"
|
||||
matches = [[4, 4]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-060-bounds"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃"
|
||||
bounds = [2, 3]
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-070"
|
||||
regex = '\b{end-half}'
|
||||
haystack = " 𝛃 "
|
||||
matches = [[0, 0], [5, 5], [6, 6]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-080"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃𐆀"
|
||||
matches = [[4, 4], [8, 8]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-090"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "𝛃b"
|
||||
matches = [[5, 5]]
|
||||
unicode = true
|
||||
|
||||
[[test]]
|
||||
name = "word-end-half-unicode-110"
|
||||
regex = '\b{end-half}'
|
||||
haystack = "b𝛃"
|
||||
matches = [[5, 5]]
|
||||
unicode = true
|
||||
|
||||
# Specialty tests.
|
||||
|
||||
# Since \r is special cased in the start state computation (to deal with CRLF
|
||||
# mode), this test ensures that the correct start state is computed when the
|
||||
# pattern starts with a half word boundary assertion.
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-carriage"
|
||||
regex = '\b{start-half}[a-z]+'
|
||||
haystack = 'ABC\rabc'
|
||||
matches = [[4, 7]]
|
||||
bounds = [4, 7]
|
||||
unescape = true
|
||||
|
||||
# Since \n is also special cased in the start state computation, this test
|
||||
# ensures that the correct start state is computed when the pattern starts with
|
||||
# a half word boundary assertion.
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-linefeed"
|
||||
regex = '\b{start-half}[a-z]+'
|
||||
haystack = 'ABC\nabc'
|
||||
matches = [[4, 7]]
|
||||
bounds = [4, 7]
|
||||
unescape = true
|
||||
|
||||
# Like the carriage return test above, but with a custom line terminator.
|
||||
[[test]]
|
||||
name = "word-start-half-ascii-customlineterm"
|
||||
regex = '\b{start-half}[a-z]+'
|
||||
haystack = 'ABC!abc'
|
||||
matches = [[4, 7]]
|
||||
bounds = [4, 7]
|
||||
unescape = true
|
||||
line-terminator = '!'
|
||||
Loading…
Add table
Add a link
Reference in a new issue