Vendor things
This commit is contained in:
parent
5deceec006
commit
977e3c17e5
19434 changed files with 10682014 additions and 0 deletions
98
third-party/vendor/regex/testdata/regex-lite.toml
vendored
Normal file
98
third-party/vendor/regex/testdata/regex-lite.toml
vendored
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
# These tests are specifically written to test the regex-lite crate. While it
|
||||
# largely has the same semantics as the regex crate, there are some differences
|
||||
# around Unicode support and UTF-8.
|
||||
#
|
||||
# To be clear, regex-lite supports far fewer patterns because of its lack of
|
||||
# Unicode support, nested character classes and character class set operations.
|
||||
# What we're talking about here are the patterns that both crates support but
|
||||
# where the semantics might differ.
|
||||
|
||||
# regex-lite uses ASCII definitions for Perl character classes.
# The haystack is a single non-ASCII digit character, so \d is expected to
# find no match at all, even though `unicode = true` is set.
|
||||
[[test]]
|
||||
name = "perl-class-decimal"
|
||||
regex = '\d'
|
||||
haystack = '᠕'
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
# regex-lite uses ASCII definitions for Perl character classes.
# The haystack is a basic (double-quoted) TOML string, so the \u2000 escape is
# decoded by the TOML parser into the single codepoint U+2000, a non-ASCII
# space. \s is expected to find no match, even though `unicode = true` is set.
|
||||
[[test]]
|
||||
name = "perl-class-space"
|
||||
regex = '\s'
|
||||
haystack = "\u2000"
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
# regex-lite uses ASCII definitions for Perl character classes.
# The haystack is a single non-ASCII letter, so \w is expected to find no
# match, even though `unicode = true` is set.
|
||||
[[test]]
|
||||
name = "perl-class-word"
|
||||
regex = '\w'
|
||||
haystack = 'δ'
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
# regex-lite uses the ASCII definition of word for word boundary assertions.
# The only character in the haystack is non-ASCII and therefore not a word
# character under that definition, so no position is a word boundary and no
# matches are expected.
|
||||
[[test]]
|
||||
name = "word-boundary"
|
||||
regex = '\b'
|
||||
haystack = 'δ'
|
||||
matches = []
|
||||
unicode = true
|
||||
|
||||
# regex-lite uses the ASCII definition of word for negated word boundary
|
||||
# assertions. But note that it should still not split codepoints!
# The haystack is one codepoint that occupies two bytes in UTF-8, so the
# expected empty matches are at offsets 0 and 2 only; offset 1 falls inside
# the codepoint and must not be reported.
|
||||
[[test]]
|
||||
name = "word-boundary-negated"
|
||||
regex = '\B'
|
||||
haystack = 'δ'
|
||||
matches = [[0, 0], [2, 2]]
|
||||
unicode = true
|
||||
|
||||
# While we're here, the empty regex---which matches at every
|
||||
# position---shouldn't split a codepoint either.
# The haystack is one codepoint that occupies four bytes in UTF-8, so the
# expected empty matches are at offsets 0 and 4 only; offsets 1, 2 and 3 fall
# inside the codepoint and must not be reported.
|
||||
[[test]]
|
||||
name = "empty-no-split-codepoint"
|
||||
regex = ''
|
||||
haystack = '💩'
|
||||
matches = [[0, 0], [4, 4]]
|
||||
unicode = true
|
||||
|
||||
# A dot always matches a full codepoint.
# Note that `unicode = false` is set here: the single expected match still
# spans all four UTF-8 bytes of the haystack's one codepoint rather than a
# single byte.
|
||||
[[test]]
|
||||
name = "dot-always-matches-codepoint"
|
||||
regex = '.'
|
||||
haystack = '💩'
|
||||
matches = [[0, 4]]
|
||||
unicode = false
|
||||
|
||||
# A negated character class also always matches a full codepoint.
# Note that `unicode = false` is set here: the single expected match still
# spans all four UTF-8 bytes of the haystack's one codepoint rather than a
# single byte.
|
||||
[[test]]
|
||||
name = "negated-class-always-matches-codepoint"
|
||||
regex = '[^a]'
|
||||
haystack = '💩'
|
||||
matches = [[0, 4]]
|
||||
unicode = false
|
||||
|
||||
# regex-lite only supports ASCII-aware case insensitive matching.
# The haystack is U+017F (LATIN SMALL LETTER LONG S), which Unicode case
# folding maps to 's'. With ASCII-only case insensitivity the pattern 's' is
# expected not to match it, even though `unicode = true` is set.
|
||||
[[test]]
|
||||
name = "case-insensitive-is-ascii-only"
|
||||
regex = 's'
|
||||
haystack = 'ſ'
|
||||
matches = []
|
||||
unicode = true
|
||||
case-insensitive = true
|
||||
|
||||
# Negated word boundaries shouldn't split a codepoint, but they will match
|
||||
# between the individual bytes of invalid UTF-8.
|
||||
#
|
||||
# This test is only valid for a 'bytes' API, but that doesn't (yet) exist in
|
||||
# regex-lite. This can't happen in the main API because &str can't contain
|
||||
# invalid UTF-8.
|
||||
# [[test]]
|
||||
# name = "word-boundary-invalid-utf8"
|
||||
# regex = '\B'
|
||||
# haystack = '\xFF\xFF\xFF\xFF'
|
||||
# unescape = true
|
||||
# matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
|
||||
# unicode = true
|
||||
# utf8 = false
|
||||
Loading…
Add table
Add a link
Reference in a new issue