Vendor things

2024-03-08 11:03:01 -08:00 · 2024-03-08 11:03:01 -08:00 · 977e3c17e5
commit 977e3c17e5
parent 5deceec006
19434 changed files with 10682014 additions and 0 deletions
--- a/third-party/vendor/regex/testdata/line-terminator.toml
+++ b/third-party/vendor/regex/testdata/line-terminator.toml
@ -0,0 +1,109 @@
+# This tests that we can switch the line terminator to the NUL byte.
+[[test]]
+name = "nul"
+regex = '(?m)^[a-z]+$'
+haystack = '\x00abc\x00'
+matches = [[1, 4]]
+unescape = true
+line-terminator = '\x00'
+
+# This tests that '.' will not match the configured line terminator, but will
+# match \n.
+[[test]]
+name = "dot-changes-with-line-terminator"
+regex = '.'
+haystack = '\x00\n'
+matches = [[1, 2]]
+unescape = true
+line-terminator = '\x00'
+
+# This tests that when we switch the line terminator, \n is no longer
+# recognized as the terminator.
+[[test]]
+name = "not-line-feed"
+regex = '(?m)^[a-z]+$'
+haystack = '\nabc\n'
+matches = []
+unescape = true
+line-terminator = '\x00'
+
+# This tests that we can set the line terminator to a non-ASCII byte and have
+# it behave as expected.
+[[test]]
+name = "non-ascii"
+regex = '(?m)^[a-z]+$'
+haystack = '\xFFabc\xFF'
+matches = [[1, 4]]
+unescape = true
+line-terminator = '\xFF'
+utf8 = false
+
+# This tests a tricky case where the line terminator is set to \r. This ensures
+# that the StartLF look-behind assertion is tracked when computing the start
+# state.
+[[test]]
+name = "carriage"
+regex = '(?m)^[a-z]+'
+haystack = 'ABC\rabc'
+matches = [[4, 7]]
+bounds = [4, 7]
+unescape = true
+line-terminator = '\r'
+
+# This tests that we can set the line terminator to a byte corresponding to a
+# word character, and things work as expected.
+[[test]]
+name = "word-byte"
+regex = '(?m)^[a-z]+$'
+haystack = 'ZabcZ'
+matches = [[1, 4]]
+unescape = true
+line-terminator = 'Z'
+
+# This tests that we can set the line terminator to a byte corresponding to a
+# non-word character, and things work as expected.
+[[test]]
+name = "non-word-byte"
+regex = '(?m)^[a-z]+$'
+haystack = '%abc%'
+matches = [[1, 4]]
+unescape = true
+line-terminator = '%'
+
+# This combines "set line terminator to a word byte" with a word boundary
+# assertion, which should result in no match even though ^/$ matches.
+[[test]]
+name = "word-boundary"
+regex = '(?m)^\b[a-z]+\b$'
+haystack = 'ZabcZ'
+matches = []
+unescape = true
+line-terminator = 'Z'
+
+# Like 'word-boundary', but does an anchored search at the point where ^
+# matches, but where \b should not.
+[[test]]
+name = "word-boundary-at"
+regex = '(?m)^\b[a-z]+\b$'
+haystack = 'ZabcZ'
+matches = []
+bounds = [1, 4]
+anchored = true
+unescape = true
+line-terminator = 'Z'
+
+# Like 'word-boundary-at', but flips the word boundary to a negation. This
+# in particular tests a tricky case in DFA engines, where they must consider
+# explicitly that a starting configuration from a custom line terminator may
+# also required setting the "is from word byte" flag on a state. Otherwise,
+# it's treated as "not from a word byte," which would result in \B not matching
+# here when it should.
+[[test]]
+name = "not-word-boundary-at"
+regex = '(?m)^\B[a-z]+\B$'
+haystack = 'ZabcZ'
+matches = [[1, 4]]
+bounds = [1, 4]
+anchored = true
+unescape = true
+line-terminator = 'Z'