Vendor things

2024-03-08 11:03:01 -08:00 · 2024-03-08 11:03:01 -08:00 · 977e3c17e5
commit 977e3c17e5
parent 5deceec006
19434 changed files with 10682014 additions and 0 deletions
--- a/third-party/vendor/regex/testdata/README.md
+++ b/third-party/vendor/regex/testdata/README.md
@ -0,0 +1,22 @@
+This directory contains a large suite of regex tests defined in a TOML format.
+They are used to drive tests in `tests/lib.rs`, `regex-automata/tests/lib.rs`
+and `regex-lite/tests/lib.rs`.
+
+See the [`regex-test`][regex-test] crate documentation for an explanation of
+the format and how it generates tests.
+
+The basic idea here is that we have many different regex engines but generally
+one set of tests. We want to be able to run those tests (or most of them) on
+every engine. Prior to `regex 1.9`, we used to do this with a hodge podge soup
+of macros and a different test executable for each engine. Overall, it took
+longer to compile, was harder to maintain and made the test definitions
+themselves less clear.
+
+In `regex 1.9`, when we moved over to `regex-automata`, the situation got a lot
+worse because of an increase in the number of engines. So I devised an engine
+independent format for testing regex patterns and their semantics.
+
+Note: the naming scheme used in these tests isn't terribly consistent. It would
+be great to fix that.
+
+[regex-test]: https://docs.rs/regex-test
--- a/third-party/vendor/regex/testdata/anchored.toml
+++ b/third-party/vendor/regex/testdata/anchored.toml
@ -0,0 +1,127 @@
+# These tests are specifically geared toward searches with 'anchored = true'.
+# While they are interesting in their own right, they are particularly
+# important for testing the one-pass DFA since the one-pass DFA can't work in
+# unanchored contexts.
+#
+# Note that "anchored" in this context does not mean "^". Anchored searches are
+# searches whose matches must begin at the start of the search, which may not
+# be at the start of the haystack. That's why anchored searches---and there are
+# some examples below---can still report multiple matches. This occurs when the
+# matches are adjacent to one another.
+
+[[test]]
+name = "greedy"
+regex = '(abc)+'
+haystack = "abcabcabc"
+matches = [
+  [[0, 9], [6, 9]],
+]
+anchored = true
+
+# When an "earliest" search is used, greediness doesn't really exist because
+# matches are reported as soon as they are known.
+[[test]]
+name = "greedy-earliest"
+regex = '(abc)+'
+haystack = "abcabcabc"
+matches = [
+  [[0, 3], [0, 3]],
+  [[3, 6], [3, 6]],
+  [[6, 9], [6, 9]],
+]
+anchored = true
+search-kind = "earliest"
+
+[[test]]
+name = "nongreedy"
+regex = '(abc)+?'
+haystack = "abcabcabc"
+matches = [
+  [[0, 3], [0, 3]],
+  [[3, 6], [3, 6]],
+  [[6, 9], [6, 9]],
+]
+anchored = true
+
+# When "all" semantics are used, non-greediness doesn't exist since the longest
+# possible match is always taken.
+[[test]]
+name = "nongreedy-all"
+regex = '(abc)+?'
+haystack = "abcabcabc"
+matches = [
+  [[0, 9], [6, 9]],
+]
+anchored = true
+match-kind = "all"
+
+[[test]]
+name = "word-boundary-unicode-01"
+regex = '\b\w+\b'
+haystack = 'βββ☃'
+matches = [[0, 6]]
+anchored = true
+
+[[test]]
+name = "word-boundary-nounicode-01"
+regex = '\b\w+\b'
+haystack = 'abcβ'
+matches = [[0, 3]]
+anchored = true
+unicode = false
+
+# Tests that '.c' doesn't match 'abc' when performing an anchored search from
+# the beginning of the haystack. This test found two different bugs in the
+# PikeVM and the meta engine.
+[[test]]
+name = "no-match-at-start"
+regex = '.c'
+haystack = 'abc'
+matches = []
+anchored = true
+
+# Like above, but at a non-zero start offset.
+[[test]]
+name = "no-match-at-start-bounds"
+regex = '.c'
+haystack = 'aabc'
+bounds = [1, 4]
+matches = []
+anchored = true
+
+# This is like no-match-at-start, but hits the "reverse inner" optimization
+# inside the meta engine. (no-match-at-start hits the "reverse suffix"
+# optimization.)
+[[test]]
+name = "no-match-at-start-reverse-inner"
+regex = '.c[a-z]'
+haystack = 'abcz'
+matches = []
+anchored = true
+
+# Like above, but at a non-zero start offset.
+[[test]]
+name = "no-match-at-start-reverse-inner-bounds"
+regex = '.c[a-z]'
+haystack = 'aabcz'
+bounds = [1, 5]
+matches = []
+anchored = true
+
+# Same as no-match-at-start, but applies to the meta engine's "reverse
+# anchored" optimization.
+[[test]]
+name = "no-match-at-start-reverse-anchored"
+regex = '.c[a-z]$'
+haystack = 'abcz'
+matches = []
+anchored = true
+
+# Like above, but at a non-zero start offset.
+[[test]]
+name = "no-match-at-start-reverse-anchored-bounds"
+regex = '.c[a-z]$'
+haystack = 'aabcz'
+bounds = [1, 5]
+matches = []
+anchored = true
--- a/third-party/vendor/regex/testdata/bytes.toml
+++ b/third-party/vendor/regex/testdata/bytes.toml
@ -0,0 +1,235 @@
+# These are tests specifically crafted for regexes that can match arbitrary
+# bytes. In some cases, we also test the Unicode variant as well, just because
+# it's good sense to do so. But also, these tests aren't really about Unicode,
+# but whether matches are only reported at valid UTF-8 boundaries. For most
+# tests in this entire collection, utf8 = true. But for these tests, we use
+# utf8 = false.
+
+[[test]]
+name = "word-boundary-ascii"
+regex = ' \b'
+haystack = " δ"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "word-boundary-unicode"
+regex = ' \b'
+haystack = " δ"
+matches = [[0, 1]]
+unicode = true
+utf8 = false
+
+[[test]]
+name = "word-boundary-ascii-not"
+regex = ' \B'
+haystack = " δ"
+matches = [[0, 1]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "word-boundary-unicode-not"
+regex = ' \B'
+haystack = " δ"
+matches = []
+unicode = true
+utf8 = false
+
+[[test]]
+name = "perl-word-ascii"
+regex = '\w+'
+haystack = "aδ"
+matches = [[0, 1]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "perl-word-unicode"
+regex = '\w+'
+haystack = "aδ"
+matches = [[0, 3]]
+unicode = true
+utf8 = false
+
+[[test]]
+name = "perl-decimal-ascii"
+regex = '\d+'
+haystack = "1२३9"
+matches = [[0, 1], [7, 8]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "perl-decimal-unicode"
+regex = '\d+'
+haystack = "1२३9"
+matches = [[0, 8]]
+unicode = true
+utf8 = false
+
+[[test]]
+name = "perl-whitespace-ascii"
+regex = '\s+'
+haystack = " \u1680"
+matches = [[0, 1]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "perl-whitespace-unicode"
+regex = '\s+'
+haystack = " \u1680"
+matches = [[0, 4]]
+unicode = true
+utf8 = false
+
+# The first `(.+)` matches two Unicode codepoints, but can't match the 5th
+# byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and
+# matches.
+[[test]]
+name = "mixed-dot"
+regex = '(.+)(?-u)(.+)'
+haystack = '\xCE\x93\xCE\x94\xFF'
+matches = [
+  [[0, 5], [0, 4], [4, 5]],
+]
+unescape = true
+unicode = true
+utf8 = false
+
+[[test]]
+name = "case-one-ascii"
+regex = 'a'
+haystack = "A"
+matches = [[0, 1]]
+case-insensitive = true
+unicode = false
+utf8 = false
+
+[[test]]
+name = "case-one-unicode"
+regex = 'a'
+haystack = "A"
+matches = [[0, 1]]
+case-insensitive = true
+unicode = true
+utf8 = false
+
+[[test]]
+name = "case-class-simple-ascii"
+regex = '[a-z]+'
+haystack = "AaAaA"
+matches = [[0, 5]]
+case-insensitive = true
+unicode = false
+utf8 = false
+
+[[test]]
+name = "case-class-ascii"
+regex = '[a-z]+'
+haystack = "aA\u212AaA"
+matches = [[0, 2], [5, 7]]
+case-insensitive = true
+unicode = false
+utf8 = false
+
+[[test]]
+name = "case-class-unicode"
+regex = '[a-z]+'
+haystack = "aA\u212AaA"
+matches = [[0, 7]]
+case-insensitive = true
+unicode = true
+utf8 = false
+
+[[test]]
+name = "negate-ascii"
+regex = '[^a]'
+haystack = "δ"
+matches = [[0, 1], [1, 2]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "negate-unicode"
+regex = '[^a]'
+haystack = "δ"
+matches = [[0, 2]]
+unicode = true
+utf8 = false
+
+# When utf8=true, this won't match, because the implicit '.*?' prefix is
+# Unicode aware and will refuse to match through invalid UTF-8 bytes.
+[[test]]
+name = "dotstar-prefix-ascii"
+regex = 'a'
+haystack = '\xFFa'
+matches = [[1, 2]]
+unescape = true
+unicode = false
+utf8 = false
+
+[[test]]
+name = "dotstar-prefix-unicode"
+regex = 'a'
+haystack = '\xFFa'
+matches = [[1, 2]]
+unescape = true
+unicode = true
+utf8 = false
+
+[[test]]
+name = "null-bytes"
+regex = '(?P<cstr>[^\x00]+)\x00'
+haystack = 'foo\x00'
+matches = [
+  [[0, 4], [0, 3]],
+]
+unescape = true
+unicode = false
+utf8 = false
+
+[[test]]
+name = "invalid-utf8-anchor-100"
+regex = '\xCC?^'
+haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
+matches = [[0, 0]]
+unescape = true
+unicode = false
+utf8 = false
+
+[[test]]
+name = "invalid-utf8-anchor-200"
+regex = '^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$'
+haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
+matches = [[22, 22]]
+unescape = true
+unicode = false
+utf8 = false
+
+[[test]]
+name = "invalid-utf8-anchor-300"
+regex = '^|ddp\xff\xffdddddlQd@\x80'
+haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
+matches = [[0, 0]]
+unescape = true
+unicode = false
+utf8 = false
+
+[[test]]
+name = "word-boundary-ascii-100"
+regex = '\Bx\B'
+haystack = "áxβ"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "word-boundary-ascii-200"
+regex = '\B'
+haystack = "0\U0007EF5E"
+matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
+unicode = false
+utf8 = false
--- a/third-party/vendor/regex/testdata/crazy.toml
+++ b/third-party/vendor/regex/testdata/crazy.toml
@ -0,0 +1,315 @@
+[[test]]
+name = "nothing-empty"
+regex = []
+haystack = ""
+matches = []
+
+[[test]]
+name = "nothing-something"
+regex = []
+haystack = "wat"
+matches = []
+
+[[test]]
+name = "ranges"
+regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b'
+haystack = "num: 255"
+matches = [[5, 8]]
+
+[[test]]
+name = "ranges-not"
+regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b'
+haystack = "num: 256"
+matches = []
+
+[[test]]
+name = "float1"
+regex = '[-+]?[0-9]*\.?[0-9]+'
+haystack = "0.1"
+matches = [[0, 3]]
+
+[[test]]
+name = "float2"
+regex = '[-+]?[0-9]*\.?[0-9]+'
+haystack = "0.1.2"
+matches = [[0, 3]]
+match-limit = 1
+
+[[test]]
+name = "float3"
+regex = '[-+]?[0-9]*\.?[0-9]+'
+haystack = "a1.2"
+matches = [[1, 4]]
+
+[[test]]
+name = "float4"
+regex = '[-+]?[0-9]*\.?[0-9]+'
+haystack = "1.a"
+matches = [[0, 1]]
+
+[[test]]
+name = "float5"
+regex = '^[-+]?[0-9]*\.?[0-9]+$'
+haystack = "1.a"
+matches = []
+
+[[test]]
+name = "email"
+regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b'
+haystack = "mine is jam.slam@gmail.com "
+matches = [[8, 26]]
+
+[[test]]
+name = "email-not"
+regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b'
+haystack = "mine is jam.slam@gmail "
+matches = []
+
+[[test]]
+name = "email-big"
+regex = '''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?'''
+haystack = "mine is jam.slam@gmail.com "
+matches = [[8, 26]]
+
+[[test]]
+name = "date1"
+regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$'
+haystack = "1900-01-01"
+matches = [[0, 10]]
+unicode = false
+
+[[test]]
+name = "date2"
+regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$'
+haystack = "1900-00-01"
+matches = []
+unicode = false
+
+[[test]]
+name = "date3"
+regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$'
+haystack = "1900-13-01"
+matches = []
+unicode = false
+
+[[test]]
+name = "start-end-empty"
+regex = '^$'
+haystack = ""
+matches = [[0, 0]]
+
+[[test]]
+name = "start-end-empty-rev"
+regex = '$^'
+haystack = ""
+matches = [[0, 0]]
+
+[[test]]
+name = "start-end-empty-many-1"
+regex = '^$^$^$'
+haystack = ""
+matches = [[0, 0]]
+
+[[test]]
+name = "start-end-empty-many-2"
+regex = '^^^$$$'
+haystack = ""
+matches = [[0, 0]]
+
+[[test]]
+name = "start-end-empty-rep"
+regex = '(?:^$)*'
+haystack = "a\nb\nc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
+
+[[test]]
+name = "start-end-empty-rep-rev"
+regex = '(?:$^)*'
+haystack = "a\nb\nc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
+
+[[test]]
+name = "neg-class-letter"
+regex = '[^ac]'
+haystack = "acx"
+matches = [[2, 3]]
+
+[[test]]
+name = "neg-class-letter-comma"
+regex = '[^a,]'
+haystack = "a,x"
+matches = [[2, 3]]
+
+[[test]]
+name = "neg-class-letter-space"
+regex = '[^a[:space:]]'
+haystack = "a x"
+matches = [[2, 3]]
+
+[[test]]
+name = "neg-class-comma"
+regex = '[^,]'
+haystack = ",,x"
+matches = [[2, 3]]
+
+[[test]]
+name = "neg-class-space"
+regex = '[^[:space:]]'
+haystack = " a"
+matches = [[1, 2]]
+
+[[test]]
+name = "neg-class-space-comma"
+regex = '[^,[:space:]]'
+haystack = ", a"
+matches = [[2, 3]]
+
+[[test]]
+name = "neg-class-comma-space"
+regex = '[^[:space:],]'
+haystack = " ,a"
+matches = [[2, 3]]
+
+[[test]]
+name = "neg-class-ascii"
+regex = '[^[:alpha:]Z]'
+haystack = "A1"
+matches = [[1, 2]]
+
+[[test]]
+name = "lazy-many-many"
+regex = '(?:(?:.*)*?)='
+haystack = "a=b"
+matches = [[0, 2]]
+
+[[test]]
+name = "lazy-many-optional"
+regex = '(?:(?:.?)*?)='
+haystack = "a=b"
+matches = [[0, 2]]
+
+[[test]]
+name = "lazy-one-many-many"
+regex = '(?:(?:.*)+?)='
+haystack = "a=b"
+matches = [[0, 2]]
+
+[[test]]
+name = "lazy-one-many-optional"
+regex = '(?:(?:.?)+?)='
+haystack = "a=b"
+matches = [[0, 2]]
+
+[[test]]
+name = "lazy-range-min-many"
+regex = '(?:(?:.*){1,}?)='
+haystack = "a=b"
+matches = [[0, 2]]
+
+[[test]]
+name = "lazy-range-many"
+regex = '(?:(?:.*){1,2}?)='
+haystack = "a=b"
+matches = [[0, 2]]
+
+[[test]]
+name = "greedy-many-many"
+regex = '(?:(?:.*)*)='
+haystack = "a=b"
+matches = [[0, 2]]
+
+[[test]]
+name = "greedy-many-optional"
+regex = '(?:(?:.?)*)='
+haystack = "a=b"
+matches = [[0, 2]]
+
+[[test]]
+name = "greedy-one-many-many"
+regex = '(?:(?:.*)+)='
+haystack = "a=b"
+matches = [[0, 2]]
+
+[[test]]
+name = "greedy-one-many-optional"
+regex = '(?:(?:.?)+)='
+haystack = "a=b"
+matches = [[0, 2]]
+
+[[test]]
+name = "greedy-range-min-many"
+regex = '(?:(?:.*){1,})='
+haystack = "a=b"
+matches = [[0, 2]]
+
+[[test]]
+name = "greedy-range-many"
+regex = '(?:(?:.*){1,2})='
+haystack = "a=b"
+matches = [[0, 2]]
+
+[[test]]
+name = "empty1"
+regex = ''
+haystack = ""
+matches = [[0, 0]]
+
+[[test]]
+name = "empty2"
+regex = ''
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty3"
+regex = '(?:)'
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty4"
+regex = '(?:)*'
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty5"
+regex = '(?:)+'
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty6"
+regex = '(?:)?'
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty7"
+regex = '(?:)(?:)'
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty8"
+regex = '(?:)+|z'
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty9"
+regex = 'z|(?:)+'
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty10"
+regex = '(?:)+|b'
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty11"
+regex = 'b|(?:)+'
+haystack = "abc"
+matches = [[0, 0], [1, 2], [3, 3]]
--- a/third-party/vendor/regex/testdata/crlf.toml
+++ b/third-party/vendor/regex/testdata/crlf.toml
@ -0,0 +1,117 @@
+# This is a basic test that checks ^ and $ treat \r\n as a single line
+# terminator. If ^ and $ only treated \n as a line terminator, then this would
+# only match 'xyz' at the end of the haystack.
+[[test]]
+name = "basic"
+regex = '(?mR)^[a-z]+$'
+haystack = "abc\r\ndef\r\nxyz"
+matches = [[0, 3], [5, 8], [10, 13]]
+
+# Tests that a CRLF-aware '^$' assertion does not match between CR and LF.
+[[test]]
+name = "start-end-non-empty"
+regex = '(?mR)^$'
+haystack = "abc\r\ndef\r\nxyz"
+matches = []
+
+# Tests that a CRLF-aware '^$' assertion matches the empty string, just like
+# a non-CRLF-aware '^$' assertion.
+[[test]]
+name = "start-end-empty"
+regex = '(?mR)^$'
+haystack = ""
+matches = [[0, 0]]
+
+# Tests that a CRLF-aware '^$' assertion matches the empty string preceding
+# and following a line terminator.
+[[test]]
+name = "start-end-before-after"
+regex = '(?mR)^$'
+haystack = "\r\n"
+matches = [[0, 0], [2, 2]]
+
+# Tests that a CRLF-aware '^' assertion does not split a line terminator.
+[[test]]
+name = "start-no-split"
+regex = '(?mR)^'
+haystack = "abc\r\ndef\r\nxyz"
+matches = [[0, 0], [5, 5], [10, 10]]
+
+# Same as above, but with adjacent runs of line terminators.
+[[test]]
+name = "start-no-split-adjacent"
+regex = '(?mR)^'
+haystack = "\r\n\r\n\r\n"
+matches = [[0, 0], [2, 2], [4, 4], [6, 6]]
+
+# Same as above, but with adjacent runs of just carriage returns.
+[[test]]
+name = "start-no-split-adjacent-cr"
+regex = '(?mR)^'
+haystack = "\r\r\r"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+# Same as above, but with adjacent runs of just line feeds.
+[[test]]
+name = "start-no-split-adjacent-lf"
+regex = '(?mR)^'
+haystack = "\n\n\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+# Tests that a CRLF-aware '$' assertion does not split a line terminator.
+[[test]]
+name = "end-no-split"
+regex = '(?mR)$'
+haystack = "abc\r\ndef\r\nxyz"
+matches = [[3, 3], [8, 8], [13, 13]]
+
+# Same as above, but with adjacent runs of line terminators.
+[[test]]
+name = "end-no-split-adjacent"
+regex = '(?mR)$'
+haystack = "\r\n\r\n\r\n"
+matches = [[0, 0], [2, 2], [4, 4], [6, 6]]
+
+# Same as above, but with adjacent runs of just carriage returns.
+[[test]]
+name = "end-no-split-adjacent-cr"
+regex = '(?mR)$'
+haystack = "\r\r\r"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+# Same as above, but with adjacent runs of just line feeds.
+[[test]]
+name = "end-no-split-adjacent-lf"
+regex = '(?mR)$'
+haystack = "\n\n\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+# Tests that '.' does not match either \r or \n when CRLF mode is enabled. Note
+# that this doesn't require multi-line mode to be enabled.
+[[test]]
+name = "dot-no-crlf"
+regex = '(?R).'
+haystack = "\r\n\r\n\r\n"
+matches = []
+
+# This is a test that caught a bug in the one-pass DFA where it (amazingly) was
+# using 'is_end_lf' instead of 'is_end_crlf' here. It was probably a copy &
+# paste bug. We insert an empty capture group here because it provokes the meta
+# regex engine to first find a match and then trip over a panic because the
+# one-pass DFA erroneously says there is no match.
+[[test]]
+name = "onepass-wrong-crlf-with-capture"
+regex = '(?Rm:().$)'
+haystack = "ZZ\r"
+matches = [[[1, 2], [1, 1]]]
+
+# This is like onepass-wrong-crlf-with-capture above, except it sets up the
+# test so that it can be run by the one-pass DFA directly. (i.e., Make it
+# anchored and start the search at the right place.)
+[[test]]
+name = "onepass-wrong-crlf-anchored"
+regex = '(?Rm:.$)'
+haystack = "ZZ\r"
+matches = [[1, 2]]
+anchored = true
+bounds = [1, 3]
--- a/third-party/vendor/regex/testdata/earliest.toml
+++ b/third-party/vendor/regex/testdata/earliest.toml
@ -0,0 +1,52 @@
+[[test]]
+name = "no-greedy-100"
+regex = 'a+'
+haystack = "aaa"
+matches = [[0, 1], [1, 2], [2, 3]]
+search-kind = "earliest"
+
+[[test]]
+name = "no-greedy-200"
+regex = 'abc+'
+haystack = "zzzabccc"
+matches = [[3, 6]]
+search-kind = "earliest"
+
+[[test]]
+name = "is-ungreedy"
+regex = 'a+?'
+haystack = "aaa"
+matches = [[0, 1], [1, 2], [2, 3]]
+search-kind = "earliest"
+
+[[test]]
+name = "look-start-test"
+regex = '^(abc|a)'
+haystack = "abc"
+matches = [
+  [[0, 1], [0, 1]],
+]
+search-kind = "earliest"
+
+[[test]]
+name = "look-end-test"
+regex = '(abc|a)$'
+haystack = "abc"
+matches = [
+  [[0, 3], [0, 3]],
+]
+search-kind = "earliest"
+
+[[test]]
+name = "no-leftmost-first-100"
+regex = 'abc|a'
+haystack = "abc"
+matches = [[0, 1]]
+search-kind = "earliest"
+
+[[test]]
+name = "no-leftmost-first-200"
+regex = 'aba|a'
+haystack = "aba"
+matches = [[0, 1], [2, 3]]
+search-kind = "earliest"
--- a/third-party/vendor/regex/testdata/empty.toml
+++ b/third-party/vendor/regex/testdata/empty.toml
@ -0,0 +1,113 @@
+[[test]]
+name = "100"
+regex = "|b"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "110"
+regex = "b|"
+haystack = "abc"
+matches = [[0, 0], [1, 2], [3, 3]]
+
+[[test]]
+name = "120"
+regex = "|z"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "130"
+regex = "z|"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "200"
+regex = "|"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "210"
+regex = "||"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "220"
+regex = "||b"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "230"
+regex = "b||"
+haystack = "abc"
+matches = [[0, 0], [1, 2], [3, 3]]
+
+[[test]]
+name = "240"
+regex = "||z"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "300"
+regex = "(?:)|b"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "310"
+regex = "b|(?:)"
+haystack = "abc"
+matches = [[0, 0], [1, 2], [3, 3]]
+
+[[test]]
+name = "320"
+regex = "(?:|)"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "330"
+regex = "(?:|)|z"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "400"
+regex = "a(?:)|b"
+haystack = "abc"
+matches = [[0, 1], [1, 2]]
+
+[[test]]
+name = "500"
+regex = ""
+haystack = ""
+matches = [[0, 0]]
+
+[[test]]
+name = "510"
+regex = ""
+haystack = "a"
+matches = [[0, 0], [1, 1]]
+
+[[test]]
+name = "520"
+regex = ""
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "600"
+regex = '(?:|a)*'
+haystack = "aaa"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "610"
+regex = '(?:|a)+'
+haystack = "aaa"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
--- a/third-party/vendor/regex/testdata/expensive.toml
+++ b/third-party/vendor/regex/testdata/expensive.toml
@ -0,0 +1,23 @@
+# This file represents tests that may be expensive to run on some regex engines.
+# For example, tests that build a full DFA ahead of time and minimize it can
+# take a horrendously long time on regexes that are large (or result in an
+# explosion in the number of states). We group these tests together so that
+# such engines can simply skip these tests.
+
+# See: https://github.com/rust-lang/regex/issues/98
+[[test]]
+name = "regression-many-repeat-no-stack-overflow"
+regex = '^.{1,2500}'
+haystack = "a"
+matches = [[0, 1]]
+
+# This test is meant to blow the bounded backtracker's visited capacity. In
+# order to do that, we need a somewhat sizeable regex. The purpose of this
+# is to make sure there's at least one test that exercises this path in the
+# backtracker. All other tests (at time of writing) are small enough that the
+# backtracker can handle them fine.
+[[test]]
+name = "backtrack-blow-visited-capacity"
+regex = '\pL{50}'
+haystack = "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyZZ"
+matches = [[0, 50], [50, 100], [100, 150]]
--- a/third-party/vendor/regex/testdata/flags.toml
+++ b/third-party/vendor/regex/testdata/flags.toml
@ -0,0 +1,68 @@
+[[test]]
+name = "1"
+regex = "(?i)abc"
+haystack = "ABC"
+matches = [[0, 3]]
+
+[[test]]
+name = "2"
+regex = "(?i)a(?-i)bc"
+haystack = "Abc"
+matches = [[0, 3]]
+
+[[test]]
+name = "3"
+regex = "(?i)a(?-i)bc"
+haystack = "ABC"
+matches = []
+
+[[test]]
+name = "4"
+regex = "(?is)a."
+haystack = "A\n"
+matches = [[0, 2]]
+
+[[test]]
+name = "5"
+regex = "(?is)a.(?-is)a."
+haystack = "A\nab"
+matches = [[0, 4]]
+
+[[test]]
+name = "6"
+regex = "(?is)a.(?-is)a."
+haystack = "A\na\n"
+matches = []
+
+[[test]]
+name = "7"
+regex = "(?is)a.(?-is:a.)?"
+haystack = "A\na\n"
+matches = [[0, 2]]
+match-limit = 1
+
+[[test]]
+name = "8"
+regex = "(?U)a+"
+haystack = "aa"
+matches = [[0, 1]]
+match-limit = 1
+
+[[test]]
+name = "9"
+regex = "(?U)a+?"
+haystack = "aa"
+matches = [[0, 2]]
+
+[[test]]
+name = "10"
+regex = "(?U)(?-U)a+"
+haystack = "aa"
+matches = [[0, 2]]
+
+[[test]]
+name = "11"
+regex = '(?m)(?:^\d+$\n?)+'
+haystack = "123\n456\n789"
+matches = [[0, 11]]
+unicode = false
--- a/third-party/vendor/regex/testdata/fowler/basic.toml
+++ b/third-party/vendor/regex/testdata/fowler/basic.toml
--- a/third-party/vendor/regex/testdata/fowler/dat/README
+++ b/third-party/vendor/regex/testdata/fowler/dat/README
@ -0,0 +1,25 @@
+Test data was taken from the Go distribution, which was in turn taken from the
+testregex test suite:
+
+  http://web.archive.org/web/20150925124103/http://www2.research.att.com/~astopen/testregex/testregex.html
+
+Unfortunately, the original web site now appears dead, but the test data lives
+on.
+
+The LICENSE in this directory corresponds to the LICENSE that the data was
+originally released under.
+
+The tests themselves were modified for RE2/Go (and marked as such). A
+couple were modified further by me (Andrew Gallant) and marked with 'Rust'.
+
+After some number of years, these tests were transformed into a TOML format
+using the 'regex-cli generate fowler' command. To re-generate the
+TOML files, run the following from the root of this repository:
+
+  regex-cli generate fowler testdata/fowler testdata/fowler/dat/*.dat
+
+This assumes that you have 'regex-cli' installed. See 'regex-cli/README.md'
+from the root of the repository for more information.
+
+This brings the Fowler tests into a more "sensible" structured format in which
+other tests can be written such that they aren't write-only.
--- a/third-party/vendor/regex/testdata/fowler/dat/basic.dat
+++ b/third-party/vendor/regex/testdata/fowler/dat/basic.dat
@ -0,0 +1,223 @@
+NOTE	all standard compliant implementations should pass these : 2002-05-31
+
+BE	abracadabra$	abracadabracadabra	(7,18)
+BE	a...b		abababbb		(2,7)
+BE	XXXXXX		..XXXXXX		(2,8)
+E	\)		()	(1,2)
+BE	a]		a]a	(0,2)
+B	}		}	(0,1)
+E	\}		}	(0,1)
+BE	\]		]	(0,1)
+B	]		]	(0,1)
+E	]		]	(0,1)
+B	{		{	(0,1)
+B	}		}	(0,1)
+BE	^a		ax	(0,1)
+BE	\^a		a^a	(1,3)
+BE	a\^		a^	(0,2)
+BE	a$		aa	(1,2)
+BE	a\$		a$	(0,2)
+BE	^$		NULL	(0,0)
+E	$^		NULL	(0,0)
+E	a($)		aa	(1,2)(2,2)
+E	a*(^a)		aa	(0,1)(0,1)
+E	(..)*(...)*		a	(0,0)
+E	(..)*(...)*		abcd	(0,4)(2,4)
+E	(ab|a)(bc|c)		abc	(0,3)(0,2)(2,3)
+E	(ab)c|abc		abc	(0,3)(0,2)
+E	a{0}b		ab			(1,2)
+E	(a*)(b?)(b+)b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
+E	(a*)(b{0,1})(b{1,})b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
+E	a{9876543210}	NULL	BADBR
+E	((a|a)|a)			a	(0,1)(0,1)(0,1)
+E	(a*)(a|aa)			aaaa	(0,4)(0,3)(3,4)
+E	a*(a.|aa)			aaaa	(0,4)(2,4)
+E	a(b)|c(d)|a(e)f			aef	(0,3)(?,?)(?,?)(1,2)
+E	(a|b)?.*			b	(0,1)(0,1)
+E	(a|b)c|a(b|c)			ac	(0,2)(0,1)
+E	(a|b)c|a(b|c)			ab	(0,2)(?,?)(1,2)
+E	(a|b)*c|(a|ab)*c		abc	(0,3)(1,2)
+E	(a|b)*c|(a|ab)*c		xc	(1,2)
+E	(.a|.b).*|.*(.a|.b)		xa	(0,2)(0,2)
+E	a?(ab|ba)ab			abab	(0,4)(0,2)
+E	a?(ac{0}b|ba)ab			abab	(0,4)(0,2)
+E	ab|abab				abbabab	(0,2)
+E	aba|bab|bba			baaabbbaba	(5,8)
+E	aba|bab				baaabbbaba	(6,9)
+E	(aa|aaa)*|(a|aaaaa)		aa	(0,2)(0,2)
+E	(a.|.a.)*|(a|.a...)		aa	(0,2)(0,2)
+E	ab|a				xabc	(1,3)
+E	ab|a				xxabc	(2,4)
+Ei	(Ab|cD)*			aBcD	(0,4)(2,4)
+BE	[^-]			--a		(2,3)
+BE	[a-]*			--a		(0,3)
+BE	[a-m-]*			--amoma--	(0,4)
+E	:::1:::0:|:::1:1:0:	:::0:::1:::1:::0:	(8,17)
+E	:::1:::0:|:::1:1:1:	:::0:::1:::1:::0:	(8,17)
+{E	[[:upper:]]		A		(0,1)	[[<element>]] not supported
+E	[[:lower:]]+		`az{		(1,3)
+E	[[:upper:]]+		@AZ[		(1,3)
+# No collation in Go
+#BE	[[-]]			[[-]]		(2,4)
+#BE	[[.NIL.]]	NULL	ECOLLATE
+#BE	[[=aleph=]]	NULL	ECOLLATE
+}
+BE$	\n		\n	(0,1)
+BEn$	\n		\n	(0,1)
+BE$	[^a]		\n	(0,1)
+BE$	\na		\na	(0,2)
+E	(a)(b)(c)	abc	(0,3)(0,1)(1,2)(2,3)
+BE	xxx		xxx	(0,3)
+#E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 6,	(0,6)
+E	(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)	feb 6,	(0,6)	Rust
+#E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	2/7	(0,3)
+E	(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)	2/7	(0,3)	Rust
+#E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 1,Feb 6	(5,11)
+E	(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)	feb 1,Feb 6	(5,11)	Rust
+#E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))	x	(0,1)(0,1)(0,1)
+E	(((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))	x	(0,1)(0,1)(0,1)	Rust
+#E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*	xx	(0,2)(1,2)(1,2)
+E	(((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))*	xx	(0,2)(1,2)(1,2)	Rust
+E	a?(ab|ba)*	ababababababababababababababababababababababababababababababababababababababababa	(0,81)(79,81)
+E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabbbbaa	(18,25)
+E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabaa	(18,22)
+E	aaac|aabc|abac|abbc|baac|babc|bbac|bbbc	baaabbbabac	(7,11)
+#BE$	.*			\x01\xff	(0,2)
+BE$	.*			\x01\x7f	(0,2)	Rust
+E	aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	(53,57)
+L	aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	NOMATCH
+E	a*a*a*a*a*b		aaaaaaaaab	(0,10)
+BE	^			NULL		(0,0)
+BE	$			NULL		(0,0)
+BE	^$			NULL		(0,0)
+BE	^a$			a		(0,1)
+BE	abc			abc		(0,3)
+BE	abc			xabcy		(1,4)
+BE	abc			ababc		(2,5)
+BE	ab*c			abc		(0,3)
+BE	ab*bc			abc		(0,3)
+BE	ab*bc			abbc		(0,4)
+BE	ab*bc			abbbbc		(0,6)
+E	ab+bc			abbc		(0,4)
+E	ab+bc			abbbbc		(0,6)
+E	ab?bc			abbc		(0,4)
+E	ab?bc			abc		(0,3)
+E	ab?c			abc		(0,3)
+BE	^abc$			abc		(0,3)
+BE	^abc			abcc		(0,3)
+BE	abc$			aabc		(1,4)
+BE	^			abc		(0,0)
+BE	$			abc		(3,3)
+BE	a.c			abc		(0,3)
+BE	a.c			axc		(0,3)
+BE	a.*c			axyzc		(0,5)
+BE	a[bc]d			abd		(0,3)
+BE	a[b-d]e			ace		(0,3)
+BE	a[b-d]			aac		(1,3)
+BE	a[-b]			a-		(0,2)
+BE	a[b-]			a-		(0,2)
+BE	a]			a]		(0,2)
+BE	a[]]b			a]b		(0,3)
+BE	a[^bc]d			aed		(0,3)
+BE	a[^-b]c			adc		(0,3)
+BE	a[^]b]c			adc		(0,3)
+E	ab|cd			abc		(0,2)
+E	ab|cd			abcd		(0,2)
+E	a\(b			a(b		(0,3)
+E	a\(*b			ab		(0,2)
+E	a\(*b			a((b		(0,4)
+E	((a))			abc		(0,1)(0,1)(0,1)
+E	(a)b(c)			abc		(0,3)(0,1)(2,3)
+E	a+b+c			aabbabc		(4,7)
+E	a*			aaa		(0,3)
+E	(a*)*			-		(0,0)(0,0)
+E	(a*)+			-		(0,0)(0,0)
+E	(a*|b)*			-		(0,0)(0,0)
+E	(a+|b)*			ab		(0,2)(1,2)
+E	(a+|b)+			ab		(0,2)(1,2)
+E	(a+|b)?			ab		(0,1)(0,1)
+BE	[^ab]*			cde		(0,3)
+E	(^)*			-		(0,0)(0,0)
+BE	a*			NULL		(0,0)
+E	([abc])*d		abbbcd		(0,6)(4,5)
+E	([abc])*bcd		abcd		(0,4)(0,1)
+E	a|b|c|d|e		e		(0,1)
+E	(a|b|c|d|e)f		ef		(0,2)(0,1)
+E	((a*|b))*		-		(0,0)(0,0)(0,0)
+BE	abcd*efg		abcdefg		(0,7)
+BE	ab*			xabyabbbz	(1,3)
+BE	ab*			xayabbbz	(1,2)
+E	(ab|cd)e		abcde		(2,5)(2,4)
+BE	[abhgefdc]ij		hij		(0,3)
+E	(a|b)c*d		abcd		(1,4)(1,2)
+E	(ab|ab*)bc		abc		(0,3)(0,1)
+E	a([bc]*)c*		abc		(0,3)(1,3)
+E	a([bc]*)(c*d)		abcd		(0,4)(1,3)(3,4)
+E	a([bc]+)(c*d)		abcd		(0,4)(1,3)(3,4)
+E	a([bc]*)(c+d)		abcd		(0,4)(1,2)(2,4)
+E	a[bcd]*dcdcde		adcdcde		(0,7)
+E	(ab|a)b*c		abc		(0,3)(0,2)
+E	((a)(b)c)(d)		abcd		(0,4)(0,3)(0,1)(1,2)(3,4)
+BE	[A-Za-z_][A-Za-z0-9_]*	alpha		(0,5)
+E	^a(bc+|b[eh])g|.h$	abh		(1,3)
+E	(bc+d$|ef*g.|h?i(j|k))	effgz		(0,5)(0,5)
+E	(bc+d$|ef*g.|h?i(j|k))	ij		(0,2)(0,2)(1,2)
+E	(bc+d$|ef*g.|h?i(j|k))	reffgz		(1,6)(1,6)
+E	(((((((((a)))))))))	a		(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
+BE	multiple words		multiple words yeah	(0,14)
+E	(.*)c(.*)		abcde		(0,5)(0,2)(3,5)
+BE	abcd			abcd		(0,4)
+E	a(bc)d			abcd		(0,4)(1,3)
+E	a[-]?c		ac		(0,3)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qaddafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mo'ammar Gadhafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Kaddafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qadhafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gadafi	(0,14)(?,?)(10,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadafi	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moamar Gaddafi	(0,14)(?,?)(9,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadhdhafi	(0,18)(?,?)(13,15)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Khaddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafy	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghadafi	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muamar Kaddafi	(0,14)(?,?)(9,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Quathafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gheddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Khadafy	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Qudhafi	(0,15)(?,?)(10,12)
+E	a+(b|c)*d+		aabcdd			(0,6)(3,4)
+E	^.+$			vivi			(0,4)
+E	^(.+)$			vivi			(0,4)(0,4)
+E	^([^!.]+).att.com!(.+)$	gryphon.att.com!eby	(0,19)(0,7)(16,19)
+E	^([^!]+!)?([^!]+)$	bas			(0,3)(?,?)(0,3)
+E	^([^!]+!)?([^!]+)$	bar!bas			(0,7)(0,4)(4,7)
+E	^([^!]+!)?([^!]+)$	foo!bas			(0,7)(0,4)(4,7)
+E	^.+!([^!]+!)([^!]+)$	foo!bar!bas		(0,11)(4,8)(8,11)
+E	((foo)|(bar))!bas	bar!bas			(0,7)(0,3)(?,?)(0,3)
+E	((foo)|(bar))!bas	foo!bar!bas		(4,11)(4,7)(?,?)(4,7)
+E	((foo)|(bar))!bas	foo!bas			(0,7)(0,3)(0,3)
+E	((foo)|bar)!bas		bar!bas			(0,7)(0,3)
+E	((foo)|bar)!bas		foo!bar!bas		(4,11)(4,7)
+E	((foo)|bar)!bas		foo!bas			(0,7)(0,3)(0,3)
+E	(foo|(bar))!bas		bar!bas			(0,7)(0,3)(0,3)
+E	(foo|(bar))!bas		foo!bar!bas		(4,11)(4,7)(4,7)
+E	(foo|(bar))!bas		foo!bas			(0,7)(0,3)
+E	(foo|bar)!bas		bar!bas			(0,7)(0,3)
+E	(foo|bar)!bas		foo!bar!bas		(4,11)(4,7)
+E	(foo|bar)!bas		foo!bas			(0,7)(0,3)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bas		(0,3)(?,?)(0,3)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bar!bas		(0,7)(0,4)(4,7)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bar!bas	(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bas		(0,7)(0,4)(4,7)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bas		(0,3)(0,3)(?,?)(0,3)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bar!bas		(0,7)(0,7)(0,4)(4,7)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bas		(0,7)(0,7)(0,4)(4,7)
+E	.*(/XXX).*			/XXX			(0,4)(0,4)
+E	.*(\\XXX).*			\XXX			(0,4)(0,4)
+E	\\XXX				\XXX			(0,4)
+E	.*(/000).*			/000			(0,4)(0,4)
+E	.*(\\000).*			\000			(0,4)(0,4)
+E	\\000				\000			(0,4)
--- a/third-party/vendor/regex/testdata/fowler/dat/nullsubexpr.dat
+++ b/third-party/vendor/regex/testdata/fowler/dat/nullsubexpr.dat
@ -0,0 +1,74 @@
+NOTE	null subexpression matches : 2002-06-06
+
+E	(a*)*		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a*)+		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a+)*		a		(0,1)(0,1)
+E	SAME		x		(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a+)+		a		(0,1)(0,1)
+E	SAME		x		NOMATCH
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+
+E	([a]*)*		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	([a]*)+		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	([^b]*)*	a		(0,1)(0,1)
+E	SAME		b		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaab		(0,6)(0,6)
+E	([ab]*)*	a		(0,1)(0,1)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		ababab		(0,6)(0,6)
+E	SAME		bababa		(0,6)(0,6)
+E	SAME		b		(0,1)(0,1)
+E	SAME		bbbbbb		(0,6)(0,6)
+E	SAME		aaaabcde	(0,5)(0,5)
+E	([^a]*)*	b		(0,1)(0,1)
+E	SAME		bbbbbb		(0,6)(0,6)
+E	SAME		aaaaaa		(0,0)(0,0)
+E	([^ab]*)*	ccccxx		(0,6)(0,6)
+E	SAME		ababab		(0,0)(0,0)
+
+#E	((z)+|a)*	zabcde		(0,2)(1,2)
+E	((z)+|a)*	zabcde		(0,2)(1,2)(0,1)	Rust
+
+#{E	a+?		aaaaaa		(0,1)	no *? +? mimimal match ops
+#E	(a)		aaa		(0,1)(0,1)
+#E	(a*?)		aaa		(0,0)(0,0)
+#E	(a)*?		aaa		(0,0)
+#E	(a*?)*?		aaa		(0,0)
+#}
+
+B	\(a*\)*\(x\)		x	(0,1)(0,0)(0,1)
+B	\(a*\)*\(x\)		ax	(0,2)(0,1)(1,2)
+B	\(a*\)*\(x\)		axa	(0,2)(0,1)(1,2)
+B	\(a*\)*\(x\)\(\1\)	x	(0,1)(0,0)(0,1)(1,1)
+B	\(a*\)*\(x\)\(\1\)	ax	(0,2)(1,1)(1,2)(2,2)
+B	\(a*\)*\(x\)\(\1\)	axa	(0,3)(0,1)(1,2)(2,3)
+B	\(a*\)*\(x\)\(\1\)\(x\)	axax	(0,4)(0,1)(1,2)(2,3)(3,4)
+B	\(a*\)*\(x\)\(\1\)\(x\)	axxa	(0,3)(1,1)(1,2)(2,2)(2,3)
+
+E	(a*)*(x)		x	(0,1)(0,0)(0,1)
+E	(a*)*(x)		ax	(0,2)(0,1)(1,2)
+E	(a*)*(x)		axa	(0,2)(0,1)(1,2)
+
+E	(a*)+(x)		x	(0,1)(0,0)(0,1)
+E	(a*)+(x)		ax	(0,2)(0,1)(1,2)
+E	(a*)+(x)		axa	(0,2)(0,1)(1,2)
+
+E	(a*){2}(x)		x	(0,1)(0,0)(0,1)
+E	(a*){2}(x)		ax	(0,2)(1,1)(1,2)
+E	(a*){2}(x)		axa	(0,2)(1,1)(1,2)
--- a/third-party/vendor/regex/testdata/fowler/dat/repetition.dat
+++ b/third-party/vendor/regex/testdata/fowler/dat/repetition.dat
@ -0,0 +1,169 @@
+NOTE	implicit vs. explicit repetitions : 2009-02-02
+
+# Glenn Fowler <gsf@research.att.com>
+# conforming matches (column 4) must match one of the following BREs
+#	NOMATCH
+#	(0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
+#	(0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
+# i.e., each 3-tuple has two identical elements and one (?,?)
+
+E	((..)|(.))				NULL		NOMATCH
+E	((..)|(.))((..)|(.))			NULL		NOMATCH
+E	((..)|(.))((..)|(.))((..)|(.))		NULL		NOMATCH
+
+E	((..)|(.)){1}				NULL		NOMATCH
+E	((..)|(.)){2}				NULL		NOMATCH
+E	((..)|(.)){3}				NULL		NOMATCH
+
+E	((..)|(.))*				NULL		(0,0)
+
+E	((..)|(.))				a		(0,1)(0,1)(?,?)(0,1)
+E	((..)|(.))((..)|(.))			a		NOMATCH
+E	((..)|(.))((..)|(.))((..)|(.))		a		NOMATCH
+
+E	((..)|(.)){1}				a		(0,1)(0,1)(?,?)(0,1)
+E	((..)|(.)){2}				a		NOMATCH
+E	((..)|(.)){3}				a		NOMATCH
+
+E	((..)|(.))*				a		(0,1)(0,1)(?,?)(0,1)
+
+E	((..)|(.))				aa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aa		(0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
+E	((..)|(.))((..)|(.))((..)|(.))		aa		NOMATCH
+
+E	((..)|(.)){1}				aa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aa		(0,2)(1,2)(?,?)(1,2)
+E	((..)|(.)){3}				aa		NOMATCH
+
+E	((..)|(.))*				aa		(0,2)(0,2)(0,2)(?,?)
+
+E	((..)|(.))				aaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaa		(0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
+E	((..)|(.))((..)|(.))((..)|(.))		aaa		(0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
+
+E	((..)|(.)){1}				aaa		(0,2)(0,2)(0,2)(?,?)
+#E	((..)|(.)){2}				aaa		(0,3)(2,3)(?,?)(2,3)
+E	((..)|(.)){2}				aaa		(0,3)(2,3)(0,2)(2,3)	RE2/Go
+E	((..)|(.)){3}				aaa		(0,3)(2,3)(?,?)(2,3)
+
+#E	((..)|(.))*				aaa		(0,3)(2,3)(?,?)(2,3)
+E	((..)|(.))*				aaa		(0,3)(2,3)(0,2)(2,3)	RE2/Go
+
+E	((..)|(.))				aaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaa		(0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
+
+E	((..)|(.)){1}				aaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaa		(0,4)(2,4)(2,4)(?,?)
+#E	((..)|(.)){3}				aaaa		(0,4)(3,4)(?,?)(3,4)
+E	((..)|(.)){3}				aaaa		(0,4)(3,4)(0,2)(3,4)	RE2/Go
+
+E	((..)|(.))*				aaaa		(0,4)(2,4)(2,4)(?,?)
+
+E	((..)|(.))				aaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaaa		(0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
+
+E	((..)|(.)){1}				aaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaaa		(0,4)(2,4)(2,4)(?,?)
+#E	((..)|(.)){3}				aaaaa		(0,5)(4,5)(?,?)(4,5)
+E	((..)|(.)){3}				aaaaa		(0,5)(4,5)(2,4)(4,5)	RE2/Go
+
+#E	((..)|(.))*				aaaaa		(0,5)(4,5)(?,?)(4,5)
+E	((..)|(.))*				aaaaa		(0,5)(4,5)(2,4)(4,5)	RE2/Go
+
+E	((..)|(.))				aaaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaaaa		(0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
+
+E	((..)|(.)){1}				aaaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaaaa		(0,4)(2,4)(2,4)(?,?)
+E	((..)|(.)){3}				aaaaaa		(0,6)(4,6)(4,6)(?,?)
+
+E	((..)|(.))*				aaaaaa		(0,6)(4,6)(4,6)(?,?)
+
+NOTE	additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
+
+# These test a bug in OS X / FreeBSD / NetBSD, and libtree.
+# Linux/GLIBC gets the {8,} and {8,8} wrong.
+
+:HA#100:E	X(.?){0,}Y	X1234567Y	(0,9)(7,8)
+:HA#101:E	X(.?){1,}Y	X1234567Y	(0,9)(7,8)
+:HA#102:E	X(.?){2,}Y	X1234567Y	(0,9)(7,8)
+:HA#103:E	X(.?){3,}Y	X1234567Y	(0,9)(7,8)
+:HA#104:E	X(.?){4,}Y	X1234567Y	(0,9)(7,8)
+:HA#105:E	X(.?){5,}Y	X1234567Y	(0,9)(7,8)
+:HA#106:E	X(.?){6,}Y	X1234567Y	(0,9)(7,8)
+:HA#107:E	X(.?){7,}Y	X1234567Y	(0,9)(7,8)
+:HA#108:E	X(.?){8,}Y	X1234567Y	(0,9)(8,8)
+#:HA#110:E	X(.?){0,8}Y	X1234567Y	(0,9)(7,8)
+:HA#110:E	X(.?){0,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#111:E	X(.?){1,8}Y	X1234567Y	(0,9)(7,8)
+:HA#111:E	X(.?){1,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#112:E	X(.?){2,8}Y	X1234567Y	(0,9)(7,8)
+:HA#112:E	X(.?){2,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#113:E	X(.?){3,8}Y	X1234567Y	(0,9)(7,8)
+:HA#113:E	X(.?){3,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#114:E	X(.?){4,8}Y	X1234567Y	(0,9)(7,8)
+:HA#114:E	X(.?){4,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#115:E	X(.?){5,8}Y	X1234567Y	(0,9)(7,8)
+:HA#115:E	X(.?){5,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#116:E	X(.?){6,8}Y	X1234567Y	(0,9)(7,8)
+:HA#116:E	X(.?){6,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#117:E	X(.?){7,8}Y	X1234567Y	(0,9)(7,8)
+:HA#117:E	X(.?){7,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+:HA#118:E	X(.?){8,8}Y	X1234567Y	(0,9)(8,8)
+
+# These test a fixed bug in my regex-tdfa that did not keep the expanded
+# form properly grouped, so right association did the wrong thing with
+# these ambiguous patterns (crafted just to test my code when I became
+# suspicious of my implementation).  The first subexpression should use
+# "ab" then "a" then "bcd".
+
+# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
+# results like (0,6)(4,5)(6,6).
+
+#:HA#260:E	(a|ab|c|bcd){0,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#260:E	(a|ab|c|bcd){0,}(d*)	ababcd	(0,1)(0,1)(1,1)	Rust
+#:HA#261:E	(a|ab|c|bcd){1,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#261:E	(a|ab|c|bcd){1,}(d*)	ababcd	(0,1)(0,1)(1,1)	Rust
+:HA#262:E	(a|ab|c|bcd){2,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#263:E	(a|ab|c|bcd){3,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#264:E	(a|ab|c|bcd){4,}(d*)	ababcd	NOMATCH
+#:HA#265:E	(a|ab|c|bcd){0,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#265:E	(a|ab|c|bcd){0,10}(d*)	ababcd	(0,1)(0,1)(1,1)	Rust
+#:HA#266:E	(a|ab|c|bcd){1,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#266:E	(a|ab|c|bcd){1,10}(d*)	ababcd	(0,1)(0,1)(1,1)	Rust
+:HA#267:E	(a|ab|c|bcd){2,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#268:E	(a|ab|c|bcd){3,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#269:E	(a|ab|c|bcd){4,10}(d*)	ababcd	NOMATCH
+#:HA#270:E	(a|ab|c|bcd)*(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#270:E	(a|ab|c|bcd)*(d*)	ababcd	(0,1)(0,1)(1,1)	Rust
+#:HA#271:E	(a|ab|c|bcd)+(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#271:E	(a|ab|c|bcd)+(d*)	ababcd	(0,1)(0,1)(1,1)	Rust
+
+# The above worked on Linux/GLIBC but the following often fail.
+# They also trip up OS X / FreeBSD / NetBSD:
+
+#:HA#280:E	(ab|a|c|bcd){0,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#280:E	(ab|a|c|bcd){0,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#281:E	(ab|a|c|bcd){1,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#281:E	(ab|a|c|bcd){1,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#282:E	(ab|a|c|bcd){2,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#282:E	(ab|a|c|bcd){2,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#283:E	(ab|a|c|bcd){3,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#283:E	(ab|a|c|bcd){3,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+:HA#284:E	(ab|a|c|bcd){4,}(d*)	ababcd	NOMATCH
+#:HA#285:E	(ab|a|c|bcd){0,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#285:E	(ab|a|c|bcd){0,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#286:E	(ab|a|c|bcd){1,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#286:E	(ab|a|c|bcd){1,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#287:E	(ab|a|c|bcd){2,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#287:E	(ab|a|c|bcd){2,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#288:E	(ab|a|c|bcd){3,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#288:E	(ab|a|c|bcd){3,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+:HA#289:E	(ab|a|c|bcd){4,10}(d*)	ababcd	NOMATCH
+#:HA#290:E	(ab|a|c|bcd)*(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#290:E	(ab|a|c|bcd)*(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#291:E	(ab|a|c|bcd)+(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#291:E	(ab|a|c|bcd)+(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
--- a/third-party/vendor/regex/testdata/fowler/nullsubexpr.toml
+++ b/third-party/vendor/regex/testdata/fowler/nullsubexpr.toml
@ -0,0 +1,405 @@
+# !!! DO NOT EDIT !!!
+# Automatically generated by 'regex-cli generate fowler'.
+# Numbers in the test names correspond to the line number of the test from
+# the original dat file.
+
+[[test]]
+name = "nullsubexpr3"
+regex = '''(a*)*'''
+haystack = '''a'''
+matches = [[[0, 1], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr4"
+regex = '''(a*)*'''
+haystack = '''x'''
+matches = [[[0, 0], [0, 0]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr5"
+regex = '''(a*)*'''
+haystack = '''aaaaaa'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr6"
+regex = '''(a*)*'''
+haystack = '''aaaaaax'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr7"
+regex = '''(a*)+'''
+haystack = '''a'''
+matches = [[[0, 1], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr8"
+regex = '''(a*)+'''
+haystack = '''x'''
+matches = [[[0, 0], [0, 0]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr9"
+regex = '''(a*)+'''
+haystack = '''aaaaaa'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr10"
+regex = '''(a*)+'''
+haystack = '''aaaaaax'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr11"
+regex = '''(a+)*'''
+haystack = '''a'''
+matches = [[[0, 1], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr12"
+regex = '''(a+)*'''
+haystack = '''x'''
+matches = [[[0, 0], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr13"
+regex = '''(a+)*'''
+haystack = '''aaaaaa'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr14"
+regex = '''(a+)*'''
+haystack = '''aaaaaax'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr15"
+regex = '''(a+)+'''
+haystack = '''a'''
+matches = [[[0, 1], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr16"
+regex = '''(a+)+'''
+haystack = '''x'''
+matches = []
+match-limit = 1
+
+[[test]]
+name = "nullsubexpr17"
+regex = '''(a+)+'''
+haystack = '''aaaaaa'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr18"
+regex = '''(a+)+'''
+haystack = '''aaaaaax'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr20"
+regex = '''([a]*)*'''
+haystack = '''a'''
+matches = [[[0, 1], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr21"
+regex = '''([a]*)*'''
+haystack = '''x'''
+matches = [[[0, 0], [0, 0]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr22"
+regex = '''([a]*)*'''
+haystack = '''aaaaaa'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr23"
+regex = '''([a]*)*'''
+haystack = '''aaaaaax'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr24"
+regex = '''([a]*)+'''
+haystack = '''a'''
+matches = [[[0, 1], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr25"
+regex = '''([a]*)+'''
+haystack = '''x'''
+matches = [[[0, 0], [0, 0]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr26"
+regex = '''([a]*)+'''
+haystack = '''aaaaaa'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr27"
+regex = '''([a]*)+'''
+haystack = '''aaaaaax'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr28"
+regex = '''([^b]*)*'''
+haystack = '''a'''
+matches = [[[0, 1], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr29"
+regex = '''([^b]*)*'''
+haystack = '''b'''
+matches = [[[0, 0], [0, 0]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr30"
+regex = '''([^b]*)*'''
+haystack = '''aaaaaa'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr31"
+regex = '''([^b]*)*'''
+haystack = '''aaaaaab'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr32"
+regex = '''([ab]*)*'''
+haystack = '''a'''
+matches = [[[0, 1], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr33"
+regex = '''([ab]*)*'''
+haystack = '''aaaaaa'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr34"
+regex = '''([ab]*)*'''
+haystack = '''ababab'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr35"
+regex = '''([ab]*)*'''
+haystack = '''bababa'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr36"
+regex = '''([ab]*)*'''
+haystack = '''b'''
+matches = [[[0, 1], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr37"
+regex = '''([ab]*)*'''
+haystack = '''bbbbbb'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr38"
+regex = '''([ab]*)*'''
+haystack = '''aaaabcde'''
+matches = [[[0, 5], [0, 5]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr39"
+regex = '''([^a]*)*'''
+haystack = '''b'''
+matches = [[[0, 1], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr40"
+regex = '''([^a]*)*'''
+haystack = '''bbbbbb'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr41"
+regex = '''([^a]*)*'''
+haystack = '''aaaaaa'''
+matches = [[[0, 0], [0, 0]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr42"
+regex = '''([^ab]*)*'''
+haystack = '''ccccxx'''
+matches = [[[0, 6], [0, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr43"
+regex = '''([^ab]*)*'''
+haystack = '''ababab'''
+matches = [[[0, 0], [0, 0]]]
+match-limit = 1
+anchored = true
+
+# Test added by Rust regex project.
+[[test]]
+name = "nullsubexpr46"
+regex = '''((z)+|a)*'''
+haystack = '''zabcde'''
+matches = [[[0, 2], [1, 2], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr64"
+regex = '''(a*)*(x)'''
+haystack = '''x'''
+matches = [[[0, 1], [0, 0], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr65"
+regex = '''(a*)*(x)'''
+haystack = '''ax'''
+matches = [[[0, 2], [0, 1], [1, 2]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr66"
+regex = '''(a*)*(x)'''
+haystack = '''axa'''
+matches = [[[0, 2], [0, 1], [1, 2]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr68"
+regex = '''(a*)+(x)'''
+haystack = '''x'''
+matches = [[[0, 1], [0, 0], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr69"
+regex = '''(a*)+(x)'''
+haystack = '''ax'''
+matches = [[[0, 2], [0, 1], [1, 2]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr70"
+regex = '''(a*)+(x)'''
+haystack = '''axa'''
+matches = [[[0, 2], [0, 1], [1, 2]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr72"
+regex = '''(a*){2}(x)'''
+haystack = '''x'''
+matches = [[[0, 1], [0, 0], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr73"
+regex = '''(a*){2}(x)'''
+haystack = '''ax'''
+matches = [[[0, 2], [1, 1], [1, 2]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "nullsubexpr74"
+regex = '''(a*){2}(x)'''
+haystack = '''axa'''
+matches = [[[0, 2], [1, 1], [1, 2]]]
+match-limit = 1
+anchored = true
+
--- a/third-party/vendor/regex/testdata/fowler/repetition.toml
+++ b/third-party/vendor/regex/testdata/fowler/repetition.toml
@ -0,0 +1,746 @@
+# !!! DO NOT EDIT !!!
+# Automatically generated by 'regex-cli generate fowler'.
+# Numbers in the test names correspond to the line number of the test from
+# the original dat file.
+
+[[test]]
+name = "repetition10"
+regex = '''((..)|(.))'''
+haystack = ''''''
+matches = []
+match-limit = 1
+
+[[test]]
+name = "repetition11"
+regex = '''((..)|(.))((..)|(.))'''
+haystack = ''''''
+matches = []
+match-limit = 1
+
+[[test]]
+name = "repetition12"
+regex = '''((..)|(.))((..)|(.))((..)|(.))'''
+haystack = ''''''
+matches = []
+match-limit = 1
+
+[[test]]
+name = "repetition14"
+regex = '''((..)|(.)){1}'''
+haystack = ''''''
+matches = []
+match-limit = 1
+
+[[test]]
+name = "repetition15"
+regex = '''((..)|(.)){2}'''
+haystack = ''''''
+matches = []
+match-limit = 1
+
+[[test]]
+name = "repetition16"
+regex = '''((..)|(.)){3}'''
+haystack = ''''''
+matches = []
+match-limit = 1
+
+[[test]]
+name = "repetition18"
+regex = '''((..)|(.))*'''
+haystack = ''''''
+matches = [[[0, 0], [], [], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition20"
+regex = '''((..)|(.))'''
+haystack = '''a'''
+matches = [[[0, 1], [0, 1], [], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition21"
+regex = '''((..)|(.))((..)|(.))'''
+haystack = '''a'''
+matches = []
+match-limit = 1
+
+[[test]]
+name = "repetition22"
+regex = '''((..)|(.))((..)|(.))((..)|(.))'''
+haystack = '''a'''
+matches = []
+match-limit = 1
+
+[[test]]
+name = "repetition24"
+regex = '''((..)|(.)){1}'''
+haystack = '''a'''
+matches = [[[0, 1], [0, 1], [], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition25"
+regex = '''((..)|(.)){2}'''
+haystack = '''a'''
+matches = []
+match-limit = 1
+
+[[test]]
+name = "repetition26"
+regex = '''((..)|(.)){3}'''
+haystack = '''a'''
+matches = []
+match-limit = 1
+
+[[test]]
+name = "repetition28"
+regex = '''((..)|(.))*'''
+haystack = '''a'''
+matches = [[[0, 1], [0, 1], [], [0, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition30"
+regex = '''((..)|(.))'''
+haystack = '''aa'''
+matches = [[[0, 2], [0, 2], [0, 2], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition31"
+regex = '''((..)|(.))((..)|(.))'''
+haystack = '''aa'''
+matches = [[[0, 2], [0, 1], [], [0, 1], [1, 2], [], [1, 2]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition32"
+regex = '''((..)|(.))((..)|(.))((..)|(.))'''
+haystack = '''aa'''
+matches = []
+match-limit = 1
+
+[[test]]
+name = "repetition34"
+regex = '''((..)|(.)){1}'''
+haystack = '''aa'''
+matches = [[[0, 2], [0, 2], [0, 2], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition35"
+regex = '''((..)|(.)){2}'''
+haystack = '''aa'''
+matches = [[[0, 2], [1, 2], [], [1, 2]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition36"
+regex = '''((..)|(.)){3}'''
+haystack = '''aa'''
+matches = []
+match-limit = 1
+
+[[test]]
+name = "repetition38"
+regex = '''((..)|(.))*'''
+haystack = '''aa'''
+matches = [[[0, 2], [0, 2], [0, 2], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition40"
+regex = '''((..)|(.))'''
+haystack = '''aaa'''
+matches = [[[0, 2], [0, 2], [0, 2], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition41"
+regex = '''((..)|(.))((..)|(.))'''
+haystack = '''aaa'''
+matches = [[[0, 3], [0, 2], [0, 2], [], [2, 3], [], [2, 3]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition42"
+regex = '''((..)|(.))((..)|(.))((..)|(.))'''
+haystack = '''aaa'''
+matches = [[[0, 3], [0, 1], [], [0, 1], [1, 2], [], [1, 2], [2, 3], [], [2, 3]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition44"
+regex = '''((..)|(.)){1}'''
+haystack = '''aaa'''
+matches = [[[0, 2], [0, 2], [0, 2], []]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition46"
+regex = '''((..)|(.)){2}'''
+haystack = '''aaa'''
+matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition47"
+regex = '''((..)|(.)){3}'''
+haystack = '''aaa'''
+matches = [[[0, 3], [2, 3], [], [2, 3]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition50"
+regex = '''((..)|(.))*'''
+haystack = '''aaa'''
+matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition52"
+regex = '''((..)|(.))'''
+haystack = '''aaaa'''
+matches = [[[0, 2], [0, 2], [0, 2], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition53"
+regex = '''((..)|(.))((..)|(.))'''
+haystack = '''aaaa'''
+matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition54"
+regex = '''((..)|(.))((..)|(.))((..)|(.))'''
+haystack = '''aaaa'''
+matches = [[[0, 4], [0, 2], [0, 2], [], [2, 3], [], [2, 3], [3, 4], [], [3, 4]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition56"
+regex = '''((..)|(.)){1}'''
+haystack = '''aaaa'''
+matches = [[[0, 2], [0, 2], [0, 2], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition57"
+regex = '''((..)|(.)){2}'''
+haystack = '''aaaa'''
+matches = [[[0, 4], [2, 4], [2, 4], []]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition59"
+regex = '''((..)|(.)){3}'''
+haystack = '''aaaa'''
+matches = [[[0, 4], [3, 4], [0, 2], [3, 4]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition61"
+regex = '''((..)|(.))*'''
+haystack = '''aaaa'''
+matches = [[[0, 4], [2, 4], [2, 4], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition63"
+regex = '''((..)|(.))'''
+haystack = '''aaaaa'''
+matches = [[[0, 2], [0, 2], [0, 2], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition64"
+regex = '''((..)|(.))((..)|(.))'''
+haystack = '''aaaaa'''
+matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition65"
+regex = '''((..)|(.))((..)|(.))((..)|(.))'''
+haystack = '''aaaaa'''
+matches = [[[0, 5], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 5], [], [4, 5]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition67"
+regex = '''((..)|(.)){1}'''
+haystack = '''aaaaa'''
+matches = [[[0, 2], [0, 2], [0, 2], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition68"
+regex = '''((..)|(.)){2}'''
+haystack = '''aaaaa'''
+matches = [[[0, 4], [2, 4], [2, 4], []]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition70"
+regex = '''((..)|(.)){3}'''
+haystack = '''aaaaa'''
+matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition73"
+regex = '''((..)|(.))*'''
+haystack = '''aaaaa'''
+matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition75"
+regex = '''((..)|(.))'''
+haystack = '''aaaaaa'''
+matches = [[[0, 2], [0, 2], [0, 2], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition76"
+regex = '''((..)|(.))((..)|(.))'''
+haystack = '''aaaaaa'''
+matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition77"
+regex = '''((..)|(.))((..)|(.))((..)|(.))'''
+haystack = '''aaaaaa'''
+matches = [[[0, 6], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 6], [4, 6], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition79"
+regex = '''((..)|(.)){1}'''
+haystack = '''aaaaaa'''
+matches = [[[0, 2], [0, 2], [0, 2], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition80"
+regex = '''((..)|(.)){2}'''
+haystack = '''aaaaaa'''
+matches = [[[0, 4], [2, 4], [2, 4], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition81"
+regex = '''((..)|(.)){3}'''
+haystack = '''aaaaaa'''
+matches = [[[0, 6], [4, 6], [4, 6], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition83"
+regex = '''((..)|(.))*'''
+haystack = '''aaaaaa'''
+matches = [[[0, 6], [4, 6], [4, 6], []]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive90"
+regex = '''X(.?){0,}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [7, 8]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive91"
+regex = '''X(.?){1,}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [7, 8]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive92"
+regex = '''X(.?){2,}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [7, 8]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive93"
+regex = '''X(.?){3,}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [7, 8]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive94"
+regex = '''X(.?){4,}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [7, 8]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive95"
+regex = '''X(.?){5,}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [7, 8]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive96"
+regex = '''X(.?){6,}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [7, 8]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive97"
+regex = '''X(.?){7,}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [7, 8]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive98"
+regex = '''X(.?){8,}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [8, 8]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive100"
+regex = '''X(.?){0,8}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [8, 8]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive102"
+regex = '''X(.?){1,8}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [8, 8]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive104"
+regex = '''X(.?){2,8}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [8, 8]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive106"
+regex = '''X(.?){3,8}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [8, 8]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive108"
+regex = '''X(.?){4,8}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [8, 8]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive110"
+regex = '''X(.?){5,8}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [8, 8]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive112"
+regex = '''X(.?){6,8}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [8, 8]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive114"
+regex = '''X(.?){7,8}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [8, 8]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive115"
+regex = '''X(.?){8,8}Y'''
+haystack = '''X1234567Y'''
+matches = [[[0, 9], [8, 8]]]
+match-limit = 1
+anchored = true
+
+# Test added by Rust regex project.
+[[test]]
+name = "repetition-expensive127"
+regex = '''(a|ab|c|bcd){0,}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 1], [0, 1], [1, 1]]]
+match-limit = 1
+anchored = true
+
+# Test added by Rust regex project.
+[[test]]
+name = "repetition-expensive129"
+regex = '''(a|ab|c|bcd){1,}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 1], [0, 1], [1, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive130"
+regex = '''(a|ab|c|bcd){2,}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [3, 6], [6, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive131"
+regex = '''(a|ab|c|bcd){3,}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [3, 6], [6, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive132"
+regex = '''(a|ab|c|bcd){4,}(d*)'''
+haystack = '''ababcd'''
+matches = []
+match-limit = 1
+
+# Test added by Rust regex project.
+[[test]]
+name = "repetition-expensive134"
+regex = '''(a|ab|c|bcd){0,10}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 1], [0, 1], [1, 1]]]
+match-limit = 1
+anchored = true
+
+# Test added by Rust regex project.
+[[test]]
+name = "repetition-expensive136"
+regex = '''(a|ab|c|bcd){1,10}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 1], [0, 1], [1, 1]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive137"
+regex = '''(a|ab|c|bcd){2,10}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [3, 6], [6, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive138"
+regex = '''(a|ab|c|bcd){3,10}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [3, 6], [6, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive139"
+regex = '''(a|ab|c|bcd){4,10}(d*)'''
+haystack = '''ababcd'''
+matches = []
+match-limit = 1
+
+# Test added by Rust regex project.
+[[test]]
+name = "repetition-expensive141"
+regex = '''(a|ab|c|bcd)*(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 1], [0, 1], [1, 1]]]
+match-limit = 1
+anchored = true
+
+# Test added by Rust regex project.
+[[test]]
+name = "repetition-expensive143"
+regex = '''(a|ab|c|bcd)+(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 1], [0, 1], [1, 1]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive149"
+regex = '''(ab|a|c|bcd){0,}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [4, 5], [5, 6]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive151"
+regex = '''(ab|a|c|bcd){1,}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [4, 5], [5, 6]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive153"
+regex = '''(ab|a|c|bcd){2,}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [4, 5], [5, 6]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive155"
+regex = '''(ab|a|c|bcd){3,}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [4, 5], [5, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive156"
+regex = '''(ab|a|c|bcd){4,}(d*)'''
+haystack = '''ababcd'''
+matches = []
+match-limit = 1
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive158"
+regex = '''(ab|a|c|bcd){0,10}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [4, 5], [5, 6]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive160"
+regex = '''(ab|a|c|bcd){1,10}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [4, 5], [5, 6]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive162"
+regex = '''(ab|a|c|bcd){2,10}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [4, 5], [5, 6]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive164"
+regex = '''(ab|a|c|bcd){3,10}(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [4, 5], [5, 6]]]
+match-limit = 1
+anchored = true
+
+[[test]]
+name = "repetition-expensive165"
+regex = '''(ab|a|c|bcd){4,10}(d*)'''
+haystack = '''ababcd'''
+matches = []
+match-limit = 1
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive167"
+regex = '''(ab|a|c|bcd)*(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [4, 5], [5, 6]]]
+match-limit = 1
+anchored = true
+
+# Test added by RE2/Go project.
+[[test]]
+name = "repetition-expensive169"
+regex = '''(ab|a|c|bcd)+(d*)'''
+haystack = '''ababcd'''
+matches = [[[0, 6], [4, 5], [5, 6]]]
+match-limit = 1
+anchored = true
+
--- a/third-party/vendor/regex/testdata/iter.toml
+++ b/third-party/vendor/regex/testdata/iter.toml
@ -0,0 +1,143 @@
+[[test]]
+name = "1"
+regex = "a"
+haystack = "aaa"
+matches = [[0, 1], [1, 2], [2, 3]]
+
+[[test]]
+name = "2"
+regex = "a"
+haystack = "aba"
+matches = [[0, 1], [2, 3]]
+
+[[test]]
+name = "empty1"
+regex = ''
+haystack = ''
+matches = [[0, 0]]
+
+[[test]]
+name = "empty2"
+regex = ''
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty3"
+regex = '(?:)'
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty4"
+regex = '(?:)*'
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty5"
+regex = '(?:)+'
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty6"
+regex = '(?:)?'
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty7"
+regex = '(?:)(?:)'
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty8"
+regex = '(?:)+|z'
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty9"
+regex = 'z|(?:)+'
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty10"
+regex = '(?:)+|b'
+haystack = 'abc'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+
+[[test]]
+name = "empty11"
+regex = 'b|(?:)+'
+haystack = 'abc'
+matches = [[0, 0], [1, 2], [3, 3]]
+
+[[test]]
+name = "start1"
+regex = "^a"
+haystack = "a"
+matches = [[0, 1]]
+
+[[test]]
+name = "start2"
+regex = "^a"
+haystack = "aa"
+matches = [[0, 1]]
+
+[[test]]
+name = "anchored1"
+regex = "a"
+haystack = "a"
+matches = [[0, 1]]
+anchored = true
+
+# This test is pretty subtle. It demonstrates the crucial difference between
+# '^a' and 'a' compiled in 'anchored' mode. The former regex exclusively
+# matches at the start of a haystack and nowhere else. The latter regex has
+# no such restriction, but its automaton is constructed such that it lacks a
+# `.*?` prefix. So it can actually produce matches at multiple locations.
+# The anchored3 test drives this point home.
+[[test]]
+name = "anchored2"
+regex = "a"
+haystack = "aa"
+matches = [[0, 1], [1, 2]]
+anchored = true
+
+# Unlike anchored2, this test stops matching anything after it sees `b`
+# since it lacks a `.*?` prefix. Since it is looking for 'a' but sees 'b', it
+# determines that there are no remaining matches.
+[[test]]
+name = "anchored3"
+regex = "a"
+haystack = "aaba"
+matches = [[0, 1], [1, 2]]
+anchored = true
+
+[[test]]
+name = "nonempty-followedby-empty"
+regex = 'abc|.*?'
+haystack = "abczzz"
+matches = [[0, 3], [4, 4], [5, 5], [6, 6]]
+
+[[test]]
+name = "nonempty-followedby-oneempty"
+regex = 'abc|.*?'
+haystack = "abcz"
+matches = [[0, 3], [4, 4]]
+
+[[test]]
+name = "nonempty-followedby-onemixed"
+regex = 'abc|.*?'
+haystack = "abczabc"
+matches = [[0, 3], [4, 7]]
+
+[[test]]
+name = "nonempty-followedby-twomixed"
+regex = 'abc|.*?'
+haystack = "abczzabc"
+matches = [[0, 3], [4, 4], [5, 8]]
--- a/third-party/vendor/regex/testdata/leftmost-all.toml
+++ b/third-party/vendor/regex/testdata/leftmost-all.toml
@ -0,0 +1,25 @@
+[[test]]
+name = "alt"
+regex = 'foo|foobar'
+haystack = "foobar"
+matches = [[0, 6]]
+match-kind = "all"
+search-kind = "leftmost"
+
+[[test]]
+name = "multi"
+regex = ['foo', 'foobar']
+haystack = "foobar"
+matches = [
+  { id = 1, span = [0, 6] },
+]
+match-kind = "all"
+search-kind = "leftmost"
+
+[[test]]
+name = "dotall"
+regex = '(?s:.)'
+haystack = "foobar"
+matches = [[5, 6]]
+match-kind = "all"
+search-kind = "leftmost"
--- a/third-party/vendor/regex/testdata/line-terminator.toml
+++ b/third-party/vendor/regex/testdata/line-terminator.toml
@ -0,0 +1,109 @@
+# This tests that we can switch the line terminator to the NUL byte.
+[[test]]
+name = "nul"
+regex = '(?m)^[a-z]+$'
+haystack = '\x00abc\x00'
+matches = [[1, 4]]
+unescape = true
+line-terminator = '\x00'
+
+# This tests that '.' will not match the configured line terminator, but will
+# match \n.
+[[test]]
+name = "dot-changes-with-line-terminator"
+regex = '.'
+haystack = '\x00\n'
+matches = [[1, 2]]
+unescape = true
+line-terminator = '\x00'
+
+# This tests that when we switch the line terminator, \n is no longer
+# recognized as the terminator.
+[[test]]
+name = "not-line-feed"
+regex = '(?m)^[a-z]+$'
+haystack = '\nabc\n'
+matches = []
+unescape = true
+line-terminator = '\x00'
+
+# This tests that we can set the line terminator to a non-ASCII byte and have
+# it behave as expected.
+[[test]]
+name = "non-ascii"
+regex = '(?m)^[a-z]+$'
+haystack = '\xFFabc\xFF'
+matches = [[1, 4]]
+unescape = true
+line-terminator = '\xFF'
+utf8 = false
+
+# This tests a tricky case where the line terminator is set to \r. This ensures
+# that the StartLF look-behind assertion is tracked when computing the start
+# state.
+[[test]]
+name = "carriage"
+regex = '(?m)^[a-z]+'
+haystack = 'ABC\rabc'
+matches = [[4, 7]]
+bounds = [4, 7]
+unescape = true
+line-terminator = '\r'
+
+# This tests that we can set the line terminator to a byte corresponding to a
+# word character, and things work as expected.
+[[test]]
+name = "word-byte"
+regex = '(?m)^[a-z]+$'
+haystack = 'ZabcZ'
+matches = [[1, 4]]
+unescape = true
+line-terminator = 'Z'
+
+# This tests that we can set the line terminator to a byte corresponding to a
+# non-word character, and things work as expected.
+[[test]]
+name = "non-word-byte"
+regex = '(?m)^[a-z]+$'
+haystack = '%abc%'
+matches = [[1, 4]]
+unescape = true
+line-terminator = '%'
+
+# This combines "set line terminator to a word byte" with a word boundary
+# assertion, which should result in no match even though ^/$ matches.
+[[test]]
+name = "word-boundary"
+regex = '(?m)^\b[a-z]+\b$'
+haystack = 'ZabcZ'
+matches = []
+unescape = true
+line-terminator = 'Z'
+
+# Like 'word-boundary', but does an anchored search at the point where ^
+# matches, but where \b should not.
+[[test]]
+name = "word-boundary-at"
+regex = '(?m)^\b[a-z]+\b$'
+haystack = 'ZabcZ'
+matches = []
+bounds = [1, 4]
+anchored = true
+unescape = true
+line-terminator = 'Z'
+
+# Like 'word-boundary-at', but flips the word boundary to a negation. This
+# in particular tests a tricky case in DFA engines, where they must consider
+# explicitly that a starting configuration from a custom line terminator may
+# also require setting the "is from word byte" flag on a state. Otherwise,
+# it's treated as "not from a word byte," which would result in \B not matching
+# here when it should.
+[[test]]
+name = "not-word-boundary-at"
+regex = '(?m)^\B[a-z]+\B$'
+haystack = 'ZabcZ'
+matches = [[1, 4]]
+bounds = [1, 4]
+anchored = true
+unescape = true
+line-terminator = 'Z'
--- a/third-party/vendor/regex/testdata/misc.toml
+++ b/third-party/vendor/regex/testdata/misc.toml
@ -0,0 +1,99 @@
+[[test]]
+name = "ascii-literal"
+regex = "a"
+haystack = "a"
+matches = [[0, 1]]
+
+[[test]]
+name = "ascii-literal-not"
+regex = "a"
+haystack = "z"
+matches = []
+
+[[test]]
+name = "ascii-literal-anchored"
+regex = "a"
+haystack = "a"
+matches = [[0, 1]]
+anchored = true
+
+[[test]]
+name = "ascii-literal-anchored-not"
+regex = "a"
+haystack = "z"
+matches = []
+anchored = true
+
+[[test]]
+name = "anchor-start-end-line"
+regex = '(?m)^bar$'
+haystack = "foo\nbar\nbaz"
+matches = [[4, 7]]
+
+[[test]]
+name = "prefix-literal-match"
+regex = '^abc'
+haystack = "abc"
+matches = [[0, 3]]
+
+[[test]]
+name = "prefix-literal-match-ascii"
+regex = '^abc'
+haystack = "abc"
+matches = [[0, 3]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "prefix-literal-no-match"
+regex = '^abc'
+haystack = "zabc"
+matches = []
+
+[[test]]
+name = "one-literal-edge"
+regex = 'abc'
+haystack = "xxxxxab"
+matches = []
+
+[[test]]
+name = "terminates"
+regex = 'a$'
+haystack = "a"
+matches = [[0, 1]]
+
+[[test]]
+name = "suffix-100"
+regex = '.*abcd'
+haystack = "abcd"
+matches = [[0, 4]]
+
+[[test]]
+name = "suffix-200"
+regex = '.*(?:abcd)+'
+haystack = "abcd"
+matches = [[0, 4]]
+
+[[test]]
+name = "suffix-300"
+regex = '.*(?:abcd)+'
+haystack = "abcdabcd"
+matches = [[0, 8]]
+
+[[test]]
+name = "suffix-400"
+regex = '.*(?:abcd)+'
+haystack = "abcdxabcd"
+matches = [[0, 9]]
+
+[[test]]
+name = "suffix-500"
+regex = '.*x(?:abcd)+'
+haystack = "abcdxabcd"
+matches = [[0, 9]]
+
+[[test]]
+name = "suffix-600"
+regex = '[^abcd]*x(?:abcd)+'
+haystack = "abcdxabcd"
+matches = [[4, 9]]
--- a/third-party/vendor/regex/testdata/multiline.toml
+++ b/third-party/vendor/regex/testdata/multiline.toml
@ -0,0 +1,845 @@
+[[test]]
+name = "basic1"
+regex = '(?m)^[a-z]+$'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 3], [4, 7], [8, 11]]
+
+[[test]]
+name = "basic1-crlf"
+regex = '(?Rm)^[a-z]+$'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 3], [4, 7], [8, 11]]
+
+[[test]]
+name = "basic1-crlf-cr"
+regex = '(?Rm)^[a-z]+$'
+haystack = "abc\rdef\rxyz"
+matches = [[0, 3], [4, 7], [8, 11]]
+
+[[test]]
+name = "basic2"
+regex = '(?m)^$'
+haystack = "abc\ndef\nxyz"
+matches = []
+
+[[test]]
+name = "basic2-crlf"
+regex = '(?Rm)^$'
+haystack = "abc\ndef\nxyz"
+matches = []
+
+[[test]]
+name = "basic2-crlf-cr"
+regex = '(?Rm)^$'
+haystack = "abc\rdef\rxyz"
+matches = []
+
+[[test]]
+name = "basic3"
+regex = '(?m)^'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 0], [4, 4], [8, 8]]
+
+[[test]]
+name = "basic3-crlf"
+regex = '(?Rm)^'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 0], [4, 4], [8, 8]]
+
+[[test]]
+name = "basic3-crlf-cr"
+regex = '(?Rm)^'
+haystack = "abc\rdef\rxyz"
+matches = [[0, 0], [4, 4], [8, 8]]
+
+[[test]]
+name = "basic4"
+regex = '(?m)$'
+haystack = "abc\ndef\nxyz"
+matches = [[3, 3], [7, 7], [11, 11]]
+
+[[test]]
+name = "basic4-crlf"
+regex = '(?Rm)$'
+haystack = "abc\ndef\nxyz"
+matches = [[3, 3], [7, 7], [11, 11]]
+
+[[test]]
+name = "basic4-crlf-cr"
+regex = '(?Rm)$'
+haystack = "abc\rdef\rxyz"
+matches = [[3, 3], [7, 7], [11, 11]]
+
+[[test]]
+name = "basic5"
+regex = '(?m)^[a-z]'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 1], [4, 5], [8, 9]]
+
+[[test]]
+name = "basic5-crlf"
+regex = '(?Rm)^[a-z]'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 1], [4, 5], [8, 9]]
+
+[[test]]
+name = "basic5-crlf-cr"
+regex = '(?Rm)^[a-z]'
+haystack = "abc\rdef\rxyz"
+matches = [[0, 1], [4, 5], [8, 9]]
+
+[[test]]
+name = "basic6"
+regex = '(?m)[a-z]^'
+haystack = "abc\ndef\nxyz"
+matches = []
+
+[[test]]
+name = "basic6-crlf"
+regex = '(?Rm)[a-z]^'
+haystack = "abc\ndef\nxyz"
+matches = []
+
+[[test]]
+name = "basic6-crlf-cr"
+regex = '(?Rm)[a-z]^'
+haystack = "abc\rdef\rxyz"
+matches = []
+
+[[test]]
+name = "basic7"
+regex = '(?m)[a-z]$'
+haystack = "abc\ndef\nxyz"
+matches = [[2, 3], [6, 7], [10, 11]]
+
+[[test]]
+name = "basic7-crlf"
+regex = '(?Rm)[a-z]$'
+haystack = "abc\ndef\nxyz"
+matches = [[2, 3], [6, 7], [10, 11]]
+
+[[test]]
+name = "basic7-crlf-cr"
+regex = '(?Rm)[a-z]$'
+haystack = "abc\rdef\rxyz"
+matches = [[2, 3], [6, 7], [10, 11]]
+
+[[test]]
+name = "basic8"
+regex = '(?m)$[a-z]'
+haystack = "abc\ndef\nxyz"
+matches = []
+
+[[test]]
+name = "basic8-crlf"
+regex = '(?Rm)$[a-z]'
+haystack = "abc\ndef\nxyz"
+matches = []
+
+[[test]]
+name = "basic8-crlf-cr"
+regex = '(?Rm)$[a-z]'
+haystack = "abc\rdef\rxyz"
+matches = []
+
+[[test]]
+name = "basic9"
+regex = '(?m)^$'
+haystack = ""
+matches = [[0, 0]]
+
+[[test]]
+name = "basic9-crlf"
+regex = '(?Rm)^$'
+haystack = ""
+matches = [[0, 0]]
+
+[[test]]
+name = "repeat1"
+regex = '(?m)(?:^$)*'
+haystack = "a\nb\nc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
+
+[[test]]
+name = "repeat1-crlf"
+regex = '(?Rm)(?:^$)*'
+haystack = "a\nb\nc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
+
+[[test]]
+name = "repeat1-crlf-cr"
+regex = '(?Rm)(?:^$)*'
+haystack = "a\rb\rc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
+
+[[test]]
+name = "repeat1-no-multi"
+regex = '(?:^$)*'
+haystack = "a\nb\nc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
+
+[[test]]
+name = "repeat1-no-multi-crlf"
+regex = '(?R)(?:^$)*'
+haystack = "a\nb\nc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
+
+[[test]]
+name = "repeat1-no-multi-crlf-cr"
+regex = '(?R)(?:^$)*'
+haystack = "a\rb\rc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
+
+[[test]]
+name = "repeat2"
+regex = '(?m)(?:^|a)+'
+haystack = "a\naaa\n"
+matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
+
+[[test]]
+name = "repeat2-crlf"
+regex = '(?Rm)(?:^|a)+'
+haystack = "a\naaa\n"
+matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
+
+[[test]]
+name = "repeat2-crlf-cr"
+regex = '(?Rm)(?:^|a)+'
+haystack = "a\raaa\r"
+matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
+
+[[test]]
+name = "repeat2-no-multi"
+regex = '(?:^|a)+'
+haystack = "a\naaa\n"
+matches = [[0, 0], [2, 5]]
+
+[[test]]
+name = "repeat2-no-multi-crlf"
+regex = '(?R)(?:^|a)+'
+haystack = "a\naaa\n"
+matches = [[0, 0], [2, 5]]
+
+[[test]]
+name = "repeat2-no-multi-crlf-cr"
+regex = '(?R)(?:^|a)+'
+haystack = "a\raaa\r"
+matches = [[0, 0], [2, 5]]
+
+[[test]]
+name = "repeat3"
+regex = '(?m)(?:^|a)*'
+haystack = "a\naaa\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
+
+[[test]]
+name = "repeat3-crlf"
+regex = '(?Rm)(?:^|a)*'
+haystack = "a\naaa\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
+
+[[test]]
+name = "repeat3-crlf-cr"
+regex = '(?Rm)(?:^|a)*'
+haystack = "a\raaa\r"
+matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
+
+[[test]]
+name = "repeat3-no-multi"
+regex = '(?:^|a)*'
+haystack = "a\naaa\n"
+matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
+
+[[test]]
+name = "repeat3-no-multi-crlf"
+regex = '(?R)(?:^|a)*'
+haystack = "a\naaa\n"
+matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
+
+[[test]]
+name = "repeat3-no-multi-crlf-cr"
+regex = '(?R)(?:^|a)*'
+haystack = "a\raaa\r"
+matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
+
+[[test]]
+name = "repeat4"
+regex = '(?m)(?:^|a+)'
+haystack = "a\naaa\n"
+matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
+
+[[test]]
+name = "repeat4-crlf"
+regex = '(?Rm)(?:^|a+)'
+haystack = "a\naaa\n"
+matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
+
+[[test]]
+name = "repeat4-crlf-cr"
+regex = '(?Rm)(?:^|a+)'
+haystack = "a\raaa\r"
+matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
+
+[[test]]
+name = "repeat4-no-multi"
+regex = '(?:^|a+)'
+haystack = "a\naaa\n"
+matches = [[0, 0], [2, 5]]
+
+[[test]]
+name = "repeat4-no-multi-crlf"
+regex = '(?R)(?:^|a+)'
+haystack = "a\naaa\n"
+matches = [[0, 0], [2, 5]]
+
+[[test]]
+name = "repeat4-no-multi-crlf-cr"
+regex = '(?R)(?:^|a+)'
+haystack = "a\raaa\r"
+matches = [[0, 0], [2, 5]]
+
+[[test]]
+name = "repeat5"
+regex = '(?m)(?:^|a*)'
+haystack = "a\naaa\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
+
+[[test]]
+name = "repeat5-crlf"
+regex = '(?Rm)(?:^|a*)'
+haystack = "a\naaa\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
+
+[[test]]
+name = "repeat5-crlf-cr"
+regex = '(?Rm)(?:^|a*)'
+haystack = "a\raaa\r"
+matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
+
+[[test]]
+name = "repeat5-no-multi"
+regex = '(?:^|a*)'
+haystack = "a\naaa\n"
+matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
+
+[[test]]
+name = "repeat5-no-multi-crlf"
+regex = '(?R)(?:^|a*)'
+haystack = "a\naaa\n"
+matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
+
+[[test]]
+name = "repeat5-no-multi-crlf-cr"
+regex = '(?R)(?:^|a*)'
+haystack = "a\raaa\r"
+matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
+
+[[test]]
+name = "repeat6"
+regex = '(?m)(?:^[a-z])+'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 1], [4, 5], [8, 9]]
+
+[[test]]
+name = "repeat6-crlf"
+regex = '(?Rm)(?:^[a-z])+'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 1], [4, 5], [8, 9]]
+
+[[test]]
+name = "repeat6-crlf-cr"
+regex = '(?Rm)(?:^[a-z])+'
+haystack = "abc\rdef\rxyz"
+matches = [[0, 1], [4, 5], [8, 9]]
+
+[[test]]
+name = "repeat6-no-multi"
+regex = '(?:^[a-z])+'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 1]]
+
+[[test]]
+name = "repeat6-no-multi-crlf"
+regex = '(?R)(?:^[a-z])+'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 1]]
+
+[[test]]
+name = "repeat6-no-multi-crlf-cr"
+regex = '(?R)(?:^[a-z])+'
+haystack = "abc\rdef\rxyz"
+matches = [[0, 1]]
+
+[[test]]
+name = "repeat7"
+regex = '(?m)(?:^[a-z]{3}\n?)+'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 11]]
+
+[[test]]
+name = "repeat7-crlf"
+regex = '(?Rm)(?:^[a-z]{3}\n?)+'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 11]]
+
+[[test]]
+name = "repeat7-crlf-cr"
+regex = '(?Rm)(?:^[a-z]{3}\r?)+'
+haystack = "abc\rdef\rxyz"
+matches = [[0, 11]]
+
+[[test]]
+name = "repeat7-no-multi"
+regex = '(?:^[a-z]{3}\n?)+'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 4]]
+
+[[test]]
+name = "repeat7-no-multi-crlf"
+regex = '(?R)(?:^[a-z]{3}\n?)+'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 4]]
+
+[[test]]
+name = "repeat7-no-multi-crlf-cr"
+regex = '(?R)(?:^[a-z]{3}\r?)+'
+haystack = "abc\rdef\rxyz"
+matches = [[0, 4]]
+
+[[test]]
+name = "repeat8"
+regex = '(?m)(?:^[a-z]{3}\n?)*'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 11]]
+
+[[test]]
+name = "repeat8-crlf"
+regex = '(?Rm)(?:^[a-z]{3}\n?)*'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 11]]
+
+[[test]]
+name = "repeat8-crlf-cr"
+regex = '(?Rm)(?:^[a-z]{3}\r?)*'
+haystack = "abc\rdef\rxyz"
+matches = [[0, 11]]
+
+[[test]]
+name = "repeat8-no-multi"
+regex = '(?:^[a-z]{3}\n?)*'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]]
+
+[[test]]
+name = "repeat8-no-multi-crlf"
+regex = '(?R)(?:^[a-z]{3}\n?)*'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]]
+
+[[test]]
+name = "repeat8-no-multi-crlf-cr"
+regex = '(?R)(?:^[a-z]{3}\r?)*'
+haystack = "abc\rdef\rxyz"
+matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]]
+
+[[test]]
+name = "repeat9"
+regex = '(?m)(?:\n?[a-z]{3}$)+'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 11]]
+
+[[test]]
+name = "repeat9-crlf"
+regex = '(?Rm)(?:\n?[a-z]{3}$)+'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 11]]
+
+[[test]]
+name = "repeat9-crlf-cr"
+regex = '(?Rm)(?:\r?[a-z]{3}$)+'
+haystack = "abc\rdef\rxyz"
+matches = [[0, 11]]
+
+[[test]]
+name = "repeat9-no-multi"
+regex = '(?:\n?[a-z]{3}$)+'
+haystack = "abc\ndef\nxyz"
+matches = [[7, 11]]
+
+[[test]]
+name = "repeat9-no-multi-crlf"
+regex = '(?R)(?:\n?[a-z]{3}$)+'
+haystack = "abc\ndef\nxyz"
+matches = [[7, 11]]
+
+[[test]]
+name = "repeat9-no-multi-crlf-cr"
+regex = '(?R)(?:\r?[a-z]{3}$)+'
+haystack = "abc\rdef\rxyz"
+matches = [[7, 11]]
+
+[[test]]
+name = "repeat10"
+regex = '(?m)(?:\n?[a-z]{3}$)*'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 11]]
+
+[[test]]
+name = "repeat10-crlf"
+regex = '(?Rm)(?:\n?[a-z]{3}$)*'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 11]]
+
+[[test]]
+name = "repeat10-crlf-cr"
+regex = '(?Rm)(?:\r?[a-z]{3}$)*'
+haystack = "abc\rdef\rxyz"
+matches = [[0, 11]]
+
+[[test]]
+name = "repeat10-no-multi"
+regex = '(?:\n?[a-z]{3}$)*'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]]
+
+[[test]]
+name = "repeat10-no-multi-crlf"
+regex = '(?R)(?:\n?[a-z]{3}$)*'
+haystack = "abc\ndef\nxyz"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]]
+
+[[test]]
+name = "repeat10-no-multi-crlf-cr"
+regex = '(?R)(?:\r?[a-z]{3}$)*'
+haystack = "abc\rdef\rxyz"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]]
+
+[[test]]
+name = "repeat11"
+regex = '(?m)^*'
+haystack = "\naa\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat11-crlf"
+regex = '(?Rm)^*'
+haystack = "\naa\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat11-crlf-cr"
+regex = '(?Rm)^*'
+haystack = "\raa\r"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat11-no-multi"
+regex = '^*'
+haystack = "\naa\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat11-no-multi-crlf"
+regex = '(?R)^*'
+haystack = "\naa\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat11-no-multi-crlf-cr"
+regex = '(?R)^*'
+haystack = "\raa\r"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat12"
+regex = '(?m)^+'
+haystack = "\naa\n"
+matches = [[0, 0], [1, 1], [4, 4]]
+
+[[test]]
+name = "repeat12-crlf"
+regex = '(?Rm)^+'
+haystack = "\naa\n"
+matches = [[0, 0], [1, 1], [4, 4]]
+
+[[test]]
+name = "repeat12-crlf-cr"
+regex = '(?Rm)^+'
+haystack = "\raa\r"
+matches = [[0, 0], [1, 1], [4, 4]]
+
+[[test]]
+name = "repeat12-no-multi"
+regex = '^+'
+haystack = "\naa\n"
+matches = [[0, 0]]
+
+[[test]]
+name = "repeat12-no-multi-crlf"
+regex = '(?R)^+'
+haystack = "\naa\n"
+matches = [[0, 0]]
+
+[[test]]
+name = "repeat12-no-multi-crlf-cr"
+regex = '(?R)^+'
+haystack = "\raa\r"
+matches = [[0, 0]]
+
+[[test]]
+name = "repeat13"
+regex = '(?m)$*'
+haystack = "\naa\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat13-crlf"
+regex = '(?Rm)$*'
+haystack = "\naa\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat13-crlf-cr"
+regex = '(?Rm)$*'
+haystack = "\raa\r"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat13-no-multi"
+regex = '$*'
+haystack = "\naa\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat13-no-multi-crlf"
+regex = '(?R)$*'
+haystack = "\naa\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat13-no-multi-crlf-cr"
+regex = '(?R)$*'
+haystack = "\raa\r"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat14"
+regex = '(?m)$+'
+haystack = "\naa\n"
+matches = [[0, 0], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat14-crlf"
+regex = '(?Rm)$+'
+haystack = "\naa\n"
+matches = [[0, 0], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat14-crlf-cr"
+regex = '(?Rm)$+'
+haystack = "\raa\r"
+matches = [[0, 0], [3, 3], [4, 4]]
+
+[[test]]
+name = "repeat14-no-multi"
+regex = '$+'
+haystack = "\naa\n"
+matches = [[4, 4]]
+
+[[test]]
+name = "repeat14-no-multi-crlf"
+regex = '(?R)$+'
+haystack = "\naa\n"
+matches = [[4, 4]]
+
+[[test]]
+name = "repeat14-no-multi-crlf-cr"
+regex = '(?R)$+'
+haystack = "\raa\r"
+matches = [[4, 4]]
+
+[[test]]
+name = "repeat15"
+regex = '(?m)(?:$\n)+'
+haystack = "\n\naaa\n\n"
+matches = [[0, 2], [5, 7]]
+
+[[test]]
+name = "repeat15-crlf"
+regex = '(?Rm)(?:$\n)+'
+haystack = "\n\naaa\n\n"
+matches = [[0, 2], [5, 7]]
+
+[[test]]
+name = "repeat15-crlf-cr"
+regex = '(?Rm)(?:$\r)+'
+haystack = "\r\raaa\r\r"
+matches = [[0, 2], [5, 7]]
+
+[[test]]
+name = "repeat15-no-multi"
+regex = '(?:$\n)+'
+haystack = "\n\naaa\n\n"
+matches = []
+
+[[test]]
+name = "repeat15-no-multi-crlf"
+regex = '(?R)(?:$\n)+'
+haystack = "\n\naaa\n\n"
+matches = []
+
+[[test]]
+name = "repeat15-no-multi-crlf-cr"
+regex = '(?R)(?:$\r)+'
+haystack = "\r\raaa\r\r"
+matches = []
+
+[[test]]
+name = "repeat16"
+regex = '(?m)(?:$\n)*'
+haystack = "\n\naaa\n\n"
+matches = [[0, 2], [3, 3], [4, 4], [5, 7]]
+
+[[test]]
+name = "repeat16-crlf"
+regex = '(?Rm)(?:$\n)*'
+haystack = "\n\naaa\n\n"
+matches = [[0, 2], [3, 3], [4, 4], [5, 7]]
+
+[[test]]
+name = "repeat16-crlf-cr"
+regex = '(?Rm)(?:$\r)*'
+haystack = "\r\raaa\r\r"
+matches = [[0, 2], [3, 3], [4, 4], [5, 7]]
+
+[[test]]
+name = "repeat16-no-multi"
+regex = '(?:$\n)*'
+haystack = "\n\naaa\n\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]
+
+[[test]]
+name = "repeat16-no-multi-crlf"
+regex = '(?R)(?:$\n)*'
+haystack = "\n\naaa\n\n"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]
+
+[[test]]
+name = "repeat16-no-multi-crlf-cr"
+regex = '(?R)(?:$\r)*'
+haystack = "\r\raaa\r\r"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]
+
+[[test]]
+name = "repeat17"
+regex = '(?m)(?:$\n^)+'
+haystack = "\n\naaa\n\n"
+matches = [[0, 2], [5, 7]]
+
+[[test]]
+name = "repeat17-crlf"
+regex = '(?Rm)(?:$\n^)+'
+haystack = "\n\naaa\n\n"
+matches = [[0, 2], [5, 7]]
+
+[[test]]
+name = "repeat17-crlf-cr"
+regex = '(?Rm)(?:$\r^)+'
+haystack = "\r\raaa\r\r"
+matches = [[0, 2], [5, 7]]
+
+[[test]]
+name = "repeat17-no-multi"
+regex = '(?:$\n^)+'
+haystack = "\n\naaa\n\n"
+matches = []
+
+[[test]]
+name = "repeat17-no-multi-crlf"
+regex = '(?R)(?:$\n^)+'
+haystack = "\n\naaa\n\n"
+matches = []
+
+[[test]]
+name = "repeat17-no-multi-crlf-cr"
+regex = '(?R)(?:$\r^)+'
+haystack = "\r\raaa\r\r"
+matches = []
+
+[[test]]
+name = "repeat18"
+regex = '(?m)(?:^|$)+'
+haystack = "\n\naaa\n\n"
+matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]]
+
+[[test]]
+name = "repeat18-crlf"
+regex = '(?Rm)(?:^|$)+'
+haystack = "\n\naaa\n\n"
+matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]]
+
+[[test]]
+name = "repeat18-crlf-cr"
+regex = '(?Rm)(?:^|$)+'
+haystack = "\r\raaa\r\r"
+matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]]
+
+[[test]]
+name = "repeat18-no-multi"
+regex = '(?:^|$)+'
+haystack = "\n\naaa\n\n"
+matches = [[0, 0], [7, 7]]
+
+[[test]]
+name = "repeat18-no-multi-crlf"
+regex = '(?R)(?:^|$)+'
+haystack = "\n\naaa\n\n"
+matches = [[0, 0], [7, 7]]
+
+[[test]]
+name = "repeat18-no-multi-crlf-cr"
+regex = '(?R)(?:^|$)+'
+haystack = "\r\raaa\r\r"
+matches = [[0, 0], [7, 7]]
+
+[[test]]
+name = "match-line-100"
+regex = '(?m)^.+$'
+haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
+matches = [[0, 2], [3, 22]]
+
+[[test]]
+name = "match-line-100-crlf"
+regex = '(?Rm)^.+$'
+haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
+matches = [[0, 2], [3, 22]]
+
+[[test]]
+name = "match-line-100-crlf-cr"
+regex = '(?Rm)^.+$'
+haystack = "aa\raaaaaaaaaaaaaaaaaaa\r"
+matches = [[0, 2], [3, 22]]
+
+[[test]]
+name = "match-line-200"
+regex = '(?m)^.+$'
+haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
+matches = [[0, 2], [3, 22]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "match-line-200-crlf"
+regex = '(?Rm)^.+$'
+haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
+matches = [[0, 2], [3, 22]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "match-line-200-crlf-cr"
+regex = '(?Rm)^.+$'
+haystack = "aa\raaaaaaaaaaaaaaaaaaa\r"
+matches = [[0, 2], [3, 22]]
+unicode = false
+utf8 = false
--- a/third-party/vendor/regex/testdata/no-unicode.toml
+++ b/third-party/vendor/regex/testdata/no-unicode.toml
@ -0,0 +1,222 @@
+[[test]]
+name = "invalid-utf8-literal1"
+regex = '\xFF'
+haystack = '\xFF'
+matches = [[0, 1]]
+unicode = false
+utf8 = false
+unescape = true
+
+
+[[test]]
+name = "mixed"
+regex = '(?:.+)(?-u)(?:.+)'
+haystack = '\xCE\x93\xCE\x94\xFF'
+matches = [[0, 5]]
+utf8 = false
+unescape = true
+
+
+[[test]]
+name = "case1"
+regex = "a"
+haystack = "A"
+matches = [[0, 1]]
+case-insensitive = true
+unicode = false
+
+[[test]]
+name = "case2"
+regex = "[a-z]+"
+haystack = "AaAaA"
+matches = [[0, 5]]
+case-insensitive = true
+unicode = false
+
+[[test]]
+name = "case3"
+regex = "[a-z]+"
+haystack = "aA\u212AaA"
+matches = [[0, 7]]
+case-insensitive = true
+
+[[test]]
+name = "case4"
+regex = "[a-z]+"
+haystack = "aA\u212AaA"
+matches = [[0, 2], [5, 7]]
+case-insensitive = true
+unicode = false
+
+
+[[test]]
+name = "negate1"
+regex = "[^a]"
+haystack = "δ"
+matches = [[0, 2]]
+
+[[test]]
+name = "negate2"
+regex = "[^a]"
+haystack = "δ"
+matches = [[0, 1], [1, 2]]
+unicode = false
+utf8 = false
+
+
+[[test]]
+name = "dotstar-prefix1"
+regex = "a"
+haystack = '\xFFa'
+matches = [[1, 2]]
+unicode = false
+utf8 = false
+unescape = true
+
+[[test]]
+name = "dotstar-prefix2"
+regex = "a"
+haystack = '\xFFa'
+matches = [[1, 2]]
+utf8 = false
+unescape = true
+
+
+[[test]]
+name = "null-bytes1"
+regex = '[^\x00]+\x00'
+haystack = 'foo\x00'
+matches = [[0, 4]]
+unicode = false
+utf8 = false
+unescape = true
+
+
+[[test]]
+name = "word-ascii"
+regex = '\w+'
+haystack = "aδ"
+matches = [[0, 1]]
+unicode = false
+
+[[test]]
+name = "word-unicode"
+regex = '\w+'
+haystack = "aδ"
+matches = [[0, 3]]
+
+[[test]]
+name = "decimal-ascii"
+regex = '\d+'
+haystack = "1२३9"
+matches = [[0, 1], [7, 8]]
+unicode = false
+
+[[test]]
+name = "decimal-unicode"
+regex = '\d+'
+haystack = "1२३9"
+matches = [[0, 8]]
+
+[[test]]
+name = "space-ascii"
+regex = '\s+'
+haystack = " \u1680"
+matches = [[0, 1]]
+unicode = false
+
+[[test]]
+name = "space-unicode"
+regex = '\s+'
+haystack = " \u1680"
+matches = [[0, 4]]
+
+
+[[test]]
+# See: https://github.com/rust-lang/regex/issues/484
+name = "iter1-bytes"
+regex = ''
+haystack = "☃"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+utf8 = false
+
+[[test]]
+# See: https://github.com/rust-lang/regex/issues/484
+name = "iter1-utf8"
+regex = ''
+haystack = "☃"
+matches = [[0, 0], [3, 3]]
+
+[[test]]
+# See: https://github.com/rust-lang/regex/issues/484
+# Note that iter2-utf8 doesn't make sense here, since the input isn't UTF-8.
+name = "iter2-bytes"
+regex = ''
+haystack = 'b\xFFr'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+unescape = true
+utf8 = false
+
+
+# These test that unanchored prefixes can munch through invalid UTF-8 even when
+# utf8 is enabled.
+#
+# This test actually reflects an interesting simplification in how the Thompson
+# NFA is constructed. It used to be that the NFA could be built with an
+# unanchored prefix that either matched any byte or _only_ matched valid UTF-8.
+# But the latter turns out to be pretty precarious when it comes to prefilters,
+# because if you search a haystack that contains invalid UTF-8 but have an
+# unanchored prefix that requires UTF-8, then prefilters are no longer a valid
+# optimization because you actually have to check that everything is valid
+# UTF-8.
+#
+# Originally, I had thought that we needed a valid UTF-8 unanchored prefix in
+# order to guarantee that we only match at valid UTF-8 boundaries. But this
+# isn't actually true! There are really only two things to consider here:
+#
+# 1) Will a regex match split an encoded codepoint? No. Because by construction,
+# we ensure that a MATCH state can only be reached by following valid UTF-8 (assuming
+# all of the UTF-8 modes are enabled).
+#
+# 2) Will a regex match arbitrary bytes that aren't valid UTF-8? Again, no,
+# assuming all of the UTF-8 modes are enabled.
+[[test]]
+name = "unanchored-invalid-utf8-match-100"
+regex = '[a-z]'
+haystack = '\xFFa\xFF'
+matches = [[1, 2]]
+unescape = true
+utf8 = false
+
+# This test shows that we can still prevent a match from occurring by requiring
+# that valid UTF-8 match by inserting our own unanchored prefix. Thus, if the
+# behavior of not munching through invalid UTF-8 anywhere is needed, then it
+# can be achieved thusly.
+[[test]]
+name = "unanchored-invalid-utf8-nomatch"
+regex = '^(?s:.)*?[a-z]'
+haystack = '\xFFa\xFF'
+matches = []
+unescape = true
+utf8 = false
+
+# This is a tricky test that makes sure we don't accidentally do a kind of
+# unanchored search when we've requested that a regex engine not report
+# empty matches that split a codepoint. This test caught a regression during
+# development where the code for skipping over bad empty matches would do so
+# even if the search should have been anchored. This is ultimately what led to
+# making 'anchored' an 'Input' option, so that it was always clear what kind
+# of search was being performed. (Before that, whether a search was anchored
+# or not was a config knob on the regex engine.) This did wind up making DFAs
+# a little more complex to configure (with their 'StartKind' knob), but it
+# generally smoothed out everything else.
+#
+# Great example of a test whose failure motivated a sweeping API refactoring.
+[[test]]
+name = "anchored-iter-empty-utf8"
+regex = ''
+haystack = 'a☃z'
+matches = [[0, 0], [1, 1]]
+unescape = false
+utf8 = true
+anchored = true
--- a/third-party/vendor/regex/testdata/overlapping.toml
+++ b/third-party/vendor/regex/testdata/overlapping.toml
@ -0,0 +1,280 @@
+# NOTE: We define a number of tests where the *match* kind is 'leftmost-first'
+# but the *search* kind is 'overlapping'. This is a somewhat nonsensical
+# combination and can produce odd results. Nevertheless, those results should
+# be consistent so we test them here. (At the time of writing this note, I
+# hadn't yet decided whether to make 'leftmost-first' with 'overlapping' result
+# in unspecified behavior.)
+
+# This demonstrates how a full overlapping search is obviously quadratic. This
+# regex reports a match for every substring in the haystack.
+[[test]]
+name = "ungreedy-dotstar-matches-everything-100"
+regex = [".*?"]
+haystack = "zzz"
+matches = [
+  { id = 0, span = [0, 0] },
+  { id = 0, span = [1, 1] },
+  { id = 0, span = [0, 1] },
+  { id = 0, span = [2, 2] },
+  { id = 0, span = [1, 2] },
+  { id = 0, span = [0, 2] },
+  { id = 0, span = [3, 3] },
+  { id = 0, span = [2, 3] },
+  { id = 0, span = [1, 3] },
+  { id = 0, span = [0, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "greedy-dotstar-matches-everything-100"
+regex = [".*"]
+haystack = "zzz"
+matches = [
+  { id = 0, span = [0, 0] },
+  { id = 0, span = [1, 1] },
+  { id = 0, span = [0, 1] },
+  { id = 0, span = [2, 2] },
+  { id = 0, span = [1, 2] },
+  { id = 0, span = [0, 2] },
+  { id = 0, span = [3, 3] },
+  { id = 0, span = [2, 3] },
+  { id = 0, span = [1, 3] },
+  { id = 0, span = [0, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-plus-leftmost-first-100"
+regex = 'a+'
+haystack = "aaa"
+matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]]
+match-kind = "leftmost-first"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-plus-leftmost-first-110"
+regex = '☃+'
+haystack = "☃☃☃"
+matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]]
+match-kind = "leftmost-first"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-plus-all-100"
+regex = 'a+'
+haystack = "aaa"
+matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-plus-all-110"
+regex = '☃+'
+haystack = "☃☃☃"
+matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-plus-leftmost-first-200"
+regex = '(abc)+'
+haystack = "zzabcabczzabc"
+matches = [
+  [[2, 5], [2, 5]],
+  [[5, 8], [5, 8]],
+  [[2, 8], [5, 8]],
+]
+match-kind = "leftmost-first"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-plus-all-200"
+regex = '(abc)+'
+haystack = "zzabcabczzabc"
+matches = [
+  [[2, 5], [2, 5]],
+  [[5, 8], [5, 8]],
+  [[2, 8], [5, 8]],
+  [[10, 13], [10, 13]],
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-star-leftmost-first-100"
+regex = 'a*'
+haystack = "aaa"
+matches = [
+  [0, 0],
+  [1, 1],
+  [0, 1],
+  [2, 2],
+  [1, 2],
+  [0, 2],
+  [3, 3],
+  [2, 3],
+  [1, 3],
+  [0, 3],
+]
+match-kind = "leftmost-first"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-star-all-100"
+regex = 'a*'
+haystack = "aaa"
+matches = [
+  [0, 0],
+  [1, 1],
+  [0, 1],
+  [2, 2],
+  [1, 2],
+  [0, 2],
+  [3, 3],
+  [2, 3],
+  [1, 3],
+  [0, 3],
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-star-leftmost-first-200"
+regex = '(abc)*'
+haystack = "zzabcabczzabc"
+matches = [
+  [[0, 0], []],
+]
+match-kind = "leftmost-first"
+search-kind = "overlapping"
+
+[[test]]
+name = "repetition-star-all-200"
+regex = '(abc)*'
+haystack = "zzabcabczzabc"
+matches = [
+  [[0, 0], []],
+  [[1, 1], []],
+  [[2, 2], []],
+  [[3, 3], []],
+  [[4, 4], []],
+  [[5, 5], []],
+  [[2, 5], [2, 5]],
+  [[6, 6], []],
+  [[7, 7], []],
+  [[8, 8], []],
+  [[5, 8], [5, 8]],
+  [[2, 8], [5, 8]],
+  [[9, 9], []],
+  [[10, 10], []],
+  [[11, 11], []],
+  [[12, 12], []],
+  [[13, 13], []],
+  [[10, 13], [10, 13]],
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "start-end-rep-leftmost-first"
+regex = '(^$)*'
+haystack = "abc"
+matches = [
+  [[0, 0], []],
+]
+match-kind = "leftmost-first"
+search-kind = "overlapping"
+
+[[test]]
+name = "start-end-rep-all"
+regex = '(^$)*'
+haystack = "abc"
+matches = [
+  [[0, 0], []],
+  [[1, 1], []],
+  [[2, 2], []],
+  [[3, 3], []],
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "alt-leftmost-first-100"
+regex = 'abc|a'
+haystack = "zzabcazzaabc"
+matches = [[2, 3], [2, 5]]
+match-kind = "leftmost-first"
+search-kind = "overlapping"
+
+[[test]]
+name = "alt-all-100"
+regex = 'abc|a'
+haystack = "zzabcazzaabc"
+matches = [[2, 3], [2, 5], [5, 6], [8, 9], [9, 10], [9, 12]]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "empty-000"
+regex = ""
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "empty-alt-000"
+regex = "|b"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "empty-alt-010"
+regex = "b|"
+haystack = "abc"
+matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+# See: https://github.com/rust-lang/regex/issues/484
+name = "iter1-bytes"
+regex = ''
+haystack = "☃"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+utf8 = false
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+# See: https://github.com/rust-lang/regex/issues/484
+name = "iter1-utf8"
+regex = ''
+haystack = "☃"
+matches = [[0, 0], [3, 3]]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "iter1-incomplete-utf8"
+regex = ''
+haystack = '\xE2\x98'  # incomplete snowman
+matches = [[0, 0], [1, 1], [2, 2]]
+match-kind = "all"
+search-kind = "overlapping"
+unescape = true
+utf8 = false
+
+[[test]]
+name = "scratch"
+regex = ['sam', 'samwise']
+haystack = "samwise"
+matches = [
+  { id = 0, span = [0, 3] },
+]
+match-kind = "leftmost-first"
+search-kind = "overlapping"
--- a/third-party/vendor/regex/testdata/regex-lite.toml
+++ b/third-party/vendor/regex/testdata/regex-lite.toml
@ -0,0 +1,98 @@
+# These tests are specifically written to test the regex-lite crate. While it
+# largely has the same semantics as the regex crate, there are some differences
+# around Unicode support and UTF-8.
+#
+# To be clear, regex-lite supports far fewer patterns because of its lack of
+# Unicode support, nested character classes and character class set operations.
+# What we're talking about here are the patterns that both crates support but
+# where the semantics might differ.
+
+# regex-lite uses ASCII definitions for Perl character classes.
+[[test]]
+name = "perl-class-decimal"
+regex = '\d'
+haystack = '᠕'
+matches = []
+unicode = true
+
+# regex-lite uses ASCII definitions for Perl character classes.
+[[test]]
+name = "perl-class-space"
+regex = '\s'
+haystack = "\u2000"
+matches = []
+unicode = true
+
+# regex-lite uses ASCII definitions for Perl character classes.
+[[test]]
+name = "perl-class-word"
+regex = '\w'
+haystack = 'δ'
+matches = []
+unicode = true
+
+# regex-lite uses the ASCII definition of word for word boundary assertions.
+[[test]]
+name = "word-boundary"
+regex = '\b'
+haystack = 'δ'
+matches = []
+unicode = true
+
+# regex-lite uses the ASCII definition of word for negated word boundary
+# assertions. But note that it should still not split codepoints!
+[[test]]
+name = "word-boundary-negated"
+regex = '\B'
+haystack = 'δ'
+matches = [[0, 0], [2, 2]]
+unicode = true
+
+# While we're here, the empty regex---which matches at every
+# position---shouldn't split a codepoint either.
+[[test]]
+name = "empty-no-split-codepoint"
+regex = ''
+haystack = '💩'
+matches = [[0, 0], [4, 4]]
+unicode = true
+
+# A dot always matches a full codepoint.
+[[test]]
+name = "dot-always-matches-codepoint"
+regex = '.'
+haystack = '💩'
+matches = [[0, 4]]
+unicode = false
+
+# A negated character class also always matches a full codepoint.
+[[test]]
+name = "negated-class-always-matches-codepoint"
+regex = '[^a]'
+haystack = '💩'
+matches = [[0, 4]]
+unicode = false
+
+# regex-lite only supports ASCII-aware case insensitive matching.
+[[test]]
+name = "case-insensitive-is-ascii-only"
+regex = 's'
+haystack = 'ſ'
+matches = []
+unicode = true
+case-insensitive = true
+
+# Negated word boundaries shouldn't split a codepoint, but they will match
+# between invalid UTF-8.
+#
+# This test is only valid for a 'bytes' API, but that doesn't (yet) exist in
+# regex-lite. This can't happen in the main API because &str can't contain
+# invalid UTF-8.
+# [[test]]
+# name = "word-boundary-invalid-utf8"
+# regex = '\B'
+# haystack = '\xFF\xFF\xFF\xFF'
+# unescape = true
+# matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+# unicode = true
+# utf8 = false
--- a/third-party/vendor/regex/testdata/regression.toml
+++ b/third-party/vendor/regex/testdata/regression.toml
@ -0,0 +1,830 @@
+# See: https://github.com/rust-lang/regex/issues/48
+[[test]]
+name = "invalid-regex-no-crash-100"
+regex = '(*)'
+haystack = ""
+matches = []
+compiles = false
+
+# See: https://github.com/rust-lang/regex/issues/48
+[[test]]
+name = "invalid-regex-no-crash-200"
+regex = '(?:?)'
+haystack = ""
+matches = []
+compiles = false
+
+# See: https://github.com/rust-lang/regex/issues/48
+[[test]]
+name = "invalid-regex-no-crash-300"
+regex = '(?)'
+haystack = ""
+matches = []
+compiles = false
+
+# See: https://github.com/rust-lang/regex/issues/48
+[[test]]
+name = "invalid-regex-no-crash-400"
+regex = '*'
+haystack = ""
+matches = []
+compiles = false
+
+# See: https://github.com/rust-lang/regex/issues/75
+[[test]]
+name = "unsorted-binary-search-100"
+regex = '(?i-u)[a_]+'
+haystack = "A_"
+matches = [[0, 2]]
+
+# See: https://github.com/rust-lang/regex/issues/75
+[[test]]
+name = "unsorted-binary-search-200"
+regex = '(?i-u)[A_]+'
+haystack = "a_"
+matches = [[0, 2]]
+
+# See: https://github.com/rust-lang/regex/issues/76
+[[test]]
+name = "unicode-case-lower-nocase-flag"
+regex = '(?i)\p{Ll}+'
+haystack = "ΛΘΓΔα"
+matches = [[0, 10]]
+
+# See: https://github.com/rust-lang/regex/issues/99
+[[test]]
+name = "negated-char-class-100"
+regex = '(?i)[^x]'
+haystack = "x"
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/99
+[[test]]
+name = "negated-char-class-200"
+regex = '(?i)[^x]'
+haystack = "X"
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/101
+[[test]]
+name = "ascii-word-underscore"
+regex = '[[:word:]]'
+haystack = "_"
+matches = [[0, 1]]
+
+# See: https://github.com/rust-lang/regex/issues/129
+[[test]]
+name = "captures-repeat"
+regex = '([a-f]){2}(?P<foo>[x-z])'
+haystack = "abx"
+matches = [
+  [[0, 3], [1, 2], [2, 3]],
+]
+
+# See: https://github.com/rust-lang/regex/issues/153
+[[test]]
+name = "alt-in-alt-100"
+regex = 'ab?|$'
+haystack = "az"
+matches = [[0, 1], [2, 2]]
+
+# See: https://github.com/rust-lang/regex/issues/153
+[[test]]
+name = "alt-in-alt-200"
+regex = '^(?:.*?)(?:\n|\r\n?|$)'
+haystack = "ab\rcd"
+matches = [[0, 3]]
+
+# See: https://github.com/rust-lang/regex/issues/169
+[[test]]
+name = "leftmost-first-prefix"
+regex = 'z*azb'
+haystack = "azb"
+matches = [[0, 3]]
+
+# See: https://github.com/rust-lang/regex/issues/191
+[[test]]
+name = "many-alternates"
+regex = '1|2|3|4|5|6|7|8|9|10|int'
+haystack = "int"
+matches = [[0, 3]]
+
+# See: https://github.com/rust-lang/regex/issues/204
+[[test]]
+name = "word-boundary-alone-100"
+regex = '\b'
+haystack = "Should this (work?)"
+matches = [[0, 0], [6, 6], [7, 7], [11, 11], [13, 13], [17, 17]]
+
+# See: https://github.com/rust-lang/regex/issues/204
+[[test]]
+name = "word-boundary-alone-200"
+regex = '\b'
+haystack = "a b c"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
+
+# See: https://github.com/rust-lang/regex/issues/264
+[[test]]
+name = "word-boundary-ascii-no-capture"
+regex = '\B'
+haystack = "\U00028F3E"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+unicode = false
+utf8 = false
+
+# See: https://github.com/rust-lang/regex/issues/264
+[[test]]
+name = "word-boundary-ascii-capture"
+regex = '(?:\B)'
+haystack = "\U00028F3E"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+unicode = false
+utf8 = false
+
+# See: https://github.com/rust-lang/regex/issues/268
+[[test]]
+name = "partial-anchor"
+regex = '^a|b'
+haystack = "ba"
+matches = [[0, 1]]
+
+# See: https://github.com/rust-lang/regex/issues/271
+[[test]]
+name = "endl-or-word-boundary"
+regex = '(?m:$)|(?-u:\b)'
+haystack = "\U0006084E"
+matches = [[4, 4]]
+
+# See: https://github.com/rust-lang/regex/issues/271
+[[test]]
+name = "zero-or-end"
+regex = '(?i-u:\x00)|$'
+haystack = "\U000E682F"
+matches = [[4, 4]]
+
+# See: https://github.com/rust-lang/regex/issues/271
+[[test]]
+name = "y-or-endl"
+regex = '(?i-u:y)|(?m:$)'
+haystack = "\U000B4331"
+matches = [[4, 4]]
+
+# See: https://github.com/rust-lang/regex/issues/271
+[[test]]
+name = "word-boundary-start-x"
+regex = '(?u:\b)^(?-u:X)'
+haystack = "X"
+matches = [[0, 1]]
+
+# See: https://github.com/rust-lang/regex/issues/271
+[[test]]
+name = "word-boundary-ascii-start-x"
+regex = '(?-u:\b)^(?-u:X)'
+haystack = "X"
+matches = [[0, 1]]
+
+# See: https://github.com/rust-lang/regex/issues/271
+[[test]]
+name = "end-not-word-boundary"
+regex = '$\B'
+haystack = "\U0005C124\U000B576C"
+matches = [[8, 8]]
+unicode = false
+utf8 = false
+
+# See: https://github.com/rust-lang/regex/issues/280
+[[test]]
+name = "partial-anchor-alternate-begin"
+regex = '^a|z'
+haystack = "yyyyya"
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/280
+[[test]]
+name = "partial-anchor-alternate-end"
+regex = 'a$|z'
+haystack = "ayyyyy"
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/289
+[[test]]
+name = "lits-unambiguous-100"
+regex = '(?:ABC|CDA|BC)X'
+haystack = "CDAX"
+matches = [[0, 4]]
+
+# See: https://github.com/rust-lang/regex/issues/291
+[[test]]
+name = "lits-unambiguous-200"
+regex = '((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$'
+haystack = "CIMG2341"
+matches = [
+  [[0, 8], [0, 4], [], [0, 4], [4, 8]],
+]
+
+# See: https://github.com/rust-lang/regex/issues/303
+#
+# 2022-09-19: This has now been "properly" fixed in that empty character
+# classes are fully supported as something that can never match. This test
+# used to be marked as 'compiles = false', but now it works.
+[[test]]
+name = "negated-full-byte-range"
+regex = '[^\x00-\xFF]'
+haystack = ""
+matches = []
+compiles = true
+unicode = false
+utf8 = false
+
+# See: https://github.com/rust-lang/regex/issues/321
+[[test]]
+name = "strange-anchor-non-complete-prefix"
+regex = 'a^{2}'
+haystack = ""
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/321
+[[test]]
+name = "strange-anchor-non-complete-suffix"
+regex = '${2}a'
+haystack = ""
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/334
+# See: https://github.com/rust-lang/regex/issues/557
+[[test]]
+name = "captures-after-dfa-premature-end-100"
+regex = 'a(b*(X|$))?'
+haystack = "abcbX"
+matches = [
+  [[0, 1], [], []],
+]
+
+# See: https://github.com/rust-lang/regex/issues/334
+# See: https://github.com/rust-lang/regex/issues/557
+[[test]]
+name = "captures-after-dfa-premature-end-200"
+regex = 'a(bc*(X|$))?'
+haystack = "abcbX"
+matches = [
+  [[0, 1], [], []],
+]
+
+# See: https://github.com/rust-lang/regex/issues/334
+# See: https://github.com/rust-lang/regex/issues/557
+[[test]]
+name = "captures-after-dfa-premature-end-300"
+regex = '(aa$)?'
+haystack = "aaz"
+matches = [
+  [[0, 0], []],
+  [[1, 1], []],
+  [[2, 2], []],
+  [[3, 3], []],
+]
+
+# Plucked from "Why aren’t regular expressions a lingua franca? an empirical
+# study on the re-use and portability of regular expressions", The ACM Joint
+# European Software Engineering Conference and Symposium on the Foundations of
+# Software Engineering (ESEC/FSE), 2019.
+#
+# Link: https://dl.acm.org/doi/pdf/10.1145/3338906.3338909
+[[test]]
+name = "captures-after-dfa-premature-end-400"
+regex = '(a)\d*\.?\d+\b'
+haystack = "a0.0c"
+matches = [
+  [[0, 2], [0, 1]],
+]
+
+# See: https://github.com/rust-lang/regex/issues/437
+[[test]]
+name = "literal-panic"
+regex = 'typename type\-parameter\-[0-9]+\-[0-9]+::.+'
+haystack = "test"
+matches = []
+
+# See: https://github.com/rust-lang/regex/issues/527
+[[test]]
+name = "empty-flag-expr"
+regex = '(?:(?:(?x)))'
+haystack = ""
+matches = [[0, 0]]
+
+# See: https://github.com/rust-lang/regex/issues/533
+#[[tests]]
+#name = "blank-matches-nothing-between-space-and-tab"
+#regex = '[[:blank:]]'
+#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'
+#match = false
+#unescape = true
+
+# See: https://github.com/rust-lang/regex/issues/533
+#[[tests]]
+#name = "blank-matches-nothing-between-space-and-tab-inverted"
+#regex = '^[[:^blank:]]+$'
+#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'
+#match = true
+#unescape = true
+
+# See: https://github.com/rust-lang/regex/issues/555
+[[test]]
+name = "invalid-repetition"
+regex = '(?m){1,1}'
+haystack = ""
+matches = []
+compiles = false
+
+# See: https://github.com/rust-lang/regex/issues/640
+[[test]]
+name = "flags-are-unset"
+regex = '(?:(?i)foo)|Bar'
+haystack = "foo Foo bar Bar"
+matches = [[0, 3], [4, 7], [12, 15]]
+
+# Note that 'Ј' is not 'j', but cyrillic Je
+# https://en.wikipedia.org/wiki/Je_(Cyrillic)
+#
+# See: https://github.com/rust-lang/regex/issues/659
+[[test]]
+name = "empty-group-with-unicode"
+regex = '(?:)Ј01'
+haystack = 'zЈ01'
+matches = [[1, 5]]
+
+# See: https://github.com/rust-lang/regex/issues/579
+[[test]]
+name = "word-boundary-weird"
+regex = '\b..\b'
+haystack = "I have 12, he has 2!"
+matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]]
+
+# See: https://github.com/rust-lang/regex/issues/579
+[[test]]
+name = "word-boundary-weird-ascii"
+regex = '\b..\b'
+haystack = "I have 12, he has 2!"
+matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]]
+unicode = false
+utf8 = false
+
+# See: https://github.com/rust-lang/regex/issues/579
+[[test]]
+name = "word-boundary-weird-minimal-ascii"
+regex = '\b..\b'
+haystack = "az,,b"
+matches = [[0, 2], [2, 4]]
+unicode = false
+utf8 = false
+
+# See: https://github.com/BurntSushi/ripgrep/issues/1203
+[[test]]
+name = "reverse-suffix-100"
+regex = '[0-4][0-4][0-4]000'
+haystack = "153.230000"
+matches = [[4, 10]]
+
+# See: https://github.com/BurntSushi/ripgrep/issues/1203
+[[test]]
+name = "reverse-suffix-200"
+regex = '[0-9][0-9][0-9]000'
+haystack = "153.230000\n"
+matches = [[4, 10]]
+
+# This is a tricky case for the reverse suffix optimization, because it
+# finds the 'foobar' match but the reverse scan must fail to find a match by
+# correctly dealing with the word boundary following the 'foobar' literal when
+# computing the start state.
+#
+# This test exists because I tried to break the following assumption that
+# is currently in the code: that if a suffix is found and the reverse scan
+# succeeds, then it's guaranteed that there is an overall match. Namely, the
+# 'is_match' routine does *not* do another forward scan in this case because of
+# this assumption.
+[[test]]
+name = "reverse-suffix-300"
+regex = '\w+foobar\b'
+haystack = "xyzfoobarZ"
+matches = []
+unicode = false
+utf8 = false
+
+# See: https://github.com/BurntSushi/ripgrep/issues/1247
+[[test]]
+name = "stops"
+regex = '\bs(?:[ab])'
+haystack = 's\xE4'
+matches = []
+unescape = true
+utf8 = false
+
+# See: https://github.com/BurntSushi/ripgrep/issues/1247
+[[test]]
+name = "stops-ascii"
+regex = '(?-u:\b)s(?:[ab])'
+haystack = 's\xE4'
+matches = []
+unescape = true
+utf8 = false
+
+# See: https://github.com/rust-lang/regex/issues/850
+[[test]]
+name = "adjacent-line-boundary-100"
+regex = '(?m)^(?:[^ ]+?)$'
+haystack = "line1\nline2"
+matches = [[0, 5], [6, 11]]
+
+# Continued.
+[[test]]
+name = "adjacent-line-boundary-200"
+regex = '(?m)^(?:[^ ]+?)$'
+haystack = "A\nB"
+matches = [[0, 1], [2, 3]]
+
+# There is no issue for this bug.
+[[test]]
+name = "anchored-prefix-100"
+regex = '^a[[:^space:]]'
+haystack = "a "
+matches = []
+
+# There is no issue for this bug.
+[[test]]
+name = "anchored-prefix-200"
+regex = '^a[[:^space:]]'
+haystack = "foo boo a"
+matches = []
+
+# There is no issue for this bug.
+[[test]]
+name = "anchored-prefix-300"
+regex = '^-[a-z]'
+haystack = "r-f"
+matches = []
+
+# Tests that a possible Aho-Corasick optimization works correctly. It only
+# kicks in when we have a lot of literals. By "works correctly," we mean that
+# leftmost-first match semantics are properly respected. That is, samwise
+# should match, not sam.
+#
+# There is no issue for this bug.
+[[test]]
+name = "aho-corasick-100"
+regex = 'samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z'
+haystack = "samwise"
+matches = [[0, 7]]
+
+# See: https://github.com/rust-lang/regex/issues/921
+[[test]]
+name = "interior-anchor-capture"
+regex = '(a$)b$'
+haystack = 'ab'
+matches = []
+
+# I found this bug in the course of adding some of the regexes that Ruff uses
+# to rebar. It turns out that the lazy DFA was finding a match that was being
+# rejected by the one-pass DFA. Yikes. I then minimized the regex and haystack.
+#
+# Source: https://github.com/charliermarsh/ruff/blob/a919041ddaa64cdf6f216f90dd0480dab69fd3ba/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs#L52
+[[test]]
+name = "ruff-whitespace-around-keywords"
+regex = '^(a|ab)$'
+haystack = "ab"
+anchored = true
+unicode = false
+utf8 = true
+matches = [[[0, 2], [0, 2]]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-0"
+regex = '(?:(?-u:\b)|(?u:h))+'
+haystack = "h"
+unicode = true
+utf8 = false
+matches = [[0, 0], [1, 1]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-1"
+regex = '(?u:\B)'
+haystack = "鋸"
+unicode = true
+utf8 = false
+matches = []
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-2"
+regex = '(?:(?u:\b)|(?s-u:.))+'
+haystack = "oB"
+unicode = true
+utf8 = false
+matches = [[0, 0], [1, 2]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-3"
+regex = '(?:(?-u:\B)|(?su:.))+'
+haystack = "\U000FEF80"
+unicode = true
+utf8 = false
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-3-utf8"
+regex = '(?:(?-u:\B)|(?su:.))+'
+haystack = "\U000FEF80"
+unicode = true
+utf8 = true
+matches = [[0, 0], [4, 4]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-4"
+regex = '(?m:$)(?m:^)(?su:.)'
+haystack = "\n‣"
+unicode = true
+utf8 = false
+matches = [[0, 1]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-5"
+regex = '(?m:$)^(?m:^)'
+haystack = "\n"
+unicode = true
+utf8 = false
+matches = [[0, 0]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-6"
+regex = '(?P<kp>(?iu:do)(?m:$))*'
+haystack = "dodo"
+unicode = true
+utf8 = false
+matches = [
+  [[0, 0], []],
+  [[1, 1], []],
+  [[2, 4], [2, 4]],
+]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-7"
+regex = '(?u:\B)'
+haystack = "䡁"
+unicode = true
+utf8 = false
+matches = []
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-8"
+regex = '(?:(?-u:\b)|(?u:[\u{0}-W]))+'
+haystack = "0"
+unicode = true
+utf8 = false
+matches = [[0, 0], [1, 1]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-9"
+regex = '((?m:$)(?-u:\B)(?s-u:.)(?-u:\B)$)'
+haystack = "\n\n"
+unicode = true
+utf8 = false
+matches = [
+  [[1, 2], [1, 2]],
+]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-10"
+regex = '(?m:$)(?m:$)^(?su:.)'
+haystack = "\n\u0081¨\u200a"
+unicode = true
+utf8 = false
+matches = [[0, 1]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-11"
+regex = '(?-u:\B)(?m:^)'
+haystack = "0\n"
+unicode = true
+utf8 = false
+matches = [[2, 2]]
+
+# From: https://github.com/rust-lang/regex/issues/429
+[[test]]
+name = "i429-12"
+regex = '(?:(?u:\b)|(?-u:.))+'
+haystack = "0"
+unicode = true
+utf8 = false
+matches = [[0, 0], [1, 1]]
+
+# From: https://github.com/rust-lang/regex/issues/969
+[[test]]
+name = "i969"
+regex = 'c.*d\z'
+haystack = "ababcd"
+bounds = [4, 6]
+search-kind = "earliest"
+matches = [[4, 6]]
+
+# I found this during the regex-automata migration. This is the fowler basic
+# 154 test, but without anchored = true and without a match limit.
+#
+# This test caught a subtle bug in the hybrid reverse DFA search, where it
+# would skip over the termination condition if it entered a start state. This
+# was a double bug. Firstly, the reverse DFA shouldn't have had start states
+# specialized in the first place, and thus it shouldn't have been possible to
+# detect that the DFA had entered a start state. The second bug was that the
+# start state handling was incorrect by jumping over the termination condition.
+[[test]]
+name = "fowler-basic154-unanchored"
+regex = '''a([bc]*)c*'''
+haystack = '''abc'''
+matches = [[[0, 3], [1, 3]]]
+
+# From: https://github.com/rust-lang/regex/issues/981
+#
+# This was never really a problem in the new architecture because the
+# regex-automata engines are far more principled about how they deal with
+# look-around. (This was one of the many reasons I wanted to re-work the
+# original regex crate engines.)
+[[test]]
+name = "word-boundary-interact-poorly-with-literal-optimizations"
+regex = '(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))'
+haystack = 'ubi-Darwin-x86_64.tar.gz'
+matches = []
+
+# This was found during fuzz testing of regex. It provoked a panic in the meta
+# engine as a result of the reverse suffix optimization. Namely, it hit a case
+# where a suffix match was found, a corresponding reverse match was found, but
+# the forward search turned up no match. The forward search should always match
+# if the suffix and reverse search match.
+#
+# This in turn uncovered an inconsistency between the PikeVM and the DFA (lazy
+# and fully compiled) engines. It was caused by a mishandling of the collection
+# of NFA state IDs in the generic determinization code (which is why both types
+# of DFA were impacted). Namely, when a fail state was encountered (that's the
+# `[^\s\S]` in the pattern below), then it would just stop collecting states.
+# But that's not correct since a later state could lead to a match.
+[[test]]
+name = "impossible-branch"
+regex = '.*[^\s\S]A|B'
+haystack = "B"
+matches = [[0, 1]]
+
+# This was found during fuzz testing in regex-lite. The regex crate never
+# suffered from this bug, but it causes regex-lite to incorrectly compile
+# captures.
+[[test]]
+name = "captures-wrong-order"
+regex = '(a){0}(a)'
+haystack = 'a'
+matches = [[[0, 1], [], [0, 1]]]
+
+# This tests a bug in how quit states are handled in the DFA. At some point
+# during development, the DFAs were tweaked slightly such that if they hit
+# a quit state (which means, they hit a byte that the caller configured should
+# stop the search), then it might not return an error necessarily. Namely, if a
+# match had already been found, then it would be returned instead of an error.
+#
+# But this is actually wrong! Why? Because even though a match had been found,
+# it wouldn't be fully correct to return it once a quit state has been seen
+# because you can't determine whether the match offset returned is the correct
+# greedy/leftmost-first match. Since you can't complete the search as requested
+# by the caller, the DFA should just stop and return an error.
+#
+# Interestingly, this does seem to produce an unavoidable difference between
+# 'try_is_match().unwrap()' and 'try_find().unwrap().is_some()' for the DFAs.
+# The former will stop immediately once a match is known to occur and return
+# 'Ok(true)', whereas the latter could find the match but quit with an
+# 'Err(..)' first.
+#
+# Thankfully, I believe this inconsistency between 'is_match()' and 'find()'
+# cannot be observed in the higher level meta regex API because it specifically
+# will try another engine that won't fail in the case of a DFA failing.
+#
+# This regression happened in the regex crate rewrite, but before anything got
+# released.
+[[test]]
+name = "negated-unicode-word-boundary-dfa-fail"
+regex = '\B.*'
+haystack = "!\u02D7"
+matches = [[0, 3]]
+
+# This failure was found in the *old* regex crate (prior to regex 1.9), but
+# I didn't investigate why. My best guess is that it's a literal optimization
+# bug. It didn't occur in the rewrite.
+[[test]]
+name = "missed-match"
+regex = 'e..+e.ee>'
+haystack = 'Zeee.eZZZZZZZZeee>eeeeeee>'
+matches = [[1, 26]]
+
+# This test came from the 'ignore' crate and tripped a bug in how accelerated
+# DFA states were handled in an overlapping search.
+[[test]]
+name = "regex-to-glob"
+regex = ['(?-u)^path1/[^/]*$']
+haystack = "path1/foo"
+matches = [[0, 9]]
+utf8 = false
+match-kind = "all"
+search-kind = "overlapping"
+
+# See: https://github.com/rust-lang/regex/issues/1060
+[[test]]
+name = "reverse-inner-plus-shorter-than-expected"
+regex = '(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})'
+haystack = '102:12:39'
+matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]]
+
+# Like reverse-inner-plus-shorter-than-expected, but using a far simpler regex
+# to demonstrate the extent of the rot. Sigh.
+#
+# See: https://github.com/rust-lang/regex/issues/1060
+[[test]]
+name = "reverse-inner-short"
+regex = '(?:([0-9][0-9][0-9]):)?([0-9][0-9]):([0-9][0-9])'
+haystack = '102:12:39'
+matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]]
+
+# This regression test was found via the RegexSet APIs. It triggered a
+# particular code path where a regex was compiled with 'All' match semantics
+# (to support overlapping search), but got funneled down into a standard
+# leftmost search when calling 'is_match'. This is fine on its own, but the
+# leftmost search will use a prefilter and that's where this went awry.
+#
+# Namely, since 'All' semantics were used, the aho-corasick prefilter was
+# incorrectly compiled with 'Standard' semantics. This was wrong because
+# 'Standard' immediately attempts to report a match at every position, even if
+# that would mean reporting a match past the leftmost match before reporting
+# the leftmost match. This breaks the prefilter contract of never having false
+# negatives and leads overall to the engine not finding a match.
+#
+# See: https://github.com/rust-lang/regex/issues/1070
+[[test]]
+name = "prefilter-with-aho-corasick-standard-semantics"
+regex = '(?m)^ *v [0-9]'
+haystack = 'v 0'
+matches = [
+  { id = 0, spans = [[0, 3]] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+unicode = true
+utf8 = true
+
+# This tests that the PikeVM and the meta regex agree on a particular regex.
+# This test previously failed when the ad hoc engines inside the meta engine
+# did not handle quit states correctly. Namely, the Unicode word boundary here
+# combined with a non-ASCII codepoint provokes the quit state. The ad hoc
+# engines were previously returning a match even after entering the quit state
+# if a match had been previously detected, but this is incorrect. The reason
+# is that if a quit state is found, then the search must give up *immediately*
+# because it prevents the search from finding the "proper" leftmost-first
+# match. If it instead returns a match that has been found, it risks reporting
+# an improper match, as it did in this case.
+#
+# See: https://github.com/rust-lang/regex/issues/1046
+[[test]]
+name = "non-prefix-literal-quit-state"
+regex = '.+\b\n'
+haystack = "β77\n"
+matches = [[0, 5]]
+
+# This is a regression test for some errant HIR interval set operations that
+# were made in the regex-syntax 0.8.0 release and then reverted in 0.8.1. The
+# issue here is that the HIR produced from the regex had out-of-order ranges.
+#
+# See: https://github.com/rust-lang/regex/issues/1103
+# Ref: https://github.com/rust-lang/regex/pull/1051
+# Ref: https://github.com/rust-lang/regex/pull/1102
+[[test]]
+name = "hir-optimization-out-of-order-class"
+regex = '^[[:alnum:]./-]+$'
+haystack = "a-b"
+matches = [[0, 3]]
+
+# This is a regression test for an improper reverse suffix optimization. This
+# occurred when I "broadened" the applicability of the optimization to include
+# multiple possible literal suffixes instead of only sticking to a non-empty
+# longest common suffix. It turns out that, at least given how the reverse
+# suffix optimization works, we need to stick to the longest common suffix for
+# now.
+#
+# See: https://github.com/rust-lang/regex/issues/1110
+# See also: https://github.com/astral-sh/ruff/pull/7980
+[[test]]
+name = 'improper-reverse-suffix-optimization'
+regex = '(\\N\{[^}]+})|([{}])'
+haystack = 'hiya \N{snowman} bye'
+matches = [[[5, 16], [5, 16], []]]
--- a/third-party/vendor/regex/testdata/set.toml
+++ b/third-party/vendor/regex/testdata/set.toml
@ -0,0 +1,641 @@
+# Basic multi-regex tests.
+
+[[test]]
+name = "basic10"
+regex = ["a", "a"]
+haystack = "a"
+matches = [
+  { id = 0, span = [0, 1] },
+  { id = 1, span = [0, 1] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic10-leftmost-first"
+regex = ["a", "a"]
+haystack = "a"
+matches = [
+  { id = 0, span = [0, 1] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+
+[[test]]
+name = "basic20"
+regex = ["a", "a"]
+haystack = "ba"
+matches = [
+  { id = 0, span = [1, 2] },
+  { id = 1, span = [1, 2] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic30"
+regex = ["a", "b"]
+haystack = "a"
+matches = [
+  { id = 0, span = [0, 1] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic40"
+regex = ["a", "b"]
+haystack = "b"
+matches = [
+  { id = 1, span = [0, 1] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic50"
+regex = ["a|b", "b|a"]
+haystack = "b"
+matches = [
+  { id = 0, span = [0, 1] },
+  { id = 1, span = [0, 1] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic60"
+regex = ["foo", "oo"]
+haystack = "foo"
+matches = [
+  { id = 0, span = [0, 3] },
+  { id = 1, span = [1, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic60-leftmost-first"
+regex = ["foo", "oo"]
+haystack = "foo"
+matches = [
+  { id = 0, span = [0, 3] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+
+[[test]]
+name = "basic61"
+regex = ["oo", "foo"]
+haystack = "foo"
+matches = [
+  { id = 1, span = [0, 3] },
+  { id = 0, span = [1, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic61-leftmost-first"
+regex = ["oo", "foo"]
+haystack = "foo"
+matches = [
+  { id = 1, span = [0, 3] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+
+[[test]]
+name = "basic70"
+regex = ["abcd", "bcd", "cd", "d"]
+haystack = "abcd"
+matches = [
+  { id = 0, span = [0, 4] },
+  { id = 1, span = [1, 4] },
+  { id = 2, span = [2, 4] },
+  { id = 3, span = [3, 4] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic71"
+regex = ["bcd", "cd", "d", "abcd"]
+haystack = "abcd"
+matches = [
+  { id = 3, span = [0, 4] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+
+[[test]]
+name = "basic80"
+regex = ["^foo", "bar$"]
+haystack = "foo"
+matches = [
+  { id = 0, span = [0, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic81"
+regex = ["^foo", "bar$"]
+haystack = "foo bar"
+matches = [
+  { id = 0, span = [0, 3] },
+  { id = 1, span = [4, 7] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic82"
+regex = ["^foo", "bar$"]
+haystack = "bar"
+matches = [
+  { id = 1, span = [0, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic90"
+regex = ["[a-z]+$", "foo"]
+haystack = "01234 foo"
+matches = [
+  { id = 0, span = [8, 9] },
+  { id = 0, span = [7, 9] },
+  { id = 0, span = [6, 9] },
+  { id = 1, span = [6, 9] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic91"
+regex = ["[a-z]+$", "foo"]
+haystack = "foo 01234"
+matches = [
+  { id = 1, span = [0, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic100"
+regex = [".*?", "a"]
+haystack = "zzza"
+matches = [
+  { id = 0, span = [0, 0] },
+  { id = 0, span = [1, 1] },
+  { id = 0, span = [0, 1] },
+  { id = 0, span = [2, 2] },
+  { id = 0, span = [1, 2] },
+  { id = 0, span = [0, 2] },
+  { id = 0, span = [3, 3] },
+  { id = 0, span = [2, 3] },
+  { id = 0, span = [1, 3] },
+  { id = 0, span = [0, 3] },
+  { id = 0, span = [4, 4] },
+  { id = 0, span = [3, 4] },
+  { id = 0, span = [2, 4] },
+  { id = 0, span = [1, 4] },
+  { id = 0, span = [0, 4] },
+  { id = 1, span = [3, 4] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic101"
+regex = [".*", "a"]
+haystack = "zzza"
+matches = [
+  { id = 0, span = [0, 0] },
+  { id = 0, span = [1, 1] },
+  { id = 0, span = [0, 1] },
+  { id = 0, span = [2, 2] },
+  { id = 0, span = [1, 2] },
+  { id = 0, span = [0, 2] },
+  { id = 0, span = [3, 3] },
+  { id = 0, span = [2, 3] },
+  { id = 0, span = [1, 3] },
+  { id = 0, span = [0, 3] },
+  { id = 0, span = [4, 4] },
+  { id = 0, span = [3, 4] },
+  { id = 0, span = [2, 4] },
+  { id = 0, span = [1, 4] },
+  { id = 0, span = [0, 4] },
+  { id = 1, span = [3, 4] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic102"
+regex = [".*", "a"]
+haystack = "zzz"
+matches = [
+  { id = 0, span = [0, 0] },
+  { id = 0, span = [1, 1] },
+  { id = 0, span = [0, 1] },
+  { id = 0, span = [2, 2] },
+  { id = 0, span = [1, 2] },
+  { id = 0, span = [0, 2] },
+  { id = 0, span = [3, 3] },
+  { id = 0, span = [2, 3] },
+  { id = 0, span = [1, 3] },
+  { id = 0, span = [0, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic110"
+regex = ['\ba\b']
+haystack = "hello a bye"
+matches = [
+  { id = 0, span = [6, 7] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic111"
+regex = ['\ba\b', '\be\b']
+haystack = "hello a bye e"
+matches = [
+  { id = 0, span = [6, 7] },
+  { id = 1, span = [12, 13] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic120"
+regex = ["a"]
+haystack = "a"
+matches = [
+  { id = 0, span = [0, 1] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic121"
+regex = [".*a"]
+haystack = "a"
+matches = [
+  { id = 0, span = [0, 1] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic122"
+regex = [".*a", "β"]
+haystack = "β"
+matches = [
+  { id = 1, span = [0, 2] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "basic130"
+regex = ["ab", "b"]
+haystack = "ba"
+matches = [
+  { id = 1, span = [0, 1] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+# These test cases where one of the regexes matches the empty string.
+
+[[test]]
+name = "empty10"
+regex = ["", "a"]
+haystack = "abc"
+matches = [
+  { id = 0, span = [0, 0] },
+  { id = 1, span = [0, 1] },
+  { id = 0, span = [1, 1] },
+  { id = 0, span = [2, 2] },
+  { id = 0, span = [3, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "empty10-leftmost-first"
+regex = ["", "a"]
+haystack = "abc"
+matches = [
+  { id = 0, span = [0, 0] },
+  { id = 0, span = [1, 1] },
+  { id = 0, span = [2, 2] },
+  { id = 0, span = [3, 3] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+
+[[test]]
+name = "empty11"
+regex = ["a", ""]
+haystack = "abc"
+matches = [
+  { id = 1, span = [0, 0] },
+  { id = 0, span = [0, 1] },
+  { id = 1, span = [1, 1] },
+  { id = 1, span = [2, 2] },
+  { id = 1, span = [3, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "empty11-leftmost-first"
+regex = ["a", ""]
+haystack = "abc"
+matches = [
+  { id = 0, span = [0, 1] },
+  { id = 1, span = [2, 2] },
+  { id = 1, span = [3, 3] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+
+[[test]]
+name = "empty20"
+regex = ["", "b"]
+haystack = "abc"
+matches = [
+  { id = 0, span = [0, 0] },
+  { id = 0, span = [1, 1] },
+  { id = 1, span = [1, 2] },
+  { id = 0, span = [2, 2] },
+  { id = 0, span = [3, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "empty20-leftmost-first"
+regex = ["", "b"]
+haystack = "abc"
+matches = [
+  { id = 0, span = [0, 0] },
+  { id = 0, span = [1, 1] },
+  { id = 0, span = [2, 2] },
+  { id = 0, span = [3, 3] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+
+[[test]]
+name = "empty21"
+regex = ["b", ""]
+haystack = "abc"
+matches = [
+  { id = 1, span = [0, 0] },
+  { id = 1, span = [1, 1] },
+  { id = 0, span = [1, 2] },
+  { id = 1, span = [2, 2] },
+  { id = 1, span = [3, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "empty21-leftmost-first"
+regex = ["b", ""]
+haystack = "abc"
+matches = [
+  { id = 1, span = [0, 0] },
+  { id = 0, span = [1, 2] },
+  { id = 1, span = [3, 3] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+
+[[test]]
+name = "empty22"
+regex = ["(?:)", "b"]
+haystack = "abc"
+matches = [
+  { id = 0, span = [0, 0] },
+  { id = 0, span = [1, 1] },
+  { id = 1, span = [1, 2] },
+  { id = 0, span = [2, 2] },
+  { id = 0, span = [3, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "empty23"
+regex = ["b", "(?:)"]
+haystack = "abc"
+matches = [
+  { id = 1, span = [0, 0] },
+  { id = 1, span = [1, 1] },
+  { id = 0, span = [1, 2] },
+  { id = 1, span = [2, 2] },
+  { id = 1, span = [3, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "empty30"
+regex = ["", "z"]
+haystack = "abc"
+matches = [
+  { id = 0, span = [0, 0] },
+  { id = 0, span = [1, 1] },
+  { id = 0, span = [2, 2] },
+  { id = 0, span = [3, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "empty30-leftmost-first"
+regex = ["", "z"]
+haystack = "abc"
+matches = [
+  { id = 0, span = [0, 0] },
+  { id = 0, span = [1, 1] },
+  { id = 0, span = [2, 2] },
+  { id = 0, span = [3, 3] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+
+[[test]]
+name = "empty31"
+regex = ["z", ""]
+haystack = "abc"
+matches = [
+  { id = 1, span = [0, 0] },
+  { id = 1, span = [1, 1] },
+  { id = 1, span = [2, 2] },
+  { id = 1, span = [3, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "empty31-leftmost-first"
+regex = ["z", ""]
+haystack = "abc"
+matches = [
+  { id = 1, span = [0, 0] },
+  { id = 1, span = [1, 1] },
+  { id = 1, span = [2, 2] },
+  { id = 1, span = [3, 3] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+
+[[test]]
+name = "empty40"
+regex = ["c(?:)", "b"]
+haystack = "abc"
+matches = [
+  { id = 1, span = [1, 2] },
+  { id = 0, span = [2, 3] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "empty40-leftmost-first"
+regex = ["c(?:)", "b"]
+haystack = "abc"
+matches = [
+  { id = 1, span = [1, 2] },
+  { id = 0, span = [2, 3] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+
+# These test cases where there are no matches.
+
+[[test]]
+name = "nomatch10"
+regex = ["a", "a"]
+haystack = "b"
+matches = []
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "nomatch20"
+regex = ["^foo", "bar$"]
+haystack = "bar foo"
+matches = []
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "nomatch30"
+regex = []
+haystack = "a"
+matches = []
+match-kind = "all"
+search-kind = "overlapping"
+
+[[test]]
+name = "nomatch40"
+regex = ["^rooted$", '\.log$']
+haystack = "notrooted"
+matches = []
+match-kind = "all"
+search-kind = "overlapping"
+
+# These test multi-regex searches with capture groups.
+#
+# NOTE: I wrote these tests in the course of developing a first class API for
+# overlapping capturing group matches, but ultimately removed that API because
+# the semantics for overlapping matches aren't totally clear. However, I've
+# left the tests because I believe the semantics for these patterns are clear
+# and because we can still test our "which patterns matched" APIs with them.
+
+[[test]]
+name = "caps-010"
+regex = ['^(\w+) (\w+)$', '^(\S+) (\S+)$']
+haystack = "Bruce Springsteen"
+matches = [
+  { id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
+  { id = 1, spans = [[0, 17], [0, 5], [6, 17]] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+unicode = false
+utf8 = false
+
+[[test]]
+name = "caps-020"
+regex = ['^(\w+) (\w+)$', '^[A-Z](\S+) [A-Z](\S+)$']
+haystack = "Bruce Springsteen"
+matches = [
+  { id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
+  { id = 1, spans = [[0, 17], [1, 5], [7, 17]] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+unicode = false
+utf8 = false
+
+[[test]]
+name = "caps-030"
+regex = ['^(\w+) (\w+)$', '^([A-Z])(\S+) ([A-Z])(\S+)$']
+haystack = "Bruce Springsteen"
+matches = [
+  { id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
+  { id = 1, spans = [[0, 17], [0, 1], [1, 5], [6, 7], [7, 17]] },
+]
+match-kind = "all"
+search-kind = "overlapping"
+unicode = false
+utf8 = false
+
+[[test]]
+name = "caps-110"
+regex = ['(\w+) (\w+)', '(\S+) (\S+)']
+haystack = "Bruce Springsteen"
+matches = [
+  { id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+unicode = false
+utf8 = false
+
+[[test]]
+name = "caps-120"
+regex = ['(\w+) (\w+)', '(\S+) (\S+)']
+haystack = "&ruce $pringsteen"
+matches = [
+  { id = 1, spans = [[0, 17], [0, 5], [6, 17]] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+unicode = false
+utf8 = false
+
+[[test]]
+name = "caps-121"
+regex = ['(\w+) (\w+)', '(\S+) (\S+)']
+haystack = "&ruce $pringsteen Foo Bar"
+matches = [
+  { id = 1, spans = [[0, 17], [0, 5], [6, 17]] },
+  { id = 0, spans = [[18, 25], [18, 21], [22, 25]] },
+]
+match-kind = "leftmost-first"
+search-kind = "leftmost"
+unicode = false
+utf8 = false
--- a/third-party/vendor/regex/testdata/substring.toml
+++ b/third-party/vendor/regex/testdata/substring.toml
@ -0,0 +1,36 @@
+# These tests check that regex engines perform as expected when the search is
+# instructed to only search a substring of a haystack instead of the entire
+# haystack. This tends to exercise interesting edge cases that are otherwise
+# difficult to provoke. (But not necessarily impossible. Regex search iterators,
+# for example, make use of the "search just a substring" APIs by changing the
+# starting position of a search to the end position of the previous match.)
+
+[[test]]
+name = "unicode-word-start"
+regex = '\b[0-9]+\b'
+haystack = "β123"
+bounds = { start = 2, end = 5 }
+matches = []
+
+[[test]]
+name = "unicode-word-end"
+regex = '\b[0-9]+\b'
+haystack = "123β"
+bounds = { start = 0, end = 3 }
+matches = []
+
+[[test]]
+name = "ascii-word-start"
+regex = '\b[0-9]+\b'
+haystack = "β123"
+bounds = { start = 2, end = 5 }
+matches = [[2, 5]]
+unicode = false
+
+[[test]]
+name = "ascii-word-end"
+regex = '\b[0-9]+\b'
+haystack = "123β"
+bounds = { start = 0, end = 3 }
+matches = [[0, 3]]
+unicode = false
--- a/third-party/vendor/regex/testdata/unicode.toml
+++ b/third-party/vendor/regex/testdata/unicode.toml
@ -0,0 +1,517 @@
+# Basic Unicode literal support.
+[[test]]
+name = "literal1"
+regex = '☃'
+haystack = "☃"
+matches = [[0, 3]]
+
+[[test]]
+name = "literal2"
+regex = '☃+'
+haystack = "☃"
+matches = [[0, 3]]
+
+[[test]]
+name = "literal3"
+regex = '☃+'
+haystack = "☃"
+matches = [[0, 3]]
+case-insensitive = true
+
+[[test]]
+name = "literal4"
+regex = 'Δ'
+haystack = "δ"
+matches = [[0, 2]]
+case-insensitive = true
+
+# Unicode word boundaries.
+[[test]]
+name = "wb-100"
+regex = '\d\b'
+haystack = "6δ"
+matches = []
+
+[[test]]
+name = "wb-200"
+regex = '\d\b'
+haystack = "6 "
+matches = [[0, 1]]
+
+[[test]]
+name = "wb-300"
+regex = '\d\B'
+haystack = "6δ"
+matches = [[0, 1]]
+
+[[test]]
+name = "wb-400"
+regex = '\d\B'
+haystack = "6 "
+matches = []
+
+# Unicode character class support.
+[[test]]
+name = "class1"
+regex = '[☃Ⅰ]+'
+haystack = "☃"
+matches = [[0, 3]]
+
+[[test]]
+name = "class2"
+regex = '\pN'
+haystack = "Ⅰ"
+matches = [[0, 3]]
+
+[[test]]
+name = "class3"
+regex = '\pN+'
+haystack = "Ⅰ1Ⅱ2"
+matches = [[0, 8]]
+
+[[test]]
+name = "class4"
+regex = '\PN+'
+haystack = "abⅠ"
+matches = [[0, 2]]
+
+[[test]]
+name = "class5"
+regex = '[\PN]+'
+haystack = "abⅠ"
+matches = [[0, 2]]
+
+[[test]]
+name = "class6"
+regex = '[^\PN]+'
+haystack = "abⅠ"
+matches = [[2, 5]]
+
+[[test]]
+name = "class7"
+regex = '\p{Lu}+'
+haystack = "ΛΘΓΔα"
+matches = [[0, 8]]
+
+[[test]]
+name = "class8"
+regex = '\p{Lu}+'
+haystack = "ΛΘΓΔα"
+matches = [[0, 10]]
+case-insensitive = true
+
+[[test]]
+name = "class9"
+regex = '\pL+'
+haystack = "ΛΘΓΔα"
+matches = [[0, 10]]
+
+[[test]]
+name = "class10"
+regex = '\p{Ll}+'
+haystack = "ΛΘΓΔα"
+matches = [[8, 10]]
+
+# Unicode aware "Perl" character classes.
+[[test]]
+name = "perl1"
+regex = '\w+'
+haystack = "dδd"
+matches = [[0, 4]]
+
+[[test]]
+name = "perl2"
+regex = '\w+'
+haystack = "⥡"
+matches = []
+
+[[test]]
+name = "perl3"
+regex = '\W+'
+haystack = "⥡"
+matches = [[0, 3]]
+
+[[test]]
+name = "perl4"
+regex = '\d+'
+haystack = "1२३9"
+matches = [[0, 8]]
+
+[[test]]
+name = "perl5"
+regex = '\d+'
+haystack = "Ⅱ"
+matches = []
+
+[[test]]
+name = "perl6"
+regex = '\D+'
+haystack = "Ⅱ"
+matches = [[0, 3]]
+
+[[test]]
+name = "perl7"
+regex = '\s+'
+haystack = " "
+matches = [[0, 3]]
+
+[[test]]
+name = "perl8"
+regex = '\s+'
+haystack = "☃"
+matches = []
+
+[[test]]
+name = "perl9"
+regex = '\S+'
+haystack = "☃"
+matches = [[0, 3]]
+
+# Specific tests for Unicode general category classes.
+[[test]]
+name = "class-gencat1"
+regex = '\p{Cased_Letter}'
+haystack = "Ａ"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat2"
+regex = '\p{Close_Punctuation}'
+haystack = "❯"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat3"
+regex = '\p{Connector_Punctuation}'
+haystack = "⁀"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat4"
+regex = '\p{Control}'
+haystack = "\u009F"
+matches = [[0, 2]]
+
+[[test]]
+name = "class-gencat5"
+regex = '\p{Currency_Symbol}'
+haystack = "￡"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat6"
+regex = '\p{Dash_Punctuation}'
+haystack = "〰"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat7"
+regex = '\p{Decimal_Number}'
+haystack = "𑓙"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gencat8"
+regex = '\p{Enclosing_Mark}'
+haystack = "\uA672"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat9"
+regex = '\p{Final_Punctuation}'
+haystack = "⸡"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat10"
+regex = '\p{Format}'
+haystack = "\U000E007F"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gencat11"
+regex = '\p{Initial_Punctuation}'
+haystack = "⸜"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat12"
+regex = '\p{Letter}'
+haystack = "Έ"
+matches = [[0, 2]]
+
+[[test]]
+name = "class-gencat13"
+regex = '\p{Letter_Number}'
+haystack = "ↂ"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat14"
+regex = '\p{Line_Separator}'
+haystack = "\u2028"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat15"
+regex = '\p{Lowercase_Letter}'
+haystack = "ϛ"
+matches = [[0, 2]]
+
+[[test]]
+name = "class-gencat16"
+regex = '\p{Mark}'
+haystack = "\U000E01EF"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gencat17"
+regex = '\p{Math}'
+haystack = "⋿"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat18"
+regex = '\p{Modifier_Letter}'
+haystack = "𖭃"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gencat19"
+regex = '\p{Modifier_Symbol}'
+haystack = "🏿"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gencat20"
+regex = '\p{Nonspacing_Mark}'
+haystack = "\U0001E94A"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gencat21"
+regex = '\p{Number}'
+haystack = "⓿"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat22"
+regex = '\p{Open_Punctuation}'
+haystack = "｟"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat23"
+regex = '\p{Other}'
+haystack = "\u0BC9"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat24"
+regex = '\p{Other_Letter}'
+haystack = "ꓷ"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat25"
+regex = '\p{Other_Number}'
+haystack = "㉏"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat26"
+regex = '\p{Other_Punctuation}'
+haystack = "𞥞"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gencat27"
+regex = '\p{Other_Symbol}'
+haystack = "⅌"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat28"
+regex = '\p{Paragraph_Separator}'
+haystack = "\u2029"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat29"
+regex = '\p{Private_Use}'
+haystack = "\U0010FFFD"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gencat30"
+regex = '\p{Punctuation}'
+haystack = "𑁍"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gencat31"
+regex = '\p{Separator}'
+haystack = "\u3000"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat32"
+regex = '\p{Space_Separator}'
+haystack = "\u205F"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat33"
+regex = '\p{Spacing_Mark}'
+haystack = "\U00016F7E"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gencat34"
+regex = '\p{Symbol}'
+haystack = "⯈"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat35"
+regex = '\p{Titlecase_Letter}'
+haystack = "ῼ"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gencat36"
+regex = '\p{Unassigned}'
+haystack = "\U0010FFFF"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gencat37"
+regex = '\p{Uppercase_Letter}'
+haystack = "Ꝋ"
+matches = [[0, 3]]
+
+
+# Tests for Unicode emoji properties.
+[[test]]
+name = "class-emoji1"
+regex = '\p{Emoji}'
+haystack = "\u23E9"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-emoji2"
+regex = '\p{emoji}'
+haystack = "\U0001F21A"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-emoji3"
+regex = '\p{extendedpictographic}'
+haystack = "\U0001FA6E"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-emoji4"
+regex = '\p{extendedpictographic}'
+haystack = "\U0001FFFD"
+matches = [[0, 4]]
+
+
+# Tests for Unicode grapheme cluster properties.
+[[test]]
+name = "class-gcb1"
+regex = '\p{grapheme_cluster_break=prepend}'
+haystack = "\U00011D46"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gcb2"
+regex = '\p{gcb=regional_indicator}'
+haystack = "\U0001F1E6"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gcb3"
+regex = '\p{gcb=ri}'
+haystack = "\U0001F1E7"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gcb4"
+regex = '\p{regionalindicator}'
+haystack = "\U0001F1FF"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-gcb5"
+regex = '\p{gcb=lvt}'
+haystack = "\uC989"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-gcb6"
+regex = '\p{gcb=zwj}'
+haystack = "\u200D"
+matches = [[0, 3]]
+
+# Tests for Unicode word boundary properties.
+[[test]]
+name = "class-word-break1"
+regex = '\p{word_break=Hebrew_Letter}'
+haystack = "\uFB46"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-word-break2"
+regex = '\p{wb=hebrewletter}'
+haystack = "\uFB46"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-word-break3"
+regex = '\p{wb=ExtendNumLet}'
+haystack = "\uFF3F"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-word-break4"
+regex = '\p{wb=WSegSpace}'
+haystack = "\u3000"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-word-break5"
+regex = '\p{wb=numeric}'
+haystack = "\U0001E950"
+matches = [[0, 4]]
+
+# Tests for Unicode sentence boundary properties.
+[[test]]
+name = "class-sentence-break1"
+regex = '\p{sentence_break=Lower}'
+haystack = "\u0469"
+matches = [[0, 2]]
+
+[[test]]
+name = "class-sentence-break2"
+regex = '\p{sb=lower}'
+haystack = "\u0469"
+matches = [[0, 2]]
+
+[[test]]
+name = "class-sentence-break3"
+regex = '\p{sb=Close}'
+haystack = "\uFF60"
+matches = [[0, 3]]
+
+[[test]]
+name = "class-sentence-break4"
+regex = '\p{sb=Close}'
+haystack = "\U0001F677"
+matches = [[0, 4]]
+
+[[test]]
+name = "class-sentence-break5"
+regex = '\p{sb=SContinue}'
+haystack = "\uFF64"
+matches = [[0, 3]]
--- a/third-party/vendor/regex/testdata/utf8.toml
+++ b/third-party/vendor/regex/testdata/utf8.toml
@ -0,0 +1,399 @@
+# These test the UTF-8 modes exposed by regex-automata. Namely, when utf8 is
+# true, then we promise that the haystack is valid UTF-8. (Otherwise behavior
+# is unspecified.) This also corresponds to building the regex engine with the
+# following two guarantees:
+#
+# 1) For any non-empty match reported, its span is guaranteed to correspond to
+# valid UTF-8.
+# 2) All empty or zero-width matches reported must never split a UTF-8
+# encoded codepoint. If the haystack has invalid UTF-8, then this results in
+# unspecified behavior.
+#
+# The (2) is in particular what we focus our testing on since (1) is generally
+# guaranteed by regex-syntax's AST-to-HIR translator and is well tested there.
+# The thing with (2) is that it can't be described in the HIR, so the regex
+# engines have to handle that case. Thus, we test it here.
+#
+# Note that it is possible to build a regex that has property (1) but not
+# (2), and vice versa. This is done by building the HIR with 'utf8=true' but
+# building the Thompson NFA with 'utf8=false'. We don't test that here because
+# the harness doesn't expose a way to enable or disable UTF-8 mode with that
+# granularity. Instead, those combinations are lightly tested via doc examples.
+# That's not to say that (1) without (2) is uncommon. Indeed, ripgrep uses it
+# because it cannot guarantee that its haystack is valid UTF-8.
+
+# This tests that an empty regex doesn't split a codepoint.
+[[test]]
+name = "empty-utf8yes"
+regex = ''
+haystack = '☃'
+matches = [[0, 0], [3, 3]]
+unicode = true
+utf8 = true
+
+# Tests the overlapping case of the above.
+[[test]]
+name = "empty-utf8yes-overlapping"
+regex = ''
+haystack = '☃'
+matches = [[0, 0], [3, 3]]
+unicode = true
+utf8 = true
+match-kind = "all"
+search-kind = "overlapping"
+
+# This tests that an empty regex DOES split a codepoint when utf8=false.
+[[test]]
+name = "empty-utf8no"
+regex = ''
+haystack = '☃'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+unicode = true
+utf8 = false
+
+# Tests the overlapping case of the above.
+[[test]]
+name = "empty-utf8no-overlapping"
+regex = ''
+haystack = '☃'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
+unicode = true
+utf8 = false
+match-kind = "all"
+search-kind = "overlapping"
+
+# This tests that an empty regex doesn't split a codepoint, even if we give
+# it bounds entirely within the codepoint.
+#
+# This is one of the trickier cases and is what motivated the current UTF-8
+# mode design. In particular, at one point, this test failed the 'is_match'
+# variant of the test but not 'find'. This is because the 'is_match' code path
+# is specifically optimized for "was a match found" rather than "where is the
+# match." In the former case, you don't really care about the empty-vs-non-empty
+# matches, and thus, the codepoint splitting filtering logic wasn't getting
+# applied. (In multiple ways across multiple regex engines.) In this way, you
+# can wind up with a situation where 'is_match' says "yes," but 'find' says,
+# "I didn't find anything." Which is... not great.
+#
+# I could have decided to say that providing boundaries that themselves split
+# a codepoint would have unspecified behavior. But I couldn't quite convince
+# myself that such boundaries were the only way to get an inconsistency between
+# 'is_match' and 'find'.
+#
+# Note that I also tried to come up with a test like this that fails without
+# using `bounds`. Specifically, a test where 'is_match' and 'find' disagree.
+# But I couldn't do it, and I'm tempted to conclude it is impossible. The
+# fundamental problem is that you need to simultaneously produce an empty match
+# that splits a codepoint while *not* matching before or after the codepoint.
+[[test]]
+name = "empty-utf8yes-bounds"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 3]
+matches = []
+unicode = true
+utf8 = true
+
+# Tests the overlapping case of the above.
+[[test]]
+name = "empty-utf8yes-bounds-overlapping"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 3]
+matches = []
+unicode = true
+utf8 = true
+match-kind = "all"
+search-kind = "overlapping"
+
+# This tests that an empty regex splits a codepoint when the bounds are
+# entirely within the codepoint.
+[[test]]
+name = "empty-utf8no-bounds"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 3]
+matches = [[1, 1], [2, 2], [3, 3]]
+unicode = true
+utf8 = false
+
+# Tests the overlapping case of the above.
+[[test]]
+name = "empty-utf8no-bounds-overlapping"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 3]
+matches = [[1, 1], [2, 2], [3, 3]]
+unicode = true
+utf8 = false
+match-kind = "all"
+search-kind = "overlapping"
+
+# In this test, we anchor the search. Since the start position is also a UTF-8
+# boundary, we get a match.
+[[test]]
+name = "empty-utf8yes-anchored"
+regex = ''
+haystack = '𝛃'
+matches = [[0, 0]]
+anchored = true
+unicode = true
+utf8 = true
+
+# Tests the overlapping case of the above.
+[[test]]
+name = "empty-utf8yes-anchored-overlapping"
+regex = ''
+haystack = '𝛃'
+matches = [[0, 0]]
+anchored = true
+unicode = true
+utf8 = true
+match-kind = "all"
+search-kind = "overlapping"
+
+# Same as above, except with UTF-8 mode disabled. It almost doesn't change the
+# result, except for the fact that since this is an anchored search and we
+# always find all matches, the test harness will keep reporting matches until
+# none are found. Because it's anchored, matches will be reported so long as
+# they are directly adjacent. In contrast, with UTF-8 mode enabled, the next
+# anchored search after the match at [0, 0] fails, so iteration stops there
+# (and never reaches the last match at [4, 4]).
+[[test]]
+name = "empty-utf8no-anchored"
+regex = ''
+haystack = '𝛃'
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+anchored = true
+unicode = true
+utf8 = false
+
+# Tests the overlapping case of the above.
+#
+# Note that overlapping anchored searches are a little weird, and it's not
+# totally clear what their semantics ought to be. For now, we just test the
+# current behavior of our test shim that implements overlapping search. (This
+# is one of the reasons why we don't really expose regex-level overlapping
+# searches.)
+[[test]]
+name = "empty-utf8no-anchored-overlapping"
+regex = ''
+haystack = '𝛃'
+matches = [[0, 0]]
+anchored = true
+unicode = true
+utf8 = false
+match-kind = "all"
+search-kind = "overlapping"
+
+# In this test, we anchor the search, but also set bounds. The bounds start the
+# search in the middle of a codepoint, so there should never be a match.
+[[test]]
+name = "empty-utf8yes-anchored-bounds"
+regex = ''
+haystack = '𝛃'
+matches = []
+bounds = [1, 3]
+anchored = true
+unicode = true
+utf8 = true
+
+# Tests the overlapping case of the above.
+[[test]]
+name = "empty-utf8yes-anchored-bounds-overlapping"
+regex = ''
+haystack = '𝛃'
+matches = []
+bounds = [1, 3]
+anchored = true
+unicode = true
+utf8 = true
+match-kind = "all"
+search-kind = "overlapping"
+
+# Same as above, except with UTF-8 mode disabled. Without UTF-8 mode enabled,
+# matching within a codepoint is allowed. And remember, as in the anchored test
+# above with UTF-8 mode disabled, iteration will report all adjacent matches.
+# The matches at [0, 0] and [4, 4] are not included because of the bounds of
+# the search.
+[[test]]
+name = "empty-utf8no-anchored-bounds"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 3]
+matches = [[1, 1], [2, 2], [3, 3]]
+anchored = true
+unicode = true
+utf8 = false
+
+# Tests the overlapping case of the above.
+#
+# Note that overlapping anchored searches are a little weird, and it's not
+# totally clear what their semantics ought to be. For now, we just test the
+# current behavior of our test shim that implements overlapping search. (This
+# is one of the reasons why we don't really expose regex-level overlapping
+# searches.)
+[[test]]
+name = "empty-utf8no-anchored-bounds-overlapping"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 3]
+matches = [[1, 1]]
+anchored = true
+unicode = true
+utf8 = false
+match-kind = "all"
+search-kind = "overlapping"
+
+# This tests that we find the match at the end of the string when the bounds
+# exclude the first match.
+[[test]]
+name = "empty-utf8yes-startbound"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 4]
+matches = [[4, 4]]
+unicode = true
+utf8 = true
+
+# Tests the overlapping case of the above.
+[[test]]
+name = "empty-utf8yes-startbound-overlapping"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 4]
+matches = [[4, 4]]
+unicode = true
+utf8 = true
+match-kind = "all"
+search-kind = "overlapping"
+
+# Same as above, except since UTF-8 mode is disabled, we also find the matches
+# in between that split the codepoint.
+[[test]]
+name = "empty-utf8no-startbound"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 4]
+matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
+unicode = true
+utf8 = false
+
+# Tests the overlapping case of the above.
+[[test]]
+name = "empty-utf8no-startbound-overlapping"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 4]
+matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
+unicode = true
+utf8 = false
+match-kind = "all"
+search-kind = "overlapping"
+
+# This tests that we don't find any matches in an anchored search, even when
+# the bounds include a match (at the end).
+[[test]]
+name = "empty-utf8yes-anchored-startbound"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 4]
+matches = []
+anchored = true
+unicode = true
+utf8 = true
+
+# Tests the overlapping case of the above.
+[[test]]
+name = "empty-utf8yes-anchored-startbound-overlapping"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 4]
+matches = []
+anchored = true
+unicode = true
+utf8 = true
+match-kind = "all"
+search-kind = "overlapping"
+
+# Same as above, except since UTF-8 mode is disabled, we also find the matches
+# in between that split the codepoint. Even though this is an anchored search,
+# since the matches are adjacent, we find all of them.
+[[test]]
+name = "empty-utf8no-anchored-startbound"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 4]
+matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
+anchored = true
+unicode = true
+utf8 = false
+
+# Tests the overlapping case of the above.
+#
+# Note that overlapping anchored searches are a little weird, and it's not
+# totally clear what their semantics ought to be. For now, we just test the
+# current behavior of our test shim that implements overlapping search. (This
+# is one of the reasons why we don't really expose regex-level overlapping
+# searches.)
+[[test]]
+name = "empty-utf8no-anchored-startbound-overlapping"
+regex = ''
+haystack = '𝛃'
+bounds = [1, 4]
+matches = [[1, 1]]
+anchored = true
+unicode = true
+utf8 = false
+match-kind = "all"
+search-kind = "overlapping"
+
+# This tests that we find the match at the end of the haystack in UTF-8 mode
+# when our bounds only include the empty string at the end of the haystack.
+[[test]]
+name = "empty-utf8yes-anchored-endbound"
+regex = ''
+haystack = '𝛃'
+bounds = [4, 4]
+matches = [[4, 4]]
+anchored = true
+unicode = true
+utf8 = true
+
+# Tests the overlapping case of the above.
+[[test]]
+name = "empty-utf8yes-anchored-endbound-overlapping"
+regex = ''
+haystack = '𝛃'
+bounds = [4, 4]
+matches = [[4, 4]]
+anchored = true
+unicode = true
+utf8 = true
+match-kind = "all"
+search-kind = "overlapping"
+
+# Same as above, but with UTF-8 mode disabled. Results remain the same since
+# the only possible match does not split a codepoint.
+[[test]]
+name = "empty-utf8no-anchored-endbound"
+regex = ''
+haystack = '𝛃'
+bounds = [4, 4]
+matches = [[4, 4]]
+anchored = true
+unicode = true
+utf8 = false
+
+# Tests the overlapping case of the above.
+[[test]]
+name = "empty-utf8no-anchored-endbound-overlapping"
+regex = ''
+haystack = '𝛃'
+bounds = [4, 4]
+matches = [[4, 4]]
+anchored = true
+unicode = true
+utf8 = false
+match-kind = "all"
+search-kind = "overlapping"
--- a/third-party/vendor/regex/testdata/word-boundary-special.toml
+++ b/third-party/vendor/regex/testdata/word-boundary-special.toml
@ -0,0 +1,687 @@
+# These tests are for the "special" word boundary assertions. That is,
+# \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty
+# assertions for more niche use cases, but hitting those cases without these
+# assertions is difficult. For example, \b{start-half} and \b{end-half} are
+# used to implement the -w/--word-regexp flag in a grep program.
+
+# Tests for (?-u:\b{start})
+
+[[test]]
+name = "word-start-ascii-010"
+regex = '\b{start}'
+haystack = "a"
+matches = [[0, 0]]
+unicode = false
+
+[[test]]
+name = "word-start-ascii-020"
+regex = '\b{start}'
+haystack = "a "
+matches = [[0, 0]]
+unicode = false
+
+[[test]]
+name = "word-start-ascii-030"
+regex = '\b{start}'
+haystack = " a "
+matches = [[1, 1]]
+unicode = false
+
+[[test]]
+name = "word-start-ascii-040"
+regex = '\b{start}'
+haystack = ""
+matches = []
+unicode = false
+
+[[test]]
+name = "word-start-ascii-050"
+regex = '\b{start}'
+haystack = "ab"
+matches = [[0, 0]]
+unicode = false
+
+[[test]]
+name = "word-start-ascii-060"
+regex = '\b{start}'
+haystack = "𝛃"
+matches = []
+unicode = false
+
+[[test]]
+name = "word-start-ascii-060-bounds"
+regex = '\b{start}'
+haystack = "𝛃"
+bounds = [2, 3]
+matches = []
+unicode = false
+
+[[test]]
+name = "word-start-ascii-070"
+regex = '\b{start}'
+haystack = " 𝛃 "
+matches = []
+unicode = false
+
+[[test]]
+name = "word-start-ascii-080"
+regex = '\b{start}'
+haystack = "𝛃𐆀"
+matches = []
+unicode = false
+
+[[test]]
+name = "word-start-ascii-090"
+regex = '\b{start}'
+haystack = "𝛃b"
+matches = [[4, 4]]
+unicode = false
+
+[[test]]
+name = "word-start-ascii-110"
+regex = '\b{start}'
+haystack = "b𝛃"
+matches = [[0, 0]]
+unicode = false
+
+# Tests for (?-u:\b{end})
+
+[[test]]
+name = "word-end-ascii-010"
+regex = '\b{end}'
+haystack = "a"
+matches = [[1, 1]]
+unicode = false
+
+[[test]]
+name = "word-end-ascii-020"
+regex = '\b{end}'
+haystack = "a "
+matches = [[1, 1]]
+unicode = false
+
+[[test]]
+name = "word-end-ascii-030"
+regex = '\b{end}'
+haystack = " a "
+matches = [[2, 2]]
+unicode = false
+
+[[test]]
+name = "word-end-ascii-040"
+regex = '\b{end}'
+haystack = ""
+matches = []
+unicode = false
+
+[[test]]
+name = "word-end-ascii-050"
+regex = '\b{end}'
+haystack = "ab"
+matches = [[2, 2]]
+unicode = false
+
+[[test]]
+name = "word-end-ascii-060"
+regex = '\b{end}'
+haystack = "𝛃"
+matches = []
+unicode = false
+
+[[test]]
+name = "word-end-ascii-060-bounds"
+regex = '\b{end}'
+haystack = "𝛃"
+bounds = [2, 3]
+matches = []
+unicode = false
+
+[[test]]
+name = "word-end-ascii-070"
+regex = '\b{end}'
+haystack = " 𝛃 "
+matches = []
+unicode = false
+
+[[test]]
+name = "word-end-ascii-080"
+regex = '\b{end}'
+haystack = "𝛃𐆀"
+matches = []
+unicode = false
+
+[[test]]
+name = "word-end-ascii-090"
+regex = '\b{end}'
+haystack = "𝛃b"
+matches = [[5, 5]]
+unicode = false
+
+[[test]]
+name = "word-end-ascii-110"
+regex = '\b{end}'
+haystack = "b𝛃"
+matches = [[1, 1]]
+unicode = false
+
+# Tests for \b{start}
+
+[[test]]
+name = "word-start-unicode-010"
+regex = '\b{start}'
+haystack = "a"
+matches = [[0, 0]]
+unicode = true
+
+[[test]]
+name = "word-start-unicode-020"
+regex = '\b{start}'
+haystack = "a "
+matches = [[0, 0]]
+unicode = true
+
+[[test]]
+name = "word-start-unicode-030"
+regex = '\b{start}'
+haystack = " a "
+matches = [[1, 1]]
+unicode = true
+
+[[test]]
+name = "word-start-unicode-040"
+regex = '\b{start}'
+haystack = ""
+matches = []
+unicode = true
+
+[[test]]
+name = "word-start-unicode-050"
+regex = '\b{start}'
+haystack = "ab"
+matches = [[0, 0]]
+unicode = true
+
+[[test]]
+name = "word-start-unicode-060"
+regex = '\b{start}'
+haystack = "𝛃"
+matches = [[0, 0]]
+unicode = true
+
+[[test]]
+name = "word-start-unicode-060-bounds"
+regex = '\b{start}'
+haystack = "𝛃"
+bounds = [2, 3]
+matches = []
+unicode = true
+
+[[test]]
+name = "word-start-unicode-070"
+regex = '\b{start}'
+haystack = " 𝛃 "
+matches = [[1, 1]]
+unicode = true
+
+[[test]]
+name = "word-start-unicode-080"
+regex = '\b{start}'
+haystack = "𝛃𐆀"
+matches = [[0, 0]]
+unicode = true
+
+[[test]]
+name = "word-start-unicode-090"
+regex = '\b{start}'
+haystack = "𝛃b"
+matches = [[0, 0]]
+unicode = true
+
+[[test]]
+name = "word-start-unicode-110"
+regex = '\b{start}'
+haystack = "b𝛃"
+matches = [[0, 0]]
+unicode = true
+
+# Tests for \b{end}
+
+[[test]]
+name = "word-end-unicode-010"
+regex = '\b{end}'
+haystack = "a"
+matches = [[1, 1]]
+unicode = true
+
+[[test]]
+name = "word-end-unicode-020"
+regex = '\b{end}'
+haystack = "a "
+matches = [[1, 1]]
+unicode = true
+
+[[test]]
+name = "word-end-unicode-030"
+regex = '\b{end}'
+haystack = " a "
+matches = [[2, 2]]
+unicode = true
+
+[[test]]
+name = "word-end-unicode-040"
+regex = '\b{end}'
+haystack = ""
+matches = []
+unicode = true
+
+[[test]]
+name = "word-end-unicode-050"
+regex = '\b{end}'
+haystack = "ab"
+matches = [[2, 2]]
+unicode = true
+
+[[test]]
+name = "word-end-unicode-060"
+regex = '\b{end}'
+haystack = "𝛃"
+matches = [[4, 4]]
+unicode = true
+
+[[test]]
+name = "word-end-unicode-060-bounds"
+regex = '\b{end}'
+haystack = "𝛃"
+bounds = [2, 3]
+matches = []
+unicode = true
+
+[[test]]
+name = "word-end-unicode-070"
+regex = '\b{end}'
+haystack = " 𝛃 "
+matches = [[5, 5]]
+unicode = true
+
+[[test]]
+name = "word-end-unicode-080"
+regex = '\b{end}'
+haystack = "𝛃𐆀"
+matches = [[4, 4]]
+unicode = true
+
+[[test]]
+name = "word-end-unicode-090"
+regex = '\b{end}'
+haystack = "𝛃b"
+matches = [[5, 5]]
+unicode = true
+
+[[test]]
+name = "word-end-unicode-110"
+regex = '\b{end}'
+haystack = "b𝛃"
+matches = [[5, 5]]
+unicode = true
+
+# Tests for (?-u:\b{start-half})
+
+[[test]]
+name = "word-start-half-ascii-010"
+regex = '\b{start-half}'
+haystack = "a"
+matches = [[0, 0]]
+unicode = false
+
+[[test]]
+name = "word-start-half-ascii-020"
+regex = '\b{start-half}'
+haystack = "a "
+matches = [[0, 0], [2, 2]]
+unicode = false
+
+[[test]]
+name = "word-start-half-ascii-030"
+regex = '\b{start-half}'
+haystack = " a "
+matches = [[0, 0], [1, 1], [3, 3]]
+unicode = false
+
+[[test]]
+name = "word-start-half-ascii-040"
+regex = '\b{start-half}'
+haystack = ""
+matches = [[0, 0]]
+unicode = false
+
+[[test]]
+name = "word-start-half-ascii-050"
+regex = '\b{start-half}'
+haystack = "ab"
+matches = [[0, 0]]
+unicode = false
+
+[[test]]
+name = "word-start-half-ascii-060"
+regex = '\b{start-half}'
+haystack = "𝛃"
+matches = [[0, 0], [4, 4]]
+unicode = false
+
+[[test]]
+name = "word-start-half-ascii-060-noutf8"
+regex = '\b{start-half}'
+haystack = "𝛃"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "word-start-half-ascii-060-bounds"
+regex = '\b{start-half}'
+haystack = "𝛃"
+bounds = [2, 3]
+matches = []
+unicode = false
+
+[[test]]
+name = "word-start-half-ascii-070"
+regex = '\b{start-half}'
+haystack = " 𝛃 "
+matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
+unicode = false
+
+[[test]]
+name = "word-start-half-ascii-080"
+regex = '\b{start-half}'
+haystack = "𝛃𐆀"
+matches = [[0, 0], [4, 4], [8, 8]]
+unicode = false
+
+[[test]]
+name = "word-start-half-ascii-090"
+regex = '\b{start-half}'
+haystack = "𝛃b"
+matches = [[0, 0], [4, 4]]
+unicode = false
+
+[[test]]
+name = "word-start-half-ascii-110"
+regex = '\b{start-half}'
+haystack = "b𝛃"
+matches = [[0, 0], [5, 5]]
+unicode = false
+
+# Tests for (?-u:\b{end-half})
+
+[[test]]
+name = "word-end-half-ascii-010"
+regex = '\b{end-half}'
+haystack = "a"
+matches = [[1, 1]]
+unicode = false
+
+[[test]]
+name = "word-end-half-ascii-020"
+regex = '\b{end-half}'
+haystack = "a "
+matches = [[1, 1], [2, 2]]
+unicode = false
+
+[[test]]
+name = "word-end-half-ascii-030"
+regex = '\b{end-half}'
+haystack = " a "
+matches = [[0, 0], [2, 2], [3, 3]]
+unicode = false
+
+[[test]]
+name = "word-end-half-ascii-040"
+regex = '\b{end-half}'
+haystack = ""
+matches = [[0, 0]]
+unicode = false
+
+[[test]]
+name = "word-end-half-ascii-050"
+regex = '\b{end-half}'
+haystack = "ab"
+matches = [[2, 2]]
+unicode = false
+
+[[test]]
+name = "word-end-half-ascii-060"
+regex = '\b{end-half}'
+haystack = "𝛃"
+matches = [[0, 0], [4, 4]]
+unicode = false
+
+[[test]]
+name = "word-end-half-ascii-060-bounds"
+regex = '\b{end-half}'
+haystack = "𝛃"
+bounds = [2, 3]
+matches = []
+unicode = false
+
+[[test]]
+name = "word-end-half-ascii-070"
+regex = '\b{end-half}'
+haystack = " 𝛃 "
+matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
+unicode = false
+
+[[test]]
+name = "word-end-half-ascii-080"
+regex = '\b{end-half}'
+haystack = "𝛃𐆀"
+matches = [[0, 0], [4, 4], [8, 8]]
+unicode = false
+
+[[test]]
+name = "word-end-half-ascii-090"
+regex = '\b{end-half}'
+haystack = "𝛃b"
+matches = [[0, 0], [5, 5]]
+unicode = false
+
+[[test]]
+name = "word-end-half-ascii-110"
+regex = '\b{end-half}'
+haystack = "b𝛃"
+matches = [[1, 1], [5, 5]]
+unicode = false
+
+# Tests for \b{start-half}
+
+[[test]]
+name = "word-start-half-unicode-010"
+regex = '\b{start-half}'
+haystack = "a"
+matches = [[0, 0]]
+unicode = true
+
+[[test]]
+name = "word-start-half-unicode-020"
+regex = '\b{start-half}'
+haystack = "a "
+matches = [[0, 0], [2, 2]]
+unicode = true
+
+[[test]]
+name = "word-start-half-unicode-030"
+regex = '\b{start-half}'
+haystack = " a "
+matches = [[0, 0], [1, 1], [3, 3]]
+unicode = true
+
+[[test]]
+name = "word-start-half-unicode-040"
+regex = '\b{start-half}'
+haystack = ""
+matches = [[0, 0]]
+unicode = true
+
+[[test]]
+name = "word-start-half-unicode-050"
+regex = '\b{start-half}'
+haystack = "ab"
+matches = [[0, 0]]
+unicode = true
+
+[[test]]
+name = "word-start-half-unicode-060"
+regex = '\b{start-half}'
+haystack = "𝛃"
+matches = [[0, 0]]
+unicode = true
+
+[[test]]
+name = "word-start-half-unicode-060-bounds"
+regex = '\b{start-half}'
+haystack = "𝛃"
+bounds = [2, 3]
+matches = []
+unicode = true
+
+[[test]]
+name = "word-start-half-unicode-070"
+regex = '\b{start-half}'
+haystack = " 𝛃 "
+matches = [[0, 0], [1, 1], [6, 6]]
+unicode = true
+
+[[test]]
+name = "word-start-half-unicode-080"
+regex = '\b{start-half}'
+haystack = "𝛃𐆀"
+matches = [[0, 0], [8, 8]]
+unicode = true
+
+[[test]]
+name = "word-start-half-unicode-090"
+regex = '\b{start-half}'
+haystack = "𝛃b"
+matches = [[0, 0]]
+unicode = true
+
+[[test]]
+name = "word-start-half-unicode-110"
+regex = '\b{start-half}'
+haystack = "b𝛃"
+matches = [[0, 0]]
+unicode = true
+
+# Tests for \b{end-half}
+
+[[test]]
+name = "word-end-half-unicode-010"
+regex = '\b{end-half}'
+haystack = "a"
+matches = [[1, 1]]
+unicode = true
+
+[[test]]
+name = "word-end-half-unicode-020"
+regex = '\b{end-half}'
+haystack = "a "
+matches = [[1, 1], [2, 2]]
+unicode = true
+
+[[test]]
+name = "word-end-half-unicode-030"
+regex = '\b{end-half}'
+haystack = " a "
+matches = [[0, 0], [2, 2], [3, 3]]
+unicode = true
+
+[[test]]
+name = "word-end-half-unicode-040"
+regex = '\b{end-half}'
+haystack = ""
+matches = [[0, 0]]
+unicode = true
+
+[[test]]
+name = "word-end-half-unicode-050"
+regex = '\b{end-half}'
+haystack = "ab"
+matches = [[2, 2]]
+unicode = true
+
+[[test]]
+name = "word-end-half-unicode-060"
+regex = '\b{end-half}'
+haystack = "𝛃"
+matches = [[4, 4]]
+unicode = true
+
+[[test]]
+name = "word-end-half-unicode-060-bounds"
+regex = '\b{end-half}'
+haystack = "𝛃"
+bounds = [2, 3]
+matches = []
+unicode = true
+
+[[test]]
+name = "word-end-half-unicode-070"
+regex = '\b{end-half}'
+haystack = " 𝛃 "
+matches = [[0, 0], [5, 5], [6, 6]]
+unicode = true
+
+[[test]]
+name = "word-end-half-unicode-080"
+regex = '\b{end-half}'
+haystack = "𝛃𐆀"
+matches = [[4, 4], [8, 8]]
+unicode = true
+
+[[test]]
+name = "word-end-half-unicode-090"
+regex = '\b{end-half}'
+haystack = "𝛃b"
+matches = [[5, 5]]
+unicode = true
+
+[[test]]
+name = "word-end-half-unicode-110"
+regex = '\b{end-half}'
+haystack = "b𝛃"
+matches = [[5, 5]]
+unicode = true
+
+# Specialty tests.
+
+# Since \r is special cased in the start state computation (to deal with CRLF
+# mode), this test ensures that the correct start state is computed when the
+# pattern starts with a half word boundary assertion.
+[[test]]
+name = "word-start-half-ascii-carriage"
+regex = '\b{start-half}[a-z]+'
+haystack = 'ABC\rabc'
+matches = [[4, 7]]
+bounds = [4, 7]
+unescape = true
+
+# Since \n is also special cased in the start state computation, this test
+# ensures that the correct start state is computed when the pattern starts with
+# a half word boundary assertion.
+[[test]]
+name = "word-start-half-ascii-linefeed"
+regex = '\b{start-half}[a-z]+'
+haystack = 'ABC\nabc'
+matches = [[4, 7]]
+bounds = [4, 7]
+unescape = true
+
+# Like the carriage return test above, but with a custom line terminator.
+[[test]]
+name = "word-start-half-ascii-customlineterm"
+regex = '\b{start-half}[a-z]+'
+haystack = 'ABC!abc'
+matches = [[4, 7]]
+bounds = [4, 7]
+unescape = true
+line-terminator = '!'
--- a/third-party/vendor/regex/testdata/word-boundary.toml
+++ b/third-party/vendor/regex/testdata/word-boundary.toml
@ -0,0 +1,781 @@
+# Some of these are cribbed from RE2's test suite.
+
+# These test \b. Below are tests for \B.
+[[test]]
+name = "wb1"
+regex = '\b'
+haystack = ""
+matches = []
+unicode = false
+
+[[test]]
+name = "wb2"
+regex = '\b'
+haystack = "a"
+matches = [[0, 0], [1, 1]]
+unicode = false
+
+[[test]]
+name = "wb3"
+regex = '\b'
+haystack = "ab"
+matches = [[0, 0], [2, 2]]
+unicode = false
+
+[[test]]
+name = "wb4"
+regex = '^\b'
+haystack = "ab"
+matches = [[0, 0]]
+unicode = false
+
+[[test]]
+name = "wb5"
+regex = '\b$'
+haystack = "ab"
+matches = [[2, 2]]
+unicode = false
+
+[[test]]
+name = "wb6"
+regex = '^\b$'
+haystack = "ab"
+matches = []
+unicode = false
+
+[[test]]
+name = "wb7"
+regex = '\bbar\b'
+haystack = "nobar bar foo bar"
+matches = [[6, 9], [14, 17]]
+unicode = false
+
+[[test]]
+name = "wb8"
+regex = 'a\b'
+haystack = "faoa x"
+matches = [[3, 4]]
+unicode = false
+
+[[test]]
+name = "wb9"
+regex = '\bbar'
+haystack = "bar x"
+matches = [[0, 3]]
+unicode = false
+
+[[test]]
+name = "wb10"
+regex = '\bbar'
+haystack = "foo\nbar x"
+matches = [[4, 7]]
+unicode = false
+
+[[test]]
+name = "wb11"
+regex = 'bar\b'
+haystack = "foobar"
+matches = [[3, 6]]
+unicode = false
+
+[[test]]
+name = "wb12"
+regex = 'bar\b'
+haystack = "foobar\nxxx"
+matches = [[3, 6]]
+unicode = false
+
+[[test]]
+name = "wb13"
+regex = '(?:foo|bar|[A-Z])\b'
+haystack = "foo"
+matches = [[0, 3]]
+unicode = false
+
+[[test]]
+name = "wb14"
+regex = '(?:foo|bar|[A-Z])\b'
+haystack = "foo\n"
+matches = [[0, 3]]
+unicode = false
+
+[[test]]
+name = "wb15"
+regex = '\b(?:foo|bar|[A-Z])'
+haystack = "foo"
+matches = [[0, 3]]
+unicode = false
+
+[[test]]
+name = "wb16"
+regex = '\b(?:foo|bar|[A-Z])\b'
+haystack = "X"
+matches = [[0, 1]]
+unicode = false
+
+[[test]]
+name = "wb17"
+regex = '\b(?:foo|bar|[A-Z])\b'
+haystack = "XY"
+matches = []
+unicode = false
+
+[[test]]
+name = "wb18"
+regex = '\b(?:foo|bar|[A-Z])\b'
+haystack = "bar"
+matches = [[0, 3]]
+unicode = false
+
+[[test]]
+name = "wb19"
+regex = '\b(?:foo|bar|[A-Z])\b'
+haystack = "foo"
+matches = [[0, 3]]
+unicode = false
+
+[[test]]
+name = "wb20"
+regex = '\b(?:foo|bar|[A-Z])\b'
+haystack = "foo\n"
+matches = [[0, 3]]
+unicode = false
+
+[[test]]
+name = "wb21"
+regex = '\b(?:foo|bar|[A-Z])\b'
+haystack = "ffoo bbar N x"
+matches = [[10, 11]]
+unicode = false
+
+[[test]]
+name = "wb22"
+regex = '\b(?:fo|foo)\b'
+haystack = "fo"
+matches = [[0, 2]]
+unicode = false
+
+[[test]]
+name = "wb23"
+regex = '\b(?:fo|foo)\b'
+haystack = "foo"
+matches = [[0, 3]]
+unicode = false
+
+[[test]]
+name = "wb24"
+regex = '\b\b'
+haystack = ""
+matches = []
+unicode = false
+
+[[test]]
+name = "wb25"
+regex = '\b\b'
+haystack = "a"
+matches = [[0, 0], [1, 1]]
+unicode = false
+
+[[test]]
+name = "wb26"
+regex = '\b$'
+haystack = ""
+matches = []
+unicode = false
+
+[[test]]
+name = "wb27"
+regex = '\b$'
+haystack = "x"
+matches = [[1, 1]]
+unicode = false
+
+[[test]]
+name = "wb28"
+regex = '\b$'
+haystack = "y x"
+matches = [[3, 3]]
+unicode = false
+
+[[test]]
+name = "wb29"
+regex = '(?-u:\b).$'
+haystack = "x"
+matches = [[0, 1]]
+
+[[test]]
+name = "wb30"
+regex = '^\b(?:fo|foo)\b'
+haystack = "fo"
+matches = [[0, 2]]
+unicode = false
+
+[[test]]
+name = "wb31"
+regex = '^\b(?:fo|foo)\b'
+haystack = "foo"
+matches = [[0, 3]]
+unicode = false
+
+[[test]]
+name = "wb32"
+regex = '^\b$'
+haystack = ""
+matches = []
+unicode = false
+
+[[test]]
+name = "wb33"
+regex = '^\b$'
+haystack = "x"
+matches = []
+unicode = false
+
+[[test]]
+name = "wb34"
+regex = '^(?-u:\b).$'
+haystack = "x"
+matches = [[0, 1]]
+
+[[test]]
+name = "wb35"
+regex = '^(?-u:\b).(?-u:\b)$'
+haystack = "x"
+matches = [[0, 1]]
+
+[[test]]
+name = "wb36"
+regex = '^^^^^\b$$$$$'
+haystack = ""
+matches = []
+unicode = false
+
+[[test]]
+name = "wb37"
+regex = '^^^^^(?-u:\b).$$$$$'
+haystack = "x"
+matches = [[0, 1]]
+
+[[test]]
+name = "wb38"
+regex = '^^^^^\b$$$$$'
+haystack = "x"
+matches = []
+unicode = false
+
+[[test]]
+name = "wb39"
+regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$'
+haystack = "x"
+matches = [[0, 1]]
+
+[[test]]
+name = "wb40"
+regex = '(?-u:\b).+(?-u:\b)'
+haystack = "$$abc$$"
+matches = [[2, 5]]
+
+[[test]]
+name = "wb41"
+regex = '\b'
+haystack = "a b c"
+matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
+unicode = false
+
+[[test]]
+name = "wb42"
+regex = '\bfoo\b'
+haystack = "zzz foo zzz"
+matches = [[4, 7]]
+unicode = false
+
+[[test]]
+name = "wb43"
+regex = '\b^'
+haystack = "ab"
+matches = [[0, 0]]
+unicode = false
+
+[[test]]
+name = "wb44"
+regex = '$\b'
+haystack = "ab"
+matches = [[2, 2]]
+unicode = false
+
+
+# Tests for \B. Note that ASCII \B (i.e., with Unicode mode disabled) is not
+# allowed if UTF-8 mode is enabled, so we have to disable UTF-8 mode for these
+# tests. This is because ASCII \B can match in the middle of a codepoint.
+[[test]]
+name = "nb1"
+regex = '\Bfoo\B'
+haystack = "n foo xfoox that"
+matches = [[7, 10]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb2"
+regex = 'a\B'
+haystack = "faoa x"
+matches = [[1, 2]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb3"
+regex = '\Bbar'
+haystack = "bar x"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb4"
+regex = '\Bbar'
+haystack = "foo\nbar x"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb5"
+regex = 'bar\B'
+haystack = "foobar"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb6"
+regex = 'bar\B'
+haystack = "foobar\nxxx"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb7"
+regex = '(?:foo|bar|[A-Z])\B'
+haystack = "foox"
+matches = [[0, 3]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb8"
+regex = '(?:foo|bar|[A-Z])\B'
+haystack = "foo\n"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb9"
+regex = '\B'
+haystack = ""
+matches = [[0, 0]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb10"
+regex = '\B'
+haystack = "x"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb11"
+regex = '\B(?:foo|bar|[A-Z])'
+haystack = "foo"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb12"
+regex = '\B(?:foo|bar|[A-Z])\B'
+haystack = "xXy"
+matches = [[1, 2]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb13"
+regex = '\B(?:foo|bar|[A-Z])\B'
+haystack = "XY"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb14"
+regex = '\B(?:foo|bar|[A-Z])\B'
+haystack = "XYZ"
+matches = [[1, 2]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb15"
+regex = '\B(?:foo|bar|[A-Z])\B'
+haystack = "abara"
+matches = [[1, 4]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb16"
+regex = '\B(?:foo|bar|[A-Z])\B'
+haystack = "xfoo_"
+matches = [[1, 4]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb17"
+regex = '\B(?:foo|bar|[A-Z])\B'
+haystack = "xfoo\n"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb18"
+regex = '\B(?:foo|bar|[A-Z])\B'
+haystack = "foo bar vNX"
+matches = [[9, 10]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb19"
+regex = '\B(?:fo|foo)\B'
+haystack = "xfoo"
+matches = [[1, 3]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb20"
+regex = '\B(?:foo|fo)\B'
+haystack = "xfooo"
+matches = [[1, 4]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb21"
+regex = '\B\B'
+haystack = ""
+matches = [[0, 0]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb22"
+regex = '\B\B'
+haystack = "x"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb23"
+regex = '\B$'
+haystack = ""
+matches = [[0, 0]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb24"
+regex = '\B$'
+haystack = "x"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb25"
+regex = '\B$'
+haystack = "y x"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb26"
+regex = '\B.$'
+haystack = "x"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb27"
+regex = '^\B(?:fo|foo)\B'
+haystack = "fo"
+matches = []
+unicode = false
+utf8 = false
+
+# Like nb27, but on "foo" (mirroring wb30/wb31). There is still no match,
+# since offset 0 sits on a word boundary and so \B cannot hold under ^.
+[[test]]
+name = "nb28"
+regex = '^\B(?:fo|foo)\B'
+haystack = "foo"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb29"
+regex = '^\B'
+haystack = ""
+matches = [[0, 0]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb30"
+regex = '^\B'
+haystack = "x"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb31"
+regex = '^\B\B'
+haystack = ""
+matches = [[0, 0]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb32"
+regex = '^\B\B'
+haystack = "x"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb33"
+regex = '^\B$'
+haystack = ""
+matches = [[0, 0]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb34"
+regex = '^\B$'
+haystack = "x"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb35"
+regex = '^\B.$'
+haystack = "x"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb36"
+regex = '^\B.\B$'
+haystack = "x"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb37"
+regex = '^^^^^\B$$$$$'
+haystack = ""
+matches = [[0, 0]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb38"
+regex = '^^^^^\B.$$$$$'
+haystack = "x"
+matches = []
+unicode = false
+utf8 = false
+
+[[test]]
+name = "nb39"
+regex = '^^^^^\B$$$$$'
+haystack = "x"
+matches = []
+unicode = false
+utf8 = false
+
+
+# unicode1* and unicode2* work for both Unicode and ASCII because all matches
+# are reported as byte offsets, and « and » do not correspond to word
+# boundaries at either the character or byte level.
+[[test]]
+name = "unicode1"
+regex = '\bx\b'
+haystack = "«x"
+matches = [[2, 3]]
+
+[[test]]
+name = "unicode1-only-ascii"
+regex = '\bx\b'
+haystack = "«x"
+matches = [[2, 3]]
+unicode = false
+
+[[test]]
+name = "unicode2"
+regex = '\bx\b'
+haystack = "x»"
+matches = [[0, 1]]
+
+[[test]]
+name = "unicode2-only-ascii"
+regex = '\bx\b'
+haystack = "x»"
+matches = [[0, 1]]
+unicode = false
+
+# ASCII word boundaries are completely oblivious to Unicode characters, so
+# even though β is a word character, an ASCII \b sees a word boundary between
+# it and an adjacent ASCII word character. (The ASCII \b only looks at the
+# leading byte of β.) For Unicode \b, the tests are precisely inverted.
+[[test]]
+name = "unicode3"
+regex = '\bx\b'
+haystack = 'áxβ'
+matches = []
+
+[[test]]
+name = "unicode3-only-ascii"
+regex = '\bx\b'
+haystack = 'áxβ'
+matches = [[2, 3]]
+unicode = false
+
+[[test]]
+name = "unicode4"
+regex = '\Bx\B'
+haystack = 'áxβ'
+matches = [[2, 3]]
+
+[[test]]
+name = "unicode4-only-ascii"
+regex = '\Bx\B'
+haystack = 'áxβ'
+matches = []
+unicode = false
+utf8 = false
+
+# The same as the 'unicode5-not' tests below, but with \b as a sanity check.
+[[test]]
+name = "unicode5"
+regex = '\b'
+haystack = "0\U0007EF5E"
+matches = [[0, 0], [1, 1]]
+
+[[test]]
+name = "unicode5-only-ascii"
+regex = '\b'
+haystack = "0\U0007EF5E"
+matches = [[0, 0], [1, 1]]
+unicode = false
+utf8 = false
+
+[[test]]
+name = "unicode5-noutf8"
+regex = '\b'
+haystack = '0\xFF\xFF\xFF\xFF'
+matches = [[0, 0], [1, 1]]
+unescape = true
+utf8 = false
+
+[[test]]
+name = "unicode5-noutf8-only-ascii"
+regex = '\b'
+haystack = '0\xFF\xFF\xFF\xFF'
+matches = [[0, 0], [1, 1]]
+unescape = true
+unicode = false
+utf8 = false
+
+# Weird special case to ensure that ASCII \B treats each individual code unit
+# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary
+# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the
+# \w character class.)
+[[test]]
+name = "unicode5-not"
+regex = '\B'
+haystack = "0\U0007EF5E"
+matches = [[5, 5]]
+
+[[test]]
+name = "unicode5-not-only-ascii"
+regex = '\B'
+haystack = "0\U0007EF5E"
+matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
+unicode = false
+utf8 = false
+
+# This gets no matches since \B only matches in the presence of valid UTF-8
+# when Unicode is enabled, even when UTF-8 mode is disabled.
+[[test]]
+name = "unicode5-not-noutf8"
+regex = '\B'
+haystack = '0\xFF\xFF\xFF\xFF'
+matches = []
+unescape = true
+utf8 = false
+
+# But this DOES get matches since \B in ASCII mode only looks at individual
+# bytes.
+[[test]]
+name = "unicode5-not-noutf8-only-ascii"
+regex = '\B'
+haystack = '0\xFF\xFF\xFF\xFF'
+matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
+unescape = true
+unicode = false
+utf8 = false
+
+# Some tests of no particular significance.
+[[test]]
+name = "unicode6"
+regex = '\b[0-9]+\b'
+haystack = "foo 123 bar 456 quux 789"
+matches = [[4, 7], [12, 15], [21, 24]]
+
+[[test]]
+name = "unicode7"
+regex = '\b[0-9]+\b'
+haystack = "foo 123 bar a456 quux 789"
+matches = [[4, 7], [22, 25]]
+
+[[test]]
+name = "unicode8"
+regex = '\b[0-9]+\b'
+haystack = "foo 123 bar 456a quux 789"
+matches = [[4, 7], [22, 25]]
+
+# A variant of the problem described here:
+# https://github.com/google/re2/blob/89567f5de5b23bb5ad0c26cbafc10bdc7389d1fa/re2/dfa.cc#L658-L667
+[[test]]
+name = "alt-with-assertion-repetition"
+regex = '(?:\b|%)+'
+haystack = "z%"
+bounds = [1, 2]
+anchored = true
+matches = [[1, 1]]