Vendor things

2024-03-08 11:03:01 -08:00 · 2024-03-08 11:03:01 -08:00 · 977e3c17e5
commit 977e3c17e5
parent 5deceec006
19434 changed files with 10682014 additions and 0 deletions
--- a/third-party/vendor/regex/src/builders.rs
+++ b/third-party/vendor/regex/src/builders.rs
--- a/third-party/vendor/regex/src/bytes.rs
+++ b/third-party/vendor/regex/src/bytes.rs
@ -0,0 +1,91 @@
+/*!
+Search for regex matches in `&[u8]` haystacks.
+
+This module provides a nearly identical API via [`Regex`] to the one found in
+the top-level of this crate. There are two important differences:
+
+1. Matching is done on `&[u8]` instead of `&str`. Additionally, `Vec<u8>`
+is used where `String` would have been used in the top-level API.
+2. Unicode support can be disabled even when disabling it would result in
+matching invalid UTF-8 bytes.
+
+# Example: match null terminated string
+
+This shows how to find all null-terminated strings in a slice of bytes. This
+works even if a C string contains invalid UTF-8.
+
+```rust
+use regex::bytes::Regex;
+
+let re = Regex::new(r"(?-u)(?<cstr>[^\x00]+)\x00").unwrap();
+let hay = b"foo\x00qu\xFFux\x00baz\x00";
+
+// Extract all of the strings without the NUL terminator from each match.
+// The unwrap is OK here since a match requires the `cstr` capture to match.
+let cstrs: Vec<&[u8]> =
+    re.captures_iter(hay)
+      .map(|c| c.name("cstr").unwrap().as_bytes())
+      .collect();
+assert_eq!(cstrs, vec![&b"foo"[..], &b"qu\xFFux"[..], &b"baz"[..]]);
+```
+
+# Example: selectively enable Unicode support
+
+This shows how to match an arbitrary byte pattern followed by a UTF-8 encoded
+string (e.g., to extract a title from a Matroska file):
+
+```rust
+use regex::bytes::Regex;
+
+let re = Regex::new(
+    r"(?-u)\x7b\xa9(?:[\x80-\xfe]|[\x40-\xff].)(?u:(.*))"
+).unwrap();
+let hay = b"\x12\xd0\x3b\x5f\x7b\xa9\x85\xe2\x98\x83\x80\x98\x54\x76\x68\x65";
+
+// Notice that despite the `.*` at the end, it will only match valid UTF-8
+// because Unicode mode was enabled with the `u` flag. Without the `u` flag,
+// the `.*` would match the rest of the bytes regardless of whether they were
+// valid UTF-8.
+let (_, [title]) = re.captures(hay).unwrap().extract();
+assert_eq!(title, b"\xE2\x98\x83");
+// We can UTF-8 decode the title now. And the unwrap here
+// is correct because the existence of a match guarantees
+// that `title` is valid UTF-8.
+let title = std::str::from_utf8(title).unwrap();
+assert_eq!(title, "☃");
+```
+
+In general, if the Unicode flag is enabled in a capture group and that capture
+is part of the overall match, then the capture is *guaranteed* to be valid
+UTF-8.
+
+# Syntax
+
+The supported syntax is pretty much the same as the syntax for Unicode
+regular expressions with a few changes that make sense for matching arbitrary
+bytes:
+
+1. The `u` flag can be disabled even when disabling it might cause the regex to
+match invalid UTF-8. When the `u` flag is disabled, the regex is said to be in
+"ASCII compatible" mode.
+2. In ASCII compatible mode, Unicode character classes are not allowed. Literal
+Unicode scalar values outside of character classes are allowed.
+3. In ASCII compatible mode, Perl character classes (`\w`, `\d` and `\s`)
+revert to their typical ASCII definition. `\w` maps to `[[:word:]]`, `\d` maps
+to `[[:digit:]]` and `\s` maps to `[[:space:]]`.
+4. In ASCII compatible mode, word boundaries use the ASCII compatible `\w` to
+determine whether a byte is a word byte or not.
+5. Hexadecimal notation can be used to specify arbitrary bytes instead of
+Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the
+literal byte `\xFF`, while in Unicode mode, `\xFF` is the Unicode codepoint
+`U+00FF` that matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal
+notation when enabled.
+6. In ASCII compatible mode, `.` matches any *byte* except for `\n`. When the
+`s` flag is additionally enabled, `.` matches any byte.
+
+# Performance
+
+In general, one should expect performance on `&[u8]` to be roughly similar to
+performance on `&str`.
+*/
+pub use crate::{builders::bytes::*, regex::bytes::*, regexset::bytes::*};
--- a/third-party/vendor/regex/src/error.rs
+++ b/third-party/vendor/regex/src/error.rs
@ -0,0 +1,102 @@
+use alloc::string::{String, ToString};
+
+use regex_automata::meta;
+
+/// An error that occurred during parsing or compiling a regular expression.
+#[non_exhaustive]
+#[derive(Clone, PartialEq)]
+pub enum Error {
+    /// A syntax error.
+    Syntax(String),
+    /// The compiled program exceeded the set size
+    /// limit. The argument is the size limit imposed by
+    /// [`RegexBuilder::size_limit`](crate::RegexBuilder::size_limit). Even
+    /// when not configured explicitly, it defaults to a reasonable limit.
+    ///
+    /// If you're getting this error, it occurred because your regex has been
+    /// compiled to an intermediate state that is too big. It is important to
+    /// note that exceeding this limit does _not_ mean the regex is too big to
+    /// _work_, but rather, the regex is big enough that it may wind up being
+    /// surprisingly slow when used in a search. In other words, this error is
+    /// meant to be a practical heuristic for avoiding a performance footgun,
+    /// and especially so for the case where the regex pattern is coming from
+    /// an untrusted source.
+    ///
+    /// There are generally two ways to move forward if you hit this error.
+    /// The first is to find some way to use a smaller regex. The second is to
+    /// increase the size limit via `RegexBuilder::size_limit`. However, if
+    /// your regex pattern is not from a trusted source, then neither of these
+    /// approaches may be appropriate. Instead, you'll have to determine just
+    /// how big of a regex you want to allow.
+    CompiledTooBig(usize),
+}
+
+impl Error {
+    pub(crate) fn from_meta_build_error(err: meta::BuildError) -> Error {
+        if let Some(size_limit) = err.size_limit() {
+            Error::CompiledTooBig(size_limit)
+        } else if let Some(ref err) = err.syntax_error() {
+            Error::Syntax(err.to_string())
+        } else {
+            // This is a little suspect. Technically there are more ways for
+            // a meta regex to fail to build other than "exceeded size limit"
+            // and "syntax error." For example, if there are too many states
+            // or even too many patterns. But in practice this is probably
+            // good enough. The worst thing that happens is that Error::Syntax
+            // represents an error that isn't technically a syntax error, but
+            // the actual message will still be shown. So... it's not too bad.
+            //
+            // We really should have made the Error type in the regex crate
+            // completely opaque. Rookie mistake.
+            Error::Syntax(err.to_string())
+        }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for Error {
+    // TODO: Remove this method entirely on the next breaking semver release.
+    #[allow(deprecated)]
+    fn description(&self) -> &str {
+        match *self {
+            Error::Syntax(ref err) => err,
+            Error::CompiledTooBig(_) => "compiled program too big",
+        }
+    }
+}
+
+impl core::fmt::Display for Error {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match *self {
+            Error::Syntax(ref err) => err.fmt(f),
+            Error::CompiledTooBig(limit) => write!(
+                f,
+                "Compiled regex exceeds size limit of {} bytes.",
+                limit
+            ),
+        }
+    }
+}
+
+// We implement our own Debug implementation so that we show nicer syntax
+// errors when people use `Regex::new(...).unwrap()`. It's a little weird,
+// but the `Syntax` variant is already storing a `String` anyway, so we might
+// as well format it nicely.
+impl core::fmt::Debug for Error {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match *self {
+            Error::Syntax(ref err) => {
+                let hr: String = core::iter::repeat('~').take(79).collect();
+                writeln!(f, "Syntax(")?;
+                writeln!(f, "{}", hr)?;
+                writeln!(f, "{}", err)?;
+                writeln!(f, "{}", hr)?;
+                write!(f, ")")?;
+                Ok(())
+            }
+            Error::CompiledTooBig(limit) => {
+                f.debug_tuple("CompiledTooBig").field(&limit).finish()
+            }
+        }
+    }
+}
--- a/third-party/vendor/regex/src/find_byte.rs
+++ b/third-party/vendor/regex/src/find_byte.rs
@ -0,0 +1,17 @@
+/// Searches for the given needle in the given haystack.
+///
+/// If the perf-literal feature is enabled, then this uses the super optimized
+/// memchr crate. Otherwise, it uses the naive byte-at-a-time implementation.
+pub(crate) fn find_byte(needle: u8, haystack: &[u8]) -> Option<usize> {
+    #[cfg(not(feature = "perf-literal"))]
+    fn imp(needle: u8, haystack: &[u8]) -> Option<usize> {
+        haystack.iter().position(|&b| b == needle)
+    }
+
+    #[cfg(feature = "perf-literal")]
+    fn imp(needle: u8, haystack: &[u8]) -> Option<usize> {
+        memchr::memchr(needle, haystack)
+    }
+
+    imp(needle, haystack)
+}
--- a/third-party/vendor/regex/src/lib.rs
+++ b/third-party/vendor/regex/src/lib.rs
--- a/third-party/vendor/regex/src/pattern.rs
+++ b/third-party/vendor/regex/src/pattern.rs
@ -0,0 +1,63 @@
+use core::str::pattern::{Pattern, SearchStep, Searcher};
+
+use crate::{Matches, Regex};
+
+#[derive(Debug)]
+pub struct RegexSearcher<'r, 't> {
+    haystack: &'t str,
+    it: Matches<'r, 't>,
+    last_step_end: usize,
+    next_match: Option<(usize, usize)>,
+}
+
+impl<'r, 't> Pattern<'t> for &'r Regex {
+    type Searcher = RegexSearcher<'r, 't>;
+
+    fn into_searcher(self, haystack: &'t str) -> RegexSearcher<'r, 't> {
+        RegexSearcher {
+            haystack,
+            it: self.find_iter(haystack),
+            last_step_end: 0,
+            next_match: None,
+        }
+    }
+}
+
+unsafe impl<'r, 't> Searcher<'t> for RegexSearcher<'r, 't> {
+    #[inline]
+    fn haystack(&self) -> &'t str {
+        self.haystack
+    }
+
+    #[inline]
+    fn next(&mut self) -> SearchStep {
+        if let Some((s, e)) = self.next_match {
+            self.next_match = None;
+            self.last_step_end = e;
+            return SearchStep::Match(s, e);
+        }
+        match self.it.next() {
+            None => {
+                if self.last_step_end < self.haystack().len() {
+                    let last = self.last_step_end;
+                    self.last_step_end = self.haystack().len();
+                    SearchStep::Reject(last, self.haystack().len())
+                } else {
+                    SearchStep::Done
+                }
+            }
+            Some(m) => {
+                let (s, e) = (m.start(), m.end());
+                if s == self.last_step_end {
+                    self.last_step_end = e;
+                    SearchStep::Match(s, e)
+                } else {
+                    self.next_match = Some((s, e));
+                    let last = self.last_step_end;
+                    self.last_step_end = s;
+                    SearchStep::Reject(last, s)
+                }
+            }
+        }
+    }
+}
--- a/third-party/vendor/regex/src/regex/bytes.rs
+++ b/third-party/vendor/regex/src/regex/bytes.rs
--- a/third-party/vendor/regex/src/regex/mod.rs
+++ b/third-party/vendor/regex/src/regex/mod.rs
@ -0,0 +1,2 @@
+pub(crate) mod bytes;
+pub(crate) mod string;
--- a/third-party/vendor/regex/src/regex/string.rs
+++ b/third-party/vendor/regex/src/regex/string.rs
--- a/third-party/vendor/regex/src/regexset/bytes.rs
+++ b/third-party/vendor/regex/src/regexset/bytes.rs
@ -0,0 +1,710 @@
+use alloc::string::String;
+
+use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter};
+
+use crate::{bytes::RegexSetBuilder, Error};
+
+/// Match multiple, possibly overlapping, regexes in a single search.
+///
+/// A regex set corresponds to the union of zero or more regular expressions.
+/// That is, a regex set will match a haystack when at least one of its
+/// constituent regexes matches. A regex set as its formulated here provides a
+/// touch more power: it will also report *which* regular expressions in the
+/// set match. Indeed, this is the key difference between regex sets and a
+/// single `Regex` with many alternates, since only one alternate can match at
+/// a time.
+///
+/// For example, consider regular expressions to match email addresses and
+/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
+/// regex set is constructed from those regexes, then searching the haystack
+/// `foo@example.com` will report both regexes as matching. Of course, one
+/// could accomplish this by compiling each regex on its own and doing two
+/// searches over the haystack. The key advantage of using a regex set is
+/// that it will report the matching regexes using a *single pass through the
+/// haystack*. If one has hundreds or thousands of regexes to match repeatedly
+/// (like a URL router for a complex web application or a user agent matcher),
+/// then a regex set *can* realize huge performance gains.
+///
+/// Unlike the top-level [`RegexSet`](crate::RegexSet), this `RegexSet`
+/// searches haystacks with type `&[u8]` instead of `&str`. Consequently, this
+/// `RegexSet` is permitted to match invalid UTF-8.
+///
+/// # Limitations
+///
+/// Regex sets are limited to answering the following two questions:
+///
+/// 1. Does any regex in the set match?
+/// 2. If so, which regexes in the set match?
+///
+/// As with the main [`Regex`][crate::bytes::Regex] type, it is cheaper to ask
+/// (1) instead of (2) since the matching engines can stop after the first
+/// match is found.
+///
+/// You cannot directly extract [`Match`][crate::bytes::Match] or
+/// [`Captures`][crate::bytes::Captures] objects from a regex set. If you need
+/// these operations, the recommended approach is to compile each pattern in
+/// the set independently and scan the exact same haystack a second time with
+/// those independently compiled patterns:
+///
+/// ```
+/// use regex::bytes::{Regex, RegexSet};
+///
+/// let patterns = ["foo", "bar"];
+/// // Both patterns will match different ranges of this string.
+/// let hay = b"barfoo";
+///
+/// // Compile a set matching any of our patterns.
+/// let set = RegexSet::new(patterns).unwrap();
+/// // Compile each pattern independently.
+/// let regexes: Vec<_> = set
+///     .patterns()
+///     .iter()
+///     .map(|pat| Regex::new(pat).unwrap())
+///     .collect();
+///
+/// // Match against the whole set first and identify the individual
+/// // matching patterns.
+/// let matches: Vec<&[u8]> = set
+///     .matches(hay)
+///     .into_iter()
+///     // Dereference the match index to get the corresponding
+///     // compiled pattern.
+///     .map(|index| &regexes[index])
+///     // To get match locations or any other info, we then have to search the
+///     // exact same haystack again, using our separately-compiled pattern.
+///     .map(|re| re.find(hay).unwrap().as_bytes())
+///     .collect();
+///
+/// // Matches arrive in the order the constituent patterns were declared,
+/// // not the order they appear in the haystack.
+/// assert_eq!(vec![&b"foo"[..], &b"bar"[..]], matches);
+/// ```
+///
+/// # Performance
+///
+/// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
+/// search takes `O(m * n)` time, where `m` is proportional to the size of the
+/// regex set and `n` is proportional to the length of the haystack.
+///
+/// # Trait implementations
+///
+/// The `Default` trait is implemented for `RegexSet`. The default value
+/// is an empty set. An empty set can also be explicitly constructed via
+/// [`RegexSet::empty`].
+///
+/// # Example
+///
+/// This shows how the above two regexes (for matching email addresses and
+/// domains) might work:
+///
+/// ```
+/// use regex::bytes::RegexSet;
+///
+/// let set = RegexSet::new(&[
+///     r"[a-z]+@[a-z]+\.(com|org|net)",
+///     r"[a-z]+\.(com|org|net)",
+/// ]).unwrap();
+///
+/// // Ask whether any regexes in the set match.
+/// assert!(set.is_match(b"foo@example.com"));
+///
+/// // Identify which regexes in the set match.
+/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect();
+/// assert_eq!(vec![0, 1], matches);
+///
+/// // Try again, but with a haystack that only matches one of the regexes.
+/// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect();
+/// assert_eq!(vec![1], matches);
+///
+/// // Try again, but with a haystack that doesn't match any regex in the set.
+/// let matches: Vec<_> = set.matches(b"example").into_iter().collect();
+/// assert!(matches.is_empty());
+/// ```
+///
+/// Note that it would be possible to adapt the above example to using `Regex`
+/// with an expression like:
+///
+/// ```text
+/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
+/// ```
+///
+/// After a match, one could then inspect the capture groups to figure out
+/// which alternates matched. The problem is that it is hard to make this
+/// approach scale when there are many regexes since the overlap between each
+/// alternate isn't always obvious to reason about.
+#[derive(Clone)]
+pub struct RegexSet {
+    pub(crate) meta: meta::Regex,
+    pub(crate) patterns: alloc::sync::Arc<[String]>,
+}
+
+impl RegexSet {
+    /// Create a new regex set with the given regular expressions.
+    ///
+    /// This takes an iterator of `S`, where `S` is something that can produce
+    /// a `&str`. If any of the strings in the iterator are not valid regular
+    /// expressions, then an error is returned.
+    ///
+    /// # Example
+    ///
+    /// Create a new regex set from an iterator of strings:
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
+    /// assert!(set.is_match(b"foo"));
+    /// ```
+    pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error>
+    where
+        S: AsRef<str>,
+        I: IntoIterator<Item = S>,
+    {
+        RegexSetBuilder::new(exprs).build()
+    }
+
+    /// Create a new empty regex set.
+    ///
+    /// An empty regex never matches anything.
+    ///
+    /// This is a convenience function for `RegexSet::new([])`, but doesn't
+    /// require one to specify the type of the input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::empty();
+    /// assert!(set.is_empty());
+    /// // an empty set matches nothing
+    /// assert!(!set.is_match(b""));
+    /// ```
+    pub fn empty() -> RegexSet {
+        let empty: [&str; 0] = [];
+        RegexSetBuilder::new(empty).build().unwrap()
+    }
+
+    /// Returns true if and only if one of the regexes in this set matches
+    /// the haystack given.
+    ///
+    /// This method should be preferred if you only need to test whether any
+    /// of the regexes in the set should match, but don't care about *which*
+    /// regexes matched. This is because the underlying matching engine will
+    /// quit immediately after seeing the first match instead of continuing to
+    /// find all matches.
+    ///
+    /// Note that as with searches using [`Regex`](crate::bytes::Regex), the
+    /// expression is unanchored by default. That is, if the regex does not
+    /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
+    /// to match anywhere in the haystack.
+    ///
+    /// # Example
+    ///
+    /// Tests whether a set matches somewhere in a haystack:
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
+    /// assert!(set.is_match(b"foo"));
+    /// assert!(!set.is_match("☃".as_bytes()));
+    /// ```
+    #[inline]
+    pub fn is_match(&self, haystack: &[u8]) -> bool {
+        self.is_match_at(haystack, 0)
+    }
+
+    /// Returns true if and only if one of the regexes in this set matches the
+    /// haystack given, with the search starting at the offset given.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `start >= haystack.len() + 1`.
+    ///
+    /// # Example
+    ///
+    /// This example shows the significance of `start`. Namely, consider a
+    /// haystack `foobar` and a desire to execute a search starting at offset
+    /// `3`. You could search a substring explicitly, but then the look-around
+    /// assertions won't work correctly. Instead, you can use this method to
+    /// specify the start position of a search.
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
+    /// let hay = b"foobar";
+    /// // We get a match here, but it's probably not intended.
+    /// assert!(set.is_match(&hay[3..]));
+    /// // No match because the  assertions take the context into account.
+    /// assert!(!set.is_match_at(hay, 3));
+    /// ```
+    #[inline]
+    pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool {
+        self.meta.is_match(Input::new(haystack).span(start..haystack.len()))
+    }
+
+    /// Returns the set of regexes that match in the given haystack.
+    ///
+    /// The set returned contains the index of each regex that matches in
+    /// the given haystack. The index is in correspondence with the order of
+    /// regular expressions given to `RegexSet`'s constructor.
+    ///
+    /// The set can also be used to iterate over the matched indices. The order
+    /// of iteration is always ascending with respect to the matching indices.
+    ///
+    /// Note that as with searches using [`Regex`](crate::bytes::Regex), the
+    /// expression is unanchored by default. That is, if the regex does not
+    /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
+    /// to match anywhere in the haystack.
+    ///
+    /// # Example
+    ///
+    /// Tests which regular expressions match the given haystack:
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"\w+",
+    ///     r"\d+",
+    ///     r"\pL+",
+    ///     r"foo",
+    ///     r"bar",
+    ///     r"barfoo",
+    ///     r"foobar",
+    /// ]).unwrap();
+    /// let matches: Vec<_> = set.matches(b"foobar").into_iter().collect();
+    /// assert_eq!(matches, vec![0, 2, 3, 4, 6]);
+    ///
+    /// // You can also test whether a particular regex matched:
+    /// let matches = set.matches(b"foobar");
+    /// assert!(!matches.matched(5));
+    /// assert!(matches.matched(6));
+    /// ```
+    #[inline]
+    pub fn matches(&self, haystack: &[u8]) -> SetMatches {
+        self.matches_at(haystack, 0)
+    }
+
+    /// Returns the set of regexes that match in the given haystack.
+    ///
+    /// The set returned contains the index of each regex that matches in
+    /// the given haystack. The index is in correspondence with the order of
+    /// regular expressions given to `RegexSet`'s constructor.
+    ///
+    /// The set can also be used to iterate over the matched indices. The order
+    /// of iteration is always ascending with respect to the matching indices.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `start >= haystack.len() + 1`.
+    ///
+    /// # Example
+    ///
+    /// Tests which regular expressions match the given haystack:
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
+    /// let hay = b"foobar";
+    /// // We get matches here, but it's probably not intended.
+    /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect();
+    /// assert_eq!(matches, vec![0, 1]);
+    /// // No matches because the  assertions take the context into account.
+    /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect();
+    /// assert_eq!(matches, vec![]);
+    /// ```
+    #[inline]
+    pub fn matches_at(&self, haystack: &[u8], start: usize) -> SetMatches {
+        let input = Input::new(haystack).span(start..haystack.len());
+        let mut patset = PatternSet::new(self.meta.pattern_len());
+        self.meta.which_overlapping_matches(&input, &mut patset);
+        SetMatches(patset)
+    }
+
+    /// Returns the same as matches, but starts the search at the given
+    /// offset and stores the matches into the slice given.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    ///
+    /// `matches` must have a length that is at least the number of regexes
+    /// in this set.
+    ///
+    /// This method returns true if and only if at least one member of
+    /// `matches` is true after executing the set against `haystack`.
+    #[doc(hidden)]
+    #[inline]
+    pub fn matches_read_at(
+        &self,
+        matches: &mut [bool],
+        haystack: &[u8],
+        start: usize,
+    ) -> bool {
+        // This is pretty dumb. We should try to fix this, but the
+        // regex-automata API doesn't provide a way to store matches in an
+        // arbitrary &mut [bool]. Thankfully, this API is is doc(hidden) and
+        // thus not public... But regex-capi currently uses it. We should
+        // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet
+        // is in regex-automata, not regex. So maybe we should just accept a
+        // 'SetMatches', which is basically just a newtype around PatternSet.
+        let mut patset = PatternSet::new(self.meta.pattern_len());
+        let mut input = Input::new(haystack);
+        input.set_start(start);
+        self.meta.which_overlapping_matches(&input, &mut patset);
+        for pid in patset.iter() {
+            matches[pid] = true;
+        }
+        !patset.is_empty()
+    }
+
+    /// An alias for `matches_read_at` to preserve backward compatibility.
+    ///
+    /// The `regex-capi` crate used this method, so to avoid breaking that
+    /// crate, we continue to export it as an undocumented API.
+    #[doc(hidden)]
+    #[inline]
+    pub fn read_matches_at(
+        &self,
+        matches: &mut [bool],
+        haystack: &[u8],
+        start: usize,
+    ) -> bool {
+        self.matches_read_at(matches, haystack, start)
+    }
+
+    /// Returns the total number of regexes in this set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// assert_eq!(0, RegexSet::empty().len());
+    /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len());
+    /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len());
+    /// ```
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.meta.pattern_len()
+    }
+
+    /// Returns `true` if this set contains no regexes.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// assert!(RegexSet::empty().is_empty());
+    /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty());
+    /// ```
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.meta.pattern_len() == 0
+    }
+
+    /// Returns the regex patterns that this regex set was constructed from.
+    ///
+    /// This function can be used to determine the pattern for a match. The
+    /// slice returned has exactly as many patterns givens to this regex set,
+    /// and the order of the slice is the same as the order of the patterns
+    /// provided to the set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new(&[
+    ///     r"\w+",
+    ///     r"\d+",
+    ///     r"\pL+",
+    ///     r"foo",
+    ///     r"bar",
+    ///     r"barfoo",
+    ///     r"foobar",
+    /// ]).unwrap();
+    /// let matches: Vec<_> = set
+    ///     .matches(b"foobar")
+    ///     .into_iter()
+    ///     .map(|index| &set.patterns()[index])
+    ///     .collect();
+    /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]);
+    /// ```
+    #[inline]
+    pub fn patterns(&self) -> &[String] {
+        &self.patterns
+    }
+}
+
+impl Default for RegexSet {
+    fn default() -> Self {
+        RegexSet::empty()
+    }
+}
+
+/// A set of matches returned by a regex set.
+///
+/// Values of this type are constructed by [`RegexSet::matches`].
+#[derive(Clone, Debug)]
+pub struct SetMatches(PatternSet);
+
+impl SetMatches {
+    /// Whether this set contains any matches.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new(&[
+    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
+    ///     r"[a-z]+\.(com|org|net)",
+    /// ]).unwrap();
+    /// let matches = set.matches(b"foo@example.com");
+    /// assert!(matches.matched_any());
+    /// ```
+    #[inline]
+    pub fn matched_any(&self) -> bool {
+        !self.0.is_empty()
+    }
+
+    /// Whether the regex at the given index matched.
+    ///
+    /// The index for a regex is determined by its insertion order upon the
+    /// initial construction of a `RegexSet`, starting at `0`.
+    ///
+    /// # Panics
+    ///
+    /// If `index` is greater than or equal to the number of regexes in the
+    /// original set that produced these matches. Equivalently, when `index`
+    /// is greater than or equal to [`SetMatches::len`].
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
+    ///     r"[a-z]+\.(com|org|net)",
+    /// ]).unwrap();
+    /// let matches = set.matches(b"example.com");
+    /// assert!(!matches.matched(0));
+    /// assert!(matches.matched(1));
+    /// ```
+    #[inline]
+    pub fn matched(&self, index: usize) -> bool {
+        self.0.contains(PatternID::new_unchecked(index))
+    }
+
+    /// The total number of regexes in the set that created these matches.
+    ///
+    /// **WARNING:** This always returns the same value as [`RegexSet::len`].
+    /// In particular, it does *not* return the number of elements yielded by
+    /// [`SetMatches::iter`]. The only way to determine the total number of
+    /// matched regexes is to iterate over them.
+    ///
+    /// # Example
+    ///
+    /// Notice that this method returns the total number of regexes in the
+    /// original set, and *not* the total number of regexes that matched.
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
+    ///     r"[a-z]+\.(com|org|net)",
+    /// ]).unwrap();
+    /// let matches = set.matches(b"example.com");
+    /// // Total number of patterns that matched.
+    /// assert_eq!(1, matches.iter().count());
+    /// // Total number of patterns in the set.
+    /// assert_eq!(2, matches.len());
+    /// ```
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.0.capacity()
+    }
+
+    /// Returns an iterator over the indices of the regexes that matched.
+    ///
+    /// This will always produces matches in ascending order, where the index
+    /// yielded corresponds to the index of the regex that matched with respect
+    /// to its position when initially building the set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[0-9]",
+    ///     r"[a-z]",
+    ///     r"[A-Z]",
+    ///     r"\p{Greek}",
+    /// ]).unwrap();
+    /// let hay = "βa1".as_bytes();
+    /// let matches: Vec<_> = set.matches(hay).iter().collect();
+    /// assert_eq!(matches, vec![0, 1, 3]);
+    /// ```
+    ///
+    /// Note that `SetMatches` also implemnets the `IntoIterator` trait, so
+    /// this method is not always needed. For example:
+    ///
+    /// ```
+    /// use regex::bytes::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[0-9]",
+    ///     r"[a-z]",
+    ///     r"[A-Z]",
+    ///     r"\p{Greek}",
+    /// ]).unwrap();
+    /// let hay = "βa1".as_bytes();
+    /// let mut matches = vec![];
+    /// for index in set.matches(hay) {
+    ///     matches.push(index);
+    /// }
+    /// assert_eq!(matches, vec![0, 1, 3]);
+    /// ```
+    #[inline]
+    pub fn iter(&self) -> SetMatchesIter<'_> {
+        SetMatchesIter(self.0.iter())
+    }
+}
+
+impl IntoIterator for SetMatches {
+    type IntoIter = SetMatchesIntoIter;
+    type Item = usize;
+
+    fn into_iter(self) -> Self::IntoIter {
+        let it = 0..self.0.capacity();
+        SetMatchesIntoIter { patset: self.0, it }
+    }
+}
+
+impl<'a> IntoIterator for &'a SetMatches {
+    type IntoIter = SetMatchesIter<'a>;
+    type Item = usize;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+/// An owned iterator over the set of matches from a regex set.
+///
+/// This will always produces matches in ascending order of index, where the
+/// index corresponds to the index of the regex that matched with respect to
+/// its position when initially building the set.
+///
+/// This iterator is created by calling `SetMatches::into_iter` via the
+/// `IntoIterator` trait. This is automatically done in `for` loops.
+///
+/// # Example
+///
+/// ```
+/// use regex::bytes::RegexSet;
+///
+/// let set = RegexSet::new([
+///     r"[0-9]",
+///     r"[a-z]",
+///     r"[A-Z]",
+///     r"\p{Greek}",
+/// ]).unwrap();
+/// let hay = "βa1".as_bytes();
+/// let mut matches = vec![];
+/// for index in set.matches(hay) {
+///     matches.push(index);
+/// }
+/// assert_eq!(matches, vec![0, 1, 3]);
+/// ```
+#[derive(Debug)]
+pub struct SetMatchesIntoIter {
+    patset: PatternSet,
+    it: core::ops::Range<usize>,
+}
+
+impl Iterator for SetMatchesIntoIter {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<usize> {
+        loop {
+            let id = self.it.next()?;
+            if self.patset.contains(PatternID::new_unchecked(id)) {
+                return Some(id);
+            }
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl DoubleEndedIterator for SetMatchesIntoIter {
+    fn next_back(&mut self) -> Option<usize> {
+        loop {
+            let id = self.it.next_back()?;
+            if self.patset.contains(PatternID::new_unchecked(id)) {
+                return Some(id);
+            }
+        }
+    }
+}
+
+impl core::iter::FusedIterator for SetMatchesIntoIter {}
+
+/// A borrowed iterator over the set of matches from a regex set.
+///
+/// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that
+/// created this iterator.
+///
+/// This will always produces matches in ascending order, where the index
+/// corresponds to the index of the regex that matched with respect to its
+/// position when initially building the set.
+///
+/// This iterator is created by the [`SetMatches::iter`] method.
+#[derive(Clone, Debug)]
+pub struct SetMatchesIter<'a>(PatternSetIter<'a>);
+
+impl<'a> Iterator for SetMatchesIter<'a> {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<usize> {
+        self.0.next().map(|pid| pid.as_usize())
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.0.size_hint()
+    }
+}
+
+impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
+    fn next_back(&mut self) -> Option<usize> {
+        self.0.next_back().map(|pid| pid.as_usize())
+    }
+}
+
+impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {}
+
+impl core::fmt::Debug for RegexSet {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "RegexSet({:?})", self.patterns())
+    }
+}
--- a/third-party/vendor/regex/src/regexset/mod.rs
+++ b/third-party/vendor/regex/src/regexset/mod.rs
@ -0,0 +1,2 @@
+pub(crate) mod bytes;
+pub(crate) mod string;
--- a/third-party/vendor/regex/src/regexset/string.rs
+++ b/third-party/vendor/regex/src/regexset/string.rs
@ -0,0 +1,706 @@
+use alloc::string::String;
+
+use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter};
+
+use crate::{Error, RegexSetBuilder};
+
+/// Match multiple, possibly overlapping, regexes in a single search.
+///
+/// A regex set corresponds to the union of zero or more regular expressions.
+/// That is, a regex set will match a haystack when at least one of its
+/// constituent regexes matches. A regex set as its formulated here provides a
+/// touch more power: it will also report *which* regular expressions in the
+/// set match. Indeed, this is the key difference between regex sets and a
+/// single `Regex` with many alternates, since only one alternate can match at
+/// a time.
+///
+/// For example, consider regular expressions to match email addresses and
+/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
+/// regex set is constructed from those regexes, then searching the haystack
+/// `foo@example.com` will report both regexes as matching. Of course, one
+/// could accomplish this by compiling each regex on its own and doing two
+/// searches over the haystack. The key advantage of using a regex set is
+/// that it will report the matching regexes using a *single pass through the
+/// haystack*. If one has hundreds or thousands of regexes to match repeatedly
+/// (like a URL router for a complex web application or a user agent matcher),
+/// then a regex set *can* realize huge performance gains.
+///
+/// # Limitations
+///
+/// Regex sets are limited to answering the following two questions:
+///
+/// 1. Does any regex in the set match?
+/// 2. If so, which regexes in the set match?
+///
+/// As with the main [`Regex`][crate::Regex] type, it is cheaper to ask (1)
+/// instead of (2) since the matching engines can stop after the first match
+/// is found.
+///
+/// You cannot directly extract [`Match`][crate::Match] or
+/// [`Captures`][crate::Captures] objects from a regex set. If you need these
+/// operations, the recommended approach is to compile each pattern in the set
+/// independently and scan the exact same haystack a second time with those
+/// independently compiled patterns:
+///
+/// ```
+/// use regex::{Regex, RegexSet};
+///
+/// let patterns = ["foo", "bar"];
+/// // Both patterns will match different ranges of this string.
+/// let hay = "barfoo";
+///
+/// // Compile a set matching any of our patterns.
+/// let set = RegexSet::new(patterns).unwrap();
+/// // Compile each pattern independently.
+/// let regexes: Vec<_> = set
+///     .patterns()
+///     .iter()
+///     .map(|pat| Regex::new(pat).unwrap())
+///     .collect();
+///
+/// // Match against the whole set first and identify the individual
+/// // matching patterns.
+/// let matches: Vec<&str> = set
+///     .matches(hay)
+///     .into_iter()
+///     // Dereference the match index to get the corresponding
+///     // compiled pattern.
+///     .map(|index| &regexes[index])
+///     // To get match locations or any other info, we then have to search the
+///     // exact same haystack again, using our separately-compiled pattern.
+///     .map(|re| re.find(hay).unwrap().as_str())
+///     .collect();
+///
+/// // Matches arrive in the order the constituent patterns were declared,
+/// // not the order they appear in the haystack.
+/// assert_eq!(vec!["foo", "bar"], matches);
+/// ```
+///
+/// # Performance
+///
+/// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
+/// search takes `O(m * n)` time, where `m` is proportional to the size of the
+/// regex set and `n` is proportional to the length of the haystack.
+///
+/// # Trait implementations
+///
+/// The `Default` trait is implemented for `RegexSet`. The default value
+/// is an empty set. An empty set can also be explicitly constructed via
+/// [`RegexSet::empty`].
+///
+/// # Example
+///
+/// This shows how the above two regexes (for matching email addresses and
+/// domains) might work:
+///
+/// ```
+/// use regex::RegexSet;
+///
+/// let set = RegexSet::new(&[
+///     r"[a-z]+@[a-z]+\.(com|org|net)",
+///     r"[a-z]+\.(com|org|net)",
+/// ]).unwrap();
+///
+/// // Ask whether any regexes in the set match.
+/// assert!(set.is_match("foo@example.com"));
+///
+/// // Identify which regexes in the set match.
+/// let matches: Vec<_> = set.matches("foo@example.com").into_iter().collect();
+/// assert_eq!(vec![0, 1], matches);
+///
+/// // Try again, but with a haystack that only matches one of the regexes.
+/// let matches: Vec<_> = set.matches("example.com").into_iter().collect();
+/// assert_eq!(vec![1], matches);
+///
+/// // Try again, but with a haystack that doesn't match any regex in the set.
+/// let matches: Vec<_> = set.matches("example").into_iter().collect();
+/// assert!(matches.is_empty());
+/// ```
+///
+/// Note that it would be possible to adapt the above example to using `Regex`
+/// with an expression like:
+///
+/// ```text
+/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
+/// ```
+///
+/// After a match, one could then inspect the capture groups to figure out
+/// which alternates matched. The problem is that it is hard to make this
+/// approach scale when there are many regexes since the overlap between each
+/// alternate isn't always obvious to reason about.
+#[derive(Clone)]
+pub struct RegexSet {
+    pub(crate) meta: meta::Regex,
+    pub(crate) patterns: alloc::sync::Arc<[String]>,
+}
+
+impl RegexSet {
+    /// Create a new regex set with the given regular expressions.
+    ///
+    /// This takes an iterator of `S`, where `S` is something that can produce
+    /// a `&str`. If any of the strings in the iterator are not valid regular
+    /// expressions, then an error is returned.
+    ///
+    /// # Example
+    ///
+    /// Create a new regex set from an iterator of strings:
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
+    /// assert!(set.is_match("foo"));
+    /// ```
+    pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error>
+    where
+        S: AsRef<str>,
+        I: IntoIterator<Item = S>,
+    {
+        RegexSetBuilder::new(exprs).build()
+    }
+
+    /// Create a new empty regex set.
+    ///
+    /// An empty regex never matches anything.
+    ///
+    /// This is a convenience function for `RegexSet::new([])`, but doesn't
+    /// require one to specify the type of the input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::empty();
+    /// assert!(set.is_empty());
+    /// // an empty set matches nothing
+    /// assert!(!set.is_match(""));
+    /// ```
+    pub fn empty() -> RegexSet {
+        let empty: [&str; 0] = [];
+        RegexSetBuilder::new(empty).build().unwrap()
+    }
+
+    /// Returns true if and only if one of the regexes in this set matches
+    /// the haystack given.
+    ///
+    /// This method should be preferred if you only need to test whether any
+    /// of the regexes in the set should match, but don't care about *which*
+    /// regexes matched. This is because the underlying matching engine will
+    /// quit immediately after seeing the first match instead of continuing to
+    /// find all matches.
+    ///
+    /// Note that as with searches using [`Regex`](crate::Regex), the
+    /// expression is unanchored by default. That is, if the regex does not
+    /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
+    /// to match anywhere in the haystack.
+    ///
+    /// # Example
+    ///
+    /// Tests whether a set matches somewhere in a haystack:
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
+    /// assert!(set.is_match("foo"));
+    /// assert!(!set.is_match("☃"));
+    /// ```
+    #[inline]
+    pub fn is_match(&self, haystack: &str) -> bool {
+        self.is_match_at(haystack, 0)
+    }
+
+    /// Returns true if and only if one of the regexes in this set matches the
+    /// haystack given, with the search starting at the offset given.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `start >= haystack.len() + 1`.
+    ///
+    /// # Example
+    ///
+    /// This example shows the significance of `start`. Namely, consider a
+    /// haystack `foobar` and a desire to execute a search starting at offset
+    /// `3`. You could search a substring explicitly, but then the look-around
+    /// assertions won't work correctly. Instead, you can use this method to
+    /// specify the start position of a search.
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
+    /// let hay = "foobar";
+    /// // We get a match here, but it's probably not intended.
+    /// assert!(set.is_match(&hay[3..]));
+    /// // No match because the  assertions take the context into account.
+    /// assert!(!set.is_match_at(hay, 3));
+    /// ```
+    #[inline]
+    pub fn is_match_at(&self, haystack: &str, start: usize) -> bool {
+        self.meta.is_match(Input::new(haystack).span(start..haystack.len()))
+    }
+
+    /// Returns the set of regexes that match in the given haystack.
+    ///
+    /// The set returned contains the index of each regex that matches in
+    /// the given haystack. The index is in correspondence with the order of
+    /// regular expressions given to `RegexSet`'s constructor.
+    ///
+    /// The set can also be used to iterate over the matched indices. The order
+    /// of iteration is always ascending with respect to the matching indices.
+    ///
+    /// Note that as with searches using [`Regex`](crate::Regex), the
+    /// expression is unanchored by default. That is, if the regex does not
+    /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
+    /// to match anywhere in the haystack.
+    ///
+    /// # Example
+    ///
+    /// Tests which regular expressions match the given haystack:
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"\w+",
+    ///     r"\d+",
+    ///     r"\pL+",
+    ///     r"foo",
+    ///     r"bar",
+    ///     r"barfoo",
+    ///     r"foobar",
+    /// ]).unwrap();
+    /// let matches: Vec<_> = set.matches("foobar").into_iter().collect();
+    /// assert_eq!(matches, vec![0, 2, 3, 4, 6]);
+    ///
+    /// // You can also test whether a particular regex matched:
+    /// let matches = set.matches("foobar");
+    /// assert!(!matches.matched(5));
+    /// assert!(matches.matched(6));
+    /// ```
+    #[inline]
+    pub fn matches(&self, haystack: &str) -> SetMatches {
+        self.matches_at(haystack, 0)
+    }
+
+    /// Returns the set of regexes that match in the given haystack.
+    ///
+    /// The set returned contains the index of each regex that matches in
+    /// the given haystack. The index is in correspondence with the order of
+    /// regular expressions given to `RegexSet`'s constructor.
+    ///
+    /// The set can also be used to iterate over the matched indices. The order
+    /// of iteration is always ascending with respect to the matching indices.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    ///
+    /// # Panics
+    ///
+    /// This panics when `start >= haystack.len() + 1`.
+    ///
+    /// # Example
+    ///
+    /// Tests which regular expressions match the given haystack:
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
+    /// let hay = "foobar";
+    /// // We get matches here, but it's probably not intended.
+    /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect();
+    /// assert_eq!(matches, vec![0, 1]);
+    /// // No matches because the  assertions take the context into account.
+    /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect();
+    /// assert_eq!(matches, vec![]);
+    /// ```
+    #[inline]
+    pub fn matches_at(&self, haystack: &str, start: usize) -> SetMatches {
+        let input = Input::new(haystack).span(start..haystack.len());
+        let mut patset = PatternSet::new(self.meta.pattern_len());
+        self.meta.which_overlapping_matches(&input, &mut patset);
+        SetMatches(patset)
+    }
+
+    /// Returns the same as matches, but starts the search at the given
+    /// offset and stores the matches into the slice given.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    ///
+    /// `matches` must have a length that is at least the number of regexes
+    /// in this set.
+    ///
+    /// This method returns true if and only if at least one member of
+    /// `matches` is true after executing the set against `haystack`.
+    #[doc(hidden)]
+    #[inline]
+    pub fn matches_read_at(
+        &self,
+        matches: &mut [bool],
+        haystack: &str,
+        start: usize,
+    ) -> bool {
+        // This is pretty dumb. We should try to fix this, but the
+        // regex-automata API doesn't provide a way to store matches in an
+        // arbitrary &mut [bool]. Thankfully, this API is is doc(hidden) and
+        // thus not public... But regex-capi currently uses it. We should
+        // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet
+        // is in regex-automata, not regex. So maybe we should just accept a
+        // 'SetMatches', which is basically just a newtype around PatternSet.
+        let mut patset = PatternSet::new(self.meta.pattern_len());
+        let mut input = Input::new(haystack);
+        input.set_start(start);
+        self.meta.which_overlapping_matches(&input, &mut patset);
+        for pid in patset.iter() {
+            matches[pid] = true;
+        }
+        !patset.is_empty()
+    }
+
+    /// An alias for `matches_read_at` to preserve backward compatibility.
+    ///
+    /// The `regex-capi` crate used this method, so to avoid breaking that
+    /// crate, we continue to export it as an undocumented API.
+    #[doc(hidden)]
+    #[inline]
+    pub fn read_matches_at(
+        &self,
+        matches: &mut [bool],
+        haystack: &str,
+        start: usize,
+    ) -> bool {
+        self.matches_read_at(matches, haystack, start)
+    }
+
+    /// Returns the total number of regexes in this set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// assert_eq!(0, RegexSet::empty().len());
+    /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len());
+    /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len());
+    /// ```
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.meta.pattern_len()
+    }
+
+    /// Returns `true` if this set contains no regexes.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// assert!(RegexSet::empty().is_empty());
+    /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty());
+    /// ```
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.meta.pattern_len() == 0
+    }
+
+    /// Returns the regex patterns that this regex set was constructed from.
+    ///
+    /// This function can be used to determine the pattern for a match. The
+    /// slice returned has exactly as many patterns givens to this regex set,
+    /// and the order of the slice is the same as the order of the patterns
+    /// provided to the set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new(&[
+    ///     r"\w+",
+    ///     r"\d+",
+    ///     r"\pL+",
+    ///     r"foo",
+    ///     r"bar",
+    ///     r"barfoo",
+    ///     r"foobar",
+    /// ]).unwrap();
+    /// let matches: Vec<_> = set
+    ///     .matches("foobar")
+    ///     .into_iter()
+    ///     .map(|index| &set.patterns()[index])
+    ///     .collect();
+    /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]);
+    /// ```
+    #[inline]
+    pub fn patterns(&self) -> &[String] {
+        &self.patterns
+    }
+}
+
+impl Default for RegexSet {
+    fn default() -> Self {
+        RegexSet::empty()
+    }
+}
+
+/// A set of matches returned by a regex set.
+///
+/// Values of this type are constructed by [`RegexSet::matches`].
+#[derive(Clone, Debug)]
+pub struct SetMatches(PatternSet);
+
+impl SetMatches {
+    /// Whether this set contains any matches.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new(&[
+    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
+    ///     r"[a-z]+\.(com|org|net)",
+    /// ]).unwrap();
+    /// let matches = set.matches("foo@example.com");
+    /// assert!(matches.matched_any());
+    /// ```
+    #[inline]
+    pub fn matched_any(&self) -> bool {
+        !self.0.is_empty()
+    }
+
+    /// Whether the regex at the given index matched.
+    ///
+    /// The index for a regex is determined by its insertion order upon the
+    /// initial construction of a `RegexSet`, starting at `0`.
+    ///
+    /// # Panics
+    ///
+    /// If `index` is greater than or equal to the number of regexes in the
+    /// original set that produced these matches. Equivalently, when `index`
+    /// is greater than or equal to [`SetMatches::len`].
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
+    ///     r"[a-z]+\.(com|org|net)",
+    /// ]).unwrap();
+    /// let matches = set.matches("example.com");
+    /// assert!(!matches.matched(0));
+    /// assert!(matches.matched(1));
+    /// ```
+    #[inline]
+    pub fn matched(&self, index: usize) -> bool {
+        self.0.contains(PatternID::new_unchecked(index))
+    }
+
+    /// The total number of regexes in the set that created these matches.
+    ///
+    /// **WARNING:** This always returns the same value as [`RegexSet::len`].
+    /// In particular, it does *not* return the number of elements yielded by
+    /// [`SetMatches::iter`]. The only way to determine the total number of
+    /// matched regexes is to iterate over them.
+    ///
+    /// # Example
+    ///
+    /// Notice that this method returns the total number of regexes in the
+    /// original set, and *not* the total number of regexes that matched.
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
+    ///     r"[a-z]+\.(com|org|net)",
+    /// ]).unwrap();
+    /// let matches = set.matches("example.com");
+    /// // Total number of patterns that matched.
+    /// assert_eq!(1, matches.iter().count());
+    /// // Total number of patterns in the set.
+    /// assert_eq!(2, matches.len());
+    /// ```
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.0.capacity()
+    }
+
+    /// Returns an iterator over the indices of the regexes that matched.
+    ///
+    /// This will always produces matches in ascending order, where the index
+    /// yielded corresponds to the index of the regex that matched with respect
+    /// to its position when initially building the set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[0-9]",
+    ///     r"[a-z]",
+    ///     r"[A-Z]",
+    ///     r"\p{Greek}",
+    /// ]).unwrap();
+    /// let hay = "βa1";
+    /// let matches: Vec<_> = set.matches(hay).iter().collect();
+    /// assert_eq!(matches, vec![0, 1, 3]);
+    /// ```
+    ///
+    /// Note that `SetMatches` also implemnets the `IntoIterator` trait, so
+    /// this method is not always needed. For example:
+    ///
+    /// ```
+    /// use regex::RegexSet;
+    ///
+    /// let set = RegexSet::new([
+    ///     r"[0-9]",
+    ///     r"[a-z]",
+    ///     r"[A-Z]",
+    ///     r"\p{Greek}",
+    /// ]).unwrap();
+    /// let hay = "βa1";
+    /// let mut matches = vec![];
+    /// for index in set.matches(hay) {
+    ///     matches.push(index);
+    /// }
+    /// assert_eq!(matches, vec![0, 1, 3]);
+    /// ```
+    #[inline]
+    pub fn iter(&self) -> SetMatchesIter<'_> {
+        SetMatchesIter(self.0.iter())
+    }
+}
+
+impl IntoIterator for SetMatches {
+    type IntoIter = SetMatchesIntoIter;
+    type Item = usize;
+
+    fn into_iter(self) -> Self::IntoIter {
+        let it = 0..self.0.capacity();
+        SetMatchesIntoIter { patset: self.0, it }
+    }
+}
+
+impl<'a> IntoIterator for &'a SetMatches {
+    type IntoIter = SetMatchesIter<'a>;
+    type Item = usize;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+/// An owned iterator over the set of matches from a regex set.
+///
+/// This will always produces matches in ascending order of index, where the
+/// index corresponds to the index of the regex that matched with respect to
+/// its position when initially building the set.
+///
+/// This iterator is created by calling `SetMatches::into_iter` via the
+/// `IntoIterator` trait. This is automatically done in `for` loops.
+///
+/// # Example
+///
+/// ```
+/// use regex::RegexSet;
+///
+/// let set = RegexSet::new([
+///     r"[0-9]",
+///     r"[a-z]",
+///     r"[A-Z]",
+///     r"\p{Greek}",
+/// ]).unwrap();
+/// let hay = "βa1";
+/// let mut matches = vec![];
+/// for index in set.matches(hay) {
+///     matches.push(index);
+/// }
+/// assert_eq!(matches, vec![0, 1, 3]);
+/// ```
+#[derive(Debug)]
+pub struct SetMatchesIntoIter {
+    patset: PatternSet,
+    it: core::ops::Range<usize>,
+}
+
+impl Iterator for SetMatchesIntoIter {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<usize> {
+        loop {
+            let id = self.it.next()?;
+            if self.patset.contains(PatternID::new_unchecked(id)) {
+                return Some(id);
+            }
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl DoubleEndedIterator for SetMatchesIntoIter {
+    fn next_back(&mut self) -> Option<usize> {
+        loop {
+            let id = self.it.next_back()?;
+            if self.patset.contains(PatternID::new_unchecked(id)) {
+                return Some(id);
+            }
+        }
+    }
+}
+
+impl core::iter::FusedIterator for SetMatchesIntoIter {}
+
+/// A borrowed iterator over the set of matches from a regex set.
+///
+/// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that
+/// created this iterator.
+///
+/// This will always produces matches in ascending order, where the index
+/// corresponds to the index of the regex that matched with respect to its
+/// position when initially building the set.
+///
+/// This iterator is created by the [`SetMatches::iter`] method.
+#[derive(Clone, Debug)]
+pub struct SetMatchesIter<'a>(PatternSetIter<'a>);
+
+impl<'a> Iterator for SetMatchesIter<'a> {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<usize> {
+        self.0.next().map(|pid| pid.as_usize())
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.0.size_hint()
+    }
+}
+
+impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
+    fn next_back(&mut self) -> Option<usize> {
+        self.0.next_back().map(|pid| pid.as_usize())
+    }
+}
+
+impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {}
+
+impl core::fmt::Debug for RegexSet {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "RegexSet({:?})", self.patterns())
+    }
+}