Vendor things
This commit is contained in:
parent
5deceec006
commit
977e3c17e5
19434 changed files with 10682014 additions and 0 deletions
166
third-party/vendor/regex/tests/fuzz/mod.rs
vendored
Normal file
166
third-party/vendor/regex/tests/fuzz/mod.rs
vendored
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
// This set of tests is different from regression_fuzz in that the tests start
|
||||
// from the fuzzer data directly. The test essentially duplicates the fuzz
|
||||
// target. I wonder if there's a better way to set this up... Hmmm. I bet
|
||||
// `cargo fuzz` has something where it can run a target against crash files and
|
||||
// verify that they pass.
|
||||
|
||||
// This case found by the fuzzer causes the meta engine to use the "reverse
|
||||
// inner" literal strategy. That in turn uses a specialized search routine
|
||||
// for the lazy DFA in order to avoid worst case quadratic behavior. That
|
||||
// specialized search routine had a bug where it assumed that start state
|
||||
// specialization was disabled. But this is indeed not the case, since it
|
||||
// reuses the "general" lazy DFA for the full regex created as part of the core
|
||||
// strategy, which might very well have start states specialized due to the
|
||||
// existence of a prefilter.
|
||||
//
|
||||
// This is a somewhat weird case because if the core engine has a prefilter,
|
||||
// then it's usually the case that the "reverse inner" optimization won't be
|
||||
// pursued in that case. But there are some heuristics that try to detect
|
||||
// whether a prefilter is "fast" or not. If it's not, then the meta engine will
|
||||
// attempt the reverse inner optimization. And indeed, that's what happens
|
||||
// here. So the reverse inner optimization ends up with a lazy DFA that has
|
||||
// start states specialized. Ideally this wouldn't happen because specializing
|
||||
// start states without a prefilter inside the DFA can be disastrous for
|
||||
// performance by causing the DFA to ping-pong in and out of the special state
|
||||
// handling. In this case, it's probably not a huge deal because the lazy
|
||||
// DFA is only used for part of the matching whereas the workhorse is the
|
||||
// prefilter found by the reverse inner optimization.
|
||||
//
|
||||
// We could maybe fix this by refactoring the meta engine to be a little more
|
||||
// careful. For example, by attempting the optimizations before building the
|
||||
// core engine. But this is perhaps a little tricky.
|
||||
#[test]
fn meta_stopat_specialize_start_states() {
    // The crash file is embedded at compile time and handed straight to
    // `run`. Its result is discarded, so only a panic fails this test.
    let _ = run(include_bytes!(
        "testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9"
    ));
}
|
||||
|
||||
// Same bug as meta_stopat_specialize_start_states, but minimized by the
|
||||
// fuzzer.
|
||||
#[test]
fn meta_stopat_specialize_start_states_min() {
    // The minimized input is embedded at compile time and handed straight to
    // `run`. Its result is discarded, so only a panic fails this test.
    let _ = run(include_bytes!(
        "testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9"
    ));
}
|
||||
|
||||
// This input generated a pattern with a fail state (e.g., \P{any}, [^\s\S]
|
||||
// or [a&&b]). But the fail state was in a branch, where a subsequent branch
|
||||
// should have led to an overall match, but handling of the fail state
|
||||
// prevented it from doing so. A hand-minimized version of this is '[^\s\S]A|B'
|
||||
// on the haystack 'B'. That should yield a match of 'B'.
|
||||
//
|
||||
// The underlying cause was an issue in how DFA determinization handled fail
|
||||
// states. The bug didn't impact the PikeVM or the bounded backtracker.
|
||||
#[test]
fn fail_branch_prevents_match() {
    // The crash file is embedded at compile time and handed straight to
    // `run`. Its result is discarded, so only a panic fails this test.
    let _ = run(include_bytes!(
        "testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04"
    ));
}
|
||||
|
||||
// This input generated a pattern that contained a sub-expression like this:
|
||||
//
|
||||
// a{0}{50000}
|
||||
//
|
||||
// This turned out to provoke quadratic behavior in the NFA compiler.
|
||||
// Basically, the NFA compiler works in two phases. The first phase builds
|
||||
// a more complicated-but-simpler-to-construct sequence of NFA states that
|
||||
// includes unconditional epsilon transitions. As part of converting this
|
||||
// sequence to the "final" NFA, we remove those unconditional epsilon
|
||||
// transitions. The code responsible for doing this follows every chain of
|
||||
// these transitions and remaps the state IDs. The way we were doing this
|
||||
// before resulted in re-following every subsequent part of the chain for each
|
||||
// state in the chain, which ended up being quadratic behavior. We effectively
|
||||
// memoized this, which fixed the performance bug.
|
||||
#[test]
fn slow_big_empty_chain() {
    // The slow-unit file is embedded at compile time and handed straight to
    // `run`. Its result is discarded, so only a panic fails this test.
    let _ = run(include_bytes!(
        "testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a"
    ));
}
|
||||
|
||||
// A different case of slow_big_empty_chain.
|
||||
#[test]
fn slow_big_empty_chain2() {
    // The slow-unit file is embedded at compile time and handed straight to
    // `run`. Its result is discarded, so only a panic fails this test.
    let _ = run(include_bytes!(
        "testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e"
    ));
}
|
||||
|
||||
// A different case of slow_big_empty_chain.
|
||||
#[test]
fn slow_big_empty_chain3() {
    // The slow-unit file is embedded at compile time and handed straight to
    // `run`. Its result is discarded, so only a panic fails this test.
    let _ = run(include_bytes!(
        "testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6"
    ));
}
|
||||
|
||||
// A different case of slow_big_empty_chain.
|
||||
#[test]
fn slow_big_empty_chain4() {
    // The slow-unit file is embedded at compile time and handed straight to
    // `run`. Its result is discarded, so only a panic fails this test.
    let _ = run(include_bytes!(
        "testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0"
    ));
}
|
||||
|
||||
// A different case of slow_big_empty_chain.
|
||||
#[test]
fn slow_big_empty_chain5() {
    // The slow-unit file is embedded at compile time and handed straight to
    // `run`. Its result is discarded, so only a panic fails this test.
    let _ = run(include_bytes!(
        "testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c"
    ));
}
|
||||
|
||||
// A different case of slow_big_empty_chain.
|
||||
#[test]
fn slow_big_empty_chain6() {
    // The slow-unit file is embedded at compile time and handed straight to
    // `run`. Its result is discarded, so only a panic fails this test.
    let _ = run(include_bytes!(
        "testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085"
    ));
}
|
||||
|
||||
// This fuzz input generated a pattern with a large repetition that would fail
|
||||
// NFA compilation, but its HIR was small. (HIR doesn't expand repetitions.)
|
||||
// But, the bounds were high enough that the minimum length calculation
|
||||
// overflowed. We fixed this by using saturating arithmetic (and also checked
|
||||
// arithmetic for the maximum length calculation).
|
||||
//
|
||||
// Incidentally, this was the only unguarded arithmetic operation performed in
|
||||
// the HIR smart constructors. And the fuzzer found it. Hah. Nice.
|
||||
#[test]
fn minimum_len_overflow() {
    // The crash file is embedded at compile time and handed straight to
    // `run`. Its result is discarded, so only a panic fails this test.
    let _ = run(include_bytes!(
        "testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff"
    ));
}
|
||||
|
||||
// This is the fuzz target function. We duplicate it here since this is the
|
||||
// thing we use to interpret the data. It is ultimately what we want to
|
||||
// succeed.
|
||||
fn run(data: &[u8]) -> Option<()> {
|
||||
if data.len() < 2 {
|
||||
return None;
|
||||
}
|
||||
let mut split_at = usize::from(data[0]);
|
||||
let data = std::str::from_utf8(&data[1..]).ok()?;
|
||||
// Split data into a regex and haystack to search.
|
||||
let len = usize::try_from(data.chars().count()).ok()?;
|
||||
split_at = std::cmp::max(split_at, 1) % len;
|
||||
let char_index = data.char_indices().nth(split_at)?.0;
|
||||
let (pattern, input) = data.split_at(char_index);
|
||||
let re = regex::Regex::new(pattern).ok()?;
|
||||
re.is_match(input);
|
||||
Some(())
|
||||
}
|
||||
BIN
third-party/vendor/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff
vendored
Normal file
BIN
third-party/vendor/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff
vendored
Normal file
Binary file not shown.
BIN
third-party/vendor/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9
vendored
Normal file
BIN
third-party/vendor/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9
vendored
Normal file
Binary file not shown.
BIN
third-party/vendor/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04
vendored
Normal file
BIN
third-party/vendor/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04
vendored
Normal file
Binary file not shown.
BIN
third-party/vendor/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9
vendored
Normal file
BIN
third-party/vendor/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9
vendored
Normal file
Binary file not shown.
BIN
third-party/vendor/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e
vendored
Normal file
BIN
third-party/vendor/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e
vendored
Normal file
Binary file not shown.
BIN
third-party/vendor/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c
vendored
Normal file
BIN
third-party/vendor/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c
vendored
Normal file
Binary file not shown.
BIN
third-party/vendor/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085
vendored
Normal file
BIN
third-party/vendor/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085
vendored
Normal file
Binary file not shown.
BIN
third-party/vendor/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0
vendored
Normal file
BIN
third-party/vendor/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0
vendored
Normal file
Binary file not shown.
BIN
third-party/vendor/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a
vendored
Normal file
BIN
third-party/vendor/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a
vendored
Normal file
Binary file not shown.
BIN
third-party/vendor/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6
vendored
Normal file
BIN
third-party/vendor/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6
vendored
Normal file
Binary file not shown.
58
third-party/vendor/regex/tests/lib.rs
vendored
Normal file
58
third-party/vendor/regex/tests/lib.rs
vendored
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
#![cfg_attr(feature = "pattern", feature(pattern))]
|
||||
|
||||
mod fuzz;
|
||||
mod misc;
|
||||
mod regression;
|
||||
mod regression_fuzz;
|
||||
mod replace;
|
||||
#[cfg(feature = "pattern")]
|
||||
mod searcher;
|
||||
mod suite_bytes;
|
||||
mod suite_bytes_set;
|
||||
mod suite_string;
|
||||
mod suite_string_set;
|
||||
|
||||
// Handed to `TestRunner::blacklist_iter` by the suite modules. It is
// currently empty.
const BLACKLIST: &[&str] = &[
    // Nothing to blacklist yet!
];
|
||||
|
||||
fn suite() -> anyhow::Result<regex_test::RegexTests> {
|
||||
let _ = env_logger::try_init();
|
||||
|
||||
let mut tests = regex_test::RegexTests::new();
|
||||
macro_rules! load {
|
||||
($name:expr) => {{
|
||||
const DATA: &[u8] =
|
||||
include_bytes!(concat!("../testdata/", $name, ".toml"));
|
||||
tests.load_slice($name, DATA)?;
|
||||
}};
|
||||
}
|
||||
|
||||
load!("anchored");
|
||||
load!("bytes");
|
||||
load!("crazy");
|
||||
load!("crlf");
|
||||
load!("earliest");
|
||||
load!("empty");
|
||||
load!("expensive");
|
||||
load!("flags");
|
||||
load!("iter");
|
||||
load!("leftmost-all");
|
||||
load!("line-terminator");
|
||||
load!("misc");
|
||||
load!("multiline");
|
||||
load!("no-unicode");
|
||||
load!("overlapping");
|
||||
load!("regression");
|
||||
load!("set");
|
||||
load!("substring");
|
||||
load!("unicode");
|
||||
load!("utf8");
|
||||
load!("word-boundary");
|
||||
load!("word-boundary-special");
|
||||
load!("fowler/basic");
|
||||
load!("fowler/nullsubexpr");
|
||||
load!("fowler/repetition");
|
||||
|
||||
Ok(tests)
|
||||
}
|
||||
143
third-party/vendor/regex/tests/misc.rs
vendored
Normal file
143
third-party/vendor/regex/tests/misc.rs
vendored
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
use regex::Regex;
|
||||
|
||||
// Compiles the given pattern into a `regex::Regex`, panicking (via `unwrap`)
// if the pattern is rejected.
macro_rules! regex {
    ($pat:expr) => {{
        let compiled: regex::Regex = regex::Regex::new($pat).unwrap();
        compiled
    }};
}
|
||||
|
||||
#[test]
fn unclosed_group_error() {
    // Compiling a lone `(` must fail, and the rendered error must mention
    // the unclosed group.
    let rendered = Regex::new(r"(").unwrap_err().to_string();
    assert!(
        rendered.contains("unclosed group"),
        "error message: {:?}",
        rendered
    );
}
|
||||
|
||||
#[test]
fn regex_string() {
    let pattern = r"[a-zA-Z0-9]+";
    let re = regex!(pattern);
    // `as_str` and `Display` both give back the pattern text unchanged.
    assert_eq!(pattern, re.as_str());
    assert_eq!(pattern, re.to_string().as_str());
    // `Debug` wraps the pattern text as `Regex("...")`.
    assert_eq!(r#"Regex("[a-zA-Z0-9]+")"#, format!("{:?}", re).as_str());
}
|
||||
|
||||
#[test]
fn capture_names() {
    let re = regex!(r"(.)(?P<a>.)");
    // Group 0 (the whole match) plus the two explicit groups.
    assert_eq!(3, re.captures_len());

    let names = re.capture_names();
    assert_eq!((3, Some(3)), names.size_hint());
    // Only the second explicit group carries a name.
    let collected: Vec<Option<&str>> = names.collect();
    assert_eq!(vec![None, None, Some("a")], collected);
}
|
||||
|
||||
#[test]
fn capture_index() {
    let re = regex!(r"^(?P<name>.+)$");
    let caps = re.captures("abc").unwrap();
    // The single group spans the whole haystack, so indexing by name, by
    // group number and by 0 (the overall match) all yield the same text.
    assert_eq!(&caps["name"], "abc");
    assert_eq!(&caps[1], "abc");
    assert_eq!(&caps[0], "abc");
}
|
||||
|
||||
#[test]
#[should_panic]
fn capture_index_panic_usize() {
    let re = regex!(r"^(?P<name>.+)$");
    let caps = re.captures("abc").unwrap();
    // The pattern defines only groups 0 and 1, so indexing group 2 is
    // expected to panic.
    let _ = caps[2];
}
|
||||
|
||||
#[test]
#[should_panic]
fn capture_index_panic_name() {
    let re = regex!(r"^(?P<name>.+)$");
    let caps = re.captures("abc").unwrap();
    // The pattern has no group called "bad name", so this lookup is
    // expected to panic.
    let _ = caps["bad name"];
}
|
||||
|
||||
#[test]
fn capture_index_lifetime() {
    // Type-level check: this only compiles if `caps["..."]` is usable inside
    // a function whose haystack lifetime is chosen by the caller.
    fn digits_len(haystack: &str) -> usize {
        let re = regex!(r"(?P<number>[0-9]+)");
        let caps = re.captures(haystack).unwrap();
        caps["number"].len()
    }
    assert_eq!(3, digits_len("123"));
}
|
||||
|
||||
#[test]
fn capture_misc() {
    let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)");
    let caps = re.captures("abc").unwrap();
    // Byte offsets of a group that is required to have participated.
    let span = |index: usize| {
        let m = caps.get(index).unwrap();
        (m.start(), m.end())
    };

    // Group 0 plus four explicit groups.
    assert_eq!(5, caps.len());

    assert_eq!((0, 3), span(0));
    // The optional group `a` did not participate in the match.
    assert_eq!(None, caps.get(2));
    assert_eq!((2, 3), span(4));

    assert_eq!("abc", caps.get(0).unwrap().as_str());
    assert_eq!(None, caps.get(2));
    assert_eq!("c", caps.get(4).unwrap().as_str());

    // The same two groups, looked up by name.
    assert_eq!(None, caps.name("a"));
    assert_eq!("c", caps.name("b").unwrap().as_str());
}
|
||||
|
||||
#[test]
fn sub_capture_matches() {
    let re = regex!(r"([a-z])(([a-z])|([0-9]))");
    let caps = re.captures("a5").unwrap();
    let groups: Vec<_> = caps.iter().collect();

    assert_eq!(5, groups.len());

    // Every group participates except the letter alternative (group 3).
    let participated: Vec<bool> =
        groups.iter().map(|group| group.is_some()).collect();
    assert_eq!(vec![true, true, true, false, true], participated);

    // Text matched by each participating group.
    let expected = [(0, "a5"), (1, "a"), (2, "5"), (4, "5")];
    for &(index, text) in expected.iter() {
        assert_eq!(text, groups[index].unwrap().as_str());
    }
}
|
||||
|
||||
// Test that the DFA can handle pathological cases. (This should result in the
|
||||
// DFA's cache being flushed too frequently, which should cause it to quit and
|
||||
// fall back to the NFA algorithm.)
|
||||
#[test]
fn dfa_handles_pathological_case() {
    // Produces `count` characters in which every third one, starting with
    // the first, is '1' and all the others are '0'.
    fn ones_and_zeroes(count: usize) -> String {
        (0..count).map(|i| if i % 3 == 0 { '1' } else { '0' }).collect()
    }

    let re = regex!(r"[01]*1[01]{20}$");
    // A long run of digits, then a '1', then exactly twenty more digits, so
    // that the pattern matches at the very end of the haystack.
    let text =
        format!("{}1{}", ones_and_zeroes(100_000), ones_and_zeroes(20));
    assert!(re.is_match(&text));
}
|
||||
94
third-party/vendor/regex/tests/regression.rs
vendored
Normal file
94
third-party/vendor/regex/tests/regression.rs
vendored
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
use regex::Regex;
|
||||
|
||||
// Compiles the given pattern into a `regex::Regex`, panicking (via `unwrap`)
// if the pattern is rejected.
macro_rules! regex {
    ($pat:expr) => {{
        let compiled: regex::Regex = regex::Regex::new($pat).unwrap();
        compiled
    }};
}
|
||||
|
||||
// See: https://github.com/rust-lang/regex/issues/48
|
||||
#[test]
fn invalid_regexes_no_crash() {
    // Each of these malformed patterns must be reported as an error rather
    // than crash the parser.
    for &pattern in ["(*)", "(?:?)", "(?)", "*"].iter() {
        assert!(Regex::new(pattern).is_err());
    }
}
|
||||
|
||||
// See: https://github.com/rust-lang/regex/issues/98
|
||||
#[test]
fn regression_many_repeat_stack_overflow() {
    let re = regex!("^.{1,2500}");
    // The one-character haystack yields exactly one match covering it.
    let ranges: Vec<_> = re.find_iter("a").map(|m| m.range()).collect();
    assert_eq!(vec![0..1], ranges);
}
|
||||
|
||||
// See: https://github.com/rust-lang/regex/issues/555
|
||||
#[test]
fn regression_invalid_repetition_expr() {
    // A repetition applied directly to a flags directive must be rejected.
    let compiled = Regex::new("(?m){1,1}");
    assert!(compiled.is_err());
}
|
||||
|
||||
// See: https://github.com/rust-lang/regex/issues/527
|
||||
#[test]
fn regression_invalid_flags_expression() {
    // A flags directive nested inside groups must be accepted.
    let compiled = Regex::new("(((?x)))");
    assert!(compiled.is_ok());
}
|
||||
|
||||
// See: https://github.com/rust-lang/regex/issues/129
|
||||
#[test]
fn regression_captures_rep() {
    let re = regex!(r"([a-f]){2}(?P<foo>[x-z])");
    let found = re.captures("abx").unwrap();
    // The named group after the repeated group must capture the final 'x'.
    assert_eq!(&found["foo"], "x");
}
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1247
|
||||
#[cfg(feature = "unicode-perl")]
#[test]
fn regression_nfa_stops1() {
    let re = regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap();
    // 's' followed by the non-UTF-8 byte 0xE4 must produce no matches.
    let haystack: &[u8] = b"s\xE4";
    assert_eq!(0, re.find_iter(haystack).count());
}
|
||||
|
||||
// See: https://github.com/rust-lang/regex/issues/981
|
||||
#[cfg(feature = "unicode")]
#[test]
fn regression_bad_word_boundary() {
    let re = regex!(r#"(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))"#);
    // "Darwin" merely contains "win" and must not match, while "Windows"
    // must match.
    let cases = [
        ("ubi-Darwin-x86_64.tar.gz", false),
        ("ubi-Windows-x86_64.zip", true),
    ];
    for &(hay, expected) in cases.iter() {
        assert_eq!(expected, re.is_match(hay));
    }
}
|
||||
|
||||
// See: https://github.com/rust-lang/regex/issues/982
|
||||
#[cfg(feature = "unicode-perl")]
#[test]
fn regression_unicode_perl_not_enabled() {
    // A pattern built from `\d` and `\s` must compile when the
    // `unicode-perl` feature is enabled.
    let compiled = Regex::new(
        r"(\d+\s?(years|year|y))?\s?(\d+\s?(months|month|m))?\s?(\d+\s?(weeks|week|w))?\s?(\d+\s?(days|day|d))?\s?(\d+\s?(hours|hour|h))?",
    );
    assert!(compiled.is_ok());
}
|
||||
|
||||
// See: https://github.com/rust-lang/regex/issues/995
|
||||
#[test]
fn regression_big_regex_overflow() {
    // Stacked huge repetition counts must be rejected with an error.
    let compiled = Regex::new(r" {2147483516}{2147483416}{5}");
    assert!(compiled.is_err());
}
|
||||
|
||||
// See: https://github.com/rust-lang/regex/issues/999
|
||||
#[test]
fn regression_complete_literals_suffix_incorrect() {
    // Alternation of "aA", "bA", ..., "zA": each lowercase ASCII letter
    // followed by an uppercase 'A'.
    let pattern = ('a'..='z')
        .map(|letter| format!("{}A", letter))
        .collect::<Vec<_>>()
        .join("|");
    let re = regex!(&pattern);
    // "FUBAR" contains "BA" but no lowercase letter before the 'A', so
    // nothing may match.
    assert_eq!(0, re.find_iter("FUBAR").count());
}
|
||||
61
third-party/vendor/regex/tests/regression_fuzz.rs
vendored
Normal file
61
third-party/vendor/regex/tests/regression_fuzz.rs
vendored
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
// These tests are only run for the "default" test target because some of them
|
||||
// can take quite a long time. Some of them take long enough that it's not
|
||||
// practical to run them in debug mode. :-/
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
// Compiles the given pattern into a `regex::Regex`, panicking (via `unwrap`)
// if the pattern is rejected.
macro_rules! regex {
    ($pat:expr) => {{
        let compiled: regex::Regex = regex::Regex::new($pat).unwrap();
        compiled
    }};
}
|
||||
|
||||
// See: https://oss-fuzz.com/testcase-detail/5673225499181056
|
||||
//
|
||||
// Ignored by default since it takes too long in debug mode (almost a minute).
|
||||
#[test]
#[ignore]
fn fuzz1() {
    // Only compilation is exercised; the compiled regex is dropped at once.
    let _ = regex!(r"1}{55}{0}*{1}{55}{55}{5}*{1}{55}+{56}|;**");
}
|
||||
|
||||
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=26505
|
||||
// See: https://github.com/rust-lang/regex/issues/722
|
||||
#[test]
#[cfg(feature = "unicode")]
fn empty_any_errors_no_panic() {
    // The class `\P{any}` must compile successfully.
    let compiled = Regex::new(r"\P{any}");
    assert!(compiled.is_ok());
}
|
||||
|
||||
// This tests that a very large regex errors during compilation instead of
|
||||
// using gratuitous amounts of memory. The specific problem is that the
|
||||
// compiler wasn't accounting for the memory used by Unicode character classes
|
||||
// correctly.
|
||||
//
|
||||
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=33579
|
||||
#[test]
fn big_regex_fails_to_compile() {
    // Compilation of this huge repetition must be refused with an error.
    let compiled =
        Regex::new("[\u{0}\u{e}\u{2}\\w~~>[l\t\u{0}]p?<]{971158}");
    assert!(compiled.is_err());
}
|
||||
|
||||
// This was caught while on master but before a release went out(!).
|
||||
//
|
||||
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58173
|
||||
#[test]
fn todo() {
    // This pattern must compile successfully.
    let compiled = Regex::new("(?:z|xx)@|xx");
    assert!(compiled.is_ok());
}
|
||||
|
||||
// This was found by the fuzzer, and then minimized by hand.
|
||||
//
|
||||
// This was caused by a bug in DFA determinization that mishandled NFA fail
|
||||
// states.
|
||||
#[test]
fn fail_branch_prevents_match() {
    // The first alternative contains the empty class `[a&&b]` and can never
    // match, so the second alternative must still match the haystack "B".
    let re = Regex::new(r".*[a&&b]A|B").unwrap();
    assert!(re.is_match("B"));
}
|
||||
183
third-party/vendor/regex/tests/replace.rs
vendored
Normal file
183
third-party/vendor/regex/tests/replace.rs
vendored
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
// Defines a `#[test]` named `$name` that compiles `$re`, calls the `Regex`
// method `$which` with the haystack `$search` and the replacement
// `$replace`, and checks that the outcome equals `$result`.
macro_rules! replace {
    (
        $name:ident, $which:ident, $re:expr,
        $search:expr, $replace:expr, $result:expr
    ) => {
        #[test]
        fn $name() {
            let compiled = regex::Regex::new($re).unwrap();
            let replaced = compiled.$which($search, $replace);
            assert_eq!(replaced, $result);
        }
    };
}
|
||||
|
||||
replace!(first, replace, r"[0-9]", "age: 26", "Z", "age: Z6");
|
||||
replace!(plus, replace, r"[0-9]+", "age: 26", "Z", "age: Z");
|
||||
replace!(all, replace_all, r"[0-9]", "age: 26", "Z", "age: ZZ");
|
||||
replace!(groups, replace, r"([^ ]+)[ ]+([^ ]+)", "w1 w2", "$2 $1", "w2 w1");
|
||||
replace!(
|
||||
double_dollar,
|
||||
replace,
|
||||
r"([^ ]+)[ ]+([^ ]+)",
|
||||
"w1 w2",
|
||||
"$2 $$1",
|
||||
"w2 $1"
|
||||
);
|
||||
// replace!(adjacent_index, replace,
|
||||
// r"([^aeiouy])ies$", "skies", "$1y", "sky");
|
||||
replace!(
|
||||
named,
|
||||
replace_all,
|
||||
r"(?P<first>[^ ]+)[ ]+(?P<last>[^ ]+)(?P<space>[ ]*)",
|
||||
"w1 w2 w3 w4",
|
||||
"$last $first$space",
|
||||
"w2 w1 w4 w3"
|
||||
);
|
||||
replace!(
|
||||
trim,
|
||||
replace_all,
|
||||
"^[ \t]+|[ \t]+$",
|
||||
" \t trim me\t \t",
|
||||
"",
|
||||
"trim me"
|
||||
);
|
||||
replace!(number_hyphen, replace, r"(.)(.)", "ab", "$1-$2", "a-b");
|
||||
// replace!(number_underscore, replace, r"(.)(.)", "ab", "$1_$2", "a_b");
|
||||
replace!(
|
||||
simple_expand,
|
||||
replace_all,
|
||||
r"([a-z]) ([a-z])",
|
||||
"a b",
|
||||
"$2 $1",
|
||||
"b a"
|
||||
);
|
||||
replace!(
|
||||
literal_dollar1,
|
||||
replace_all,
|
||||
r"([a-z]+) ([a-z]+)",
|
||||
"a b",
|
||||
"$$1",
|
||||
"$1"
|
||||
);
|
||||
replace!(
|
||||
literal_dollar2,
|
||||
replace_all,
|
||||
r"([a-z]+) ([a-z]+)",
|
||||
"a b",
|
||||
"$2 $$c $1",
|
||||
"b $c a"
|
||||
);
|
||||
replace!(
|
||||
no_expand1,
|
||||
replace,
|
||||
r"([^ ]+)[ ]+([^ ]+)",
|
||||
"w1 w2",
|
||||
regex::NoExpand("$2 $1"),
|
||||
"$2 $1"
|
||||
);
|
||||
replace!(
|
||||
no_expand2,
|
||||
replace,
|
||||
r"([^ ]+)[ ]+([^ ]+)",
|
||||
"w1 w2",
|
||||
regex::NoExpand("$$1"),
|
||||
"$$1"
|
||||
);
|
||||
replace!(
|
||||
closure_returning_reference,
|
||||
replace,
|
||||
r"([0-9]+)",
|
||||
"age: 26",
|
||||
|captures: ®ex::Captures<'_>| { captures[1][0..1].to_owned() },
|
||||
"age: 2"
|
||||
);
|
||||
replace!(
|
||||
closure_returning_value,
|
||||
replace,
|
||||
r"[0-9]+",
|
||||
"age: 26",
|
||||
|_captures: ®ex::Captures<'_>| "Z".to_owned(),
|
||||
"age: Z"
|
||||
);
|
||||
|
||||
// See https://github.com/rust-lang/regex/issues/314
|
||||
replace!(
|
||||
match_at_start_replace_with_empty,
|
||||
replace_all,
|
||||
r"foo",
|
||||
"foobar",
|
||||
"",
|
||||
"bar"
|
||||
);
|
||||
|
||||
// See https://github.com/rust-lang/regex/issues/393
|
||||
replace!(single_empty_match, replace, r"^", "bar", "foo", "foobar");
|
||||
|
||||
// See https://github.com/rust-lang/regex/issues/399
|
||||
replace!(
|
||||
capture_longest_possible_name,
|
||||
replace_all,
|
||||
r"(.)",
|
||||
"b",
|
||||
"${1}a $1a",
|
||||
"ba "
|
||||
);
|
||||
|
||||
replace!(
|
||||
impl_string,
|
||||
replace,
|
||||
r"[0-9]",
|
||||
"age: 26",
|
||||
"Z".to_string(),
|
||||
"age: Z6"
|
||||
);
|
||||
replace!(
|
||||
impl_string_ref,
|
||||
replace,
|
||||
r"[0-9]",
|
||||
"age: 26",
|
||||
&"Z".to_string(),
|
||||
"age: Z6"
|
||||
);
|
||||
replace!(
|
||||
impl_cow_str_borrowed,
|
||||
replace,
|
||||
r"[0-9]",
|
||||
"age: 26",
|
||||
std::borrow::Cow::<'_, str>::Borrowed("Z"),
|
||||
"age: Z6"
|
||||
);
|
||||
replace!(
|
||||
impl_cow_str_borrowed_ref,
|
||||
replace,
|
||||
r"[0-9]",
|
||||
"age: 26",
|
||||
&std::borrow::Cow::<'_, str>::Borrowed("Z"),
|
||||
"age: Z6"
|
||||
);
|
||||
replace!(
|
||||
impl_cow_str_owned,
|
||||
replace,
|
||||
r"[0-9]",
|
||||
"age: 26",
|
||||
std::borrow::Cow::<'_, str>::Owned("Z".to_string()),
|
||||
"age: Z6"
|
||||
);
|
||||
replace!(
|
||||
impl_cow_str_owned_ref,
|
||||
replace,
|
||||
r"[0-9]",
|
||||
"age: 26",
|
||||
&std::borrow::Cow::<'_, str>::Owned("Z".to_string()),
|
||||
"age: Z6"
|
||||
);
|
||||
|
||||
#[test]
fn replacen_no_captures() {
    let digit = regex::Regex::new(r"[0-9]").unwrap();
    // With a limit of 2, only the first two digits are replaced.
    let replaced = digit.replacen("age: 1234", 2, "Z");
    assert_eq!(replaced, "age: ZZ34");
}
|
||||
|
||||
#[test]
fn replacen_with_captures() {
    let digit = regex::Regex::new(r"([0-9])").unwrap();
    // With a limit of 2, only the first two digits are rewritten; each one
    // is kept (via `${1}`) and followed by a 'Z'.
    let replaced = digit.replacen("age: 1234", 2, "${1}Z");
    assert_eq!(replaced, "age: 1Z2Z34");
}
|
||||
93
third-party/vendor/regex/tests/searcher.rs
vendored
Normal file
93
third-party/vendor/regex/tests/searcher.rs
vendored
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
// Defines a `#[test]` named `$name` that walks `$haystack` with the
// `std::str::pattern::Searcher` obtained from the regex `$re`, and compares
// every step produced before `Done` with the expected steps.
//
// Expected steps are given as trailing arguments, with or without a final
// comma; giving none expects no steps at all. The `vec <expr>` form is the
// internal rule that the other three rules forward to.
macro_rules! searcher {
    ($name:ident, $re:expr, $haystack:expr) => {
        searcher!($name, $re, $haystack, vec vec![]);
    };
    ($name:ident, $re:expr, $haystack:expr, $($steps:expr,)*) => {
        searcher!($name, $re, $haystack, vec vec![$($steps),*]);
    };
    ($name:ident, $re:expr, $haystack:expr, $($steps:expr),*) => {
        searcher!($name, $re, $haystack, vec vec![$($steps),*]);
    };
    ($name:ident, $re:expr, $haystack:expr, vec $expect_steps:expr) => {
        #[test]
        #[allow(unused_imports)]
        fn $name() {
            // These imports are also what lets the caller-written `Match(..)`
            // and `Reject(..)` expressions resolve.
            use std::str::pattern::{Pattern, Searcher};
            use std::str::pattern::SearchStep::{Match, Reject, Done};
            let re = regex::Regex::new($re).unwrap();
            let mut walker = re.into_searcher($haystack);
            let mut seen = Vec::new();
            loop {
                let step = walker.next();
                if let Done = step {
                    break;
                }
                seen.push(step);
            }
            assert_eq!(seen, $expect_steps);
        }
    };
}
|
||||
|
||||
searcher!(searcher_empty_regex_empty_haystack, r"", "", Match(0, 0));
|
||||
searcher!(
|
||||
searcher_empty_regex,
|
||||
r"",
|
||||
"ab",
|
||||
Match(0, 0),
|
||||
Reject(0, 1),
|
||||
Match(1, 1),
|
||||
Reject(1, 2),
|
||||
Match(2, 2)
|
||||
);
|
||||
searcher!(searcher_empty_haystack, r"\d", "");
|
||||
searcher!(searcher_one_match, r"\d", "5", Match(0, 1));
|
||||
searcher!(searcher_no_match, r"\d", "a", Reject(0, 1));
|
||||
searcher!(
|
||||
searcher_two_adjacent_matches,
|
||||
r"\d",
|
||||
"56",
|
||||
Match(0, 1),
|
||||
Match(1, 2)
|
||||
);
|
||||
searcher!(
|
||||
searcher_two_non_adjacent_matches,
|
||||
r"\d",
|
||||
"5a6",
|
||||
Match(0, 1),
|
||||
Reject(1, 2),
|
||||
Match(2, 3)
|
||||
);
|
||||
searcher!(searcher_reject_first, r"\d", "a6", Reject(0, 1), Match(1, 2));
|
||||
searcher!(
|
||||
searcher_one_zero_length_matches,
|
||||
r"\d*",
|
||||
"a1b2",
|
||||
Match(0, 0), // ^
|
||||
Reject(0, 1), // a
|
||||
Match(1, 2), // a1
|
||||
Reject(2, 3), // a1b
|
||||
Match(3, 4), // a1b2
|
||||
);
|
||||
searcher!(
|
||||
searcher_many_zero_length_matches,
|
||||
r"\d*",
|
||||
"a1bbb2",
|
||||
Match(0, 0), // ^
|
||||
Reject(0, 1), // a
|
||||
Match(1, 2), // a1
|
||||
Reject(2, 3), // a1b
|
||||
Match(3, 3), // a1bb
|
||||
Reject(3, 4), // a1bb
|
||||
Match(4, 4), // a1bbb
|
||||
Reject(4, 5), // a1bbb
|
||||
Match(5, 6), // a1bbb2
|
||||
);
|
||||
searcher!(
|
||||
searcher_unicode,
|
||||
r".+?",
|
||||
"Ⅰ1Ⅱ2",
|
||||
Match(0, 3),
|
||||
Match(3, 4),
|
||||
Match(4, 7),
|
||||
Match(7, 8)
|
||||
);
|
||||
108
third-party/vendor/regex/tests/suite_bytes.rs
vendored
Normal file
108
third-party/vendor/regex/tests/suite_bytes.rs
vendored
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
use {
|
||||
anyhow::Result,
|
||||
regex::bytes::{Regex, RegexBuilder},
|
||||
regex_test::{
|
||||
CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner,
|
||||
},
|
||||
};
|
||||
|
||||
/// Runs the shared test suite against `bytes::Regex`, once for each of the
/// `is_match`, `find` and `captures` search kinds.
#[test]
fn default() -> Result<()> {
    let mut runner = TestRunner::new()?;
    // Expand every test that is expected to compile into one test per kind.
    runner.expand(&["is_match", "find", "captures"], |test| test.compiles());
    runner.blacklist_iter(super::BLACKLIST);
    let tests = crate::suite()?;
    runner.test_iter(tests.iter(), compiler);
    runner.assert();
    Ok(())
}
|
||||
|
||||
fn run_test(re: &Regex, test: &RegexTest) -> TestResult {
|
||||
match test.additional_name() {
|
||||
"is_match" => TestResult::matched(re.is_match(test.haystack())),
|
||||
"find" => TestResult::matches(
|
||||
re.find_iter(test.haystack())
|
||||
.take(test.match_limit().unwrap_or(std::usize::MAX))
|
||||
.map(|m| Match {
|
||||
id: 0,
|
||||
span: Span { start: m.start(), end: m.end() },
|
||||
}),
|
||||
),
|
||||
"captures" => {
|
||||
let it = re
|
||||
.captures_iter(test.haystack())
|
||||
.take(test.match_limit().unwrap_or(std::usize::MAX))
|
||||
.map(|caps| testify_captures(&caps));
|
||||
TestResult::captures(it)
|
||||
}
|
||||
name => TestResult::fail(&format!("unrecognized test name: {}", name)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the given regex test to a closure that searches with a
|
||||
/// `bytes::Regex`. If the test configuration is unsupported, then a
|
||||
/// `CompiledRegex` that skips the test is returned.
|
||||
fn compiler(
|
||||
test: &RegexTest,
|
||||
_patterns: &[String],
|
||||
) -> anyhow::Result<CompiledRegex> {
|
||||
let skip = Ok(CompiledRegex::skip());
|
||||
|
||||
// We're only testing bytes::Regex here, which supports one pattern only.
|
||||
let pattern = match test.regexes().len() {
|
||||
1 => &test.regexes()[0],
|
||||
_ => return skip,
|
||||
};
|
||||
// We only test is_match, find_iter and captures_iter. All of those are
|
||||
// leftmost searches.
|
||||
if !matches!(test.search_kind(), regex_test::SearchKind::Leftmost) {
|
||||
return skip;
|
||||
}
|
||||
// The top-level single-pattern regex API always uses leftmost-first.
|
||||
if !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst) {
|
||||
return skip;
|
||||
}
|
||||
// The top-level regex API always runs unanchored searches. ... But we can
|
||||
// handle tests that are anchored but have only one match.
|
||||
if test.anchored() && test.match_limit() != Some(1) {
|
||||
return skip;
|
||||
}
|
||||
// We don't support tests with explicit search bounds. We could probably
|
||||
// support this by using the 'find_at' (and such) APIs.
|
||||
let bounds = test.bounds();
|
||||
if !(bounds.start == 0 && bounds.end == test.haystack().len()) {
|
||||
return skip;
|
||||
}
|
||||
// The bytes::Regex API specifically does not support enabling UTF-8 mode.
|
||||
// It could I suppose, but currently it does not. That is, it permits
|
||||
// matches to have offsets that split codepoints.
|
||||
if test.utf8() {
|
||||
return skip;
|
||||
}
|
||||
// If the test requires Unicode but the Unicode feature isn't enabled,
|
||||
// skip it. This is a little aggressive, but the test suite doesn't
|
||||
// have any easy way of communicating which Unicode features are needed.
|
||||
if test.unicode() && !cfg!(feature = "unicode") {
|
||||
return skip;
|
||||
}
|
||||
let re = RegexBuilder::new(pattern)
|
||||
.case_insensitive(test.case_insensitive())
|
||||
.unicode(test.unicode())
|
||||
.line_terminator(test.line_terminator())
|
||||
.build()?;
|
||||
Ok(CompiledRegex::compiled(move |test| run_test(&re, test)))
|
||||
}
|
||||
|
||||
/// Convert `Captures` into the test suite's capture values.
|
||||
fn testify_captures(
|
||||
caps: ®ex::bytes::Captures<'_>,
|
||||
) -> regex_test::Captures {
|
||||
let spans = caps.iter().map(|group| {
|
||||
group.map(|m| regex_test::Span { start: m.start(), end: m.end() })
|
||||
});
|
||||
// This unwrap is OK because we assume our 'caps' represents a match, and
|
||||
// a match always gives a non-zero number of groups with the first group
|
||||
// being non-None.
|
||||
regex_test::Captures::new(0, spans).unwrap()
|
||||
}
|
||||
71
third-party/vendor/regex/tests/suite_bytes_set.rs
vendored
Normal file
71
third-party/vendor/regex/tests/suite_bytes_set.rs
vendored
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
use {
|
||||
anyhow::Result,
|
||||
regex::bytes::{RegexSet, RegexSetBuilder},
|
||||
regex_test::{CompiledRegex, RegexTest, TestResult, TestRunner},
|
||||
};
|
||||
|
||||
/// Runs the shared test suite through `compiler`, which builds a
/// `regex::bytes::RegexSet` for each supported test. The "is_match" and
/// "which" additional names are registered via `expand` for tests whose
/// `compiles()` returns true.
#[test]
fn default() -> Result<()> {
    let mut runner = TestRunner::new()?;
    runner.expand(&["is_match", "which"], |test| test.compiles());
    runner.blacklist_iter(super::BLACKLIST);
    let suite = crate::suite()?;
    runner.test_iter(suite.iter(), compiler);
    runner.assert();
    Ok(())
}
|
||||
|
||||
fn run_test(re: &RegexSet, test: &RegexTest) -> TestResult {
|
||||
match test.additional_name() {
|
||||
"is_match" => TestResult::matched(re.is_match(test.haystack())),
|
||||
"which" => TestResult::which(re.matches(test.haystack()).iter()),
|
||||
name => TestResult::fail(&format!("unrecognized test name: {}", name)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the given regex test to a closure that searches with a
|
||||
/// `bytes::Regex`. If the test configuration is unsupported, then a
|
||||
/// `CompiledRegex` that skips the test is returned.
|
||||
fn compiler(
|
||||
test: &RegexTest,
|
||||
_patterns: &[String],
|
||||
) -> anyhow::Result<CompiledRegex> {
|
||||
let skip = Ok(CompiledRegex::skip());
|
||||
|
||||
// The top-level RegexSet API only supports "overlapping" semantics.
|
||||
if !matches!(test.search_kind(), regex_test::SearchKind::Overlapping) {
|
||||
return skip;
|
||||
}
|
||||
// The top-level RegexSet API only supports "all" semantics.
|
||||
if !matches!(test.match_kind(), regex_test::MatchKind::All) {
|
||||
return skip;
|
||||
}
|
||||
// The top-level RegexSet API always runs unanchored searches.
|
||||
if test.anchored() {
|
||||
return skip;
|
||||
}
|
||||
// We don't support tests with explicit search bounds.
|
||||
let bounds = test.bounds();
|
||||
if !(bounds.start == 0 && bounds.end == test.haystack().len()) {
|
||||
return skip;
|
||||
}
|
||||
// The bytes::Regex API specifically does not support enabling UTF-8 mode.
|
||||
// It could I suppose, but currently it does not. That is, it permits
|
||||
// matches to have offsets that split codepoints.
|
||||
if test.utf8() {
|
||||
return skip;
|
||||
}
|
||||
// If the test requires Unicode but the Unicode feature isn't enabled,
|
||||
// skip it. This is a little aggressive, but the test suite doesn't
|
||||
// have any easy way of communicating which Unicode features are needed.
|
||||
if test.unicode() && !cfg!(feature = "unicode") {
|
||||
return skip;
|
||||
}
|
||||
let re = RegexSetBuilder::new(test.regexes())
|
||||
.case_insensitive(test.case_insensitive())
|
||||
.unicode(test.unicode())
|
||||
.line_terminator(test.line_terminator())
|
||||
.build()?;
|
||||
Ok(CompiledRegex::compiled(move |test| run_test(&re, test)))
|
||||
}
|
||||
114
third-party/vendor/regex/tests/suite_string.rs
vendored
Normal file
114
third-party/vendor/regex/tests/suite_string.rs
vendored
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
use {
|
||||
anyhow::Result,
|
||||
regex::{Regex, RegexBuilder},
|
||||
regex_test::{
|
||||
CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner,
|
||||
},
|
||||
};
|
||||
|
||||
/// Runs the shared test suite through `compiler`, which builds a
/// `regex::Regex` for each supported test. The "is_match", "find" and
/// "captures" additional names are registered via `expand` for tests whose
/// `compiles()` returns true.
#[test]
fn default() -> Result<()> {
    let mut runner = TestRunner::new()?;
    runner.expand(&["is_match", "find", "captures"], |test| test.compiles());
    runner.blacklist_iter(super::BLACKLIST);
    let suite = crate::suite()?;
    runner.test_iter(suite.iter(), compiler);
    runner.assert();
    Ok(())
}
|
||||
|
||||
fn run_test(re: &Regex, test: &RegexTest) -> TestResult {
|
||||
let hay = match std::str::from_utf8(test.haystack()) {
|
||||
Ok(hay) => hay,
|
||||
Err(err) => {
|
||||
return TestResult::fail(&format!(
|
||||
"haystack is not valid UTF-8: {}",
|
||||
err
|
||||
));
|
||||
}
|
||||
};
|
||||
match test.additional_name() {
|
||||
"is_match" => TestResult::matched(re.is_match(hay)),
|
||||
"find" => TestResult::matches(
|
||||
re.find_iter(hay)
|
||||
.take(test.match_limit().unwrap_or(std::usize::MAX))
|
||||
.map(|m| Match {
|
||||
id: 0,
|
||||
span: Span { start: m.start(), end: m.end() },
|
||||
}),
|
||||
),
|
||||
"captures" => {
|
||||
let it = re
|
||||
.captures_iter(hay)
|
||||
.take(test.match_limit().unwrap_or(std::usize::MAX))
|
||||
.map(|caps| testify_captures(&caps));
|
||||
TestResult::captures(it)
|
||||
}
|
||||
name => TestResult::fail(&format!("unrecognized test name: {}", name)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the given regex test to a closure that searches with a
|
||||
/// `bytes::Regex`. If the test configuration is unsupported, then a
|
||||
/// `CompiledRegex` that skips the test is returned.
|
||||
fn compiler(
|
||||
test: &RegexTest,
|
||||
_patterns: &[String],
|
||||
) -> anyhow::Result<CompiledRegex> {
|
||||
let skip = Ok(CompiledRegex::skip());
|
||||
|
||||
// We're only testing bytes::Regex here, which supports one pattern only.
|
||||
let pattern = match test.regexes().len() {
|
||||
1 => &test.regexes()[0],
|
||||
_ => return skip,
|
||||
};
|
||||
// We only test is_match, find_iter and captures_iter. All of those are
|
||||
// leftmost searches.
|
||||
if !matches!(test.search_kind(), regex_test::SearchKind::Leftmost) {
|
||||
return skip;
|
||||
}
|
||||
// The top-level single-pattern regex API always uses leftmost-first.
|
||||
if !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst) {
|
||||
return skip;
|
||||
}
|
||||
// The top-level regex API always runs unanchored searches. ... But we can
|
||||
// handle tests that are anchored but have only one match.
|
||||
if test.anchored() && test.match_limit() != Some(1) {
|
||||
return skip;
|
||||
}
|
||||
// We don't support tests with explicit search bounds. We could probably
|
||||
// support this by using the 'find_at' (and such) APIs.
|
||||
let bounds = test.bounds();
|
||||
if !(bounds.start == 0 && bounds.end == test.haystack().len()) {
|
||||
return skip;
|
||||
}
|
||||
// The Regex API specifically does not support disabling UTF-8 mode because
|
||||
// it can only search &str which is always valid UTF-8.
|
||||
if !test.utf8() {
|
||||
return skip;
|
||||
}
|
||||
// If the test requires Unicode but the Unicode feature isn't enabled,
|
||||
// skip it. This is a little aggressive, but the test suite doesn't
|
||||
// have any easy way of communicating which Unicode features are needed.
|
||||
if test.unicode() && !cfg!(feature = "unicode") {
|
||||
return skip;
|
||||
}
|
||||
let re = RegexBuilder::new(pattern)
|
||||
.case_insensitive(test.case_insensitive())
|
||||
.unicode(test.unicode())
|
||||
.line_terminator(test.line_terminator())
|
||||
.build()?;
|
||||
Ok(CompiledRegex::compiled(move |test| run_test(&re, test)))
|
||||
}
|
||||
|
||||
/// Convert `Captures` into the test suite's capture values.
|
||||
fn testify_captures(caps: ®ex::Captures<'_>) -> regex_test::Captures {
|
||||
let spans = caps.iter().map(|group| {
|
||||
group.map(|m| regex_test::Span { start: m.start(), end: m.end() })
|
||||
});
|
||||
// This unwrap is OK because we assume our 'caps' represents a match, and
|
||||
// a match always gives a non-zero number of groups with the first group
|
||||
// being non-None.
|
||||
regex_test::Captures::new(0, spans).unwrap()
|
||||
}
|
||||
79
third-party/vendor/regex/tests/suite_string_set.rs
vendored
Normal file
79
third-party/vendor/regex/tests/suite_string_set.rs
vendored
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
use {
|
||||
anyhow::Result,
|
||||
regex::{RegexSet, RegexSetBuilder},
|
||||
regex_test::{CompiledRegex, RegexTest, TestResult, TestRunner},
|
||||
};
|
||||
|
||||
/// Runs the shared test suite through `compiler`, which builds a
/// `regex::RegexSet` for each supported test. The "is_match" and "which"
/// additional names are registered via `expand` for tests whose `compiles()`
/// returns true.
#[test]
fn default() -> Result<()> {
    let mut runner = TestRunner::new()?;
    runner.expand(&["is_match", "which"], |test| test.compiles());
    runner.blacklist_iter(super::BLACKLIST);
    let suite = crate::suite()?;
    runner.test_iter(suite.iter(), compiler);
    runner.assert();
    Ok(())
}
|
||||
|
||||
fn run_test(re: &RegexSet, test: &RegexTest) -> TestResult {
|
||||
let hay = match std::str::from_utf8(test.haystack()) {
|
||||
Ok(hay) => hay,
|
||||
Err(err) => {
|
||||
return TestResult::fail(&format!(
|
||||
"haystack is not valid UTF-8: {}",
|
||||
err
|
||||
));
|
||||
}
|
||||
};
|
||||
match test.additional_name() {
|
||||
"is_match" => TestResult::matched(re.is_match(hay)),
|
||||
"which" => TestResult::which(re.matches(hay).iter()),
|
||||
name => TestResult::fail(&format!("unrecognized test name: {}", name)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the given regex test to a closure that searches with a
|
||||
/// `bytes::Regex`. If the test configuration is unsupported, then a
|
||||
/// `CompiledRegex` that skips the test is returned.
|
||||
fn compiler(
|
||||
test: &RegexTest,
|
||||
_patterns: &[String],
|
||||
) -> anyhow::Result<CompiledRegex> {
|
||||
let skip = Ok(CompiledRegex::skip());
|
||||
|
||||
// The top-level RegexSet API only supports "overlapping" semantics.
|
||||
if !matches!(test.search_kind(), regex_test::SearchKind::Overlapping) {
|
||||
return skip;
|
||||
}
|
||||
// The top-level RegexSet API only supports "all" semantics.
|
||||
if !matches!(test.match_kind(), regex_test::MatchKind::All) {
|
||||
return skip;
|
||||
}
|
||||
// The top-level RegexSet API always runs unanchored searches.
|
||||
if test.anchored() {
|
||||
return skip;
|
||||
}
|
||||
// We don't support tests with explicit search bounds.
|
||||
let bounds = test.bounds();
|
||||
if !(bounds.start == 0 && bounds.end == test.haystack().len()) {
|
||||
return skip;
|
||||
}
|
||||
// The Regex API specifically does not support disabling UTF-8 mode because
|
||||
// it can only search &str which is always valid UTF-8.
|
||||
if !test.utf8() {
|
||||
return skip;
|
||||
}
|
||||
// If the test requires Unicode but the Unicode feature isn't enabled,
|
||||
// skip it. This is a little aggressive, but the test suite doesn't
|
||||
// have any easy way of communicating which Unicode features are needed.
|
||||
if test.unicode() && !cfg!(feature = "unicode") {
|
||||
return skip;
|
||||
}
|
||||
let re = RegexSetBuilder::new(test.regexes())
|
||||
.case_insensitive(test.case_insensitive())
|
||||
.unicode(test.unicode())
|
||||
.line_terminator(test.line_terminator())
|
||||
.build()?;
|
||||
Ok(CompiledRegex::compiled(move |test| run_test(&re, test)))
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue