Vendor things
This commit is contained in:
parent
5deceec006
commit
977e3c17e5
19434 changed files with 10682014 additions and 0 deletions
461
third-party/vendor/regex-automata-0.1.10/tests/collection.rs
vendored
Normal file
461
third-party/vendor/regex-automata-0.1.10/tests/collection.rs
vendored
Normal file
|
|
@ -0,0 +1,461 @@
|
|||
use std::collections::BTreeMap;
|
||||
use std::env;
|
||||
use std::fmt::{self, Write};
|
||||
use std::thread;
|
||||
|
||||
use regex;
|
||||
use regex_automata::{DenseDFA, ErrorKind, Regex, RegexBuilder, StateID, DFA};
|
||||
use serde_bytes;
|
||||
use toml;
|
||||
|
||||
/// Parses one embedded TOML test file and appends its tests to a collection.
///
/// The file name is resolved relative to `../data/tests/` and its contents
/// are embedded into the test binary at compile time via `include_bytes!`.
macro_rules! load {
    ($collection:ident, $file:expr) => {{
        let loaded = RegexTests::load(
            concat!("../data/tests/", $file),
            include_bytes!(concat!("../data/tests/", $file)),
        );
        $collection.extend(loaded);
    }};
}
|
||||
|
||||
lazy_static! {
    /// Every regex test from the embedded TOML files, built on first access.
    ///
    /// Building panics if two files define a test with the same name.
    pub static ref SUITE: RegexTestCollection = {
        let mut suite = RegexTestCollection::new();

        // Tests derived from the Fowler suite.
        load!(suite, "fowler/basic.toml");
        load!(suite, "fowler/nullsubexpr.toml");
        load!(suite, "fowler/repetition.toml");
        load!(suite, "fowler/repetition-long.toml");

        // The remaining test files.
        load!(suite, "crazy.toml");
        load!(suite, "flags.toml");
        load!(suite, "iter.toml");
        load!(suite, "no-unicode.toml");
        load!(suite, "unicode.toml");

        suite
    };
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexTestCollection {
|
||||
pub by_name: BTreeMap<String, RegexTest>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct RegexTests {
|
||||
pub tests: Vec<RegexTest>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct RegexTest {
|
||||
pub name: String,
|
||||
#[serde(default)]
|
||||
pub options: Vec<RegexTestOption>,
|
||||
pub pattern: String,
|
||||
#[serde(with = "serde_bytes")]
|
||||
pub input: Vec<u8>,
|
||||
#[serde(rename = "matches")]
|
||||
pub matches: Vec<Match>,
|
||||
#[serde(default)]
|
||||
pub captures: Vec<Option<Match>>,
|
||||
#[serde(default)]
|
||||
pub fowler_line_number: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum RegexTestOption {
|
||||
Anchored,
|
||||
CaseInsensitive,
|
||||
NoUnicode,
|
||||
Escaped,
|
||||
#[serde(rename = "invalid-utf8")]
|
||||
InvalidUTF8,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Deserialize, Eq, PartialEq)]
|
||||
pub struct Match {
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
}
|
||||
|
||||
impl RegexTestCollection {
|
||||
fn new() -> RegexTestCollection {
|
||||
RegexTestCollection { by_name: BTreeMap::new() }
|
||||
}
|
||||
|
||||
fn extend(&mut self, tests: RegexTests) {
|
||||
for test in tests.tests {
|
||||
let name = test.name.clone();
|
||||
if self.by_name.contains_key(&name) {
|
||||
panic!("found duplicate test {}", name);
|
||||
}
|
||||
self.by_name.insert(name, test);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tests(&self) -> Vec<&RegexTest> {
|
||||
self.by_name.values().collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl RegexTests {
|
||||
fn load(path: &str, slice: &[u8]) -> RegexTests {
|
||||
let mut data: RegexTests = toml::from_slice(slice)
|
||||
.expect(&format!("failed to load {}", path));
|
||||
for test in &mut data.tests {
|
||||
if test.options.contains(&RegexTestOption::Escaped) {
|
||||
test.input = unescape_bytes(&test.input);
|
||||
}
|
||||
}
|
||||
data
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RegexTester {
|
||||
asserted: bool,
|
||||
results: RegexTestResults,
|
||||
skip_expensive: bool,
|
||||
whitelist: Vec<regex::Regex>,
|
||||
blacklist: Vec<regex::Regex>,
|
||||
}
|
||||
|
||||
impl Drop for RegexTester {
|
||||
fn drop(&mut self) {
|
||||
// If we haven't asserted yet, then the test is probably buggy, so
|
||||
// fail it. But if we're already panicking (e.g., a bug in the regex
|
||||
// engine), then don't double-panic, which causes an immediate abort.
|
||||
if !thread::panicking() && !self.asserted {
|
||||
panic!("must call RegexTester::assert at end of test");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RegexTester {
|
||||
pub fn new() -> RegexTester {
|
||||
let mut tester = RegexTester {
|
||||
asserted: false,
|
||||
results: RegexTestResults::default(),
|
||||
skip_expensive: false,
|
||||
whitelist: vec![],
|
||||
blacklist: vec![],
|
||||
};
|
||||
for x in env::var("REGEX_TEST").unwrap_or("".to_string()).split(",") {
|
||||
let x = x.trim();
|
||||
if x.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if x.starts_with("-") {
|
||||
tester = tester.blacklist(&x[1..]);
|
||||
} else {
|
||||
tester = tester.whitelist(x);
|
||||
}
|
||||
}
|
||||
tester
|
||||
}
|
||||
|
||||
/// Makes this tester skip expensive tests, i.e. those whose name starts
/// with `repetition-long`.
pub fn skip_expensive(self) -> RegexTester {
    let mut tester = self;
    tester.skip_expensive = true;
    tester
}
|
||||
|
||||
pub fn whitelist(mut self, name: &str) -> RegexTester {
|
||||
self.whitelist.push(regex::Regex::new(name).unwrap());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn blacklist(mut self, name: &str) -> RegexTester {
|
||||
self.blacklist.push(regex::Regex::new(name).unwrap());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn assert(&mut self) {
|
||||
self.asserted = true;
|
||||
self.results.assert();
|
||||
}
|
||||
|
||||
pub fn build_regex<S: StateID>(
|
||||
&self,
|
||||
mut builder: RegexBuilder,
|
||||
test: &RegexTest,
|
||||
) -> Option<Regex<DenseDFA<Vec<S>, S>>> {
|
||||
if self.skip(test) {
|
||||
return None;
|
||||
}
|
||||
self.apply_options(test, &mut builder);
|
||||
|
||||
match builder.build_with_size::<S>(&test.pattern) {
|
||||
Ok(re) => Some(re),
|
||||
Err(err) => {
|
||||
if let ErrorKind::Unsupported(_) = *err.kind() {
|
||||
None
|
||||
} else {
|
||||
panic!(
|
||||
"failed to build {:?} with pattern '{:?}': {}",
|
||||
test.name, test.pattern, err
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn test_all<'a, I, T>(&mut self, builder: RegexBuilder, tests: I)
|
||||
where
|
||||
I: IntoIterator<IntoIter = T, Item = &'a RegexTest>,
|
||||
T: Iterator<Item = &'a RegexTest>,
|
||||
{
|
||||
for test in tests {
|
||||
let builder = builder.clone();
|
||||
let re: Regex = match self.build_regex(builder, test) {
|
||||
None => continue,
|
||||
Some(re) => re,
|
||||
};
|
||||
self.test(test, &re);
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs every applicable check for `test` against `re` and records the
/// results.
pub fn test<'a, D: DFA>(&mut self, test: &RegexTest, re: &Regex<D>) {
    // Some tests (namely, fowler) only describe the first match even when
    // the input contains more of them. Match iteration is therefore checked
    // only if the expected number of matches is not 1, or if the test name
    // contains 'iter'.
    let check_iteration =
        test.matches.len() != 1 || test.name.contains("iter");

    self.test_is_match(test, re);
    self.test_find(test, re);
    if check_iteration {
        self.test_find_iter(test, re);
    }
}
|
||||
|
||||
pub fn test_is_match<'a, D: DFA>(
|
||||
&mut self,
|
||||
test: &RegexTest,
|
||||
re: &Regex<D>,
|
||||
) {
|
||||
self.asserted = false;
|
||||
|
||||
let got = re.is_match(&test.input);
|
||||
let expected = test.matches.len() >= 1;
|
||||
if got == expected {
|
||||
self.results.succeeded.push(test.clone());
|
||||
return;
|
||||
}
|
||||
self.results.failed.push(RegexTestFailure {
|
||||
test: test.clone(),
|
||||
kind: RegexTestFailureKind::IsMatch,
|
||||
});
|
||||
}
|
||||
|
||||
pub fn test_find<'a, D: DFA>(&mut self, test: &RegexTest, re: &Regex<D>) {
|
||||
self.asserted = false;
|
||||
|
||||
let got =
|
||||
re.find(&test.input).map(|(start, end)| Match { start, end });
|
||||
if got == test.matches.get(0).map(|&m| m) {
|
||||
self.results.succeeded.push(test.clone());
|
||||
return;
|
||||
}
|
||||
self.results.failed.push(RegexTestFailure {
|
||||
test: test.clone(),
|
||||
kind: RegexTestFailureKind::Find { got },
|
||||
});
|
||||
}
|
||||
|
||||
pub fn test_find_iter<'a, D: DFA>(
|
||||
&mut self,
|
||||
test: &RegexTest,
|
||||
re: &Regex<D>,
|
||||
) {
|
||||
self.asserted = false;
|
||||
|
||||
let got: Vec<Match> = re
|
||||
.find_iter(&test.input)
|
||||
.map(|(start, end)| Match { start, end })
|
||||
.collect();
|
||||
if got == test.matches {
|
||||
self.results.succeeded.push(test.clone());
|
||||
return;
|
||||
}
|
||||
self.results.failed.push(RegexTestFailure {
|
||||
test: test.clone(),
|
||||
kind: RegexTestFailureKind::FindIter { got },
|
||||
});
|
||||
}
|
||||
|
||||
fn skip(&self, test: &RegexTest) -> bool {
|
||||
if self.skip_expensive {
|
||||
if test.name.starts_with("repetition-long") {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if !self.blacklist.is_empty() {
|
||||
if self.blacklist.iter().any(|re| re.is_match(&test.name)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if !self.whitelist.is_empty() {
|
||||
if !self.whitelist.iter().any(|re| re.is_match(&test.name)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn apply_options(&self, test: &RegexTest, builder: &mut RegexBuilder) {
|
||||
for opt in &test.options {
|
||||
match *opt {
|
||||
RegexTestOption::Anchored => {
|
||||
builder.anchored(true);
|
||||
}
|
||||
RegexTestOption::CaseInsensitive => {
|
||||
builder.case_insensitive(true);
|
||||
}
|
||||
RegexTestOption::NoUnicode => {
|
||||
builder.unicode(false);
|
||||
}
|
||||
RegexTestOption::Escaped => {}
|
||||
RegexTestOption::InvalidUTF8 => {
|
||||
builder.allow_invalid_utf8(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct RegexTestResults {
|
||||
/// Tests that succeeded.
|
||||
pub succeeded: Vec<RegexTest>,
|
||||
/// Failed tests, indexed by group name.
|
||||
pub failed: Vec<RegexTestFailure>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexTestFailure {
|
||||
test: RegexTest,
|
||||
kind: RegexTestFailureKind,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum RegexTestFailureKind {
|
||||
IsMatch,
|
||||
Find { got: Option<Match> },
|
||||
FindIter { got: Vec<Match> },
|
||||
}
|
||||
|
||||
impl RegexTestResults {
|
||||
pub fn assert(&self) {
|
||||
if self.failed.is_empty() {
|
||||
return;
|
||||
}
|
||||
let failures = self
|
||||
.failed
|
||||
.iter()
|
||||
.map(|f| f.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n\n");
|
||||
panic!(
|
||||
"found {} failures:\n{}\n{}\n{}\n\n\
|
||||
Set the REGEX_TEST environment variable to filter tests, \n\
|
||||
e.g., REGEX_TEST=crazy-misc,-crazy-misc2 runs every test \n\
|
||||
whose name contains crazy-misc but not crazy-misc2\n\n",
|
||||
self.failed.len(),
|
||||
"~".repeat(79),
|
||||
failures.trim(),
|
||||
"~".repeat(79)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for RegexTestFailure {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}: {}\n \
|
||||
options: {:?}\n \
|
||||
pattern: {}\n \
|
||||
pattern (escape): {}\n \
|
||||
input: {}\n \
|
||||
input (escape): {}\n \
|
||||
input (hex): {}",
|
||||
self.test.name,
|
||||
self.kind.fmt(&self.test)?,
|
||||
self.test.options,
|
||||
self.test.pattern,
|
||||
escape_default(&self.test.pattern),
|
||||
nice_raw_bytes(&self.test.input),
|
||||
escape_bytes(&self.test.input),
|
||||
hex_bytes(&self.test.input)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl RegexTestFailureKind {
|
||||
fn fmt(&self, test: &RegexTest) -> Result<String, fmt::Error> {
|
||||
let mut buf = String::new();
|
||||
match *self {
|
||||
RegexTestFailureKind::IsMatch => {
|
||||
if let Some(&m) = test.matches.get(0) {
|
||||
write!(buf, "expected match (at {}), but none found", m)?
|
||||
} else {
|
||||
write!(buf, "expected no match, but found a match")?
|
||||
}
|
||||
}
|
||||
RegexTestFailureKind::Find { got } => write!(
|
||||
buf,
|
||||
"expected {:?}, but found {:?}",
|
||||
test.matches.get(0),
|
||||
got
|
||||
)?,
|
||||
RegexTestFailureKind::FindIter { ref got } => write!(
|
||||
buf,
|
||||
"expected {:?}, but found {:?}",
|
||||
test.matches, got
|
||||
)?,
|
||||
}
|
||||
Ok(buf)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Match {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "({}, {})", self.start, self.end)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Match {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "({}, {})", self.start, self.end)
|
||||
}
|
||||
}
|
||||
|
||||
fn nice_raw_bytes(bytes: &[u8]) -> String {
|
||||
use std::str;
|
||||
|
||||
match str::from_utf8(bytes) {
|
||||
Ok(s) => s.to_string(),
|
||||
Err(_) => escape_bytes(bytes),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `bytes` with every byte escaped by `std::ascii::escape_default`.
///
/// Printable ASCII is kept as is (except that `\`, `'` and `"` get a leading
/// backslash), tab, CR and LF become `\t`, `\r` and `\n`, and every other
/// byte becomes `\xNN`.
fn escape_bytes(bytes: &[u8]) -> String {
    use std::ascii;

    // `ascii::escape_default` only ever yields ASCII bytes, so each one maps
    // directly to a `char`. This avoids an intermediate `Vec<u8>`, the UTF-8
    // validation of `String::from_utf8` and the `unwrap` on its result.
    bytes
        .iter()
        .flat_map(|&b| ascii::escape_default(b))
        .map(char::from)
        .collect()
}
|
||||
|
||||
/// Returns `bytes` with every byte written as `\xNN`, using two uppercase
/// hexadecimal digits.
fn hex_bytes(bytes: &[u8]) -> String {
    // Each byte produces exactly four characters, so the buffer is sized once
    // up front instead of allocating a temporary `String` per byte.
    let mut hex = String::with_capacity(bytes.len() * 4);
    for &b in bytes {
        write!(hex, r"\x{:02X}", b).expect("writing to a String cannot fail");
    }
    hex
}
|
||||
|
||||
/// Returns `s` with every character escaped by `char::escape_default`.
fn escape_default(s: &str) -> String {
    let mut escaped = String::new();
    for ch in s.chars() {
        escaped.extend(ch.escape_default());
    }
    escaped
}
|
||||
|
||||
fn unescape_bytes(bytes: &[u8]) -> Vec<u8> {
|
||||
use std::str;
|
||||
use unescape::unescape;
|
||||
|
||||
unescape(&str::from_utf8(bytes).expect("all input must be valid UTF-8"))
|
||||
}
|
||||
42
third-party/vendor/regex-automata-0.1.10/tests/regression.rs
vendored
Normal file
42
third-party/vendor/regex-automata-0.1.10/tests/regression.rs
vendored
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
use regex_automata::{dense, DFA};
|
||||
|
||||
// Regression test: minimization must preserve whether a state is a match
// state. Previously, minimization could mark a non-matching state as
// matching.
#[test]
fn minimize_sets_correct_match_states() {
    // This is a subset of the grapheme matching regex. I couldn't seem to get
    // a repro any smaller than this unfortunately. The pattern uses `(?x)`,
    // so the whitespace inside it is not significant.
    let pattern = r"(?x)
        (?:
            \p{gcb=Prepend}*
            (?:
                (?:
                    (?:
                        \p{gcb=L}*
                        (?:\p{gcb=V}+|\p{gcb=LV}\p{gcb=V}*|\p{gcb=LVT})
                        \p{gcb=T}*
                    )
                    |
                    \p{gcb=L}+
                    |
                    \p{gcb=T}+
                )
                |
                \p{Extended_Pictographic}
                (?:\p{gcb=Extend}*\p{gcb=ZWJ}\p{Extended_Pictographic})*
                |
                [^\p{gcb=Control}\p{gcb=CR}\p{gcb=LF}]
            )
            [\p{gcb=Extend}\p{gcb=ZWJ}\p{gcb=SpacingMark}]*
        )
    ";

    let mut builder = dense::Builder::new();
    builder.minimize(true).anchored(true);
    let dfa = builder.build(pattern).unwrap();

    // The single byte 0xE2 must not be reported as a match.
    assert_eq!(None, dfa.find(b"\xE2"));
}
|
||||
250
third-party/vendor/regex-automata-0.1.10/tests/suite.rs
vendored
Normal file
250
third-party/vendor/regex-automata-0.1.10/tests/suite.rs
vendored
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
use regex_automata::{DenseDFA, Regex, RegexBuilder, SparseDFA};
|
||||
|
||||
use collection::{RegexTester, SUITE};
|
||||
|
||||
// Unminimized DFAs, without premultiplication or byte classes. Expensive
// tests are skipped.
#[test]
fn unminimized_standard() {
    let mut tester = RegexTester::new().skip_expensive();

    let mut config = RegexBuilder::new();
    config.minimize(false).premultiply(false).byte_classes(false);

    tester.test_all(config, SUITE.tests());
    tester.assert();
}
|
||||
|
||||
// Unminimized DFAs with premultiplication, without byte classes. Expensive
// tests are skipped.
#[test]
fn unminimized_premultiply() {
    let mut tester = RegexTester::new().skip_expensive();

    let mut config = RegexBuilder::new();
    config.minimize(false).premultiply(true).byte_classes(false);

    tester.test_all(config, SUITE.tests());
    tester.assert();
}
|
||||
|
||||
// Unminimized DFAs with byte classes, without premultiplication. Runs the
// whole suite, including expensive tests.
#[test]
fn unminimized_byte_class() {
    let mut tester = RegexTester::new();

    let mut config = RegexBuilder::new();
    config.minimize(false).premultiply(false).byte_classes(true);

    tester.test_all(config, SUITE.tests());
    tester.assert();
}
|
||||
|
||||
// Unminimized DFAs with both premultiplication and byte classes. Runs the
// whole suite, including expensive tests.
#[test]
fn unminimized_premultiply_byte_class() {
    let mut tester = RegexTester::new();

    let mut config = RegexBuilder::new();
    config.minimize(false).premultiply(true).byte_classes(true);

    tester.test_all(config, SUITE.tests());
    tester.assert();
}
|
||||
|
||||
// Same as `unminimized_standard`, but with the builder's `shrink` option
// turned off. Expensive tests are skipped.
#[test]
fn unminimized_standard_no_nfa_shrink() {
    let mut tester = RegexTester::new().skip_expensive();

    let mut config = RegexBuilder::new();
    config
        .minimize(false)
        .premultiply(false)
        .byte_classes(false)
        .shrink(false);

    tester.test_all(config, SUITE.tests());
    tester.assert();
}
|
||||
|
||||
// Minimized DFAs, without premultiplication or byte classes. Expensive tests
// are skipped.
#[test]
fn minimized_standard() {
    let mut tester = RegexTester::new().skip_expensive();

    let mut config = RegexBuilder::new();
    config.minimize(true).premultiply(false).byte_classes(false);

    tester.test_all(config, SUITE.tests());
    tester.assert();
}
|
||||
|
||||
// Minimized DFAs with premultiplication, without byte classes. Expensive
// tests are skipped.
#[test]
fn minimized_premultiply() {
    let mut tester = RegexTester::new().skip_expensive();

    let mut config = RegexBuilder::new();
    config.minimize(true).premultiply(true).byte_classes(false);

    tester.test_all(config, SUITE.tests());
    tester.assert();
}
|
||||
|
||||
// Minimized DFAs with byte classes, without premultiplication. Runs the
// whole suite, including expensive tests.
#[test]
fn minimized_byte_class() {
    let mut tester = RegexTester::new();

    let mut config = RegexBuilder::new();
    config.minimize(true).premultiply(false).byte_classes(true);

    tester.test_all(config, SUITE.tests());
    tester.assert();
}
|
||||
|
||||
// Minimized DFAs with both premultiplication and byte classes. Runs the
// whole suite, including expensive tests.
#[test]
fn minimized_premultiply_byte_class() {
    let mut tester = RegexTester::new();

    let mut config = RegexBuilder::new();
    config.minimize(true).premultiply(true).byte_classes(true);

    tester.test_all(config, SUITE.tests());
    tester.assert();
}
|
||||
|
||||
// Same as `minimized_standard`, but with the builder's `shrink` option
// turned off. Expensive tests are skipped.
#[test]
fn minimized_standard_no_nfa_shrink() {
    let mut tester = RegexTester::new().skip_expensive();

    let mut config = RegexBuilder::new();
    config
        .minimize(true)
        .premultiply(false)
        .byte_classes(false)
        .shrink(false);

    tester.test_all(config, SUITE.tests());
    tester.assert();
}
|
||||
|
||||
// Sanity check: a regex converted to a smaller (16-bit) state ID
// representation must still pass our tests.
//
// Should the suite ever gain minimal regexes that do not fit into 16 bits,
// those tests need to be skipped here, or the target size raised to u32.
#[test]
fn u16() {
    let mut tester = RegexTester::new().skip_expensive();

    let mut config = RegexBuilder::new();
    config.minimize(true).premultiply(false).byte_classes(true);

    for test in SUITE.tests() {
        let built: Option<Regex> = tester.build_regex(config.clone(), test);
        if let Some(re) = built {
            let fwd = re.forward().to_u16().unwrap();
            let rev = re.reverse().to_u16().unwrap();
            let small_re = Regex::from_dfas(fwd, rev);

            tester.test(test, &small_re);
        }
    }
    tester.assert();
}
|
||||
|
||||
// Sparse DFAs, converted from dense DFAs built with the standard
// configuration, must pass our tests.
#[test]
fn sparse_unminimized_standard() {
    let mut tester = RegexTester::new().skip_expensive();

    let mut config = RegexBuilder::new();
    config.minimize(false).premultiply(false).byte_classes(false);

    for test in SUITE.tests() {
        let built: Option<Regex> = tester.build_regex(config.clone(), test);
        if let Some(re) = built {
            let sparse_re = Regex::from_dfas(
                re.forward().to_sparse().unwrap(),
                re.reverse().to_sparse().unwrap(),
            );

            tester.test(test, &sparse_re);
        }
    }
    tester.assert();
}
|
||||
|
||||
// Sparse DFAs must still pass our tests after being converted to a different
// (16-bit) state ID representation.
#[test]
fn sparse_u16() {
    let mut tester = RegexTester::new().skip_expensive();

    let mut config = RegexBuilder::new();
    config.minimize(true).premultiply(false).byte_classes(false);

    for test in SUITE.tests() {
        let built: Option<Regex> = tester.build_regex(config.clone(), test);
        if let Some(re) = built {
            let sparse_re = Regex::from_dfas(
                re.forward().to_sparse().unwrap().to_u16().unwrap(),
                re.reverse().to_sparse().unwrap().to_u16().unwrap(),
            );

            tester.test(test, &sparse_re);
        }
    }
    tester.assert();
}
|
||||
|
||||
// Sanity check: a regex that is serialized and then deserialized again must
// still search correctly.
#[test]
fn serialization_roundtrip() {
    let mut tester = RegexTester::new().skip_expensive();

    let mut config = RegexBuilder::new();
    config.premultiply(false).byte_classes(true);

    for test in SUITE.tests() {
        let built: Option<Regex> = tester.build_regex(config.clone(), test);
        if let Some(original) = built {
            let fwd_bytes = original.forward().to_bytes_native_endian().unwrap();
            let rev_bytes = original.reverse().to_bytes_native_endian().unwrap();

            // The bytes were serialized just above, in native endianness,
            // from DFAs that were built successfully.
            // NOTE(review): confirm the remaining safety requirements of
            // `DenseDFA::from_bytes` (e.g. alignment of the buffer).
            let fwd: DenseDFA<&[usize], usize> =
                unsafe { DenseDFA::from_bytes(&fwd_bytes) };
            let rev: DenseDFA<&[usize], usize> =
                unsafe { DenseDFA::from_bytes(&rev_bytes) };
            let roundtripped = Regex::from_dfas(fwd, rev);

            tester.test(test, &roundtripped);
        }
    }
    tester.assert();
}
|
||||
|
||||
// Sanity check: a regex using sparse DFAs that is serialized and then
// deserialized again must still search correctly.
#[test]
fn sparse_serialization_roundtrip() {
    let mut tester = RegexTester::new().skip_expensive();

    let mut config = RegexBuilder::new();
    config.byte_classes(true);

    for test in SUITE.tests() {
        let built: Option<Regex> = tester.build_regex(config.clone(), test);
        if let Some(original) = built {
            let fwd_sparse = original.forward().to_sparse().unwrap();
            let fwd_bytes = fwd_sparse.to_bytes_native_endian().unwrap();
            let rev_sparse = original.reverse().to_sparse().unwrap();
            let rev_bytes = rev_sparse.to_bytes_native_endian().unwrap();

            // The bytes were serialized just above, in native endianness,
            // from DFAs that were built successfully.
            // NOTE(review): confirm the remaining safety requirements of
            // `SparseDFA::from_bytes`.
            let fwd: SparseDFA<&[u8], usize> =
                unsafe { SparseDFA::from_bytes(&fwd_bytes) };
            let rev: SparseDFA<&[u8], usize> =
                unsafe { SparseDFA::from_bytes(&rev_bytes) };
            let roundtripped = Regex::from_dfas(fwd, rev);

            tester.test(test, &roundtripped);
        }
    }
    tester.assert();
}
|
||||
25
third-party/vendor/regex-automata-0.1.10/tests/tests.rs
vendored
Normal file
25
third-party/vendor/regex-automata-0.1.10/tests/tests.rs
vendored
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
#[cfg(feature = "std")]
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[cfg(feature = "std")]
|
||||
extern crate regex;
|
||||
#[cfg(feature = "std")]
|
||||
extern crate regex_automata;
|
||||
#[cfg(feature = "std")]
|
||||
extern crate serde;
|
||||
#[cfg(feature = "std")]
|
||||
extern crate serde_bytes;
|
||||
#[cfg(feature = "std")]
|
||||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
#[cfg(feature = "std")]
|
||||
extern crate toml;
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
mod collection;
|
||||
#[cfg(feature = "std")]
|
||||
mod regression;
|
||||
#[cfg(feature = "std")]
|
||||
mod suite;
|
||||
#[cfg(feature = "std")]
|
||||
mod unescape;
|
||||
84
third-party/vendor/regex-automata-0.1.10/tests/unescape.rs
vendored
Normal file
84
third-party/vendor/regex-automata-0.1.10/tests/unescape.rs
vendored
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
/// The state of the unescaping state machine.
#[derive(Clone, Copy, Eq, PartialEq)]
enum State {
    /// The state after seeing a `\`.
    Escape,
    /// The state after seeing a `\x`.
    HexFirst,
    /// The state after seeing a `\x[0-9A-Fa-f]`.
    HexSecond(char),
    /// Default state.
    Literal,
}

/// Appends the UTF-8 encoding of `c` to `bytes` without allocating.
fn push_char(bytes: &mut Vec<u8>, c: char) {
    let mut utf8 = [0u8; 4];
    bytes.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
}

/// Expands the escape sequences in `s` and returns the resulting bytes.
///
/// Recognized sequences are `\\`, `\n`, `\r`, `\t` and `\xNN`, where `NN` are
/// two hexadecimal digits of either case. Anything else, including a
/// malformed or truncated sequence, is copied to the output unchanged.
pub fn unescape(s: &str) -> Vec<u8> {
    use self::State::*;

    // Unescaping never makes the output longer than the input.
    let mut bytes = Vec::with_capacity(s.len());
    let mut state = Literal;
    for c in s.chars() {
        state = match state {
            Literal => {
                if c == '\\' {
                    Escape
                } else {
                    push_char(&mut bytes, c);
                    Literal
                }
            }
            Escape => match c {
                'x' => HexFirst,
                _ => {
                    match c {
                        '\\' => bytes.push(b'\\'),
                        'n' => bytes.push(b'\n'),
                        'r' => bytes.push(b'\r'),
                        't' => bytes.push(b'\t'),
                        // Unknown escape: keep it as written.
                        other => {
                            bytes.push(b'\\');
                            push_char(&mut bytes, other);
                        }
                    }
                    Literal
                }
            },
            HexFirst => {
                if c.is_digit(16) {
                    HexSecond(c)
                } else {
                    // Not a hex escape after all: keep it as written.
                    bytes.extend_from_slice(b"\\x");
                    push_char(&mut bytes, c);
                    Literal
                }
            }
            HexSecond(first) => {
                match (first.to_digit(16), c.to_digit(16)) {
                    // Two hex digits are at most 0xFF, so the cast below is
                    // lossless.
                    (Some(hi), Some(lo)) => bytes.push((hi * 16 + lo) as u8),
                    // Second character is not a hex digit: keep the whole
                    // sequence as written.
                    _ => {
                        bytes.extend_from_slice(b"\\x");
                        push_char(&mut bytes, first);
                        push_char(&mut bytes, c);
                    }
                }
                Literal
            }
        };
    }
    // Flush an escape sequence that was cut off by the end of the input.
    match state {
        Escape => bytes.push(b'\\'),
        HexFirst => bytes.extend_from_slice(b"\\x"),
        HexSecond(first) => {
            bytes.extend_from_slice(b"\\x");
            push_char(&mut bytes, first);
        }
        Literal => {}
    }
    bytes
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue