Vendor things
This commit is contained in:
parent
5deceec006
commit
977e3c17e5
19434 changed files with 10682014 additions and 0 deletions
520
third-party/vendor/regex-syntax-0.6.29/src/hir/interval.rs
vendored
Normal file
520
third-party/vendor/regex-syntax-0.6.29/src/hir/interval.rs
vendored
Normal file
|
|
@ -0,0 +1,520 @@
|
|||
use std::char;
|
||||
use std::cmp;
|
||||
use std::fmt::Debug;
|
||||
use std::slice;
|
||||
use std::u8;
|
||||
|
||||
use crate::unicode;
|
||||
|
||||
// This module contains an *internal* implementation of interval sets.
|
||||
//
|
||||
// The primary invariant that interval sets guard is canonical ordering. That
|
||||
// is, every interval set contains an ordered sequence of intervals where
|
||||
// no two intervals are overlapping or adjacent. While this invariant is
|
||||
// occasionally broken within the implementation, it should be impossible for
|
||||
// callers to observe it.
|
||||
//
|
||||
// Since case folding (as implemented below) breaks that invariant, we roll
|
||||
// that into this API even though it is a little out of place in an otherwise
|
||||
// generic interval set. (Hence the reason why the `unicode` module is imported
|
||||
// here.)
|
||||
//
|
||||
// Some of the implementation complexity here is a result of me wanting to
|
||||
// preserve the sequential representation without using additional memory.
|
||||
// In many cases, we do use linear extra memory, but it is at most 2x and it
|
||||
// is amortized. If we relaxed the memory requirements, this implementation
|
||||
// could become much simpler. The extra memory is honestly probably OK, but
|
||||
// character classes (especially of the Unicode variety) can become quite
|
||||
// large, and it would be nice to keep regex compilation snappy even in debug
|
||||
// builds. (In the past, I have been careless with this area of code and it has
|
||||
// caused slow regex compilations in debug mode, so this isn't entirely
|
||||
// unwarranted.)
|
||||
//
|
||||
// Tests on this are relegated to the public API of HIR in src/hir.rs.
|
||||
|
||||
/// A set of intervals stored as a sequence.
///
/// Outside of this module's own methods, the sequence is kept in canonical
/// order: sorted, with no two intervals overlapping or adjacent.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct IntervalSet<I> {
    /// The intervals that make up this set.
    ranges: Vec<I>,
}
|
||||
|
||||
impl<I: Interval> IntervalSet<I> {
|
||||
/// Create a new set from a sequence of intervals. Each interval is
|
||||
/// specified as a pair of bounds, where both bounds are inclusive.
|
||||
///
|
||||
/// The given ranges do not need to be in any specific order, and ranges
|
||||
/// may overlap.
|
||||
pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> {
|
||||
let mut set = IntervalSet { ranges: intervals.into_iter().collect() };
|
||||
set.canonicalize();
|
||||
set
|
||||
}
|
||||
|
||||
/// Add a new interval to this set.
|
||||
pub fn push(&mut self, interval: I) {
|
||||
// TODO: This could be faster. e.g., Push the interval such that
|
||||
// it preserves canonicalization.
|
||||
self.ranges.push(interval);
|
||||
self.canonicalize();
|
||||
}
|
||||
|
||||
/// Return an iterator over all intervals in this set.
|
||||
///
|
||||
/// The iterator yields intervals in ascending order.
|
||||
pub fn iter(&self) -> IntervalSetIter<'_, I> {
|
||||
IntervalSetIter(self.ranges.iter())
|
||||
}
|
||||
|
||||
/// Return an immutable slice of intervals in this set.
|
||||
///
|
||||
/// The sequence returned is in canonical ordering.
|
||||
pub fn intervals(&self) -> &[I] {
|
||||
&self.ranges
|
||||
}
|
||||
|
||||
/// Expand this interval set such that it contains all case folded
|
||||
/// characters. For example, if this class consists of the range `a-z`,
|
||||
/// then applying case folding will result in the class containing both the
|
||||
/// ranges `a-z` and `A-Z`.
|
||||
///
|
||||
/// This returns an error if the necessary case mapping data is not
|
||||
/// available.
|
||||
pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> {
|
||||
let len = self.ranges.len();
|
||||
for i in 0..len {
|
||||
let range = self.ranges[i];
|
||||
if let Err(err) = range.case_fold_simple(&mut self.ranges) {
|
||||
self.canonicalize();
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
self.canonicalize();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Union this set with the given set, in place.
|
||||
pub fn union(&mut self, other: &IntervalSet<I>) {
|
||||
// This could almost certainly be done more efficiently.
|
||||
self.ranges.extend(&other.ranges);
|
||||
self.canonicalize();
|
||||
}
|
||||
|
||||
/// Intersect this set with the given set, in place.
|
||||
pub fn intersect(&mut self, other: &IntervalSet<I>) {
|
||||
if self.ranges.is_empty() {
|
||||
return;
|
||||
}
|
||||
if other.ranges.is_empty() {
|
||||
self.ranges.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
// There should be a way to do this in-place with constant memory,
|
||||
// but I couldn't figure out a simple way to do it. So just append
|
||||
// the intersection to the end of this range, and then drain it before
|
||||
// we're done.
|
||||
let drain_end = self.ranges.len();
|
||||
|
||||
let mut ita = 0..drain_end;
|
||||
let mut itb = 0..other.ranges.len();
|
||||
let mut a = ita.next().unwrap();
|
||||
let mut b = itb.next().unwrap();
|
||||
loop {
|
||||
if let Some(ab) = self.ranges[a].intersect(&other.ranges[b]) {
|
||||
self.ranges.push(ab);
|
||||
}
|
||||
let (it, aorb) =
|
||||
if self.ranges[a].upper() < other.ranges[b].upper() {
|
||||
(&mut ita, &mut a)
|
||||
} else {
|
||||
(&mut itb, &mut b)
|
||||
};
|
||||
match it.next() {
|
||||
Some(v) => *aorb = v,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
self.ranges.drain(..drain_end);
|
||||
}
|
||||
|
||||
/// Subtract the given set from this set, in place.
|
||||
pub fn difference(&mut self, other: &IntervalSet<I>) {
|
||||
if self.ranges.is_empty() || other.ranges.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
// This algorithm is (to me) surprisingly complex. A search of the
|
||||
// interwebs indicate that this is a potentially interesting problem.
|
||||
// Folks seem to suggest interval or segment trees, but I'd like to
|
||||
// avoid the overhead (both runtime and conceptual) of that.
|
||||
//
|
||||
// The following is basically my Shitty First Draft. Therefore, in
|
||||
// order to grok it, you probably need to read each line carefully.
|
||||
// Simplifications are most welcome!
|
||||
//
|
||||
// Remember, we can assume the canonical format invariant here, which
|
||||
// says that all ranges are sorted, not overlapping and not adjacent in
|
||||
// each class.
|
||||
let drain_end = self.ranges.len();
|
||||
let (mut a, mut b) = (0, 0);
|
||||
'LOOP: while a < drain_end && b < other.ranges.len() {
|
||||
// Basically, the easy cases are when neither range overlaps with
|
||||
// each other. If the `b` range is less than our current `a`
|
||||
// range, then we can skip it and move on.
|
||||
if other.ranges[b].upper() < self.ranges[a].lower() {
|
||||
b += 1;
|
||||
continue;
|
||||
}
|
||||
// ... similarly for the `a` range. If it's less than the smallest
|
||||
// `b` range, then we can add it as-is.
|
||||
if self.ranges[a].upper() < other.ranges[b].lower() {
|
||||
let range = self.ranges[a];
|
||||
self.ranges.push(range);
|
||||
a += 1;
|
||||
continue;
|
||||
}
|
||||
// Otherwise, we have overlapping ranges.
|
||||
assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b]));
|
||||
|
||||
// This part is tricky and was non-obvious to me without looking
|
||||
// at explicit examples (see the tests). The trickiness stems from
|
||||
// two things: 1) subtracting a range from another range could
|
||||
// yield two ranges and 2) after subtracting a range, it's possible
|
||||
// that future ranges can have an impact. The loop below advances
|
||||
// the `b` ranges until they can't possible impact the current
|
||||
// range.
|
||||
//
|
||||
// For example, if our `a` range is `a-t` and our next three `b`
|
||||
// ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply
|
||||
// subtraction three times before moving on to the next `a` range.
|
||||
let mut range = self.ranges[a];
|
||||
while b < other.ranges.len()
|
||||
&& !range.is_intersection_empty(&other.ranges[b])
|
||||
{
|
||||
let old_range = range;
|
||||
range = match range.difference(&other.ranges[b]) {
|
||||
(None, None) => {
|
||||
// We lost the entire range, so move on to the next
|
||||
// without adding this one.
|
||||
a += 1;
|
||||
continue 'LOOP;
|
||||
}
|
||||
(Some(range1), None) | (None, Some(range1)) => range1,
|
||||
(Some(range1), Some(range2)) => {
|
||||
self.ranges.push(range1);
|
||||
range2
|
||||
}
|
||||
};
|
||||
// It's possible that the `b` range has more to contribute
|
||||
// here. In particular, if it is greater than the original
|
||||
// range, then it might impact the next `a` range *and* it
|
||||
// has impacted the current `a` range as much as possible,
|
||||
// so we can quit. We don't bump `b` so that the next `a`
|
||||
// range can apply it.
|
||||
if other.ranges[b].upper() > old_range.upper() {
|
||||
break;
|
||||
}
|
||||
// Otherwise, the next `b` range might apply to the current
|
||||
// `a` range.
|
||||
b += 1;
|
||||
}
|
||||
self.ranges.push(range);
|
||||
a += 1;
|
||||
}
|
||||
while a < drain_end {
|
||||
let range = self.ranges[a];
|
||||
self.ranges.push(range);
|
||||
a += 1;
|
||||
}
|
||||
self.ranges.drain(..drain_end);
|
||||
}
|
||||
|
||||
/// Compute the symmetric difference of the two sets, in place.
|
||||
///
|
||||
/// This computes the symmetric difference of two interval sets. This
|
||||
/// removes all elements in this set that are also in the given set,
|
||||
/// but also adds all elements from the given set that aren't in this
|
||||
/// set. That is, the set will contain all elements in either set,
|
||||
/// but will not contain any elements that are in both sets.
|
||||
pub fn symmetric_difference(&mut self, other: &IntervalSet<I>) {
|
||||
// TODO(burntsushi): Fix this so that it amortizes allocation.
|
||||
let mut intersection = self.clone();
|
||||
intersection.intersect(other);
|
||||
self.union(other);
|
||||
self.difference(&intersection);
|
||||
}
|
||||
|
||||
/// Negate this interval set.
|
||||
///
|
||||
/// For all `x` where `x` is any element, if `x` was in this set, then it
|
||||
/// will not be in this set after negation.
|
||||
pub fn negate(&mut self) {
|
||||
if self.ranges.is_empty() {
|
||||
let (min, max) = (I::Bound::min_value(), I::Bound::max_value());
|
||||
self.ranges.push(I::create(min, max));
|
||||
return;
|
||||
}
|
||||
|
||||
// There should be a way to do this in-place with constant memory,
|
||||
// but I couldn't figure out a simple way to do it. So just append
|
||||
// the negation to the end of this range, and then drain it before
|
||||
// we're done.
|
||||
let drain_end = self.ranges.len();
|
||||
|
||||
// We do checked arithmetic below because of the canonical ordering
|
||||
// invariant.
|
||||
if self.ranges[0].lower() > I::Bound::min_value() {
|
||||
let upper = self.ranges[0].lower().decrement();
|
||||
self.ranges.push(I::create(I::Bound::min_value(), upper));
|
||||
}
|
||||
for i in 1..drain_end {
|
||||
let lower = self.ranges[i - 1].upper().increment();
|
||||
let upper = self.ranges[i].lower().decrement();
|
||||
self.ranges.push(I::create(lower, upper));
|
||||
}
|
||||
if self.ranges[drain_end - 1].upper() < I::Bound::max_value() {
|
||||
let lower = self.ranges[drain_end - 1].upper().increment();
|
||||
self.ranges.push(I::create(lower, I::Bound::max_value()));
|
||||
}
|
||||
self.ranges.drain(..drain_end);
|
||||
}
|
||||
|
||||
/// Converts this set into a canonical ordering.
|
||||
fn canonicalize(&mut self) {
|
||||
if self.is_canonical() {
|
||||
return;
|
||||
}
|
||||
self.ranges.sort();
|
||||
assert!(!self.ranges.is_empty());
|
||||
|
||||
// Is there a way to do this in-place with constant memory? I couldn't
|
||||
// figure out a way to do it. So just append the canonicalization to
|
||||
// the end of this range, and then drain it before we're done.
|
||||
let drain_end = self.ranges.len();
|
||||
for oldi in 0..drain_end {
|
||||
// If we've added at least one new range, then check if we can
|
||||
// merge this range in the previously added range.
|
||||
if self.ranges.len() > drain_end {
|
||||
let (last, rest) = self.ranges.split_last_mut().unwrap();
|
||||
if let Some(union) = last.union(&rest[oldi]) {
|
||||
*last = union;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let range = self.ranges[oldi];
|
||||
self.ranges.push(range);
|
||||
}
|
||||
self.ranges.drain(..drain_end);
|
||||
}
|
||||
|
||||
/// Returns true if and only if this class is in a canonical ordering.
|
||||
fn is_canonical(&self) -> bool {
|
||||
for pair in self.ranges.windows(2) {
|
||||
if pair[0] >= pair[1] {
|
||||
return false;
|
||||
}
|
||||
if pair[0].is_contiguous(&pair[1]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over intervals, yielding each one by reference.
///
/// This is a thin wrapper around a slice iterator.
#[derive(Debug)]
pub struct IntervalSetIter<'a, I>(slice::Iter<'a, I>);

impl<'a, I> Iterator for IntervalSetIter<'a, I> {
    type Item = &'a I;

    /// Advance the wrapped slice iterator.
    fn next(&mut self) -> Option<Self::Item> {
        let IntervalSetIter(inner) = self;
        inner.next()
    }
}
|
||||
|
||||
pub trait Interval:
|
||||
Clone + Copy + Debug + Default + Eq + PartialEq + PartialOrd + Ord
|
||||
{
|
||||
type Bound: Bound;
|
||||
|
||||
fn lower(&self) -> Self::Bound;
|
||||
fn upper(&self) -> Self::Bound;
|
||||
fn set_lower(&mut self, bound: Self::Bound);
|
||||
fn set_upper(&mut self, bound: Self::Bound);
|
||||
fn case_fold_simple(
|
||||
&self,
|
||||
intervals: &mut Vec<Self>,
|
||||
) -> Result<(), unicode::CaseFoldError>;
|
||||
|
||||
/// Create a new interval.
|
||||
fn create(lower: Self::Bound, upper: Self::Bound) -> Self {
|
||||
let mut int = Self::default();
|
||||
if lower <= upper {
|
||||
int.set_lower(lower);
|
||||
int.set_upper(upper);
|
||||
} else {
|
||||
int.set_lower(upper);
|
||||
int.set_upper(lower);
|
||||
}
|
||||
int
|
||||
}
|
||||
|
||||
/// Union the given overlapping range into this range.
|
||||
///
|
||||
/// If the two ranges aren't contiguous, then this returns `None`.
|
||||
fn union(&self, other: &Self) -> Option<Self> {
|
||||
if !self.is_contiguous(other) {
|
||||
return None;
|
||||
}
|
||||
let lower = cmp::min(self.lower(), other.lower());
|
||||
let upper = cmp::max(self.upper(), other.upper());
|
||||
Some(Self::create(lower, upper))
|
||||
}
|
||||
|
||||
/// Intersect this range with the given range and return the result.
|
||||
///
|
||||
/// If the intersection is empty, then this returns `None`.
|
||||
fn intersect(&self, other: &Self) -> Option<Self> {
|
||||
let lower = cmp::max(self.lower(), other.lower());
|
||||
let upper = cmp::min(self.upper(), other.upper());
|
||||
if lower <= upper {
|
||||
Some(Self::create(lower, upper))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Subtract the given range from this range and return the resulting
|
||||
/// ranges.
|
||||
///
|
||||
/// If subtraction would result in an empty range, then no ranges are
|
||||
/// returned.
|
||||
fn difference(&self, other: &Self) -> (Option<Self>, Option<Self>) {
|
||||
if self.is_subset(other) {
|
||||
return (None, None);
|
||||
}
|
||||
if self.is_intersection_empty(other) {
|
||||
return (Some(self.clone()), None);
|
||||
}
|
||||
let add_lower = other.lower() > self.lower();
|
||||
let add_upper = other.upper() < self.upper();
|
||||
// We know this because !self.is_subset(other) and the ranges have
|
||||
// a non-empty intersection.
|
||||
assert!(add_lower || add_upper);
|
||||
let mut ret = (None, None);
|
||||
if add_lower {
|
||||
let upper = other.lower().decrement();
|
||||
ret.0 = Some(Self::create(self.lower(), upper));
|
||||
}
|
||||
if add_upper {
|
||||
let lower = other.upper().increment();
|
||||
let range = Self::create(lower, self.upper());
|
||||
if ret.0.is_none() {
|
||||
ret.0 = Some(range);
|
||||
} else {
|
||||
ret.1 = Some(range);
|
||||
}
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
/// Compute the symmetric difference the given range from this range. This
|
||||
/// returns the union of the two ranges minus its intersection.
|
||||
fn symmetric_difference(
|
||||
&self,
|
||||
other: &Self,
|
||||
) -> (Option<Self>, Option<Self>) {
|
||||
let union = match self.union(other) {
|
||||
None => return (Some(self.clone()), Some(other.clone())),
|
||||
Some(union) => union,
|
||||
};
|
||||
let intersection = match self.intersect(other) {
|
||||
None => return (Some(self.clone()), Some(other.clone())),
|
||||
Some(intersection) => intersection,
|
||||
};
|
||||
union.difference(&intersection)
|
||||
}
|
||||
|
||||
/// Returns true if and only if the two ranges are contiguous. Two ranges
|
||||
/// are contiguous if and only if the ranges are either overlapping or
|
||||
/// adjacent.
|
||||
fn is_contiguous(&self, other: &Self) -> bool {
|
||||
let lower1 = self.lower().as_u32();
|
||||
let upper1 = self.upper().as_u32();
|
||||
let lower2 = other.lower().as_u32();
|
||||
let upper2 = other.upper().as_u32();
|
||||
cmp::max(lower1, lower2) <= cmp::min(upper1, upper2).saturating_add(1)
|
||||
}
|
||||
|
||||
/// Returns true if and only if the intersection of this range and the
|
||||
/// other range is empty.
|
||||
fn is_intersection_empty(&self, other: &Self) -> bool {
|
||||
let (lower1, upper1) = (self.lower(), self.upper());
|
||||
let (lower2, upper2) = (other.lower(), other.upper());
|
||||
cmp::max(lower1, lower2) > cmp::min(upper1, upper2)
|
||||
}
|
||||
|
||||
/// Returns true if and only if this range is a subset of the other range.
|
||||
fn is_subset(&self, other: &Self) -> bool {
|
||||
let (lower1, upper1) = (self.lower(), self.upper());
|
||||
let (lower2, upper2) = (other.lower(), other.upper());
|
||||
(lower2 <= lower1 && lower1 <= upper2)
|
||||
&& (lower2 <= upper1 && upper1 <= upper2)
|
||||
}
|
||||
}
|
||||
|
||||
/// An end point of an interval: an ordered value with a minimum, a maximum,
/// a `u32` representation, and successor/predecessor operations.
pub trait Bound:
    Copy + Clone + Debug + Eq + PartialEq + PartialOrd + Ord
{
    /// The smallest value of this type.
    fn min_value() -> Self;
    /// The largest value of this type.
    fn max_value() -> Self;
    /// This value converted to a `u32`.
    fn as_u32(self) -> u32;
    /// The next value after this one.
    fn increment(self) -> Self;
    /// The value just before this one.
    fn decrement(self) -> Self;
}

impl Bound for u8 {
    fn min_value() -> Self {
        u8::MIN
    }

    fn max_value() -> Self {
        u8::MAX
    }

    fn as_u32(self) -> u32 {
        u32::from(self)
    }

    /// Panics if `self` is already `u8::MAX`.
    fn increment(self) -> Self {
        let next = self.checked_add(1);
        next.unwrap()
    }

    /// Panics if `self` is already `u8::MIN`.
    fn decrement(self) -> Self {
        let prev = self.checked_sub(1);
        prev.unwrap()
    }
}

impl Bound for char {
    fn min_value() -> Self {
        '\0'
    }

    fn max_value() -> Self {
        '\u{10FFFF}'
    }

    fn as_u32(self) -> u32 {
        u32::from(self)
    }

    /// Steps over the gap between `U+D7FF` and `U+E000`. Panics if there is
    /// no valid next scalar value.
    fn increment(self) -> Self {
        if self == '\u{D7FF}' {
            return '\u{E000}';
        }
        let next = (self as u32).checked_add(1).unwrap();
        char::from_u32(next).unwrap()
    }

    /// Steps over the gap between `U+E000` and `U+D7FF`. Panics if there is
    /// no valid previous scalar value.
    fn decrement(self) -> Self {
        if self == '\u{E000}' {
            return '\u{D7FF}';
        }
        let prev = (self as u32).checked_sub(1).unwrap();
        char::from_u32(prev).unwrap()
    }
}
|
||||
|
||||
// Tests for interval sets are written in src/hir.rs against the public API.
|
||||
1686
third-party/vendor/regex-syntax-0.6.29/src/hir/literal/mod.rs
vendored
Normal file
1686
third-party/vendor/regex-syntax-0.6.29/src/hir/literal/mod.rs
vendored
Normal file
File diff suppressed because it is too large
Load diff
2299
third-party/vendor/regex-syntax-0.6.29/src/hir/mod.rs
vendored
Normal file
2299
third-party/vendor/regex-syntax-0.6.29/src/hir/mod.rs
vendored
Normal file
File diff suppressed because it is too large
Load diff
367
third-party/vendor/regex-syntax-0.6.29/src/hir/print.rs
vendored
Normal file
367
third-party/vendor/regex-syntax-0.6.29/src/hir/print.rs
vendored
Normal file
|
|
@ -0,0 +1,367 @@
|
|||
/*!
|
||||
This module provides a regular expression printer for `Hir`.
|
||||
*/
|
||||
|
||||
use std::fmt;
|
||||
|
||||
use crate::hir::visitor::{self, Visitor};
|
||||
use crate::hir::{self, Hir, HirKind};
|
||||
use crate::is_meta_character;
|
||||
|
||||
/// A builder for constructing a printer.
|
||||
///
|
||||
/// Note that since a printer doesn't have any configuration knobs, this type
|
||||
/// remains unexported.
|
||||
#[derive(Clone, Debug)]
|
||||
struct PrinterBuilder {
|
||||
_priv: (),
|
||||
}
|
||||
|
||||
impl Default for PrinterBuilder {
|
||||
fn default() -> PrinterBuilder {
|
||||
PrinterBuilder::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl PrinterBuilder {
|
||||
fn new() -> PrinterBuilder {
|
||||
PrinterBuilder { _priv: () }
|
||||
}
|
||||
|
||||
fn build(&self) -> Printer {
|
||||
Printer { _priv: () }
|
||||
}
|
||||
}
|
||||
|
||||
/// A printer for a regular expression's high-level intermediate
|
||||
/// representation.
|
||||
///
|
||||
/// A printer converts a high-level intermediate representation (HIR) to a
|
||||
/// regular expression pattern string. This particular printer uses constant
|
||||
/// stack space and heap space proportional to the size of the HIR.
|
||||
///
|
||||
/// Since this printer is only using the HIR, the pattern it prints will likely
|
||||
/// not resemble the original pattern at all. For example, a pattern like
|
||||
/// `\pL` will have its entire class written out.
|
||||
///
|
||||
/// The purpose of this printer is to provide a means to mutate an HIR and then
|
||||
/// build a regular expression from the result of that mutation. (A regex
|
||||
/// library could provide a constructor from this HIR explicitly, but that
|
||||
/// creates an unnecessary public coupling between the regex library and this
|
||||
/// specific HIR representation.)
|
||||
#[derive(Debug)]
|
||||
pub struct Printer {
|
||||
_priv: (),
|
||||
}
|
||||
|
||||
impl Printer {
|
||||
/// Create a new printer.
|
||||
pub fn new() -> Printer {
|
||||
PrinterBuilder::new().build()
|
||||
}
|
||||
|
||||
/// Print the given `Ast` to the given writer. The writer must implement
|
||||
/// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
|
||||
/// here are a `fmt::Formatter` (which is available in `fmt::Display`
|
||||
/// implementations) or a `&mut String`.
|
||||
pub fn print<W: fmt::Write>(&mut self, hir: &Hir, wtr: W) -> fmt::Result {
|
||||
visitor::visit(hir, Writer { wtr })
|
||||
}
|
||||
}
|
||||
|
||||
/// The visitor used for printing: it holds the sink that pattern text is
/// written to.
#[derive(Debug)]
struct Writer<W> {
    /// Destination for the printed pattern.
    wtr: W,
}
|
||||
|
||||
impl<W: fmt::Write> Visitor for Writer<W> {
|
||||
type Output = ();
|
||||
type Err = fmt::Error;
|
||||
|
||||
fn finish(self) -> fmt::Result {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn visit_pre(&mut self, hir: &Hir) -> fmt::Result {
|
||||
match *hir.kind() {
|
||||
HirKind::Empty
|
||||
| HirKind::Repetition(_)
|
||||
| HirKind::Concat(_)
|
||||
| HirKind::Alternation(_) => {}
|
||||
HirKind::Literal(hir::Literal::Unicode(c)) => {
|
||||
self.write_literal_char(c)?;
|
||||
}
|
||||
HirKind::Literal(hir::Literal::Byte(b)) => {
|
||||
self.write_literal_byte(b)?;
|
||||
}
|
||||
HirKind::Class(hir::Class::Unicode(ref cls)) => {
|
||||
self.wtr.write_str("[")?;
|
||||
for range in cls.iter() {
|
||||
if range.start() == range.end() {
|
||||
self.write_literal_char(range.start())?;
|
||||
} else {
|
||||
self.write_literal_char(range.start())?;
|
||||
self.wtr.write_str("-")?;
|
||||
self.write_literal_char(range.end())?;
|
||||
}
|
||||
}
|
||||
self.wtr.write_str("]")?;
|
||||
}
|
||||
HirKind::Class(hir::Class::Bytes(ref cls)) => {
|
||||
self.wtr.write_str("(?-u:[")?;
|
||||
for range in cls.iter() {
|
||||
if range.start() == range.end() {
|
||||
self.write_literal_class_byte(range.start())?;
|
||||
} else {
|
||||
self.write_literal_class_byte(range.start())?;
|
||||
self.wtr.write_str("-")?;
|
||||
self.write_literal_class_byte(range.end())?;
|
||||
}
|
||||
}
|
||||
self.wtr.write_str("])")?;
|
||||
}
|
||||
HirKind::Anchor(hir::Anchor::StartLine) => {
|
||||
self.wtr.write_str("(?m:^)")?;
|
||||
}
|
||||
HirKind::Anchor(hir::Anchor::EndLine) => {
|
||||
self.wtr.write_str("(?m:$)")?;
|
||||
}
|
||||
HirKind::Anchor(hir::Anchor::StartText) => {
|
||||
self.wtr.write_str(r"\A")?;
|
||||
}
|
||||
HirKind::Anchor(hir::Anchor::EndText) => {
|
||||
self.wtr.write_str(r"\z")?;
|
||||
}
|
||||
HirKind::WordBoundary(hir::WordBoundary::Unicode) => {
|
||||
self.wtr.write_str(r"\b")?;
|
||||
}
|
||||
HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => {
|
||||
self.wtr.write_str(r"\B")?;
|
||||
}
|
||||
HirKind::WordBoundary(hir::WordBoundary::Ascii) => {
|
||||
self.wtr.write_str(r"(?-u:\b)")?;
|
||||
}
|
||||
HirKind::WordBoundary(hir::WordBoundary::AsciiNegate) => {
|
||||
self.wtr.write_str(r"(?-u:\B)")?;
|
||||
}
|
||||
HirKind::Group(ref x) => match x.kind {
|
||||
hir::GroupKind::CaptureIndex(_) => {
|
||||
self.wtr.write_str("(")?;
|
||||
}
|
||||
hir::GroupKind::CaptureName { ref name, .. } => {
|
||||
write!(self.wtr, "(?P<{}>", name)?;
|
||||
}
|
||||
hir::GroupKind::NonCapturing => {
|
||||
self.wtr.write_str("(?:")?;
|
||||
}
|
||||
},
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn visit_post(&mut self, hir: &Hir) -> fmt::Result {
|
||||
match *hir.kind() {
|
||||
// Handled during visit_pre
|
||||
HirKind::Empty
|
||||
| HirKind::Literal(_)
|
||||
| HirKind::Class(_)
|
||||
| HirKind::Anchor(_)
|
||||
| HirKind::WordBoundary(_)
|
||||
| HirKind::Concat(_)
|
||||
| HirKind::Alternation(_) => {}
|
||||
HirKind::Repetition(ref x) => {
|
||||
match x.kind {
|
||||
hir::RepetitionKind::ZeroOrOne => {
|
||||
self.wtr.write_str("?")?;
|
||||
}
|
||||
hir::RepetitionKind::ZeroOrMore => {
|
||||
self.wtr.write_str("*")?;
|
||||
}
|
||||
hir::RepetitionKind::OneOrMore => {
|
||||
self.wtr.write_str("+")?;
|
||||
}
|
||||
hir::RepetitionKind::Range(ref x) => match *x {
|
||||
hir::RepetitionRange::Exactly(m) => {
|
||||
write!(self.wtr, "{{{}}}", m)?;
|
||||
}
|
||||
hir::RepetitionRange::AtLeast(m) => {
|
||||
write!(self.wtr, "{{{},}}", m)?;
|
||||
}
|
||||
hir::RepetitionRange::Bounded(m, n) => {
|
||||
write!(self.wtr, "{{{},{}}}", m, n)?;
|
||||
}
|
||||
},
|
||||
}
|
||||
if !x.greedy {
|
||||
self.wtr.write_str("?")?;
|
||||
}
|
||||
}
|
||||
HirKind::Group(_) => {
|
||||
self.wtr.write_str(")")?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn visit_alternation_in(&mut self) -> fmt::Result {
|
||||
self.wtr.write_str("|")
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: fmt::Write> Writer<W> {
|
||||
fn write_literal_char(&mut self, c: char) -> fmt::Result {
|
||||
if is_meta_character(c) {
|
||||
self.wtr.write_str("\\")?;
|
||||
}
|
||||
self.wtr.write_char(c)
|
||||
}
|
||||
|
||||
fn write_literal_byte(&mut self, b: u8) -> fmt::Result {
|
||||
let c = b as char;
|
||||
if c <= 0x7F as char && !c.is_control() && !c.is_whitespace() {
|
||||
self.write_literal_char(c)
|
||||
} else {
|
||||
write!(self.wtr, "(?-u:\\x{:02X})", b)
|
||||
}
|
||||
}
|
||||
|
||||
fn write_literal_class_byte(&mut self, b: u8) -> fmt::Result {
|
||||
let c = b as char;
|
||||
if c <= 0x7F as char && !c.is_control() && !c.is_whitespace() {
|
||||
self.write_literal_char(c)
|
||||
} else {
|
||||
write!(self.wtr, "\\x{:02X}", b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::Printer;
    use crate::ParserBuilder;

    /// Parse `given` with default settings, print it, and check the output.
    fn roundtrip(given: &str, expected: &str) {
        roundtrip_with(|b| b, given, expected);
    }

    /// Like `roundtrip`, but with invalid UTF-8 permitted by the parser.
    fn roundtrip_bytes(given: &str, expected: &str) {
        roundtrip_with(|b| b.allow_invalid_utf8(true), given, expected);
    }

    /// Run `roundtrip` over a table of `(given, expected)` pairs.
    fn roundtrip_all(cases: &[(&str, &str)]) {
        for &(given, expected) in cases {
            roundtrip(given, expected);
        }
    }

    /// Run `roundtrip_bytes` over a table of `(given, expected)` pairs.
    fn roundtrip_bytes_all(cases: &[(&str, &str)]) {
        for &(given, expected) in cases {
            roundtrip_bytes(given, expected);
        }
    }

    /// Configure a parser with `f`, parse `given`, print the resulting HIR,
    /// verify the printed pattern parses again, and compare it to `expected`.
    fn roundtrip_with<F>(mut f: F, given: &str, expected: &str)
    where
        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
    {
        let mut builder = ParserBuilder::new();
        f(&mut builder);
        let hir = builder.build().parse(given).unwrap();

        let mut dst = String::new();
        Printer::new().print(&hir, &mut dst).unwrap();

        // Check that the result is actually valid.
        builder.build().parse(&dst).unwrap();

        assert_eq!(expected, dst);
    }

    #[test]
    fn print_literal() {
        roundtrip_all(&[("a", "a"), (r"\xff", "\u{FF}")]);
        roundtrip_bytes_all(&[
            (r"\xff", "\u{FF}"),
            (r"(?-u)\xff", r"(?-u:\xFF)"),
        ]);
        roundtrip("☃", "☃");
    }

    #[test]
    fn print_class() {
        roundtrip_all(&[
            (r"[a]", r"[a]"),
            (r"[a-z]", r"[a-z]"),
            (r"[a-z--b-c--x-y]", r"[ad-wz]"),
            (r"[^\x01-\u{10FFFF}]", "[\u{0}]"),
            (r"[-]", r"[\-]"),
            (r"[☃-⛄]", r"[☃-⛄]"),
            (r"(?-u)[a]", r"(?-u:[a])"),
            (r"(?-u)[a-z]", r"(?-u:[a-z])"),
        ]);
        roundtrip_bytes(r"(?-u)[a-\xFF]", r"(?-u:[a-\xFF])");

        // The following test that the printer escapes meta characters
        // in character classes.
        roundtrip_all(&[
            (r"[\[]", r"[\[]"),
            (r"[Z-_]", r"[Z-_]"),
            (r"[Z-_--Z]", r"[\[-_]"),
        ]);

        // The following test that the printer escapes meta characters
        // in byte oriented character classes.
        roundtrip_bytes_all(&[
            (r"(?-u)[\[]", r"(?-u:[\[])"),
            (r"(?-u)[Z-_]", r"(?-u:[Z-_])"),
            (r"(?-u)[Z-_--Z]", r"(?-u:[\[-_])"),
        ]);
    }

    #[test]
    fn print_anchor() {
        roundtrip_all(&[
            (r"^", r"\A"),
            (r"$", r"\z"),
            (r"(?m)^", r"(?m:^)"),
            (r"(?m)$", r"(?m:$)"),
        ]);
    }

    #[test]
    fn print_word_boundary() {
        roundtrip_all(&[
            (r"\b", r"\b"),
            (r"\B", r"\B"),
            (r"(?-u)\b", r"(?-u:\b)"),
        ]);
        roundtrip_bytes(r"(?-u)\B", r"(?-u:\B)");
    }

    #[test]
    fn print_repetition() {
        roundtrip_all(&[
            ("a?", "a?"),
            ("a??", "a??"),
            ("(?U)a?", "a??"),
            ("a*", "a*"),
            ("a*?", "a*?"),
            ("(?U)a*", "a*?"),
            ("a+", "a+"),
            ("a+?", "a+?"),
            ("(?U)a+", "a+?"),
            ("a{1}", "a{1}"),
            ("a{1,}", "a{1,}"),
            ("a{1,5}", "a{1,5}"),
            ("a{1}?", "a{1}?"),
            ("a{1,}?", "a{1,}?"),
            ("a{1,5}?", "a{1,5}?"),
            ("(?U)a{1}", "a{1}?"),
            ("(?U)a{1,}", "a{1,}?"),
            ("(?U)a{1,5}", "a{1,5}?"),
        ]);
    }

    #[test]
    fn print_group() {
        roundtrip_all(&[
            ("()", "()"),
            ("(?P<foo>)", "(?P<foo>)"),
            ("(?:)", "(?:)"),
            ("(a)", "(a)"),
            ("(?P<foo>a)", "(?P<foo>a)"),
            ("(?:a)", "(?:a)"),
            ("((((a))))", "((((a))))"),
        ]);
    }

    #[test]
    fn print_alternation() {
        roundtrip_all(&[
            ("|", "|"),
            ("||", "||"),
            ("a|b", "a|b"),
            ("a|b|c", "a|b|c"),
            ("foo|bar|quux", "foo|bar|quux"),
        ]);
    }
}
|
||||
3207
third-party/vendor/regex-syntax-0.6.29/src/hir/translate.rs
vendored
Normal file
3207
third-party/vendor/regex-syntax-0.6.29/src/hir/translate.rs
vendored
Normal file
File diff suppressed because it is too large
Load diff
203
third-party/vendor/regex-syntax-0.6.29/src/hir/visitor.rs
vendored
Normal file
203
third-party/vendor/regex-syntax-0.6.29/src/hir/visitor.rs
vendored
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
use crate::hir::{self, Hir, HirKind};
|
||||
|
||||
/// A trait for visiting the high-level IR (HIR) in depth first order.
|
||||
///
|
||||
/// The principle aim of this trait is to enable callers to perform case
|
||||
/// analysis on a high-level intermediate representation of a regular
|
||||
/// expression without necessarily using recursion. In particular, this permits
|
||||
/// callers to do case analysis with constant stack usage, which can be
|
||||
/// important since the size of an HIR may be proportional to end user input.
|
||||
///
|
||||
/// Typical usage of this trait involves providing an implementation and then
|
||||
/// running it using the [`visit`](fn.visit.html) function.
|
||||
pub trait Visitor {
|
||||
/// The result of visiting an HIR.
|
||||
type Output;
|
||||
/// An error that visiting an HIR might return.
|
||||
type Err;
|
||||
|
||||
/// All implementors of `Visitor` must provide a `finish` method, which
|
||||
/// yields the result of visiting the HIR or an error.
|
||||
fn finish(self) -> Result<Self::Output, Self::Err>;
|
||||
|
||||
/// This method is called before beginning traversal of the HIR.
|
||||
fn start(&mut self) {}
|
||||
|
||||
/// This method is called on an `Hir` before descending into child `Hir`
|
||||
/// nodes.
|
||||
fn visit_pre(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// This method is called on an `Hir` after descending all of its child
|
||||
/// `Hir` nodes.
|
||||
fn visit_post(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// This method is called between child nodes of an alternation.
|
||||
fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Executes an implementation of `Visitor` in constant stack space.
|
||||
///
|
||||
/// This function will visit every node in the given `Hir` while calling
|
||||
/// appropriate methods provided by the
|
||||
/// [`Visitor`](trait.Visitor.html) trait.
|
||||
///
|
||||
/// The primary use case for this method is when one wants to perform case
|
||||
/// analysis over an `Hir` without using a stack size proportional to the depth
|
||||
/// of the `Hir`. Namely, this method will instead use constant stack space,
|
||||
/// but will use heap space proportional to the size of the `Hir`. This may be
|
||||
/// desirable in cases where the size of `Hir` is proportional to end user
|
||||
/// input.
|
||||
///
|
||||
/// If the visitor returns an error at any point, then visiting is stopped and
|
||||
/// the error is returned.
|
||||
pub fn visit<V: Visitor>(hir: &Hir, visitor: V) -> Result<V::Output, V::Err> {
|
||||
HeapVisitor::new().visit(hir, visitor)
|
||||
}
|
||||
|
||||
/// HeapVisitor visits every item in an `Hir` recursively using constant stack
|
||||
/// size and a heap size proportional to the size of the `Hir`.
|
||||
struct HeapVisitor<'a> {
|
||||
/// A stack of `Hir` nodes. This is roughly analogous to the call stack
|
||||
/// used in a typical recursive visitor.
|
||||
stack: Vec<(&'a Hir, Frame<'a>)>,
|
||||
}
|
||||
|
||||
/// Represents a single stack frame while performing structural induction over
|
||||
/// an `Hir`.
|
||||
enum Frame<'a> {
|
||||
/// A stack frame allocated just before descending into a repetition
|
||||
/// operator's child node.
|
||||
Repetition(&'a hir::Repetition),
|
||||
/// A stack frame allocated just before descending into a group's child
|
||||
/// node.
|
||||
Group(&'a hir::Group),
|
||||
/// The stack frame used while visiting every child node of a concatenation
|
||||
/// of expressions.
|
||||
Concat {
|
||||
/// The child node we are currently visiting.
|
||||
head: &'a Hir,
|
||||
/// The remaining child nodes to visit (which may be empty).
|
||||
tail: &'a [Hir],
|
||||
},
|
||||
/// The stack frame used while visiting every child node of an alternation
|
||||
/// of expressions.
|
||||
Alternation {
|
||||
/// The child node we are currently visiting.
|
||||
head: &'a Hir,
|
||||
/// The remaining child nodes to visit (which may be empty).
|
||||
tail: &'a [Hir],
|
||||
},
|
||||
}
|
||||
|
||||
impl<'a> HeapVisitor<'a> {
|
||||
fn new() -> HeapVisitor<'a> {
|
||||
HeapVisitor { stack: vec![] }
|
||||
}
|
||||
|
||||
fn visit<V: Visitor>(
|
||||
&mut self,
|
||||
mut hir: &'a Hir,
|
||||
mut visitor: V,
|
||||
) -> Result<V::Output, V::Err> {
|
||||
self.stack.clear();
|
||||
|
||||
visitor.start();
|
||||
loop {
|
||||
visitor.visit_pre(hir)?;
|
||||
if let Some(x) = self.induct(hir) {
|
||||
let child = x.child();
|
||||
self.stack.push((hir, x));
|
||||
hir = child;
|
||||
continue;
|
||||
}
|
||||
// No induction means we have a base case, so we can post visit
|
||||
// it now.
|
||||
visitor.visit_post(hir)?;
|
||||
|
||||
// At this point, we now try to pop our call stack until it is
|
||||
// either empty or we hit another inductive case.
|
||||
loop {
|
||||
let (post_hir, frame) = match self.stack.pop() {
|
||||
None => return visitor.finish(),
|
||||
Some((post_hir, frame)) => (post_hir, frame),
|
||||
};
|
||||
// If this is a concat/alternate, then we might have additional
|
||||
// inductive steps to process.
|
||||
if let Some(x) = self.pop(frame) {
|
||||
if let Frame::Alternation { .. } = x {
|
||||
visitor.visit_alternation_in()?;
|
||||
}
|
||||
hir = x.child();
|
||||
self.stack.push((post_hir, x));
|
||||
break;
|
||||
}
|
||||
// Otherwise, we've finished visiting all the child nodes for
|
||||
// this HIR, so we can post visit it now.
|
||||
visitor.visit_post(post_hir)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a stack frame for the given HIR if one is needed (which occurs if
|
||||
/// and only if there are child nodes in the HIR). Otherwise, return None.
|
||||
fn induct(&mut self, hir: &'a Hir) -> Option<Frame<'a>> {
|
||||
match *hir.kind() {
|
||||
HirKind::Repetition(ref x) => Some(Frame::Repetition(x)),
|
||||
HirKind::Group(ref x) => Some(Frame::Group(x)),
|
||||
HirKind::Concat(ref x) if x.is_empty() => None,
|
||||
HirKind::Concat(ref x) => {
|
||||
Some(Frame::Concat { head: &x[0], tail: &x[1..] })
|
||||
}
|
||||
HirKind::Alternation(ref x) if x.is_empty() => None,
|
||||
HirKind::Alternation(ref x) => {
|
||||
Some(Frame::Alternation { head: &x[0], tail: &x[1..] })
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Pops the given frame. If the frame has an additional inductive step,
|
||||
/// then return it, otherwise return `None`.
|
||||
fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
|
||||
match induct {
|
||||
Frame::Repetition(_) => None,
|
||||
Frame::Group(_) => None,
|
||||
Frame::Concat { tail, .. } => {
|
||||
if tail.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
|
||||
}
|
||||
}
|
||||
Frame::Alternation { tail, .. } => {
|
||||
if tail.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Frame::Alternation {
|
||||
head: &tail[0],
|
||||
tail: &tail[1..],
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Frame<'a> {
|
||||
/// Perform the next inductive step on this frame and return the next
|
||||
/// child HIR node to visit.
|
||||
fn child(&self) -> &'a Hir {
|
||||
match *self {
|
||||
Frame::Repetition(rep) => &rep.hir,
|
||||
Frame::Group(group) => &group.hir,
|
||||
Frame::Concat { head, .. } => head,
|
||||
Frame::Alternation { head, .. } => head,
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue