Vendor things
This commit is contained in:
parent
5deceec006
commit
977e3c17e5
19434 changed files with 10682014 additions and 0 deletions
461
third-party/vendor/regex-automata/src/util/lazy.rs
vendored
Normal file
461
third-party/vendor/regex-automata/src/util/lazy.rs
vendored
Normal file
|
|
@ -0,0 +1,461 @@
|
|||
/*!
|
||||
A lazily initialized value for safe sharing between threads.
|
||||
|
||||
The principal type in this module is `Lazy`, which makes it easy to construct
|
||||
values that are shared safely across multiple threads simultaneously.
|
||||
*/
|
||||
|
||||
use core::fmt;
|
||||
|
||||
/// A lazily initialized value that implements `Deref` for `T`.
|
||||
///
|
||||
/// A `Lazy` takes an initialization function and permits callers from any
|
||||
/// thread to access the result of that initialization function in a safe
|
||||
/// manner. In effect, this permits one-time initialization of global resources
|
||||
/// in a (possibly) multi-threaded program.
|
||||
///
|
||||
/// This type and its functionality are available even when neither the `alloc`
|
||||
/// nor the `std` features are enabled. In exchange, a `Lazy` does **not**
|
||||
/// guarantee that the given `create` function is called at most once. It
|
||||
/// might be called multiple times. Moreover, a call to `Lazy::get` (either
|
||||
/// explicitly or implicitly via `Lazy`'s `Deref` impl) may block until a `T`
|
||||
/// is available.
|
||||
///
|
||||
/// This is very similar to `lazy_static` or `once_cell`, except it doesn't
|
||||
/// guarantee that the initialization function will be run once and it works
|
||||
/// in no-alloc no-std environments. With that said, if you need stronger
|
||||
/// guarantees or a more flexible API, then it is recommended to use either
|
||||
/// `lazy_static` or `once_cell`.
|
||||
///
|
||||
/// # Warning: may use a spin lock
|
||||
///
|
||||
/// When this crate is compiled _without_ the `alloc` feature, then this type
|
||||
/// may used a spin lock internally. This can have subtle effects that may
|
||||
/// be undesirable. See [Spinlocks Considered Harmful][spinharm] for a more
|
||||
/// thorough treatment of this topic.
|
||||
///
|
||||
/// [spinharm]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// This type is useful for creating regexes once, and then using them from
|
||||
/// multiple threads simultaneously without worrying about synchronization.
|
||||
///
|
||||
/// ```
|
||||
/// use regex_automata::{dfa::regex::Regex, util::lazy::Lazy, Match};
|
||||
///
|
||||
/// static RE: Lazy<Regex> = Lazy::new(|| Regex::new("foo[0-9]+bar").unwrap());
|
||||
///
|
||||
/// let expected = Some(Match::must(0, 3..14));
|
||||
/// assert_eq!(expected, RE.find(b"zzzfoo12345barzzz"));
|
||||
/// ```
|
||||
pub struct Lazy<T, F = fn() -> T>(lazy::Lazy<T, F>);
|
||||
|
||||
impl<T, F> Lazy<T, F> {
|
||||
/// Create a new `Lazy` value that is initialized via the given function.
|
||||
///
|
||||
/// The `T` type is automatically inferred from the return type of the
|
||||
/// `create` function given.
|
||||
pub const fn new(create: F) -> Lazy<T, F> {
|
||||
Lazy(lazy::Lazy::new(create))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, F: Fn() -> T> Lazy<T, F> {
|
||||
/// Return a reference to the lazily initialized value.
|
||||
///
|
||||
/// This routine may block if another thread is initializing a `T`.
|
||||
///
|
||||
/// Note that given a `x` which has type `Lazy`, this must be called via
|
||||
/// `Lazy::get(x)` and not `x.get()`. This routine is defined this way
|
||||
/// because `Lazy` impls `Deref` with a target of `T`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// This panics if the `create` function inside this lazy value panics.
|
||||
/// If the panic occurred in another thread, then this routine _may_ also
|
||||
/// panic (but is not guaranteed to do so).
|
||||
pub fn get(this: &Lazy<T, F>) -> &T {
|
||||
this.0.get()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, F: Fn() -> T> core::ops::Deref for Lazy<T, F> {
|
||||
type Target = T;
|
||||
|
||||
fn deref(&self) -> &T {
|
||||
Lazy::get(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "alloc")]
|
||||
mod lazy {
|
||||
use core::{
|
||||
fmt,
|
||||
marker::PhantomData,
|
||||
sync::atomic::{AtomicPtr, Ordering},
|
||||
};
|
||||
|
||||
use alloc::boxed::Box;
|
||||
|
||||
/// A non-std lazy initialized value.
|
||||
///
|
||||
/// This might run the initialization function more than once, but will
|
||||
/// never block.
|
||||
///
|
||||
/// I wish I could get these semantics into the non-alloc non-std Lazy
|
||||
/// type below, but I'm not sure how to do it. If you can do an alloc,
|
||||
/// then the implementation becomes very simple if you don't care about
|
||||
/// redundant work precisely because a pointer can be atomically swapped.
|
||||
///
|
||||
/// Perhaps making this approach work in the non-alloc non-std case
|
||||
/// requires asking the caller for a pointer? It would make the API less
|
||||
/// convenient I think.
|
||||
pub(super) struct Lazy<T, F> {
|
||||
data: AtomicPtr<T>,
|
||||
create: F,
|
||||
// This indicates to the compiler that this type can drop T. It's not
|
||||
// totally clear how the absence of this marker could lead to trouble,
|
||||
// but putting here doesn't have any downsides so we hedge until somone
|
||||
// can from the Unsafe Working Group can tell us definitively that we
|
||||
// don't need it.
|
||||
//
|
||||
// See: https://github.com/BurntSushi/regex-automata/issues/30
|
||||
owned: PhantomData<Box<T>>,
|
||||
}
|
||||
|
||||
// SAFETY: So long as T and &T (and F and &F) can themselves be safely
|
||||
// shared among threads, so to can a Lazy<T, _>. Namely, the Lazy API only
|
||||
// permits accessing a &T and initialization is free of data races. So if T
|
||||
// is thread safe, then so to is Lazy<T, _>.
|
||||
//
|
||||
// We specifically require that T: Send in order for Lazy<T> to be Sync.
|
||||
// Without that requirement, it's possible to send a T from one thread to
|
||||
// another via Lazy's destructor.
|
||||
//
|
||||
// It's not clear whether we need F: Send+Sync for Lazy to be Sync. But
|
||||
// we're conservative for now and keep both.
|
||||
unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {}
|
||||
|
||||
impl<T, F> Lazy<T, F> {
|
||||
/// Create a new alloc but non-std lazy value that is racily
|
||||
/// initialized. That is, the 'create' function may be called more than
|
||||
/// once.
|
||||
pub(super) const fn new(create: F) -> Lazy<T, F> {
|
||||
Lazy {
|
||||
data: AtomicPtr::new(core::ptr::null_mut()),
|
||||
create,
|
||||
owned: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, F: Fn() -> T> Lazy<T, F> {
|
||||
/// Get the underlying lazy value. If it hasn't been initialized
|
||||
/// yet, then always attempt to initialize it (even if some other
|
||||
/// thread is initializing it) and atomically attach it to this lazy
|
||||
/// value before returning it.
|
||||
pub(super) fn get(&self) -> &T {
|
||||
if let Some(data) = self.poll() {
|
||||
return data;
|
||||
}
|
||||
let data = (self.create)();
|
||||
let mut ptr = Box::into_raw(Box::new(data));
|
||||
// We attempt to stuff our initialized value into our atomic
|
||||
// pointer. Upon success, we don't need to do anything. But if
|
||||
// someone else beat us to the punch, then we need to make sure
|
||||
// our newly created value is dropped.
|
||||
let result = self.data.compare_exchange(
|
||||
core::ptr::null_mut(),
|
||||
ptr,
|
||||
Ordering::AcqRel,
|
||||
Ordering::Acquire,
|
||||
);
|
||||
if let Err(old) = result {
|
||||
// SAFETY: We created 'ptr' via Box::into_raw above, so turning
|
||||
// it back into a Box via from_raw is safe.
|
||||
drop(unsafe { Box::from_raw(ptr) });
|
||||
ptr = old;
|
||||
}
|
||||
// SAFETY: We just set the pointer above to a non-null value, even
|
||||
// in the error case, and set it to a fully initialized value
|
||||
// returned by 'create'.
|
||||
unsafe { &*ptr }
|
||||
}
|
||||
|
||||
/// If this lazy value has been initialized successfully, then return
|
||||
/// that value. Otherwise return None immediately. This never attempts
|
||||
/// to run initialization itself.
|
||||
fn poll(&self) -> Option<&T> {
|
||||
let ptr = self.data.load(Ordering::Acquire);
|
||||
if ptr.is_null() {
|
||||
return None;
|
||||
}
|
||||
// SAFETY: We just checked that the pointer is not null. Since it's
|
||||
// not null, it must have been fully initialized by 'get' at some
|
||||
// point.
|
||||
Some(unsafe { &*ptr })
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.debug_struct("Lazy").field("data", &self.poll()).finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, F> Drop for Lazy<T, F> {
|
||||
fn drop(&mut self) {
|
||||
let ptr = *self.data.get_mut();
|
||||
if !ptr.is_null() {
|
||||
// SAFETY: We just checked that 'ptr' is not null. And since
|
||||
// we have exclusive access, there are no races to worry about.
|
||||
drop(unsafe { Box::from_raw(ptr) });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "alloc"))]
|
||||
mod lazy {
|
||||
use core::{
|
||||
cell::Cell,
|
||||
fmt,
|
||||
mem::MaybeUninit,
|
||||
panic::{RefUnwindSafe, UnwindSafe},
|
||||
sync::atomic::{AtomicU8, Ordering},
|
||||
};
|
||||
|
||||
/// Our 'Lazy' value can be in one of three states:
|
||||
///
|
||||
/// * INIT is where it starts, and also ends up back here if the
|
||||
/// 'create' routine panics.
|
||||
/// * BUSY is where it sits while initialization is running in exactly
|
||||
/// one thread.
|
||||
/// * DONE is where it sits after 'create' has completed and 'data' has
|
||||
/// been fully initialized.
|
||||
const LAZY_STATE_INIT: u8 = 0;
|
||||
const LAZY_STATE_BUSY: u8 = 1;
|
||||
const LAZY_STATE_DONE: u8 = 2;
|
||||
|
||||
/// A non-alloc non-std lazy initialized value.
|
||||
///
|
||||
/// This guarantees initialization only happens once, but uses a spinlock
|
||||
/// to block in the case of simultaneous access. Blocking occurs so that
|
||||
/// one thread waits while another thread initializes the value.
|
||||
///
|
||||
/// I would much rather have the semantics of the 'alloc' Lazy type above.
|
||||
/// Namely, that we might run the initialization function more than once,
|
||||
/// but we never otherwise block. However, I don't know how to do that in
|
||||
/// a non-alloc non-std context.
|
||||
pub(super) struct Lazy<T, F> {
|
||||
state: AtomicU8,
|
||||
create: Cell<Option<F>>,
|
||||
data: Cell<MaybeUninit<T>>,
|
||||
}
|
||||
|
||||
// SAFETY: So long as T and &T (and F and &F) can themselves be safely
|
||||
// shared among threads, so to can a Lazy<T, _>. Namely, the Lazy API only
|
||||
// permits accessing a &T and initialization is free of data races. So if T
|
||||
// is thread safe, then so to is Lazy<T, _>.
|
||||
unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {}
|
||||
// A reference to a Lazy is unwind safe because we specifically take
|
||||
// precautions to poison all accesses to a Lazy if the caller-provided
|
||||
// 'create' function panics.
|
||||
impl<T: UnwindSafe, F: UnwindSafe + RefUnwindSafe> RefUnwindSafe
|
||||
for Lazy<T, F>
|
||||
{
|
||||
}
|
||||
|
||||
impl<T, F> Lazy<T, F> {
|
||||
/// Create a new non-alloc non-std lazy value that is initialized
|
||||
/// exactly once on first use using the given function.
|
||||
pub(super) const fn new(create: F) -> Lazy<T, F> {
|
||||
Lazy {
|
||||
state: AtomicU8::new(LAZY_STATE_INIT),
|
||||
create: Cell::new(Some(create)),
|
||||
data: Cell::new(MaybeUninit::uninit()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, F: FnOnce() -> T> Lazy<T, F> {
|
||||
/// Get the underlying lazy value. If it isn't been initialized
|
||||
/// yet, then either initialize it or block until some other thread
|
||||
/// initializes it. If the 'create' function given to Lazy::new panics
|
||||
/// (even in another thread), then this panics too.
|
||||
pub(super) fn get(&self) -> &T {
|
||||
// This is effectively a spinlock. We loop until we enter a DONE
|
||||
// state, and if possible, initialize it ourselves. The only way
|
||||
// we exit the loop is if 'create' panics, we initialize 'data' or
|
||||
// some other thread initializes 'data'.
|
||||
//
|
||||
// Yes, I have read spinlocks considered harmful[1]. And that
|
||||
// article is why this spinlock is only active when 'alloc' isn't
|
||||
// enabled. I did this because I don't think there is really
|
||||
// another choice without 'alloc', other than not providing this at
|
||||
// all. But I think that's a big bummer.
|
||||
//
|
||||
// [1]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
|
||||
while self.state.load(Ordering::Acquire) != LAZY_STATE_DONE {
|
||||
// Check if we're the first ones to get here. If so, we'll be
|
||||
// the ones who initialize.
|
||||
let result = self.state.compare_exchange(
|
||||
LAZY_STATE_INIT,
|
||||
LAZY_STATE_BUSY,
|
||||
Ordering::AcqRel,
|
||||
Ordering::Acquire,
|
||||
);
|
||||
// This means we saw the INIT state and nobody else can. So we
|
||||
// must take responsibility for initializing. And by virtue of
|
||||
// observing INIT, we have also told anyone else trying to
|
||||
// get here that we are BUSY. If someone else sees BUSY, then
|
||||
// they will spin until we finish initialization.
|
||||
if let Ok(_) = result {
|
||||
// Since we are guaranteed to be the only ones here, we
|
||||
// know that 'create' is there... Unless someone else got
|
||||
// here before us and 'create' panicked. In which case,
|
||||
// 'self.create' is now 'None' and we forward the panic
|
||||
// to the caller. (i.e., We implement poisoning.)
|
||||
//
|
||||
// SAFETY: Our use of 'self.state' guarantees that we are
|
||||
// the only thread executing this line, and thus there are
|
||||
// no races.
|
||||
let create = unsafe {
|
||||
(*self.create.as_ptr()).take().expect(
|
||||
"Lazy's create function panicked, \
|
||||
preventing initialization,
|
||||
poisoning current thread",
|
||||
)
|
||||
};
|
||||
let guard = Guard { state: &self.state };
|
||||
// SAFETY: Our use of 'self.state' guarantees that we are
|
||||
// the only thread executing this line, and thus there are
|
||||
// no races.
|
||||
unsafe {
|
||||
(*self.data.as_ptr()).as_mut_ptr().write(create());
|
||||
}
|
||||
// All is well. 'self.create' ran successfully, so we
|
||||
// forget the guard.
|
||||
core::mem::forget(guard);
|
||||
// Everything is initialized, so we can declare success.
|
||||
self.state.store(LAZY_STATE_DONE, Ordering::Release);
|
||||
break;
|
||||
}
|
||||
core::hint::spin_loop();
|
||||
}
|
||||
// We only get here if data is fully initialized, and thus poll
|
||||
// will always return something.
|
||||
self.poll().unwrap()
|
||||
}
|
||||
|
||||
/// If this lazy value has been initialized successfully, then return
|
||||
/// that value. Otherwise return None immediately. This never blocks.
|
||||
fn poll(&self) -> Option<&T> {
|
||||
if self.state.load(Ordering::Acquire) == LAZY_STATE_DONE {
|
||||
// SAFETY: The DONE state only occurs when data has been fully
|
||||
// initialized.
|
||||
Some(unsafe { &*(*self.data.as_ptr()).as_ptr() })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: fmt::Debug, F: FnMut() -> T> fmt::Debug for Lazy<T, F> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.debug_struct("Lazy")
|
||||
.field("state", &self.state.load(Ordering::Acquire))
|
||||
.field("create", &"<closure>")
|
||||
.field("data", &self.poll())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, F> Drop for Lazy<T, F> {
|
||||
fn drop(&mut self) {
|
||||
if *self.state.get_mut() == LAZY_STATE_DONE {
|
||||
// SAFETY: state is DONE if and only if data has been fully
|
||||
// initialized. At which point, it is safe to drop.
|
||||
unsafe {
|
||||
self.data.get_mut().assume_init_drop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A guard that will reset a Lazy's state back to INIT when dropped. The
|
||||
/// idea here is to 'forget' this guard on success. On failure (when a
|
||||
/// panic occurs), the Drop impl runs and causes all in-progress and future
|
||||
/// 'get' calls to panic. Without this guard, all in-progress and future
|
||||
/// 'get' calls would spin forever. Crashing is much better than getting
|
||||
/// stuck in an infinite loop.
|
||||
struct Guard<'a> {
|
||||
state: &'a AtomicU8,
|
||||
}
|
||||
|
||||
impl<'a> Drop for Guard<'a> {
|
||||
fn drop(&mut self) {
|
||||
// We force ourselves back into an INIT state. This will in turn
|
||||
// cause any future 'get' calls to attempt calling 'self.create'
|
||||
// again which will in turn panic because 'self.create' will now
|
||||
// be 'None'.
|
||||
self.state.store(LAZY_STATE_INIT, Ordering::Release);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn assert_send<T: Send>() {}
|
||||
fn assert_sync<T: Sync>() {}
|
||||
fn assert_unwind<T: core::panic::UnwindSafe>() {}
|
||||
fn assert_refunwind<T: core::panic::RefUnwindSafe>() {}
|
||||
|
||||
#[test]
|
||||
fn oibits() {
|
||||
assert_send::<Lazy<u64>>();
|
||||
assert_sync::<Lazy<u64>>();
|
||||
assert_unwind::<Lazy<u64>>();
|
||||
assert_refunwind::<Lazy<u64>>();
|
||||
}
|
||||
|
||||
// This is a regression test because we used to rely on the inferred Sync
|
||||
// impl for the Lazy type defined above (for 'alloc' mode). In the
|
||||
// inferred impl, it only requires that T: Sync for Lazy<T>: Sync. But
|
||||
// if we have that, we can actually make use of the fact that Lazy<T> drops
|
||||
// T to create a value on one thread and drop it on another. This *should*
|
||||
// require T: Send, but our missing bounds before let it sneak by.
|
||||
//
|
||||
// Basically, this test should not compile, so we... comment it out. We
|
||||
// don't have a great way of testing compile-fail tests right now.
|
||||
//
|
||||
// See: https://github.com/BurntSushi/regex-automata/issues/30
|
||||
/*
|
||||
#[test]
|
||||
fn sync_not_send() {
|
||||
#[allow(dead_code)]
|
||||
fn inner<T: Sync + Default>() {
|
||||
let lazy = Lazy::new(move || T::default());
|
||||
std::thread::scope(|scope| {
|
||||
scope.spawn(|| {
|
||||
Lazy::get(&lazy); // We create T in this thread
|
||||
});
|
||||
});
|
||||
// And drop in this thread.
|
||||
drop(lazy);
|
||||
// So we have send a !Send type over threads. (with some more
|
||||
// legwork, its possible to even sneak the value out of drop
|
||||
// through thread local)
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue