Vendor things
This commit is contained in:
parent
5deceec006
commit
977e3c17e5
19434 changed files with 10682014 additions and 0 deletions
388
third-party/vendor/string_cache/src/atom.rs
vendored
Normal file
388
third-party/vendor/string_cache/src/atom.rs
vendored
Normal file
|
|
@ -0,0 +1,388 @@
|
|||
// Copyright 2014 The Servo Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use crate::dynamic_set::{Entry, DYNAMIC_SET};
|
||||
use crate::static_sets::StaticAtomSet;
|
||||
use debug_unreachable::debug_unreachable;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::cmp::Ordering::{self, Equal};
|
||||
use std::fmt;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::marker::PhantomData;
|
||||
use std::mem;
|
||||
use std::num::NonZeroU64;
|
||||
use std::ops;
|
||||
use std::slice;
|
||||
use std::str;
|
||||
use std::sync::atomic::Ordering::SeqCst;
|
||||
|
||||
// Layout of an atom's packed `u64`: the two lowest bits hold one of the tags below.

/// Tag of an atom whose packed value is the address of a dynamic-set `Entry`.
const DYNAMIC_TAG: u8 = 0b00;
/// Tag of an atom whose bytes are stored inline; the length lives in the upper
/// nybble of the lowest byte.
const INLINE_TAG: u8 = 0b01;
/// Tag of an atom whose upper bits are an index into the static atom set.
const STATIC_TAG: u8 = 0b10;
/// Mask that isolates the tag bits.
const TAG_MASK: u64 = 0b11;
/// Number of bits an inline length is shifted by.
const LEN_OFFSET: u64 = 4;
/// Mask that isolates the inline length (before shifting it down).
const LEN_MASK: u64 = 0xF << LEN_OFFSET;

/// Longest string, in bytes, that can be stored inline.
const MAX_INLINE_LEN: usize = 7;
/// Number of bits a static index is shifted by.
const STATIC_SHIFT_BITS: usize = 32;
|
||||
|
||||
/// Represents a string that has been interned.
|
||||
///
|
||||
/// While the type definition for `Atom` indicates that it is generic on a particular
|
||||
/// implementation of an atom set, you don't need to worry about this. Atoms can be static
|
||||
/// and come from a `StaticAtomSet` generated by the `string_cache_codegen` crate, or they
|
||||
/// can be dynamic and created by you on an `EmptyStaticAtomSet`.
|
||||
///
|
||||
/// `Atom` implements `Clone` but not `Copy`, since internally atoms are reference-counted;
|
||||
/// this means that you may need to `.clone()` an atom to keep copies to it in different
|
||||
/// places, or when passing it to a function that takes an `Atom` rather than an `&Atom`.
|
||||
///
|
||||
/// ## Creating an atom at runtime
|
||||
///
|
||||
/// If you use `string_cache_codegen` to generate a precomputed list of atoms, your code
|
||||
/// may then do something like read data from somewhere and extract tokens that need to be
|
||||
/// compared to the atoms. In this case, you can use `Atom::from(&str)` or
|
||||
/// `Atom::from(String)`. These create a reference-counted atom which will be
|
||||
/// automatically freed when all references to it are dropped.
|
||||
///
|
||||
/// This means that your application can safely have a loop which tokenizes data, creates
|
||||
/// atoms from the tokens, and compares the atoms to a predefined set of keywords, without
|
||||
/// running the risk of arbitrary memory consumption from creating large numbers of atoms —
|
||||
/// as long as your application does not store clones of the atoms it creates along the
|
||||
/// way.
|
||||
///
|
||||
/// For example, the following is safe and will not consume arbitrary amounts of memory:
|
||||
///
|
||||
/// ```ignore
|
||||
/// let untrusted_data = "large amounts of text ...";
|
||||
///
|
||||
/// for token in untrusted_data.split_whitespace() {
|
||||
/// let atom = Atom::from(token); // interns the string
|
||||
///
|
||||
/// if atom == Atom::from("keyword") {
|
||||
/// // handle that keyword
|
||||
/// } else if atom == Atom::from("another_keyword") {
|
||||
/// // handle that keyword
|
||||
/// } else {
|
||||
/// println!("unknown keyword");
|
||||
/// }
|
||||
/// } // atom is dropped here, so it is not kept around in memory
|
||||
/// ```
|
||||
// NOTE: The derived equality compares only the packed representation, which requires
// that a given string must always be interned the same way.
#[derive(Eq, PartialEq)]
pub struct Atom<Static> {
    // Packed representation: a tag in the low bits plus a pointer, inline bytes,
    // or a static index. Never zero.
    unsafe_data: NonZeroU64,
    // Records which static atom set this atom belongs to without storing one.
    phantom: PhantomData<Static>,
}
|
||||
|
||||
// FIXME: bound removed from the struct definition because of this error for pack_static:
|
||||
// "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable"
|
||||
// https://github.com/rust-lang/rust/issues/57563
|
||||
impl<Static> Atom<Static> {
|
||||
/// For the atom!() macros
|
||||
#[inline(always)]
|
||||
#[doc(hidden)]
|
||||
pub const fn pack_static(n: u32) -> Self {
|
||||
Self {
|
||||
unsafe_data: unsafe {
|
||||
// STATIC_TAG ensures this is non-zero
|
||||
NonZeroU64::new_unchecked((STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS))
|
||||
},
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn tag(&self) -> u8 {
|
||||
(self.unsafe_data.get() & TAG_MASK) as u8
|
||||
}
|
||||
}
|
||||
|
||||
impl<Static: StaticAtomSet> Atom<Static> {
|
||||
/// Return the internal representation. For testing.
|
||||
#[doc(hidden)]
|
||||
pub fn unsafe_data(&self) -> u64 {
|
||||
self.unsafe_data.get()
|
||||
}
|
||||
|
||||
/// Return true if this is a static Atom. For testing.
|
||||
#[doc(hidden)]
|
||||
pub fn is_static(&self) -> bool {
|
||||
self.tag() == STATIC_TAG
|
||||
}
|
||||
|
||||
/// Return true if this is a dynamic Atom. For testing.
|
||||
#[doc(hidden)]
|
||||
pub fn is_dynamic(&self) -> bool {
|
||||
self.tag() == DYNAMIC_TAG
|
||||
}
|
||||
|
||||
/// Return true if this is an inline Atom. For testing.
|
||||
#[doc(hidden)]
|
||||
pub fn is_inline(&self) -> bool {
|
||||
self.tag() == INLINE_TAG
|
||||
}
|
||||
|
||||
fn static_index(&self) -> u64 {
|
||||
self.unsafe_data.get() >> STATIC_SHIFT_BITS
|
||||
}
|
||||
|
||||
/// Get the hash of the string as it is stored in the set.
|
||||
pub fn get_hash(&self) -> u32 {
|
||||
match self.tag() {
|
||||
DYNAMIC_TAG => {
|
||||
let entry = self.unsafe_data.get() as *const Entry;
|
||||
unsafe { (*entry).hash }
|
||||
}
|
||||
STATIC_TAG => Static::get().hashes[self.static_index() as usize],
|
||||
INLINE_TAG => {
|
||||
let data = self.unsafe_data.get();
|
||||
// This may or may not be great...
|
||||
((data >> 32) ^ data) as u32
|
||||
}
|
||||
_ => unsafe { debug_unreachable!() },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_static(string_to_add: &str) -> Option<Self> {
|
||||
Self::try_static_internal(string_to_add).ok()
|
||||
}
|
||||
|
||||
fn try_static_internal(string_to_add: &str) -> Result<Self, phf_shared::Hashes> {
|
||||
let static_set = Static::get();
|
||||
let hash = phf_shared::hash(&*string_to_add, &static_set.key);
|
||||
let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len());
|
||||
|
||||
if static_set.atoms[index as usize] == string_to_add {
|
||||
Ok(Self::pack_static(index))
|
||||
} else {
|
||||
Err(hash)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The default atom is the static atom for the set's empty string.
impl<Static: StaticAtomSet> Default for Atom<Static> {
    #[inline]
    fn default() -> Self {
        let empty_index = Static::empty_string_index();
        Self::pack_static(empty_index)
    }
}
|
||||
|
||||
impl<Static: StaticAtomSet> Hash for Atom<Static> {
|
||||
#[inline]
|
||||
fn hash<H>(&self, state: &mut H)
|
||||
where
|
||||
H: Hasher,
|
||||
{
|
||||
state.write_u32(self.get_hash())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
|
||||
fn from(string_to_add: Cow<'a, str>) -> Self {
|
||||
Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
|
||||
let len = string_to_add.len();
|
||||
if len <= MAX_INLINE_LEN {
|
||||
let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
|
||||
{
|
||||
let dest = inline_atom_slice_mut(&mut data);
|
||||
dest[..len].copy_from_slice(string_to_add.as_bytes())
|
||||
}
|
||||
Atom {
|
||||
// INLINE_TAG ensures this is never zero
|
||||
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
|
||||
phantom: PhantomData,
|
||||
}
|
||||
} else {
|
||||
let ptr: std::ptr::NonNull<Entry> = DYNAMIC_SET.insert(string_to_add, hash.g);
|
||||
let data = ptr.as_ptr() as u64;
|
||||
debug_assert!(0 == data & TAG_MASK);
|
||||
Atom {
|
||||
// The address of a ptr::NonNull is non-zero
|
||||
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<Static: StaticAtomSet> Clone for Atom<Static> {
|
||||
#[inline(always)]
|
||||
fn clone(&self) -> Self {
|
||||
if self.tag() == DYNAMIC_TAG {
|
||||
let entry = self.unsafe_data.get() as *const Entry;
|
||||
unsafe { &*entry }.ref_count.fetch_add(1, SeqCst);
|
||||
}
|
||||
Atom { ..*self }
|
||||
}
|
||||
}
|
||||
|
||||
impl<Static> Drop for Atom<Static> {
|
||||
#[inline]
|
||||
fn drop(&mut self) {
|
||||
if self.tag() == DYNAMIC_TAG {
|
||||
let entry = self.unsafe_data.get() as *const Entry;
|
||||
if unsafe { &*entry }.ref_count.fetch_sub(1, SeqCst) == 1 {
|
||||
drop_slow(self)
|
||||
}
|
||||
}
|
||||
|
||||
// Out of line to guide inlining.
|
||||
fn drop_slow<Static>(this: &mut Atom<Static>) {
|
||||
DYNAMIC_SET.remove(this.unsafe_data.get() as *mut Entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Static: StaticAtomSet> ops::Deref for Atom<Static> {
|
||||
type Target = str;
|
||||
|
||||
#[inline]
|
||||
fn deref(&self) -> &str {
|
||||
unsafe {
|
||||
match self.tag() {
|
||||
DYNAMIC_TAG => {
|
||||
let entry = self.unsafe_data.get() as *const Entry;
|
||||
&(*entry).string
|
||||
}
|
||||
INLINE_TAG => {
|
||||
let len = (self.unsafe_data() & LEN_MASK) >> LEN_OFFSET;
|
||||
let src = inline_atom_slice(&self.unsafe_data);
|
||||
str::from_utf8_unchecked(&src[..(len as usize)])
|
||||
}
|
||||
STATIC_TAG => Static::get().atoms[self.static_index() as usize],
|
||||
_ => debug_unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Static: StaticAtomSet> fmt::Debug for Atom<Static> {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let ty_str = unsafe {
|
||||
match self.tag() {
|
||||
DYNAMIC_TAG => "dynamic",
|
||||
INLINE_TAG => "inline",
|
||||
STATIC_TAG => "static",
|
||||
_ => debug_unreachable!(),
|
||||
}
|
||||
};
|
||||
|
||||
write!(f, "Atom('{}' type={})", &*self, ty_str)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Static: StaticAtomSet> PartialOrd for Atom<Static> {
|
||||
#[inline]
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
if self.unsafe_data == other.unsafe_data {
|
||||
return Some(Equal);
|
||||
}
|
||||
self.as_ref().partial_cmp(other.as_ref())
|
||||
}
|
||||
}
|
||||
|
||||
impl<Static: StaticAtomSet> Ord for Atom<Static> {
|
||||
#[inline]
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
if self.unsafe_data == other.unsafe_data {
|
||||
return Equal;
|
||||
}
|
||||
self.as_ref().cmp(other.as_ref())
|
||||
}
|
||||
}
|
||||
|
||||
// AsciiExt requires mutating methods, so we just implement the non-mutating ones.
|
||||
// We don't need to implement is_ascii because there's no performance improvement
|
||||
// over the one from &str.
|
||||
impl<Static: StaticAtomSet> Atom<Static> {
|
||||
fn from_mutated_str<F: FnOnce(&mut str)>(s: &str, f: F) -> Self {
|
||||
let mut buffer = mem::MaybeUninit::<[u8; 64]>::uninit();
|
||||
let buffer = unsafe { &mut *buffer.as_mut_ptr() };
|
||||
|
||||
if let Some(buffer_prefix) = buffer.get_mut(..s.len()) {
|
||||
buffer_prefix.copy_from_slice(s.as_bytes());
|
||||
let as_str = unsafe { ::std::str::from_utf8_unchecked_mut(buffer_prefix) };
|
||||
f(as_str);
|
||||
Atom::from(&*as_str)
|
||||
} else {
|
||||
let mut string = s.to_owned();
|
||||
f(&mut string);
|
||||
Atom::from(string)
|
||||
}
|
||||
}
|
||||
|
||||
/// Like [`to_ascii_uppercase`].
|
||||
///
|
||||
/// [`to_ascii_uppercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_uppercase
|
||||
pub fn to_ascii_uppercase(&self) -> Self {
|
||||
for (i, b) in self.bytes().enumerate() {
|
||||
if let b'a'..=b'z' = b {
|
||||
return Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase());
|
||||
}
|
||||
}
|
||||
self.clone()
|
||||
}
|
||||
|
||||
/// Like [`to_ascii_lowercase`].
|
||||
///
|
||||
/// [`to_ascii_lowercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_lowercase
|
||||
pub fn to_ascii_lowercase(&self) -> Self {
|
||||
for (i, b) in self.bytes().enumerate() {
|
||||
if let b'A'..=b'Z' = b {
|
||||
return Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase());
|
||||
}
|
||||
}
|
||||
self.clone()
|
||||
}
|
||||
|
||||
/// Like [`eq_ignore_ascii_case`].
|
||||
///
|
||||
/// [`eq_ignore_ascii_case`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.eq_ignore_ascii_case
|
||||
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
|
||||
(self == other) || self.eq_str_ignore_ascii_case(&**other)
|
||||
}
|
||||
|
||||
/// Like [`eq_ignore_ascii_case`], but takes an unhashed string as `other`.
|
||||
///
|
||||
/// [`eq_ignore_ascii_case`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.eq_ignore_ascii_case
|
||||
pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool {
|
||||
(&**self).eq_ignore_ascii_case(other)
|
||||
}
|
||||
}
|
||||
|
||||
/// View the seven bytes of `x` that hold an inline atom's string data.
#[inline(always)]
fn inline_atom_slice(x: &NonZeroU64) -> &[u8] {
    // All except the lowest byte, which is first in little-endian, last in big-endian.
    let skip = if cfg!(target_endian = "little") { 1 } else { 0 };
    let first_byte = x as *const NonZeroU64 as *const u8;
    // SAFETY: at most one of the eight bytes of `x` is skipped and seven are
    // covered, so the slice lies entirely inside `x`.
    unsafe { slice::from_raw_parts(first_byte.add(skip), 7) }
}
|
||||
|
||||
/// Mutable view of the seven bytes of `x` that hold an inline atom's string data.
#[inline(always)]
fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] {
    // All except the lowest byte, which is first in little-endian, last in big-endian.
    let skip = if cfg!(target_endian = "little") { 1 } else { 0 };
    let first_byte = x as *mut u64 as *mut u8;
    // SAFETY: at most one of the eight bytes of `x` is skipped and seven are
    // covered, so the slice lies entirely inside `x`.
    unsafe { slice::from_raw_parts_mut(first_byte.add(skip), 7) }
}
|
||||
108
third-party/vendor/string_cache/src/dynamic_set.rs
vendored
Normal file
108
third-party/vendor/string_cache/src/dynamic_set.rs
vendored
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
// Copyright 2014 The Servo Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use parking_lot::Mutex;
|
||||
use std::borrow::Cow;
|
||||
use std::mem;
|
||||
use std::ptr::NonNull;
|
||||
use std::sync::atomic::AtomicIsize;
|
||||
use std::sync::atomic::Ordering::SeqCst;
|
||||
|
||||
/// Number of hash buckets in the dynamic set (a power of two: 1 << 12).
const NB_BUCKETS: usize = 4096;
/// Mask that maps a hash to its bucket index.
const BUCKET_MASK: u32 = NB_BUCKETS as u32 - 1;
|
||||
|
||||
pub(crate) struct Set {
|
||||
buckets: Box<[Mutex<Option<Box<Entry>>>]>,
|
||||
}
|
||||
|
||||
/// One interned string together with its bookkeeping.
pub(crate) struct Entry {
    // The interned text.
    pub(crate) string: Box<str>,
    // Hash of `string`, as passed to `Set::insert`; its low bits select the bucket.
    pub(crate) hash: u32,
    // Number of live atoms that refer to this entry.
    pub(crate) ref_count: AtomicIsize,
    // Next entry in the same bucket, if any.
    next_in_bucket: Option<Box<Entry>>,
}
|
||||
|
||||
// Entry addresses are multiples of this value,
// and therefore have the TAG_MASK bits unset, leaving them available for tagging.
pub(crate) const ENTRY_ALIGNMENT: usize = 1 << 2;
|
||||
|
||||
/// `Entry` must be aligned at least as strictly as `ENTRY_ALIGNMENT` promises.
#[test]
fn entry_alignment_is_sufficient() {
    let actual = mem::align_of::<Entry>();
    assert!(actual >= ENTRY_ALIGNMENT);
}
|
||||
|
||||
pub(crate) static DYNAMIC_SET: Lazy<Set> = Lazy::new(|| {
|
||||
// NOTE: Using const initialization for buckets breaks the small-stack test.
|
||||
// ```
|
||||
// // buckets: [Mutex<Option<Box<Entry>>>; NB_BUCKETS],
|
||||
// const MUTEX: Mutex<Option<Box<Entry>>> = Mutex::new(None);
|
||||
// let buckets = Box::new([MUTEX; NB_BUCKETS]);
|
||||
// ```
|
||||
let buckets = (0..NB_BUCKETS).map(|_| Mutex::new(None)).collect();
|
||||
Set { buckets }
|
||||
});
|
||||
|
||||
impl Set {
|
||||
pub(crate) fn insert(&self, string: Cow<str>, hash: u32) -> NonNull<Entry> {
|
||||
let bucket_index = (hash & BUCKET_MASK) as usize;
|
||||
let mut linked_list = self.buckets[bucket_index].lock();
|
||||
|
||||
{
|
||||
let mut ptr: Option<&mut Box<Entry>> = linked_list.as_mut();
|
||||
|
||||
while let Some(entry) = ptr.take() {
|
||||
if entry.hash == hash && *entry.string == *string {
|
||||
if entry.ref_count.fetch_add(1, SeqCst) > 0 {
|
||||
return NonNull::from(&mut **entry);
|
||||
}
|
||||
// Uh-oh. The pointer's reference count was zero, which means someone may try
|
||||
// to free it. (Naive attempts to defend against this, for example having the
|
||||
// destructor check to see whether the reference count is indeed zero, don't
|
||||
// work due to ABA.) Thus we need to temporarily add a duplicate string to the
|
||||
// list.
|
||||
entry.ref_count.fetch_sub(1, SeqCst);
|
||||
break;
|
||||
}
|
||||
ptr = entry.next_in_bucket.as_mut();
|
||||
}
|
||||
}
|
||||
debug_assert!(mem::align_of::<Entry>() >= ENTRY_ALIGNMENT);
|
||||
let string = string.into_owned();
|
||||
let mut entry = Box::new(Entry {
|
||||
next_in_bucket: linked_list.take(),
|
||||
hash,
|
||||
ref_count: AtomicIsize::new(1),
|
||||
string: string.into_boxed_str(),
|
||||
});
|
||||
let ptr = NonNull::from(&mut *entry);
|
||||
*linked_list = Some(entry);
|
||||
ptr
|
||||
}
|
||||
|
||||
pub(crate) fn remove(&self, ptr: *mut Entry) {
|
||||
let value: &Entry = unsafe { &*ptr };
|
||||
let bucket_index = (value.hash & BUCKET_MASK) as usize;
|
||||
|
||||
let mut linked_list = self.buckets[bucket_index].lock();
|
||||
debug_assert!(value.ref_count.load(SeqCst) == 0);
|
||||
let mut current: &mut Option<Box<Entry>> = &mut linked_list;
|
||||
|
||||
while let Some(entry_ptr) = current.as_mut() {
|
||||
let entry_ptr: *mut Entry = &mut **entry_ptr;
|
||||
if entry_ptr == ptr {
|
||||
mem::drop(mem::replace(current, unsafe {
|
||||
(*entry_ptr).next_in_bucket.take()
|
||||
}));
|
||||
break;
|
||||
}
|
||||
current = unsafe { &mut (*entry_ptr).next_in_bucket };
|
||||
}
|
||||
}
|
||||
}
|
||||
139
third-party/vendor/string_cache/src/lib.rs
vendored
Normal file
139
third-party/vendor/string_cache/src/lib.rs
vendored
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
// Copyright 2014 The Servo Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//!
|
||||
//! A library for interning things that are `AsRef<str>`.
|
||||
//!
|
||||
//! Some strings may be interned at compile time using the `string-cache-codegen` crate, or the
|
||||
//! `EmptyStaticAtomSet` may be used that has no compile-time interned strings. An `Atom` is an
|
||||
//! interned string for a given set (either `EmptyStaticAtomSet` or a generated `StaticAtomSet`).
|
||||
//!
|
||||
//! Generated `Atom`s will have associated macros to intern static strings at compile-time.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! Here are two examples, one with compile-time `Atom`s, and one without.
|
||||
//!
|
||||
//! ## With compile-time atoms
|
||||
//!
|
||||
//! In `Cargo.toml`:
|
||||
//! ```toml
|
||||
//! [dependencies]
|
||||
//! string_cache = "0.8"
|
||||
//!
|
||||
//! [dev-dependencies]
|
||||
//! string_cache_codegen = "0.5"
|
||||
//! ```
|
||||
//!
|
||||
//! In `build.rs`:
|
||||
//!
|
||||
//! ```ignore
|
||||
//! extern crate string_cache_codegen;
|
||||
//!
|
||||
//! use std::env;
|
||||
//! use std::path::Path;
|
||||
//!
|
||||
//! fn main() {
|
||||
//! string_cache_codegen::AtomType::new("foo::FooAtom", "foo_atom!")
|
||||
//! .atoms(&["foo", "bar"])
|
||||
//! .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs"))
|
||||
//! .unwrap()
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! In `lib.rs`:
|
||||
//!
|
||||
//! ```ignore
|
||||
//! extern crate string_cache;
|
||||
//!
|
||||
//! mod foo {
|
||||
//! include!(concat!(env!("OUT_DIR"), "/foo_atom.rs"));
|
||||
//! }
|
||||
//!
|
||||
//! fn use_the_atom(t: &str) {
|
||||
//! match *t {
|
||||
//! foo_atom!("foo") => println!("Found foo!"),
|
||||
//! foo_atom!("bar") => println!("Found bar!"),
|
||||
//! // foo_atom!("baz") => println!("Found baz!"), - would be a compile time error
|
||||
//! _ => {
|
||||
//! println!("String not interned");
|
||||
//! // We can intern strings at runtime as well
|
||||
//! foo::FooAtom::from(t)
|
||||
//! }
|
||||
//! }
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ## No compile-time atoms
|
||||
//!
|
||||
//! ```
|
||||
//! # extern crate string_cache;
|
||||
//! use string_cache::DefaultAtom;
|
||||
//!
|
||||
//! # fn main() {
|
||||
//! let mut interned_stuff = Vec::new();
|
||||
//! let text = "here is a sentence of text that will be tokenised and
|
||||
//! interned and some repeated tokens is of text and";
|
||||
//! for word in text.split_whitespace() {
|
||||
//! let seen_before = interned_stuff.iter()
|
||||
//! // We can use impl PartialEq<T> where T is anything string-like
|
||||
//! // to compare to interned strings to either other interned strings,
|
||||
//! // or actual strings. Comparing two interned strings is very fast
|
||||
//! // (normally a single cpu operation).
|
||||
//! .filter(|interned_word| interned_word == &word)
|
||||
//! .count();
|
||||
//! if seen_before > 0 {
|
||||
//! println!(r#"Seen the word "{}" {} times"#, word, seen_before);
|
||||
//! } else {
|
||||
//! println!(r#"Not seen the word "{}" before"#, word);
|
||||
//! }
|
||||
//! // We use the impl From<(Cow<'a, str>, or &'a str, or String)> for
|
||||
//! // Atom<Static> to intern a new string.
|
||||
//! interned_stuff.push(DefaultAtom::from(word));
|
||||
//! }
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
|
||||
#![cfg_attr(test, deny(warnings))]
|
||||
|
||||
// Types, such as Atom, that impl Hash must follow the hash invariant: if two objects match
|
||||
// with PartialEq, they must also have the same Hash. Clippy warns on types that derive one while
|
||||
// manually impl-ing the other, because it seems easy for the two to drift apart, causing the
|
||||
// invariant to be violated.
|
||||
//
|
||||
// But Atom is a newtype over NonZeroU64, and probably always will be, since cheap comparisons and
|
||||
// copying are this library's purpose. So we know what the PartialEq comparison is going to do.
|
||||
//
|
||||
// The `get_hash` function, seen in `atom.rs`, consults that number, plus the global string interner
|
||||
// tables. The only way for the resulting hash for two Atoms with the same inner 64-bit number to
|
||||
// differ would be if the table entry changed between invocations, and that would be really bad.
|
||||
#![allow(clippy::derive_hash_xor_eq)]
|
||||
|
||||
mod atom;
|
||||
mod dynamic_set;
|
||||
mod static_sets;
|
||||
mod trivial_impls;
|
||||
|
||||
pub use atom::Atom;
|
||||
pub use static_sets::{EmptyStaticAtomSet, PhfStrSet, StaticAtomSet};
|
||||
|
||||
/// Use this if you don’t care about static atoms.
|
||||
pub type DefaultAtom = Atom<EmptyStaticAtomSet>;
|
||||
|
||||
// Some minor tests of internal layout here.
|
||||
// See ../integration-tests for much more.
|
||||
|
||||
/// Guard against accidental changes to the sizes of things: an atom is one 64-bit
/// word, and wrapping it in `Option` must not make it any bigger.
#[test]
fn assert_sizes() {
    use std::mem::size_of;

    let atom_size = size_of::<DefaultAtom>();
    assert_eq!(atom_size, 8);
    assert_eq!(size_of::<Option<DefaultAtom>>(), atom_size);
}
|
||||
64
third-party/vendor/string_cache/src/static_sets.rs
vendored
Normal file
64
third-party/vendor/string_cache/src/static_sets.rs
vendored
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
// Copyright 2014 The Servo Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
/// A static `PhfStrSet`
|
||||
///
|
||||
/// This trait is implemented by static sets of interned strings generated using
|
||||
/// `string_cache_codegen`, and `EmptyStaticAtomSet` for when strings will be added dynamically.
|
||||
///
|
||||
/// It is used by the methods of [`Atom`] to check if a string is present in the static set.
|
||||
///
|
||||
/// [`Atom`]: struct.Atom.html
|
||||
pub trait StaticAtomSet: Ord {
|
||||
/// Get the location of the static string set in the binary.
|
||||
fn get() -> &'static PhfStrSet;
|
||||
/// Get the index of the empty string, which is in every set and is used for `Atom::default`.
|
||||
fn empty_string_index() -> u32;
|
||||
}
|
||||
|
||||
/// A set of strings backed by a [perfect hash function], built with the
/// [Hash, Displace and Compress] (CHD) scheme.
///
/// The meaning of the struct fields is explained in the CHD document.
///
/// [perfect hash function]: https://en.wikipedia.org/wiki/Perfect_hash_function
/// [Hash, Displace and Compress]: http://cmph.sourceforge.net/papers/esa09.pdf
pub struct PhfStrSet {
    // Key the hash function is seeded with.
    #[doc(hidden)]
    pub key: u64,
    // Displacement pairs used to turn a hash into an index.
    #[doc(hidden)]
    pub disps: &'static [(u32, u32)],
    // The strings of the set, addressed by index.
    #[doc(hidden)]
    pub atoms: &'static [&'static str],
    // One hash per string, addressed by the same index as `atoms`.
    #[doc(hidden)]
    pub hashes: &'static [u32],
}
|
||||
|
||||
/// The static atom set to use when every string will be added dynamically.
#[derive(Eq, PartialEq, Ord, PartialOrd)]
pub struct EmptyStaticAtomSet;
|
||||
|
||||
impl StaticAtomSet for EmptyStaticAtomSet {
|
||||
fn get() -> &'static PhfStrSet {
|
||||
// The name is a lie: this set is not empty (it contains the empty string)
|
||||
// but that’s only to avoid divisions by zero in rust-phf.
|
||||
static SET: PhfStrSet = PhfStrSet {
|
||||
key: 0,
|
||||
disps: &[(0, 0)],
|
||||
atoms: &[""],
|
||||
// "" SipHash'd, and xored with u64_hash_to_u32.
|
||||
hashes: &[0x3ddddef3],
|
||||
};
|
||||
&SET
|
||||
}
|
||||
|
||||
fn empty_string_index() -> u32 {
|
||||
0
|
||||
}
|
||||
}
|
||||
119
third-party/vendor/string_cache/src/trivial_impls.rs
vendored
Normal file
119
third-party/vendor/string_cache/src/trivial_impls.rs
vendored
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
// Copyright 2014 The Servo Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use crate::{Atom, StaticAtomSet};
|
||||
#[cfg(feature = "serde_support")]
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
|
||||
impl<Static: StaticAtomSet> ::precomputed_hash::PrecomputedHash for Atom<Static> {
|
||||
fn precomputed_hash(&self) -> u32 {
|
||||
self.get_hash()
|
||||
}
|
||||
}
|
||||
|
||||
/// Converting from a borrowed atom clones it.
impl<'a, Static: StaticAtomSet> From<&'a Atom<Static>> for Atom<Static> {
    fn from(atom: &'a Self) -> Self {
        Clone::clone(atom)
    }
}
|
||||
|
||||
/// An atom equals a `str` when the string it stands for has the same contents.
impl<Static: StaticAtomSet> PartialEq<str> for Atom<Static> {
    fn eq(&self, other: &str) -> bool {
        let text: &str = self;
        text == other
    }
}
|
||||
|
||||
/// A `str` equals an atom when the atom's string has the same contents.
impl<Static: StaticAtomSet> PartialEq<Atom<Static>> for str {
    fn eq(&self, other: &Atom<Static>) -> bool {
        let text: &str = other;
        self == text
    }
}
|
||||
|
||||
/// An atom equals a `String` when the string it stands for has the same contents.
impl<Static: StaticAtomSet> PartialEq<String> for Atom<Static> {
    fn eq(&self, other: &String) -> bool {
        let text: &str = self;
        text == other.as_str()
    }
}
|
||||
|
||||
impl<'a, Static: StaticAtomSet> From<&'a str> for Atom<Static> {
|
||||
#[inline]
|
||||
fn from(string_to_add: &str) -> Self {
|
||||
Atom::from(Cow::Borrowed(string_to_add))
|
||||
}
|
||||
}
|
||||
|
||||
impl<Static: StaticAtomSet> From<String> for Atom<Static> {
|
||||
#[inline]
|
||||
fn from(string_to_add: String) -> Self {
|
||||
Atom::from(Cow::Owned(string_to_add))
|
||||
}
|
||||
}
|
||||
|
||||
/// An atom displays exactly like the string it stands for.
impl<Static: StaticAtomSet> fmt::Display for Atom<Static> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let text: &str = self;
        fmt::Display::fmt(text, f)
    }
}
|
||||
|
||||
/// Borrow the string an atom stands for.
impl<Static: StaticAtomSet> AsRef<str> for Atom<Static> {
    fn as_ref(&self) -> &str {
        &**self
    }
}
|
||||
|
||||
/// An atom serializes the same way as the string it stands for.
#[cfg(feature = "serde_support")]
impl<Static: StaticAtomSet> Serialize for Atom<Static> {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        <str as Serialize>::serialize(self.as_ref(), serializer)
    }
}
|
||||
|
||||
/// An atom deserializes from a string, which is interned on the way in.
#[cfg(feature = "serde_support")]
impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom<Static> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'a>,
    {
        use serde::de;
        use std::marker::PhantomData;

        // Visitor that interns whatever string the deserializer hands over.
        struct InternVisitor<S: StaticAtomSet>(PhantomData<S>);

        impl<'de, S: StaticAtomSet> de::Visitor<'de> for InternVisitor<S> {
            type Value = Atom<S>;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("an Atom")
            }

            // Borrowed input.
            fn visit_str<E: de::Error>(self, value: &str) -> Result<Self::Value, E> {
                Ok(Atom::from(value))
            }

            // Owned input is passed on as a `String`.
            fn visit_string<E: de::Error>(self, value: String) -> Result<Self::Value, E> {
                Ok(Atom::from(value))
            }
        }

        let visitor = InternVisitor(PhantomData);
        deserializer.deserialize_str(visitor)
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue