Vendor things

This commit is contained in:
John Doty 2024-03-08 11:03:01 -08:00
parent 5deceec006
commit 977e3c17e5
19434 changed files with 10682014 additions and 0 deletions

View file

@ -0,0 +1,156 @@
//! ARM64 CPU feature detection support.
//!
//! Unfortunately ARM instructions to detect CPU features cannot be called from
//! unprivileged userspace code, so this implementation relies on OS-specific
//! APIs for feature detection.
// Evaluates the `$fallback` expression if at least one of the supplied target
// features is not enabled at compile time. Otherwise expands to `true`.
#[macro_export]
#[doc(hidden)]
macro_rules! __unless_target_features {
    ($($feature:tt),+ => $fallback:expr ) => {
        {
            // At least one feature is missing from the build configuration.
            #[cfg(any($(not(target_feature = $feature)),+))]
            $fallback

            // Every feature is statically enabled.
            #[cfg(all($(target_feature = $feature),+))]
            true
        }
    };
}
// Linux/Android runtime detection of target CPU features: fetches the hwcap
// bitmask once via `getauxval_hwcap` and ANDs together one `check!` per
// requested feature.
#[cfg(any(target_os = "linux", target_os = "android"))]
#[macro_export]
#[doc(hidden)]
macro_rules! __detect_target_features {
    ($($feature:tt),+) => {{
        let caps = $crate::aarch64::getauxval_hwcap();
        true $( & $crate::check!(caps, $feature) )+
    }};
}
/// Linux helper: returns the `AT_HWCAP` entry obtained from `getauxval`.
#[cfg(any(target_os = "linux", target_os = "android"))]
pub fn getauxval_hwcap() -> u64 {
    // SAFETY: NOTE(review) `getauxval` is called with a constant key and no
    // pointers; presumably it has no further preconditions — confirm.
    let hwcap = unsafe { libc::getauxval(libc::AT_HWCAP) };
    hwcap
}
// Apple platforms: runtime detection of target CPU features. Each requested
// feature is resolved by the Apple `check!` macro and the results are ANDed.
#[cfg(target_vendor = "apple")]
#[macro_export]
#[doc(hidden)]
macro_rules! __detect_target_features {
    ($($feature:tt),+) => {{
        true $( & $crate::check!($feature) )+
    }};
}
// Linux/Android: generates the exported `check!` macro from a list of
// `(feature name, hwcaps constant)` pairs.
//
// Each generated arm takes the hwcap bitmask and a feature-name literal and
// evaluates to `true` only when *every* bit of the corresponding
// `$crate::aarch64::hwcaps` constant is set. The constants may combine several
// kernel capability bits, so testing for "any bit set" would report a feature
// as available when only part of it is supported.
#[cfg(any(target_os = "linux", target_os = "android"))]
macro_rules! __expand_check_macro {
    ($(($name:tt, $hwcap:ident)),* $(,)?) => {
        #[macro_export]
        #[doc(hidden)]
        macro_rules! check {
            $(
                ($hwcaps:expr, $name) => {
                    (($hwcaps & $crate::aarch64::hwcaps::$hwcap)
                        == $crate::aarch64::hwcaps::$hwcap)
                };
            )*
        }
    };
}
// Linux/Android: instantiate `check!` for the supported feature names. Each
// entry pairs the target-feature string with the `hwcaps` constant it is
// tested against.
#[cfg(any(target_os = "linux", target_os = "android"))]
__expand_check_macro! {
("aes", AES), // Enable AES support.
("sha2", SHA2), // Enable SHA1 and SHA256 support.
("sha3", SHA3), // Enable SHA512 and SHA3 support.
("sm4", SM4), // Enable SM3 and SM4 support.
}
/// Linux hardware capabilities mapped to target features.
///
/// Note that LLVM target features are coarser grained than what Linux supports
/// and imply more capabilities under each feature. This module attempts to
/// provide that mapping accordingly.
///
/// See this issue for more info: <https://github.com/RustCrypto/utils/issues/395>
#[cfg(any(target_os = "linux", target_os = "android"))]
pub mod hwcaps {
use libc::c_ulong;
pub const AES: c_ulong = libc::HWCAP_AES | libc::HWCAP_PMULL;
pub const SHA2: c_ulong = libc::HWCAP_SHA2;
pub const SHA3: c_ulong = libc::HWCAP_SHA3 | libc::HWCAP_SHA512;
pub const SM4: c_ulong = libc::HWCAP_SM3 | libc::HWCAP_SM4;
}
// Apple OS (macOS, iOS, watchOS, and tvOS) `check!` macro.
//
// NOTE: several of these instructions (e.g. `aes`, `sha2`) can be assumed to
// be present on all Apple ARM64 hardware, so their arms expand to `true`.
//
// Newer CPU instructions have nodes within sysctl's `hw.optional` namespace
// and are queried at runtime. The ones without such a node can safely be
// assumed to be present on all Apple ARM64 devices, now and for the
// foreseeable future.
//
// See discussion on this issue for more information:
// <https://github.com/RustCrypto/utils/issues/378>
#[cfg(target_vendor = "apple")]
#[macro_export]
#[doc(hidden)]
macro_rules! check {
    ("aes") => {
        true
    };
    ("sha2") => {
        true
    };
    ("sha3") => {
        unsafe {
            // `sha3` target feature implies SHA-512 as well, so both sysctl
            // nodes must report support. The second node is only queried
            // when the first one is set.
            let has_sha512 = $crate::aarch64::sysctlbyname(b"hw.optional.armv8_2_sha512\0");
            has_sha512 && $crate::aarch64::sysctlbyname(b"hw.optional.armv8_2_sha3\0")
        }
    };
}
/// Apple helper function for calling `sysctlbyname`.
///
/// Reads the sysctl node `name` into a `u32` and returns whether the value is
/// non-zero.
///
/// # Panics
///
/// Panics if `name` does not end with a NUL byte, if the reported result size
/// is not 4 bytes, or if `libc::sysctlbyname` returns a non-zero code.
///
/// # Safety
///
/// NOTE(review): the only precondition visible here (NUL termination) is
/// asserted at runtime; confirm whether callers must uphold anything else.
#[cfg(target_vendor = "apple")]
pub unsafe fn sysctlbyname(name: &[u8]) -> bool {
    assert_eq!(
        name.last().cloned(),
        Some(0),
        "name is not NUL terminated: {:?}",
        name
    );

    let mut flag = 0u32;
    let mut len = core::mem::size_of_val(&flag);

    // Read-only query: no new value is supplied (null pointer, length 0).
    let status = libc::sysctlbyname(
        name.as_ptr().cast::<i8>(),
        (&mut flag as *mut u32).cast::<libc::c_void>(),
        &mut len,
        core::ptr::null_mut(),
        0,
    );

    assert_eq!(len, 4, "unexpected sysctlbyname(3) result size");
    assert_eq!(status, 0, "sysctlbyname returned error code: {}", status);
    flag != 0
}
// Neither Apple nor Linux/Android: runtime CPU feature detection is
// unavailable, so every query reports `false`.
#[cfg(not(any(target_vendor = "apple", target_os = "linux", target_os = "android",)))]
#[macro_export]
#[doc(hidden)]
macro_rules! __detect_target_features {
    ($($feature:tt),+) => {
        false
    };
}

View file

@ -0,0 +1,217 @@
//! This crate provides macros for runtime CPU feature detection. It's intended
//! as a stopgap until Rust [RFC 2725] adding first-class target feature detection
//! macros to `libcore` is implemented.
//!
//! # Supported target architectures
//!
//! *NOTE: target features with an asterisk are unstable (nightly-only) and
//! subject to change to match upstream name changes in the Rust standard
//! library.
//!
//! ## `aarch64`
//!
//! Linux, iOS, and macOS/ARM only (ARM64 does not support OS-independent feature detection)
//!
//! Target features:
//!
//! - `aes`*
//! - `sha2`*
//! - `sha3`*
//!
//! Linux only
//!
//! - `sm4`*
//!
//! ## `loongarch64`
//!
//! Linux only (LoongArch64 does not support OS-independent feature detection)
//!
//! Target features:
//!
//! - `lam`*
//! - `ual`*
//! - `fpu`*
//! - `lsx`*
//! - `lasx`*
//! - `crc32`*
//! - `complex`*
//! - `crypto`*
//! - `lvz`*
//! - `lbt.x86`*
//! - `lbt.arm`*
//! - `lbt.mips`*
//! - `ptw`*
//!
//! ## `x86`/`x86_64`
//!
//! OS independent and `no_std`-friendly
//!
//! Target features:
//!
//! - `adx`
//! - `aes`
//! - `avx`
//! - `avx2`
//! - `avx512bw`*
//! - `avx512cd`*
//! - `avx512dq`*
//! - `avx512er`*
//! - `avx512f`*
//! - `avx512ifma`*
//! - `avx512pf`*
//! - `avx512vl`*
//! - `bmi1`
//! - `bmi2`
//! - `fma`
//! - `mmx`
//! - `pclmulqdq`
//! - `popcnt`
//! - `rdrand`
//! - `rdseed`
//! - `sgx`
//! - `sha`
//! - `sse`
//! - `sse2`
//! - `sse3`
//! - `sse4.1`
//! - `sse4.2`
//! - `ssse3`
//!
//! If you would like detection support for a target feature which is not on
//! this list, please [open a GitHub issue][gh].
//!
//! # Example
//! ```
//! # #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
//! # {
//! // This macro creates `cpuid_aes_sha` module
//! cpufeatures::new!(cpuid_aes_sha, "aes", "sha");
//!
//! // `token` is a zero-sized type (ZST) value which guarantees that the
//! // underlying static storage has been properly initialized, which
//! // allows the initialization branch to be omitted
//! let token: cpuid_aes_sha::InitToken = cpuid_aes_sha::init();
//!
//! if token.get() {
//! println!("CPU supports both SHA and AES extensions");
//! } else {
//! println!("SHA and AES extensions are not supported");
//! }
//!
//! // If the stored value is needed only once, you can get it
//! // without the token
//! let val = cpuid_aes_sha::get();
//! assert_eq!(val, token.get());
//!
//! // Additionally you can get both token and value
//! let (token, val) = cpuid_aes_sha::init_get();
//! assert_eq!(val, token.get());
//! # }
//! ```
//!
//! Note that if all tested target features are enabled via compiler options
//! (e.g. by using `RUSTFLAGS`), the `get` method will always return `true`
//! and `init` will not use the CPUID instruction. This behavior allows the
//! compiler to completely eliminate fallback code.
//!
//! After the first call, the macro caches the result and returns it on
//! subsequent calls, so their runtime overhead is minimal.
//!
//! [RFC 2725]: https://github.com/rust-lang/rfcs/pull/2725
//! [gh]: https://github.com/RustCrypto/utils/issues/new?title=cpufeatures:%20requesting%20support%20for%20CHANGEME%20target%20feature
#![no_std]
#![doc(
html_logo_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg",
html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg"
)]
// Architecture-specific detection backends. Under miri none of them is
// compiled; the `miri` module is used instead.
#[cfg(all(not(miri), target_arch = "aarch64"))]
#[doc(hidden)]
pub mod aarch64;

#[cfg(all(not(miri), target_arch = "loongarch64"))]
#[doc(hidden)]
pub mod loongarch64;

#[cfg(all(not(miri), any(target_arch = "x86", target_arch = "x86_64")))]
mod x86;

#[cfg(miri)]
mod miri;

// Reject every architecture that has no backend above.
#[cfg(not(any(
    target_arch = "aarch64",
    target_arch = "loongarch64",
    target_arch = "x86",
    target_arch = "x86_64"
)))]
compile_error!("This crate works only on `aarch64`, `loongarch64`, `x86`, and `x86-64` targets.");
/// Create module with CPU feature detection code.
///
/// `new!(name, "feat1", "feat2", ...)` defines a private module `name`
/// exposing `InitToken`, `init_get()`, `init()` and `get()`. The detection
/// result for the whole feature set is cached in a module-local atomic, so
/// detection runs only while the cache is still uninitialized.
#[macro_export]
macro_rules! new {
    ($mod_name:ident, $($tf:tt),+ $(,)?) => {
        mod $mod_name {
            use core::sync::atomic::{AtomicU8, Ordering};

            // Sentinel stored until detection has run; afterwards the cell
            // holds the detection result cast to `u8`.
            const UNINIT: u8 = u8::max_value();
            static STORAGE: AtomicU8 = AtomicU8::new(UNINIT);

            /// Initialization token
            #[derive(Copy, Clone, Debug)]
            pub struct InitToken(());

            impl InitToken {
                /// Get initialized value
                #[inline(always)]
                pub fn get(&self) -> bool {
                    $crate::__unless_target_features! {
                        $($tf),+ => {
                            STORAGE.load(Ordering::Relaxed) == 1
                        }
                    }
                }
            }

            /// Initialize underlying storage if needed and get
            /// stored value and initialization token.
            #[inline]
            pub fn init_get() -> (InitToken, bool) {
                let supported = $crate::__unless_target_features! {
                    $($tf),+ => {
                        // Relaxed ordering is fine, as we only have a single atomic variable.
                        match STORAGE.load(Ordering::Relaxed) {
                            UNINIT => {
                                let detected = $crate::__detect_target_features!($($tf),+);
                                STORAGE.store(detected as u8, Ordering::Relaxed);
                                detected
                            }
                            cached => cached == 1,
                        }
                    }
                };

                (InitToken(()), supported)
            }

            /// Initialize underlying storage if needed and get
            /// initialization token.
            #[inline]
            pub fn init() -> InitToken {
                let (token, _) = init_get();
                token
            }

            /// Initialize underlying storage if needed and get
            /// stored value.
            #[inline]
            pub fn get() -> bool {
                let (_, supported) = init_get();
                supported
            }
        }
    };
}

View file

@ -0,0 +1,106 @@
//! LoongArch64 CPU feature detection support.
//!
//! This implementation relies on OS-specific APIs for feature detection.
// Evaluates the `$fallback` expression if at least one of the supplied target
// features is not enabled at compile time. Otherwise expands to `true`.
#[macro_export]
#[doc(hidden)]
macro_rules! __unless_target_features {
    ($($feature:tt),+ => $fallback:expr ) => {
        {
            // At least one feature is missing from the build configuration.
            #[cfg(any($(not(target_feature = $feature)),+))]
            $fallback

            // Every feature is statically enabled.
            #[cfg(all($(target_feature = $feature),+))]
            true
        }
    };
}
// Linux runtime detection of target CPU features: fetches the hwcap bitmask
// once via `getauxval_hwcap` and ANDs together one `check!` per requested
// feature.
#[cfg(target_os = "linux")]
#[macro_export]
#[doc(hidden)]
macro_rules! __detect_target_features {
    ($($feature:tt),+) => {{
        let caps = $crate::loongarch64::getauxval_hwcap();
        true $( & $crate::check!(caps, $feature) )+
    }};
}
/// Linux helper: returns the `AT_HWCAP` entry obtained from `getauxval`.
#[cfg(target_os = "linux")]
pub fn getauxval_hwcap() -> u64 {
    // SAFETY: NOTE(review) `getauxval` is called with a constant key and no
    // pointers; presumably it has no further preconditions — confirm.
    let hwcap = unsafe { libc::getauxval(libc::AT_HWCAP) };
    hwcap
}
// Linux: generates the exported `check!` macro from a list of
// `(feature name, hwcaps constant)` pairs.
//
// Each generated arm matches the hwcap bitmask expression plus one
// feature-name literal and tests the bitmask against the corresponding
// `$crate::loongarch64::hwcaps` constant.
#[cfg(target_os = "linux")]
macro_rules! __expand_check_macro {
($(($name:tt, $hwcap:ident)),* $(,)?) => {
#[macro_export]
#[doc(hidden)]
macro_rules! check {
$(
// True when the bitmask shares at least one set bit with the constant.
($hwcaps:expr, $name) => {
(($hwcaps & $crate::loongarch64::hwcaps::$hwcap) != 0)
};
)*
}
};
}
// Linux: instantiate `check!` for the supported feature names. Each entry
// pairs the target-feature string with the `hwcaps` constant it is tested
// against.
#[cfg(target_os = "linux")]
__expand_check_macro! {
("cpucfg", CPUCFG), // Enable CPUCFG support.
("lam", LAM), // Enable LAM support.
("ual", UAL), // Enable UAL support.
("fpu", FPU), // Enable FPU support.
("lsx", LSX), // Enable LSX support.
("lasx", LASX), // Enable LASX support.
("crc32", CRC32), // Enable CRC32 support.
("complex", COMPLEX), // Enable COMPLEX support.
("crypto", CRYPTO), // Enable CRYPTO support.
("lvz", LVZ), // Enable LVZ support.
("lbt.x86", LBT_X86), // Enable LBT_X86 support.
("lbt.arm", LBT_ARM), // Enable LBT_ARM support.
("lbt.mips", LBT_MIPS), // Enable LBT_MIPS support.
("ptw", PTW), // Enable PTW support.
}
/// Linux hardware capabilities mapped to target features.
///
/// Each constant in this module aliases exactly one `libc::HWCAP_*` value
/// under the name used by the `check!` macro table.
#[cfg(target_os = "linux")]
pub mod hwcaps {
use libc::c_ulong;
pub const CPUCFG: c_ulong = libc::HWCAP_CPUCFG;
pub const LAM: c_ulong = libc::HWCAP_LAM;
pub const UAL: c_ulong = libc::HWCAP_UAL;
pub const FPU: c_ulong = libc::HWCAP_FPU;
pub const LSX: c_ulong = libc::HWCAP_LSX;
pub const LASX: c_ulong = libc::HWCAP_LASX;
pub const CRC32: c_ulong = libc::HWCAP_CRC32;
pub const COMPLEX: c_ulong = libc::HWCAP_COMPLEX;
pub const CRYPTO: c_ulong = libc::HWCAP_CRYPTO;
pub const LVZ: c_ulong = libc::HWCAP_LVZ;
pub const LBT_X86: c_ulong = libc::HWCAP_LBT_X86;
pub const LBT_ARM: c_ulong = libc::HWCAP_LBT_ARM;
pub const LBT_MIPS: c_ulong = libc::HWCAP_LBT_MIPS;
pub const PTW: c_ulong = libc::HWCAP_PTW;
}
// Non-Linux targets: runtime CPU feature detection is unavailable, so every
// query reports `false`.
#[cfg(not(target_os = "linux"))]
#[macro_export]
#[doc(hidden)]
macro_rules! __detect_target_features {
    ($($feature:tt),+) => {
        false
    };
}

View file

@ -0,0 +1,20 @@
//! Minimal miri support.
//!
//! Miri is an interpreter, and though it tries to emulate the target CPU
//! it does not support any target features.
// Miri variant: the fallback expression is accepted but never evaluated; the
// macro always expands to `false`.
#[macro_export]
#[doc(hidden)]
macro_rules! __unless_target_features {
    ($($feature:tt),+ => $ignored:expr ) => {
        false
    };
}
// Miri variant: no detection is performed; every feature query is `false`.
#[macro_export]
#[doc(hidden)]
macro_rules! __detect_target_features {
    ($($feature:tt),+) => {
        false
    };
}

View file

@ -0,0 +1,152 @@
//! x86/x86-64 CPU feature detection support.
//!
//! Portable, `no_std`-friendly implementation that relies on the x86 `CPUID`
//! instruction for feature detection.
/// Evaluate the given `$body` expression if any of the supplied target
/// features is not enabled. Otherwise returns true.
///
/// The `$body` expression is not evaluated on SGX, freestanding
/// (`target_os = "none"`) and UEFI targets; on these targets the macro
/// returns false unless *all* supplied target features are enabled.
#[macro_export]
#[doc(hidden)]
macro_rules! __unless_target_features {
    ($($tf:tt),+ => $body:expr ) => {{
        #[cfg(not(all($(target_feature=$tf,)*)))]
        {
            #[cfg(not(any(target_env = "sgx", target_os = "none", target_os = "uefi")))]
            $body

            // CPUID is not available on SGX. Freestanding and UEFI targets
            // do not support SIMD features with default compilation flags.
            // Freestanding targets report `target_os = "none"`; an empty
            // string never matches any target.
            #[cfg(any(target_env = "sgx", target_os = "none", target_os = "uefi"))]
            false
        }

        #[cfg(all($(target_feature=$tf,)*))]
        true
    }};
}
/// Use CPUID to detect the presence of all supplied target features.
///
/// Expands to a block that queries CPUID leaf 1 and leaf 7 (sub-leaf 0) once
/// and then ANDs together one `check!` per requested feature.
#[macro_export]
#[doc(hidden)]
macro_rules! __detect_target_features {
($($tf:tt),+) => {{
#[cfg(target_arch = "x86")]
use core::arch::x86::{__cpuid, __cpuid_count, CpuidResult};
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::{__cpuid, __cpuid_count, CpuidResult};
// These wrappers are workarounds around
// https://github.com/rust-lang/rust/issues/101346
//
// DO NOT remove it until MSRV is bumped to a version
// with the issue fix (at least 1.64).
#[inline(never)]
unsafe fn cpuid(leaf: u32) -> CpuidResult {
__cpuid(leaf)
}
#[inline(never)]
unsafe fn cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
__cpuid_count(leaf, sub_leaf)
}
// `cr[0]` holds the result of leaf 1, `cr[1]` the result of leaf 7 / sub-leaf 0.
let cr = unsafe {
[cpuid(1), cpuid_count(7, 0)]
};
// Non-short-circuiting AND over every feature; the trailing `true`
// terminates the `&` chain produced by the repetition.
$($crate::check!(cr, $tf) & )+ true
}};
}
/// Check that OS supports required SIMD registers
///
/// `$cr` is the CPUID result array (element 0 is read for its `ecx` field)
/// and `$mask` is the set of XCR0 bits that must all be enabled. The XCR0
/// register is only read when bits 26 and 27 of that `ecx` value are both
/// set; otherwise the macro evaluates to `false`.
#[macro_export]
#[doc(hidden)]
macro_rules! __xgetbv {
    ($cr:expr, $mask:expr) => {{
        #[cfg(target_arch = "x86")]
        use core::arch::x86 as arch;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64 as arch;

        // Bits 26 and 27 must both be set before `_xgetbv` may be used.
        let required = 0b11 << 26;

        ($cr[0].ecx & required) == required
            && (unsafe { arch::_xgetbv(arch::_XCR_XFEATURE_ENABLED_MASK) } & $mask) == $mask
    }};
}
// Generates the exported `check!` macro from a feature table.
//
// Each table entry is `(name, register capability, [index, register, bit]...)`:
// `name` is the feature-name literal matched by the generated arm, the
// register capability string selects which XCR0 mask (if any) is verified via
// `__xgetbv!`, and every trailing `(index, register, bit)` triple names a bit
// of `$cr[index].register` that must be set.
macro_rules! __expand_check_macro {
($(($name:tt, $reg_cap:tt $(, $i:expr, $reg:ident, $offset:expr)*)),* $(,)?) => {
#[macro_export]
#[doc(hidden)]
macro_rules! check {
$(
($cr:expr, $name) => {{
// Register bits are listed here:
// https://wiki.osdev.org/CPU_Registers_x86#Extended_Control_Registers
let reg_cap = match $reg_cap {
// Bit 1
"xmm" => $crate::__xgetbv!($cr, 0b10),
// Bits 1 and 2
"ymm" => $crate::__xgetbv!($cr, 0b110),
// Bits 1, 2, 5, 6, and 7
"zmm" => $crate::__xgetbv!($cr, 0b1110_0110),
// Any other string (e.g. "") means no register state is required.
_ => true,
};
// AND the register-state result with every required CPUID bit.
reg_cap
$(
& ($cr[$i].$reg & (1 << $offset) != 0)
)*
}};
)*
}
};
}
// Feature table consumed by `__expand_check_macro!`.
//
// Columns: feature name, required OS register state ("xmm", "ymm", "zmm", or
// "" for none), then one or more `(cpuid index, register, bit)` triples that
// must all be set. With the array built by `__detect_target_features!`,
// index 0 is CPUID leaf 1 and index 1 is CPUID leaf 7 / sub-leaf 0.
//
// `avx` and `fma` use YMM register state, so they require the OS to have
// enabled both the SSE and AVX state bits in XCR0 ("ymm"); checking only the
// SSE bit ("xmm") could report them on systems where the OS does not
// save/restore the upper YMM halves.
__expand_check_macro! {
    ("sse3", "xmm", 0, ecx, 0),
    ("pclmulqdq", "xmm", 0, ecx, 1),
    ("ssse3", "xmm", 0, ecx, 9),
    ("fma", "ymm", 0, ecx, 12, 0, ecx, 28),
    ("sse4.1", "xmm", 0, ecx, 19),
    ("sse4.2", "xmm", 0, ecx, 20),
    ("popcnt", "", 0, ecx, 23),
    ("aes", "xmm", 0, ecx, 25),
    ("avx", "ymm", 0, ecx, 28),
    ("rdrand", "", 0, ecx, 30),
    ("mmx", "", 0, edx, 23),
    ("sse", "xmm", 0, edx, 25),
    ("sse2", "xmm", 0, edx, 26),
    ("sgx", "", 1, ebx, 2),
    ("bmi1", "", 1, ebx, 3),
    ("bmi2", "", 1, ebx, 8),
    ("avx2", "ymm", 1, ebx, 5, 0, ecx, 28),
    ("avx512f", "zmm", 1, ebx, 16),
    ("avx512dq", "zmm", 1, ebx, 17),
    ("rdseed", "", 1, ebx, 18),
    ("adx", "", 1, ebx, 19),
    ("avx512ifma", "zmm", 1, ebx, 21),
    ("avx512pf", "zmm", 1, ebx, 26),
    ("avx512er", "zmm", 1, ebx, 27),
    ("avx512cd", "zmm", 1, ebx, 28),
    ("sha", "xmm", 1, ebx, 29),
    ("avx512bw", "zmm", 1, ebx, 30),
    ("avx512vl", "zmm", 1, ebx, 31),
    ("avx512vbmi", "zmm", 1, ecx, 1),
    ("avx512vbmi2", "zmm", 1, ecx, 6),
    ("gfni", "zmm", 1, ecx, 8),
    ("vaes", "zmm", 1, ecx, 9),
    ("vpclmulqdq", "zmm", 1, ecx, 10),
    ("avx512bitalg", "zmm", 1, ecx, 12),
    ("avx512vpopcntdq", "zmm", 1, ecx, 14),
}