2247 lines
77 KiB
Rust
2247 lines
77 KiB
Rust
//! UTF string slices.
|
||
//!
|
||
//! This module contains UTF string slices and related types.
|
||
|
||
use crate::{
|
||
error::{Utf16Error, Utf32Error},
|
||
is_utf16_low_surrogate,
|
||
iter::{EncodeUtf16, EncodeUtf32, EncodeUtf8},
|
||
validate_utf16, validate_utf32, U16Str, U32Str,
|
||
};
|
||
#[cfg(feature = "alloc")]
|
||
use crate::{Utf16String, Utf32String};
|
||
#[cfg(feature = "alloc")]
|
||
use alloc::{borrow::Cow, boxed::Box, string::String};
|
||
use core::{
|
||
convert::{AsMut, AsRef, TryFrom},
|
||
fmt::Write,
|
||
ops::{Index, IndexMut, RangeBounds},
|
||
slice::SliceIndex,
|
||
};
|
||
|
||
mod iter;
|
||
|
||
pub use iter::*;
|
||
|
||
macro_rules! utfstr_common_impl {
|
||
{
|
||
$(#[$utfstr_meta:meta])*
|
||
struct $utfstr:ident([$uchar:ty]);
|
||
type UtfString = $utfstring:ident;
|
||
type UStr = $ustr:ident;
|
||
type UCStr = $ucstr:ident;
|
||
type UtfError = $utferror:ident;
|
||
$(#[$from_slice_unchecked_meta:meta])*
|
||
fn from_slice_unchecked() -> {}
|
||
$(#[$from_slice_unchecked_mut_meta:meta])*
|
||
fn from_slice_unchecked_mut() -> {}
|
||
$(#[$from_boxed_slice_unchecked_meta:meta])*
|
||
fn from_boxed_slice_unchecked() -> {}
|
||
$(#[$get_unchecked_meta:meta])*
|
||
fn get_unchecked() -> {}
|
||
$(#[$get_unchecked_mut_meta:meta])*
|
||
fn get_unchecked_mut() -> {}
|
||
$(#[$len_meta:meta])*
|
||
fn len() -> {}
|
||
} => {
|
||
$(#[$utfstr_meta])*
|
||
#[allow(clippy::derive_hash_xor_eq)]
|
||
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||
pub struct $utfstr {
|
||
pub(crate) inner: [$uchar],
|
||
}
|
||
|
||
impl $utfstr {
|
||
$(#[$from_slice_unchecked_meta])*
|
||
#[allow(trivial_casts)]
|
||
#[inline]
|
||
#[must_use]
|
||
pub const unsafe fn from_slice_unchecked(s: &[$uchar]) -> &Self {
|
||
&*(s as *const [$uchar] as *const Self)
|
||
}
|
||
|
||
$(#[$from_slice_unchecked_mut_meta])*
|
||
#[allow(trivial_casts)]
|
||
#[inline]
|
||
#[must_use]
|
||
pub unsafe fn from_slice_unchecked_mut(s: &mut [$uchar]) -> &mut Self {
|
||
&mut *(s as *mut [$uchar] as *mut Self)
|
||
}
|
||
|
||
$(#[$from_boxed_slice_unchecked_meta])*
|
||
#[inline]
|
||
#[cfg(feature = "alloc")]
|
||
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
|
||
#[must_use]
|
||
pub unsafe fn from_boxed_slice_unchecked(s: Box<[$uchar]>) -> Box<Self> {
|
||
Box::from_raw(Box::into_raw(s) as *mut Self)
|
||
}
|
||
|
||
$(#[$get_unchecked_meta])*
|
||
#[inline]
|
||
#[must_use]
|
||
pub unsafe fn get_unchecked<I>(&self, index: I) -> &Self
|
||
where
|
||
I: SliceIndex<[$uchar], Output = [$uchar]>,
|
||
{
|
||
Self::from_slice_unchecked(self.inner.get_unchecked(index))
|
||
}
|
||
|
||
$(#[$get_unchecked_mut_meta])*
|
||
#[inline]
|
||
#[must_use]
|
||
pub unsafe fn get_unchecked_mut<I>(&mut self, index: I) -> &mut Self
|
||
where
|
||
I: SliceIndex<[$uchar], Output = [$uchar]>,
|
||
{
|
||
Self::from_slice_unchecked_mut(self.inner.get_unchecked_mut(index))
|
||
}
|
||
|
||
$(#[$len_meta])*
|
||
#[inline]
|
||
#[must_use]
|
||
pub const fn len(&self) -> usize {
|
||
self.inner.len()
|
||
}
|
||
|
||
/// Returns `true` if the string has a length of zero.
|
||
#[inline]
|
||
#[must_use]
|
||
pub const fn is_empty(&self) -> bool {
|
||
self.inner.is_empty()
|
||
}
|
||
|
||
/// Converts a string to a slice of its underlying elements.
|
||
///
|
||
/// To convert the slice back into a string slice, use the
|
||
/// [`from_slice`][Self::from_slice] function.
|
||
#[inline]
|
||
#[must_use]
|
||
pub const fn as_slice(&self) -> &[$uchar] {
|
||
&self.inner
|
||
}
|
||
|
||
/// Converts a mutable string to a mutable slice of its underlying elements.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because you can violate the invariants of this type when
|
||
/// mutating the slice. The caller must ensure that the contents of the slice is valid
|
||
/// UTF before the borrow ends and the underlying string is used.
|
||
///
|
||
/// Use of this string type whose contents have been mutated to invalid UTF is
|
||
/// undefined behavior.
|
||
#[inline]
|
||
#[must_use]
|
||
pub unsafe fn as_mut_slice(&mut self) -> &mut [$uchar] {
|
||
&mut self.inner
|
||
}
|
||
|
||
/// Converts a string slice to a raw pointer.
|
||
///
|
||
/// This pointer will be pointing to the first element of the string slice.
|
||
///
|
||
/// The caller must ensure that the returned pointer is never written to. If you need to
|
||
/// mutate the contents of the string slice, use [`as_mut_ptr`][Self::as_mut_ptr].
|
||
#[inline]
|
||
#[must_use]
|
||
pub const fn as_ptr(&self) -> *const $uchar {
|
||
self.inner.as_ptr()
|
||
}
|
||
|
||
/// Converts a mutable string slice to a mutable pointer.
|
||
///
|
||
/// This pointer will be pointing to the first element of the string slice.
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn as_mut_ptr(&mut self) -> *mut $uchar {
|
||
self.inner.as_mut_ptr()
|
||
}
|
||
|
||
/// Returns this string as a wide string slice of undefined encoding.
|
||
#[inline]
|
||
#[must_use]
|
||
pub const fn as_ustr(&self) -> &$ustr {
|
||
$ustr::from_slice(self.as_slice())
|
||
}
|
||
|
||
/// Returns a string slice with leading and trailing whitespace removed.
|
||
///
|
||
/// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
|
||
/// `White_Space`.
|
||
#[must_use]
|
||
pub fn trim(&self) -> &Self {
|
||
self.trim_start().trim_end()
|
||
}
|
||
|
||
/// Returns a string slice with leading whitespace removed.
|
||
///
|
||
/// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
|
||
/// `White_Space`.
|
||
///
|
||
/// # Text directionality
|
||
///
|
||
/// A string is a sequence of elements. `start` in this context means the first position
|
||
/// of that sequence; for a left-to-right language like English or Russian, this will be
|
||
/// left side, and for right-to-left languages like Arabic or Hebrew, this will be the
|
||
/// right side.
|
||
#[must_use]
|
||
pub fn trim_start(&self) -> &Self {
|
||
if let Some((index, _)) = self.char_indices().find(|(_, c)| !c.is_whitespace()) {
|
||
&self[index..]
|
||
} else {
|
||
<&Self as Default>::default()
|
||
}
|
||
}
|
||
|
||
/// Returns a string slice with trailing whitespace removed.
|
||
///
|
||
/// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
|
||
/// `White_Space`.
|
||
///
|
||
/// # Text directionality
|
||
///
|
||
/// A string is a sequence of elements. `end` in this context means the last position of
|
||
/// that sequence; for a left-to-right language like English or Russian, this will be
|
||
/// right side, and for right-to-left languages like Arabic or Hebrew, this will be the
|
||
/// left side.
|
||
#[must_use]
|
||
pub fn trim_end(&self) -> &Self {
|
||
if let Some((index, _)) = self.char_indices().rfind(|(_, c)| !c.is_whitespace()) {
|
||
&self[..=index]
|
||
} else {
|
||
<&Self as Default>::default()
|
||
}
|
||
}
|
||
|
||
/// Converts a boxed string into a boxed slice without copying or allocating.
|
||
#[inline]
|
||
#[cfg(feature = "alloc")]
|
||
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
|
||
#[must_use]
|
||
pub fn into_boxed_slice(self: Box<Self>) -> Box<[$uchar]> {
|
||
// SAFETY: from_raw pointer is from into_raw
|
||
unsafe { Box::from_raw(Box::into_raw(self) as *mut [$uchar]) }
|
||
}
|
||
|
||
/// Converts a boxed string slice into an owned UTF string without copying or
|
||
/// allocating.
|
||
#[inline]
|
||
#[cfg(feature = "alloc")]
|
||
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
|
||
#[must_use]
|
||
pub fn into_utfstring(self: Box<Self>) -> $utfstring {
|
||
unsafe { $utfstring::from_vec_unchecked(self.into_boxed_slice().into_vec()) }
|
||
}
|
||
|
||
/// Creates a new owned string by repeating this string `n` times.
|
||
///
|
||
/// # Panics
|
||
///
|
||
/// This function will panic if the capacity would overflow.
|
||
#[inline]
|
||
#[cfg(feature = "alloc")]
|
||
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
|
||
#[must_use]
|
||
pub fn repeat(&self, n: usize) -> $utfstring {
|
||
unsafe { $utfstring::from_vec_unchecked(self.as_slice().repeat(n)) }
|
||
}
|
||
}
|
||
|
||
impl AsMut<$utfstr> for $utfstr {
|
||
#[inline]
|
||
fn as_mut(&mut self) -> &mut $utfstr {
|
||
self
|
||
}
|
||
}
|
||
|
||
impl AsRef<$utfstr> for $utfstr {
|
||
#[inline]
|
||
fn as_ref(&self) -> &$utfstr {
|
||
self
|
||
}
|
||
}
|
||
|
||
impl AsRef<[$uchar]> for $utfstr {
|
||
#[inline]
|
||
fn as_ref(&self) -> &[$uchar] {
|
||
self.as_slice()
|
||
}
|
||
}
|
||
|
||
impl AsRef<$ustr> for $utfstr {
|
||
#[inline]
|
||
fn as_ref(&self) -> &$ustr {
|
||
self.as_ustr()
|
||
}
|
||
}
|
||
|
||
impl core::fmt::Debug for $utfstr {
|
||
#[inline]
|
||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||
f.write_char('"')?;
|
||
self.escape_debug().try_for_each(|c| f.write_char(c))?;
|
||
f.write_char('"')
|
||
}
|
||
}
|
||
|
||
impl Default for &$utfstr {
|
||
#[inline]
|
||
fn default() -> Self {
|
||
// SAFETY: Empty slice is always valid
|
||
unsafe { $utfstr::from_slice_unchecked(&[]) }
|
||
}
|
||
}
|
||
|
||
impl Default for &mut $utfstr {
|
||
#[inline]
|
||
fn default() -> Self {
|
||
// SAFETY: Empty slice is valways valid
|
||
unsafe { $utfstr::from_slice_unchecked_mut(&mut []) }
|
||
}
|
||
}
|
||
|
||
impl core::fmt::Display for $utfstr {
|
||
#[inline]
|
||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||
self.chars().try_for_each(|c| f.write_char(c))
|
||
}
|
||
}
|
||
|
||
#[cfg(feature = "alloc")]
|
||
impl From<Box<$utfstr>> for Box<[$uchar]> {
|
||
#[inline]
|
||
fn from(value: Box<$utfstr>) -> Self {
|
||
value.into_boxed_slice()
|
||
}
|
||
}
|
||
|
||
impl<'a> From<&'a $utfstr> for &'a $ustr {
|
||
#[inline]
|
||
fn from(value: &'a $utfstr) -> Self {
|
||
value.as_ustr()
|
||
}
|
||
}
|
||
|
||
impl<'a> From<&'a $utfstr> for &'a [$uchar] {
|
||
#[inline]
|
||
fn from(value: &'a $utfstr) -> Self {
|
||
value.as_slice()
|
||
}
|
||
}
|
||
|
||
#[cfg(feature = "std")]
|
||
impl From<&$utfstr> for std::ffi::OsString {
|
||
#[inline]
|
||
fn from(value: &$utfstr) -> std::ffi::OsString {
|
||
value.as_ustr().to_os_string()
|
||
}
|
||
}
|
||
|
||
impl PartialEq<$utfstr> for &$utfstr {
|
||
#[inline]
|
||
fn eq(&self, other: &$utfstr) -> bool {
|
||
self.as_slice() == other.as_slice()
|
||
}
|
||
}
|
||
|
||
#[cfg(feature = "alloc")]
|
||
impl<'a, 'b> PartialEq<Cow<'a, $utfstr>> for &'b $utfstr {
|
||
#[inline]
|
||
fn eq(&self, other: &Cow<'a, $utfstr>) -> bool {
|
||
self == other.as_ref()
|
||
}
|
||
}
|
||
|
||
#[cfg(feature = "alloc")]
|
||
impl PartialEq<$utfstr> for Cow<'_, $utfstr> {
|
||
#[inline]
|
||
fn eq(&self, other: &$utfstr) -> bool {
|
||
self.as_ref() == other
|
||
}
|
||
}
|
||
|
||
#[cfg(feature = "alloc")]
|
||
impl<'a, 'b> PartialEq<&'a $utfstr> for Cow<'b, $utfstr> {
|
||
#[inline]
|
||
fn eq(&self, other: &&'a $utfstr) -> bool {
|
||
self.as_ref() == *other
|
||
}
|
||
}
|
||
|
||
impl PartialEq<$ustr> for $utfstr {
|
||
#[inline]
|
||
fn eq(&self, other: &$ustr) -> bool {
|
||
self.as_slice() == other.as_slice()
|
||
}
|
||
}
|
||
|
||
impl PartialEq<$utfstr> for $ustr {
|
||
#[inline]
|
||
fn eq(&self, other: &$utfstr) -> bool {
|
||
self.as_slice() == other.as_slice()
|
||
}
|
||
}
|
||
|
||
impl PartialEq<crate::$ucstr> for $utfstr {
|
||
#[inline]
|
||
fn eq(&self, other: &crate::$ucstr) -> bool {
|
||
self.as_slice() == other.as_slice()
|
||
}
|
||
}
|
||
|
||
impl PartialEq<$utfstr> for crate::$ucstr {
|
||
#[inline]
|
||
fn eq(&self, other: &$utfstr) -> bool {
|
||
self.as_slice() == other.as_slice()
|
||
}
|
||
}
|
||
|
||
impl PartialEq<str> for $utfstr {
|
||
#[inline]
|
||
fn eq(&self, other: &str) -> bool {
|
||
self.chars().eq(other.chars())
|
||
}
|
||
}
|
||
|
||
impl PartialEq<&str> for $utfstr {
|
||
#[inline]
|
||
fn eq(&self, other: &&str) -> bool {
|
||
self.chars().eq(other.chars())
|
||
}
|
||
}
|
||
|
||
impl PartialEq<str> for &$utfstr {
|
||
#[inline]
|
||
fn eq(&self, other: &str) -> bool {
|
||
self.chars().eq(other.chars())
|
||
}
|
||
}
|
||
|
||
impl PartialEq<$utfstr> for str {
|
||
#[inline]
|
||
fn eq(&self, other: &$utfstr) -> bool {
|
||
self.chars().eq(other.chars())
|
||
}
|
||
}
|
||
|
||
impl PartialEq<$utfstr> for &str {
|
||
#[inline]
|
||
fn eq(&self, other: &$utfstr) -> bool {
|
||
self.chars().eq(other.chars())
|
||
}
|
||
}
|
||
|
||
#[cfg(feature = "alloc")]
|
||
impl<'a, 'b> PartialEq<Cow<'a, str>> for &'b $utfstr {
|
||
#[inline]
|
||
fn eq(&self, other: &Cow<'a, str>) -> bool {
|
||
self == other.as_ref()
|
||
}
|
||
}
|
||
|
||
#[cfg(feature = "alloc")]
|
||
impl PartialEq<$utfstr> for Cow<'_, str> {
|
||
#[inline]
|
||
fn eq(&self, other: &$utfstr) -> bool {
|
||
self.as_ref() == other
|
||
}
|
||
}
|
||
|
||
#[cfg(feature = "alloc")]
|
||
impl<'a, 'b> PartialEq<&'a $utfstr> for Cow<'b, str> {
|
||
#[inline]
|
||
fn eq(&self, other: &&'a $utfstr) -> bool {
|
||
self.as_ref() == *other
|
||
}
|
||
}
|
||
|
||
impl<'a> TryFrom<&'a $ustr> for &'a $utfstr {
|
||
type Error = $utferror;
|
||
|
||
#[inline]
|
||
fn try_from(value: &'a $ustr) -> Result<Self, Self::Error> {
|
||
$utfstr::from_ustr(value)
|
||
}
|
||
}
|
||
|
||
impl<'a> TryFrom<&'a crate::$ucstr> for &'a $utfstr {
|
||
type Error = $utferror;
|
||
|
||
#[inline]
|
||
fn try_from(value: &'a crate::$ucstr) -> Result<Self, Self::Error> {
|
||
$utfstr::from_ucstr(value)
|
||
}
|
||
}
|
||
};
|
||
}
|
||
|
||
utfstr_common_impl! {
|
||
/// UTF-16 string slice for [`Utf16String`][crate::Utf16String].
|
||
///
|
||
/// [`Utf16Str`] is to [`Utf16String`][crate::Utf16String] as [`str`] is to [`String`].
|
||
///
|
||
/// [`Utf16Str`] slices are string slices that are always valid UTF-16 encoding. This is unlike
|
||
/// the [`U16Str`][crate::U16Str] string slices, which may not have valid encoding. In this way,
|
||
/// [`Utf16Str`] string slices most resemble native [`str`] slices of all the types in this
|
||
/// crate.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// The easiest way to use [`Utf16Str`] is with the [`utf16str!`][crate::utf16str] macro to
|
||
/// convert string literals into string slices at compile time:
|
||
///
|
||
/// ```
|
||
/// use widestring::utf16str;
|
||
/// let hello = utf16str!("Hello, world!");
|
||
/// ```
|
||
///
|
||
/// You can also convert a [`u16`] slice directly, provided it is valid UTF-16:
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf16Str;
|
||
///
|
||
/// let sparkle_heart = [0xd83d, 0xdc96];
|
||
/// let sparkle_heart = Utf16Str::from_slice(&sparkle_heart).unwrap();
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
struct Utf16Str([u16]);
|
||
|
||
type UtfString = Utf16String;
|
||
type UStr = U16Str;
|
||
type UCStr = U16CStr;
|
||
type UtfError = Utf16Error;
|
||
|
||
/// Converts a slice to a string slice without checking that the string contains valid UTF-16.
|
||
///
|
||
/// See the safe version, [`from_slice`][Self::from_slice], for more information.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the slice passed to it is valid
|
||
/// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
|
||
/// [`Utf16Str`] is always valid UTF-16.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf16Str;
|
||
///
|
||
/// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
|
||
/// let sparkle_heart = unsafe { Utf16Str::from_slice_unchecked(&sparkle_heart) };
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
fn from_slice_unchecked() -> {}
|
||
|
||
/// Converts a mutable slice to a mutable string slice without checking that the string contains
|
||
/// valid UTF-16.
|
||
///
|
||
/// See the safe version, [`from_slice_mut`][Self::from_slice_mut], for more information.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the slice passed to it is valid
|
||
/// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
|
||
/// [`Utf16Str`] is always valid UTF-16.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf16Str;
|
||
///
|
||
/// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
|
||
/// let sparkle_heart = unsafe { Utf16Str::from_slice_unchecked_mut(&mut sparkle_heart) };
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
fn from_slice_unchecked_mut() -> {}
|
||
|
||
/// Converts a boxed slice to a boxed string slice without checking that the string contains
|
||
/// valid UTF-16.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check if the string slice is valid UTF-16, and
|
||
/// [`Utf16Str`] must always be valid UTF-16.
|
||
fn from_boxed_slice_unchecked() -> {}
|
||
|
||
/// Returns an unchecked subslice of this string slice.
|
||
///
|
||
/// This is the unchecked alternative to indexing the string slice.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// Callers of this function are responsible that these preconditions are satisfied:
|
||
///
|
||
/// - The starting index must not exceed the ending index;
|
||
/// - Indexes must be within bounds of the original slice;
|
||
/// - Indexes must lie on UTF-16 sequence boundaries.
|
||
///
|
||
/// Failing that, the returned string slice may reference invalid memory or violate the
|
||
/// invariants communicated by the type.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::{utf16str};
|
||
/// let v = utf16str!("⚧️🏳️⚧️➡️s");
|
||
/// unsafe {
|
||
/// assert_eq!(utf16str!("⚧️"), v.get_unchecked(..2));
|
||
/// assert_eq!(utf16str!("🏳️⚧️"), v.get_unchecked(2..8));
|
||
/// assert_eq!(utf16str!("➡️"), v.get_unchecked(8..10));
|
||
/// assert_eq!(utf16str!("s"), v.get_unchecked(10..));
|
||
/// }
|
||
/// ```
|
||
fn get_unchecked() -> {}
|
||
|
||
/// Returns a mutable, unchecked subslice of this string slice
|
||
///
|
||
/// This is the unchecked alternative to indexing the string slice.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// Callers of this function are responsible that these preconditions are satisfied:
|
||
///
|
||
/// - The starting index must not exceed the ending index;
|
||
/// - Indexes must be within bounds of the original slice;
|
||
/// - Indexes must lie on UTF-16 sequence boundaries.
|
||
///
|
||
/// Failing that, the returned string slice may reference invalid memory or violate the
|
||
/// invariants communicated by the type.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::{utf16str};
|
||
/// # #[cfg(feature = "alloc")] {
|
||
/// let mut v = utf16str!("⚧️🏳️⚧️➡️s").to_owned();
|
||
/// unsafe {
|
||
/// assert_eq!(utf16str!("⚧️"), v.get_unchecked_mut(..2));
|
||
/// assert_eq!(utf16str!("🏳️⚧️"), v.get_unchecked_mut(2..8));
|
||
/// assert_eq!(utf16str!("➡️"), v.get_unchecked_mut(8..10));
|
||
/// assert_eq!(utf16str!("s"), v.get_unchecked_mut(10..));
|
||
/// }
|
||
/// # }
|
||
/// ```
|
||
fn get_unchecked_mut() -> {}
|
||
|
||
/// Returns the length of `self`.
|
||
///
|
||
/// This length is in `u16` values, not [`char`]s or graphemes. In other words, it may not be
|
||
/// what human considers the length of the string.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::utf16str;
|
||
/// assert_eq!(utf16str!("foo").len(), 3);
|
||
///
|
||
/// let complex = utf16str!("⚧️🏳️⚧️➡️s");
|
||
/// assert_eq!(complex.len(), 11);
|
||
/// assert_eq!(complex.chars().count(), 10);
|
||
/// ```
|
||
fn len() -> {}
|
||
}
|
||
|
||
utfstr_common_impl! {
|
||
/// UTF-32 string slice for [`Utf32String`][crate::Utf32String].
|
||
///
|
||
/// [`Utf32Str`] is to [`Utf32String`][crate::Utf32String] as [`str`] is to [`String`].
|
||
///
|
||
/// [`Utf32Str`] slices are string slices that are always valid UTF-32 encoding. This is unlike
|
||
/// the [`U32Str`][crate::U16Str] string slices, which may not have valid encoding. In this way,
|
||
/// [`Utf32Str`] string slices most resemble native [`str`] slices of all the types in this
|
||
/// crate.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// The easiest way to use [`Utf32Str`] is with the [`utf32str!`][crate::utf32str] macro to
|
||
/// convert string literals into string slices at compile time:
|
||
///
|
||
/// ```
|
||
/// use widestring::utf32str;
|
||
/// let hello = utf32str!("Hello, world!");
|
||
/// ```
|
||
///
|
||
/// You can also convert a [`u32`] slice directly, provided it is valid UTF-32:
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf32Str;
|
||
///
|
||
/// let sparkle_heart = [0x1f496];
|
||
/// let sparkle_heart = Utf32Str::from_slice(&sparkle_heart).unwrap();
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
///
|
||
/// Since [`char`] slices are valid UTF-32, a slice of [`char`]s can be easily converted to a
|
||
/// string slice:
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf32Str;
|
||
///
|
||
/// let sparkle_heart = ['💖'; 3];
|
||
/// let sparkle_heart = Utf32Str::from_char_slice(&sparkle_heart);
|
||
///
|
||
/// assert_eq!("💖💖💖", sparkle_heart);
|
||
/// ```
|
||
struct Utf32Str([u32]);
|
||
|
||
type UtfString = Utf32String;
|
||
type UStr = U32Str;
|
||
type UCStr = U32CStr;
|
||
type UtfError = Utf32Error;
|
||
|
||
/// Converts a slice to a string slice without checking that the string contains valid UTF-32.
|
||
///
|
||
/// See the safe version, [`from_slice`][Self::from_slice], for more information.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the slice passed to it is valid
|
||
/// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
|
||
/// [`Utf32Str`] is always valid UTF-32.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf32Str;
|
||
///
|
||
/// let sparkle_heart = vec![0x1f496];
|
||
/// let sparkle_heart = unsafe { Utf32Str::from_slice_unchecked(&sparkle_heart) };
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
fn from_slice_unchecked() -> {}
|
||
|
||
/// Converts a mutable slice to a mutable string slice without checking that the string contains
|
||
/// valid UTF-32.
|
||
///
|
||
/// See the safe version, [`from_slice_mut`][Self::from_slice_mut], for more information.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the slice passed to it is valid
|
||
/// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
|
||
/// [`Utf32Str`] is always valid UTF-32.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf32Str;
|
||
///
|
||
/// let mut sparkle_heart = vec![0x1f496];
|
||
/// let sparkle_heart = unsafe { Utf32Str::from_slice_unchecked_mut(&mut sparkle_heart) };
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
fn from_slice_unchecked_mut() -> {}
|
||
|
||
/// Converts a boxed slice to a boxed string slice without checking that the string contains
|
||
/// valid UTF-32.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check if the string slice is valid UTF-32, and
|
||
/// [`Utf32Str`] must always be valid UTF-32.
|
||
fn from_boxed_slice_unchecked() -> {}
|
||
|
||
/// Returns an unchecked subslice of this string slice.
|
||
///
|
||
/// This is the unchecked alternative to indexing the string slice.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// Callers of this function are responsible that these preconditions are satisfied:
|
||
///
|
||
/// - The starting index must not exceed the ending index;
|
||
/// - Indexes must be within bounds of the original slice;
|
||
///
|
||
/// Failing that, the returned string slice may reference invalid memory or violate the
|
||
/// invariants communicated by the type.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::utf32str;
|
||
/// let v = utf32str!("⚧️🏳️⚧️➡️s");
|
||
/// unsafe {
|
||
/// assert_eq!(utf32str!("⚧️"), v.get_unchecked(..2));
|
||
/// assert_eq!(utf32str!("🏳️⚧️"), v.get_unchecked(2..7));
|
||
/// assert_eq!(utf32str!("➡️"), v.get_unchecked(7..9));
|
||
/// assert_eq!(utf32str!("s"), v.get_unchecked(9..))
|
||
/// }
|
||
/// ```
|
||
fn get_unchecked() -> {}
|
||
|
||
/// Returns a mutable, unchecked subslice of this string slice
|
||
///
|
||
/// This is the unchecked alternative to indexing the string slice.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// Callers of this function are responsible that these preconditions are satisfied:
|
||
///
|
||
/// - The starting index must not exceed the ending index;
|
||
/// - Indexes must be within bounds of the original slice;
|
||
///
|
||
/// Failing that, the returned string slice may reference invalid memory or violate the
|
||
/// invariants communicated by the type.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::utf32str;
|
||
/// # #[cfg(feature = "alloc")] {
|
||
/// let mut v = utf32str!("⚧️🏳️⚧️➡️s").to_owned();
|
||
/// unsafe {
|
||
/// assert_eq!(utf32str!("⚧️"), v.get_unchecked_mut(..2));
|
||
/// assert_eq!(utf32str!("🏳️⚧️"), v.get_unchecked_mut(2..7));
|
||
/// assert_eq!(utf32str!("➡️"), v.get_unchecked_mut(7..9));
|
||
/// assert_eq!(utf32str!("s"), v.get_unchecked_mut(9..))
|
||
/// }
|
||
/// # }
|
||
/// ```
|
||
fn get_unchecked_mut() -> {}
|
||
|
||
/// Returns the length of `self`.
|
||
///
|
||
/// This length is in the number of [`char`]s in the slice, not graphemes. In other words, it
|
||
/// may not be what human considers the length of the string.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::utf32str;
|
||
/// assert_eq!(utf32str!("foo").len(), 3);
|
||
///
|
||
/// let complex = utf32str!("⚧️🏳️⚧️➡️s");
|
||
/// assert_eq!(complex.len(), 10);
|
||
/// assert_eq!(complex.chars().count(), 10);
|
||
/// ```
|
||
fn len() -> {}
|
||
}
|
||
|
||
impl Utf16Str {
|
||
/// Converts a slice of UTF-16 data to a string slice.
|
||
///
|
||
/// Not all slices of [`u16`] values are valid to convert, since [`Utf16Str`] requires that it
|
||
/// is always valid UTF-16. This function checks to ensure that the values are valid UTF-16, and
|
||
/// then does the conversion.
|
||
///
|
||
/// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
|
||
/// the validity check, there is an unsafe version of this function,
|
||
/// [`from_slice_unchecked`][Self::from_slice_unchecked], which has the same behavior but skips
|
||
/// the check.
|
||
///
|
||
/// If you need an owned string, consider using [`Utf16String::from_vec`] instead.
|
||
///
|
||
/// Because you can stack-allocate a `[u16; N]`, this function is one way to have a
|
||
/// stack-allocated string. Indeed, the [`utf16str!`][crate::utf16str] macro does exactly this
|
||
/// after converting from UTF-8 to UTF-16.
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// Returns an error if the slice is not UTF-16 with a description as to why the provided slice
|
||
/// is not UTF-16.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf16Str;
|
||
///
|
||
/// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
|
||
/// let sparkle_heart = Utf16Str::from_slice(&sparkle_heart).unwrap();
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
///
|
||
/// With incorrect values that return an error:
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf16Str;
|
||
///
|
||
/// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
|
||
///
|
||
/// assert!(Utf16Str::from_slice(&sparkle_heart).is_err());
|
||
/// ```
|
||
pub fn from_slice(s: &[u16]) -> Result<&Self, Utf16Error> {
|
||
validate_utf16(s)?;
|
||
// SAFETY: Just validated
|
||
Ok(unsafe { Self::from_slice_unchecked(s) })
|
||
}
|
||
|
||
/// Converts a mutable slice of UTF-16 data to a mutable string slice.
|
||
///
|
||
/// Not all slices of [`u16`] values are valid to convert, since [`Utf16Str`] requires that it
|
||
/// is always valid UTF-16. This function checks to ensure that the values are valid UTF-16, and
|
||
/// then does the conversion.
|
||
///
|
||
/// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
|
||
/// the validity check, there is an unsafe version of this function,
|
||
/// [`from_slice_unchecked_mut`][Self::from_slice_unchecked_mut], which has the same behavior
|
||
/// but skips the check.
|
||
///
|
||
/// If you need an owned string, consider using [`Utf16String::from_vec`] instead.
|
||
///
|
||
/// Because you can stack-allocate a `[u16; N]`, this function is one way to have a
|
||
/// stack-allocated string. Indeed, the [`utf16str!`][crate::utf16str] macro does exactly this
|
||
/// after converting from UTF-8 to UTF-16.
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// Returns an error if the slice is not UTF-16 with a description as to why the provided slice
|
||
/// is not UTF-16.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf16Str;
|
||
///
|
||
/// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
|
||
/// let sparkle_heart = Utf16Str::from_slice_mut(&mut sparkle_heart).unwrap();
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
///
|
||
/// With incorrect values that return an error:
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf16Str;
|
||
///
|
||
/// let mut sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
|
||
///
|
||
/// assert!(Utf16Str::from_slice_mut(&mut sparkle_heart).is_err());
|
||
/// ```
|
||
pub fn from_slice_mut(s: &mut [u16]) -> Result<&mut Self, Utf16Error> {
|
||
validate_utf16(s)?;
|
||
// SAFETY: Just validated
|
||
Ok(unsafe { Self::from_slice_unchecked_mut(s) })
|
||
}
|
||
|
||
/// Converts a wide string slice of undefined encoding to a UTF-16 string slice without checking
|
||
/// if the string slice is valid UTF-16.
|
||
///
|
||
/// See the safe version, [`from_ustr`][Self::from_ustr], for more information.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the string slice passed to it is
|
||
/// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
|
||
/// the [`Utf16Str`] is always valid UTF-16.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::{Utf16Str, u16str};
|
||
///
|
||
/// let sparkle_heart = u16str!("💖");
|
||
/// let sparkle_heart = unsafe { Utf16Str::from_ustr_unchecked(sparkle_heart) };
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
#[must_use]
|
||
pub const unsafe fn from_ustr_unchecked(s: &U16Str) -> &Self {
|
||
Self::from_slice_unchecked(s.as_slice())
|
||
}
|
||
|
||
/// Converts a mutable wide string slice of undefined encoding to a mutable UTF-16 string slice
|
||
/// without checking if the string slice is valid UTF-16.
|
||
///
|
||
/// See the safe version, [`from_ustr_mut`][Self::from_ustr_mut], for more information.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the string slice passed to it is
|
||
/// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
|
||
/// the [`Utf16Str`] is always valid UTF-16.
|
||
#[must_use]
|
||
pub unsafe fn from_ustr_unchecked_mut(s: &mut U16Str) -> &mut Self {
|
||
Self::from_slice_unchecked_mut(s.as_mut_slice())
|
||
}
|
||
|
||
/// Converts a wide string slice of undefined encoding to a UTF-16 string slice.
|
||
///
|
||
/// Since [`U16Str`] does not have a specified encoding, this conversion may fail if the
|
||
/// [`U16Str`] does not contain valid UTF-16 data.
|
||
///
|
||
/// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
|
||
/// the validity check, there is an unsafe version of this function,
|
||
/// [`from_ustr_unchecked`][Self::from_ustr_unchecked], which has the same behavior
|
||
/// but skips the check.
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// Returns an error if the string slice is not UTF-16 with a description as to why the
|
||
/// provided string slice is not UTF-16.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::{Utf16Str, u16str};
|
||
///
|
||
/// let sparkle_heart = u16str!("💖");
|
||
/// let sparkle_heart = Utf16Str::from_ustr(sparkle_heart).unwrap();
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
#[inline]
|
||
pub fn from_ustr(s: &U16Str) -> Result<&Self, Utf16Error> {
|
||
Self::from_slice(s.as_slice())
|
||
}
|
||
|
||
/// Converts a mutable wide string slice of undefined encoding to a mutable UTF-16 string slice.
|
||
///
|
||
/// Since [`U16Str`] does not have a specified encoding, this conversion may fail if the
|
||
/// [`U16Str`] does not contain valid UTF-16 data.
|
||
///
|
||
/// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
|
||
/// the validity check, there is an unsafe version of this function,
|
||
/// [`from_ustr_unchecked_mut`][Self::from_ustr_unchecked_mut], which has the same behavior
|
||
/// but skips the check.
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// Returns an error if the string slice is not UTF-16 with a description as to why the
|
||
/// provided string slice is not UTF-16.
|
||
#[inline]
|
||
pub fn from_ustr_mut(s: &mut U16Str) -> Result<&mut Self, Utf16Error> {
|
||
Self::from_slice_mut(s.as_mut_slice())
|
||
}
|
||
|
||
/// Converts a wide C string slice to a UTF-16 string slice without checking if the
|
||
/// string slice is valid UTF-16.
|
||
///
|
||
/// The resulting string slice does *not* contain the nul terminator.
|
||
///
|
||
/// See the safe version, [`from_ucstr`][Self::from_ucstr], for more information.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the string slice passed to it is
|
||
/// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
|
||
/// the [`Utf16Str`] is always valid UTF-16.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::{Utf16Str, u16cstr};
|
||
///
|
||
/// let sparkle_heart = u16cstr!("💖");
|
||
/// let sparkle_heart = unsafe { Utf16Str::from_ucstr_unchecked(sparkle_heart) };
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
#[inline]
|
||
#[must_use]
|
||
pub unsafe fn from_ucstr_unchecked(s: &crate::U16CStr) -> &Self {
|
||
Self::from_slice_unchecked(s.as_slice())
|
||
}
|
||
|
||
/// Converts a mutable wide C string slice to a mutable UTF-16 string slice without
|
||
/// checking if the string slice is valid UTF-16.
|
||
///
|
||
/// The resulting string slice does *not* contain the nul terminator.
|
||
///
|
||
/// See the safe version, [`from_ucstr_mut`][Self::from_ucstr_mut], for more information.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the string slice passed to it is
|
||
/// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
|
||
/// the [`Utf16Str`] is always valid UTF-16.
|
||
#[inline]
|
||
#[must_use]
|
||
pub unsafe fn from_ucstr_unchecked_mut(s: &mut crate::U16CStr) -> &mut Self {
|
||
Self::from_slice_unchecked_mut(s.as_mut_slice())
|
||
}
|
||
|
||
/// Converts a wide C string slice to a UTF-16 string slice.
|
||
///
|
||
/// The resulting string slice does *not* contain the nul terminator.
|
||
///
|
||
/// Since [`U16CStr`][crate::U16CStr] does not have a specified encoding, this conversion may
|
||
/// fail if the [`U16CStr`][crate::U16CStr] does not contain valid UTF-16 data.
|
||
///
|
||
/// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
|
||
/// the validity check, there is an unsafe version of this function,
|
||
/// [`from_ucstr_unchecked`][Self::from_ucstr_unchecked], which has the same behavior
|
||
/// but skips the check.
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// Returns an error if the string slice is not UTF-16 with a description as to why the
|
||
/// provided string slice is not UTF-16.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::{Utf16Str, u16cstr};
|
||
///
|
||
/// let sparkle_heart = u16cstr!("💖");
|
||
/// let sparkle_heart = Utf16Str::from_ucstr(sparkle_heart).unwrap();
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
#[inline]
|
||
pub fn from_ucstr(s: &crate::U16CStr) -> Result<&Self, Utf16Error> {
|
||
Self::from_slice(s.as_slice())
|
||
}
|
||
|
||
/// Converts a mutable wide C string slice to a mutable UTF-16 string slice.
|
||
///
|
||
/// The resulting string slice does *not* contain the nul terminator.
|
||
///
|
||
/// Since [`U16CStr`][crate::U16CStr] does not have a specified encoding, this conversion may
|
||
/// fail if the [`U16CStr`][crate::U16CStr] does not contain valid UTF-16 data.
|
||
///
|
||
/// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
|
||
/// the validity check, there is an unsafe version of this function,
|
||
/// [`from_ucstr_unchecked_mut`][Self::from_ucstr_unchecked_mut], which has the same behavior
|
||
/// but skips the check.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This method is unsafe because you can violate the invariants of [`U16CStr`][crate::U16CStr]
|
||
/// when mutating the slice (i.e. by adding interior nul values).
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// Returns an error if the string slice is not UTF-16 with a description as to why the
|
||
/// provided string slice is not UTF-16.
|
||
#[inline]
|
||
pub unsafe fn from_ucstr_mut(s: &mut crate::U16CStr) -> Result<&mut Self, Utf16Error> {
|
||
Self::from_slice_mut(s.as_mut_slice())
|
||
}
|
||
|
||
/// Converts to a standard UTF-8 [`String`].
|
||
///
|
||
/// Because this string is always valid UTF-16, the conversion is lossless and non-fallible.
|
||
#[inline]
|
||
#[allow(clippy::inherent_to_string_shadow_display)]
|
||
#[cfg(feature = "alloc")]
|
||
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
|
||
#[must_use]
|
||
pub fn to_string(&self) -> String {
|
||
String::from_utf16(self.as_slice()).unwrap()
|
||
}
|
||
|
||
/// Checks that `index`-th value is the value in a UTF-16 code point sequence or the end of the
|
||
/// string.
|
||
///
|
||
/// Returns `true` if the value at `index` is not a UTF-16 surrogate value, or if the value at
|
||
/// `index` is the first value of a surrogate pair (the "high" surrogate). Returns `false` if
|
||
/// the value at `index` is the second value of a surrogate pair (a.k.a the "low" surrogate).
|
||
///
|
||
/// The start and end of the string (when `index == self.len()`) are considered to be
|
||
/// boundaries.
|
||
///
|
||
/// Returns `false` if `index is greater than `self.len()`.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::utf16str;
|
||
/// let s = utf16str!("Sparkle 💖 Heart");
|
||
/// assert!(s.is_char_boundary(0));
|
||
///
|
||
/// // high surrogate of `💖`
|
||
/// assert!(s.is_char_boundary(8));
|
||
/// // low surrogate of `💖`
|
||
/// assert!(!s.is_char_boundary(9));
|
||
///
|
||
/// assert!(s.is_char_boundary(s.len()));
|
||
/// ```
|
||
#[inline]
|
||
#[must_use]
|
||
pub const fn is_char_boundary(&self, index: usize) -> bool {
|
||
if index > self.len() {
|
||
false
|
||
} else if index == self.len() {
|
||
true
|
||
} else {
|
||
!is_utf16_low_surrogate(self.inner[index])
|
||
}
|
||
}
|
||
|
||
/// Returns a subslice of this string.
|
||
///
|
||
/// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
|
||
/// equivalent indexing operation would panic.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::{utf16str};
|
||
/// let v = utf16str!("⚧️🏳️⚧️➡️s");
|
||
///
|
||
/// assert_eq!(Some(utf16str!("⚧️")), v.get(..2));
|
||
/// assert_eq!(Some(utf16str!("🏳️⚧️")), v.get(2..8));
|
||
/// assert_eq!(Some(utf16str!("➡️")), v.get(8..10));
|
||
/// assert_eq!(Some(utf16str!("s")), v.get(10..));
|
||
///
|
||
/// assert!(v.get(3..4).is_none());
|
||
/// ```
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn get<I>(&self, index: I) -> Option<&Self>
|
||
where
|
||
I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
|
||
{
|
||
// TODO: Use SliceIndex directly when it is stabilized
|
||
let range = crate::range_check(index, ..self.len())?;
|
||
if !self.is_char_boundary(range.start) || !self.is_char_boundary(range.end) {
|
||
return None;
|
||
}
|
||
|
||
// SAFETY: range_check verified bounds, and we just verified char boundaries
|
||
Some(unsafe { self.get_unchecked(range) })
|
||
}
|
||
|
||
/// Returns a mutable subslice of this string.
|
||
///
|
||
/// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
|
||
/// equivalent indexing operation would panic.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::{utf16str};
|
||
/// # #[cfg(feature = "alloc")] {
|
||
/// let mut v = utf16str!("⚧️🏳️⚧️➡️s").to_owned();
|
||
///
|
||
/// assert_eq!(utf16str!("⚧️"), v.get_mut(..2).unwrap());
|
||
/// assert_eq!(utf16str!("🏳️⚧️"), v.get_mut(2..8).unwrap());
|
||
/// assert_eq!(utf16str!("➡️"), v.get_mut(8..10).unwrap());
|
||
/// assert_eq!(utf16str!("s"), v.get_mut(10..).unwrap());
|
||
///
|
||
/// assert!(v.get_mut(3..4).is_none());
|
||
/// # }
|
||
/// ```
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn get_mut<I>(&mut self, index: I) -> Option<&mut Self>
|
||
where
|
||
I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
|
||
{
|
||
// TODO: Use SliceIndex directly when it is stabilized
|
||
let range = crate::range_check(index, ..self.len())?;
|
||
if !self.is_char_boundary(range.start) || !self.is_char_boundary(range.end) {
|
||
return None;
|
||
}
|
||
|
||
// SAFETY: range_check verified bounds, and we just verified char boundaries
|
||
Some(unsafe { self.get_unchecked_mut(range) })
|
||
}
|
||
|
||
/// Divide one string slice into two at an index.
|
||
///
|
||
/// The argument, `mid`, should be an offset from the start of the string. It must also be on
|
||
/// the boundary of a UTF-16 code point.
|
||
///
|
||
/// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
|
||
/// the end of the string slice.
|
||
///
|
||
/// To get mutable string slices instead, see the [`split_at_mut`][Self::split_at_mut] method.
|
||
///
|
||
/// # Panics
|
||
///
|
||
/// Panics if `mid` is not on a UTF-16 code point boundary, or if it is past the end of the last
|
||
/// code point of the string slice.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::utf16str;
|
||
/// let s = utf16str!("Per Martin-Löf");
|
||
///
|
||
/// let (first, last) = s.split_at(3);
|
||
///
|
||
/// assert_eq!("Per", first);
|
||
/// assert_eq!(" Martin-Löf", last);
|
||
/// ```
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
|
||
assert!(self.is_char_boundary(mid));
|
||
let (a, b) = self.inner.split_at(mid);
|
||
unsafe { (Self::from_slice_unchecked(a), Self::from_slice_unchecked(b)) }
|
||
}
|
||
|
||
/// Divide one mutable string slice into two at an index.
|
||
///
|
||
/// The argument, `mid`, should be an offset from the start of the string. It must also be on
|
||
/// the boundary of a UTF-16 code point.
|
||
///
|
||
/// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
|
||
/// the end of the string slice.
|
||
///
|
||
/// To get immutable string slices instead, see the [`split_at`][Self::split_at] method.
|
||
///
|
||
/// # Panics
|
||
///
|
||
/// Panics if `mid` is not on a UTF-16 code point boundary, or if it is past the end of the last
|
||
/// code point of the string slice.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::utf16str;
|
||
/// # #[cfg(feature = "alloc")] {
|
||
/// let mut s = utf16str!("Per Martin-Löf").to_owned();
|
||
///
|
||
/// let (first, last) = s.split_at_mut(3);
|
||
///
|
||
/// assert_eq!("Per", first);
|
||
/// assert_eq!(" Martin-Löf", last);
|
||
/// # }
|
||
/// ```
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
|
||
assert!(self.is_char_boundary(mid));
|
||
let (a, b) = self.inner.split_at_mut(mid);
|
||
unsafe {
|
||
(
|
||
Self::from_slice_unchecked_mut(a),
|
||
Self::from_slice_unchecked_mut(b),
|
||
)
|
||
}
|
||
}
|
||
|
||
/// Returns an iterator over the [`char`]s of a string slice.
|
||
///
|
||
/// As this string slice consists of valid UTF-16, we can iterate through a string slice by
|
||
/// [`char`]. This method returns such an iterator.
|
||
///
|
||
/// It's important to remember that [`char`] represents a Unicode Scalar Value, and might not
|
||
/// match your idea of what a 'character' is. Iteration over grapheme clusters may be what you
|
||
/// actually want. This functionality is not provided by this crate.
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn chars(&self) -> CharsUtf16<'_> {
|
||
CharsUtf16::new(self.as_slice())
|
||
}
|
||
|
||
/// Returns an iterator over the [`char`]s of a string slice and their positions.
|
||
///
|
||
/// As this string slice consists of valid UTF-16, we can iterate through a string slice by
|
||
/// [`char`]. This method returns an iterator of both these [`char`]s as well as their offsets.
|
||
///
|
||
/// The iterator yields tuples. The position is first, the [`char`] is second.
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn char_indices(&self) -> CharIndicesUtf16<'_> {
|
||
CharIndicesUtf16::new(self.as_slice())
|
||
}
|
||
|
||
/// An iterator over the [`u16`] code units of a string slice.
|
||
///
|
||
/// As a UTF-16 string slice consists of a sequence of [`u16`] code units, we can iterate
|
||
/// through a string slice by each code unit. This method returns such an iterator.
|
||
#[must_use]
|
||
pub fn code_units(&self) -> CodeUnits<'_> {
|
||
CodeUnits::new(self.as_slice())
|
||
}
|
||
|
||
/// Returns an iterator of bytes over the string encoded as UTF-8.
|
||
#[must_use]
|
||
pub fn encode_utf8(&self) -> EncodeUtf8<CharsUtf16<'_>> {
|
||
crate::encode_utf8(self.chars())
|
||
}
|
||
|
||
/// Returns an iterator of [`u32`] over the sting encoded as UTF-32.
|
||
#[must_use]
|
||
pub fn encode_utf32(&self) -> EncodeUtf32<CharsUtf16<'_>> {
|
||
crate::encode_utf32(self.chars())
|
||
}
|
||
|
||
/// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_debug`].
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn escape_debug(&self) -> EscapeDebug<CharsUtf16<'_>> {
|
||
EscapeDebug::<CharsUtf16>::new(self.as_slice())
|
||
}
|
||
|
||
/// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_default`].
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn escape_default(&self) -> EscapeDefault<CharsUtf16<'_>> {
|
||
EscapeDefault::<CharsUtf16>::new(self.as_slice())
|
||
}
|
||
|
||
/// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_unicode`].
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn escape_unicode(&self) -> EscapeUnicode<CharsUtf16<'_>> {
|
||
EscapeUnicode::<CharsUtf16>::new(self.as_slice())
|
||
}
|
||
|
||
/// Returns the lowercase equivalent of this string slice, as a new [`Utf16String`].
|
||
///
|
||
/// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
|
||
/// `Lowercase`.
|
||
///
|
||
/// Since some characters can expand into multiple characters when changing the case, this
|
||
/// function returns a [`Utf16String`] instead of modifying the parameter in-place.
|
||
#[inline]
|
||
#[cfg(feature = "alloc")]
|
||
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
|
||
#[must_use]
|
||
pub fn to_lowercase(&self) -> Utf16String {
|
||
let mut s = Utf16String::with_capacity(self.len());
|
||
for c in self.chars() {
|
||
for lower in c.to_lowercase() {
|
||
s.push(lower);
|
||
}
|
||
}
|
||
s
|
||
}
|
||
|
||
/// Returns the uppercase equivalent of this string slice, as a new [`Utf16String`].
|
||
///
|
||
/// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
|
||
/// `Uppercase`.
|
||
///
|
||
/// Since some characters can expand into multiple characters when changing the case, this
|
||
/// function returns a [`Utf16String`] instead of modifying the parameter in-place.
|
||
#[inline]
|
||
#[cfg(feature = "alloc")]
|
||
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
|
||
#[must_use]
|
||
pub fn to_uppercase(&self) -> Utf16String {
|
||
let mut s = Utf16String::with_capacity(self.len());
|
||
for c in self.chars() {
|
||
for lower in c.to_uppercase() {
|
||
s.push(lower);
|
||
}
|
||
}
|
||
s
|
||
}
|
||
}
|
||
|
||
impl Utf32Str {
|
||
/// Converts a slice of UTF-32 data to a string slice.
|
||
///
|
||
/// Not all slices of [`u32`] values are valid to convert, since [`Utf32Str`] requires that it
|
||
/// is always valid UTF-32. This function checks to ensure that the values are valid UTF-32, and
|
||
/// then does the conversion.
|
||
///
|
||
/// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
|
||
/// the validity check, there is an unsafe version of this function,
|
||
/// [`from_slice_unchecked`][Self::from_slice_unchecked], which has the same behavior but skips
|
||
/// the check.
|
||
///
|
||
/// If you need an owned string, consider using [`Utf32String::from_vec`] instead.
|
||
///
|
||
/// Because you can stack-allocate a `[u32; N]`, this function is one way to have a
|
||
/// stack-allocated string. Indeed, the [`utf32str!`][crate::utf32str] macro does exactly this
|
||
/// after converting from UTF-8 to UTF-32.
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// Returns an error if the slice is not UTF-32 with a description as to why the provided slice
|
||
/// is not UTF-32.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf32Str;
|
||
///
|
||
/// let sparkle_heart = vec![0x1f496];
|
||
/// let sparkle_heart = Utf32Str::from_slice(&sparkle_heart).unwrap();
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
///
|
||
/// With incorrect values that return an error:
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf32Str;
|
||
///
|
||
/// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
|
||
///
|
||
/// assert!(Utf32Str::from_slice(&sparkle_heart).is_err());
|
||
/// ```
|
||
pub fn from_slice(s: &[u32]) -> Result<&Self, Utf32Error> {
|
||
validate_utf32(s)?;
|
||
// SAFETY: Just validated
|
||
Ok(unsafe { Self::from_slice_unchecked(s) })
|
||
}
|
||
|
||
/// Converts a mutable slice of UTF-32 data to a mutable string slice.
|
||
///
|
||
/// Not all slices of [`u32`] values are valid to convert, since [`Utf32Str`] requires that it
|
||
/// is always valid UTF-32. This function checks to ensure that the values are valid UTF-32, and
|
||
/// then does the conversion.
|
||
///
|
||
/// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
|
||
/// the validity check, there is an unsafe version of this function,
|
||
/// [`from_slice_unchecked_mut`][Self::from_slice_unchecked_mut], which has the same behavior
|
||
/// but skips the check.
|
||
///
|
||
/// If you need an owned string, consider using [`Utf32String::from_vec`] instead.
|
||
///
|
||
/// Because you can stack-allocate a `[u32; N]`, this function is one way to have a
|
||
/// stack-allocated string. Indeed, the [`utf32str!`][crate::utf32str] macro does exactly this
|
||
/// after converting from UTF-8 to UTF-32.
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// Returns an error if the slice is not UTF-32 with a description as to why the provided slice
|
||
/// is not UTF-32.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf32Str;
|
||
///
|
||
/// let mut sparkle_heart = vec![0x1f496];
|
||
/// let sparkle_heart = Utf32Str::from_slice_mut(&mut sparkle_heart).unwrap();
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
///
|
||
/// With incorrect values that return an error:
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf32Str;
|
||
///
|
||
/// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
|
||
///
|
||
/// assert!(Utf32Str::from_slice_mut(&mut sparkle_heart).is_err());
|
||
/// ```
|
||
pub fn from_slice_mut(s: &mut [u32]) -> Result<&mut Self, Utf32Error> {
|
||
validate_utf32(s)?;
|
||
// SAFETY: Just validated
|
||
Ok(unsafe { Self::from_slice_unchecked_mut(s) })
|
||
}
|
||
|
||
/// Converts a wide string slice of undefined encoding to a UTF-32 string slice without checking
|
||
/// if the string slice is valid UTF-32.
|
||
///
|
||
/// See the safe version, [`from_ustr`][Self::from_ustr], for more information.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the string slice passed to it is
|
||
/// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
|
||
/// the [`Utf32Str`] is always valid UTF-32.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::{Utf32Str, u32str};
|
||
///
|
||
/// let sparkle_heart = u32str!("💖");
|
||
/// let sparkle_heart = unsafe { Utf32Str::from_ustr_unchecked(sparkle_heart) };
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
#[inline]
|
||
#[must_use]
|
||
pub const unsafe fn from_ustr_unchecked(s: &crate::U32Str) -> &Self {
|
||
Self::from_slice_unchecked(s.as_slice())
|
||
}
|
||
|
||
/// Converts a mutable wide string slice of undefined encoding to a mutable UTF-32 string slice
|
||
/// without checking if the string slice is valid UTF-32.
|
||
///
|
||
/// See the safe version, [`from_ustr_mut`][Self::from_ustr_mut], for more information.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the string slice passed to it is
|
||
/// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
|
||
/// the [`Utf32Str`] is always valid UTF-32.
|
||
#[inline]
|
||
#[must_use]
|
||
pub unsafe fn from_ustr_unchecked_mut(s: &mut crate::U32Str) -> &mut Self {
|
||
Self::from_slice_unchecked_mut(s.as_mut_slice())
|
||
}
|
||
|
||
/// Converts a wide string slice of undefined encoding to a UTF-32 string slice.
|
||
///
|
||
/// Since [`U32Str`] does not have a specified encoding, this conversion may fail if the
|
||
/// [`U32Str`] does not contain valid UTF-32 data.
|
||
///
|
||
/// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
|
||
/// the validity check, there is an unsafe version of this function,
|
||
/// [`from_ustr_unchecked`][Self::from_ustr_unchecked], which has the same behavior
|
||
/// but skips the check.
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// Returns an error if the string slice is not UTF-32 with a description as to why the
|
||
/// provided string slice is not UTF-32.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::{Utf32Str, u32str};
|
||
///
|
||
/// let sparkle_heart = u32str!("💖");
|
||
/// let sparkle_heart = Utf32Str::from_ustr(sparkle_heart).unwrap();
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
#[inline]
|
||
pub fn from_ustr(s: &crate::U32Str) -> Result<&Self, Utf32Error> {
|
||
Self::from_slice(s.as_slice())
|
||
}
|
||
|
||
/// Converts a mutable wide string slice of undefined encoding to a mutable UTF-32 string slice.
|
||
///
|
||
/// Since [`U32Str`] does not have a specified encoding, this conversion may fail if the
|
||
/// [`U32Str`] does not contain valid UTF-32 data.
|
||
///
|
||
/// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
|
||
/// the validity check, there is an unsafe version of this function,
|
||
/// [`from_ustr_unchecked_mut`][Self::from_ustr_unchecked_mut], which has the same behavior
|
||
/// but skips the check.
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// Returns an error if the string slice is not UTF-32 with a description as to why the
|
||
/// provided string slice is not UTF-32.
|
||
#[inline]
|
||
pub fn from_ustr_mut(s: &mut crate::U32Str) -> Result<&mut Self, Utf32Error> {
|
||
Self::from_slice_mut(s.as_mut_slice())
|
||
}
|
||
|
||
/// Converts a wide C string slice to a UTF-32 string slice without checking if the
|
||
/// string slice is valid UTF-32.
|
||
///
|
||
/// The resulting string slice does *not* contain the nul terminator.
|
||
///
|
||
/// See the safe version, [`from_ucstr`][Self::from_ucstr], for more information.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the string slice passed to it is
|
||
/// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
|
||
/// the [`Utf32Str`] is always valid UTF-32.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::{Utf32Str, u32cstr};
|
||
///
|
||
/// let sparkle_heart = u32cstr!("💖");
|
||
/// let sparkle_heart = unsafe { Utf32Str::from_ucstr_unchecked(sparkle_heart) };
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
#[inline]
|
||
#[must_use]
|
||
pub unsafe fn from_ucstr_unchecked(s: &crate::U32CStr) -> &Self {
|
||
Self::from_slice_unchecked(s.as_slice())
|
||
}
|
||
|
||
/// Converts a mutable wide C string slice to a mutable UTF-32 string slice without
|
||
/// checking if the string slice is valid UTF-32.
|
||
///
|
||
/// The resulting string slice does *not* contain the nul terminator.
|
||
///
|
||
/// See the safe version, [`from_ucstr_mut`][Self::from_ucstr_mut], for more information.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the string slice passed to it is
|
||
/// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
|
||
/// the [`Utf32Str`] is always valid UTF-32.
|
||
#[inline]
|
||
#[must_use]
|
||
pub unsafe fn from_ucstr_unchecked_mut(s: &mut crate::U32CStr) -> &mut Self {
|
||
Self::from_slice_unchecked_mut(s.as_mut_slice())
|
||
}
|
||
|
||
/// Converts a wide C string slice to a UTF-32 string slice.
|
||
///
|
||
/// The resulting string slice does *not* contain the nul terminator.
|
||
///
|
||
/// Since [`U32CStr`][crate::U32CStr] does not have a specified encoding, this conversion may
|
||
/// fail if the [`U32CStr`][crate::U32CStr] does not contain valid UTF-32 data.
|
||
///
|
||
/// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
|
||
/// the validity check, there is an unsafe version of this function,
|
||
/// [`from_ucstr_unchecked`][Self::from_ucstr_unchecked], which has the same behavior
|
||
/// but skips the check.
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// Returns an error if the string slice is not UTF-32 with a description as to why the
|
||
/// provided string slice is not UTF-32.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::{Utf32Str, u32cstr};
|
||
///
|
||
/// let sparkle_heart = u32cstr!("💖");
|
||
/// let sparkle_heart = Utf32Str::from_ucstr(sparkle_heart).unwrap();
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
#[inline]
|
||
pub fn from_ucstr(s: &crate::U32CStr) -> Result<&Self, Utf32Error> {
|
||
Self::from_slice(s.as_slice())
|
||
}
|
||
|
||
/// Converts a mutable wide C string slice to a mutable UTF-32 string slice.
|
||
///
|
||
/// The resulting string slice does *not* contain the nul terminator.
|
||
///
|
||
/// Since [`U32CStr`][crate::U32CStr] does not have a specified encoding, this conversion may
|
||
/// fail if the [`U32CStr`][crate::U32CStr] does not contain valid UTF-32 data.
|
||
///
|
||
/// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
|
||
/// the validity check, there is an unsafe version of this function,
|
||
/// [`from_ucstr_unchecked_mut`][Self::from_ucstr_unchecked_mut], which has the same behavior
|
||
/// but skips the check.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This method is unsafe because you can violate the invariants of [`U16CStr`][crate::U16CStr]
|
||
/// when mutating the slice (i.e. by adding interior nul values).
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// Returns an error if the string slice is not UTF-32 with a description as to why the
|
||
/// provided string slice is not UTF-32.
|
||
#[inline]
|
||
pub unsafe fn from_ucstr_mut(s: &mut crate::U32CStr) -> Result<&mut Self, Utf32Error> {
|
||
Self::from_slice_mut(s.as_mut_slice())
|
||
}
|
||
|
||
/// Converts a slice of [`char`]s to a string slice.
|
||
///
|
||
/// Since [`char`] slices are always valid UTF-32, this conversion always suceeds.
|
||
///
|
||
/// If you need an owned string, consider using [`Utf32String::from_chars`] instead.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf32Str;
|
||
///
|
||
/// let sparkle_heart = ['💖'];
|
||
/// let sparkle_heart = Utf32Str::from_char_slice(&sparkle_heart);
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
#[allow(trivial_casts)]
|
||
#[inline]
|
||
#[must_use]
|
||
pub const fn from_char_slice(s: &[char]) -> &Self {
|
||
// SAFETY: char slice is always valid UTF-32
|
||
unsafe { Self::from_slice_unchecked(&*(s as *const [char] as *const [u32])) }
|
||
}
|
||
|
||
/// Converts a mutable slice of [`char`]s to a string slice.
|
||
///
|
||
/// Since [`char`] slices are always valid UTF-32, this conversion always suceeds.
|
||
///
|
||
/// If you need an owned string, consider using [`Utf32String::from_chars`] instead.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// use widestring::Utf32Str;
|
||
///
|
||
/// let mut sparkle_heart = ['💖'];
|
||
/// let sparkle_heart = Utf32Str::from_char_slice_mut(&mut sparkle_heart);
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
#[allow(trivial_casts)]
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn from_char_slice_mut(s: &mut [char]) -> &mut Self {
|
||
// SAFETY: char slice is always valid UTF-32
|
||
unsafe { Self::from_slice_unchecked_mut(&mut *(s as *mut [char] as *mut [u32])) }
|
||
}
|
||
|
||
/// Converts a string slice into a slice of [`char`]s.
|
||
#[allow(trivial_casts)]
|
||
#[inline]
|
||
#[must_use]
|
||
pub const fn as_char_slice(&self) -> &[char] {
|
||
// SAFETY: Self should be valid UTF-32 so chars will be in range
|
||
unsafe { &*(self.as_slice() as *const [u32] as *const [char]) }
|
||
}
|
||
|
||
/// Converts a mutable string slice into a mutable slice of [`char`]s.
|
||
#[allow(trivial_casts)]
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn as_char_slice_mut(&mut self) -> &mut [char] {
|
||
// SAFETY: Self should be valid UTF-32 so chars will be in range
|
||
unsafe { &mut *(self.as_mut_slice() as *mut [u32] as *mut [char]) }
|
||
}
|
||
|
||
/// Converts to a standard UTF-8 [`String`].
|
||
///
|
||
/// Because this string is always valid UTF-32, the conversion is lossless and non-fallible.
|
||
#[inline]
|
||
#[allow(clippy::inherent_to_string_shadow_display)]
|
||
#[cfg(feature = "alloc")]
|
||
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
|
||
#[must_use]
|
||
pub fn to_string(&self) -> String {
|
||
let mut s = String::with_capacity(self.len());
|
||
s.extend(self.as_char_slice());
|
||
s
|
||
}
|
||
|
||
/// Returns a subslice of this string.
|
||
///
|
||
/// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
|
||
/// equivalent indexing operation would panic.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::{utf32str};
|
||
/// let v = utf32str!("⚧️🏳️⚧️➡️s");
|
||
///
|
||
/// assert_eq!(Some(utf32str!("⚧️")), v.get(..2));
|
||
/// assert_eq!(Some(utf32str!("🏳️⚧️")), v.get(2..7));
|
||
/// assert_eq!(Some(utf32str!("➡️")), v.get(7..9));
|
||
/// assert_eq!(Some(utf32str!("s")), v.get(9..));
|
||
/// ```
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn get<I>(&self, index: I) -> Option<&Self>
|
||
where
|
||
I: SliceIndex<[u32], Output = [u32]>,
|
||
{
|
||
// TODO: Use SliceIndex directly when it is stabilized
|
||
// SAFETY: subslice has already been verified
|
||
self.inner
|
||
.get(index)
|
||
.map(|s| unsafe { Self::from_slice_unchecked(s) })
|
||
}
|
||
|
||
/// Returns a mutable subslice of this string.
|
||
///
|
||
/// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
|
||
/// equivalent indexing operation would panic.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::{utf32str};
|
||
/// # #[cfg(feature = "alloc")] {
|
||
/// let mut v = utf32str!("⚧️🏳️⚧️➡️s").to_owned();
|
||
///
|
||
/// assert_eq!(utf32str!("⚧️"), v.get_mut(..2).unwrap());
|
||
/// assert_eq!(utf32str!("🏳️⚧️"), v.get_mut(2..7).unwrap());
|
||
/// assert_eq!(utf32str!("➡️"), v.get_mut(7..9).unwrap());
|
||
/// assert_eq!(utf32str!("s"), v.get_mut(9..).unwrap());
|
||
/// # }
|
||
/// ```
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn get_mut<I>(&mut self, index: I) -> Option<&mut Self>
|
||
where
|
||
I: SliceIndex<[u32], Output = [u32]>,
|
||
{
|
||
// TODO: Use SliceIndex directly when it is stabilized
|
||
// SAFETY: subslice has already been verified
|
||
self.inner
|
||
.get_mut(index)
|
||
.map(|s| unsafe { Self::from_slice_unchecked_mut(s) })
|
||
}
|
||
|
||
/// Divide one string slice into two at an index.
|
||
///
|
||
/// The argument, `mid`, should be an offset from the start of the string.
|
||
///
|
||
/// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
|
||
/// the end of the string slice.
|
||
///
|
||
/// To get mutable string slices instead, see the [`split_at_mut`][Self::split_at_mut] method.
|
||
///
|
||
/// # Panics
|
||
///
|
||
/// Panics if `mid` is past the end of the last code point of the string slice.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::utf32str;
|
||
/// let s = utf32str!("Per Martin-Löf");
|
||
///
|
||
/// let (first, last) = s.split_at(3);
|
||
///
|
||
/// assert_eq!("Per", first);
|
||
/// assert_eq!(" Martin-Löf", last);
|
||
/// ```
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
|
||
let (a, b) = self.inner.split_at(mid);
|
||
unsafe { (Self::from_slice_unchecked(a), Self::from_slice_unchecked(b)) }
|
||
}
|
||
|
||
/// Divide one mutable string slice into two at an index.
|
||
///
|
||
/// The argument, `mid`, should be an offset from the start of the string.
|
||
///
|
||
/// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
|
||
/// the end of the string slice.
|
||
///
|
||
/// To get immutable string slices instead, see the [`split_at`][Self::split_at] method.
|
||
///
|
||
/// # Panics
|
||
///
|
||
/// Panics if `mid` is past the end of the last code point of the string slice.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// # use widestring::utf32str;
|
||
/// # #[cfg(feature = "alloc")] {
|
||
/// let mut s = utf32str!("Per Martin-Löf").to_owned();
|
||
///
|
||
/// let (first, last) = s.split_at_mut(3);
|
||
///
|
||
/// assert_eq!("Per", first);
|
||
/// assert_eq!(" Martin-Löf", last);
|
||
/// # }
|
||
/// ```
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
|
||
let (a, b) = self.inner.split_at_mut(mid);
|
||
unsafe {
|
||
(
|
||
Self::from_slice_unchecked_mut(a),
|
||
Self::from_slice_unchecked_mut(b),
|
||
)
|
||
}
|
||
}
|
||
|
||
/// Returns an iterator over the [`char`]s of a string slice.
|
||
///
|
||
/// As this string slice consists of valid UTF-32, we can iterate through a string slice by
|
||
/// [`char`]. This method returns such an iterator.
|
||
///
|
||
/// It's important to remember that [`char`] represents a Unicode Scalar Value, and might not
|
||
/// match your idea of what a 'character' is. Iteration over grapheme clusters may be what you
|
||
/// actually want. This functionality is not provided by this crate.
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn chars(&self) -> CharsUtf32<'_> {
|
||
CharsUtf32::new(self.as_slice())
|
||
}
|
||
|
||
/// Returns an iterator over the [`char`]s of a string slice and their positions.
|
||
///
|
||
/// As this string slice consists of valid UTF-32, we can iterate through a string slice by
|
||
/// [`char`]. This method returns an iterator of both these [`char`]s as well as their offsets.
|
||
///
|
||
/// The iterator yields tuples. The position is first, the [`char`] is second.
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn char_indices(&self) -> CharIndicesUtf32<'_> {
|
||
CharIndicesUtf32::new(self.as_slice())
|
||
}
|
||
|
||
/// Returns an iterator of bytes over the string encoded as UTF-8.
|
||
#[must_use]
|
||
pub fn encode_utf8(&self) -> EncodeUtf8<CharsUtf32<'_>> {
|
||
crate::encode_utf8(self.chars())
|
||
}
|
||
|
||
/// Returns an iterator of [`u16`] over the sting encoded as UTF-16.
|
||
#[must_use]
|
||
pub fn encode_utf16(&self) -> EncodeUtf16<CharsUtf32<'_>> {
|
||
crate::encode_utf16(self.chars())
|
||
}
|
||
|
||
/// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_debug`].
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn escape_debug(&self) -> EscapeDebug<CharsUtf32<'_>> {
|
||
EscapeDebug::<CharsUtf32>::new(self.as_slice())
|
||
}
|
||
|
||
/// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_default`].
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn escape_default(&self) -> EscapeDefault<CharsUtf32<'_>> {
|
||
EscapeDefault::<CharsUtf32>::new(self.as_slice())
|
||
}
|
||
|
||
/// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_unicode`].
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn escape_unicode(&self) -> EscapeUnicode<CharsUtf32<'_>> {
|
||
EscapeUnicode::<CharsUtf32>::new(self.as_slice())
|
||
}
|
||
|
||
/// Returns the lowercase equivalent of this string slice, as a new [`Utf32String`].
|
||
///
|
||
/// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
|
||
/// `Lowercase`.
|
||
///
|
||
/// Since some characters can expand into multiple characters when changing the case, this
|
||
/// function returns a [`Utf32String`] instead of modifying the parameter in-place.
|
||
#[inline]
|
||
#[cfg(feature = "alloc")]
|
||
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
|
||
#[must_use]
|
||
pub fn to_lowercase(&self) -> Utf32String {
|
||
let mut s = Utf32String::with_capacity(self.len());
|
||
for c in self.chars() {
|
||
for lower in c.to_lowercase() {
|
||
s.push(lower);
|
||
}
|
||
}
|
||
s
|
||
}
|
||
|
||
/// Returns the uppercase equivalent of this string slice, as a new [`Utf32String`].
|
||
///
|
||
/// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
|
||
/// `Uppercase`.
|
||
///
|
||
/// Since some characters can expand into multiple characters when changing the case, this
|
||
/// function returns a [`Utf32String`] instead of modifying the parameter in-place.
|
||
#[inline]
|
||
#[cfg(feature = "alloc")]
|
||
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
|
||
#[must_use]
|
||
pub fn to_uppercase(&self) -> Utf32String {
|
||
let mut s = Utf32String::with_capacity(self.len());
|
||
for c in self.chars() {
|
||
for lower in c.to_uppercase() {
|
||
s.push(lower);
|
||
}
|
||
}
|
||
s
|
||
}
|
||
}
|
||
|
||
impl AsMut<[char]> for Utf32Str {
|
||
#[inline]
|
||
fn as_mut(&mut self) -> &mut [char] {
|
||
self.as_char_slice_mut()
|
||
}
|
||
}
|
||
|
||
impl AsRef<[char]> for Utf32Str {
|
||
#[inline]
|
||
fn as_ref(&self) -> &[char] {
|
||
self.as_char_slice()
|
||
}
|
||
}
|
||
|
||
impl<'a> From<&'a [char]> for &'a Utf32Str {
|
||
#[inline]
|
||
fn from(value: &'a [char]) -> Self {
|
||
Utf32Str::from_char_slice(value)
|
||
}
|
||
}
|
||
|
||
impl<'a> From<&'a mut [char]> for &'a mut Utf32Str {
|
||
#[inline]
|
||
fn from(value: &'a mut [char]) -> Self {
|
||
Utf32Str::from_char_slice_mut(value)
|
||
}
|
||
}
|
||
|
||
impl<'a> From<&'a Utf32Str> for &'a [char] {
|
||
#[inline]
|
||
fn from(value: &'a Utf32Str) -> Self {
|
||
value.as_char_slice()
|
||
}
|
||
}
|
||
|
||
impl<'a> From<&'a mut Utf32Str> for &'a mut [char] {
|
||
#[inline]
|
||
fn from(value: &'a mut Utf32Str) -> Self {
|
||
value.as_char_slice_mut()
|
||
}
|
||
}
|
||
|
||
impl<I> Index<I> for Utf16Str
|
||
where
|
||
I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
|
||
{
|
||
type Output = Utf16Str;
|
||
|
||
#[inline]
|
||
fn index(&self, index: I) -> &Self::Output {
|
||
self.get(index)
|
||
.expect("index out of bounds or not on char boundary")
|
||
}
|
||
}
|
||
|
||
impl<I> Index<I> for Utf32Str
|
||
where
|
||
I: SliceIndex<[u32], Output = [u32]>,
|
||
{
|
||
type Output = Utf32Str;
|
||
|
||
#[inline]
|
||
fn index(&self, index: I) -> &Self::Output {
|
||
self.get(index).expect("index out of bounds")
|
||
}
|
||
}
|
||
|
||
impl<I> IndexMut<I> for Utf16Str
|
||
where
|
||
I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
|
||
{
|
||
#[inline]
|
||
fn index_mut(&mut self, index: I) -> &mut Self::Output {
|
||
self.get_mut(index)
|
||
.expect("index out of bounds or not on char boundary")
|
||
}
|
||
}
|
||
|
||
impl<I> IndexMut<I> for Utf32Str
|
||
where
|
||
I: SliceIndex<[u32], Output = [u32]>,
|
||
{
|
||
#[inline]
|
||
fn index_mut(&mut self, index: I) -> &mut Self::Output {
|
||
self.get_mut(index).expect("index out of bounds")
|
||
}
|
||
}
|
||
|
||
impl PartialEq<[char]> for Utf32Str {
|
||
#[inline]
|
||
fn eq(&self, other: &[char]) -> bool {
|
||
self.as_char_slice() == other
|
||
}
|
||
}
|
||
|
||
impl PartialEq<Utf32Str> for [char] {
|
||
#[inline]
|
||
fn eq(&self, other: &Utf32Str) -> bool {
|
||
self == other.as_char_slice()
|
||
}
|
||
}
|
||
|
||
impl PartialEq<Utf16Str> for Utf32Str {
|
||
#[inline]
|
||
fn eq(&self, other: &Utf16Str) -> bool {
|
||
self.chars().eq(other.chars())
|
||
}
|
||
}
|
||
|
||
impl PartialEq<Utf32Str> for Utf16Str {
|
||
#[inline]
|
||
fn eq(&self, other: &Utf32Str) -> bool {
|
||
self.chars().eq(other.chars())
|
||
}
|
||
}
|
||
|
||
impl PartialEq<&Utf16Str> for Utf32Str {
|
||
#[inline]
|
||
fn eq(&self, other: &&Utf16Str) -> bool {
|
||
self.chars().eq(other.chars())
|
||
}
|
||
}
|
||
|
||
impl PartialEq<&Utf32Str> for Utf16Str {
|
||
#[inline]
|
||
fn eq(&self, other: &&Utf32Str) -> bool {
|
||
self.chars().eq(other.chars())
|
||
}
|
||
}
|
||
|
||
impl PartialEq<Utf16Str> for &Utf32Str {
|
||
#[inline]
|
||
fn eq(&self, other: &Utf16Str) -> bool {
|
||
self.chars().eq(other.chars())
|
||
}
|
||
}
|
||
|
||
impl PartialEq<Utf32Str> for &Utf16Str {
|
||
#[inline]
|
||
fn eq(&self, other: &Utf32Str) -> bool {
|
||
self.chars().eq(other.chars())
|
||
}
|
||
}
|
||
|
||
impl<'a> TryFrom<&'a [u16]> for &'a Utf16Str {
|
||
type Error = Utf16Error;
|
||
|
||
#[inline]
|
||
fn try_from(value: &'a [u16]) -> Result<Self, Self::Error> {
|
||
Utf16Str::from_slice(value)
|
||
}
|
||
}
|
||
|
||
impl<'a> TryFrom<&'a mut [u16]> for &'a mut Utf16Str {
|
||
type Error = Utf16Error;
|
||
|
||
#[inline]
|
||
fn try_from(value: &'a mut [u16]) -> Result<Self, Self::Error> {
|
||
Utf16Str::from_slice_mut(value)
|
||
}
|
||
}
|
||
|
||
impl<'a> TryFrom<&'a [u32]> for &'a Utf32Str {
|
||
type Error = Utf32Error;
|
||
|
||
#[inline]
|
||
fn try_from(value: &'a [u32]) -> Result<Self, Self::Error> {
|
||
Utf32Str::from_slice(value)
|
||
}
|
||
}
|
||
|
||
impl<'a> TryFrom<&'a mut [u32]> for &'a mut Utf32Str {
|
||
type Error = Utf32Error;
|
||
|
||
#[inline]
|
||
fn try_from(value: &'a mut [u32]) -> Result<Self, Self::Error> {
|
||
Utf32Str::from_slice_mut(value)
|
||
}
|
||
}
|
||
|
||
/// Alias for [`Utf16Str`] or [`Utf32Str`] depending on platform. Intended to match typical C
|
||
/// `wchar_t` size on platform.
|
||
#[cfg(not(windows))]
|
||
pub type WideUtfStr = Utf32Str;
|
||
|
||
/// Alias for [`Utf16Str`] or [`Utf32Str`] depending on platform. Intended to match typical C
|
||
/// `wchar_t` size on platform.
|
||
#[cfg(windows)]
|
||
pub type WideUtfStr = Utf16Str;
|
||
|
||
#[cfg(test)]
|
||
mod test {
|
||
use crate::*;
|
||
|
||
#[test]
|
||
fn utf16_trim() {
|
||
let s = utf16str!(" Hello\tworld\t");
|
||
assert_eq!(utf16str!("Hello\tworld\t"), s.trim_start());
|
||
|
||
let s = utf16str!(" English ");
|
||
assert!(Some('E') == s.trim_start().chars().next());
|
||
|
||
let s = utf16str!(" עברית ");
|
||
assert!(Some('ע') == s.trim_start().chars().next());
|
||
}
|
||
|
||
#[test]
|
||
fn utf32_trim() {
|
||
let s = utf32str!(" Hello\tworld\t");
|
||
assert_eq!(utf32str!("Hello\tworld\t"), s.trim_start());
|
||
|
||
let s = utf32str!(" English ");
|
||
assert!(Some('E') == s.trim_start().chars().next());
|
||
|
||
let s = utf32str!(" עברית ");
|
||
assert!(Some('ע') == s.trim_start().chars().next());
|
||
}
|
||
}
|