Vendor things

This commit is contained in:
John Doty 2024-03-08 11:03:01 -08:00
parent 5deceec006
commit 977e3c17e5
19434 changed files with 10682014 additions and 0 deletions

View file

@ -0,0 +1 @@
{"files":{"CHANGELOG.md":"e79dc5e9c323c3fa2043b2a10640ee4ddbf2d5817972b2c72f8ce0bbead24623","Cargo.toml":"1e72afcceba2ca5a9cffb1a22c00e604b01deb1482b770aee09bf678945f85b9","LICENSE":"36bb253818ac13761081556ff5c457d626da9df1eb4f56194d9ad3926c418a68","LICENSES/Apache-2.0.txt":"074e6e32c86a4c0ef8b3ed25b721ca23aca83df277cd88106ef7177c354615ff","LICENSES/MIT.txt":"b85dcd3e453d05982552c52b5fc9e0bdd6d23c6f8e844b984a88af32570b0cc0","Makefile.toml":"195401320b5ceec8d8cc7d4410300197e5a1d109f784a5fc7ff79cabe1b645fd","README.md":"387638ef3eabd6d14b55618ca748f00508501045f6b610466fbd4b5b8c0de84f","src/error.rs":"d247d758414ce09f8c4b78f97975844d93b08aadc1c7ea8a783fecad29556ada","src/iter.rs":"dec82fd55996e18f60279920868a722e1c4ac50f82c53373af76a5608bc67226","src/lib.rs":"75d2565dc301b50c200795053d5ffa0817f237dbdb2b49cb2b708f9625730afd","src/macros.rs":"53ab327c485c8ceda40c6999d456054c483e53a3a906b6e4c6ecfebb758f5805","src/platform/mod.rs":"370e032d7f43b5893d40350ac0428ec494af68c84d9f37b9ff2f93a5d7176550","src/platform/other.rs":"bc67c5aa65b8245813f4d2cdc1f0f43ceb9e72721be120d3b40c28447f47b5f2","src/platform/windows.rs":"1739ffaf89872aa12496a674e94be916ed21e44e25ce768cfbd29d9aa48497c7","src/ucstr.rs":"f10409e006d0b68983f37bd3e052ab999356e1723f455ffe559a8a67549421d8","src/ucstring.rs":"29b6d5246bc0102c0c515d68575e22e097553ba1117da1ba424115fbb4877da9","src/ustr.rs":"19b14a0a5ebf0a094c454e191b220a7489da1e3648183bc6127a34a4871f5965","src/ustr/iter.rs":"6271fe909c3620162c1e709cd264b26eb199d4449f72f96b95f17b00112026d2","src/ustring.rs":"0597374dc8c748151e16a964ff7f038257d5add0ad7b398a7ec9ae0344895267","src/ustring/iter.rs":"26ed076ee4c67d305ea4c64ba139c63106e957cb3bbda8b48390c3a78e06d138","src/utfstr.rs":"af473e60f7924c3a23010a916d905fd2c5ab9c884dcfee77151980b4e305813e","src/utfstr/iter.rs":"0371e0bd5d1da6a834d5c8c5785ef928b18cbaf13bb0bb7a8f268c8d358fa49a","src/utfstring.rs":"58ee8851a6e087303ed59059d95e0d9cb04cc54423299636031a34f5a845889c","src/utfstring/iter.rs":"fc1f595c41b05df7b08454
0295c043911e18414c07e1ad07e35a6321add79494"},"package":"653f141f39ec16bba3c5abe400a0c60da7468261cc2cbf36805022876bc721a8"}

View file

@ -0,0 +1,351 @@
# Changelog
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
## [1.0.2] - 2022-07-15 <a name="1.0.2"></a>
### Fixed
- Correctly check for and error on nul values in C-string macros `u16cstr!`, `u32cstr!`, and
`widecstr!`. Fixes [#28].
## [1.0.1] - 2022-06-24 <a name="1.0.1"></a>
### Fixed
- Reduce collision potential for macros. By [@OpenByteDev].
## [1.0.0] - 2022-06-21 <a name="1.0.0"></a>
### Changed
- **Breaking Change** Minimum supported Rust version is now 1.58.
- Added `#[must_use]` attributes to many crate functions, as appropriate.
- Remove `unsafe` qualifiers from `as_mut_ptr` and `as_mut_ptr_range` to match standard library. By
[@yescallop].
### Added
- Added `new` function that creates an empty string to `U16CString` and `U32CString` to match other
string types.
- Additional `From` implementations for conversion to `OsString`.
## [1.0.0-beta.1] - 2021-11-08 <a name="1.0.0-beta.1"></a>
### Changed
- **Breaking Change** Minimum supported Rust version is now 1.56.
- **Breaking Change** The following methods on `U16String` and `U32String` have been renamed and
replaced by functions with different semantics:
- `pop` is now `pop_char`
- `remove` is now `remove_char`
- `insert` is now `insert_char`
- **Breaking Change** Moved and renamed the following iterator types:
- `iter::Utf16Chars` renamed to `CharsUtf16` and moved to `ustr` and `ucstr`
- `iter::Utf32Chars` renamed to `CharsUtf32` and moved to `ustr` and `ucstr`
- `iter::CharsLossy` split and renamed to `CharsLossyUtf16` and `CharsLossyUtf32` and moved to
`ustr` and `ucstr`
- `iter::Utf16CharIndices` renamed to `CharIndicesUtf16` and moved to `ustr` and `ucstr`
- `iter::Utf16CharIndicesLossy` renamed to `CharIndicesLossyUtf16` and moved to `ustr` and `ucstr`
- **Breaking Change** `error::FromUtf16Error` and `error::FromUtf32Error` have been renamed to
`Utf16Error` and `Utf32Error` respectively and expanded with more details about the error.
- Migrated crate to Rust 2021 edition.
- The following methods on `U16Str` and `U32Str` are now `const`:
- `from_slice`
- `as_slice`
- `as_ptr`
- `len`
- `is_empty`
- The following methods on `U16CStr` and `U32CStr` are now `const`:
- `from_slice_unchecked`
- `as_slice_with_nul`
- `as_ptr`
- `len`
- `is_empty`
- The following methods on `U16String` and `U32String` are now `const`:
- `new`
### Added
- Added new UTF-encoded string types and associated types:
- `Utf16Str`
- `Utf32Str`
- `Utf16String`
- `Utf32String`
- Added macros to convert string literals into `const` wide string slices:
- `u16str!`
- `u16cstr!`
- `u32str!`
- `u32cstr!`
- `widestr!`
- `widecstr!`
- `utf16str!`
- `utf32str!`
- Added `NUL_TERMINATOR` associated constant to `U16CStr`, `U32CStr`, `U16CString`, and
`U32CString`.
- Added `DoubleEndedIterator` and `ExactSizeIterator` implementations to a number of iterator types.
- Added new UTF encoding functions alongside existing decode functions:
- `encode_utf8`
- `encode_utf16`
- `encode_utf32`
- Added various methods:
- `repeat` on `U16Str`, `U32Str`, `U16CStr`, and `U32CStr`
- `shrink_to` on `U16String` and `U32String`
- `retain` on `U16String` and `U32String`
- `drain` on `U16String` and `U32String`
- `replace_range` on `U16String` and `U32String`
- `get`, `get_mut`, `get_unchecked`, and `get_unchecked_mut` on `U16CStr` and `U32CStr`
- `split_at` and `split_at_mut` on `U16CStr` and `U32CStr`
- Added more trait implementations.
### Removed
- **Breaking Change** Functions and types deprecated in 0.5 have been removed.
- **Breaking Change** The following types and traits, which were implementation details, have been
removed. Use the existing non-generic types instead (e.g. use `U16Str` instead of `UStr<u16>`).
- `UChar`
- `UStr`
- `UCStr`
- `UString`
- `UCString`
- **Breaking Change** Removed `IndexMut<RangeFull>` trait implementation of `U16CString` and
`U32CString`. Use the unsafe `get_mut` method instead, which also supports more ranges.
### Fixed
- **Breaking Change** The iterator returned by `U16Str::char_indices` and `U16CStr::char_indices`
is now over `(usize, Result<char, DecodeUtf16Error>)` tuples instead of the reverse order, to
better match standard library string iterators. The same is true of `U16Str::char_indices_lossy`
and `U16CStr::char_indices_lossy`. This matches what was stated in original documentation.
- `U32Str::to_string` and `U32CStr::to_string` now only allocate once instead of twice.
## [0.5.1] - 2021-10-23 <a name="0.5.1"></a>
### Fixed
- Fixed a regression in 0.5.0 where zero-length vectors and strings were incorrectly causing panics
in `UCString::from_vec` and `UCString::from_str`. Fixes [#22].
- Modified an implementation detail in `ustr::to_string` & `ustr::to_string_lossy` to remove possibly
unsafe behaviour.
## [0.5.0] - 2021-10-12 <a name="0.5.0"></a>
### Changed
- **Breaking Change** Minimum supported Rust version is now 1.48.
- **Breaking Change** Renamed a number of types and functions to increase consistency and clarity.
This also meant renaming errors to more clearly convey error and trying to be more consistent with
name conventions and functionality across types. Check renamed function docs for any changes in
functionality, as there have been some minor tweaks (mostly relaxing/removing error conditions and
reducing panics). Old names have been deprecated to ease transition and will be removed in a
future release. Fixes [#18].
- `MissingNulError` => `error::MissingNulTerminator`
- `FromUtf32Error` => `error::FromUtf32Error`
- `NulError` => `error::ContainsNul`
- `UCStr::from_ptr_with_nul` => `from_ptr_unchecked`
- `UCStr::from_slice_with_nul` => `from_slice_truncate`
- `UCStr::from_slice_with_nul_unchecked` => `from_slice_unchecked`
- `U32CStr::from_char_ptr_with_nul` => `from_char_ptr_unchecked`
- `U32CStr::from_char_slice_with_nul` => `from_char_slice_truncate`
- `U32CStr::from_char_slice_with_nul_unchecked` => `from_char_slice_unchecked`
- `UCString::new` => `from_vec`
- `UCString::from_vec_with_nul` => `from_vec_truncate`
- `UCString::from_ustr_with_nul` => `from_ustr_truncate`
- `UCString::from_ptr_with_nul` => `from_ptr_truncate`
- `UCString::from_str_with_nul` => `from_str_truncate`
- `UCString::from_os_str_with_nul` => `from_os_str_truncate`
- `U32CString::from_chars_with_nul` => `from_chars_truncate`
- `U32CString::from_char_ptr_with_nul` => `from_char_ptr_truncate`
- Improved implementations in some areas to reduce unnecessary double allocations.
- Improved `Debug` implementations. No more debugging lists of raw integer values.
- Migrated crate to Rust 2018 edition.
- Made crate package [REUSE compliant](https://reuse.software/).
- Improved documentation and used intra-doc links.
### Added
- Added crate-level functions `decode_utf16`, `decode_utf16_lossy`, `decode_utf32`, and
`decode_utf32_lossy` and associated iterators. Note that `decode_utf16` is an alias of
`core::char::decode_utf16`, but provided for consistency.
- Added `display` method to both `UStr` and `UCStr` to display strings in formatting without heap
allocations, similar to `Path::display`. Fixes [#20].
- Added more trait implementations, including more index operations and string formatting via
`Write` trait. Fixes [#19].
- Added new functions:
- `UStr::from_ptr_mut`
- `UStr::from_slice_mut`
- `UStr::as_mut_slice`
- `UStr::as_mut_ptr`
- `UStr::as_ptr_range`
- `UStr::as_mut_ptr_range`
- `UStr::get`
- `UStr::get_mut`
- `UStr::get_unchecked`
- `UStr::get_unchecked_mut`
- `UStr::split_at`
- `UStr::split_at_mut`
- `UStr::chars`
- `UStr::chars_lossy`
- `U16Str::char_indices`
- `U16Str::char_indices_lossy`
- `U32Str::from_char_ptr_mut`
- `U32Str::from_char_slice_mut`
- `UCStr::from_ptr`
- `UCStr::from_ptr_truncate`
- `UCStr::from_slice`
- `UCStr::as_ustr`
- `UCStr::from_ptr_str_mut`
- `UCStr::from_ptr_mut`
- `UCStr::from_ptr_truncate_mut`
- `UCStr::from_ptr_unchecked_mut`
- `UCStr::from_slice_mut`
- `UCStr::from_slice_truncate_mut`
- `UCStr::from_slice_unchecked_mut`
- `UCStr::as_mut_slice`
- `UCStr::as_mut_ptr`
- `UCStr::as_ustr_with_nul`
- `UCStr::as_mut_ustr`
- `UCStr::as_ptr_range`
- `UCStr::as_mut_ptr_range`
- `UCStr::chars`
- `UCStr::chars_lossy`
- `U16CStr::char_indices`
- `U16CStr::char_indices_lossy`
- `U32CStr::from_char_ptr_str_mut`
- `U32CStr::from_char_ptr_mut`
- `U32CStr::from_char_ptr_truncate_mut`
- `U32CStr::from_char_ptr_unchecked_mut`
- `U32CStr::from_char_slice_mut`
- `U32CStr::from_char_slice_truncate_mut`
- `U32CStr::from_char_slice_unchecked_mut`
- `U32CStr::from_char_ptr`
- `U32CStr::from_char_ptr_truncate`
- `U32CStr::from_char_slice`
- `UString::as_vec`
- `UString::as_mut_vec`
- `UString::push_char`
- `UString::truncate`
- `UString::pop`
- `UString::remove`
- `UString::insert`
- `UString::insert_ustr`
- `UString::split_off`
- `UCString::as_mut_ucstr`
- `UCString::into_ustring`
- `UCString::into_ustring_with_nul`
- `U32CString::from_char_ptr_str`
### Deprecated
- Deprecated functions as part of simplifying to increase clarity. These will be removed entirely
in a future release.
- `MissingNulError`. Use `error::MissingNulTerminator` instead.
- `FromUtf32Error`. Use `error::FromUtf32Error` instead.
- `NulError`. Use `error::ContainsNul` instead.
- `UCStr::from_ptr_with_nul`. Use `from_ptr_unchecked` instead.
- `UCStr::from_slice_with_nul`. Use `from_slice_truncate` instead.
- `UCStr::from_slice_with_nul_unchecked`. Use `from_slice_unchecked` instead.
- `U32CStr::from_char_ptr_with_nul`. Use `from_char_ptr_unchecked` instead.
- `U32CStr::from_char_slice_with_nul`. Use `from_char_slice_truncate` instead.
- `U32CStr::from_char_slice_with_nul_unchecked`. Use `from_char_slice_unchecked` instead.
- `UCString::new`. Use `from_vec` instead.
- `UCString::from_vec_with_nul_unchecked`. Use `from_vec_unchecked` instead.
- `UCString::from_ustr_with_nul_unchecked`. Use `from_ustr_unchecked` instead.
- `UCString::from_ptr_with_nul_unchecked`. Use `from_ptr_unchecked` instead.
- `UCString::from_str_with_nul_unchecked`. Use `from_str_unchecked` instead.
- `UCString::from_os_str_with_nul_unchecked`. Use `from_os_str_unchecked` instead.
- `UCString::from_vec_with_nul`. Use `from_vec_truncate` instead.
- `UCString::from_ustr_with_nul`. Use `from_ustr_truncate` instead.
- `UCString::from_ptr_with_nul`. Use `from_ptr_truncate` instead.
- `UCString::from_str_with_nul`. Use `from_str_truncate` instead.
- `UCString::from_os_str_with_nul`. Use `from_os_str_truncate` instead.
- `U32CString::from_chars_with_nul_unchecked`. Use `from_chars_unchecked` instead.
- `U32CString::from_char_ptr_with_nul_unchecked`. Use `from_char_ptr_unchecked` instead.
- `U32CString::from_chars_with_nul`. Use `from_chars_truncate` instead.
- `U32CString::from_char_ptr_with_nul`. Use `from_char_ptr_truncate` instead.
- Deprecated error types in the crate root. Use the errors directly from `error` module instead.
## [0.4.3] - 2020-10-05 <a name="0.4.3"></a>
### Fixed
- Fixed undefined behaviours and cleaned up clippy warnings. By [@joshwd36].
## [0.4.2] - 2020-06-09 <a name="0.4.2"></a>
### Fixed
- Fixed compile errors on pre-1.36.0 Rust due to unstable `alloc` crate. Minimum supported version
is Rust 1.34.2, the rust version for Debian stable. Fixes [#14].
## [0.4.1] - 2020-06-08 <a name="0.4.1"></a>
### ***Yanked***
### Changed
- Now supports `no_std`. Added the `std` and `alloc` features, enabled by default. `U16String`,
`U32String`, `U16CString`, and `U32CString` and their aliases all require the `alloc` or `std`
feature. By [@nicbn].
## [0.4.0] - 2018-08-18 <a name="0.4.0"></a>
### Added
- New `U32String`, `U32Str`, `U32CString`, and `U32CStr` types for dealing with UTF-32 FFI. These
new types are roughly equivalent to the existing UTF-16 types.
- `WideChar` is a type alias to `u16` on Windows but `u32` on non-Windows platforms.
- The generic types `UString`, `UStr`, `UCString` and `UCStr` are used to implement the string
types.
### Changed
- **Breaking Change** Existing wide string types have been renamed to `U16String`, `U16Str`,
`U16CString`, and `U16CStr` (previously `WideString`, `WideStr`, etc.). Some functions have
also been renamed to reflect this change (`wide_str` to `u16_str`, etc.).
- **Breaking Change** `WideString`, `WideStr`, `WideCString`, and `WideCStr` are now type aliases
that vary between platforms. On Windows, these are aliases to the `U16` types and are equivalent
to the previous version, but on non-Windows platforms these alias the new `U32` types instead.
See crate documentation for more details.
## [0.3.0] - 2018-03-17 <a name="0.3.0"></a>
### Added
- Additional unchecked functions on `WideCString`.
- All types now implement `Default`.
- `WideString::shrink_to_fit`
- `WideString::into_boxed_wide_str` and `Box<WideStr>::into_wide_string`.
- `WideCString::into_boxed_wide_c_str` and `Box<WideCStr>::into_wide_c_string`.
- `From` and `Default` implementations for boxed `WideStr` and boxed `WideCStr`.
### Changed
- Renamed `WideCString::from_vec` to replace `WideCString::new`. To create empty string, use
`WideCString::default()` now.
- `WideCString` now implements `Drop`, which sets the string to an empty string to prevent invalid
unsafe code from working correctly when it should otherwise break. Also see `Drop` implementation
of `CString`.
- Writing changelog manually.
- Upgraded winapi dev dependency.
- Now requires at least Rust 1.17+ to compile (previously, was Rust 1.8).
## [0.2.2] - 2016-09-09 <a name="0.2.2"></a>
### Fixed
- Make `WideCString::into_raw` correctly forget the original self.
## [0.2.1] - 2016-08-12 <a name="0.2.1"></a>
### Added
- `into_raw`/`from_raw` on `WideCString`. Closes [#2].
## [0.2.0] - 2016-05-31 <a name="0.2.0"></a>
### Added
- `Default` trait to wide strings.
- Traits for conversion of strings to `Cow`.
### Changed
- Methods & traits to bring to parity with Rust 1.9 string APIs.
## 0.1.0 - 2016-02-06 <a name="0.1.0"></a>
### Added
- Initial release.
[#2]: https://github.com/starkat99/widestring-rs/issues/2
[#14]: https://github.com/starkat99/widestring-rs/issues/14
[#18]: https://github.com/starkat99/widestring-rs/issues/18
[#19]: https://github.com/starkat99/widestring-rs/issues/19
[#20]: https://github.com/starkat99/widestring-rs/issues/20
[#22]: https://github.com/starkat99/widestring-rs/issues/22
[#28]: https://github.com/starkat99/widestring-rs/issues/28
[@nicbn]: https://github.com/nicbn
[@joshwd36]: https://github.com/joshwd36
[@yescallop]: https://github.com/yescallop
[@OpenByteDev]: https://github.com/OpenByteDev
[Unreleased]: https://github.com/starkat99/widestring-rs/compare/v1.0.2...HEAD
[1.0.2]: https://github.com/starkat99/widestring-rs/compare/v1.0.1...v1.0.2
[1.0.1]: https://github.com/starkat99/widestring-rs/compare/v1.0.0...v1.0.1
[1.0.0]: https://github.com/starkat99/widestring-rs/compare/v1.0.0-beta.1...v1.0.0
[1.0.0-beta.1]: https://github.com/starkat99/widestring-rs/compare/v0.5.1...v1.0.0-beta.1
[0.5.1]: https://github.com/starkat99/widestring-rs/compare/v0.5.0...v0.5.1
[0.5.0]: https://github.com/starkat99/widestring-rs/compare/v0.4.3...v0.5.0
[0.4.3]: https://github.com/starkat99/widestring-rs/compare/v0.4.2...v0.4.3
[0.4.2]: https://github.com/starkat99/widestring-rs/compare/v0.4.1...v0.4.2
[0.4.1]: https://github.com/starkat99/widestring-rs/compare/v0.4.0...v0.4.1
[0.4.0]: https://github.com/starkat99/widestring-rs/compare/v0.3.0...v0.4.0
[0.3.0]: https://github.com/starkat99/widestring-rs/compare/v0.2.2...v0.3.0
[0.2.2]: https://github.com/starkat99/widestring-rs/compare/v0.2.1...v0.2.2
[0.2.1]: https://github.com/starkat99/widestring-rs/compare/v0.2.0...v0.2.1
[0.2.0]: https://github.com/starkat99/widestring-rs/compare/v0.1.0...v0.2.0

View file

@ -0,0 +1,53 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2021"
rust-version = "1.58"
name = "widestring"
version = "1.0.2"
exclude = [
".git*",
".editorconfig",
]
description = "A wide string Rust library for converting to and from wide strings, such as those often used in Windows API or other FFI libaries. Both `u16` and `u32` string types are provided, including support for UTF-16 and UTF-32, malformed encoding, C-style strings, etc."
readme = "README.md"
keywords = [
"wide",
"string",
"win32",
"utf-16",
"utf-32",
]
categories = [
"text-processing",
"encoding",
"development-tools::ffi",
"no-std",
]
license = "MIT OR Apache-2.0"
repository = "https://github.com/starkat99/widestring-rs"
resolver = "2"
[package.metadata.docs.rs]
rustc-args = [
"--cfg",
"docsrs",
]
[dev-dependencies.winapi]
version = "0.3"
features = ["winbase"]
[features]
alloc = []
default = ["std"]
std = ["alloc"]

1
third-party/vendor/widestring/LICENSE vendored Normal file
View file

@ -0,0 +1 @@
MIT OR Apache-2.0

View file

@ -0,0 +1,73 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
(a) You must give any other recipients of the Work or Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -0,0 +1,9 @@
MIT License
Copyright (c) <year> <copyright holders>
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -0,0 +1,69 @@
[config]
min_version = "0.35.0"
[env]
CI_CARGO_TEST_FLAGS = { value = "--locked -- --nocapture", condition = { env_true = [
"CARGO_MAKE_CI",
] } }
CARGO_MAKE_CLIPPY_ARGS = { value = "${CARGO_MAKE_CLIPPY_ALL_FEATURES_WARN}", condition = { env_true = [
"CARGO_MAKE_CI",
] } }
# Override for CI flag additions
[tasks.test]
args = [
"test",
"@@remove-empty(CARGO_MAKE_CARGO_VERBOSE_FLAGS)",
"@@split(CARGO_MAKE_CARGO_BUILD_TEST_FLAGS, )",
"@@split(CI_CARGO_TEST_FLAGS, )",
]
# Let clippy run on non-nightly CI
[tasks.clippy-ci-flow]
condition = { env_set = ["CARGO_MAKE_RUN_CLIPPY"] }
# Let format check run on non-nightly CI
[tasks.check-format-ci-flow]
condition = { env_set = ["CARGO_MAKE_RUN_CHECK_FORMAT"] }
[tasks.check-docs]
description = "Checks docs for errors."
category = "Documentation"
install_crate = false
env = { RUSTDOCFLAGS = "-D warnings" }
command = "cargo"
args = [
"doc",
"--workspace",
"--no-deps",
"@@remove-empty(CARGO_MAKE_CARGO_VERBOSE_FLAGS)",
"${CARGO_MAKE_CARGO_ALL_FEATURES}",
]
# Build & Test with no features enabled
[tasks.post-ci-flow]
run_task = [{ name = ["check-docs", "build-no-std", "test-no-std", "build-alloc", "test-alloc"] }]
[tasks.build-no-std]
description = "Build without any features"
category = "Build"
env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features" }
run_task = "build"
[tasks.test-no-std]
description = "Run tests without any features"
category = "Test"
env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features" }
run_task = "test"
[tasks.build-alloc]
description = "Build with only the alloc feature"
category = "Build"
env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features --features=alloc" }
run_task = "build"
[tasks.test-alloc]
description = "Run tests with only the alloc feature"
category = "Test"
env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features --features=alloc" }
run_task = "test"

49
third-party/vendor/widestring/README.md vendored Normal file
View file

@ -0,0 +1,49 @@
# widestring
[![Crates.io](https://img.shields.io/crates/v/widestring.svg)](https://crates.io/crates/widestring/) [![Documentation](https://docs.rs/widestring/badge.svg)](https://docs.rs/widestring/) ![Crates.io](https://img.shields.io/crates/l/widestring) [![Build status](https://github.com/starkat99/widestring-rs/actions/workflows/rust.yml/badge.svg?branch=master)](https://github.com/starkat99/widestring-rs/actions/workflows/rust.yml)
A wide string Rust library for converting to and from wide strings, such as
those often used in Windows API or other FFI libraries. Both `u16` and `u32` string types are
provided, including support for UTF-16 and UTF-32, malformed encoding, C-style strings, etc.
Macros for converting string literals to UTF-16 and UTF-32 strings at compile time are also
included.
*Requires Rust 1.58 or greater.* If you need support for older versions of Rust, use 0.x versions of
this crate.
## Documentation
- [Crate API Reference](https://docs.rs/widestring/)
- [Latest Changes](CHANGELOG.md)
### Optional Features
- **`alloc`** - Enabled by default. Enable use of the [`alloc`](https://doc.rust-lang.org/alloc/)
crate when not using the `std` library.
This enables the owned string types and aliases.
- **`std`** - Enabled by default. Enable features that depend on the Rust `std` library, including
everything in the `alloc` feature.
## License
This library is distributed under the terms of either of:
* [MIT License](LICENSES/MIT.txt)
([http://opensource.org/licenses/MIT](http://opensource.org/licenses/MIT))
* [Apache License, Version 2.0](LICENSES/Apache-2.0.txt)
([http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0))
at your option.
This project is [REUSE-compliant](https://reuse.software/spec/). Copyrights are retained by their
contributors. Some files may include explicit copyright notices and/or license
[SPDX identifiers](https://spdx.dev/ids/). For full authorship information, see the version control
history.
### Contributing
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the
work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any
additional terms or conditions.

View file

@ -0,0 +1,357 @@
//! Errors returned by functions in this crate.
#[cfg(feature = "alloc")]
use alloc::vec::Vec;
/// An error returned to indicate a problem with nul values occurred.
///
/// The error will either be a [`MissingNulTerminator`] or a [`ContainsNul`].
/// When the offending data was an owned vector, the error can hand ownership of that vector
/// back to the caller.
#[derive(Debug, Clone)]
pub enum NulError<C> {
    /// A terminating nul value was missing.
    MissingNulTerminator(MissingNulTerminator),
    /// An interior nul value was found.
    ContainsNul(ContainsNul<C>),
}

impl<C> NulError<C> {
    /// Consumes this error, returning the underlying vector of values which generated the error
    /// in the first place.
    ///
    /// The missing-terminator variant never carries a vector, so it always yields [`None`]; the
    /// interior-nul variant forwards whatever [`ContainsNul::into_vec`] returns.
    #[inline]
    #[cfg(feature = "alloc")]
    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
    #[must_use]
    pub fn into_vec(self) -> Option<Vec<C>> {
        if let Self::ContainsNul(err) = self {
            err.into_vec()
        } else {
            None
        }
    }
}

impl<C> core::fmt::Display for NulError<C> {
    /// Prints the message of the wrapped error unchanged.
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        match *self {
            Self::MissingNulTerminator(ref err) => core::fmt::Display::fmt(err, f),
            Self::ContainsNul(ref err) => core::fmt::Display::fmt(err, f),
        }
    }
}

#[cfg(feature = "std")]
impl<C> std::error::Error for NulError<C>
where
    C: core::fmt::Debug + 'static,
{
    /// Exposes the wrapped error as the source of this one.
    #[inline]
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match *self {
            Self::MissingNulTerminator(ref err) => Some(err),
            Self::ContainsNul(ref err) => Some(err),
        }
    }
}

impl<C> From<MissingNulTerminator> for NulError<C> {
    /// Wraps a missing-terminator error.
    #[inline]
    fn from(err: MissingNulTerminator) -> Self {
        NulError::MissingNulTerminator(err)
    }
}

impl<C> From<ContainsNul<C>> for NulError<C> {
    /// Wraps an interior-nul error.
    #[inline]
    fn from(err: ContainsNul<C>) -> Self {
        NulError::ContainsNul(err)
    }
}
/// An error returned to indicate that a terminating nul value was missing.
#[derive(Debug, Clone)]
pub struct MissingNulTerminator {
    // Private unit field: keeps the struct from being constructed outside this crate.
    _unused: (),
}

impl MissingNulTerminator {
    /// Creates the error; only code inside this crate can do so.
    pub(crate) fn new() -> Self {
        MissingNulTerminator { _unused: () }
    }
}

impl core::fmt::Display for MissingNulTerminator {
    /// Writes the fixed, argument-free error message.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.write_str("missing terminating nul value")
    }
}

#[cfg(feature = "std")]
impl std::error::Error for MissingNulTerminator {}
/// An error returned to indicate that an invalid nul value was found in a string.
///
/// The error records the position at which the nul value was found and, when the checked data
/// was an owned vector, gives ownership of that vector back to the caller.
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
#[derive(Debug, Clone)]
pub struct ContainsNul<C> {
    index: usize,
    #[cfg(feature = "alloc")]
    pub(crate) inner: Option<Vec<C>>,
    // Without `alloc` there is no vector to store, so only the element type is tracked.
    #[cfg(not(feature = "alloc"))]
    _p: core::marker::PhantomData<C>,
}

impl<C> ContainsNul<C> {
    /// Creates an error that keeps the offending vector `v` so it can be recovered later.
    #[cfg(feature = "alloc")]
    pub(crate) fn new(index: usize, v: Vec<C>) -> Self {
        let inner = Some(v);
        Self { index, inner }
    }

    /// Creates an error that only records the position (no vector attached).
    #[cfg(feature = "alloc")]
    pub(crate) fn empty(index: usize) -> Self {
        let inner = None;
        Self { index, inner }
    }

    /// Creates an error that only records the position (no vector attached).
    #[cfg(not(feature = "alloc"))]
    pub(crate) fn empty(index: usize) -> Self {
        let _p = core::marker::PhantomData;
        Self { index, _p }
    }

    /// Returns the index of the invalid nul value in the slice.
    #[inline]
    #[must_use]
    pub fn nul_position(&self) -> usize {
        self.index
    }

    /// Consumes this error, returning the underlying vector of values which generated the error
    /// in the first place.
    ///
    /// If the sequence that generated the error was a reference to a slice instead of a
    /// [`Vec`], this will return [`None`].
    #[inline]
    #[cfg(feature = "alloc")]
    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
    #[must_use]
    pub fn into_vec(self) -> Option<Vec<C>> {
        self.inner
    }
}

impl<C> core::fmt::Display for ContainsNul<C> {
    /// Writes the message together with the recorded position.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.write_fmt(format_args!(
            "invalid nul value found at position {}",
            self.index
        ))
    }
}

#[cfg(feature = "std")]
impl<C> std::error::Error for ContainsNul<C> where C: core::fmt::Debug {}
/// An error that can be returned when decoding UTF-16 code points.
///
/// This struct is created when using the [`DecodeUtf16`][crate::iter::DecodeUtf16] iterator.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodeUtf16Error {
    unpaired_surrogate: u16,
}

impl DecodeUtf16Error {
    /// Creates an error for the given unpaired surrogate code unit.
    pub(crate) fn new(surrogate: u16) -> Self {
        Self {
            unpaired_surrogate: surrogate,
        }
    }

    /// Returns the unpaired surrogate which caused this error.
    #[must_use]
    pub fn unpaired_surrogate(&self) -> u16 {
        self.unpaired_surrogate
    }
}

impl core::fmt::Display for DecodeUtf16Error {
    /// Writes the message with the surrogate in lowercase hexadecimal.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let surrogate = self.unpaired_surrogate;
        f.write_fmt(format_args!("unpaired surrogate found: {:x}", surrogate))
    }
}

#[cfg(feature = "std")]
impl std::error::Error for DecodeUtf16Error {}
/// An error that can be returned when decoding UTF-32 code points.
///
/// This error occurs when a [`u32`] value is outside the 21-bit Unicode code point range
/// (>`U+10FFFF`) or is a UTF-16 surrogate value.
///
/// Like `DecodeUtf16Error`, two errors compare equal when they carry the same offending value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodeUtf32Error {
    code: u32,
}

impl DecodeUtf32Error {
    /// Creates an error for the given invalid `u32` value.
    pub(crate) fn new(code: u32) -> Self {
        Self { code }
    }

    /// Returns the invalid code point value which caused the error.
    #[must_use]
    pub fn invalid_code_point(&self) -> u32 {
        self.code
    }
}

impl core::fmt::Display for DecodeUtf32Error {
    /// Writes the message with the invalid value in lowercase hexadecimal.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "invalid UTF-32 code point: {:x}", self.code)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for DecodeUtf32Error {}
/// Errors which can occur when attempting to interpret a sequence of `u16` as UTF-16.
#[derive(Debug, Clone)]
pub struct Utf16Error {
    index: usize,
    source: DecodeUtf16Error,
    #[cfg(feature = "alloc")]
    inner: Option<Vec<u16>>,
}

impl Utf16Error {
    /// Creates an error that keeps the offending vector so it can be recovered later.
    #[cfg(feature = "alloc")]
    pub(crate) fn new(inner: Vec<u16>, index: usize, source: DecodeUtf16Error) -> Self {
        let inner = Some(inner);
        Self {
            index,
            source,
            inner,
        }
    }

    /// Creates an error with no vector attached.
    #[cfg(feature = "alloc")]
    pub(crate) fn empty(index: usize, source: DecodeUtf16Error) -> Self {
        let inner = None;
        Self {
            index,
            source,
            inner,
        }
    }

    /// Creates an error with no vector attached.
    #[cfg(not(feature = "alloc"))]
    pub(crate) fn empty(index: usize, source: DecodeUtf16Error) -> Self {
        Utf16Error { index, source }
    }

    /// Returns the index in the given string at which the invalid UTF-16 value occurred.
    #[must_use]
    pub fn index(&self) -> usize {
        self.index
    }

    /// Consumes this error, returning the underlying vector of values which generated the error
    /// in the first place.
    ///
    /// If the sequence that generated the error was a reference to a slice instead of a
    /// [`Vec`], this will return [`None`].
    #[inline]
    #[cfg(feature = "alloc")]
    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
    #[must_use]
    pub fn into_vec(self) -> Option<Vec<u16>> {
        self.inner
    }
}

impl core::fmt::Display for Utf16Error {
    /// Writes the offending surrogate (lowercase hexadecimal) and the index it was found at.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let surrogate = self.source.unpaired_surrogate();
        let position = self.index;
        write!(
            f,
            "unpaired UTF-16 surrogate {:x} at index {}",
            surrogate, position
        )
    }
}

#[cfg(feature = "std")]
impl std::error::Error for Utf16Error {
    /// Exposes the underlying decode error as the source of this one.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause = &self.source;
        Some(cause)
    }
}
/// Errors which can occur when attempting to interpret a sequence of `u32` as UTF-32.
#[derive(Debug, Clone)]
pub struct Utf32Error {
    index: usize,
    source: DecodeUtf32Error,
    #[cfg(feature = "alloc")]
    inner: Option<Vec<u32>>,
}

impl Utf32Error {
    /// Creates an error that keeps the offending vector so it can be recovered later.
    #[cfg(feature = "alloc")]
    pub(crate) fn new(inner: Vec<u32>, index: usize, source: DecodeUtf32Error) -> Self {
        let inner = Some(inner);
        Self {
            index,
            source,
            inner,
        }
    }

    /// Creates an error with no vector attached.
    #[cfg(feature = "alloc")]
    pub(crate) fn empty(index: usize, source: DecodeUtf32Error) -> Self {
        let inner = None;
        Self {
            index,
            source,
            inner,
        }
    }

    /// Creates an error with no vector attached.
    #[cfg(not(feature = "alloc"))]
    pub(crate) fn empty(index: usize, source: DecodeUtf32Error) -> Self {
        Utf32Error { index, source }
    }

    /// Returns the index in the given string at which the invalid UTF-32 value occurred.
    #[must_use]
    pub fn index(&self) -> usize {
        self.index
    }

    /// Consumes this error, returning the underlying vector of values which generated the error
    /// in the first place.
    ///
    /// If the sequence that generated the error was a reference to a slice instead of a
    /// [`Vec`], this will return [`None`].
    #[inline]
    #[cfg(feature = "alloc")]
    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
    #[must_use]
    pub fn into_vec(self) -> Option<Vec<u32>> {
        self.inner
    }
}

impl core::fmt::Display for Utf32Error {
    /// Writes the invalid value (lowercase hexadecimal) and the index it was found at.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let value = self.source.invalid_code_point();
        let position = self.index;
        write!(f, "invalid UTF-32 value {:x} at index {}", value, position)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for Utf32Error {
    /// Exposes the underlying decode error as the source of this one.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause = &self.source;
        Some(cause)
    }
}

View file

@ -0,0 +1,442 @@
//! Iterators for encoding and decoding slices of string data.
use crate::{
decode_utf16_surrogate_pair,
error::{DecodeUtf16Error, DecodeUtf32Error},
is_utf16_high_surrogate, is_utf16_low_surrogate, is_utf16_surrogate,
};
use core::{
char,
iter::{DoubleEndedIterator, ExactSizeIterator, FusedIterator},
};
/// An iterator that decodes UTF-16 encoded code points from an iterator of [`u16`]s.
///
/// This struct is created by [`decode_utf16`][crate::decode_utf16]. See its documentation for more.
///
/// This struct is identical to [`char::DecodeUtf16`] except it is a [`DoubleEndedIterator`] if
/// `I` is.
#[derive(Debug, Clone)]
pub struct DecodeUtf16<I>
where
    I: Iterator<Item = u16>,
{
    iter: I,
    // Code unit that `next` pulled out of `iter` while looking for a trailing surrogate but
    // could not use; it is decoded first on the following front read.
    forward_buf: Option<u16>,
    // Counterpart of `forward_buf` for `next_back`.
    back_buf: Option<u16>,
}

impl<I> DecodeUtf16<I>
where
    I: Iterator<Item = u16>,
{
    /// Wraps `iter` with both look-ahead buffers empty.
    pub(crate) fn new(iter: I) -> Self {
        Self {
            iter,
            forward_buf: None,
            back_buf: None,
        }
    }
}

impl<I> Iterator for DecodeUtf16<I>
where
    I: Iterator<Item = u16>,
{
    type Item = Result<char, DecodeUtf16Error>;

    /// Decodes the next code point from the front.
    ///
    /// Units are taken from `forward_buf` first, then from the inner iterator, and finally from
    /// `back_buf` (a unit left over by `next_back`). An unpaired surrogate is reported as `Err`.
    fn next(&mut self) -> Option<Self::Item> {
        // Copied from char::DecodeUtf16
        let u = match self.forward_buf.take() {
            Some(buf) => buf,
            None => self.iter.next().or_else(|| self.back_buf.take())?,
        };

        if !is_utf16_surrogate(u) {
            // SAFETY: not a surrogate
            Some(Ok(unsafe { char::from_u32_unchecked(u as u32) }))
        } else if is_utf16_low_surrogate(u) {
            // a trailing surrogate
            Some(Err(DecodeUtf16Error::new(u)))
        } else {
            let u2 = match self.iter.next().or_else(|| self.back_buf.take()) {
                Some(u2) => u2,
                // eof
                None => return Some(Err(DecodeUtf16Error::new(u))),
            };
            if !is_utf16_low_surrogate(u2) {
                // not a trailing surrogate so we're not a valid
                // surrogate pair, so rewind to redecode u2 next time.
                self.forward_buf = Some(u2);
                return Some(Err(DecodeUtf16Error::new(u)));
            }

            // all ok, so lets decode it.
            // SAFETY: verified the surrogate pair
            Some(Ok(unsafe { decode_utf16_surrogate_pair(u, u2) }))
        }
    }

    /// Returns bounds on the number of remaining items.
    ///
    /// Units parked in `forward_buf`/`back_buf` have already left the inner iterator, so its
    /// own hint no longer counts them; they are added back here so the upper bound is never
    /// smaller than the number of items that can still be produced.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (low, high) = self.iter.size_hint();
        let buffered =
            usize::from(self.forward_buf.is_some()) + usize::from(self.back_buf.is_some());
        // we could be entirely valid surrogates (2 elements per
        // char), or entirely non-surrogates (1 element per char)
        (
            low.saturating_add(buffered) / 2,
            // An overflowing sum means "no known upper bound".
            high.and_then(|h| h.checked_add(buffered)),
        )
    }
}

impl<I> DoubleEndedIterator for DecodeUtf16<I>
where
    I: Iterator<Item = u16> + DoubleEndedIterator,
{
    /// Decodes the next code point from the back.
    ///
    /// Mirror image of `next`: units come from `back_buf`, then the back of the inner iterator,
    /// then `forward_buf`.
    fn next_back(&mut self) -> Option<Self::Item> {
        let u2 = match self.back_buf.take() {
            Some(buf) => buf,
            None => self.iter.next_back().or_else(|| self.forward_buf.take())?,
        };

        if !is_utf16_surrogate(u2) {
            // SAFETY: not a surrogate
            Some(Ok(unsafe { char::from_u32_unchecked(u2 as u32) }))
        } else if is_utf16_high_surrogate(u2) {
            // a leading surrogate
            Some(Err(DecodeUtf16Error::new(u2)))
        } else {
            let u = match self.iter.next_back().or_else(|| self.forward_buf.take()) {
                Some(u) => u,
                // eof
                None => return Some(Err(DecodeUtf16Error::new(u2))),
            };
            if !is_utf16_high_surrogate(u) {
                // not a leading surrogate so we're not a valid
                // surrogate pair, so rewind to redecode u next time.
                self.back_buf = Some(u);
                return Some(Err(DecodeUtf16Error::new(u2)));
            }

            // all ok, so lets decode it.
            // SAFETY: verified the surrogate pair
            Some(Ok(unsafe { decode_utf16_surrogate_pair(u, u2) }))
        }
    }
}

impl<I> FusedIterator for DecodeUtf16<I> where I: Iterator<Item = u16> + FusedIterator {}
/// An iterator that lossily decodes possibly ill-formed UTF-16 encoded code points from an iterator
/// of [`u16`]s.
///
/// Any unpaired UTF-16 surrogate values are replaced by
/// [`U+FFFD REPLACEMENT_CHARACTER`][char::REPLACEMENT_CHARACTER] (<28>).
#[derive(Debug, Clone)]
pub struct DecodeUtf16Lossy<I>
where
I: Iterator<Item = u16>,
{
pub(crate) iter: DecodeUtf16<I>,
}
impl<I> Iterator for DecodeUtf16Lossy<I>
where
I: Iterator<Item = u16>,
{
type Item = char;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter
.next()
.map(|res| res.unwrap_or(char::REPLACEMENT_CHARACTER))
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
impl<I> DoubleEndedIterator for DecodeUtf16Lossy<I>
where
I: Iterator<Item = u16> + DoubleEndedIterator,
{
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
self.iter
.next_back()
.map(|res| res.unwrap_or(char::REPLACEMENT_CHARACTER))
}
}
impl<I> FusedIterator for DecodeUtf16Lossy<I> where I: Iterator<Item = u16> + FusedIterator {}
/// An iterator that decodes UTF-32 encoded code points from an iterator of `u32`s.
///
/// Each input value becomes exactly one item: `Ok(char)` when the value converts to a
/// [`char`][prim@char], `Err` carrying the rejected value otherwise.
#[derive(Debug, Clone)]
pub struct DecodeUtf32<I>
where
    I: Iterator<Item = u32>,
{
    pub(crate) iter: I,
}

impl<I> Iterator for DecodeUtf32<I>
where
    I: Iterator<Item = u32>,
{
    type Item = Result<char, DecodeUtf32Error>;

    /// Converts the next value from the front.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let unit = self.iter.next()?;
        Some(match char::from_u32(unit) {
            Some(decoded) => Ok(decoded),
            None => Err(DecodeUtf32Error::new(unit)),
        })
    }

    /// Forwards the inner hint, since the mapping is one-to-one.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl<I> DoubleEndedIterator for DecodeUtf32<I>
where
    I: Iterator<Item = u32> + DoubleEndedIterator,
{
    /// Converts the next value from the back.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        let unit = self.iter.next_back()?;
        Some(match char::from_u32(unit) {
            Some(decoded) => Ok(decoded),
            None => Err(DecodeUtf32Error::new(unit)),
        })
    }
}

impl<I> FusedIterator for DecodeUtf32<I> where I: Iterator<Item = u32> + FusedIterator {}

impl<I> ExactSizeIterator for DecodeUtf32<I>
where
    I: Iterator<Item = u32> + ExactSizeIterator,
{
    /// Same length as the inner iterator.
    #[inline]
    fn len(&self) -> usize {
        self.iter.len()
    }
}
/// An iterator that lossily decodes possibly ill-formed UTF-32 encoded code points from an iterator
/// of `u32`s.
///
/// Any invalid UTF-32 values are replaced by
/// [`U+FFFD REPLACEMENT_CHARACTER`][core::char::REPLACEMENT_CHARACTER] (<28>).
#[derive(Debug, Clone)]
pub struct DecodeUtf32Lossy<I>
where
I: Iterator<Item = u32>,
{
pub(crate) iter: DecodeUtf32<I>,
}
impl<I> Iterator for DecodeUtf32Lossy<I>
where
I: Iterator<Item = u32>,
{
type Item = char;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter
.next()
.map(|res| res.unwrap_or(core::char::REPLACEMENT_CHARACTER))
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
impl<I> DoubleEndedIterator for DecodeUtf32Lossy<I>
where
I: Iterator<Item = u32> + DoubleEndedIterator,
{
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
self.iter
.next_back()
.map(|res| res.unwrap_or(core::char::REPLACEMENT_CHARACTER))
}
}
impl<I> FusedIterator for DecodeUtf32Lossy<I> where I: Iterator<Item = u32> + FusedIterator {}
impl<I> ExactSizeIterator for DecodeUtf32Lossy<I>
where
I: Iterator<Item = u32> + ExactSizeIterator,
{
#[inline]
fn len(&self) -> usize {
self.iter.len()
}
}
/// An iterator that encodes an iterator of [`char`][prim@char]s into UTF-8 bytes.
///
/// This struct is created by [`encode_utf8`][crate::encode_utf8]. See its documentation for more.
#[derive(Debug, Clone)]
pub struct EncodeUtf8<I>
where
    I: Iterator<Item = char>,
{
    iter: I,
    // UTF-8 encoding of the character currently being emitted.
    buf: [u8; 4],
    // Position in `buf` of the next byte to emit.
    idx: u8,
    // Number of valid bytes in `buf`; `idx == len` means the buffer is drained.
    len: u8,
}

impl<I> EncodeUtf8<I>
where
    I: Iterator<Item = char>,
{
    /// Wraps `iter` with an empty byte buffer.
    pub(crate) fn new(iter: I) -> Self {
        Self {
            iter,
            buf: [0; 4],
            idx: 0,
            len: 0,
        }
    }
}

impl<I> Iterator for EncodeUtf8<I>
where
    I: Iterator<Item = char>,
{
    type Item = u8;

    /// Emits the next UTF-8 byte, encoding a fresh character whenever the buffer is drained.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.idx >= self.len {
            let c = self.iter.next()?;
            self.idx = 0;
            // A `char` encodes to at most 4 bytes, so the length always fits in a `u8`.
            self.len = c.encode_utf8(&mut self.buf).len() as u8;
        }
        let byte = self.buf[usize::from(self.idx)];
        self.idx += 1;
        Some(byte)
    }

    /// Returns bounds on the number of remaining bytes.
    ///
    /// Bytes of the current character that are still buffered are counted in both bounds: the
    /// inner iterator has already handed that character over, so its own hint no longer covers
    /// them.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (lower, upper) = self.iter.size_hint();
        let pending = usize::from(self.len.saturating_sub(self.idx));
        (
            // At least 1 UTF-8 byte per char
            lower.saturating_add(pending),
            // Max 4 UTF-8 bytes per char; overflow means "no known upper bound"
            upper
                .and_then(|len| len.checked_mul(4))
                .and_then(|len| len.checked_add(pending)),
        )
    }
}

impl<I> FusedIterator for EncodeUtf8<I> where I: Iterator<Item = char> + FusedIterator {}
/// An iterator that encodes an iterator of [`char`][prim@char]s into UTF-16 [`u16`] code units.
///
/// This struct is created by [`encode_utf16`][crate::encode_utf16]. See its documentation for more.
#[derive(Debug, Clone)]
pub struct EncodeUtf16<I>
where
    I: Iterator<Item = char>,
{
    iter: I,
    // Trailing surrogate of the last encoded character, waiting to be emitted.
    buf: Option<u16>,
}

impl<I> EncodeUtf16<I>
where
    I: Iterator<Item = char>,
{
    /// Wraps `iter` with no pending code unit.
    pub(crate) fn new(iter: I) -> Self {
        Self { iter, buf: None }
    }
}

impl<I> Iterator for EncodeUtf16<I>
where
    I: Iterator<Item = char>,
{
    type Item = u16;

    /// Emits the pending trailing surrogate if there is one, otherwise encodes the next
    /// character and emits its first code unit.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if let Some(trailing) = self.buf.take() {
            return Some(trailing);
        }
        let c = self.iter.next()?;
        let mut units = [0; 2];
        let encoded = c.encode_utf16(&mut units);
        // A second unit only exists for characters encoded as a surrogate pair.
        self.buf = encoded.get(1).copied();
        Some(encoded[0])
    }

    /// Returns bounds on the number of remaining code units.
    ///
    /// A pending trailing surrogate is counted in both bounds: the inner iterator has already
    /// handed that character over, so its own hint no longer covers it.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (lower, upper) = self.iter.size_hint();
        let pending = usize::from(self.buf.is_some());
        (
            // At least 1 UTF-16 code unit per char
            lower.saturating_add(pending),
            // Max 2 UTF-16 code units per char; overflow means "no known upper bound"
            upper
                .and_then(|len| len.checked_mul(2))
                .and_then(|len| len.checked_add(pending)),
        )
    }
}

impl<I> FusedIterator for EncodeUtf16<I> where I: Iterator<Item = char> + FusedIterator {}
/// An iterator that encodes an iterator of [`char`][prim@char]s into UTF-32 [`u32`] values.
///
/// This struct is created by [`encode_utf32`][crate::encode_utf32]. See its documentation for more.
#[derive(Debug, Clone)]
pub struct EncodeUtf32<I>
where
    I: Iterator<Item = char>,
{
    iter: I,
}

impl<I> EncodeUtf32<I>
where
    I: Iterator<Item = char>,
{
    /// Wraps `iter`; no extra state is needed because every character maps to one value.
    pub(crate) fn new(iter: I) -> Self {
        EncodeUtf32 { iter }
    }
}

impl<I> Iterator for EncodeUtf32<I>
where
    I: Iterator<Item = char>,
{
    type Item = u32;

    /// Yields the scalar value of the next character.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let c = self.iter.next()?;
        Some(u32::from(c))
    }

    /// Forwards the inner hint, since the mapping is one-to-one.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl<I> FusedIterator for EncodeUtf32<I> where I: Iterator<Item = char> + FusedIterator {}

impl<I> ExactSizeIterator for EncodeUtf32<I>
where
    I: Iterator<Item = char> + ExactSizeIterator,
{
    /// Same length as the inner iterator.
    #[inline]
    fn len(&self) -> usize {
        self.iter.len()
    }
}

impl<I> DoubleEndedIterator for EncodeUtf32<I>
where
    I: Iterator<Item = char> + DoubleEndedIterator,
{
    /// Yields the scalar value of the next character from the back.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        let c = self.iter.next_back()?;
        Some(u32::from(c))
    }
}

742
third-party/vendor/widestring/src/lib.rs vendored Normal file
View file

@ -0,0 +1,742 @@
//! A wide string library for converting to and from wide string variants.
//!
//! This library provides multiple types of wide strings, each corresponding to a string type in
//! the Rust standard library. [`Utf16String`] and [`Utf32String`] are analogous to the standard
//! [`String`] type, providing a similar interface, and are always encoded as valid UTF-16 and
//! UTF-32, respectively. They are the only type in this library that can losslessly and infallibly
//! convert to and from [`String`], and are the easiest type to work with. They are not designed for
//! working with FFI, but do support efficient conversions from the FFI types.
//!
//! [`U16String`] and [`U32String`], on the other hand, are similar to (but not the same as),
//! [`OsString`], and are designed around working with FFI. Unlike the UTF variants, these strings
//! do not have a defined encoding, and can work with any wide character strings, regardless of
//! the encoding. They can be converted to and from [`OsString`] (but may require an encoding
//! conversion depending on the platform), although that string type is an OS-specified
//! encoding, so take special care.
//!
//! [`U16String`] and [`U32String`] also allow access and mutation that relies on the user
//! to enforce any constraints on the data. Some methods do assume a UTF encoding, but do so in a
//! way that handles malformed encoding data. For FFI, use [`U16String`] or [`U32String`] when you
//! simply need to pass through string data, or when you're not dealing with nul-terminated data.
//!
//! Finally, [`U16CString`] and [`U32CString`] are wide versions of the standard [`CString`] type.
//! Like [`U16String`] and [`U32String`], they do not have defined encoding, but are designed to
//! work with FFI, particularly C-style nul-terminated wide string data. These C-style strings are
//! always terminated in a nul value, and are guaranteed to contain no interior nul values (unless
//! unchecked methods are used). Again, these types may contain ill-formed encoding data, and
//! methods handle it appropriately. Use [`U16CString`] or [`U32CString`] anytime you must properly
//! handle nul values when dealing with wide string C FFI.
//!
//! Like the standard Rust string types, each wide string type has its corresponding wide string
//! slice type, as shown in the following table:
//!
//! | String Type | Slice Type |
//! |-----------------|--------------|
//! | [`Utf16String`] | [`Utf16Str`] |
//! | [`Utf32String`] | [`Utf32Str`] |
//! | [`U16String`] | [`U16Str`] |
//! | [`U32String`] | [`U32Str`] |
//! | [`U16CString`] | [`U16CStr`] |
//! | [`U32CString`] | [`U32CStr`] |
//!
//! All the string types in this library can be converted between string types of the same bit
//! width, as well as appropriate standard Rust types, but may be lossy and/or require knowledge of the
//! underlying encoding. The UTF strings additionally can be converted between the two sizes of
//! string, re-encoding the strings.
//!
//! # Wide string literals
//!
//! Macros are provided for each wide string slice type that convert standard Rust [`str`] literals
//! into UTF-16 or UTF-32 encoded versions of the slice type at *compile time*.
//!
//! ```
//! use widestring::u16str;
//! let hello = u16str!("Hello, world!"); // `hello` will be a &U16Str value
//! ```
//!
//! These can be used anywhere a `const` function can be used, and provide a convenient method of
//! specifying wide string literals instead of coding values by hand. The resulting string slices
//! are always valid UTF encoding, and the [`u16cstr!`] and [`u32cstr!`] macros are automatically
//! nul-terminated.
//!
//! # Cargo features
//!
//! This crate supports `no_std` when default cargo features are disabled. The `std` and `alloc`
//! cargo features (enabled by default) enable the owned string types: [`U16String`], [`U32String`],
//! [`U16CString`], [`U32CString`], [`Utf16String`], and [`Utf32String`] types and their modules.
//! Other types such as the string slices do not require allocation and can be used in a `no_std`
//! environment, even without the [`alloc`](https://doc.rust-lang.org/stable/alloc/index.html)
//! crate.
//!
//! # Remarks on UTF-16 and UTF-32
//!
//! UTF-16 encoding is a variable-length encoding. The 16-bit code units can specify Unicode code
//! points either as single units or in _surrogate pairs_. Because every value might be part of a
//! surrogate pair, many regular string operations on UTF-16 data, including indexing, writing, or
//! even iterating, require considering either one or two values at a time. This library provides
//! safe methods for these operations when the data is known to be UTF-16, such as with
//! [`Utf16String`]. In those cases, keep in mind that the number of elements (`len()`) of the
//! wide string is _not_ equivalent to the number of Unicode code points in the string, but is
//! instead the number of code unit values.
//!
//! For [`U16String`] and [`U16CString`], which do not define an encoding, these same operations
//! (indexing, mutating, iterating) do _not_ take into account UTF-16 encoding and may result in
//! sequences that are ill-formed UTF-16. Some methods are provided that do make an exception to
//! this and treat the strings as malformed UTF-16, which are specified in their documentation as to
//! how they handle the invalid data.
//!
//! UTF-32 simply encodes Unicode code points as-is in 32-bit Unicode Scalar Values, but Unicode
//! character code points are reserved only for 21-bits, and UTF-16 surrogates are invalid in
//! UTF-32. Since UTF-32 is a fixed-width encoding, it is much easier to deal with, but equivalent
//! methods to the 16-bit strings are provided for compatibility.
//!
//! All the 32-bit wide strings provide efficient methods to convert to and from sequences of
//! [`char`] data, as the representation of UTF-32 strings is functionally equivalent to sequences
//! of [`char`]s. Keep in mind that only [`Utf32String`] guarantees this equivalence, however, since
//! the other strings may contain invalid values.
//!
//! # FFI with C/C++ `wchar_t`
//!
//! C/C++'s `wchar_t` (and C++'s corresponding `std::wstring`) varies in size depending on compiler
//! and platform. Typically, `wchar_t` is 16-bits on Windows and 32-bits on most Unix-based
//! platforms. For convenience when using `wchar_t`-based FFI's, type aliases for the corresponding
//! string types are provided: [`WideString`] aliases [`U16String`] on Windows or [`U32String`]
//! elsewhere, [`WideCString`] aliases [`U16CString`] or [`U32CString`], and [`WideUtfString`]
//! aliases [`Utf16String`] or [`Utf32String`]. [`WideStr`], [`WideCStr`], and [`WideUtfStr`] are
//! provided for the string slice types. The [`WideChar`] alias is also provided, aliasing [`u16`]
//! or [`u32`] depending on platform.
//!
//! When not interacting with a FFI that uses `wchar_t`, it is recommended to use the string types
//! directly rather than via the wide alias.
//!
//! # Nul values
//!
//! This crate uses the legacy ASCII term "nul" to refer to Unicode code point `U+0000 NULL`
//! and its associated code unit representation as zero-value bytes. This is to disambiguate this
//! zero value from null pointer values. C-style strings end in a nul value, while regular Rust
//! strings allow interior nul values and are not terminated with nul.
//!
//! # Examples
//!
//! The following example uses [`U16String`] to get Windows error messages, since `FormatMessageW`
//! returns a string length for us and we don't need to pass error messages into other FFI
//! functions so we don't need to worry about nul values.
//!
//! ```rust
//! # #[cfg(any(not(windows), not(feature = "alloc")))]
//! # fn main() {}
//! # extern crate winapi;
//! # extern crate widestring;
//! # #[cfg(all(windows, feature = "alloc"))]
//! # fn main() {
//! use winapi::um::winbase::{FormatMessageW, LocalFree, FORMAT_MESSAGE_FROM_SYSTEM,
//! FORMAT_MESSAGE_ALLOCATE_BUFFER, FORMAT_MESSAGE_IGNORE_INSERTS};
//! use winapi::shared::ntdef::LPWSTR;
//! use winapi::shared::minwindef::HLOCAL;
//! use std::ptr;
//! use widestring::U16String;
//! # use winapi::shared::minwindef::DWORD;
//! # let error_code: DWORD = 0;
//!
//! let s: U16String;
//! unsafe {
//! // First, get a string buffer from some windows api such as FormatMessageW...
//! let mut buffer: LPWSTR = ptr::null_mut();
//! let strlen = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM |
//! FORMAT_MESSAGE_ALLOCATE_BUFFER |
//! FORMAT_MESSAGE_IGNORE_INSERTS,
//! ptr::null(),
//! error_code, // error code from GetLastError()
//! 0,
//! (&mut buffer as *mut LPWSTR) as LPWSTR,
//! 0,
//! ptr::null_mut());
//!
//! // Get the buffer as a wide string
//! s = U16String::from_ptr(buffer, strlen as usize);
//! // Since U16String creates an owned copy, it's safe to free original buffer now
//! // If you didn't want an owned copy, you could use &U16Str.
//! LocalFree(buffer as HLOCAL);
//! }
//! // Convert to a regular Rust String and use it to your heart's desire!
//! let message = s.to_string_lossy();
//! # assert_eq!(message, "The operation completed successfully.\r\n");
//! # }
//! ```
//!
//! The following example is functionally the same, only using [`U16CString`] instead.
//!
//! ```rust
//! # #[cfg(any(not(windows), not(feature = "alloc")))]
//! # fn main() {}
//! # extern crate winapi;
//! # extern crate widestring;
//! # #[cfg(all(windows, feature = "alloc"))]
//! # fn main() {
//! use winapi::um::winbase::{FormatMessageW, LocalFree, FORMAT_MESSAGE_FROM_SYSTEM,
//! FORMAT_MESSAGE_ALLOCATE_BUFFER, FORMAT_MESSAGE_IGNORE_INSERTS};
//! use winapi::shared::ntdef::LPWSTR;
//! use winapi::shared::minwindef::HLOCAL;
//! use std::ptr;
//! use widestring::U16CString;
//! # use winapi::shared::minwindef::DWORD;
//! # let error_code: DWORD = 0;
//!
//! let s: U16CString;
//! unsafe {
//! // First, get a string buffer from some windows api such as FormatMessageW...
//! let mut buffer: LPWSTR = ptr::null_mut();
//! FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM |
//! FORMAT_MESSAGE_ALLOCATE_BUFFER |
//! FORMAT_MESSAGE_IGNORE_INSERTS,
//! ptr::null(),
//! error_code, // error code from GetLastError()
//! 0,
//! (&mut buffer as *mut LPWSTR) as LPWSTR,
//! 0,
//! ptr::null_mut());
//!
//! // Get the buffer as a wide string
//! s = U16CString::from_ptr_str(buffer);
//! // Since U16CString creates an owned copy, it's safe to free original buffer now
//! // If you didn't want an owned copy, you could use &U16CStr.
//! LocalFree(buffer as HLOCAL);
//! }
//! // Convert to a regular Rust String and use it to your heart's desire!
//! let message = s.to_string_lossy();
//! # assert_eq!(message, "The operation completed successfully.\r\n");
//! # }
//! ```
//!
//! [`OsString`]: std::ffi::OsString
//! [`OsStr`]: std::ffi::OsStr
//! [`CString`]: std::ffi::CString
//! [`CStr`]: std::ffi::CStr
#![warn(
missing_docs,
missing_debug_implementations,
trivial_casts,
trivial_numeric_casts,
future_incompatible
)]
#![cfg_attr(not(feature = "std"), no_std)]
#![doc(html_root_url = "https://docs.rs/widestring/1.0.2")]
#![doc(test(attr(deny(warnings), allow(unused))))]
#![cfg_attr(docsrs, feature(doc_cfg))]
#[cfg(feature = "alloc")]
extern crate alloc;
use crate::error::{DecodeUtf16Error, DecodeUtf32Error};
#[cfg(feature = "alloc")]
use alloc::vec::Vec;
use core::fmt::Write;
pub mod error;
pub mod iter;
mod macros;
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
mod platform;
pub mod ucstr;
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub mod ucstring;
pub mod ustr;
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub mod ustring;
pub mod utfstr;
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub mod utfstring;
#[doc(hidden)]
pub use macros::internals;
pub use ucstr::{U16CStr, U32CStr, WideCStr};
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub use ucstring::{U16CString, U32CString, WideCString};
pub use ustr::{U16Str, U32Str, WideStr};
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub use ustring::{U16String, U32String, WideString};
pub use utfstr::{Utf16Str, Utf32Str, WideUtfStr};
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub use utfstring::{Utf16String, Utf32String, WideUtfString};
#[cfg(not(windows))]
/// Alias for [`u16`] or [`u32`] depending on platform. Intended to match typical C `wchar_t` size
/// on platform.
///
/// This definition is compiled on non-Windows targets, where the alias is [`u32`].
pub type WideChar = u32;
#[cfg(windows)]
/// Alias for [`u16`] or [`u32`] depending on platform. Intended to match typical C `wchar_t` size
/// on platform.
///
/// This definition is compiled on Windows targets, where the alias is [`u16`].
pub type WideChar = u16;
/// Creates a decoder iterator over the UTF-16 encoded code points in `iter`, returning unpaired
/// surrogates as `Err`s.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use widestring::decode_utf16;
///
/// // 𝄞mus<invalid>ic<invalid>
/// let v = [
///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
/// ];
///
/// assert_eq!(
///     decode_utf16(v.iter().copied())
///         .map(|r| r.map_err(|e| e.unpaired_surrogate()))
///         .collect::<Vec<_>>(),
///     vec![
///         Ok('𝄞'),
///         Ok('m'), Ok('u'), Ok('s'),
///         Err(0xDD1E),
///         Ok('i'), Ok('c'),
///         Err(0xD834)
///     ]
/// );
/// ```
///
/// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
///
/// ```
/// use std::char::REPLACEMENT_CHARACTER;
/// use widestring::decode_utf16;
///
/// // 𝄞mus<invalid>ic<invalid>
/// let v = [
///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
/// ];
///
/// assert_eq!(
///     decode_utf16(v.iter().copied())
///         .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
///         .collect::<String>(),
///     "𝄞mus\u{FFFD}ic\u{FFFD}"
/// );
/// ```
#[must_use]
pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> iter::DecodeUtf16<I::IntoIter> {
    iter::DecodeUtf16::new(iter.into_iter())
}
/// Creates a lossy decoder iterator over the possibly ill-formed UTF-16 encoded code points in
/// `iter`.
///
/// This is equivalent to [`char::decode_utf16`][core::char::decode_utf16] except that any unpaired
/// UTF-16 surrogate values are replaced by
/// [`U+FFFD REPLACEMENT_CHARACTER`][core::char::REPLACEMENT_CHARACTER] (<28>) instead of returning
/// errors.
///
/// # Examples
///
/// ```
/// use widestring::decode_utf16_lossy;
///
/// // 𝄞mus<invalid>ic<invalid>
/// let v = [
/// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
/// ];
///
/// assert_eq!(
/// decode_utf16_lossy(v.iter().copied()).collect::<String>(),
/// "𝄞mus<75>ic<69>"
/// );
/// ```
#[inline]
#[must_use]
pub fn decode_utf16_lossy<I: IntoIterator<Item = u16>>(
iter: I,
) -> iter::DecodeUtf16Lossy<I::IntoIter> {
iter::DecodeUtf16Lossy {
iter: decode_utf16(iter),
}
}
/// Wraps `iter` in a decoder that interprets each [`u32`] as a UTF-32 code point, yielding
/// values that are not valid [`char`]s as `Err`s.
///
/// # Examples
///
/// ```
/// use widestring::decode_utf32;
///
/// // 𝄞mus<invalid>ic<invalid>
/// let v = [
///     0x1D11E, 0x6d, 0x75, 0x73, 0xDD1E, 0x69, 0x63, 0x23FD5A,
/// ];
///
/// assert_eq!(
///     decode_utf32(v.iter().copied())
///         .map(|r| r.map_err(|e| e.invalid_code_point()))
///         .collect::<Vec<_>>(),
///     vec![
///         Ok('𝄞'),
///         Ok('m'), Ok('u'), Ok('s'),
///         Err(0xDD1E),
///         Ok('i'), Ok('c'),
///         Err(0x23FD5A)
///     ]
/// );
/// ```
#[inline]
#[must_use]
pub fn decode_utf32<I: IntoIterator<Item = u32>>(iter: I) -> iter::DecodeUtf32<I::IntoIter> {
    let iter = iter.into_iter();
    iter::DecodeUtf32 { iter }
}
/// Creates a lossy decoder iterator over the possibly ill-formed UTF-32 encoded code points in
/// `iter`.
///
/// This is equivalent to [`decode_utf32`] except that any invalid UTF-32 values are replaced by
/// [`U+FFFD REPLACEMENT_CHARACTER`][core::char::REPLACEMENT_CHARACTER] (<28>) instead of returning
/// errors.
///
/// # Examples
///
/// ```
/// use widestring::decode_utf32_lossy;
///
/// // 𝄞mus<invalid>ic<invalid>
/// let v = [
/// 0x1D11E, 0x6d, 0x75, 0x73, 0xDD1E, 0x69, 0x63, 0x23FD5A,
/// ];
///
/// assert_eq!(
/// decode_utf32_lossy(v.iter().copied()).collect::<String>(),
/// "𝄞mus<75>ic<69>"
/// );
/// ```
#[inline]
#[must_use]
pub fn decode_utf32_lossy<I: IntoIterator<Item = u32>>(
iter: I,
) -> iter::DecodeUtf32Lossy<I::IntoIter> {
iter::DecodeUtf32Lossy {
iter: decode_utf32(iter),
}
}
/// Adapts an iterator of [`char`]s into an iterator over the bytes of their UTF-8 encoding.
///
/// # Examples
///
/// ```
/// use widestring::encode_utf8;
///
/// let music = "𝄞music";
///
/// let encoded: Vec<u8> = encode_utf8(music.chars()).collect();
///
/// assert_eq!(encoded, music.as_bytes());
/// ```
#[must_use]
pub fn encode_utf8<I: IntoIterator<Item = char>>(iter: I) -> iter::EncodeUtf8<I::IntoIter> {
    let chars = iter.into_iter();
    iter::EncodeUtf8::new(chars)
}
/// Adapts an iterator of [`char`]s into an iterator over the [`u16`] code units of their UTF-16
/// encoding.
///
/// # Examples
///
/// ```
/// use widestring::encode_utf16;
///
/// let encoded: Vec<u16> = encode_utf16("𝄞music".chars()).collect();
///
/// let v = [
///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0x0069, 0x0063,
/// ];
///
/// assert_eq!(encoded, v);
/// ```
#[must_use]
pub fn encode_utf16<I: IntoIterator<Item = char>>(iter: I) -> iter::EncodeUtf16<I::IntoIter> {
    let chars = iter.into_iter();
    iter::EncodeUtf16::new(chars)
}
/// Adapts an iterator of [`char`]s into an iterator over their UTF-32 [`u32`] values.
///
/// This iterator is a simple type cast from [`char`] to [`u32`], as any sequence of [`char`]s is
/// valid UTF-32.
///
/// # Examples
///
/// ```
/// use widestring::encode_utf32;
///
/// let encoded: Vec<u32> = encode_utf32("𝄞music".chars()).collect();
///
/// let v = [
///     0x1D11E, 0x006d, 0x0075, 0x0073, 0x0069, 0x0063,
/// ];
///
/// assert_eq!(encoded, v);
/// ```
#[must_use]
pub fn encode_utf32<I: IntoIterator<Item = char>>(iter: I) -> iter::EncodeUtf32<I::IntoIter> {
    let chars = iter.into_iter();
    iter::EncodeUtf32::new(chars)
}
/// Shared `Debug` formatting for slices of 16-bit code units.
///
/// The slice is decoded as UTF-16 and handed to [`debug_fmt_utf16_iter`]: well-formed data is
/// written as an escaped string, while every unpaired surrogate is written as \<XXXX>, which is
/// deliberately not a valid escape sequence. Debug output is meant for humans, not parsers.
fn debug_fmt_u16(s: &[u16], fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
    let decoded = decode_utf16(s.iter().copied());
    debug_fmt_utf16_iter(decoded, fmt)
}
/// Shared `Debug` formatting for any iterator of UTF-16 decoding results.
///
/// The output is wrapped in double quotes. Successfully decoded characters are written using
/// their debug escapes; each decoding error is written as \<XXXX> (the unpaired surrogate in
/// upper-case hex), which is deliberately not a valid escape sequence. Debug output is meant for
/// humans, not parsers.
fn debug_fmt_utf16_iter(
    iter: impl Iterator<Item = Result<char, DecodeUtf16Error>>,
    fmt: &mut core::fmt::Formatter<'_>,
) -> core::fmt::Result {
    fmt.write_char('"')?;
    for item in iter {
        match item {
            Ok(ch) => ch
                .escape_debug()
                .try_for_each(|escaped| fmt.write_char(escaped))?,
            Err(err) => write!(fmt, "\\<{:X}>", err.unpaired_surrogate())?,
        }
    }
    fmt.write_char('"')
}
/// Shared `Debug` formatting for slices of 32-bit code units.
///
/// The slice is decoded as UTF-32 and handed to [`debug_fmt_utf32_iter`]: well-formed data is
/// written as an escaped string, while every invalid code point is written as \<XXXX>, which is
/// deliberately not a valid escape sequence. Debug output is meant for humans, not parsers.
fn debug_fmt_u32(s: &[u32], fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
    let decoded = decode_utf32(s.iter().copied());
    debug_fmt_utf32_iter(decoded, fmt)
}
/// Shared `Debug` formatting for any iterator of UTF-32 decoding results.
///
/// The output is wrapped in double quotes. Successfully decoded characters are written using
/// their debug escapes; each decoding error is written as \<XXXX> (the invalid code point in
/// upper-case hex), which is deliberately not a valid escape sequence. Debug output is meant for
/// humans, not parsers.
fn debug_fmt_utf32_iter(
    iter: impl Iterator<Item = Result<char, DecodeUtf32Error>>,
    fmt: &mut core::fmt::Formatter<'_>,
) -> core::fmt::Result {
    fmt.write_char('"')?;
    for item in iter {
        match item {
            Ok(ch) => ch
                .escape_debug()
                .try_for_each(|escaped| fmt.write_char(escaped))?,
            Err(err) => write!(fmt, "\\<{:X}>", err.invalid_code_point())?,
        }
    }
    fmt.write_char('"')
}
/// Shared `Debug` formatting for any iterator of `char`s.
///
/// Writes an opening double quote, the debug escape of every character in order, and a closing
/// double quote. The first formatter error aborts the output and is returned.
fn debug_fmt_char_iter(
    iter: impl Iterator<Item = char>,
    fmt: &mut core::fmt::Formatter<'_>,
) -> core::fmt::Result {
    fmt.write_char('"')?;
    for ch in iter {
        for escaped in ch.escape_debug() {
            fmt.write_char(escaped)?;
        }
    }
    fmt.write_char('"')
}
/// Returns whether the code unit is a UTF-16 surrogate value, i.e. lies in `0xD800..=0xDFFF`.
#[inline(always)]
#[allow(dead_code)]
const fn is_utf16_surrogate(u: u16) -> bool {
    matches!(u, 0xD800..=0xDFFF)
}
/// Returns whether the code unit is a UTF-16 high surrogate value, i.e. lies in
/// `0xD800..=0xDBFF`.
#[inline(always)]
#[allow(dead_code)]
const fn is_utf16_high_surrogate(u: u16) -> bool {
    matches!(u, 0xD800..=0xDBFF)
}
/// Returns whether the code unit is a UTF-16 low surrogate value, i.e. lies in
/// `0xDC00..=0xDFFF`.
#[inline(always)]
const fn is_utf16_low_surrogate(u: u16) -> bool {
    matches!(u, 0xDC00..=0xDFFF)
}
/// Converts a UTF-16 surrogate pair to a `char`. Does not validate that the surrogates are valid.
///
/// # Safety
///
/// `high` must be a high surrogate (`0xD800..=0xDBFF`) and `low` must be a low surrogate
/// (`0xDC00..=0xDFFF`). This function performs no validation of its own: for other inputs the
/// subtractions below can overflow, or the combined value may not be a valid `char`, which makes
/// the unchecked conversion undefined behavior.
#[inline(always)]
unsafe fn decode_utf16_surrogate_pair(high: u16, low: u16) -> char {
    // Ten payload bits from each surrogate, offset into the supplementary planes.
    let c: u32 = (((high - 0xD800) as u32) << 10 | ((low) - 0xDC00) as u32) + 0x1_0000;
    // SAFETY: the caller guarantees a valid high/low surrogate pair, which always combines to a
    // code point in `0x1_0000..=0x10_FFFF` and therefore to a legal `char`.
    core::char::from_u32_unchecked(c)
}
/// Validates whether a slice of 16-bit values is valid UTF-16, returning an error if it is not.
#[inline(always)]
fn validate_utf16(s: &[u16]) -> Result<(), crate::error::Utf16Error> {
for (index, result) in crate::decode_utf16(s.iter().copied()).enumerate() {
if let Err(e) = result {
return Err(crate::error::Utf16Error::empty(index, e));
}
}
Ok(())
}
/// Validates whether a vector of 16-bit values is valid UTF-16, returning an error if it is not.
///
/// On success the vector is handed back unchanged. On failure the vector is moved into the error
/// together with the position, in code units, of the offending value.
#[inline(always)]
#[cfg(feature = "alloc")]
fn validate_utf16_vec(v: Vec<u16>) -> Result<Vec<u16>, crate::error::Utf16Error> {
    // Track the offset in code units rather than in decoded items: a surrogate pair decodes to a
    // single `char` but occupies two elements of `v`, so counting items would under-report the
    // position of any error that follows one.
    let mut index = 0;
    for result in crate::decode_utf16(v.iter().copied()) {
        match result {
            Ok(c) => index += c.len_utf16(),
            Err(e) => return Err(crate::error::Utf16Error::new(v, index, e)),
        }
    }
    Ok(v)
}
/// Validates whether a slice of 32-bit values is valid UTF-32, returning an error if it is not.
#[inline(always)]
fn validate_utf32(s: &[u32]) -> Result<(), crate::error::Utf32Error> {
for (index, result) in crate::decode_utf32(s.iter().copied()).enumerate() {
if let Err(e) = result {
return Err(crate::error::Utf32Error::empty(index, e));
}
}
Ok(())
}
/// Checks that every element of `v` is a valid UTF-32 value.
///
/// On success the vector is handed back unchanged. On failure the vector is moved into the error
/// together with the index of the first invalid element.
#[inline(always)]
#[cfg(feature = "alloc")]
fn validate_utf32_vec(v: Vec<u32>) -> Result<Vec<u32>, crate::error::Utf32Error> {
    let first_invalid = crate::decode_utf32(v.iter().copied())
        .enumerate()
        .find_map(|(index, result)| result.err().map(|e| (index, e)));
    match first_invalid {
        Some((index, e)) => Err(crate::error::Utf32Error::new(v, index, e)),
        None => Ok(v),
    }
}
/// Resolves `range` against a slice of length `bounds.end`, producing concrete start and end
/// indices.
///
/// Copy of unstable core::slice::range to soundly handle ranges
/// TODO: Replace with core::slice::range when it is stabilized
///
/// # Panics
///
/// Panics if converting an excluded start or an included end to a half-open bound overflows
/// `usize`, if the start is greater than the end, or if the end is greater than `bounds.end`.
#[track_caller]
#[allow(dead_code, clippy::redundant_closure)]
fn range<R>(range: R, bounds: core::ops::RangeTo<usize>) -> core::ops::Range<usize>
where
    R: core::ops::RangeBounds<usize>,
{
    // The failure paths live in separate cold, never-inlined functions.
    #[inline(never)]
    #[cold]
    #[track_caller]
    fn slice_start_index_overflow_fail() -> ! {
        panic!("attempted to index slice from after maximum usize");
    }

    #[inline(never)]
    #[cold]
    #[track_caller]
    fn slice_end_index_overflow_fail() -> ! {
        panic!("attempted to index slice up to maximum usize");
    }

    #[inline(never)]
    #[cold]
    #[track_caller]
    fn slice_index_order_fail(index: usize, end: usize) -> ! {
        panic!("slice index starts at {} but ends at {}", index, end);
    }

    #[inline(never)]
    #[cold]
    #[track_caller]
    fn slice_end_index_len_fail(index: usize, len: usize) -> ! {
        panic!(
            "range end index {} out of range for slice of length {}",
            index, len
        );
    }

    let len = bounds.end;

    // Normalize the start to an inclusive index.
    let start = match range.start_bound() {
        core::ops::Bound::Unbounded => 0,
        core::ops::Bound::Included(&first) => first,
        core::ops::Bound::Excluded(before) => before
            .checked_add(1)
            .unwrap_or_else(|| slice_start_index_overflow_fail()),
    };

    // Normalize the end to an exclusive index.
    let end = match range.end_bound() {
        core::ops::Bound::Unbounded => len,
        core::ops::Bound::Excluded(&past) => past,
        core::ops::Bound::Included(last) => last
            .checked_add(1)
            .unwrap_or_else(|| slice_end_index_overflow_fail()),
    };

    if start > end {
        slice_index_order_fail(start, end);
    }
    if end > len {
        slice_end_index_len_fail(end, len);
    }

    start..end
}
/// Non-panicking counterpart of core::slice::range.
///
/// Resolves `range` against a slice of length `bounds.end`. Returns [`None`] when a bound
/// overflows `usize`, when the start is greater than the end, or when the end is greater than
/// `bounds.end`; otherwise returns the concrete half-open range.
fn range_check<R>(range: R, bounds: core::ops::RangeTo<usize>) -> Option<core::ops::Range<usize>>
where
    R: core::ops::RangeBounds<usize>,
{
    let len = bounds.end;

    // Normalize the start to an inclusive index.
    let start = match range.start_bound() {
        core::ops::Bound::Unbounded => 0,
        core::ops::Bound::Included(&first) => first,
        core::ops::Bound::Excluded(before) => before.checked_add(1)?,
    };

    // Normalize the end to an exclusive index.
    let end = match range.end_bound() {
        core::ops::Bound::Unbounded => len,
        core::ops::Bound::Excluded(&past) => past,
        core::ops::Bound::Included(last) => last.checked_add(1)?,
    };

    if start <= end && end <= len {
        Some(start..end)
    } else {
        None
    }
}

View file

@ -0,0 +1,336 @@
// Generates an exported macro named `$name` that converts a `&str` constant expression into an
// array of UTF-16 code units inside `const` items and wraps a reference to it.
//
// * `$(#[$m])*`  - attributes (in practice the doc comments) forwarded to the generated macro.
// * `$extra_len` - number of additional elements reserved after the encoded text. The buffer is
//                  zero-initialized, so these stay zero. When it is greater than zero the
//                  generated macro also panics on a NUL code point in the text.
// * `$str`/`$fn` - crate-level type and associated function called with a reference to the
//                  encoded array to produce the final value.
macro_rules! implement_utf16_macro {
($(#[$m:meta])* $name:ident $extra_len:literal $str:ident $fn:ident) => {
$(#[$m])*
#[macro_export]
macro_rules! $name {
($text:expr) => {{
// The input text; the type annotation forces it to be a `&str` constant.
const _WIDESTRING_U16_MACRO_UTF8: &$crate::internals::core::primitive::str = $text;
// Number of UTF-16 code units needed, plus the reserved extra elements.
const _WIDESTRING_U16_MACRO_LEN: $crate::internals::core::primitive::usize =
$crate::internals::length_as_utf16(_WIDESTRING_U16_MACRO_UTF8) + $extra_len;
const _WIDESTRING_U16_MACRO_UTF16: [$crate::internals::core::primitive::u16;
_WIDESTRING_U16_MACRO_LEN] = {
let mut _widestring_buffer: [$crate::internals::core::primitive::u16; _WIDESTRING_U16_MACRO_LEN] = [0; _WIDESTRING_U16_MACRO_LEN];
let mut _widestring_bytes = _WIDESTRING_U16_MACRO_UTF8.as_bytes();
let mut _widestring_i = 0;
// Decode one code point at a time; a `while let` loop is used because this runs in a const context.
while let $crate::internals::core::option::Option::Some((_widestring_ch, _widestring_rest)) = $crate::internals::next_code_point(_widestring_bytes) {
_widestring_bytes = _widestring_rest;
// Only enforced when extra elements are reserved (`$extra_len > 0`).
if $extra_len > 0 && _widestring_ch == 0 {
panic!("invalid NUL value found in string literal");
}
// https://doc.rust-lang.org/std/primitive.char.html#method.encode_utf16
if _widestring_ch & 0xFFFF == _widestring_ch {
// Code point fits in 16 bits: stored as a single code unit.
_widestring_buffer[_widestring_i] = _widestring_ch as $crate::internals::core::primitive::u16;
_widestring_i += 1;
} else {
// Supplementary code point: split into a high and a low surrogate.
let _widestring_code = _widestring_ch - 0x1_0000;
_widestring_buffer[_widestring_i] = 0xD800 | ((_widestring_code >> 10) as $crate::internals::core::primitive::u16);
_widestring_buffer[_widestring_i + 1] = 0xDC00 | ((_widestring_code as $crate::internals::core::primitive::u16) & 0x3FF);
_widestring_i += 2;
}
}
_widestring_buffer
};
// `unused_unsafe` is allowed because `$fn` is not an unsafe function for every generated macro.
#[allow(unused_unsafe)]
unsafe { $crate::$str::$fn(&_WIDESTRING_U16_MACRO_UTF16) }
}};
}
}
}
implement_utf16_macro! {
/// Converts a string literal into a `const` UTF-16 string slice of type
/// [`Utf16Str`][crate::Utf16Str].
///
/// The conversion is evaluated at compile time. Nul characters in the text are accepted and
/// encoded like any other character.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use widestring::{utf16str, Utf16Str, Utf16String};
///
/// const STRING: &Utf16Str = utf16str!("My string");
/// assert_eq!(Utf16String::from_str("My string"), STRING);
/// # }
/// ```
utf16str 0 Utf16Str from_slice_unchecked
}
implement_utf16_macro! {
/// Converts a string literal into a `const` UTF-16 string slice of type
/// [`U16Str`][crate::U16Str].
///
/// The resulting `const` string slice will always be valid UTF-16.
///
/// The conversion is evaluated at compile time. Nul characters in the text are accepted and
/// encoded like any other character.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use widestring::{u16str, U16Str, U16String};
///
/// const STRING: &U16Str = u16str!("My string");
/// assert_eq!(U16String::from_str("My string"), STRING);
/// # }
/// ```
u16str 0 U16Str from_slice
}
implement_utf16_macro! {
/// Converts a string literal into a `const` UTF-16 string slice of type
/// [`U16CStr`][crate::U16CStr].
///
/// The resulting `const` string slice will always be valid UTF-16 and include a nul terminator.
///
/// The conversion is evaluated at compile time. Text that contains a nul character is rejected:
/// the conversion panics during constant evaluation with the message
/// `invalid NUL value found in string literal`.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use widestring::{u16cstr, U16CStr, U16CString};
///
/// const STRING: &U16CStr = u16cstr!("My string");
/// assert_eq!(U16CString::from_str("My string").unwrap(), STRING);
/// # }
/// ```
u16cstr 1 U16CStr from_slice_unchecked
}
// Generates an exported macro named `$name` that converts a `&str` constant expression into an
// array of UTF-32 values inside `const` items and wraps a reference to it.
//
// * `$(#[$m])*`  - attributes (in practice the doc comments) forwarded to the generated macro.
// * `$extra_len` - number of additional elements reserved after the encoded text. The buffer is
//                  zero-initialized, so these stay zero. When it is greater than zero the
//                  generated macro also panics on a NUL code point in the text.
// * `$str`/`$fn` - crate-level type and associated function called with a reference to the
//                  encoded array to produce the final value.
macro_rules! implement_utf32_macro {
($(#[$m:meta])* $name:ident $extra_len:literal $str:ident $fn:ident) => {
$(#[$m])*
#[macro_export]
macro_rules! $name {
($text:expr) => {{
// The input text; the type annotation forces it to be a `&str` constant.
const _WIDESTRING_U32_MACRO_UTF8: &$crate::internals::core::primitive::str = $text;
// Number of code points in the text, plus the reserved extra elements.
const _WIDESTRING_U32_MACRO_LEN: $crate::internals::core::primitive::usize =
$crate::internals::length_as_utf32(_WIDESTRING_U32_MACRO_UTF8) + $extra_len;
const _WIDESTRING_U32_MACRO_UTF32: [$crate::internals::core::primitive::u32;
_WIDESTRING_U32_MACRO_LEN] = {
let mut _widestring_buffer: [$crate::internals::core::primitive::u32; _WIDESTRING_U32_MACRO_LEN] = [0; _WIDESTRING_U32_MACRO_LEN];
let mut _widestring_bytes = _WIDESTRING_U32_MACRO_UTF8.as_bytes();
let mut _widestring_i = 0;
// Decode one code point at a time; a `while let` loop is used because this runs in a const context.
while let $crate::internals::core::option::Option::Some((_widestring_ch, _widestring_rest)) = $crate::internals::next_code_point(_widestring_bytes) {
// Only enforced when extra elements are reserved (`$extra_len > 0`).
if $extra_len > 0 && _widestring_ch == 0 {
panic!("invalid NUL value found in string literal");
}
_widestring_bytes = _widestring_rest;
// Each decoded code point is stored unchanged as one UTF-32 value.
_widestring_buffer[_widestring_i] = _widestring_ch;
_widestring_i += 1;
}
_widestring_buffer
};
// `unused_unsafe` is allowed because `$fn` is not an unsafe function for every generated macro.
#[allow(unused_unsafe)]
unsafe { $crate::$str::$fn(&_WIDESTRING_U32_MACRO_UTF32) }
}};
}
}
}
implement_utf32_macro! {
/// Converts a string literal into a `const` UTF-32 string slice of type
/// [`Utf32Str`][crate::Utf32Str].
///
/// The conversion is evaluated at compile time. Nul characters in the text are accepted and
/// encoded like any other character.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use widestring::{utf32str, Utf32Str, Utf32String};
///
/// const STRING: &Utf32Str = utf32str!("My string");
/// assert_eq!(Utf32String::from_str("My string"), STRING);
/// # }
/// ```
utf32str 0 Utf32Str from_slice_unchecked
}
implement_utf32_macro! {
/// Converts a string literal into a `const` UTF-32 string slice of type
/// [`U32Str`][crate::U32Str].
///
/// The resulting `const` string slice will always be valid UTF-32.
///
/// The conversion is evaluated at compile time. Nul characters in the text are accepted and
/// encoded like any other character.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use widestring::{u32str, U32Str, U32String};
///
/// const STRING: &U32Str = u32str!("My string");
/// assert_eq!(U32String::from_str("My string"), STRING);
/// # }
/// ```
u32str 0 U32Str from_slice
}
implement_utf32_macro! {
/// Converts a string literal into a `const` UTF-32 string slice of type
/// [`U32CStr`][crate::U32CStr].
///
/// The resulting `const` string slice will always be valid UTF-32 and include a nul terminator.
///
/// The conversion is evaluated at compile time. Text that contains a nul character is rejected:
/// the conversion panics during constant evaluation with the message
/// `invalid NUL value found in string literal`.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use widestring::{u32cstr, U32CStr, U32CString};
///
/// const STRING: &U32CStr = u32cstr!("My string");
/// assert_eq!(U32CString::from_str("My string").unwrap(), STRING);
/// # }
/// ```
u32cstr 1 U32CStr from_slice_unchecked
}
/// Alias for [`u16str`] or [`u32str`] macros depending on platform. Intended to be used when using
/// [`WideStr`][crate::WideStr] type alias.
///
/// This definition is compiled on non-Windows targets, where it forwards to [`u32str`].
#[cfg(not(windows))]
#[macro_export]
macro_rules! widestr {
($text:expr) => {{
// Glob-import the crate root so the unqualified `u32str!` call below resolves at the call site.
use $crate::*;
u32str!($text)
}};
}
/// Alias for [`utf16str`] or [`utf32str`] macros depending on platform. Intended to be used when
/// using [`WideUtfStr`][crate::WideUtfStr] type alias.
///
/// This definition is compiled on non-Windows targets, where it forwards to [`utf32str`].
#[cfg(not(windows))]
#[macro_export]
macro_rules! wideutfstr {
($text:expr) => {{
// Glob-import the crate root so the unqualified `utf32str!` call below resolves at the call site.
use $crate::*;
utf32str!($text)
}};
}
/// Alias for [`u16cstr`] or [`u32cstr`] macros depending on platform. Intended to be used when
/// using [`WideCStr`][crate::WideCStr] type alias.
///
/// This definition is compiled on non-Windows targets, where it forwards to [`u32cstr`].
#[cfg(not(windows))]
#[macro_export]
macro_rules! widecstr {
($text:expr) => {{
// Glob-import the crate root so the unqualified `u32cstr!` call below resolves at the call site.
use $crate::*;
u32cstr!($text)
}};
}
/// Alias for [`u16str`] or [`u32str`] macros depending on platform. Intended to be used when using
/// [`WideStr`][crate::WideStr] type alias.
///
/// This definition is compiled on Windows targets, where it forwards to [`u16str`].
#[cfg(windows)]
#[macro_export]
macro_rules! widestr {
($text:expr) => {{
// Glob-import the crate root so the unqualified `u16str!` call below resolves at the call site.
use $crate::*;
u16str!($text)
}};
}
/// Alias for [`utf16str`] or [`utf32str`] macros depending on platform. Intended to be used when
/// using [`WideUtfStr`][crate::WideUtfStr] type alias.
///
/// This definition is compiled on Windows targets, where it forwards to [`utf16str`].
#[cfg(windows)]
#[macro_export]
macro_rules! wideutfstr {
($text:expr) => {{
// Glob-import the crate root so the unqualified `utf16str!` call below resolves at the call site.
use $crate::*;
utf16str!($text)
}};
}
/// Alias for [`u16cstr`] or [`u32cstr`] macros depending on platform. Intended to be used when
/// using [`WideCStr`][crate::WideCStr] type alias.
///
/// This definition is compiled on Windows targets, where it forwards to [`u16cstr`].
#[cfg(windows)]
#[macro_export]
macro_rules! widecstr {
($text:expr) => {{
// Glob-import the crate root so the unqualified `u16cstr!` call below resolves at the call site.
use $crate::*;
u16cstr!($text)
}};
}
#[doc(hidden)]
pub mod internals {
    // Re-exported so the exported macros can name `core` items through `$crate`.
    pub use core;

    // A const implementation of https://github.com/rust-lang/rust/blob/d902752866cbbdb331e3cf28ff6bba86ab0f6c62/library/core/src/str/mod.rs#L509-L537
    // Assumes `utf8` is a valid &str
    //
    // Returns the first code point together with the bytes that follow it, or `None` when no
    // arm matches (in particular for empty input).
    pub const fn next_code_point(utf8: &[u8]) -> Option<(u32, &[u8])> {
        // Extracts the six payload bits of a continuation byte.
        const fn payload(byte: u8) -> u32 {
            (byte & 0b0011_1111) as u32
        }

        match utf8 {
            // One byte: 0xxxxxxx
            [b0 @ 0x00..=0x7F, tail @ ..] => Some((*b0 as u32, tail)),
            // Two bytes: 110xxxxx 10xxxxxx
            [b0 @ 0xC0..=0xDF, b1, tail @ ..] => {
                let code = (((*b0 & 0x1F) as u32) << 6) | payload(*b1);
                Some((code, tail))
            }
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            [b0 @ 0xE0..=0xEF, b1, b2, tail @ ..] => {
                let code = (((*b0 & 0x0F) as u32) << 12) | (payload(*b1) << 6) | payload(*b2);
                Some((code, tail))
            }
            // Anything else with at least four bytes is treated as a four-byte sequence.
            [b0, b1, b2, b3, tail @ ..] => {
                let code = (((*b0 & 0x07) as u32) << 18)
                    | (payload(*b1) << 12)
                    | (payload(*b2) << 6)
                    | payload(*b3);
                Some((code, tail))
            }
            _ => None,
        }
    }

    // A const implementation of `s.chars().map(|ch| ch.len_utf16()).sum()`
    pub const fn length_as_utf16(s: &str) -> usize {
        let mut remaining = s.as_bytes();
        let mut units = 0;
        while let Some((code, tail)) = next_code_point(remaining) {
            remaining = tail;
            // Code points above 0xFFFF need a surrogate pair.
            units += if code <= 0xFFFF { 1 } else { 2 };
        }
        units
    }

    // A const implementation of `s.chars().count()`
    pub const fn length_as_utf32(s: &str) -> usize {
        let mut remaining = s.as_bytes();
        let mut count = 0;
        while let Some((_, tail)) = next_code_point(remaining) {
            remaining = tail;
            count += 1;
        }
        count
    }
}
// Tests for the string-literal macros. They compare against the owned string types, so the
// module is only built when the `alloc` feature is enabled.
#[cfg(all(test, feature = "alloc"))]
mod test {
use crate::{
U16CStr, U16Str, U32CStr, U32Str, Utf16Str, Utf16String, Utf32Str, Utf32String, WideCStr,
WideStr, WideString,
};
// Using each macro to initialize a `const` checks that it works in constant contexts. The text
// includes a character outside the Basic Multilingual Plane, so the UTF-16 variants exercise
// the surrogate-pair path.
const UTF16STR_TEST: &Utf16Str = utf16str!("🏳s");
const U16STR_TEST: &U16Str = u16str!("🏳s");
const U16CSTR_TEST: &U16CStr = u16cstr!("🏳s");
const UTF32STR_TEST: &Utf32Str = utf32str!("🏳s");
const U32STR_TEST: &U32Str = u32str!("🏳s");
const U32CSTR_TEST: &U32CStr = u32cstr!("🏳s");
const WIDESTR_TEST: &WideStr = widestr!("🏳s");
const WIDECSTR_TEST: &WideCStr = widecstr!("🏳s");
// Each macro result must equal the same text converted at runtime, and the C-string variants
// must end in a nul terminator.
#[test]
fn str_macros() {
let str = Utf16String::from_str("🏳s");
assert_eq!(&str, UTF16STR_TEST);
assert_eq!(&str, U16STR_TEST);
assert_eq!(&str, U16CSTR_TEST);
assert!(matches!(U16CSTR_TEST.as_slice_with_nul().last(), Some(&0)));
let str = Utf32String::from_str("🏳s");
assert_eq!(&str, UTF32STR_TEST);
assert_eq!(&str, U32STR_TEST);
assert_eq!(&str, U32CSTR_TEST);
assert!(matches!(U32CSTR_TEST.as_slice_with_nul().last(), Some(&0)));
let str = WideString::from_str("🏳s");
assert_eq!(&str, WIDESTR_TEST);
assert_eq!(&str, WIDECSTR_TEST);
assert!(matches!(WIDECSTR_TEST.as_slice_with_nul().last(), Some(&0)));
}
}

View file

@ -0,0 +1,9 @@
#[cfg(windows)]
mod windows;
#[cfg(windows)]
pub(crate) use self::windows::*;
#[cfg(not(windows))]
mod other;
#[cfg(not(windows))]
pub(crate) use self::other::*;

View file

@ -0,0 +1,9 @@
use std::ffi::{OsStr, OsString};
/// Converts an OS string to UTF-16 code units on non-Windows platforms.
///
/// The string is first converted to UTF-8 lossily, so any content that is not valid Unicode is
/// replaced by U+FFFD before being encoded.
pub(crate) fn os_to_wide(s: &OsStr) -> Vec<u16> {
    let lossy = s.to_string_lossy();
    lossy.encode_utf16().collect()
}
/// Builds an OS string from UTF-16 code units on non-Windows platforms.
///
/// Decoding is lossy: invalid data such as unpaired surrogates is replaced by U+FFFD.
pub(crate) fn os_from_wide(s: &[u16]) -> OsString {
    let decoded = String::from_utf16_lossy(s);
    OsString::from(decoded)
}

View file

@ -0,0 +1,11 @@
#![cfg(windows)]
use std::ffi::{OsStr, OsString};
use std::os::windows::ffi::{OsStrExt, OsStringExt};
/// Converts an OS string to 16-bit code units on Windows by collecting the output of
/// [`OsStrExt::encode_wide`].
pub(crate) fn os_to_wide(s: &OsStr) -> Vec<u16> {
s.encode_wide().collect()
}
/// Builds an OS string from 16-bit code units on Windows via [`OsStringExt::from_wide`].
pub(crate) fn os_from_wide(s: &[u16]) -> OsString {
OsString::from_wide(s)
}

2028
third-party/vendor/widestring/src/ucstr.rs vendored Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

1248
third-party/vendor/widestring/src/ustr.rs vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,488 @@
use crate::{
error::{DecodeUtf16Error, DecodeUtf32Error},
iter::{DecodeUtf16, DecodeUtf16Lossy, DecodeUtf32, DecodeUtf32Lossy},
};
use core::{
iter::{Copied, DoubleEndedIterator, ExactSizeIterator, FusedIterator},
slice::Iter,
};
/// Iterator that decodes the UTF-16 code units of a string slice into [`char`][prim@char]s,
/// yielding a decoding error for each value that cannot be decoded.
///
/// This struct is created by the `chars` method on strings. See its documentation for more.
#[derive(Clone)]
pub struct CharsUtf16<'a> {
    inner: DecodeUtf16<Copied<Iter<'a, u16>>>,
}

impl<'a> CharsUtf16<'a> {
    /// Creates a decoder positioned at the first code unit of `s`.
    pub(crate) fn new(s: &'a [u16]) -> Self {
        let inner = crate::decode_utf16(s.iter().copied());
        Self { inner }
    }
}

impl core::fmt::Debug for CharsUtf16<'_> {
    /// Formats the not-yet-consumed characters using the crate's shared UTF-16 debug formatter.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Work on a clone so that formatting does not advance this iterator.
        let remaining = self.clone();
        crate::debug_fmt_utf16_iter(remaining, f)
    }
}

impl Iterator for CharsUtf16<'_> {
    type Item = Result<char, DecodeUtf16Error>;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }
}

impl DoubleEndedIterator for CharsUtf16<'_> {
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        self.inner.next_back()
    }
}

impl FusedIterator for CharsUtf16<'_> {}
/// Iterator that decodes the UTF-32 values of a string slice into [`char`][prim@char]s, yielding
/// a decoding error for each value that cannot be decoded.
///
/// This struct is created by the `chars` method on strings. See its documentation for more.
#[derive(Clone)]
pub struct CharsUtf32<'a> {
    inner: DecodeUtf32<Copied<Iter<'a, u32>>>,
}

impl<'a> CharsUtf32<'a> {
    /// Creates a decoder positioned at the first element of `s`.
    pub(crate) fn new(s: &'a [u32]) -> Self {
        let inner = crate::decode_utf32(s.iter().copied());
        Self { inner }
    }
}

impl core::fmt::Debug for CharsUtf32<'_> {
    /// Formats the not-yet-consumed characters using the crate's shared UTF-32 debug formatter.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Work on a clone so that formatting does not advance this iterator.
        let remaining = self.clone();
        crate::debug_fmt_utf32_iter(remaining, f)
    }
}

impl Iterator for CharsUtf32<'_> {
    type Item = Result<char, DecodeUtf32Error>;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }
}

impl DoubleEndedIterator for CharsUtf32<'_> {
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        self.inner.next_back()
    }
}

impl ExactSizeIterator for CharsUtf32<'_> {
    #[inline]
    fn len(&self) -> usize {
        self.inner.len()
    }
}

impl FusedIterator for CharsUtf32<'_> {}
/// Lossy iterator that decodes the UTF-16 code units of a string slice into
/// [`char`][prim@char]s. It yields plain `char`s rather than `Result`s.
///
/// This struct is created by the `chars_lossy` method on strings. See its documentation for more.
#[derive(Clone)]
pub struct CharsLossyUtf16<'a> {
    iter: DecodeUtf16Lossy<Copied<Iter<'a, u16>>>,
}

impl<'a> CharsLossyUtf16<'a> {
    /// Creates a lossy decoder positioned at the first code unit of `s`.
    pub(crate) fn new(s: &'a [u16]) -> Self {
        let iter = crate::decode_utf16_lossy(s.iter().copied());
        Self { iter }
    }
}

impl core::fmt::Debug for CharsLossyUtf16<'_> {
    /// Formats the not-yet-consumed characters using the crate's shared `char` debug formatter.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Work on a clone so that formatting does not advance this iterator.
        let remaining = self.clone();
        crate::debug_fmt_char_iter(remaining, f)
    }
}

impl Iterator for CharsLossyUtf16<'_> {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next()
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl DoubleEndedIterator for CharsLossyUtf16<'_> {
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        self.iter.next_back()
    }
}

impl FusedIterator for CharsLossyUtf16<'_> {}
/// Lossy iterator that decodes the UTF-32 values of a string slice into [`char`][prim@char]s.
/// It yields plain `char`s rather than `Result`s.
///
/// This struct is created by the `chars_lossy` method on strings. See its documentation for more.
#[derive(Clone)]
pub struct CharsLossyUtf32<'a> {
    iter: DecodeUtf32Lossy<Copied<Iter<'a, u32>>>,
}

impl<'a> CharsLossyUtf32<'a> {
    /// Creates a lossy decoder positioned at the first element of `s`.
    pub(crate) fn new(s: &'a [u32]) -> Self {
        let iter = crate::decode_utf32_lossy(s.iter().copied());
        Self { iter }
    }
}

impl core::fmt::Debug for CharsLossyUtf32<'_> {
    /// Formats the not-yet-consumed characters using the crate's shared `char` debug formatter.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Work on a clone so that formatting does not advance this iterator.
        let remaining = self.clone();
        crate::debug_fmt_char_iter(remaining, f)
    }
}

impl Iterator for CharsLossyUtf32<'_> {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next()
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl DoubleEndedIterator for CharsLossyUtf32<'_> {
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        self.iter.next_back()
    }
}

impl ExactSizeIterator for CharsLossyUtf32<'_> {
    #[inline]
    fn len(&self) -> usize {
        self.iter.len()
    }
}

impl FusedIterator for CharsLossyUtf32<'_> {}
/// Iterator over the decoded [`char`][prim@char]s of a UTF-16 string slice, each paired with the
/// offset (in code units) at which it starts.
///
/// This struct is created by the `char_indices` method on strings. See its documentation for
/// more.
#[derive(Debug, Clone)]
pub struct CharIndicesUtf16<'a> {
    forward_offset: usize,
    back_offset: usize,
    iter: CharsUtf16<'a>,
}

impl<'a> CharIndicesUtf16<'a> {
    /// Creates an iterator whose front offset is `0` and whose back offset is `s.len()`.
    pub(crate) fn new(s: &'a [u16]) -> Self {
        let iter = CharsUtf16::new(s);
        Self {
            forward_offset: 0,
            back_offset: s.len(),
            iter,
        }
    }

    /// Returns the position of the next character, or the length of the underlying string if
    /// there are no more characters.
    #[inline]
    pub fn offset(&self) -> usize {
        self.forward_offset
    }
}

impl Iterator for CharIndicesUtf16<'_> {
    type Item = (usize, Result<char, DecodeUtf16Error>);

    fn next(&mut self) -> Option<Self::Item> {
        let item = self.iter.next()?;
        let idx = self.forward_offset;
        // A decoded character advances by its UTF-16 length; an error advances by one code unit.
        self.forward_offset += match &item {
            Ok(c) => c.len_utf16(),
            Err(_) => 1,
        };
        Some((idx, item))
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl DoubleEndedIterator for CharIndicesUtf16<'_> {
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        let item = self.iter.next_back()?;
        // Step back first so that the reported offset is where the item starts.
        self.back_offset -= match &item {
            Ok(c) => c.len_utf16(),
            Err(_) => 1,
        };
        Some((self.back_offset, item))
    }
}

impl FusedIterator for CharIndicesUtf16<'_> {}
/// Iterator over the decoded [`char`][prim@char]s of a UTF-32 string slice, each paired with its
/// index in the slice.
///
/// This struct is created by the `char_indices` method on strings. See its documentation for
/// more.
#[derive(Debug, Clone)]
pub struct CharIndicesUtf32<'a> {
    forward_offset: usize,
    back_offset: usize,
    iter: CharsUtf32<'a>,
}

impl<'a> CharIndicesUtf32<'a> {
    /// Creates an iterator whose front offset is `0` and whose back offset is `s.len()`.
    pub(crate) fn new(s: &'a [u32]) -> Self {
        let iter = CharsUtf32::new(s);
        Self {
            forward_offset: 0,
            back_offset: s.len(),
            iter,
        }
    }

    /// Returns the position of the next character, or the length of the underlying string if
    /// there are no more characters.
    #[inline]
    pub fn offset(&self) -> usize {
        self.forward_offset
    }
}

impl Iterator for CharIndicesUtf32<'_> {
    type Item = (usize, Result<char, DecodeUtf32Error>);

    fn next(&mut self) -> Option<Self::Item> {
        let item = self.iter.next()?;
        let idx = self.forward_offset;
        // Every item, valid or not, occupies exactly one element.
        self.forward_offset += 1;
        Some((idx, item))
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl DoubleEndedIterator for CharIndicesUtf32<'_> {
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        let item = self.iter.next_back()?;
        // Step back first so that the reported offset is where the item starts.
        self.back_offset -= 1;
        Some((self.back_offset, item))
    }
}

impl ExactSizeIterator for CharIndicesUtf32<'_> {
    #[inline]
    fn len(&self) -> usize {
        self.iter.len()
    }
}

impl FusedIterator for CharIndicesUtf32<'_> {}
/// A lossy iterator over the [`char`][prim@char]s of a string slice, and their positions.
///
/// Every item pairs the code-unit index a character starts at with the character itself.
///
/// This struct is created by the `char_indices_lossy` method on strings. See its documentation
/// for more.
#[derive(Debug, Clone)]
pub struct CharIndicesLossyUtf16<'a> {
    /// Index of the next character produced from the front.
    forward_offset: usize,
    /// Exclusive end index of the characters not yet produced from the back.
    back_offset: usize,
    /// Lossy decoder that yields the characters themselves.
    iter: CharsLossyUtf16<'a>,
}

impl<'a> CharIndicesLossyUtf16<'a> {
    /// Creates an index-tracking iterator over `s`, starting at index 0 at the front and
    /// `s.len()` at the back.
    pub(crate) fn new(s: &'a [u16]) -> Self {
        let iter = CharsLossyUtf16::new(s);
        Self {
            forward_offset: 0,
            back_offset: s.len(),
            iter,
        }
    }

    /// Returns the position of the next character from the front.
    ///
    /// After iterating front-to-back to the end, this equals the length of the underlying
    /// string.
    #[inline]
    pub fn offset(&self) -> usize {
        self.forward_offset
    }
}

impl<'a> Iterator for CharIndicesLossyUtf16<'a> {
    type Item = (usize, char);

    /// Yields the next character from the front together with its index.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let c = self.iter.next()?;
        let idx = self.forward_offset;
        // Advance by the number of UTF-16 code units the yielded character encodes to.
        self.forward_offset += c.len_utf16();
        Some((idx, c))
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl<'a> FusedIterator for CharIndicesLossyUtf16<'a> {}

impl<'a> DoubleEndedIterator for CharIndicesLossyUtf16<'a> {
    /// Yields the next character from the back together with its index.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        let c = self.iter.next_back()?;
        // Step down first so the reported index is where the character starts.
        self.back_offset -= c.len_utf16();
        Some((self.back_offset, c))
    }
}
/// A lossy iterator over the [`char`][prim@char]s of a string slice, and their positions.
///
/// Every item pairs the index a character starts at with the character itself.
///
/// This struct is created by the `char_indices_lossy` method on strings. See its documentation
/// for more.
#[derive(Debug, Clone)]
pub struct CharIndicesLossyUtf32<'a> {
    /// Index of the next character produced from the front.
    forward_offset: usize,
    /// Exclusive end index of the characters not yet produced from the back.
    back_offset: usize,
    /// Lossy decoder that yields the characters themselves.
    iter: CharsLossyUtf32<'a>,
}

impl<'a> CharIndicesLossyUtf32<'a> {
    /// Creates an index-tracking iterator over `s`, starting at index 0 at the front and
    /// `s.len()` at the back.
    pub(crate) fn new(s: &'a [u32]) -> Self {
        let iter = CharsLossyUtf32::new(s);
        Self {
            forward_offset: 0,
            back_offset: s.len(),
            iter,
        }
    }

    /// Returns the position of the next character from the front.
    ///
    /// After iterating front-to-back to the end, this equals the length of the underlying
    /// string.
    #[inline]
    pub fn offset(&self) -> usize {
        self.forward_offset
    }
}

impl<'a> Iterator for CharIndicesLossyUtf32<'a> {
    type Item = (usize, char);

    /// Yields the next character from the front together with its index.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next().map(|c| {
            let idx = self.forward_offset;
            // Each yielded character advances the front offset by one.
            self.forward_offset += 1;
            (idx, c)
        })
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl<'a> FusedIterator for CharIndicesLossyUtf32<'a> {}

impl<'a> DoubleEndedIterator for CharIndicesLossyUtf32<'a> {
    /// Yields the next character from the back together with its index.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        self.iter.next_back().map(|c| {
            // Step down first so the reported index is where the character starts.
            self.back_offset -= 1;
            (self.back_offset, c)
        })
    }
}

impl<'a> ExactSizeIterator for CharIndicesLossyUtf32<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.iter.len()
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,44 @@
use core::iter::{DoubleEndedIterator, ExactSizeIterator, FusedIterator};
/// A draining iterator for string data with unknown encoding.
///
/// This is a thin wrapper around [`alloc::vec::Drain`]: every trait method below forwards to
/// the wrapped drain.
#[derive(Debug)]
pub struct Drain<'a, T> {
    /// The underlying vector drain that supplies the elements.
    pub(crate) inner: alloc::vec::Drain<'a, T>,
}

impl<T> AsRef<[T]> for Drain<'_, T> {
    /// Returns the elements still held by the wrapped drain as a slice.
    #[inline]
    fn as_ref(&self) -> &[T] {
        self.inner.as_slice()
    }
}

impl<T> Iterator for Drain<'_, T> {
    type Item = T;

    /// Removes and returns the next element from the front.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        Iterator::next(&mut self.inner)
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        Iterator::size_hint(&self.inner)
    }
}

impl<T> DoubleEndedIterator for Drain<'_, T> {
    /// Removes and returns the next element from the back.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        DoubleEndedIterator::next_back(&mut self.inner)
    }
}

impl<T> ExactSizeIterator for Drain<'_, T> {
    #[inline]
    fn len(&self) -> usize {
        ExactSizeIterator::len(&self.inner)
    }
}

impl<T> FusedIterator for Drain<'_, T> {}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,410 @@
use crate::{
debug_fmt_char_iter, decode_utf16, decode_utf32,
iter::{DecodeUtf16, DecodeUtf32},
};
use core::{
fmt::Write,
iter::{Copied, DoubleEndedIterator, ExactSizeIterator, FlatMap, FusedIterator},
slice::Iter,
};
/// An iterator over the [`char`]s of a UTF-16 string slice
///
/// This struct is created by the [`chars`][crate::Utf16Str::chars] method on
/// [`Utf16Str`][crate::Utf16Str]. See its documentation for more.
#[derive(Clone)]
pub struct CharsUtf16<'a> {
    /// Decoder running over copies of the borrowed code units.
    iter: DecodeUtf16<Copied<Iter<'a, u16>>>,
}

impl<'a> CharsUtf16<'a> {
    /// Wraps `s` in a UTF-16 decoder.
    pub(super) fn new(s: &'a [u16]) -> Self {
        let iter = decode_utf16(s.iter().copied());
        Self { iter }
    }
}

impl<'a> Iterator for CharsUtf16<'a> {
    type Item = char;

    /// Returns the next decoded character from the front.
    ///
    /// # Panics
    ///
    /// Panics if the decoder reports an error.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let decoded = self.iter.next()?;
        // Utf16Str already ensures valid surrogate pairs
        Some(decoded.unwrap())
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl<'a> FusedIterator for CharsUtf16<'a> {}

impl<'a> DoubleEndedIterator for CharsUtf16<'a> {
    /// Returns the next decoded character from the back.
    ///
    /// # Panics
    ///
    /// Panics if the decoder reports an error.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        let decoded = self.iter.next_back()?;
        // Utf16Str already ensures valid surrogate pairs
        Some(decoded.unwrap())
    }
}

impl<'a> core::fmt::Debug for CharsUtf16<'a> {
    /// Formats the remaining characters through the crate's `debug_fmt_char_iter` helper.
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Hand over a clone so that formatting does not advance `self`.
        let remaining = self.clone();
        debug_fmt_char_iter(remaining, f)
    }
}

impl<'a> core::fmt::Display for CharsUtf16<'a> {
    /// Writes the remaining characters to `f` one at a time, stopping at the first write error.
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Iterate a clone so that formatting does not advance `self`.
        for c in self.clone() {
            f.write_char(c)?;
        }
        Ok(())
    }
}
/// An iterator over the [`char`]s of a UTF-32 string slice
///
/// This struct is created by the [`chars`][crate::Utf32Str::chars] method on
/// [`Utf32Str`][crate::Utf32Str]. See its documentation for more.
#[derive(Clone)]
pub struct CharsUtf32<'a> {
    /// Decoder running over copies of the borrowed code units.
    iter: DecodeUtf32<Copied<Iter<'a, u32>>>,
}

impl<'a> CharsUtf32<'a> {
    /// Wraps `s` in a UTF-32 decoder.
    pub(super) fn new(s: &'a [u32]) -> Self {
        let iter = decode_utf32(s.iter().copied());
        Self { iter }
    }
}

impl<'a> Iterator for CharsUtf32<'a> {
    type Item = char;

    /// Returns the next decoded character from the front.
    ///
    /// # Panics
    ///
    /// Panics if the decoder reports an error.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let decoded = self.iter.next()?;
        // Utf32Str already ensures valid code points
        Some(decoded.unwrap())
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl<'a> DoubleEndedIterator for CharsUtf32<'a> {
    /// Returns the next decoded character from the back.
    ///
    /// # Panics
    ///
    /// Panics if the decoder reports an error.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        let decoded = self.iter.next_back()?;
        // Utf32Str already ensures valid code points
        Some(decoded.unwrap())
    }
}

impl<'a> FusedIterator for CharsUtf32<'a> {}

impl<'a> ExactSizeIterator for CharsUtf32<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.iter.len()
    }
}

impl<'a> core::fmt::Debug for CharsUtf32<'a> {
    /// Formats the remaining characters through the crate's `debug_fmt_char_iter` helper.
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Hand over a clone so that formatting does not advance `self`.
        let remaining = self.clone();
        debug_fmt_char_iter(remaining, f)
    }
}

impl<'a> core::fmt::Display for CharsUtf32<'a> {
    /// Writes the remaining characters to `f` one at a time, stopping at the first write error.
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Iterate a clone so that formatting does not advance `self`.
        for c in self.clone() {
            f.write_char(c)?;
        }
        Ok(())
    }
}
/// An iterator over the [`char`]s of a string slice, and their positions
///
/// Every item pairs the code-unit index a character starts at with the character itself.
///
/// This struct is created by the [`char_indices`][crate::Utf16Str::char_indices] method on
/// [`Utf16Str`][crate::Utf16Str]. See its documentation for more.
#[derive(Debug, Clone)]
pub struct CharIndicesUtf16<'a> {
    /// Index of the next character produced from the front.
    forward_offset: usize,
    /// Exclusive end index of the characters not yet produced from the back.
    back_offset: usize,
    /// Iterator that yields the characters themselves.
    iter: CharsUtf16<'a>,
}

impl<'a> CharIndicesUtf16<'a> {
    /// Creates an index-tracking iterator over `s`, starting at index 0 at the front and
    /// `s.len()` at the back.
    pub(super) fn new(s: &'a [u16]) -> Self {
        let iter = CharsUtf16::new(s);
        Self {
            forward_offset: 0,
            back_offset: s.len(),
            iter,
        }
    }

    /// Returns the position of the next character from the front.
    ///
    /// After iterating front-to-back to the end, this equals the length of the underlying
    /// string.
    #[inline]
    pub fn offset(&self) -> usize {
        self.forward_offset
    }
}

impl<'a> Iterator for CharIndicesUtf16<'a> {
    type Item = (usize, char);

    /// Yields the next character from the front together with its index.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next().map(|c| {
            let at = self.forward_offset;
            // Advance by the number of UTF-16 code units this character occupies.
            self.forward_offset += c.len_utf16();
            (at, c)
        })
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl<'a> FusedIterator for CharIndicesUtf16<'a> {}

impl<'a> DoubleEndedIterator for CharIndicesUtf16<'a> {
    /// Yields the next character from the back together with its index.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        self.iter.next_back().map(|c| {
            // Step down first so the reported index is where the character starts.
            self.back_offset -= c.len_utf16();
            (self.back_offset, c)
        })
    }
}
/// An iterator over the [`char`]s of a string slice, and their positions
///
/// Every item pairs the index a character starts at with the character itself.
///
/// This struct is created by the [`char_indices`][crate::Utf32Str::char_indices] method on
/// [`Utf32Str`][crate::Utf32Str]. See its documentation for more.
#[derive(Debug, Clone)]
pub struct CharIndicesUtf32<'a> {
    /// Index of the next character produced from the front.
    forward_offset: usize,
    /// Exclusive end index of the characters not yet produced from the back.
    back_offset: usize,
    /// Iterator that yields the characters themselves.
    iter: CharsUtf32<'a>,
}

impl<'a> CharIndicesUtf32<'a> {
    /// Creates an index-tracking iterator over `s`, starting at index 0 at the front and
    /// `s.len()` at the back.
    pub(super) fn new(s: &'a [u32]) -> Self {
        let iter = CharsUtf32::new(s);
        Self {
            forward_offset: 0,
            back_offset: s.len(),
            iter,
        }
    }

    /// Returns the position of the next character from the front.
    ///
    /// After iterating front-to-back to the end, this equals the length of the underlying
    /// string.
    #[inline]
    pub fn offset(&self) -> usize {
        self.forward_offset
    }
}

impl<'a> Iterator for CharIndicesUtf32<'a> {
    type Item = (usize, char);

    /// Yields the next character from the front together with its index.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next().map(|c| {
            let at = self.forward_offset;
            // Each character advances the front offset by one.
            self.forward_offset += 1;
            (at, c)
        })
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl<'a> FusedIterator for CharIndicesUtf32<'a> {}

impl<'a> DoubleEndedIterator for CharIndicesUtf32<'a> {
    /// Yields the next character from the back together with its index.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        self.iter.next_back().map(|c| {
            // Step down first so the reported index is where the character starts.
            self.back_offset -= 1;
            (self.back_offset, c)
        })
    }
}

impl<'a> ExactSizeIterator for CharIndicesUtf32<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.iter.len()
    }
}
/// The return type of [`Utf16Str::escape_debug`][crate::Utf16Str::escape_debug].
///
/// Wraps a `char` iterator and flattens every character into the sequence produced by
/// `char::escape_debug`.
#[derive(Debug, Clone)]
pub struct EscapeDebug<I> {
    iter: FlatMap<I, core::char::EscapeDebug, fn(char) -> core::char::EscapeDebug>,
}

impl<'a> EscapeDebug<CharsUtf16<'a>> {
    /// Builds the escaping iterator over the characters of a UTF-16 slice.
    pub(super) fn new(s: &'a [u16]) -> Self {
        // Spell out the function-pointer type the `FlatMap` field stores.
        let escape: fn(char) -> core::char::EscapeDebug = |c| c.escape_debug();
        Self {
            iter: CharsUtf16::new(s).flat_map(escape),
        }
    }
}

impl<'a> EscapeDebug<CharsUtf32<'a>> {
    /// Builds the escaping iterator over the characters of a UTF-32 slice.
    pub(super) fn new(s: &'a [u32]) -> Self {
        // Spell out the function-pointer type the `FlatMap` field stores.
        let escape: fn(char) -> core::char::EscapeDebug = |c| c.escape_debug();
        Self {
            iter: CharsUtf32::new(s).flat_map(escape),
        }
    }
}
/// The return type of [`Utf16Str::escape_default`][crate::Utf16Str::escape_default].
///
/// Wraps a `char` iterator and flattens every character into the sequence produced by
/// `char::escape_default`.
#[derive(Debug, Clone)]
pub struct EscapeDefault<I> {
    iter: FlatMap<I, core::char::EscapeDefault, fn(char) -> core::char::EscapeDefault>,
}

impl<'a> EscapeDefault<CharsUtf16<'a>> {
    /// Builds the escaping iterator over the characters of a UTF-16 slice.
    pub(super) fn new(s: &'a [u16]) -> Self {
        // Spell out the function-pointer type the `FlatMap` field stores.
        let escape: fn(char) -> core::char::EscapeDefault = |c| c.escape_default();
        Self {
            iter: CharsUtf16::new(s).flat_map(escape),
        }
    }
}

impl<'a> EscapeDefault<CharsUtf32<'a>> {
    /// Builds the escaping iterator over the characters of a UTF-32 slice.
    pub(super) fn new(s: &'a [u32]) -> Self {
        // Spell out the function-pointer type the `FlatMap` field stores.
        let escape: fn(char) -> core::char::EscapeDefault = |c| c.escape_default();
        Self {
            iter: CharsUtf32::new(s).flat_map(escape),
        }
    }
}
/// The return type of [`Utf16Str::escape_unicode`][crate::Utf16Str::escape_unicode].
///
/// Wraps a `char` iterator and flattens every character into the sequence produced by
/// `char::escape_unicode`.
#[derive(Debug, Clone)]
pub struct EscapeUnicode<I> {
    iter: FlatMap<I, core::char::EscapeUnicode, fn(char) -> core::char::EscapeUnicode>,
}

impl<'a> EscapeUnicode<CharsUtf16<'a>> {
    /// Builds the escaping iterator over the characters of a UTF-16 slice.
    pub(super) fn new(s: &'a [u16]) -> Self {
        // Spell out the function-pointer type the `FlatMap` field stores.
        let escape: fn(char) -> core::char::EscapeUnicode = |c| c.escape_unicode();
        Self {
            iter: CharsUtf16::new(s).flat_map(escape),
        }
    }
}

impl<'a> EscapeUnicode<CharsUtf32<'a>> {
    /// Builds the escaping iterator over the characters of a UTF-32 slice.
    pub(super) fn new(s: &'a [u32]) -> Self {
        // Spell out the function-pointer type the `FlatMap` field stores.
        let escape: fn(char) -> core::char::EscapeUnicode = |c| c.escape_unicode();
        Self {
            iter: CharsUtf32::new(s).flat_map(escape),
        }
    }
}
// Implements `Display`, `Iterator<Item = char>` and `FusedIterator` for each named escape
// wrapper. Every wrapper is expected to be generic over its source iterator `I` and to hold a
// `FlatMap` in a field called `iter`.
macro_rules! escape_impls {
    ($($name:ident),+) => {$(
        impl<I> core::fmt::Display for $name<I>
        where
            I: Iterator<Item = char> + Clone,
        {
            #[inline]
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                // Iterate a clone so that formatting does not advance `self`.
                self.clone().try_for_each(|c| f.write_char(c))
            }
        }

        impl<I> Iterator for $name<I>
        where
            I: Iterator<Item = char>,
        {
            type Item = char;

            #[inline]
            fn next(&mut self) -> Option<Self::Item> {
                self.iter.next()
            }

            #[inline]
            fn size_hint(&self) -> (usize, Option<usize>) {
                // `FlatMap::size_hint` is already expressed in escaped output characters: it
                // counts what is left in the partially consumed escape sequences and only
                // reports an upper bound once the source iterator is exhausted. Scaling that
                // bound again would merely loosen it, so it is forwarded as-is.
                self.iter.size_hint()
            }
        }

        impl<I> FusedIterator for $name<I> where I: Iterator<Item = char> + FusedIterator {}
    )+}
}
escape_impls!(EscapeDebug, EscapeDefault, EscapeUnicode);
/// An iterator over the [`u16`] code units of a UTF-16 string slice
///
/// This struct is created by the [`code_units`][crate::Utf16Str::code_units] method on
/// [`Utf16Str`][crate::Utf16Str]. See its documentation for more.
#[derive(Debug, Clone)]
pub struct CodeUnits<'a> {
    /// By-value iterator over the borrowed code units.
    iter: Copied<Iter<'a, u16>>,
}

impl<'a> CodeUnits<'a> {
    /// Creates an iterator that yields copies of the code units in `s`.
    pub(super) fn new(s: &'a [u16]) -> Self {
        let iter = s.iter().copied();
        Self { iter }
    }
}

impl<'a> Iterator for CodeUnits<'a> {
    type Item = u16;

    /// Returns the next code unit from the front.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        Iterator::next(&mut self.iter)
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        Iterator::size_hint(&self.iter)
    }
}

impl<'a> FusedIterator for CodeUnits<'a> {}

impl<'a> DoubleEndedIterator for CodeUnits<'a> {
    /// Returns the next code unit from the back.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        DoubleEndedIterator::next_back(&mut self.iter)
    }
}

impl<'a> ExactSizeIterator for CodeUnits<'a> {
    #[inline]
    fn len(&self) -> usize {
        ExactSizeIterator::len(&self.iter)
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,138 @@
use super::{Utf16String, Utf32String};
use crate::utfstr::{CharsUtf16, CharsUtf32};
use core::iter::{DoubleEndedIterator, ExactSizeIterator, FusedIterator, Iterator};
/// A draining iterator for [`Utf16String`].
///
/// Iteration yields characters from `iter`; dropping the value removes the `start..end`
/// range from the buffer of the string behind `string`.
///
/// This struct is created by the [`drain`][Utf16String::drain] method on [`Utf16String`]. See its
/// documentation for more.
pub struct DrainUtf16<'a> {
    /// Start of the range removed from the string's buffer on drop.
    pub(super) start: usize,
    /// Exclusive end of the range removed from the string's buffer on drop.
    pub(super) end: usize,
    /// Characters handed out while iterating.
    pub(super) iter: CharsUtf16<'a>,
    /// Raw pointer to the string whose buffer is modified on drop.
    pub(super) string: *mut Utf16String,
}

// The raw-pointer field suppresses the automatic `Send`/`Sync` impls, so they are asserted by
// hand. NOTE(review): presumably sound because `string` stands in for a `&'a mut Utf16String`
// taken by `drain` -- confirm at the construction site.
unsafe impl Sync for DrainUtf16<'_> {}
unsafe impl Send for DrainUtf16<'_> {}

impl Drop for DrainUtf16<'_> {
    /// Removes the `start..end` range from the string's buffer, if that range is in bounds.
    fn drop(&mut self) {
        let (start, end) = (self.start, self.end);
        // SAFETY (NOTE(review)): relies on `string` still pointing at a live `Utf16String`
        // that nothing else is accessing -- confirm where the drain is constructed.
        unsafe {
            let buf = (*self.string).as_mut_vec();
            // Use Vec::drain. "Reaffirm" the bounds checks to avoid
            // panic code being inserted again.
            if start > end || end > buf.len() {
                return;
            }
            buf.drain(start..end);
        }
    }
}

impl core::fmt::Debug for DrainUtf16<'_> {
    /// Formats exactly like the wrapped character iterator.
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        <CharsUtf16<'_> as core::fmt::Debug>::fmt(&self.iter, f)
    }
}

impl core::fmt::Display for DrainUtf16<'_> {
    /// Formats exactly like the wrapped character iterator.
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        <CharsUtf16<'_> as core::fmt::Display>::fmt(&self.iter, f)
    }
}

impl Iterator for DrainUtf16<'_> {
    type Item = char;

    /// Returns the next character of the drained range from the front.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        Iterator::next(&mut self.iter)
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        Iterator::size_hint(&self.iter)
    }
}

impl DoubleEndedIterator for DrainUtf16<'_> {
    /// Returns the next character of the drained range from the back.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        DoubleEndedIterator::next_back(&mut self.iter)
    }
}

impl FusedIterator for DrainUtf16<'_> {}
/// A draining iterator for [`Utf32String`].
///
/// Iteration yields characters from `iter`; dropping the value removes the `start..end`
/// range from the buffer of the string behind `string`.
///
/// This struct is created by the [`drain`][Utf32String::drain] method on [`Utf32String`]. See its
/// documentation for more.
pub struct DrainUtf32<'a> {
    /// Start of the range removed from the string's buffer on drop.
    pub(super) start: usize,
    /// Exclusive end of the range removed from the string's buffer on drop.
    pub(super) end: usize,
    /// Characters handed out while iterating.
    pub(super) iter: CharsUtf32<'a>,
    /// Raw pointer to the string whose buffer is modified on drop.
    pub(super) string: *mut Utf32String,
}

// The raw-pointer field suppresses the automatic `Send`/`Sync` impls, so they are asserted by
// hand. NOTE(review): presumably sound because `string` stands in for a `&'a mut Utf32String`
// taken by `drain` -- confirm at the construction site.
unsafe impl Sync for DrainUtf32<'_> {}
unsafe impl Send for DrainUtf32<'_> {}

impl Drop for DrainUtf32<'_> {
    /// Removes the `start..end` range from the string's buffer, if that range is in bounds.
    fn drop(&mut self) {
        let (start, end) = (self.start, self.end);
        // SAFETY (NOTE(review)): relies on `string` still pointing at a live `Utf32String`
        // that nothing else is accessing -- confirm where the drain is constructed.
        unsafe {
            let buf = (*self.string).as_mut_vec();
            // Use Vec::drain. "Reaffirm" the bounds checks to avoid
            // panic code being inserted again.
            if start > end || end > buf.len() {
                return;
            }
            buf.drain(start..end);
        }
    }
}

impl core::fmt::Debug for DrainUtf32<'_> {
    /// Formats exactly like the wrapped character iterator.
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        <CharsUtf32<'_> as core::fmt::Debug>::fmt(&self.iter, f)
    }
}

impl core::fmt::Display for DrainUtf32<'_> {
    /// Formats exactly like the wrapped character iterator.
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        <CharsUtf32<'_> as core::fmt::Display>::fmt(&self.iter, f)
    }
}

impl Iterator for DrainUtf32<'_> {
    type Item = char;

    /// Returns the next character of the drained range from the front.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        Iterator::next(&mut self.iter)
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        Iterator::size_hint(&self.iter)
    }
}

impl DoubleEndedIterator for DrainUtf32<'_> {
    /// Returns the next character of the drained range from the back.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        DoubleEndedIterator::next_back(&mut self.iter)
    }
}

impl FusedIterator for DrainUtf32<'_> {}

impl ExactSizeIterator for DrainUtf32<'_> {
    #[inline]
    fn len(&self) -> usize {
        ExactSizeIterator::len(&self.iter)
    }
}