Vendor things
This commit is contained in:
parent
5deceec006
commit
977e3c17e5
19434 changed files with 10682014 additions and 0 deletions
328
third-party/vendor/idna/src/punycode.rs
vendored
Normal file
328
third-party/vendor/idna/src/punycode.rs
vendored
Normal file
|
|
@ -0,0 +1,328 @@
|
|||
// Copyright 2013 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! Punycode ([RFC 3492](http://tools.ietf.org/html/rfc3492)) implementation.
|
||||
//!
|
||||
//! Since Punycode fundamentally works on unicode code points,
|
||||
//! `encode` and `decode` take and return slices and vectors of `char`.
|
||||
//! `encode_str` and `decode_to_string` provide convenience wrappers
|
||||
//! that convert from and to Rust’s UTF-8 based `str` and `String` types.
|
||||
|
||||
use alloc::{string::String, vec::Vec};
|
||||
use core::char;
|
||||
use core::u32;
|
||||
|
||||
// Bootstring parameters for Punycode (RFC 3492, section 5).
//
// These are plain compile-time values, so they are declared `const` rather than
// `static`: nothing needs them to have a fixed address.

/// Number of distinct digit values (`a`-`z` followed by `0`-`9`).
const BASE: u32 = 36;
/// Lower clamp for the per-digit threshold `t`.
const T_MIN: u32 = 1;
/// Upper clamp for the per-digit threshold `t`.
const T_MAX: u32 = 26;
/// Skew term in the final step of `adapt`.
const SKEW: u32 = 38;
/// Divisor `adapt` applies to the very first delta of a label.
const DAMP: u32 = 700;
/// Bias in effect before any delta has been processed.
const INITIAL_BIAS: u32 = 72;
/// First non-basic code point; everything below it is ASCII and copied literally.
const INITIAL_N: u32 = 0x80;
/// Separates the literal basic code points from the encoded deltas.
const DELIMITER: char = '-';
|
||||
|
||||
#[inline]
|
||||
fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 {
|
||||
delta /= if first_time { DAMP } else { 2 };
|
||||
delta += delta / num_points;
|
||||
let mut k = 0;
|
||||
while delta > ((BASE - T_MIN) * T_MAX) / 2 {
|
||||
delta /= BASE - T_MIN;
|
||||
k += BASE;
|
||||
}
|
||||
k + (((BASE - T_MIN + 1) * delta) / (delta + SKEW))
|
||||
}
|
||||
|
||||
/// Convert Punycode to an Unicode `String`.
|
||||
///
|
||||
/// This is a convenience wrapper around `decode`.
|
||||
#[inline]
|
||||
pub fn decode_to_string(input: &str) -> Option<String> {
|
||||
decode(input).map(|chars| chars.into_iter().collect())
|
||||
}
|
||||
|
||||
/// Convert Punycode to Unicode.
|
||||
///
|
||||
/// Return None on malformed input or overflow.
|
||||
/// Overflow can only happen on inputs that take more than
|
||||
/// 63 encoded bytes, the DNS limit on domain name labels.
|
||||
pub fn decode(input: &str) -> Option<Vec<char>> {
|
||||
Some(Decoder::default().decode(input).ok()?.collect())
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub(crate) struct Decoder {
|
||||
insertions: Vec<(usize, char)>,
|
||||
}
|
||||
|
||||
impl Decoder {
|
||||
/// Split the input iterator and return a Vec with insertions of encoded characters
|
||||
pub(crate) fn decode<'a>(&'a mut self, input: &'a str) -> Result<Decode<'a>, ()> {
|
||||
self.insertions.clear();
|
||||
// Handle "basic" (ASCII) code points.
|
||||
// They are encoded as-is before the last delimiter, if any.
|
||||
let (base, input) = match input.rfind(DELIMITER) {
|
||||
None => ("", input),
|
||||
Some(position) => (
|
||||
&input[..position],
|
||||
if position > 0 {
|
||||
&input[position + 1..]
|
||||
} else {
|
||||
input
|
||||
},
|
||||
),
|
||||
};
|
||||
|
||||
if !base.is_ascii() {
|
||||
return Err(());
|
||||
}
|
||||
|
||||
let base_len = base.len();
|
||||
let mut length = base_len as u32;
|
||||
let mut code_point = INITIAL_N;
|
||||
let mut bias = INITIAL_BIAS;
|
||||
let mut i = 0;
|
||||
let mut iter = input.bytes();
|
||||
loop {
|
||||
let previous_i = i;
|
||||
let mut weight = 1;
|
||||
let mut k = BASE;
|
||||
let mut byte = match iter.next() {
|
||||
None => break,
|
||||
Some(byte) => byte,
|
||||
};
|
||||
|
||||
// Decode a generalized variable-length integer into delta,
|
||||
// which gets added to i.
|
||||
loop {
|
||||
let digit = match byte {
|
||||
byte @ b'0'..=b'9' => byte - b'0' + 26,
|
||||
byte @ b'A'..=b'Z' => byte - b'A',
|
||||
byte @ b'a'..=b'z' => byte - b'a',
|
||||
_ => return Err(()),
|
||||
} as u32;
|
||||
if digit > (u32::MAX - i) / weight {
|
||||
return Err(()); // Overflow
|
||||
}
|
||||
i += digit * weight;
|
||||
let t = if k <= bias {
|
||||
T_MIN
|
||||
} else if k >= bias + T_MAX {
|
||||
T_MAX
|
||||
} else {
|
||||
k - bias
|
||||
};
|
||||
if digit < t {
|
||||
break;
|
||||
}
|
||||
if weight > u32::MAX / (BASE - t) {
|
||||
return Err(()); // Overflow
|
||||
}
|
||||
weight *= BASE - t;
|
||||
k += BASE;
|
||||
byte = match iter.next() {
|
||||
None => return Err(()), // End of input before the end of this delta
|
||||
Some(byte) => byte,
|
||||
};
|
||||
}
|
||||
|
||||
bias = adapt(i - previous_i, length + 1, previous_i == 0);
|
||||
if i / (length + 1) > u32::MAX - code_point {
|
||||
return Err(()); // Overflow
|
||||
}
|
||||
|
||||
// i was supposed to wrap around from length+1 to 0,
|
||||
// incrementing code_point each time.
|
||||
code_point += i / (length + 1);
|
||||
i %= length + 1;
|
||||
let c = match char::from_u32(code_point) {
|
||||
Some(c) => c,
|
||||
None => return Err(()),
|
||||
};
|
||||
|
||||
// Move earlier insertions farther out in the string
|
||||
for (idx, _) in &mut self.insertions {
|
||||
if *idx >= i as usize {
|
||||
*idx += 1;
|
||||
}
|
||||
}
|
||||
self.insertions.push((i as usize, c));
|
||||
length += 1;
|
||||
i += 1;
|
||||
}
|
||||
|
||||
self.insertions.sort_by_key(|(i, _)| *i);
|
||||
Ok(Decode {
|
||||
base: base.chars(),
|
||||
insertions: &self.insertions,
|
||||
inserted: 0,
|
||||
position: 0,
|
||||
len: base_len + self.insertions.len(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator over the characters of a decoded label.
///
/// Yields the characters of `base` in order, splicing in each entry of
/// `insertions` when the output position reaches that entry's index.
pub(crate) struct Decode<'a> {
    /// Basic code points that have not been yielded yet.
    base: core::str::Chars<'a>,
    /// `(position, char)` pairs to splice in; consumed front to back.
    pub(crate) insertions: &'a [(usize, char)],
    /// Number of entries of `insertions` already yielded.
    inserted: usize,
    /// Number of characters yielded so far.
    position: usize,
    /// Total number of characters this iterator yields.
    len: usize,
}

impl<'a> Iterator for Decode<'a> {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // An insertion scheduled for the current position takes priority.
            if let Some(&(pos, c)) = self.insertions.get(self.inserted) {
                if pos == self.position {
                    self.inserted += 1;
                    self.position += 1;
                    return Some(c);
                }
            }
            match self.base.next() {
                Some(c) => {
                    self.position += 1;
                    return Some(c);
                }
                // Both sources are exhausted.
                None if self.inserted >= self.insertions.len() => return None,
                // Base exhausted but insertions remain: go around again.
                None => {}
            }
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.len - self.position;
        (remaining, Some(remaining))
    }
}

impl<'a> ExactSizeIterator for Decode<'a> {
    fn len(&self) -> usize {
        self.len - self.position
    }
}
|
||||
|
||||
/// Convert an Unicode `str` to Punycode.
|
||||
///
|
||||
/// This is a convenience wrapper around `encode`.
|
||||
#[inline]
|
||||
pub fn encode_str(input: &str) -> Option<String> {
|
||||
if input.len() > u32::MAX as usize {
|
||||
return None;
|
||||
}
|
||||
let mut buf = String::with_capacity(input.len());
|
||||
encode_into(input.chars(), &mut buf).ok().map(|()| buf)
|
||||
}
|
||||
|
||||
/// Convert Unicode to Punycode.
|
||||
///
|
||||
/// Return None on overflow, which can only happen on inputs that would take more than
|
||||
/// 63 encoded bytes, the DNS limit on domain name labels.
|
||||
pub fn encode(input: &[char]) -> Option<String> {
|
||||
if input.len() > u32::MAX as usize {
|
||||
return None;
|
||||
}
|
||||
let mut buf = String::with_capacity(input.len());
|
||||
encode_into(input.iter().copied(), &mut buf)
|
||||
.ok()
|
||||
.map(|()| buf)
|
||||
}
|
||||
|
||||
pub(crate) fn encode_into<I>(input: I, output: &mut String) -> Result<(), ()>
|
||||
where
|
||||
I: Iterator<Item = char> + Clone,
|
||||
{
|
||||
// Handle "basic" (ASCII) code points. They are encoded as-is.
|
||||
let (mut input_length, mut basic_length) = (0u32, 0);
|
||||
for c in input.clone() {
|
||||
input_length = input_length.checked_add(1).ok_or(())?;
|
||||
if c.is_ascii() {
|
||||
output.push(c);
|
||||
basic_length += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if basic_length > 0 {
|
||||
output.push('-')
|
||||
}
|
||||
let mut code_point = INITIAL_N;
|
||||
let mut delta = 0;
|
||||
let mut bias = INITIAL_BIAS;
|
||||
let mut processed = basic_length;
|
||||
while processed < input_length {
|
||||
// All code points < code_point have been handled already.
|
||||
// Find the next larger one.
|
||||
let min_code_point = input
|
||||
.clone()
|
||||
.map(|c| c as u32)
|
||||
.filter(|&c| c >= code_point)
|
||||
.min()
|
||||
.unwrap();
|
||||
if min_code_point - code_point > (u32::MAX - delta) / (processed + 1) {
|
||||
return Err(()); // Overflow
|
||||
}
|
||||
// Increase delta to advance the decoder’s <code_point,i> state to <min_code_point,0>
|
||||
delta += (min_code_point - code_point) * (processed + 1);
|
||||
code_point = min_code_point;
|
||||
for c in input.clone() {
|
||||
let c = c as u32;
|
||||
if c < code_point {
|
||||
delta = delta.checked_add(1).ok_or(())?;
|
||||
}
|
||||
if c == code_point {
|
||||
// Represent delta as a generalized variable-length integer:
|
||||
let mut q = delta;
|
||||
let mut k = BASE;
|
||||
loop {
|
||||
let t = if k <= bias {
|
||||
T_MIN
|
||||
} else if k >= bias + T_MAX {
|
||||
T_MAX
|
||||
} else {
|
||||
k - bias
|
||||
};
|
||||
if q < t {
|
||||
break;
|
||||
}
|
||||
let value = t + ((q - t) % (BASE - t));
|
||||
output.push(value_to_digit(value));
|
||||
q = (q - t) / (BASE - t);
|
||||
k += BASE;
|
||||
}
|
||||
output.push(value_to_digit(q));
|
||||
bias = adapt(delta, processed + 1, processed == basic_length);
|
||||
delta = 0;
|
||||
processed += 1;
|
||||
}
|
||||
}
|
||||
delta += 1;
|
||||
code_point += 1;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Map a digit value to its Punycode character: `0..=25` become `a`-`z` and
/// `26..=35` become `0`-`9`.
///
/// # Panics
///
/// Panics if `value` is greater than 35.
#[inline]
fn value_to_digit(value: u32) -> char {
    if value < 26 {
        (b'a' + value as u8) as char // a..z
    } else if value < 36 {
        (b'0' + (value - 26) as u8) as char // 0..9
    } else {
        panic!()
    }
}
|
||||
|
||||
/// Feeding one more character than a `u32` can count must fail, and must do so
/// without writing anything to the output (`'ß'` is not ASCII, so no basic code
/// point is ever copied).
#[test]
#[ignore = "slow"]
#[cfg(target_pointer_width = "64")]
fn huge_encode() {
    let too_many = std::iter::repeat('ß').take(u32::MAX as usize + 1);
    let mut buf = String::new();
    let outcome = encode_into(too_many, &mut buf);
    assert!(outcome.is_err());
    assert_eq!(buf.len(), 0);
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue