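//! Implementation of [the WTF-8 encoding](https://simonsapin.github.io/wtf-8/).
//!
//! This library uses Rust's type system to maintain
//! [well-formedness](https://simonsapin.github.io/wtf-8/#well-formed),
//! like the `String` and `&str` types do for UTF-8.
//!
//! Since [WTF-8 must not be used
//! for interchange](https://simonsapin.github.io/wtf-8/#intended-audience),
//! this library deliberately does not provide access to the underlying bytes
//! of WTF-8 strings, nor can it decode WTF-8 from arbitrary bytes.
//!
//! WTF-8 strings can be obtained from UTF-8, UTF-16, or code points.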

// This module was imported from @SimonSapin's repo and has a lot of dead code on Unix
// (it is mostly used on Windows), so don't worry about dead code here.
#![allow(dead_code)]

#[cfg(test)]
mod tests;

use core::char::{encode_utf16_raw, encode_utf8_raw};
use core::str::next_code_point;

use crate::borrow::Cow;
use crate::collections::TryReserveError;
use crate::fmt;
use crate::hash::{Hash, Hasher};
use crate::iter::FusedIterator;
use crate::mem;
use crate::ops;
use crate::rc::Rc;
use crate::slice;
use crate::str;
use crate::sync::Arc;
use crate::sys_common::AsInner;

/// U+FFFD REPLACEMENT CHARACTER as a UTF-8 string; written in place of surrogates by the
/// lossy conversions and by `Display`.
const UTF8_REPLACEMENT_CHARACTER: &str = "\u{FFFD}";

/// A Unicode code point: from U+0000 to U+10FFFF.
///
/// Compares with the `char` type, which represents a Unicode scalar value:
/// a code point that is not a surrogate (U+D800 to U+DFFF).
#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Copy)]
pub struct CodePoint {
    value: u32,
}

/// 将代码点的格式设置为 `U+`，后跟 4 至 6 个十六进制数字。
/// 例子: `U+1F4A9`
impl fmt::Debug for CodePoint {
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(formatter, "U+{:04X}", self.value)
    }
}

impl CodePoint {
    /// Unsafely creates a new `CodePoint` without checking the value.
    ///
    /// Only use when `value` is known to be less than or equal to 0x10FFFF.
    #[inline]
    pub unsafe fn from_u32_unchecked(value: u32) -> CodePoint {
        CodePoint { value }
    }

    /// Creates a new `CodePoint` if the value is a valid code point.
    ///
    /// Returns `None` if `value` is above 0x10FFFF.
    #[inline]
    pub fn from_u32(value: u32) -> Option<CodePoint> {
        if value <= 0x10FFFF { Some(CodePoint { value }) } else { None }
    }

    /// Creates a new `CodePoint` from a `char`.
    ///
    /// Since all Unicode scalar values are code points, this always succeeds.
    #[inline]
    pub fn from_char(value: char) -> CodePoint {
        CodePoint { value: u32::from(value) }
    }

    /// Returns the numeric value of the code point.
    #[inline]
    pub fn to_u32(&self) -> u32 {
        self.value
    }

    /// Returns the numeric value of the code point if it is a leading surrogate
    /// (U+D800 to U+DBFF).
    #[inline]
    pub fn to_lead_surrogate(&self) -> Option<u16> {
        let value = self.value;
        if (0xD800..=0xDBFF).contains(&value) { Some(value as u16) } else { None }
    }

    /// Returns the numeric value of the code point if it is a trailing surrogate
    /// (U+DC00 to U+DFFF).
    #[inline]
    pub fn to_trail_surrogate(&self) -> Option<u16> {
        let value = self.value;
        if (0xDC00..=0xDFFF).contains(&value) { Some(value as u16) } else { None }
    }

    /// Optionally returns a Unicode scalar value for the code point.
    ///
    /// Returns `None` if the code point is a surrogate (from U+D800 to U+DFFF).
    #[inline]
    pub fn to_char(&self) -> Option<char> {
        if (0xD800..=0xDFFF).contains(&self.value) {
            return None;
        }
        // SAFETY: surrogates were excluded above; the safe constructors of this type
        // only admit values up to 0x10FFFF.
        Some(unsafe { char::from_u32_unchecked(self.value) })
    }

    /// Returns a Unicode scalar value for the code point.
    ///
    /// Returns `'\u{FFFD}'` (the replacement character) if the code point is a
    /// surrogate (from U+D800 to U+DFFF).
    #[inline]
    pub fn to_char_lossy(&self) -> char {
        match self.to_char() {
            Some(scalar) => scalar,
            None => '\u{FFFD}',
        }
    }
}

/// An owned, growable string of well-formed WTF-8 data.
///
/// Similar to `String`, but can additionally contain surrogate code points
/// if they're not in a surrogate pair.
///
/// Equality and ordering are defined by the WTF-8 bytes alone. The
/// `is_known_utf8` flag is only a cached hint and deliberately does not
/// take part in comparisons, so two buffers holding the same string always
/// compare equal regardless of how they were built.
#[derive(Clone)]
pub struct Wtf8Buf {
    bytes: Vec<u8>,

    /// Do we know that `bytes` holds a valid UTF-8 encoding? We can easily
    /// know this if we're constructed from a `String` or `&str`.
    ///
    /// It is possible for `bytes` to have valid UTF-8 without this being
    /// set, such as when we're concatenating `&Wtf8`'s and surrogates become
    /// paired, as we don't bother to rescan the entire string.
    is_known_utf8: bool,
}

impl PartialEq for Wtf8Buf {
    /// Compares the encoded bytes only; the `is_known_utf8` hint is ignored.
    #[inline]
    fn eq(&self, other: &Wtf8Buf) -> bool {
        self.bytes == other.bytes
    }
}

impl Eq for Wtf8Buf {}

impl PartialOrd for Wtf8Buf {
    #[inline]
    fn partial_cmp(&self, other: &Wtf8Buf) -> Option<crate::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl Ord for Wtf8Buf {
    /// Orders by the encoded bytes only; the `is_known_utf8` hint is ignored.
    #[inline]
    fn cmp(&self, other: &Wtf8Buf) -> crate::cmp::Ordering {
        self.bytes.cmp(&other.bytes)
    }
}

impl ops::Deref for Wtf8Buf {
    type Target = Wtf8;

    fn deref(&self) -> &Wtf8 {
        self.as_slice()
    }
}

impl ops::DerefMut for Wtf8Buf {
    fn deref_mut(&mut self) -> &mut Wtf8 {
        self.as_mut_slice()
    }
}

/// 用双引号将字符串格式设置，并代之以 `\u`，后跟四个十六进制数字。
///
/// 示例: `"a\u{D800}"` 用于代码点为 [U+0061, U+D800] 的字符串
impl fmt::Debug for Wtf8Buf {
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&**self, formatter)
    }
}

impl Wtf8Buf {
    /// Creates a new, empty WTF-8 string.
    #[inline]
    pub fn new() -> Wtf8Buf {
        Wtf8Buf { bytes: Vec::new(), is_known_utf8: true }
    }

    /// Creates a new, empty WTF-8 string with pre-allocated capacity for `capacity` bytes.
    #[inline]
    pub fn with_capacity(capacity: usize) -> Wtf8Buf {
        Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true }
    }

    /// Creates a WTF-8 string from a UTF-8 `String`.
    ///
    /// This takes ownership of the `String` and does not copy.
    ///
    /// Since WTF-8 is a superset of UTF-8, this always succeeds.
    #[inline]
    pub fn from_string(string: String) -> Wtf8Buf {
        Wtf8Buf { bytes: string.into_bytes(), is_known_utf8: true }
    }

    /// Creates a WTF-8 string from a UTF-8 `&str` slice.
    ///
    /// This copies the content of the slice.
    ///
    /// Since WTF-8 is a superset of UTF-8, this always succeeds.
    #[inline]
    pub fn from_str(str: &str) -> Wtf8Buf {
        Wtf8Buf { bytes: <[_]>::to_vec(str.as_bytes()), is_known_utf8: true }
    }

    /// Removes all contents, keeping the allocation.
    ///
    /// An empty string is trivially UTF-8, so the hint is set again.
    pub fn clear(&mut self) {
        self.bytes.clear();
        self.is_known_utf8 = true;
    }

    /// Creates a WTF-8 string from a potentially ill-formed UTF-16 slice of 16-bit code units.
    ///
    /// This is lossless: calling `.encode_wide()` on the resulting string
    /// will always return the original code units.
    pub fn from_wide(v: &[u16]) -> Wtf8Buf {
        let mut string = Wtf8Buf::with_capacity(v.len());
        for item in char::decode_utf16(v.iter().cloned()) {
            match item {
                Ok(ch) => string.push_char(ch),
                Err(surrogate) => {
                    let surrogate = surrogate.unpaired_surrogate();
                    // SAFETY: a surrogate is a `u16`, so it is at most 0xFFFF and
                    // therefore within the code point range.
                    let code_point = unsafe { CodePoint::from_u32_unchecked(surrogate as u32) };
                    // The string will now contain an unpaired surrogate.
                    string.is_known_utf8 = false;
                    // Skip the WTF-8 concatenation check,
                    // surrogate pairs are already decoded by decode_utf16
                    string.push_code_point_unchecked(code_point);
                }
            }
        }
        string
    }

    /// Appends the generalized UTF-8 encoding of `code_point`.
    ///
    /// Copied from `String::push`. This does **not** include the WTF-8
    /// concatenation check or the `is_known_utf8` check; callers handle both.
    fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
        let mut bytes = [0; 4];
        let bytes = encode_utf8_raw(code_point.value, &mut bytes);
        self.bytes.extend_from_slice(bytes)
    }

    /// Borrows the whole string as a `Wtf8` slice.
    #[inline]
    pub fn as_slice(&self) -> &Wtf8 {
        // SAFETY: relies on the type invariant that `bytes` holds well-formed WTF-8.
        unsafe { Wtf8::from_bytes_unchecked(&self.bytes) }
    }

    /// Mutably borrows the whole string as a `Wtf8` slice.
    #[inline]
    pub fn as_mut_slice(&mut self) -> &mut Wtf8 {
        // Safety: `Wtf8` doesn't expose any way to mutate the bytes that would
        // cause them to change from well-formed UTF-8 to ill-formed UTF-8,
        // which would break the assumptions of the `is_known_utf8` field.
        unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) }
    }

    /// Reserves capacity for at least `additional` more bytes to be inserted
    /// in the given `Wtf8Buf`.
    ///
    /// The collection may reserve more space to avoid frequent reallocations.
    ///
    /// # Panics
    ///
    /// Panics if the new capacity overflows `usize`.
    #[inline]
    pub fn reserve(&mut self, additional: usize) {
        self.bytes.reserve(additional)
    }

    /// Tries to reserve capacity for at least `additional` more length units
    /// in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to avoid
    /// frequent reallocations. After calling `try_reserve`, capacity will be
    /// greater than or equal to `self.len() + additional`.
    ///
    /// Does nothing if capacity is already sufficient. This method preserves
    /// the contents even if an error occurs.
    ///
    /// # Errors
    ///
    /// If the capacity overflows, or the allocator reports a failure, then an error
    /// is returned.
    #[inline]
    pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
        self.bytes.try_reserve(additional)
    }

    /// Reserves capacity for `additional` more bytes by forwarding to
    /// `Vec::reserve_exact` on the underlying byte buffer.
    #[inline]
    pub fn reserve_exact(&mut self, additional: usize) {
        self.bytes.reserve_exact(additional)
    }

    /// Tries to reserve the minimum capacity for `additional` more length
    /// units in the given `Wtf8Buf`. After calling `try_reserve_exact`,
    /// capacity will be greater than or equal to `self.len() + additional`
    /// if it returns `Ok(())`.
    ///
    /// Does nothing if the capacity is already sufficient.
    ///
    /// Note that the allocator may give the `Wtf8Buf` more space than it
    /// requests. Therefore, capacity can not be relied upon to be precisely
    /// minimal. Prefer [`try_reserve`] if future insertions are expected.
    ///
    /// [`try_reserve`]: Wtf8Buf::try_reserve
    ///
    /// # Errors
    ///
    /// If the capacity overflows, or the allocator reports a failure, then an error
    /// is returned.
    #[inline]
    pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
        self.bytes.try_reserve_exact(additional)
    }

    /// Shrinks the capacity by forwarding to `Vec::shrink_to_fit` on the byte buffer.
    #[inline]
    pub fn shrink_to_fit(&mut self) {
        self.bytes.shrink_to_fit()
    }

    /// Shrinks the capacity with a lower bound by forwarding to `Vec::shrink_to`
    /// on the byte buffer.
    #[inline]
    pub fn shrink_to(&mut self, min_capacity: usize) {
        self.bytes.shrink_to(min_capacity)
    }

    /// Returns the number of bytes that this string buffer can hold without reallocating.
    #[inline]
    pub fn capacity(&self) -> usize {
        self.bytes.capacity()
    }

    /// Appends a UTF-8 slice at the end of the string.
    ///
    /// Appending UTF-8 cannot introduce a surrogate, so `is_known_utf8` is left as is.
    #[inline]
    pub fn push_str(&mut self, other: &str) {
        self.bytes.extend_from_slice(other.as_bytes())
    }

    /// Appends a WTF-8 slice at the end of the string.
    ///
    /// This replaces newly paired surrogates at the boundary
    /// with a supplementary code point,
    /// like concatenating ill-formed UTF-16 strings effectively would.
    #[inline]
    pub fn push_wtf8(&mut self, other: &Wtf8) {
        match ((&*self).final_lead_surrogate(), other.initial_trail_surrogate()) {
            // Replace newly paired surrogates by a supplementary code point.
            (Some(lead), Some(trail)) => {
                // A surrogate always occupies three bytes in WTF-8.
                let len_without_lead_surrogate = self.len() - 3;
                self.bytes.truncate(len_without_lead_surrogate);
                let other_without_trail_surrogate = &other.bytes[3..];
                // 4 bytes for the supplementary code point
                self.bytes.reserve(4 + other_without_trail_surrogate.len());
                self.push_char(decode_surrogate_pair(lead, trail));
                self.bytes.extend_from_slice(other_without_trail_surrogate);
            }
            _ => {
                // If we'll be pushing a string containing a surrogate, we may
                // no longer have UTF-8. The scan is only worth doing while the
                // hint is still set; once it is cleared there is nothing to update.
                if self.is_known_utf8 && other.next_surrogate(0).is_some() {
                    self.is_known_utf8 = false;
                }

                self.bytes.extend_from_slice(&other.bytes);
            }
        }
    }

    /// Appends a Unicode scalar value at the end of the string.
    #[inline]
    pub fn push_char(&mut self, c: char) {
        self.push_code_point_unchecked(CodePoint::from_char(c))
    }

    /// Appends a code point at the end of the string.
    ///
    /// This replaces newly paired surrogates at the boundary
    /// with a supplementary code point,
    /// like concatenating ill-formed UTF-16 strings effectively would.
    #[inline]
    pub fn push(&mut self, code_point: CodePoint) {
        if let Some(trail) = code_point.to_trail_surrogate() {
            if let Some(lead) = (&*self).final_lead_surrogate() {
                // Drop the three-byte lead surrogate and push the combined scalar value.
                let len_without_lead_surrogate = self.len() - 3;
                self.bytes.truncate(len_without_lead_surrogate);
                self.push_char(decode_surrogate_pair(lead, trail));
                return;
            }

            // We're pushing a trailing surrogate.
            self.is_known_utf8 = false;
        } else if code_point.to_lead_surrogate().is_some() {
            // We're pushing a leading surrogate.
            self.is_known_utf8 = false;
        }

        // No newly paired surrogates at the boundary.
        self.push_code_point_unchecked(code_point)
    }

    /// Shortens a string to the specified length.
    ///
    /// # Panics
    ///
    /// Panics if `new_len` > current length,
    /// or if `new_len` is not a code point boundary.
    #[inline]
    pub fn truncate(&mut self, new_len: usize) {
        assert!(is_code_point_boundary(self, new_len));
        self.bytes.truncate(new_len)
    }

    /// Consumes the WTF-8 string and tries to convert it to UTF-8.
    ///
    /// This does not copy the data.
    ///
    /// If the contents are not well-formed UTF-8
    /// (that is, if the string contains surrogates),
    /// the original WTF-8 string is returned instead.
    pub fn into_string(self) -> Result<String, Wtf8Buf> {
        if self.is_known_utf8 || self.next_surrogate(0).is_none() {
            // SAFETY: either the hint says the bytes are UTF-8, or the scan found no
            // surrogate; well-formed WTF-8 without surrogates is well-formed UTF-8.
            Ok(unsafe { String::from_utf8_unchecked(self.bytes) })
        } else {
            Err(self)
        }
    }

    /// Consumes the WTF-8 string and converts it lossily to UTF-8.
    ///
    /// This does not copy the data (but may overwrite parts of it in place).
    ///
    /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character).
    pub fn into_string_lossy(mut self) -> String {
        // Fast path: If we already have UTF-8, we can return it immediately.
        if self.is_known_utf8 {
            // SAFETY: the hint is only set while `bytes` is valid UTF-8.
            return unsafe { String::from_utf8_unchecked(self.bytes) };
        }

        let mut pos = 0;
        while let Some((surrogate_pos, _)) = self.next_surrogate(pos) {
            // The three-byte surrogate is overwritten by the three-byte
            // replacement character, so the length does not change.
            pos = surrogate_pos + 3;
            self.bytes[surrogate_pos..pos].copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
        }
        // SAFETY: every surrogate has been overwritten with valid UTF-8 above.
        unsafe { String::from_utf8_unchecked(self.bytes) }
    }

    /// Converts this `Wtf8Buf` into a boxed `Wtf8`.
    #[inline]
    pub fn into_box(self) -> Box<Wtf8> {
        // SAFETY: relies on `Wtf8` being a wrapper around `[u8]` with the same layout.
        unsafe { mem::transmute(self.bytes.into_boxed_slice()) }
    }

    /// Converts a `Box<Wtf8>` into a `Wtf8Buf`.
    ///
    /// The contents are not scanned, so the UTF-8 hint is conservatively left unset.
    pub fn from_box(boxed: Box<Wtf8>) -> Wtf8Buf {
        // SAFETY: relies on `Wtf8` being a wrapper around `[u8]` with the same layout.
        let bytes: Box<[u8]> = unsafe { mem::transmute(boxed) };
        Wtf8Buf { bytes: bytes.into_vec(), is_known_utf8: false }
    }
}

/// Creates a new WTF-8 string from an iterator of code points.
///
/// This replaces surrogate code point pairs with supplementary code points,
/// like concatenating ill-formed UTF-16 strings effectively would.
impl FromIterator<CodePoint> for Wtf8Buf {
    fn from_iter<T: IntoIterator<Item = CodePoint>>(iter: T) -> Wtf8Buf {
        // Start empty and let `Extend` do the reservation and pairing work.
        let mut collected = Wtf8Buf::new();
        Extend::extend(&mut collected, iter);
        collected
    }
}

/// 将代码点从迭代器追加到字符串。
///
/// 这将替代代码点对替换为补充代码点，就像有效地合并格式不正确的 UTF-16 字符串一样。
///
impl Extend<CodePoint> for Wtf8Buf {
    fn extend<T: IntoIterator<Item = CodePoint>>(&mut self, iter: T) {
        let iterator = iter.into_iter();
        let (low, _high) = iterator.size_hint();
        // 每个代码点一个字节的下限 (仅 ASCII)
        self.bytes.reserve(low);
        iterator.for_each(move |code_point| self.push(code_point));
    }

    #[inline]
    fn extend_one(&mut self, code_point: CodePoint) {
        self.push(code_point);
    }

    #[inline]
    fn extend_reserve(&mut self, additional: usize) {
        // 每个代码点一个字节的下限 (仅 ASCII)
        self.bytes.reserve(additional);
    }
}

/// A borrowed slice of well-formed WTF-8 data.
///
/// Similar to `&str`, but can additionally contain surrogate code points
/// if they're not in a surrogate pair.
#[derive(Eq, Ord, PartialEq, PartialOrd)]
pub struct Wtf8 {
    bytes: [u8],
}

impl AsInner<[u8]> for Wtf8 {
    #[inline]
    fn as_inner(&self) -> &[u8] {
        &self.bytes
    }
}

/// Formats the slice in double quotes, with characters escaped according to
/// `char::escape_debug` and unpaired surrogates written as `\u{xxxx}`
/// (lowercase hexadecimal digits).
///
/// Example: `"a\u{d800}"` for a slice with code points [U+0061, U+D800]
impl fmt::Debug for Wtf8 {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Writes `s` with every character passed through `char::escape_debug`.
        fn write_str_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
            use crate::fmt::Write;
            s.chars().flat_map(|c| c.escape_debug()).try_for_each(|c| f.write_char(c))
        }

        let bytes = &self.bytes;
        formatter.write_str("\"")?;
        let mut start = 0;
        while let Some((surrogate_at, surrogate)) = self.next_surrogate(start) {
            // Everything between two surrogates is plain UTF-8.
            let valid = unsafe { str::from_utf8_unchecked(&bytes[start..surrogate_at]) };
            write_str_escaped(formatter, valid)?;
            write!(formatter, "\\u{{{:x}}}", surrogate)?;
            // A surrogate always occupies three bytes.
            start = surrogate_at + 3;
        }
        let tail = unsafe { str::from_utf8_unchecked(&bytes[start..]) };
        write_str_escaped(formatter, tail)?;
        formatter.write_str("\"")
    }
}

/// Writes the string with every surrogate replaced by U+FFFD.
///
/// When the string contains no surrogate at all it is formatted through `str`'s own
/// `fmt`, so formatter options such as width and padding apply; otherwise the pieces
/// are written verbatim.
impl fmt::Display for Wtf8 {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let wtf8_bytes = &self.bytes;
        let mut pos = 0;
        while let Some((surrogate_pos, _)) = self.next_surrogate(pos) {
            let valid = unsafe { str::from_utf8_unchecked(&wtf8_bytes[pos..surrogate_pos]) };
            formatter.write_str(valid)?;
            formatter.write_str(UTF8_REPLACEMENT_CHARACTER)?;
            // Skip the three bytes of the surrogate.
            pos = surrogate_pos + 3;
        }

        let tail = unsafe { str::from_utf8_unchecked(&wtf8_bytes[pos..]) };
        if pos == 0 {
            // No surrogate was found: the whole string is UTF-8.
            tail.fmt(formatter)
        } else {
            formatter.write_str(tail)
        }
    }
}

impl Wtf8 {
    /// 从 UTF-8 `&str` 切片创建 WTF-8 切片。
    ///
    /// 由于 WTF-8 是 UTF-8 的超集，因此它总是成功。
    #[inline]
    pub fn from_str(value: &str) -> &Wtf8 {
        unsafe { Wtf8::from_bytes_unchecked(value.as_bytes()) }
    }

    /// 从 WTF-8 字节切片创建 WTF-8 切片。
    ///
    /// 由于未检查字节切片的有效 WTF-8，因此将该函数标记为不安全。
    ///
    #[inline]
    unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 {
        mem::transmute(value)
    }

    /// 从可变 WTF-8 字节切片创建可变 WTF-8 切片。
    ///
    /// 由于未检查字节切片的有效 WTF-8，因此将该函数标记为不安全。
    ///
    #[inline]
    unsafe fn from_mut_bytes_unchecked(value: &mut [u8]) -> &mut Wtf8 {
        mem::transmute(value)
    }

    /// 返回长度，以 WTF-8 字节为单位。
    #[inline]
    pub fn len(&self) -> usize {
        self.bytes.len()
    }

    #[inline]
    pub fn is_empty(&self) -> bool {
        self.bytes.is_empty()
    }

    /// 如果代码点在 ASCII 范围内，则返回 `position` 处的代码点，否则返回 `b'\xFF'`。
    ///
    ///
    /// # Panics
    ///
    /// 如果 `position` 超出字符串的末尾，就会出现 panics。
    #[inline]
    pub fn ascii_byte_at(&self, position: usize) -> u8 {
        match self.bytes[position] {
            ascii_byte @ 0x00..=0x7F => ascii_byte,
            _ => 0xFF,
        }
    }

    /// 返回字符串的代码点的迭代器。
    #[inline]
    pub fn code_points(&self) -> Wtf8CodePoints<'_> {
        Wtf8CodePoints { bytes: self.bytes.iter() }
    }

    /// 尝试将字符串转换为 UTF-8 并返回 `&str` 切片。
    ///
    /// 如果字符串包含代理，则返回 `None`。
    ///
    /// 这不会复制数据。
    #[inline]
    pub fn as_str(&self) -> Option<&str> {
        // 格式良好的 WTF-8 且仅当不包含替代项时才是格式良好的 UTF-8。
        //
        match self.next_surrogate(0) {
            None => Some(unsafe { str::from_utf8_unchecked(&self.bytes) }),
            Some(_) => None,
        }
    }

    /// 从借用的 `Wtf8` 创建一个拥有所有权的 `Wtf8Buf`。
    pub fn to_owned(&self) -> Wtf8Buf {
        Wtf8Buf { bytes: self.bytes.to_vec(), is_known_utf8: false }
    }

    /// 将字符串错误地转换为 UTF-8。
    /// 如果内容在 UTF-8 中格式正确，则返回 UTF-8 `&str` 切片。
    ///
    /// 代理替换为 `"\u{FFFD}"` (替换字符 ``)。
    ///
    /// 仅在必要时复制数据 (如果它包含任何替代)。
    pub fn to_string_lossy(&self) -> Cow<'_, str> {
        let surrogate_pos = match self.next_surrogate(0) {
            None => return Cow::Borrowed(unsafe { str::from_utf8_unchecked(&self.bytes) }),
            Some((pos, _)) => pos,
        };
        let wtf8_bytes = &self.bytes;
        let mut utf8_bytes = Vec::with_capacity(self.len());
        utf8_bytes.extend_from_slice(&wtf8_bytes[..surrogate_pos]);
        utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
        let mut pos = surrogate_pos + 3;
        loop {
            match self.next_surrogate(pos) {
                Some((surrogate_pos, _)) => {
                    utf8_bytes.extend_from_slice(&wtf8_bytes[pos..surrogate_pos]);
                    utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
                    pos = surrogate_pos + 3;
                }
                None => {
                    utf8_bytes.extend_from_slice(&wtf8_bytes[pos..]);
                    return Cow::Owned(unsafe { String::from_utf8_unchecked(utf8_bytes) });
                }
            }
        }
    }

    /// 将 WTF-8 字符串转换为可能格式不正确的 UTF-16，并返回一个 16 位代码单元的迭代器。
    ///
    /// 这是无损的：
    /// 在结果代码单元上调用 `Wtf8Buf::from_ill_formed_utf16` 将始终返回原始 WTF-8 字符串。
    ///
    ///
    #[inline]
    pub fn encode_wide(&self) -> EncodeWide<'_> {
        EncodeWide { code_points: self.code_points(), extra: 0 }
    }

    #[inline]
    fn next_surrogate(&self, mut pos: usize) -> Option<(usize, u16)> {
        let mut iter = self.bytes[pos..].iter();
        loop {
            let b = *iter.next()?;
            if b < 0x80 {
                pos += 1;
            } else if b < 0xE0 {
                iter.next();
                pos += 2;
            } else if b == 0xED {
                match (iter.next(), iter.next()) {
                    (Some(&b2), Some(&b3)) if b2 >= 0xA0 => {
                        return Some((pos, decode_surrogate(b2, b3)));
                    }
                    _ => pos += 3,
                }
            } else if b < 0xF0 {
                iter.next();
                iter.next();
                pos += 3;
            } else {
                iter.next();
                iter.next();
                iter.next();
                pos += 4;
            }
        }
    }

    #[inline]
    fn final_lead_surrogate(&self) -> Option<u16> {
        match self.bytes {
            [.., 0xED, b2 @ 0xA0..=0xAF, b3] => Some(decode_surrogate(b2, b3)),
            _ => None,
        }
    }

    #[inline]
    fn initial_trail_surrogate(&self) -> Option<u16> {
        match self.bytes {
            [0xED, b2 @ 0xB0..=0xBF, b3, ..] => Some(decode_surrogate(b2, b3)),
            _ => None,
        }
    }

    pub fn clone_into(&self, buf: &mut Wtf8Buf) {
        buf.is_known_utf8 = false;
        self.bytes.clone_into(&mut buf.bytes);
    }

    /// `Wtf8` 的 Boxes。
    #[inline]
    pub fn into_box(&self) -> Box<Wtf8> {
        let boxed: Box<[u8]> = self.bytes.into();
        unsafe { mem::transmute(boxed) }
    }

    /// 创建一个 boxed，空 `Wtf8`。
    pub fn empty_box() -> Box<Wtf8> {
        let boxed: Box<[u8]> = Default::default();
        unsafe { mem::transmute(boxed) }
    }

    #[inline]
    pub fn into_arc(&self) -> Arc<Wtf8> {
        let arc: Arc<[u8]> = Arc::from(&self.bytes);
        unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Wtf8) }
    }

    #[inline]
    pub fn into_rc(&self) -> Rc<Wtf8> {
        let rc: Rc<[u8]> = Rc::from(&self.bytes);
        unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Wtf8) }
    }

    #[inline]
    pub fn make_ascii_lowercase(&mut self) {
        self.bytes.make_ascii_lowercase()
    }

    #[inline]
    pub fn make_ascii_uppercase(&mut self) {
        self.bytes.make_ascii_uppercase()
    }

    #[inline]
    pub fn to_ascii_lowercase(&self) -> Wtf8Buf {
        Wtf8Buf { bytes: self.bytes.to_ascii_lowercase(), is_known_utf8: false }
    }

    #[inline]
    pub fn to_ascii_uppercase(&self) -> Wtf8Buf {
        Wtf8Buf { bytes: self.bytes.to_ascii_uppercase(), is_known_utf8: false }
    }

    #[inline]
    pub fn is_ascii(&self) -> bool {
        self.bytes.is_ascii()
    }

    #[inline]
    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
        self.bytes.eq_ignore_ascii_case(&other.bytes)
    }
}

/// Returns a slice of the given string for the byte range \[`begin`..`end`).
///
/// # Panics
///
/// Panics when `begin` and `end` do not point to code point boundaries,
/// or point beyond the end of the string.
impl ops::Index<ops::Range<usize>> for Wtf8 {
    type Output = Wtf8;

    #[inline]
    fn index(&self, range: ops::Range<usize>) -> &Wtf8 {
        let ops::Range { start, end } = range;
        // is_code_point_boundary checks that the index is in [0, .len()]
        let valid = start <= end
            && is_code_point_boundary(self, start)
            && is_code_point_boundary(self, end);
        if !valid {
            slice_error_fail(self, start, end)
        }
        unsafe { slice_unchecked(self, start, end) }
    }
}

/// Returns a slice of the given string from byte `begin` to its end.
///
/// # Panics
///
/// Panics when `begin` is not at a code point boundary,
/// or is beyond the end of the string.
impl ops::Index<ops::RangeFrom<usize>> for Wtf8 {
    type Output = Wtf8;

    #[inline]
    fn index(&self, range: ops::RangeFrom<usize>) -> &Wtf8 {
        let begin = range.start;
        let end = self.len();
        // is_code_point_boundary checks that the index is in [0, .len()]
        if !is_code_point_boundary(self, begin) {
            slice_error_fail(self, begin, end)
        }
        unsafe { slice_unchecked(self, begin, end) }
    }
}

/// Returns a slice of the given string from its beginning to byte `end`.
///
/// # Panics
///
/// Panics when `end` is not at a code point boundary,
/// or is beyond the end of the string.
impl ops::Index<ops::RangeTo<usize>> for Wtf8 {
    type Output = Wtf8;

    #[inline]
    fn index(&self, range: ops::RangeTo<usize>) -> &Wtf8 {
        let end = range.end;
        // is_code_point_boundary checks that the index is in [0, .len()]
        if !is_code_point_boundary(self, end) {
            slice_error_fail(self, 0, end)
        }
        unsafe { slice_unchecked(self, 0, end) }
    }
}

/// Returns the whole string; indexing with `..` cannot fail.
impl ops::Index<ops::RangeFull> for Wtf8 {
    type Output = Wtf8;

    #[inline]
    fn index(&self, _range: ops::RangeFull) -> &Wtf8 {
        self
    }
}

/// Decodes the surrogate encoded by a three-byte sequence from its second and third bytes.
///
/// The first byte is assumed to be 0xED. Each of the two given bytes contributes its
/// low six bits.
#[inline]
fn decode_surrogate(second_byte: u8, third_byte: u8) -> u16 {
    let high_bits = (u16::from(second_byte) & 0x3F) << 6;
    let low_bits = u16::from(third_byte) & 0x3F;
    0xD800 | high_bits | low_bits
}

/// Combines a lead and a trail surrogate into the supplementary scalar value they encode.
///
/// `lead` is expected to be in 0xD800..=0xDBFF and `trail` in 0xDC00..=0xDFFF.
#[inline]
fn decode_surrogate_pair(lead: u16, trail: u16) -> char {
    let high_ten = u32::from(lead - 0xD800) << 10;
    let low_ten = u32::from(trail - 0xDC00);
    let code_point = 0x10000 + (high_ten | low_ten);
    // For surrogates in the expected ranges the result is in 0x10000..=0x10FFFF.
    unsafe { char::from_u32_unchecked(code_point) }
}

/// Copied from core::str::StrPrelude::is_char_boundary
///
/// Returns `true` when `index` is the end of the string or the position of a byte that
/// is not a continuation byte; returns `false` for any index past the end.
#[inline]
pub fn is_code_point_boundary(slice: &Wtf8, index: usize) -> bool {
    match slice.bytes.get(index) {
        // Continuation bytes have the bit pattern 10xx_xxxx.
        Some(&byte) => byte & 0xC0 != 0x80,
        // Out of range: only the one-past-the-end position counts as a boundary.
        None => index == slice.len(),
    }
}

/// Copied from core::str::raw::slice_unchecked
///
/// Returns the sub-slice covering bytes `begin..end` without any checking.
///
/// # Safety
///
/// The callers in this file only pass indices with `begin <= end <= s.len()` that both
/// lie on code point boundaries; other values are not checked here.
#[inline]
pub unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 {
    let start = s.bytes.as_ptr().add(begin);
    let len = end - begin;
    // memory layout of a &[u8] and &Wtf8 are the same
    Wtf8::from_bytes_unchecked(slice::from_raw_parts(start, len))
}

/// Copied from core::str::raw::slice_error_fail
///
/// Diverging failure path shared by the `Index` implementations: always panics.
#[inline(never)]
pub fn slice_error_fail(s: &Wtf8, begin: usize, end: usize) -> ! {
    // An inverted range is reported by this assertion instead of the message below.
    assert!(begin <= end);
    panic!("index {begin} and/or {end} in `{s:?}` do not lie on character boundary");
}

/// Iterator for the code points of a WTF-8 string.
///
/// Created with the method `.code_points()`.
#[derive(Clone)]
pub struct Wtf8CodePoints<'a> {
    bytes: slice::Iter<'a, u8>,
}

impl<'a> Iterator for Wtf8CodePoints<'a> {
    type Item = CodePoint;

    /// Decodes and returns the next code point, or `None` at the end of the string.
    #[inline]
    fn next(&mut self) -> Option<CodePoint> {
        // SAFETY: `self.bytes` has been created from a WTF-8 string
        let value = unsafe { next_code_point(&mut self.bytes) }?;
        Some(CodePoint { value })
    }

    /// A code point occupies between one and four bytes, which bounds the number of
    /// remaining items from both sides.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining_bytes = self.bytes.len();
        let at_least = remaining_bytes.saturating_add(3) / 4;
        (at_least, Some(remaining_bytes))
    }
}

/// Generates a wide character sequence for potentially ill-formed UTF-16.
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Clone)]
pub struct EncodeWide<'a> {
    // Remaining code points of the source string.
    code_points: Wtf8CodePoints<'a>,
    // Pending second code unit of a surrogate pair, or 0 when nothing is pending.
    extra: u16,
}

// 从 libunicode/u_str.rs 复制
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for EncodeWide<'a> {
    type Item = u16;

    #[inline]
    fn next(&mut self) -> Option<u16> {
        if self.extra != 0 {
            let tmp = self.extra;
            self.extra = 0;
            return Some(tmp);
        }

        let mut buf = [0; 2];
        self.code_points.next().map(|code_point| {
            let n = encode_utf16_raw(code_point.value, &mut buf).len();
            if n == 2 {
                self.extra = buf[1];
            }
            buf[0]
        })
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (low, high) = self.code_points.size_hint();
        let ext = (self.extra != 0) as usize;
        // 每个代码点要么得到一个 u16，要么得到两个 u16，所以这个迭代器的长度是底层迭代器的 1 到 2 倍。
        //
        //
        (low + ext, high.and_then(|n| n.checked_mul(2)).and_then(|n| n.checked_add(ext)))
    }
}

// `next` returns `None` whenever `extra` is empty and `code_points` yields nothing;
// presumably the underlying slice iterator keeps returning `None` once exhausted.
#[stable(feature = "encode_wide_fused_iterator", since = "1.62.0")]
impl FusedIterator for EncodeWide<'_> {}

impl Hash for CodePoint {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.value.hash(state)
    }
}

impl Hash for Wtf8Buf {
    /// Hashes the raw bytes followed by a 0xFE terminator byte.
    ///
    /// The `is_known_utf8` hint is not hashed.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        state.write(&self.bytes);
        state.write_u8(0xfe)
    }
}

impl Hash for Wtf8 {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        state.write(&self.bytes);
        0xfeu8.hash(state)
    }
}