1 //! Windows-specific extensions to primitives in the [`std::ffi`] module. 2 //! 3 //! # Overview 4 //! 5 //! For historical reasons, the Windows API uses a form of potentially 6 //! ill-formed UTF-16 encoding for strings. Specifically, the 16-bit 7 //! code units in Windows strings may contain [isolated surrogate code 8 //! points which are not paired together][ill-formed-utf-16]. The 9 //! Unicode standard requires that surrogate code points (those in the 10 //! range U+D800 to U+DFFF) always be *paired*, because in the UTF-16 11 //! encoding a *surrogate code unit pair* is used to encode a single 12 //! character. For compatibility with code that does not enforce 13 //! these pairings, Windows does not enforce them, either. 14 //! 15 //! While it is not always possible to convert such a string losslessly into 16 //! a valid UTF-16 string (or even UTF-8), it is often desirable to be 17 //! able to round-trip such a string from and to Windows APIs 18 //! losslessly. For example, some Rust code may be "bridging" some 19 //! Windows APIs together, just passing `WCHAR` strings among those 20 //! APIs without ever really looking into the strings. 21 //! 22 //! If Rust code *does* need to look into those strings, it can 23 //! convert them to valid UTF-8, possibly lossily, by substituting 24 //! invalid sequences with [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], as is 25 //! conventionally done in other Rust APIs that deal with string 26 //! encodings. 27 //! 28 //! # `OsStringExt` and `OsStrExt` 29 //! 30 //! [`OsString`] is the Rust wrapper for owned strings in the 31 //! preferred representation of the operating system. On Windows, 32 //! this struct gets augmented with an implementation of the 33 //! [`OsStringExt`] trait, which has an [`OsStringExt::from_wide`] method. This 34 //! lets you create an [`OsString`] from a `&[u16]` slice; presumably 35 //! you get such a slice out of a `WCHAR` Windows API. 36 //! 37 //! Similarly, [`OsStr`] is the Rust wrapper for borrowed strings from 38 //! preferred representation of the operating system. On Windows, the 39 //! [`OsStrExt`] trait provides the [`OsStrExt::encode_wide`] method, which 40 //! outputs an [`EncodeWide`] iterator. You can [`collect`] this 41 //! iterator, for example, to obtain a `Vec<u16>`; you can later get a 42 //! pointer to this vector's contents and feed it to Windows APIs. 43 //! 44 //! These traits, along with [`OsString`] and [`OsStr`], work in 45 //! conjunction so that it is possible to **round-trip** strings from 46 //! Windows and back, with no loss of data, even if the strings are 47 //! ill-formed UTF-16. 48 //! 49 //! [ill-formed-utf-16]: https://simonsapin.github.io/wtf-8/#ill-formed-utf-16 50 //! [`collect`]: crate::std::iter::Iterator::collect 51 //! [U+FFFD]: crate::std::char::REPLACEMENT_CHARACTER 52 //! [`std::ffi`]: crate::std::ffi 53 54 use crate::std::ffi::{OsStr, OsString}; 55 use crate::std::sealed::Sealed; 56 use crate::std::sys::os_str::Buf; 57 use crate::std::sys_common::wtf8::Wtf8Buf; 58 use crate::std::sys_common::{AsInner, FromInner}; 59 60 pub use crate::std::sys_common::wtf8::EncodeWide; 61 62 /// Windows-specific extensions to [`OsString`]. 63 /// 64 /// This trait is sealed: it cannot be implemented outside the standard library. 65 /// This is so that future additional methods are not breaking changes. 66 pub trait OsStringExt: Sealed { 67 /// Creates an `OsString` from a potentially ill-formed UTF-16 slice of 68 /// 16-bit code units. 69 /// 70 /// This is lossless: calling [`OsStrExt::encode_wide`] on the resulting string 71 /// will always return the original code units. 72 /// 73 /// # Examples 74 /// 75 /// ``` 76 /// use std::ffi::OsString; 77 /// use std::os::windows::prelude::*; 78 /// 79 /// // UTF-16 encoding for "Unicode". 80 /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065]; 81 /// 82 /// let string = OsString::from_wide(&source[..]); 83 /// ``` from_wide(wide: &[u16]) -> Self84 fn from_wide(wide: &[u16]) -> Self; 85 } 86 87 impl OsStringExt for OsString { from_wide(wide: &[u16]) -> OsString88 fn from_wide(wide: &[u16]) -> OsString { 89 FromInner::from_inner(Buf { 90 inner: Wtf8Buf::from_wide(wide), 91 }) 92 } 93 } 94 95 /// Windows-specific extensions to [`OsStr`]. 96 /// 97 /// This trait is sealed: it cannot be implemented outside the standard library. 98 /// This is so that future additional methods are not breaking changes. 99 pub trait OsStrExt: Sealed { 100 /// Re-encodes an `OsStr` as a wide character sequence, i.e., potentially 101 /// ill-formed UTF-16. 102 /// 103 /// This is lossless: calling [`OsStringExt::from_wide`] and then 104 /// `encode_wide` on the result will yield the original code units. 105 /// Note that the encoding does not add a final null terminator. 106 /// 107 /// # Examples 108 /// 109 /// ``` 110 /// use std::ffi::OsString; 111 /// use std::os::windows::prelude::*; 112 /// 113 /// // UTF-16 encoding for "Unicode". 114 /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065]; 115 /// 116 /// let string = OsString::from_wide(&source[..]); 117 /// 118 /// let result: Vec<u16> = string.encode_wide().collect(); 119 /// assert_eq!(&source[..], &result[..]); 120 /// ``` encode_wide(&self) -> EncodeWide<'_>121 fn encode_wide(&self) -> EncodeWide<'_>; 122 } 123 124 impl OsStrExt for OsStr { 125 #[inline] encode_wide(&self) -> EncodeWide<'_>126 fn encode_wide(&self) -> EncodeWide<'_> { 127 self.as_inner().inner.encode_wide() 128 } 129 } 130