xref: /drstd/src/std/io/error/repr_bitpacked.rs (revision 9670759b785600bf6315e4173e46a602f16add7a)
1 //! This is a densely packed error representation which is used on targets with
2 //! 64-bit pointers.
3 //!
4 //! (Note that `bitpacked` vs `unpacked` here has no relationship to
5 //! `#[repr(packed)]`, it just refers to attempting to use any available bits in
6 //! a more clever manner than `rustc`'s default layout algorithm would).
7 //!
8 //! Conceptually, it stores the same data as the "unpacked" equivalent we use on
9 //! other targets. Specifically, you can imagine it as an optimized version of
10 //! the following enum (which is roughly equivalent to what's stored by
11 //! `repr_unpacked::Repr`, e.g. `super::ErrorData<Box<Custom>>`):
12 //!
13 //! ```ignore (exposition-only)
14 //! enum ErrorData {
15 //!    Os(i32),
16 //!    Simple(ErrorKind),
17 //!    SimpleMessage(&'static SimpleMessage),
18 //!    Custom(Box<Custom>),
19 //! }
20 //! ```
21 //!
//! However, it packs this data into a 64-bit non-zero value.
23 //!
24 //! This optimization not only allows `io::Error` to occupy a single pointer,
25 //! but improves `io::Result` as well, especially for situations like
26 //! `io::Result<()>` (which is now 64 bits) or `io::Result<u64>` (which is now
27 //! 128 bits), which are quite common.
28 //!
29 //! # Layout
//! Tagged values are 64 bits, with the 2 least significant bits used for the
//! tag. This means there are 4 "variants":
32 //!
33 //! - **Tag 0b00**: The first variant is equivalent to
34 //!   `ErrorData::SimpleMessage`, and holds a `&'static SimpleMessage` directly.
35 //!
//!   `SimpleMessage` has an alignment >= 4 (which is requested with
//!   `#[repr(align)]` and checked statically at the bottom of this file), which
//!   means every `&'static SimpleMessage` should have both tag bits as 0,
//!   meaning its tagged and untagged representations are equivalent.
40 //!
41 //!   This means we can skip tagging it, which is necessary as this variant can
42 //!   be constructed from a `const fn`, which probably cannot tag pointers (or
43 //!   at least it would be difficult).
44 //!
45 //! - **Tag 0b01**: The other pointer variant holds the data for
46 //!   `ErrorData::Custom` and the remaining 62 bits are used to store a
47 //!   `Box<Custom>`. `Custom` also has alignment >= 4, so the bottom two bits
48 //!   are free to use for the tag.
49 //!
50 //!   The only important thing to note is that `ptr::wrapping_add` and
51 //!   `ptr::wrapping_sub` are used to tag the pointer, rather than bitwise
52 //!   operations. This should preserve the pointer's provenance, which would
53 //!   otherwise be lost.
54 //!
55 //! - **Tag 0b10**: Holds the data for `ErrorData::Os(i32)`. We store the `i32`
56 //!   in the pointer's most significant 32 bits, and don't use the bits `2..32`
57 //!   for anything. Using the top 32 bits is just to let us easily recover the
58 //!   `i32` code with the correct sign.
59 //!
60 //! - **Tag 0b11**: Holds the data for `ErrorData::Simple(ErrorKind)`. This
61 //!   stores the `ErrorKind` in the top 32 bits as well, although it doesn't
62 //!   occupy nearly that many. Most of the bits are unused here, but it's not
63 //!   like we need them for anything else yet.
64 //!
65 //! # Use of `NonNull<()>`
66 //!
67 //! Everything is stored in a `NonNull<()>`, which is odd, but actually serves a
68 //! purpose.
69 //!
70 //! Conceptually you might think of this more like:
71 //!
72 //! ```ignore (exposition-only)
73 //! union Repr {
74 //!     // holds integer (Simple/Os) variants, and
75 //!     // provides access to the tag bits.
76 //!     bits: NonZeroU64,
77 //!     // Tag is 0, so this is stored untagged.
78 //!     msg: &'static SimpleMessage,
79 //!     // Tagged (offset) `Box<Custom>` pointer.
80 //!     tagged_custom: NonNull<()>,
81 //! }
82 //! ```
83 //!
84 //! But there are a few problems with this:
85 //!
86 //! 1. Union access is equivalent to a transmute, so this representation would
87 //!    require we transmute between integers and pointers in at least one
88 //!    direction, which may be UB (and even if not, it is likely harder for a
89 //!    compiler to reason about than explicit ptr->int operations).
90 //!
91 //! 2. Even if all fields of a union have a niche, the union itself doesn't,
92 //!    although this may change in the future. This would make things like
93 //!    `io::Result<()>` and `io::Result<usize>` larger, which defeats part of
94 //!    the motivation of this bitpacking.
95 //!
96 //! Storing everything in a `NonZeroUsize` (or some other integer) would be a
97 //! bit more traditional for pointer tagging, but it would lose provenance
98 //! information, couldn't be constructed from a `const fn`, and would probably
99 //! run into other issues as well.
100 //!
101 //! The `NonNull<()>` seems like the only alternative, even if it's fairly odd
102 //! to use a pointer type to store something that may hold an integer, some of
103 //! the time.
104 
105 use super::{Custom, ErrorData, ErrorKind, RawOsError, SimpleMessage};
106 use alloc::boxed::Box;
107 use core::marker::PhantomData;
108 use core::mem::{align_of, size_of};
109 use core::ptr::{self, NonNull};
110 
// The tag lives in the 2 least-significant bits of the packed value. Each
// variant gets one of the four possible tag values, and `TAG_MASK` selects
// exactly the bits that any tag may occupy.
const TAG_SIMPLE_MESSAGE: usize = 0b00;
const TAG_CUSTOM: usize = 0b01;
const TAG_OS: usize = 0b10;
const TAG_SIMPLE: usize = 0b11;
const TAG_MASK: usize = TAG_SIMPLE_MESSAGE | TAG_CUSTOM | TAG_OS | TAG_SIMPLE;
117 
/// The internal representation.
///
/// A single pointer-sized word: field 0 is the tagged `NonNull<()>` whose
/// encoding is described in the module docs, and field 1 is a zero-sized
/// `PhantomData<ErrorData<Box<Custom>>>` marker naming the data that the
/// pointer logically stands for.
///
/// See the module docs for more. The doctest below is just a way to hack in a
/// check that we indeed are not unwind-safe.
///
/// ```compile_fail,E0277
/// fn is_unwind_safe<T: core::panic::UnwindSafe>() {}
/// is_unwind_safe::<std::io::Error>();
/// ```
#[repr(transparent)]
pub(super) struct Repr(NonNull<()>, PhantomData<ErrorData<Box<Custom>>>);

// `Send` and `Sync` are asserted manually here rather than derived from the
// fields.
//
// Safety: All the types `Repr` stores internally are Send + Sync, and so is it.
unsafe impl Send for Repr {}
unsafe impl Sync for Repr {}
133 
134 impl Repr {
new(dat: ErrorData<Box<Custom>>) -> Self135     pub(super) fn new(dat: ErrorData<Box<Custom>>) -> Self {
136         match dat {
137             ErrorData::Os(code) => Self::new_os(code),
138             ErrorData::Simple(kind) => Self::new_simple(kind),
139             ErrorData::SimpleMessage(simple_message) => Self::new_simple_message(simple_message),
140             ErrorData::Custom(b) => Self::new_custom(b),
141         }
142     }
143 
new_custom(b: Box<Custom>) -> Self144     pub(super) fn new_custom(b: Box<Custom>) -> Self {
145         let p = Box::into_raw(b).cast::<u8>();
146         // Should only be possible if an allocator handed out a pointer with
147         // wrong alignment.
148         debug_assert_eq!(p.addr() & TAG_MASK, 0);
149         // Note: We know `TAG_CUSTOM <= size_of::<Custom>()` (static_assert at
150         // end of file), and both the start and end of the expression must be
151         // valid without address space wraparound due to `Box`'s semantics.
152         //
153         // This means it would be correct to implement this using `ptr::add`
154         // (rather than `ptr::wrapping_add`), but it's unclear this would give
155         // any benefit, so we just use `wrapping_add` instead.
156         let tagged = p.wrapping_add(TAG_CUSTOM).cast::<()>();
157         // Safety: `TAG_CUSTOM + p` is the same as `TAG_CUSTOM | p`,
158         // because `p`'s alignment means it isn't allowed to have any of the
159         // `TAG_BITS` set (you can verify that addition and bitwise-or are the
160         // same when the operands have no bits in common using a truth table).
161         //
162         // Then, `TAG_CUSTOM | p` is not zero, as that would require
163         // `TAG_CUSTOM` and `p` both be zero, and neither is (as `p` came from a
164         // box, and `TAG_CUSTOM` just... isn't zero -- it's `0b01`). Therefore,
165         // `TAG_CUSTOM + p` isn't zero and so `tagged` can't be, and the
166         // `new_unchecked` is safe.
167         let res = Self(unsafe { NonNull::new_unchecked(tagged) }, PhantomData);
168         // quickly smoke-check we encoded the right thing (This generally will
169         // only run in std's tests, unless the user uses -Zbuild-std)
170         debug_assert!(
171             matches!(res.data(), ErrorData::Custom(_)),
172             "repr(custom) encoding failed"
173         );
174         res
175     }
176 
177     #[inline]
new_os(code: RawOsError) -> Self178     pub(super) fn new_os(code: RawOsError) -> Self {
179         let utagged = ((code as usize) << 32) | TAG_OS;
180         // Safety: `TAG_OS` is not zero, so the result of the `|` is not 0.
181         let res = Self(
182             unsafe { NonNull::new_unchecked(ptr::invalid_mut(utagged)) },
183             PhantomData,
184         );
185         // quickly smoke-check we encoded the right thing (This generally will
186         // only run in std's tests, unless the user uses -Zbuild-std)
187         debug_assert!(
188             matches!(res.data(), ErrorData::Os(c) if c == code),
189             "repr(os) encoding failed for {code}"
190         );
191         res
192     }
193 
194     #[inline]
new_simple(kind: ErrorKind) -> Self195     pub(super) fn new_simple(kind: ErrorKind) -> Self {
196         let utagged = ((kind as usize) << 32) | TAG_SIMPLE;
197         // Safety: `TAG_SIMPLE` is not zero, so the result of the `|` is not 0.
198         let res = Self(
199             unsafe { NonNull::new_unchecked(ptr::invalid_mut(utagged)) },
200             PhantomData,
201         );
202         // quickly smoke-check we encoded the right thing (This generally will
203         // only run in std's tests, unless the user uses -Zbuild-std)
204         debug_assert!(
205             matches!(res.data(), ErrorData::Simple(k) if k == kind),
206             "repr(simple) encoding failed {:?}",
207             kind,
208         );
209         res
210     }
211 
212     #[inline]
new_simple_message(m: &'static SimpleMessage) -> Self213     pub(super) const fn new_simple_message(m: &'static SimpleMessage) -> Self {
214         // Safety: References are never null.
215         Self(
216             unsafe { NonNull::new_unchecked(m as *const _ as *mut ()) },
217             PhantomData,
218         )
219     }
220 
221     #[inline]
data(&self) -> ErrorData<&Custom>222     pub(super) fn data(&self) -> ErrorData<&Custom> {
223         // Safety: We're a Repr, decode_repr is fine.
224         unsafe { decode_repr(self.0, |c| &*c) }
225     }
226 
227     #[inline]
data_mut(&mut self) -> ErrorData<&mut Custom>228     pub(super) fn data_mut(&mut self) -> ErrorData<&mut Custom> {
229         // Safety: We're a Repr, decode_repr is fine.
230         unsafe { decode_repr(self.0, |c| &mut *c) }
231     }
232 
233     #[inline]
into_data(self) -> ErrorData<Box<Custom>>234     pub(super) fn into_data(self) -> ErrorData<Box<Custom>> {
235         let this = core::mem::ManuallyDrop::new(self);
236         // Safety: We're a Repr, decode_repr is fine. The `Box::from_raw` is
237         // safe because we prevent double-drop using `ManuallyDrop`.
238         unsafe { decode_repr(this.0, |p| Box::from_raw(p)) }
239     }
240 }
241 
242 impl Drop for Repr {
243     #[inline]
drop(&mut self)244     fn drop(&mut self) {
245         // Safety: We're a Repr, decode_repr is fine. The `Box::from_raw` is
246         // safe because we're being dropped.
247         unsafe {
248             let _ = decode_repr(self.0, |p| Box::<Custom>::from_raw(p));
249         }
250     }
251 }
252 
253 // Shared helper to decode a `Repr`'s internal pointer into an ErrorData.
254 //
255 // Safety: `ptr`'s bits should be encoded as described in the document at the
256 // top (it should `some_repr.0`)
257 #[inline]
decode_repr<C, F>(ptr: NonNull<()>, make_custom: F) -> ErrorData<C> where F: FnOnce(*mut Custom) -> C,258 unsafe fn decode_repr<C, F>(ptr: NonNull<()>, make_custom: F) -> ErrorData<C>
259 where
260     F: FnOnce(*mut Custom) -> C,
261 {
262     let bits = ptr.as_ptr().addr();
263     match bits & TAG_MASK {
264         TAG_OS => {
265             let code = ((bits as i64) >> 32) as RawOsError;
266             ErrorData::Os(code)
267         }
268         TAG_SIMPLE => {
269             let kind_bits = (bits >> 32) as u32;
270             let kind = kind_from_prim(kind_bits).unwrap_or_else(|| {
271                 debug_assert!(
272                     false,
273                     "Invalid io::error::Repr bits: `Repr({:#018x})`",
274                     bits
275                 );
276                 // This means the `ptr` passed in was not valid, which violates
277                 // the unsafe contract of `decode_repr`.
278                 //
279                 // Using this rather than unwrap meaningfully improves the code
280                 // for callers which only care about one variant (usually
281                 // `Custom`)
282                 core::hint::unreachable_unchecked();
283             });
284             ErrorData::Simple(kind)
285         }
286         TAG_SIMPLE_MESSAGE => ErrorData::SimpleMessage(&*ptr.cast::<SimpleMessage>().as_ptr()),
287         TAG_CUSTOM => {
288             // It would be correct for us to use `ptr::byte_sub` here (see the
289             // comment above the `wrapping_add` call in `new_custom` for why),
290             // but it isn't clear that it makes a difference, so we don't.
291             let custom = ptr.as_ptr().wrapping_byte_sub(TAG_CUSTOM).cast::<Custom>();
292             ErrorData::Custom(make_custom(custom))
293         }
294         _ => {
295             // Can't happen, and compiler can tell
296             unreachable!();
297         }
298     }
299 }
300 
301 // This compiles to the same code as the check+transmute, but doesn't require
302 // unsafe, or to hard-code max ErrorKind or its size in a way the compiler
303 // couldn't verify.
304 #[inline]
kind_from_prim(ek: u32) -> Option<ErrorKind>305 fn kind_from_prim(ek: u32) -> Option<ErrorKind> {
306     macro_rules! from_prim {
307         ($prim:expr => $Enum:ident { $($Variant:ident),* $(,)? }) => {{
308             // Force a compile error if the list gets out of date.
309             const _: fn(e: $Enum) = |e: $Enum| match e {
310                 $($Enum::$Variant => ()),*
311             };
312             match $prim {
313                 $(v if v == ($Enum::$Variant as _) => Some($Enum::$Variant),)*
314                 _ => None,
315             }
316         }}
317     }
318     from_prim!(ek => ErrorKind {
319         NotFound,
320         PermissionDenied,
321         ConnectionRefused,
322         ConnectionReset,
323         HostUnreachable,
324         NetworkUnreachable,
325         ConnectionAborted,
326         NotConnected,
327         AddrInUse,
328         AddrNotAvailable,
329         NetworkDown,
330         BrokenPipe,
331         AlreadyExists,
332         WouldBlock,
333         NotADirectory,
334         IsADirectory,
335         DirectoryNotEmpty,
336         ReadOnlyFilesystem,
337         FilesystemLoop,
338         StaleNetworkFileHandle,
339         InvalidInput,
340         InvalidData,
341         TimedOut,
342         WriteZero,
343         StorageFull,
344         NotSeekable,
345         FilesystemQuotaExceeded,
346         FileTooLarge,
347         ResourceBusy,
348         ExecutableFileBusy,
349         Deadlock,
350         CrossesDevices,
351         TooManyLinks,
352         InvalidFilename,
353         ArgumentListTooLong,
354         Interrupted,
355         Other,
356         UnexpectedEof,
357         Unsupported,
358         OutOfMemory,
359         Uncategorized,
360     })
361 }
362 
// Compile-time checks that alert us when a change breaks one of the
// assumptions our encoding relies on for correctness and soundness. (Some of
// them are admittedly a bit overly thorough/cautious.)
//
// If any of these are hit on a platform that std supports, we should likely
// just use `repr_unpacked.rs` there instead (unless the fix is easy).
//
// Two forms are accepted:
// - `static_assert!(cond)` fails to compile unless `cond` is true.
// - `static_assert!(@usize_eq: a, b)` fails to compile unless the `usize`
//   constants `a` and `b` are equal (checked by unifying two array lengths).
macro_rules! static_assert {
    (@usize_eq: $left:expr, $right:expr) => {
        const _: [(); $left] = [(); $right];
    };
    ($cond:expr) => {
        const _: () = assert!($cond);
    };
}
377 
// The bitpacking we use requires pointers be exactly 64 bits.
static_assert!(@usize_eq: size_of::<NonNull<()>>(), 8);

// We also require pointers and usize be the same size.
static_assert!(@usize_eq: size_of::<NonNull<()>>(), size_of::<usize>());

// `&'static SimpleMessage` and `Box<Custom>` need to be thin (single-word)
// pointers.
static_assert!(@usize_eq: size_of::<&'static SimpleMessage>(), 8);
static_assert!(@usize_eq: size_of::<Box<Custom>>(), 8);

// The tag mask must cover a contiguous run of low bits, i.e. `TAG_MASK + 1`
// is a power of two.
static_assert!((TAG_MASK + 1).is_power_of_two());
// And the pointees must have sufficient alignment to leave those bits free.
static_assert!(align_of::<SimpleMessage>() >= TAG_MASK + 1);
static_assert!(align_of::<Custom>() >= TAG_MASK + 1);

// Every tag must fit entirely inside the mask.
static_assert!(@usize_eq: TAG_MASK & TAG_SIMPLE_MESSAGE, TAG_SIMPLE_MESSAGE);
static_assert!(@usize_eq: TAG_MASK & TAG_CUSTOM, TAG_CUSTOM);
static_assert!(@usize_eq: TAG_MASK & TAG_OS, TAG_OS);
static_assert!(@usize_eq: TAG_MASK & TAG_SIMPLE, TAG_SIMPLE);

// This is obviously true (`TAG_CUSTOM` is `0b01`), but in `Repr::new_custom` we
// offset a pointer by this value, and expect it to both be within the same
// object, and to not wrap around the address space. See the comment in that
// function for further details.
//
// Actually, at the moment we use `ptr::wrapping_add`, not `ptr::add`, so this
// check isn't needed for that one, although the assertion that we don't
// actually wrap around in that wrapping_add does simplify the safety reasoning
// elsewhere considerably.
static_assert!(size_of::<Custom>() >= TAG_CUSTOM);

// These two store a payload which is allowed to be zero, so they must be
// non-zero to preserve the `NonNull`'s range invariant.
static_assert!(TAG_OS != 0);
static_assert!(TAG_SIMPLE != 0);
// We can't tag `SimpleMessage`s, the tag must be 0.
static_assert!(@usize_eq: TAG_SIMPLE_MESSAGE, 0);

// Check that the point of all of this still holds: `Repr` is one word, and
// wrapping it in `Option`/`Result` adds no extra space for a discriminant.
//
// We'd check against `io::Error`, but *technically* it's allowed to vary,
// as it's not `#[repr(transparent)]`/`#[repr(C)]`. We could add that, but
// the `#[repr()]` would show up in rustdoc, which might be seen as a stable
// commitment.
static_assert!(@usize_eq: size_of::<Repr>(), 8);
static_assert!(@usize_eq: size_of::<Option<Repr>>(), 8);
static_assert!(@usize_eq: size_of::<Result<(), Repr>>(), 8);
static_assert!(@usize_eq: size_of::<Result<usize, Repr>>(), 16);
425 static_assert!(@usize_eq: size_of::<Result<usize, Repr>>(), 16);
426