1 use crate::std::cell::UnsafeCell;
2 use crate::std::ptr;
3 use crate::std::sync::atomic::{
4 AtomicBool, AtomicPtr, AtomicU32,
5 Ordering::{AcqRel, Acquire, Relaxed, Release},
6 };
7 use crate::std::sys::c;
8
9 #[cfg(test)]
10 mod tests;
11
/// Records whether any TLS destructor has been registered so far.
///
/// This is only an optimization hint: when nothing ever sets the flag, the
/// compiler is often able to prove that and drop the destructor-running code
/// as dead.
static HAS_DTORS: AtomicBool = AtomicBool::new(false);
15
16 // Using a per-thread list avoids the problems in synchronizing global state.
17 #[thread_local]
18 #[cfg(target_thread_local)]
19 static mut DESTRUCTORS: Vec<(*mut u8, unsafe extern "C" fn(*mut u8))> = Vec::new();
20
21 // Ensure this can never be inlined because otherwise this may break in dylibs.
22 // See #44391.
23 #[inline(never)]
24 #[cfg(target_thread_local)]
register_keyless_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8))25 pub unsafe fn register_keyless_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
26 DESTRUCTORS.push((t, dtor));
27 HAS_DTORS.store(true, Relaxed);
28 }
29
30 #[inline(never)] // See comment above
31 #[cfg(target_thread_local)]
32 /// Runs destructors. This should not be called until thread exit.
run_keyless_dtors()33 unsafe fn run_keyless_dtors() {
34 // Drop all the destructors.
35 //
36 // Note: While this is potentially an infinite loop, it *should* be
37 // the case that this loop always terminates because we provide the
38 // guarantee that a TLS key cannot be set after it is flagged for
39 // destruction.
40 while let Some((ptr, dtor)) = DESTRUCTORS.pop() {
41 (dtor)(ptr);
42 }
43 // We're done so free the memory.
44 DESTRUCTORS = Vec::new();
45 }
46
47 type Key = c::DWORD;
48 type Dtor = unsafe extern "C" fn(*mut u8);
49
// Turns out, like pretty much everything, Windows is pretty close to the
// functionality that Unix provides, but slightly different! In the case of
// TLS, Windows does not provide an API to provide a destructor for a TLS
// variable. This ends up being pretty crucial to this implementation, so we
// need a way around this.
55 //
56 // The solution here ended up being a little obscure, but fear not, the
57 // internet has informed me [1][2] that this solution is not unique (no way
58 // I could have thought of it as well!). The key idea is to insert some hook
59 // somewhere to run arbitrary code on thread termination. With this in place
60 // we'll be able to run anything we like, including all TLS destructors!
61 //
// To accomplish this feat, we perform a number of tasks, all contained
// within this module:
64 //
65 // * All TLS destructors are tracked by *us*, not the Windows runtime. This
66 // means that we have a global list of destructors for each TLS key that
67 // we know about.
68 // * When a thread exits, we run over the entire list and run dtors for all
69 // non-null keys. This attempts to match Unix semantics in this regard.
70 //
71 // For more details and nitty-gritty, see the code sections below!
72 //
73 // [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
74 // [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42
75
76 pub struct StaticKey {
77 /// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX
78 /// is not a valid key value, this allows us to use zero as sentinel value
79 /// without risking overflow.
80 key: AtomicU32,
81 dtor: Option<Dtor>,
82 next: AtomicPtr<StaticKey>,
83 /// Currently, destructors cannot be unregistered, so we cannot use racy
84 /// initialization for keys. Instead, we need synchronize initialization.
85 /// Use the Windows-provided `Once` since it does not require TLS.
86 once: UnsafeCell<c::INIT_ONCE>,
87 }
88
89 impl StaticKey {
90 #[inline]
new(dtor: Option<Dtor>) -> StaticKey91 pub const fn new(dtor: Option<Dtor>) -> StaticKey {
92 StaticKey {
93 key: AtomicU32::new(0),
94 dtor,
95 next: AtomicPtr::new(ptr::null_mut()),
96 once: UnsafeCell::new(c::INIT_ONCE_STATIC_INIT),
97 }
98 }
99
100 #[inline]
set(&'static self, val: *mut u8)101 pub unsafe fn set(&'static self, val: *mut u8) {
102 let r = c::TlsSetValue(self.key(), val.cast());
103 debug_assert_eq!(r, c::TRUE);
104 }
105
106 #[inline]
get(&'static self) -> *mut u8107 pub unsafe fn get(&'static self) -> *mut u8 {
108 c::TlsGetValue(self.key()).cast()
109 }
110
111 #[inline]
key(&'static self) -> Key112 unsafe fn key(&'static self) -> Key {
113 match self.key.load(Acquire) {
114 0 => self.init(),
115 key => key - 1,
116 }
117 }
118
119 #[cold]
init(&'static self) -> Key120 unsafe fn init(&'static self) -> Key {
121 if self.dtor.is_some() {
122 let mut pending = c::FALSE;
123 let r = c::InitOnceBeginInitialize(self.once.get(), 0, &mut pending, ptr::null_mut());
124 assert_eq!(r, c::TRUE);
125
126 if pending == c::FALSE {
127 // Some other thread initialized the key, load it.
128 self.key.load(Relaxed) - 1
129 } else {
130 let key = c::TlsAlloc();
131 if key == c::TLS_OUT_OF_INDEXES {
132 // Wakeup the waiting threads before panicking to avoid deadlock.
133 c::InitOnceComplete(self.once.get(), c::INIT_ONCE_INIT_FAILED, ptr::null_mut());
134 panic!("out of TLS indexes");
135 }
136
137 self.key.store(key + 1, Release);
138 register_dtor(self);
139
140 let r = c::InitOnceComplete(self.once.get(), 0, ptr::null_mut());
141 debug_assert_eq!(r, c::TRUE);
142
143 key
144 }
145 } else {
146 // If there is no destructor to clean up, we can use racy initialization.
147
148 let key = c::TlsAlloc();
149 assert_ne!(key, c::TLS_OUT_OF_INDEXES, "out of TLS indexes");
150
151 match self.key.compare_exchange(0, key + 1, AcqRel, Acquire) {
152 Ok(_) => key,
153 Err(new) => {
154 // Some other thread completed initialization first, so destroy
155 // our key and use theirs.
156 let r = c::TlsFree(key);
157 debug_assert_eq!(r, c::TRUE);
158 new - 1
159 }
160 }
161 }
162 }
163 }
164
165 unsafe impl Send for StaticKey {}
166 unsafe impl Sync for StaticKey {}
167
168 // -------------------------------------------------------------------------
169 // Dtor registration
170 //
171 // Windows has no native support for running destructors so we manage our own
172 // list of destructors to keep track of how to destroy keys. We then install a
173 // callback later to get invoked whenever a thread exits, running all
174 // appropriate destructors.
175 //
176 // Currently unregistration from this list is not supported. A destructor can be
177 // registered but cannot be unregistered. There's various simplifying reasons
178 // for doing this, the big ones being:
179 //
180 // 1. Currently we don't even support deallocating TLS keys, so normal operation
181 // doesn't need to deallocate a destructor.
182 // 2. There is no point in time where we know we can unregister a destructor
183 // because it could always be getting run by some remote thread.
184 //
185 // Typically processes have a statically known set of TLS keys which is pretty
186 // small, and we'd want to keep this memory alive for the whole process anyway
187 // really.
188
189 static DTORS: AtomicPtr<StaticKey> = AtomicPtr::new(ptr::null_mut());
190
191 /// Should only be called once per key, otherwise loops or breaks may occur in
192 /// the linked list.
register_dtor(key: &'static StaticKey)193 unsafe fn register_dtor(key: &'static StaticKey) {
194 // Ensure this is never run when native thread locals are available.
195 assert_eq!(false, cfg!(target_thread_local));
196 let this = <*const StaticKey>::cast_mut(key);
197 // Use acquire ordering to pass along the changes done by the previously
198 // registered keys when we store the new head with release ordering.
199 let mut head = DTORS.load(Acquire);
200 loop {
201 key.next.store(head, Relaxed);
202 match DTORS.compare_exchange_weak(head, this, Release, Acquire) {
203 Ok(_) => break,
204 Err(new) => head = new,
205 }
206 }
207 HAS_DTORS.store(true, Release);
208 }
209
210 // -------------------------------------------------------------------------
211 // Where the Magic (TM) Happens
212 //
213 // If you're looking at this code, and wondering "what is this doing?",
214 // you're not alone! I'll try to break this down step by step:
215 //
216 // # What's up with CRT$XLB?
217 //
218 // For anything about TLS destructors to work on Windows, we have to be able
219 // to run *something* when a thread exits. To do so, we place a very special
220 // static in a very special location. If this is encoded in just the right
221 // way, the kernel's loader is apparently nice enough to run some function
222 // of ours whenever a thread exits! How nice of the kernel!
223 //
224 // Lots of detailed information can be found in source [1] above, but the
225 // gist of it is that this is leveraging a feature of Microsoft's PE format
226 // (executable format) which is not actually used by any compilers today.
227 // This apparently translates to any callbacks in the ".CRT$XLB" section
228 // being run on certain events.
229 //
230 // So after all that, we use the compiler's #[link_section] feature to place
231 // a callback pointer into the magic section so it ends up being called.
232 //
233 // # What's up with this callback?
234 //
235 // The callback specified receives a number of parameters from... someone!
236 // (the kernel? the runtime? I'm not quite sure!) There are a few events that
237 // this gets invoked for, but we're currently only interested on when a
238 // thread or a process "detaches" (exits). The process part happens for the
239 // last thread and the thread part happens for any normal thread.
240 //
241 // # Ok, what's up with running all these destructors?
242 //
243 // This will likely need to be improved over time, but this function
244 // attempts a "poor man's" destructor callback system. Once we've got a list
245 // of what to run, we iterate over all keys, check their values, and then run
246 // destructors if the values turn out to be non null (setting them to null just
247 // beforehand). We do this a few times in a loop to basically match Unix
248 // semantics. If we don't reach a fixed point after a short while then we just
249 // inevitably leak something most likely.
250 //
251 // # The article mentions weird stuff about "/INCLUDE"?
252 //
253 // It sure does! Specifically we're talking about this quote:
254 //
255 // The Microsoft run-time library facilitates this process by defining a
256 // memory image of the TLS Directory and giving it the special name
257 // “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
258 // linker looks for this memory image and uses the data there to create the
259 // TLS Directory. Other compilers that support TLS and work with the
260 // Microsoft linker must use this same technique.
261 //
262 // Basically what this means is that if we want support for our TLS
263 // destructors/our hook being called then we need to make sure the linker does
264 // not omit this symbol. Otherwise it will omit it and our callback won't be
265 // wired up.
266 //
267 // We don't actually use the `/INCLUDE` linker flag here like the article
268 // mentions because the Rust compiler doesn't propagate linker flags, but
269 // instead we use a shim function which performs a volatile 1-byte load from
270 // the address of the symbol to ensure it sticks around.
271
272 #[link_section = ".CRT$XLB"]
273 #[allow(dead_code, unused_variables)]
274 #[used] // we don't want LLVM eliminating this symbol for any reason, and
275 // when the symbol makes it to the linker the linker will take over
276 pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) =
277 on_tls_callback;
278
279 #[allow(dead_code, unused_variables)]
on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv: c::LPVOID)280 unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv: c::LPVOID) {
281 if !HAS_DTORS.load(Acquire) {
282 return;
283 }
284 if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH {
285 #[cfg(not(target_thread_local))]
286 run_dtors();
287 #[cfg(target_thread_local)]
288 run_keyless_dtors();
289 }
290
291 // See comments above for what this is doing. Note that we don't need this
292 // trickery on GNU windows, just on MSVC.
293 reference_tls_used();
294 #[cfg(target_env = "msvc")]
295 unsafe fn reference_tls_used() {
296 extern "C" {
297 static _tls_used: u8;
298 }
299 crate::std::intrinsics::volatile_load(&_tls_used);
300 }
301 #[cfg(not(target_env = "msvc"))]
302 unsafe fn reference_tls_used() {}
303 }
304
305 #[allow(dead_code)] // actually called below
run_dtors()306 unsafe fn run_dtors() {
307 for _ in 0..5 {
308 let mut any_run = false;
309
310 // Use acquire ordering to observe key initialization.
311 let mut cur = DTORS.load(Acquire);
312 while !cur.is_null() {
313 let key = (*cur).key.load(Relaxed) - 1;
314 let dtor = (*cur).dtor.unwrap();
315
316 let ptr = c::TlsGetValue(key);
317 if !ptr.is_null() {
318 c::TlsSetValue(key, ptr::null_mut());
319 dtor(ptr as *mut _);
320 any_run = true;
321 }
322
323 cur = (*cur).next.load(Relaxed);
324 }
325
326 if !any_run {
327 break;
328 }
329 }
330 }
331