1 use crate::std::cell::UnsafeCell; 2 use crate::std::ptr; 3 use crate::std::sync::atomic::{ 4 AtomicBool, AtomicPtr, AtomicU32, 5 Ordering::{AcqRel, Acquire, Relaxed, Release}, 6 }; 7 use crate::std::sys::c; 8 9 #[cfg(test)] 10 mod tests; 11 12 /// An optimization hint. The compiler is often smart enough to know if an atomic 13 /// is never set and can remove dead code based on that fact. 14 static HAS_DTORS: AtomicBool = AtomicBool::new(false); 15 16 // Using a per-thread list avoids the problems in synchronizing global state. 17 #[thread_local] 18 #[cfg(target_thread_local)] 19 static mut DESTRUCTORS: Vec<(*mut u8, unsafe extern "C" fn(*mut u8))> = Vec::new(); 20 21 // Ensure this can never be inlined because otherwise this may break in dylibs. 22 // See #44391. 23 #[inline(never)] 24 #[cfg(target_thread_local)] 25 pub unsafe fn register_keyless_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { 26 DESTRUCTORS.push((t, dtor)); 27 HAS_DTORS.store(true, Relaxed); 28 } 29 30 #[inline(never)] // See comment above 31 #[cfg(target_thread_local)] 32 /// Runs destructors. This should not be called until thread exit. 33 unsafe fn run_keyless_dtors() { 34 // Drop all the destructors. 35 // 36 // Note: While this is potentially an infinite loop, it *should* be 37 // the case that this loop always terminates because we provide the 38 // guarantee that a TLS key cannot be set after it is flagged for 39 // destruction. 40 while let Some((ptr, dtor)) = DESTRUCTORS.pop() { 41 (dtor)(ptr); 42 } 43 // We're done so free the memory. 44 DESTRUCTORS = Vec::new(); 45 } 46 47 type Key = c::DWORD; 48 type Dtor = unsafe extern "C" fn(*mut u8); 49 50 // Turns out, like pretty much everything, Windows is pretty close the 51 // functionality that Unix provides, but slightly different! In the case of 52 // TLS, Windows does not provide an API to provide a destructor for a TLS 53 // variable. This ends up being pretty crucial to this implementation, so we 54 // need a way around this. 
55 // 56 // The solution here ended up being a little obscure, but fear not, the 57 // internet has informed me [1][2] that this solution is not unique (no way 58 // I could have thought of it as well!). The key idea is to insert some hook 59 // somewhere to run arbitrary code on thread termination. With this in place 60 // we'll be able to run anything we like, including all TLS destructors! 61 // 62 // To accomplish this feat, we perform a number of threads, all contained 63 // within this module: 64 // 65 // * All TLS destructors are tracked by *us*, not the Windows runtime. This 66 // means that we have a global list of destructors for each TLS key that 67 // we know about. 68 // * When a thread exits, we run over the entire list and run dtors for all 69 // non-null keys. This attempts to match Unix semantics in this regard. 70 // 71 // For more details and nitty-gritty, see the code sections below! 72 // 73 // [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way 74 // [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42 75 76 pub struct StaticKey { 77 /// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX 78 /// is not a valid key value, this allows us to use zero as sentinel value 79 /// without risking overflow. 80 key: AtomicU32, 81 dtor: Option<Dtor>, 82 next: AtomicPtr<StaticKey>, 83 /// Currently, destructors cannot be unregistered, so we cannot use racy 84 /// initialization for keys. Instead, we need synchronize initialization. 85 /// Use the Windows-provided `Once` since it does not require TLS. 
86 once: UnsafeCell<c::INIT_ONCE>, 87 } 88 89 impl StaticKey { 90 #[inline] 91 pub const fn new(dtor: Option<Dtor>) -> StaticKey { 92 StaticKey { 93 key: AtomicU32::new(0), 94 dtor, 95 next: AtomicPtr::new(ptr::null_mut()), 96 once: UnsafeCell::new(c::INIT_ONCE_STATIC_INIT), 97 } 98 } 99 100 #[inline] 101 pub unsafe fn set(&'static self, val: *mut u8) { 102 let r = c::TlsSetValue(self.key(), val.cast()); 103 debug_assert_eq!(r, c::TRUE); 104 } 105 106 #[inline] 107 pub unsafe fn get(&'static self) -> *mut u8 { 108 c::TlsGetValue(self.key()).cast() 109 } 110 111 #[inline] 112 unsafe fn key(&'static self) -> Key { 113 match self.key.load(Acquire) { 114 0 => self.init(), 115 key => key - 1, 116 } 117 } 118 119 #[cold] 120 unsafe fn init(&'static self) -> Key { 121 if self.dtor.is_some() { 122 let mut pending = c::FALSE; 123 let r = c::InitOnceBeginInitialize(self.once.get(), 0, &mut pending, ptr::null_mut()); 124 assert_eq!(r, c::TRUE); 125 126 if pending == c::FALSE { 127 // Some other thread initialized the key, load it. 128 self.key.load(Relaxed) - 1 129 } else { 130 let key = c::TlsAlloc(); 131 if key == c::TLS_OUT_OF_INDEXES { 132 // Wakeup the waiting threads before panicking to avoid deadlock. 133 c::InitOnceComplete(self.once.get(), c::INIT_ONCE_INIT_FAILED, ptr::null_mut()); 134 panic!("out of TLS indexes"); 135 } 136 137 self.key.store(key + 1, Release); 138 register_dtor(self); 139 140 let r = c::InitOnceComplete(self.once.get(), 0, ptr::null_mut()); 141 debug_assert_eq!(r, c::TRUE); 142 143 key 144 } 145 } else { 146 // If there is no destructor to clean up, we can use racy initialization. 147 148 let key = c::TlsAlloc(); 149 assert_ne!(key, c::TLS_OUT_OF_INDEXES, "out of TLS indexes"); 150 151 match self.key.compare_exchange(0, key + 1, AcqRel, Acquire) { 152 Ok(_) => key, 153 Err(new) => { 154 // Some other thread completed initialization first, so destroy 155 // our key and use theirs. 
156 let r = c::TlsFree(key); 157 debug_assert_eq!(r, c::TRUE); 158 new - 1 159 } 160 } 161 } 162 } 163 } 164 165 unsafe impl Send for StaticKey {} 166 unsafe impl Sync for StaticKey {} 167 168 // ------------------------------------------------------------------------- 169 // Dtor registration 170 // 171 // Windows has no native support for running destructors so we manage our own 172 // list of destructors to keep track of how to destroy keys. We then install a 173 // callback later to get invoked whenever a thread exits, running all 174 // appropriate destructors. 175 // 176 // Currently unregistration from this list is not supported. A destructor can be 177 // registered but cannot be unregistered. There's various simplifying reasons 178 // for doing this, the big ones being: 179 // 180 // 1. Currently we don't even support deallocating TLS keys, so normal operation 181 // doesn't need to deallocate a destructor. 182 // 2. There is no point in time where we know we can unregister a destructor 183 // because it could always be getting run by some remote thread. 184 // 185 // Typically processes have a statically known set of TLS keys which is pretty 186 // small, and we'd want to keep this memory alive for the whole process anyway 187 // really. 188 189 static DTORS: AtomicPtr<StaticKey> = AtomicPtr::new(ptr::null_mut()); 190 191 /// Should only be called once per key, otherwise loops or breaks may occur in 192 /// the linked list. 193 unsafe fn register_dtor(key: &'static StaticKey) { 194 // Ensure this is never run when native thread locals are available. 195 assert_eq!(false, cfg!(target_thread_local)); 196 let this = <*const StaticKey>::cast_mut(key); 197 // Use acquire ordering to pass along the changes done by the previously 198 // registered keys when we store the new head with release ordering. 
199 let mut head = DTORS.load(Acquire); 200 loop { 201 key.next.store(head, Relaxed); 202 match DTORS.compare_exchange_weak(head, this, Release, Acquire) { 203 Ok(_) => break, 204 Err(new) => head = new, 205 } 206 } 207 HAS_DTORS.store(true, Release); 208 } 209 210 // ------------------------------------------------------------------------- 211 // Where the Magic (TM) Happens 212 // 213 // If you're looking at this code, and wondering "what is this doing?", 214 // you're not alone! I'll try to break this down step by step: 215 // 216 // # What's up with CRT$XLB? 217 // 218 // For anything about TLS destructors to work on Windows, we have to be able 219 // to run *something* when a thread exits. To do so, we place a very special 220 // static in a very special location. If this is encoded in just the right 221 // way, the kernel's loader is apparently nice enough to run some function 222 // of ours whenever a thread exits! How nice of the kernel! 223 // 224 // Lots of detailed information can be found in source [1] above, but the 225 // gist of it is that this is leveraging a feature of Microsoft's PE format 226 // (executable format) which is not actually used by any compilers today. 227 // This apparently translates to any callbacks in the ".CRT$XLB" section 228 // being run on certain events. 229 // 230 // So after all that, we use the compiler's #[link_section] feature to place 231 // a callback pointer into the magic section so it ends up being called. 232 // 233 // # What's up with this callback? 234 // 235 // The callback specified receives a number of parameters from... someone! 236 // (the kernel? the runtime? I'm not quite sure!) There are a few events that 237 // this gets invoked for, but we're currently only interested on when a 238 // thread or a process "detaches" (exits). The process part happens for the 239 // last thread and the thread part happens for any normal thread. 240 // 241 // # Ok, what's up with running all these destructors? 
//
// This will likely need to be improved over time, but this function
// attempts a "poor man's" destructor callback system. Once we've got a list
// of what to run, we iterate over all keys, check their values, and then run
// destructors if the values turn out to be non null (setting them to null just
// beforehand). We do this a few times in a loop to basically match Unix
// semantics. If we don't reach a fixed point after a short while then we
// give up, which most likely means something gets leaked.
//
// # The article mentions weird stuff about "/INCLUDE"?
//
// It sure does! Specifically we're talking about this quote:
//
//      The Microsoft run-time library facilitates this process by defining a
//      memory image of the TLS Directory and giving it the special name
//      “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
//      linker looks for this memory image and uses the data there to create the
//      TLS Directory. Other compilers that support TLS and work with the
//      Microsoft linker must use this same technique.
//
// Basically what this means is that if we want support for our TLS
// destructors/our hook being called then we need to make sure the linker does
// not omit this symbol. Otherwise it will omit it and our callback won't be
// wired up.
//
// We don't actually use the `/INCLUDE` linker flag here like the article
// mentions because the Rust compiler doesn't propagate linker flags, but
// instead we use a shim function which performs a volatile 1-byte load from
// the address of the symbol to ensure it sticks around.
271 272 #[link_section = ".CRT$XLB"] 273 #[allow(dead_code, unused_variables)] 274 #[used] // we don't want LLVM eliminating this symbol for any reason, and 275 // when the symbol makes it to the linker the linker will take over 276 pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) = 277 on_tls_callback; 278 279 #[allow(dead_code, unused_variables)] 280 unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv: c::LPVOID) { 281 if !HAS_DTORS.load(Acquire) { 282 return; 283 } 284 if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH { 285 #[cfg(not(target_thread_local))] 286 run_dtors(); 287 #[cfg(target_thread_local)] 288 run_keyless_dtors(); 289 } 290 291 // See comments above for what this is doing. Note that we don't need this 292 // trickery on GNU windows, just on MSVC. 293 reference_tls_used(); 294 #[cfg(target_env = "msvc")] 295 unsafe fn reference_tls_used() { 296 extern "C" { 297 static _tls_used: u8; 298 } 299 crate::std::intrinsics::volatile_load(&_tls_used); 300 } 301 #[cfg(not(target_env = "msvc"))] 302 unsafe fn reference_tls_used() {} 303 } 304 305 #[allow(dead_code)] // actually called below 306 unsafe fn run_dtors() { 307 for _ in 0..5 { 308 let mut any_run = false; 309 310 // Use acquire ordering to observe key initialization. 311 let mut cur = DTORS.load(Acquire); 312 while !cur.is_null() { 313 let key = (*cur).key.load(Relaxed) - 1; 314 let dtor = (*cur).dtor.unwrap(); 315 316 let ptr = c::TlsGetValue(key); 317 if !ptr.is_null() { 318 c::TlsSetValue(key, ptr::null_mut()); 319 dtor(ptr as *mut _); 320 any_run = true; 321 } 322 323 cur = (*cur).next.load(Relaxed); 324 } 325 326 if !any_run { 327 break; 328 } 329 } 330 } 331