1 //! Collection types. 2 //! 3 //! Rust's standard collection library provides efficient implementations of the 4 //! most common general purpose programming data structures. By using the 5 //! standard implementations, it should be possible for two libraries to 6 //! communicate without significant data conversion. 7 //! 8 //! To get this out of the way: you should probably just use [`Vec`] or [`HashMap`]. 9 //! These two collections cover most use cases for generic data storage and 10 //! processing. They are exceptionally good at doing what they do. All the other 11 //! collections in the standard library have specific use cases where they are 12 //! the optimal choice, but these cases are borderline *niche* in comparison. 13 //! Even when `Vec` and `HashMap` are technically suboptimal, they're probably a 14 //! good enough choice to get started. 15 //! 16 //! Rust's collections can be grouped into four major categories: 17 //! 18 //! * Sequences: [`Vec`], [`VecDeque`], [`LinkedList`] 19 //! * Maps: [`HashMap`], [`BTreeMap`] 20 //! * Sets: [`HashSet`], [`BTreeSet`] 21 //! * Misc: [`BinaryHeap`] 22 //! 23 //! # When Should You Use Which Collection? 24 //! 25 //! These are fairly high-level and quick break-downs of when each collection 26 //! should be considered. Detailed discussions of strengths and weaknesses of 27 //! individual collections can be found on their own documentation pages. 28 //! 29 //! ### Use a `Vec` when: 30 //! * You want to collect items up to be processed or sent elsewhere later, and 31 //! don't care about any properties of the actual values being stored. 32 //! * You want a sequence of elements in a particular order, and will only be 33 //! appending to (or near) the end. 34 //! * You want a stack. 35 //! * You want a resizable array. 36 //! * You want a heap-allocated array. 37 //! 38 //! ### Use a `VecDeque` when: 39 //! * You want a [`Vec`] that supports efficient insertion at both ends of the 40 //! sequence. 41 //! * You want a queue. 42 //! * You want a double-ended queue (deque). 43 //! 44 //! ### Use a `LinkedList` when: 45 //! * You want a [`Vec`] or [`VecDeque`] of unknown size, and can't tolerate 46 //! amortization. 47 //! * You want to efficiently split and append lists. 48 //! * You are *absolutely* certain you *really*, *truly*, want a doubly linked 49 //! list. 50 //! 51 //! ### Use a `HashMap` when: 52 //! * You want to associate arbitrary keys with an arbitrary value. 53 //! * You want a cache. 54 //! * You want a map, with no extra functionality. 55 //! 56 //! ### Use a `BTreeMap` when: 57 //! * You want a map sorted by its keys. 58 //! * You want to be able to get a range of entries on-demand. 59 //! * You're interested in what the smallest or largest key-value pair is. 60 //! * You want to find the largest or smallest key that is smaller or larger 61 //! than something. 62 //! 63 //! ### Use the `Set` variant of any of these `Map`s when: 64 //! * You just want to remember which keys you've seen. 65 //! * There is no meaningful value to associate with your keys. 66 //! * You just want a set. 67 //! 68 //! ### Use a `BinaryHeap` when: 69 //! 70 //! * You want to store a bunch of elements, but only ever want to process the 71 //! "biggest" or "most important" one at any given time. 72 //! * You want a priority queue. 73 //! 74 //! # Performance 75 //! 76 //! Choosing the right collection for the job requires an understanding of what 77 //! each collection is good at. Here we briefly summarize the performance of 78 //! different collections for certain important operations. For further details, 79 //! see each type's documentation, and note that the names of actual methods may 80 //! differ from the tables below on certain collections. 81 //! 82 //! Throughout the documentation, we will follow a few conventions. For all 83 //! operations, the collection's size is denoted by n. If another collection is 84 //! involved in the operation, it contains m elements. Operations which have an 85 //! *amortized* cost are suffixed with a `*`. Operations with an *expected* 86 //! cost are suffixed with a `~`. 87 //! 88 //! All amortized costs are for the potential need to resize when capacity is 89 //! exhausted. If a resize occurs it will take *O*(*n*) time. Our collections never 90 //! automatically shrink, so removal operations aren't amortized. Over a 91 //! sufficiently large series of operations, the average cost per operation will 92 //! deterministically equal the given cost. 93 //! 94 //! Only [`HashMap`] has expected costs, due to the probabilistic nature of hashing. 95 //! It is theoretically possible, though very unlikely, for [`HashMap`] to 96 //! experience worse performance. 97 //! 98 //! ## Sequences 99 //! 100 //! | | get(i) | insert(i) | remove(i) | append | split_off(i) | 101 //! |----------------|------------------------|-------------------------|------------------------|-----------|------------------------| 102 //! | [`Vec`] | *O*(1) | *O*(*n*-*i*)* | *O*(*n*-*i*) | *O*(*m*)* | *O*(*n*-*i*) | 103 //! | [`VecDeque`] | *O*(1) | *O*(min(*i*, *n*-*i*))* | *O*(min(*i*, *n*-*i*)) | *O*(*m*)* | *O*(min(*i*, *n*-*i*)) | 104 //! | [`LinkedList`] | *O*(min(*i*, *n*-*i*)) | *O*(min(*i*, *n*-*i*)) | *O*(min(*i*, *n*-*i*)) | *O*(1) | *O*(min(*i*, *n*-*i*)) | 105 //! 106 //! Note that where ties occur, [`Vec`] is generally going to be faster than [`VecDeque`], and 107 //! [`VecDeque`] is generally going to be faster than [`LinkedList`]. 108 //! 109 //! ## Maps 110 //! 111 //! For Sets, all operations have the cost of the equivalent Map operation. 112 //! 113 //! | | get | insert | remove | range | append | 114 //! |--------------|---------------|---------------|---------------|---------------|--------------| 115 //! | [`HashMap`] | *O*(1)~ | *O*(1)~* | *O*(1)~ | N/A | N/A | 116 //! | [`BTreeMap`] | *O*(log(*n*)) | *O*(log(*n*)) | *O*(log(*n*)) | *O*(log(*n*)) | *O*(*n*+*m*) | 117 //! 118 //! # Correct and Efficient Usage of Collections 119 //! 120 //! Of course, knowing which collection is the right one for the job doesn't 121 //! instantly permit you to use it correctly. Here are some quick tips for 122 //! efficient and correct usage of the standard collections in general. If 123 //! you're interested in how to use a specific collection in particular, consult 124 //! its documentation for detailed discussion and code examples. 125 //! 126 //! ## Capacity Management 127 //! 128 //! Many collections provide several constructors and methods that refer to 129 //! "capacity". These collections are generally built on top of an array. 130 //! Optimally, this array would be exactly the right size to fit only the 131 //! elements stored in the collection, but for the collection to do this would 132 //! be very inefficient. If the backing array was exactly the right size at all 133 //! times, then every time an element is inserted, the collection would have to 134 //! grow the array to fit it. Due to the way memory is allocated and managed on 135 //! most computers, this would almost surely require allocating an entirely new 136 //! array and copying every single element from the old one into the new one. 137 //! Hopefully you can see that this wouldn't be very efficient to do on every 138 //! operation. 139 //! 140 //! Most collections therefore use an *amortized* allocation strategy. They 141 //! generally let themselves have a fair amount of unoccupied space so that they 142 //! only have to grow on occasion. When they do grow, they allocate a 143 //! substantially larger array to move the elements into so that it will take a 144 //! while for another grow to be required. While this strategy is great in 145 //! general, it would be even better if the collection *never* had to resize its 146 //! backing array. Unfortunately, the collection itself doesn't have enough 147 //! information to do this itself. Therefore, it is up to us programmers to give 148 //! it hints. 149 //! 150 //! Any `with_capacity` constructor will instruct the collection to allocate 151 //! enough space for the specified number of elements. Ideally this will be for 152 //! exactly that many elements, but some implementation details may prevent 153 //! this. See collection-specific documentation for details. In general, use 154 //! `with_capacity` when you know exactly how many elements will be inserted, or 155 //! at least have a reasonable upper-bound on that number. 156 //! 157 //! When anticipating a large influx of elements, the `reserve` family of 158 //! methods can be used to hint to the collection how much room it should make 159 //! for the coming items. As with `with_capacity`, the precise behavior of 160 //! these methods will be specific to the collection of interest. 161 //! 162 //! For optimal performance, collections will generally avoid shrinking 163 //! themselves. If you believe that a collection will not soon contain any more 164 //! elements, or just really need the memory, the `shrink_to_fit` method prompts 165 //! the collection to shrink the backing array to the minimum size capable of 166 //! holding its elements. 167 //! 168 //! Finally, if ever you're interested in what the actual capacity of the 169 //! collection is, most collections provide a `capacity` method to query this 170 //! information on demand. This can be useful for debugging purposes, or for 171 //! use with the `reserve` methods. 172 //! 173 //! ## Iterators 174 //! 175 //! [Iterators][crate::std::iter] 176 //! are a powerful and robust mechanism used throughout Rust's 177 //! standard libraries. Iterators provide a sequence of values in a generic, 178 //! safe, efficient and convenient way. The contents of an iterator are usually 179 //! *lazily* evaluated, so that only the values that are actually needed are 180 //! ever actually produced, and no allocation need be done to temporarily store 181 //! them. Iterators are primarily consumed using a `for` loop, although many 182 //! functions also take iterators where a collection or sequence of values is 183 //! desired. 184 //! 185 //! All of the standard collections provide several iterators for performing 186 //! bulk manipulation of their contents. The three primary iterators almost 187 //! every collection should provide are `iter`, `iter_mut`, and `into_iter`. 188 //! Some of these are not provided on collections where it would be unsound or 189 //! unreasonable to provide them. 190 //! 191 //! `iter` provides an iterator of immutable references to all the contents of a 192 //! collection in the most "natural" order. For sequence collections like [`Vec`], 193 //! this means the items will be yielded in increasing order of index starting 194 //! at 0. For ordered collections like [`BTreeMap`], this means that the items 195 //! will be yielded in sorted order. For unordered collections like [`HashMap`], 196 //! the items will be yielded in whatever order the internal representation made 197 //! most convenient. This is great for reading through all the contents of the 198 //! collection. 199 //! 200 //! ``` 201 //! let vec = vec![1, 2, 3, 4]; 202 //! for x in vec.iter() { 203 //! println!("vec contained {x:?}"); 204 //! } 205 //! ``` 206 //! 207 //! `iter_mut` provides an iterator of *mutable* references in the same order as 208 //! `iter`. This is great for mutating all the contents of the collection. 209 //! 210 //! ``` 211 //! let mut vec = vec![1, 2, 3, 4]; 212 //! for x in vec.iter_mut() { 213 //! *x += 1; 214 //! } 215 //! ``` 216 //! 217 //! `into_iter` transforms the actual collection into an iterator over its 218 //! contents by-value. This is great when the collection itself is no longer 219 //! needed, and the values are needed elsewhere. Using `extend` with `into_iter` 220 //! is the main way that contents of one collection are moved into another. 221 //! `extend` automatically calls `into_iter`, and takes any <code>T: [IntoIterator]</code>. 222 //! Calling `collect` on an iterator itself is also a great way to convert one 223 //! collection into another. Both of these methods should internally use the 224 //! capacity management tools discussed in the previous section to do this as 225 //! efficiently as possible. 226 //! 227 //! ``` 228 //! let mut vec1 = vec![1, 2, 3, 4]; 229 //! let vec2 = vec![10, 20, 30, 40]; 230 //! vec1.extend(vec2); 231 //! ``` 232 //! 233 //! ``` 234 //! use std::collections::VecDeque; 235 //! 236 //! let vec = [1, 2, 3, 4]; 237 //! let buf: VecDeque<_> = vec.into_iter().collect(); 238 //! ``` 239 //! 240 //! Iterators also provide a series of *adapter* methods for performing common 241 //! threads to sequences. Among the adapters are functional favorites like `map`, 242 //! `fold`, `skip` and `take`. Of particular interest to collections is the 243 //! `rev` adapter, which reverses any iterator that supports this operation. Most 244 //! collections provide reversible iterators as the way to iterate over them in 245 //! reverse order. 246 //! 247 //! ``` 248 //! let vec = vec![1, 2, 3, 4]; 249 //! for x in vec.iter().rev() { 250 //! println!("vec contained {x:?}"); 251 //! } 252 //! ``` 253 //! 254 //! Several other collection methods also return iterators to yield a sequence 255 //! of results but avoid allocating an entire collection to store the result in. 256 //! This provides maximum flexibility as 257 //! [`collect`][crate::std::iter::Iterator::collect] or 258 //! [`extend`][crate::std::iter::Extend::extend] can be called to 259 //! "pipe" the sequence into any collection if desired. Otherwise, the sequence 260 //! can be looped over with a `for` loop. The iterator can also be discarded 261 //! after partial use, preventing the computation of the unused items. 262 //! 263 //! ## Entries 264 //! 265 //! The `entry` API is intended to provide an efficient mechanism for 266 //! manipulating the contents of a map conditionally on the presence of a key or 267 //! not. The primary motivating use case for this is to provide efficient 268 //! accumulator maps. For instance, if one wishes to maintain a count of the 269 //! number of times each key has been seen, they will have to perform some 270 //! conditional logic on whether this is the first time the key has been seen or 271 //! not. Normally, this would require a `find` followed by an `insert`, 272 //! effectively duplicating the search effort on each insertion. 273 //! 274 //! When a user calls `map.entry(key)`, the map will search for the key and 275 //! then yield a variant of the `Entry` enum. 276 //! 277 //! If a `Vacant(entry)` is yielded, then the key *was not* found. In this case 278 //! the only valid operation is to `insert` a value into the entry. When this is 279 //! done, the vacant entry is consumed and converted into a mutable reference to 280 //! the value that was inserted. This allows for further manipulation of the 281 //! value beyond the lifetime of the search itself. This is useful if complex 282 //! logic needs to be performed on the value regardless of whether the value was 283 //! just inserted. 284 //! 285 //! If an `Occupied(entry)` is yielded, then the key *was* found. In this case, 286 //! the user has several options: they can `get`, `insert` or `remove` the 287 //! value of the occupied entry. Additionally, they can convert the occupied 288 //! entry into a mutable reference to its value, providing symmetry to the 289 //! vacant `insert` case. 290 //! 291 //! ### Examples 292 //! 293 //! Here are the two primary ways in which `entry` is used. First, a simple 294 //! example where the logic performed on the values is trivial. 295 //! 296 //! #### Counting the number of times each character in a string occurs 297 //! 298 //! ``` 299 //! use std::collections::btree_map::BTreeMap; 300 //! 301 //! let mut count = BTreeMap::new(); 302 //! let message = "she sells sea shells by the sea shore"; 303 //! 304 //! for c in message.chars() { 305 //! *count.entry(c).or_insert(0) += 1; 306 //! } 307 //! 308 //! assert_eq!(count.get(&'s'), Some(&8)); 309 //! 310 //! println!("Number of occurrences of each character"); 311 //! for (char, count) in &count { 312 //! println!("{char}: {count}"); 313 //! } 314 //! ``` 315 //! 316 //! When the logic to be performed on the value is more complex, we may simply 317 //! use the `entry` API to ensure that the value is initialized and perform the 318 //! logic afterwards. 319 //! 320 //! #### Tracking the inebriation of customers at a bar 321 //! 322 //! ``` 323 //! use std::collections::btree_map::BTreeMap; 324 //! 325 //! // A client of the bar. They have a blood alcohol level. 326 //! struct Person { blood_alcohol: f32 } 327 //! 328 //! // All the orders made to the bar, by client ID. 329 //! let orders = vec![1, 2, 1, 2, 3, 4, 1, 2, 2, 3, 4, 1, 1, 1]; 330 //! 331 //! // Our clients. 332 //! let mut blood_alcohol = BTreeMap::new(); 333 //! 334 //! for id in orders { 335 //! // If this is the first time we've seen this customer, initialize them 336 //! // with no blood alcohol. Otherwise, just retrieve them. 337 //! let person = blood_alcohol.entry(id).or_insert(Person { blood_alcohol: 0.0 }); 338 //! 339 //! // Reduce their blood alcohol level. It takes time to order and drink a beer! 340 //! person.blood_alcohol *= 0.9; 341 //! 342 //! // Check if they're sober enough to have another beer. 343 //! if person.blood_alcohol > 0.3 { 344 //! // Too drunk... for now. 345 //! println!("Sorry {id}, I have to cut you off"); 346 //! } else { 347 //! // Have another! 348 //! person.blood_alcohol += 0.1; 349 //! } 350 //! } 351 //! ``` 352 //! 353 //! # Insert and complex keys 354 //! 355 //! If we have a more complex key, calls to `insert` will 356 //! not update the value of the key. For example: 357 //! 358 //! ``` 359 //! use std::cmp::Ordering; 360 //! use std::collections::BTreeMap; 361 //! use std::hash::{Hash, Hasher}; 362 //! 363 //! #[derive(Debug)] 364 //! struct Foo { 365 //! a: u32, 366 //! b: &'static str, 367 //! } 368 //! 369 //! // we will compare `Foo`s by their `a` value only. 370 //! impl PartialEq for Foo { 371 //! fn eq(&self, other: &Self) -> bool { self.a == other.a } 372 //! } 373 //! 374 //! impl Eq for Foo {} 375 //! 376 //! // we will hash `Foo`s by their `a` value only. 377 //! impl Hash for Foo { 378 //! fn hash<H: Hasher>(&self, h: &mut H) { self.a.hash(h); } 379 //! } 380 //! 381 //! impl PartialOrd for Foo { 382 //! fn partial_cmp(&self, other: &Self) -> Option<Ordering> { self.a.partial_cmp(&other.a) } 383 //! } 384 //! 385 //! impl Ord for Foo { 386 //! fn cmp(&self, other: &Self) -> Ordering { self.a.cmp(&other.a) } 387 //! } 388 //! 389 //! let mut map = BTreeMap::new(); 390 //! map.insert(Foo { a: 1, b: "baz" }, 99); 391 //! 392 //! // We already have a Foo with an a of 1, so this will be updating the value. 393 //! map.insert(Foo { a: 1, b: "xyz" }, 100); 394 //! 395 //! // The value has been updated... 396 //! assert_eq!(map.values().next().unwrap(), &100); 397 //! 398 //! // ...but the key hasn't changed. b is still "baz", not "xyz". 399 //! assert_eq!(map.keys().next().unwrap().b, "baz"); 400 //! ``` 401 402 // FIXME(#82080) The deprecation here is only theoretical, and does not actually produce a warning. 403 #[deprecated(note = "moved to `std::ops::Bound`", since = "1.26.0")] 404 #[doc(hidden)] 405 pub use crate::std::ops::Bound; 406 407 pub use alloc::collections::{binary_heap, btree_map, btree_set}; 408 pub use alloc::collections::{linked_list, vec_deque}; 409 pub use alloc::collections::{BTreeMap, BTreeSet, BinaryHeap}; 410 pub use alloc::collections::{LinkedList, VecDeque}; 411 412 #[doc(inline)] 413 pub use hashbrown::HashMap; 414 //pub use self::hash_map::HashMap; 415 #[doc(inline)] 416 // pub use self::hash_set::HashSet; 417 pub use hashbrown::HashSet; 418 419 pub use alloc::collections::TryReserveError; 420 421 pub use alloc::collections::TryReserveErrorKind; 422 423 mod hash; 424 425 pub mod hash_map { 426 //! A hash map implemented with quadratic probing and SIMD lookup. 427 pub use super::hash::map::*; 428 } 429 430 pub mod hash_set { 431 //! A hash set implemented as a `HashMap` where the value is `()`. 432 pub use super::hash::set::*; 433 } 434