1 mod buffer; 2 3 use crate::std::fmt; 4 use crate::std::io::{ 5 self, BorrowedCursor, BufRead, IoSliceMut, Read, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE, 6 }; 7 use buffer::Buffer; 8 9 /// The `BufReader<R>` struct adds buffering to any reader. 10 /// 11 /// It can be excessively inefficient to work directly with a [`Read`] instance. 12 /// For example, every call to [`read`][`TcpStream::read`] on [`TcpStream`] 13 /// results in a system call. A `BufReader<R>` performs large, infrequent reads on 14 /// the underlying [`Read`] and maintains an in-memory buffer of the results. 15 /// 16 /// `BufReader<R>` can improve the speed of programs that make *small* and 17 /// *repeated* read calls to the same file or network socket. It does not 18 /// help when reading very large amounts at once, or reading just one or a few 19 /// times. It also provides no advantage when reading from a source that is 20 /// already in memory, like a <code>[Vec]\<u8></code>. 21 /// 22 /// When the `BufReader<R>` is dropped, the contents of its buffer will be 23 /// discarded. Creating multiple instances of a `BufReader<R>` on the same 24 /// stream can cause data loss. Reading from the underlying reader after 25 /// unwrapping the `BufReader<R>` with [`BufReader::into_inner`] can also cause 26 /// data loss. 27 /// 28 // HACK(#78696): can't use `crate` for associated items 29 /// [`TcpStream::read`]: super::super::super::net::TcpStream::read 30 /// [`TcpStream`]: crate::std::net::TcpStream 31 /// 32 /// # Examples 33 /// 34 /// ```no_run 35 /// use std::io::prelude::*; 36 /// use std::io::BufReader; 37 /// use std::fs::File; 38 /// 39 /// fn main() -> std::io::Result<()> { 40 /// let f = File::open("log.txt")?; 41 /// let mut reader = BufReader::new(f); 42 /// 43 /// let mut line = String::new(); 44 /// let len = reader.read_line(&mut line)?; 45 /// println!("First line is {len} bytes long"); 46 /// Ok(()) 47 /// } 48 /// ``` 49 pub struct BufReader<R: ?Sized> { 50 buf: Buffer, 51 inner: R, 52 } 53 54 impl<R: Read> BufReader<R> { 55 /// Creates a new `BufReader<R>` with a default buffer capacity. The default is currently 8 KiB, 56 /// but may change in the future. 57 /// 58 /// # Examples 59 /// 60 /// ```no_run 61 /// use std::io::BufReader; 62 /// use std::fs::File; 63 /// 64 /// fn main() -> std::io::Result<()> { 65 /// let f = File::open("log.txt")?; 66 /// let reader = BufReader::new(f); 67 /// Ok(()) 68 /// } 69 /// ``` 70 pub fn new(inner: R) -> BufReader<R> { 71 BufReader::with_capacity(DEFAULT_BUF_SIZE, inner) 72 } 73 74 /// Creates a new `BufReader<R>` with the specified buffer capacity. 75 /// 76 /// # Examples 77 /// 78 /// Creating a buffer with ten bytes of capacity: 79 /// 80 /// ```no_run 81 /// use std::io::BufReader; 82 /// use std::fs::File; 83 /// 84 /// fn main() -> std::io::Result<()> { 85 /// let f = File::open("log.txt")?; 86 /// let reader = BufReader::with_capacity(10, f); 87 /// Ok(()) 88 /// } 89 /// ``` 90 pub fn with_capacity(capacity: usize, inner: R) -> BufReader<R> { 91 BufReader { 92 inner, 93 buf: Buffer::with_capacity(capacity), 94 } 95 } 96 } 97 98 impl<R: ?Sized> BufReader<R> { 99 /// Gets a reference to the underlying reader. 100 /// 101 /// It is inadvisable to directly read from the underlying reader. 102 /// 103 /// # Examples 104 /// 105 /// ```no_run 106 /// use std::io::BufReader; 107 /// use std::fs::File; 108 /// 109 /// fn main() -> std::io::Result<()> { 110 /// let f1 = File::open("log.txt")?; 111 /// let reader = BufReader::new(f1); 112 /// 113 /// let f2 = reader.get_ref(); 114 /// Ok(()) 115 /// } 116 /// ``` 117 pub fn get_ref(&self) -> &R { 118 &self.inner 119 } 120 121 /// Gets a mutable reference to the underlying reader. 122 /// 123 /// It is inadvisable to directly read from the underlying reader. 124 /// 125 /// # Examples 126 /// 127 /// ```no_run 128 /// use std::io::BufReader; 129 /// use std::fs::File; 130 /// 131 /// fn main() -> std::io::Result<()> { 132 /// let f1 = File::open("log.txt")?; 133 /// let mut reader = BufReader::new(f1); 134 /// 135 /// let f2 = reader.get_mut(); 136 /// Ok(()) 137 /// } 138 /// ``` 139 pub fn get_mut(&mut self) -> &mut R { 140 &mut self.inner 141 } 142 143 /// Returns a reference to the internally buffered data. 144 /// 145 /// Unlike [`fill_buf`], this will not attempt to fill the buffer if it is empty. 146 /// 147 /// [`fill_buf`]: BufRead::fill_buf 148 /// 149 /// # Examples 150 /// 151 /// ```no_run 152 /// use std::io::{BufReader, BufRead}; 153 /// use std::fs::File; 154 /// 155 /// fn main() -> std::io::Result<()> { 156 /// let f = File::open("log.txt")?; 157 /// let mut reader = BufReader::new(f); 158 /// assert!(reader.buffer().is_empty()); 159 /// 160 /// if reader.fill_buf()?.len() > 0 { 161 /// assert!(!reader.buffer().is_empty()); 162 /// } 163 /// Ok(()) 164 /// } 165 /// ``` 166 pub fn buffer(&self) -> &[u8] { 167 self.buf.buffer() 168 } 169 170 /// Returns the number of bytes the internal buffer can hold at once. 171 /// 172 /// # Examples 173 /// 174 /// ```no_run 175 /// use std::io::{BufReader, BufRead}; 176 /// use std::fs::File; 177 /// 178 /// fn main() -> std::io::Result<()> { 179 /// let f = File::open("log.txt")?; 180 /// let mut reader = BufReader::new(f); 181 /// 182 /// let capacity = reader.capacity(); 183 /// let buffer = reader.fill_buf()?; 184 /// assert!(buffer.len() <= capacity); 185 /// Ok(()) 186 /// } 187 /// ``` 188 pub fn capacity(&self) -> usize { 189 self.buf.capacity() 190 } 191 192 /// Unwraps this `BufReader<R>`, returning the underlying reader. 193 /// 194 /// Note that any leftover data in the internal buffer is lost. Therefore, 195 /// a following read from the underlying reader may lead to data loss. 196 /// 197 /// # Examples 198 /// 199 /// ```no_run 200 /// use std::io::BufReader; 201 /// use std::fs::File; 202 /// 203 /// fn main() -> std::io::Result<()> { 204 /// let f1 = File::open("log.txt")?; 205 /// let reader = BufReader::new(f1); 206 /// 207 /// let f2 = reader.into_inner(); 208 /// Ok(()) 209 /// } 210 /// ``` 211 pub fn into_inner(self) -> R 212 where 213 R: Sized, 214 { 215 self.inner 216 } 217 218 /// Invalidates all data in the internal buffer. 219 #[inline] 220 pub(in crate::std::io) fn discard_buffer(&mut self) { 221 self.buf.discard_buffer() 222 } 223 } 224 225 // This is only used by a test which asserts that the initialization-tracking is correct. 226 #[cfg(test)] 227 impl<R: ?Sized> BufReader<R> { 228 pub fn initialized(&self) -> usize { 229 self.buf.initialized() 230 } 231 } 232 233 impl<R: ?Sized + Seek> BufReader<R> { 234 /// Seeks relative to the current position. If the new position lies within the buffer, 235 /// the buffer will not be flushed, allowing for more efficient seeks. 236 /// This method does not return the location of the underlying reader, so the caller 237 /// must track this information themselves if it is required. 238 pub fn seek_relative(&mut self, offset: i64) -> io::Result<()> { 239 let pos = self.buf.pos() as u64; 240 if offset < 0 { 241 if let Some(_) = pos.checked_sub((-offset) as u64) { 242 self.buf.unconsume((-offset) as usize); 243 return Ok(()); 244 } 245 } else if let Some(new_pos) = pos.checked_add(offset as u64) { 246 if new_pos <= self.buf.filled() as u64 { 247 self.buf.consume(offset as usize); 248 return Ok(()); 249 } 250 } 251 252 self.seek(SeekFrom::Current(offset)).map(drop) 253 } 254 } 255 256 impl<R: ?Sized + Read> Read for BufReader<R> { 257 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { 258 // If we don't have any buffered data and we're doing a massive read 259 // (larger than our internal buffer), bypass our internal buffer 260 // entirely. 261 if self.buf.pos() == self.buf.filled() && buf.len() >= self.capacity() { 262 self.discard_buffer(); 263 return self.inner.read(buf); 264 } 265 let nread = { 266 let mut rem = self.fill_buf()?; 267 rem.read(buf)? 268 }; 269 self.consume(nread); 270 Ok(nread) 271 } 272 273 fn read_buf(&mut self, mut cursor: BorrowedCursor<'_>) -> io::Result<()> { 274 // If we don't have any buffered data and we're doing a massive read 275 // (larger than our internal buffer), bypass our internal buffer 276 // entirely. 277 if self.buf.pos() == self.buf.filled() && cursor.capacity() >= self.capacity() { 278 self.discard_buffer(); 279 return self.inner.read_buf(cursor); 280 } 281 282 let prev = cursor.written(); 283 284 let mut rem = self.fill_buf()?; 285 rem.read_buf(cursor.reborrow())?; 286 287 self.consume(cursor.written() - prev); //slice impl of read_buf known to never unfill buf 288 289 Ok(()) 290 } 291 292 // Small read_exacts from a BufReader are extremely common when used with a deserializer. 293 // The default implementation calls read in a loop, which results in surprisingly poor code 294 // generation for the common path where the buffer has enough bytes to fill the passed-in 295 // buffer. 296 fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { 297 if self 298 .buf 299 .consume_with(buf.len(), |claimed| buf.copy_from_slice(claimed)) 300 { 301 return Ok(()); 302 } 303 304 crate::std::io::default_read_exact(self, buf) 305 } 306 307 fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> { 308 let total_len = bufs.iter().map(|b| b.len()).sum::<usize>(); 309 if self.buf.pos() == self.buf.filled() && total_len >= self.capacity() { 310 self.discard_buffer(); 311 return self.inner.read_vectored(bufs); 312 } 313 let nread = { 314 let mut rem = self.fill_buf()?; 315 rem.read_vectored(bufs)? 316 }; 317 self.consume(nread); 318 Ok(nread) 319 } 320 321 fn is_read_vectored(&self) -> bool { 322 self.inner.is_read_vectored() 323 } 324 325 // The inner reader might have an optimized `read_to_end`. Drain our buffer and then 326 // delegate to the inner implementation. 327 fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> { 328 let inner_buf = self.buffer(); 329 buf.extend_from_slice(inner_buf); 330 let nread = inner_buf.len(); 331 self.discard_buffer(); 332 Ok(nread + self.inner.read_to_end(buf)?) 333 } 334 335 // The inner reader might have an optimized `read_to_end`. Drain our buffer and then 336 // delegate to the inner implementation. 337 fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> { 338 // In the general `else` case below we must read bytes into a side buffer, check 339 // that they are valid UTF-8, and then append them to `buf`. This requires a 340 // potentially large memcpy. 341 // 342 // If `buf` is empty--the most common case--we can leverage `append_to_string` 343 // to read directly into `buf`'s internal byte buffer, saving an allocation and 344 // a memcpy. 345 if buf.is_empty() { 346 // `append_to_string`'s safety relies on the buffer only being appended to since 347 // it only checks the UTF-8 validity of new data. If there were existing content in 348 // `buf` then an untrustworthy reader (i.e. `self.inner`) could not only append 349 // bytes but also modify existing bytes and render them invalid. On the other hand, 350 // if `buf` is empty then by definition any writes must be appends and 351 // `append_to_string` will validate all of the new bytes. 352 unsafe { crate::std::io::append_to_string(buf, |b| self.read_to_end(b)) } 353 } else { 354 // We cannot append our byte buffer directly onto the `buf` String as there could 355 // be an incomplete UTF-8 sequence that has only been partially read. We must read 356 // everything into a side buffer first and then call `from_utf8` on the complete 357 // buffer. 358 let mut bytes = Vec::new(); 359 self.read_to_end(&mut bytes)?; 360 let string = crate::std::str::from_utf8(&bytes).map_err(|_| { 361 io::const_io_error!( 362 io::ErrorKind::InvalidData, 363 "stream did not contain valid UTF-8", 364 ) 365 })?; 366 *buf += string; 367 Ok(string.len()) 368 } 369 } 370 } 371 372 impl<R: ?Sized + Read> BufRead for BufReader<R> { 373 fn fill_buf(&mut self) -> io::Result<&[u8]> { 374 self.buf.fill_buf(&mut self.inner) 375 } 376 377 fn consume(&mut self, amt: usize) { 378 self.buf.consume(amt) 379 } 380 } 381 382 impl<R> fmt::Debug for BufReader<R> 383 where 384 R: ?Sized + fmt::Debug, 385 { 386 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { 387 fmt.debug_struct("BufReader") 388 .field("reader", &&self.inner) 389 .field( 390 "buffer", 391 &format_args!("{}/{}", self.buf.filled() - self.buf.pos(), self.capacity()), 392 ) 393 .finish() 394 } 395 } 396 397 impl<R: ?Sized + Seek> Seek for BufReader<R> { 398 /// Seek to an offset, in bytes, in the underlying reader. 399 /// 400 /// The position used for seeking with <code>[SeekFrom::Current]\(_)</code> is the 401 /// position the underlying reader would be at if the `BufReader<R>` had no 402 /// internal buffer. 403 /// 404 /// Seeking always discards the internal buffer, even if the seek position 405 /// would otherwise fall within it. This guarantees that calling 406 /// [`BufReader::into_inner()`] immediately after a seek yields the underlying reader 407 /// at the same position. 408 /// 409 /// To seek without discarding the internal buffer, use [`BufReader::seek_relative`]. 410 /// 411 /// See [`std::io::Seek`] for more details. 412 /// 413 /// Note: In the edge case where you're seeking with <code>[SeekFrom::Current]\(n)</code> 414 /// where `n` minus the internal buffer length overflows an `i64`, two 415 /// seeks will be performed instead of one. If the second seek returns 416 /// [`Err`], the underlying reader will be left at the same position it would 417 /// have if you called `seek` with <code>[SeekFrom::Current]\(0)</code>. 418 /// 419 /// [`std::io::Seek`]: Seek 420 fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> { 421 let result: u64; 422 if let SeekFrom::Current(n) = pos { 423 let remainder = (self.buf.filled() - self.buf.pos()) as i64; 424 // it should be safe to assume that remainder fits within an i64 as the alternative 425 // means we managed to allocate 8 exbibytes and that's absurd. 426 // But it's not out of the realm of possibility for some weird underlying reader to 427 // support seeking by i64::MIN so we need to handle underflow when subtracting 428 // remainder. 429 if let Some(offset) = n.checked_sub(remainder) { 430 result = self.inner.seek(SeekFrom::Current(offset))?; 431 } else { 432 // seek backwards by our remainder, and then by the offset 433 self.inner.seek(SeekFrom::Current(-remainder))?; 434 self.discard_buffer(); 435 result = self.inner.seek(SeekFrom::Current(n))?; 436 } 437 } else { 438 // Seeking with Start/End doesn't care about our buffer length. 439 result = self.inner.seek(pos)?; 440 } 441 self.discard_buffer(); 442 Ok(result) 443 } 444 445 /// Returns the current seek position from the start of the stream. 446 /// 447 /// The value returned is equivalent to `self.seek(SeekFrom::Current(0))` 448 /// but does not flush the internal buffer. Due to this optimization the 449 /// function does not guarantee that calling `.into_inner()` immediately 450 /// afterwards will yield the underlying reader at the same position. Use 451 /// [`BufReader::seek`] instead if you require that guarantee. 452 /// 453 /// # Panics 454 /// 455 /// This function will panic if the position of the inner reader is smaller 456 /// than the amount of buffered data. That can happen if the inner reader 457 /// has an incorrect implementation of [`Seek::stream_position`], or if the 458 /// position has gone out of sync due to calling [`Seek::seek`] directly on 459 /// the underlying reader. 460 /// 461 /// # Example 462 /// 463 /// ```no_run 464 /// use std::{ 465 /// io::{self, BufRead, BufReader, Seek}, 466 /// fs::File, 467 /// }; 468 /// 469 /// fn main() -> io::Result<()> { 470 /// let mut f = BufReader::new(File::open("foo.txt")?); 471 /// 472 /// let before = f.stream_position()?; 473 /// f.read_line(&mut String::new())?; 474 /// let after = f.stream_position()?; 475 /// 476 /// println!("The first line was {} bytes long", after - before); 477 /// Ok(()) 478 /// } 479 /// ``` 480 fn stream_position(&mut self) -> io::Result<u64> { 481 let remainder = (self.buf.filled() - self.buf.pos()) as u64; 482 self.inner.stream_position().map(|pos| { 483 pos.checked_sub(remainder).expect( 484 "overflow when subtracting remaining buffer size from inner stream position", 485 ) 486 }) 487 } 488 } 489 490 impl<T: ?Sized> SizeHint for BufReader<T> { 491 #[inline] 492 fn lower_bound(&self) -> usize { 493 SizeHint::lower_bound(self.get_ref()) + self.buffer().len() 494 } 495 496 #[inline] 497 fn upper_bound(&self) -> Option<usize> { 498 SizeHint::upper_bound(self.get_ref()).and_then(|up| self.buffer().len().checked_add(up)) 499 } 500 } 501