// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2025 Google LLC.

//! This module has utilities for managing a page range where unused pages may be reclaimed by a
//! vma shrinker.

// To avoid deadlocks, locks are taken in the order:
//
// 1. mmap lock
// 2. spinlock
// 3. lru spinlock
//
// The shrinker will use trylock methods because it locks them in a different order.

use core::{
    marker::PhantomPinned,
    mem::{size_of, size_of_val, MaybeUninit},
    ptr,
};

use kernel::{
    bindings,
    error::Result,
    ffi::{c_ulong, c_void},
    mm::{virt, Mm, MmWithUser},
    new_mutex, new_spinlock,
    page::{Page, PAGE_SHIFT, PAGE_SIZE},
    prelude::*,
    str::CStr,
    sync::{aref::ARef, Mutex, SpinLock},
    task::Pid,
    transmute::FromBytes,
    types::Opaque,
    uaccess::UserSliceReader,
};

/// Represents a shrinker that can be registered with the kernel.
///
/// Each shrinker can be used by many `ShrinkablePageRange` objects.
#[repr(C)]
pub(crate) struct Shrinker {
    inner: Opaque<*mut bindings::shrinker>,
    list_lru: Opaque<bindings::list_lru>,
}

// SAFETY: The shrinker and list_lru are thread safe.
unsafe impl Send for Shrinker {}
// SAFETY: The shrinker and list_lru are thread safe.
unsafe impl Sync for Shrinker {}

impl Shrinker {
    /// Create a new shrinker.
    ///
    /// # Safety
    ///
    /// Before using this shrinker with a `ShrinkablePageRange`, the `register` method must have
    /// been called exactly once, and it must not have returned an error.
    pub(crate) const unsafe fn new() -> Self {
        Self {
            inner: Opaque::uninit(),
            list_lru: Opaque::uninit(),
        }
    }

    /// Register this shrinker with the kernel.
    pub(crate) fn register(&'static self, name: &CStr) -> Result<()> {
        // SAFETY: These fields are not yet used, so it's okay to zero them.
        unsafe {
            self.inner.get().write(ptr::null_mut());
            self.list_lru.get().write_bytes(0, 1);
        }

        // SAFETY: The field is not yet used, so we can initialize it.
        let ret = unsafe { bindings::__list_lru_init(self.list_lru.get(), false, ptr::null_mut()) };
        if ret != 0 {
            return Err(Error::from_errno(ret));
        }

        // SAFETY: The `name` points at a valid c string.
        let shrinker = unsafe { bindings::shrinker_alloc(0, name.as_char_ptr()) };
        if shrinker.is_null() {
            // SAFETY: We initialized it, so it's okay to destroy it.
            unsafe { bindings::list_lru_destroy(self.list_lru.get()) };
            return Err(ENOMEM);
        }

        // SAFETY: We're about to register the shrinker, and these are the fields we need to
        // initialize. (All other fields are already zeroed.)
        unsafe {
            (&raw mut (*shrinker).count_objects).write(Some(rust_shrink_count));
            (&raw mut (*shrinker).scan_objects).write(Some(rust_shrink_scan));
            (&raw mut (*shrinker).private_data).write(self.list_lru.get().cast());
        }

        // SAFETY: The new shrinker has been fully initialized, so we can register it.
        unsafe { bindings::shrinker_register(shrinker) };

        // SAFETY: This initializes the pointer to the shrinker so that we can use it.
        unsafe { self.inner.get().write(shrinker) };

        Ok(())
    }
}
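
// Illustrative sketch (editorial, not part of the driver): a user of this module would typically
// keep a single static `Shrinker` and register it exactly once during module initialization. The
// names `MY_SHRINKER`, `my_driver` and `module_init` below are hypothetical.
//
//     // SAFETY: `register` is called exactly once, in `module_init` below, before the shrinker
//     // is used with any `ShrinkablePageRange`.
//     static MY_SHRINKER: Shrinker = unsafe { Shrinker::new() };
//
//     fn module_init() -> Result<()> {
//         MY_SHRINKER.register(kernel::c_str!("my_driver"))?;
//         Ok(())
//     }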

/// A container that manages a page range in a vma.
///
/// The pages can be thought of as an array of booleans of whether the pages are usable. The
/// methods `use_range` and `stop_using_range` set all booleans in a range to true or false
/// respectively. Initially, no pages are allocated. When a page is not used, it is not freed
/// immediately. Instead, it is made available to the memory shrinker to free it if the device is
/// under memory pressure.
///
/// It's okay for `use_range` and `stop_using_range` to race with each other, although there's no
/// way to know whether an index ends up with true or false if a call to `use_range` races with
/// another call to `stop_using_range` on a given index.
///
/// It's also okay for the two methods to race with themselves, e.g. if two threads call
/// `use_range` on the same index, then that's fine and neither call will return until the page is
/// allocated and mapped.
///
/// The methods that read or write to a range require that the page is marked as in use. So it is
/// _not_ okay to call `stop_using_range` on a page that is in use by the methods that read or
/// write to the page.
#[pin_data(PinnedDrop)]
pub(crate) struct ShrinkablePageRange {
    /// Shrinker object registered with the kernel.
    shrinker: &'static Shrinker,
    /// Pid using this page range. Only used as debugging information.
    pid: Pid,
    /// The mm for the relevant process.
    mm: ARef<Mm>,
    /// Used to synchronize calls to `vm_insert_page` and `zap_page_range_single`.
    #[pin]
    mm_lock: Mutex<()>,
    /// Spinlock protecting changes to pages.
    #[pin]
    lock: SpinLock<Inner>,

    /// Must not move, since page info has pointers back.
    #[pin]
    _pin: PhantomPinned,
}

struct Inner {
    /// Array of pages.
    ///
    /// Since this is also accessed by the shrinker, we can't use a `Box`, which asserts exclusive
    /// ownership. To deal with that, we manage it using raw pointers.
    pages: *mut PageInfo,
    /// Length of the `pages` array.
    size: usize,
    /// The address of the vma to insert the pages into.
    vma_addr: usize,
}

// SAFETY: proper locking is in place for `Inner`
unsafe impl Send for Inner {}

type StableMmGuard =
    kernel::sync::lock::Guard<'static, (), kernel::sync::lock::mutex::MutexBackend>;

/// An array element that describes the current state of a page.
///
/// There are three states:
///
/// * Free. The page is None. The `lru` element is not queued.
/// * Available. The page is Some. The `lru` element is queued to the shrinker's lru.
/// * Used. The page is Some. The `lru` element is not queued.
///
/// When an element is available, the shrinker is able to free the page.
#[repr(C)]
struct PageInfo {
    lru: bindings::list_head,
    page: Option<Page>,
    range: *const ShrinkablePageRange,
}

impl PageInfo {
    /// # Safety
    ///
    /// The caller ensures that writing to `me.page` is ok, and that the page is not currently set.
    unsafe fn set_page(me: *mut PageInfo, page: Page) {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // SAFETY: The pointer is valid for writing, so also valid for reading.
        if unsafe { (*ptr).is_some() } {
            pr_err!("set_page called when there is already a page");
            // SAFETY: We will initialize the page again below.
            unsafe { ptr::drop_in_place(ptr) };
        }

        // SAFETY: The pointer is valid for writing.
        unsafe { ptr::write(ptr, Some(page)) };
    }

    /// # Safety
    ///
    /// The caller ensures that reading from `me.page` is ok for the duration of 'a.
    unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw const (*me).page };

        // SAFETY: The pointer is valid for reading.
        unsafe { (*ptr).as_ref() }
    }

    /// # Safety
    ///
    /// The caller ensures that writing to `me.page` is ok for the duration of 'a.
    unsafe fn take_page(me: *mut PageInfo) -> Option<Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // SAFETY: The pointer is valid for reading and writing.
        unsafe { (*ptr).take() }
    }

    /// Add this page to the lru list, if not already in the list.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_add(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_add(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }

    /// Remove this page from the lru list, if it is in the list.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_del(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_del(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }
}

impl ShrinkablePageRange {
    /// Create a new `ShrinkablePageRange` using the given shrinker.
    pub(crate) fn new(shrinker: &'static Shrinker) -> impl PinInit<Self, Error> {
        try_pin_init!(Self {
            shrinker,
            pid: kernel::current!().pid(),
            mm: ARef::from(&**kernel::current!().mm().ok_or(ESRCH)?),
            mm_lock <- new_mutex!((), "ShrinkablePageRange::mm"),
            lock <- new_spinlock!(Inner {
                pages: ptr::null_mut(),
                size: 0,
                vma_addr: 0,
            }, "ShrinkablePageRange"),
            _pin: PhantomPinned,
        })
    }

    /// Try to take `mm_lock` without blocking, returning a guard whose validity does not depend
    /// on the lifetime of `self`.
    pub(crate) fn stable_trylock_mm(&self) -> Option<StableMmGuard> {
        // SAFETY: This extends the duration of the reference. Since this call happens before
        // `mm_lock` is taken in the destructor of `ShrinkablePageRange`, the destructor will block
        // until the returned guard is dropped. This ensures that the guard is valid until dropped.
        let mm_lock = unsafe { &*ptr::from_ref(&self.mm_lock) };

        mm_lock.try_lock()
    }

    /// Register a vma with this page range. Returns the size of the region in pages.
    pub(crate) fn register_with_vma(&self, vma: &virt::VmaNew) -> Result<usize> {
        let num_bytes = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize);
        let num_pages = num_bytes >> PAGE_SHIFT;

        if !ptr::eq::<Mm>(&*self.mm, &**vma.mm()) {
            pr_debug!("Failed to register with vma: invalid vma->vm_mm");
            return Err(EINVAL);
        }
        if num_pages == 0 {
            pr_debug!("Failed to register with vma: size zero");
            return Err(EINVAL);
        }

        let mut pages = KVVec::<PageInfo>::with_capacity(num_pages, GFP_KERNEL)?;
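
        // The loop below initializes the spare capacity of the vector in place. Each entry's
        // `lru.next` and `lru.prev` are pointed at the entry itself, which is the empty state of
        // a `list_head` (the same state that `INIT_LIST_HEAD` produces on the C side), so that
        // `list_lru_add`/`list_lru_del` see the entry as not queued until it is first added.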
        // SAFETY: This just initializes the pages array.
        unsafe {
            let self_ptr = self as *const ShrinkablePageRange;
            for i in 0..num_pages {
                let info = pages.as_mut_ptr().add(i);
                (&raw mut (*info).range).write(self_ptr);
                (&raw mut (*info).page).write(None);
                let lru = &raw mut (*info).lru;
                (&raw mut (*lru).next).write(lru);
                (&raw mut (*lru).prev).write(lru);
            }
        }

        let mut inner = self.lock.lock();
        if inner.size > 0 {
            pr_debug!("Failed to register with vma: already registered");
            drop(inner);
            return Err(EBUSY);
        }

        inner.pages = pages.into_raw_parts().0;
        inner.size = num_pages;
        inner.vma_addr = vma.start();

        Ok(num_pages)
    }

    /// Make sure that the given pages are allocated and mapped.
    ///
    /// Must not be called from an atomic context.
    pub(crate) fn use_range(&self, start: usize, end: usize) -> Result<()> {
        if start >= end {
            return Ok(());
        }
        let mut inner = self.lock.lock();
        assert!(end <= inner.size);

        for i in start..end {
            // SAFETY: This pointer offset is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // Since we're going to use the page, we should remove it from the lru list so that
                // the shrinker will not free it.
                //
                // SAFETY: The pointer is valid, and this is the right shrinker.
                //
                // The shrinker can't free the page between the check and this call to
                // `list_lru_del` because we hold the lock.
                unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            } else {
                // We have to allocate a new page. Use the slow path.
                drop(inner);
                // SAFETY: `i < end <= inner.size` so `i` is in bounds.
                match unsafe { self.use_page_slow(i) } {
                    Ok(()) => {}
                    Err(err) => {
                        pr_warn!("Error in use_page_slow: {:?}", err);
                        return Err(err);
                    }
                }
                inner = self.lock.lock();
            }
        }
        Ok(())
    }

    /// Mark the given page as in use, slow path.
    ///
    /// Must not be called from an atomic context.
    ///
    /// # Safety
    ///
    /// Assumes that `i` is in bounds.
    #[cold]
    unsafe fn use_page_slow(&self, i: usize) -> Result<()> {
        let new_page = Page::alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO)?;

        let mm_mutex = self.mm_lock.lock();
        let inner = self.lock.lock();

        // SAFETY: This pointer offset is in bounds.
        let page_info = unsafe { inner.pages.add(i) };

        // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
        if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
            // The page was already there, or someone else added the page while we didn't hold the
            // spinlock.
            //
            // SAFETY: The pointer is valid, and this is the right shrinker.
            //
            // The shrinker can't free the page between the check and this call to
            // `list_lru_del` because we hold the lock.
            unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            return Ok(());
        }

        let vma_addr = inner.vma_addr;
        // Release the spinlock while we insert the page into the vma.
        drop(inner);

        // No overflow since we stay in bounds of the vma.
        let user_page_addr = vma_addr + (i << PAGE_SHIFT);

        // We use `mmput_async` when dropping the `mm` because `use_page_slow` is usually used from
        // a remote process. If the call to `mmput` races with the process shutting down, then the
        // caller of `use_page_slow` becomes responsible for cleaning up the `mm`, which doesn't
        // happen until it returns to userspace. However, the caller might instead go to sleep and
        // wait for the owner of the `mm` to wake it up, which doesn't happen because it's in the
        // middle of a shutdown process that won't complete until the `mm` is dropped. This can
        // amount to a deadlock.
        //
        // Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a
        // workqueue.
        MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?)
            .mmap_read_lock()
            .vma_lookup(vma_addr)
            .ok_or(ESRCH)?
            .as_mixedmap_vma()
            .ok_or(ESRCH)?
            .vm_insert_page(user_page_addr, &new_page)
            .inspect_err(|err| {
                pr_warn!(
                    "Failed to vm_insert_page({}): vma_addr:{} i:{} err:{:?}",
                    user_page_addr,
                    vma_addr,
                    i,
                    err
                )
            })?;

        let inner = self.lock.lock();

        // SAFETY: The `page_info` pointer is valid and currently does not have a page. The page
        // can be written to since we hold the lock.
        //
        // We released and reacquired the spinlock since we checked that the page is null, but we
        // always hold the mm_lock mutex when setting the page to a non-null value, so it's not
        // possible for someone else to have changed it since our check.
        unsafe { PageInfo::set_page(page_info, new_page) };

        drop(inner);
        drop(mm_mutex);

        Ok(())
    }

    /// If the given pages are in use, then mark them as available so that the shrinker can free
    /// them.
    ///
    /// May be called from an atomic context.
    pub(crate) fn stop_using_range(&self, start: usize, end: usize) {
        if start >= end {
            return;
        }
        let inner = self.lock.lock();
        assert!(end <= inner.size);

        for i in (start..end).rev() {
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: Okay for reading since we have the lock.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // SAFETY: The pointer is valid, and it's the right shrinker.
                unsafe { PageInfo::list_lru_add(page_info, page.nid(), self.shrinker) };
            }
        }
    }
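
    // Worked example for `iterate` below (illustrative, assuming 4 KiB pages): a call with
    // `offset = 4000` and `size = 200` starts at `page_index = 4000 >> PAGE_SHIFT = 0` with an
    // in-page offset of 4000. The first callback covers `min(200, 4096 - 4000) = 96` bytes of
    // page 0, and the second covers the remaining 104 bytes starting at offset 0 of page 1.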

    /// Helper for reading or writing to a range of bytes that may overlap with several pages.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    unsafe fn iterate<T>(&self, mut offset: usize, mut size: usize, mut cb: T) -> Result
    where
        T: FnMut(&Page, usize, usize) -> Result,
    {
        if size == 0 {
            return Ok(());
        }

        let (pages, num_pages) = {
            let inner = self.lock.lock();
            (inner.pages, inner.size)
        };
        let num_bytes = num_pages << PAGE_SHIFT;

        // Check that the request is within the buffer.
        if offset.checked_add(size).ok_or(EFAULT)? > num_bytes {
            return Err(EFAULT);
        }

        let mut page_index = offset >> PAGE_SHIFT;
        offset &= PAGE_SIZE - 1;
        while size > 0 {
            let available = usize::min(size, PAGE_SIZE - offset);
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { pages.add(page_index) };
            // SAFETY: The caller guarantees that this page is in the "in use" state for the
            // duration of this call to `iterate`, so nobody will change the page.
            let page = unsafe { PageInfo::get_page(page_info) };
            if page.is_none() {
                pr_warn!("Page is null!");
            }
            let page = page.ok_or(EFAULT)?;
            cb(page, offset, available)?;
            size -= available;
            page_index += 1;
            offset = 0;
        }
        Ok(())
    }

    /// Copy from userspace into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn copy_from_user_slice(
        &self,
        reader: &mut UserSliceReader,
        offset: usize,
        size: usize,
    ) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `copy_from_user_slice`.
        unsafe {
            self.iterate(offset, size, |page, offset, to_copy| {
                page.copy_from_user_slice_raw(reader, offset, to_copy)
            })
        }
    }

    /// Copy from this page range into kernel space.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
        let mut out = MaybeUninit::<T>::uninit();
        let mut out_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `read`.
        unsafe {
            self.iterate(offset, size_of::<T>(), |page, offset, to_copy| {
                // SAFETY: The sum of `out_offset` and `to_copy` is bounded by the size of T.
                let obj_ptr = (out.as_mut_ptr() as *mut u8).add(out_offset);
                // SAFETY: The pointer is in-bounds of the `out` variable, so it is valid.
                page.read_raw(obj_ptr, offset, to_copy)?;
                out_offset += to_copy;
                Ok(())
            })?;
        }
        // SAFETY: We just initialised the data.
        Ok(unsafe { out.assume_init() })
    }

    /// Copy from kernel space into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result {
        let mut obj_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `write`.
        unsafe {
            self.iterate(offset, size_of_val(obj), |page, offset, to_copy| {
                // SAFETY: The sum of `obj_offset` and `to_copy` is bounded by the size of T.
                let obj_ptr = (obj as *const T as *const u8).add(obj_offset);
                // SAFETY: We have a reference to the object, so the pointer is valid.
                page.write_raw(obj_ptr, offset, to_copy)?;
                obj_offset += to_copy;
                Ok(())
            })
        }
    }

    /// Write zeroes to the given range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn fill_zero(&self, offset: usize, size: usize) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `fill_zero`.
        unsafe {
            self.iterate(offset, size, |page, offset, len| {
                page.fill_zero_raw(offset, len)
            })
        }
    }
}
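
// Illustrative sketch (editorial, not part of the driver): a caller that has registered a vma
// with `register_with_vma` marks pages as in use before touching them and releases them again
// afterwards. The names `range` and `Header` below are hypothetical; `Header` must implement
// `FromBytes` and is assumed to fit within the first page.
//
//     range.use_range(0, 1)?;
//     // SAFETY: Page 0 stays in use for the duration of the `read` call.
//     let hdr: Header = unsafe { range.read(0)? };
//     range.stop_using_range(0, 1);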

#[pinned_drop]
impl PinnedDrop for ShrinkablePageRange {
    fn drop(self: Pin<&mut Self>) {
        let (pages, size) = {
            let lock = self.lock.lock();
            (lock.pages, lock.size)
        };

        if size == 0 {
            return;
        }

        // Note: This call is also necessary for the safety of `stable_trylock_mm`.
        let mm_lock = self.mm_lock.lock();

        // This is the destructor, so unlike the other methods, we only need to worry about races
        // with the shrinker here. Since we hold the `mm_lock`, we also can't race with the
        // shrinker, and after this loop, the shrinker will not access any of our pages since we
        // removed them from the lru list.
        for i in 0..size {
            // SAFETY: Loop is in-bounds of the size.
            let p_ptr = unsafe { pages.add(i) };
            // SAFETY: No other readers, so we can read.
            if let Some(p) = unsafe { PageInfo::get_page(p_ptr) } {
                // SAFETY: The pointer is valid and it's the right shrinker.
                unsafe { PageInfo::list_lru_del(p_ptr, p.nid(), self.shrinker) };
            }
        }

        drop(mm_lock);

        // SAFETY: `pages` was allocated as a `KVVec<PageInfo>` with capacity `size`. Furthermore,
        // all `size` elements are initialized. Also, the array is no longer shared with the
        // shrinker due to the above loop.
        drop(unsafe { KVVec::from_raw_parts(pages, size, size) });
    }
}

/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_count(
    shrink: *mut bindings::shrinker,
    _sc: *mut bindings::shrink_control,
) -> c_ulong {
    // SAFETY: We can access our own private data.
    let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
    // SAFETY: Accessing the lru list is okay. Just an FFI call.
    unsafe { bindings::list_lru_count(list_lru) }
}

/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_scan(
    shrink: *mut bindings::shrinker,
    sc: *mut bindings::shrink_control,
) -> c_ulong {
    // SAFETY: We can access our own private data.
    let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
    // SAFETY: Caller guarantees that it is safe to read this field.
    let nr_to_scan = unsafe { (*sc).nr_to_scan };
    // SAFETY: Accessing the lru list is okay. Just an FFI call.
    unsafe {
        bindings::list_lru_walk(
            list_lru,
            Some(bindings::rust_shrink_free_page_wrap),
            ptr::null_mut(),
            nr_to_scan,
        )
    }
}

const LRU_SKIP: bindings::lru_status = bindings::lru_status_LRU_SKIP;
const LRU_REMOVED_ENTRY: bindings::lru_status = bindings::lru_status_LRU_REMOVED_RETRY;

/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_free_page(
    item: *mut bindings::list_head,
    lru: *mut bindings::list_lru_one,
    _cb_arg: *mut c_void,
) -> bindings::lru_status {
    // Fields that should survive after unlocking the lru lock.
    let page;
    let page_index;
    let mm;
    let mmap_read;
    let mm_mutex;
    let vma_addr;

    {
        // CAST: The `list_head` field is first in `PageInfo`.
        let info = item as *mut PageInfo;
        // SAFETY: The `range` field of `PageInfo` is immutable.
        let range = unsafe { &*((*info).range) };

        mm = match range.mm.mmget_not_zero() {
            Some(mm) => MmWithUser::into_mmput_async(mm),
            None => return LRU_SKIP,
        };

        mm_mutex = match range.stable_trylock_mm() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        mmap_read = match mm.mmap_read_trylock() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        // We can't lock it normally here, since we hold the lru lock.
        let inner = match range.lock.try_lock() {
            Some(inner) => inner,
            None => return LRU_SKIP,
        };

        // SAFETY: The item is in this lru list, so it's okay to remove it.
        unsafe { bindings::list_lru_isolate(lru, item) };

        // SAFETY: Both pointers are in bounds of the same allocation.
        page_index = unsafe { info.offset_from(inner.pages) } as usize;

        // SAFETY: We hold the spinlock, so we can take the page.
        //
        // This sets the page pointer to `None` before we unmap it from the vma. However, we call
        // `zap_page_range_single` before we release the mmap lock, so `use_page_slow` will not be
        // able to insert a new page until after our call to `zap_page_range_single`.
        page = unsafe { PageInfo::take_page(info) };
        vma_addr = inner.vma_addr;

        // From this point on, we don't access this PageInfo or ShrinkablePageRange again, because
        // they can be freed at any point after we unlock `lru_lock`. This is with the exception of
        // `mm_mutex` which is kept alive by holding the lock.
    }

    // SAFETY: The lru lock is locked when this method is called.
    unsafe { bindings::spin_unlock(&raw mut (*lru).lock) };

    if let Some(vma) = mmap_read.vma_lookup(vma_addr) {
        let user_page_addr = vma_addr + (page_index << PAGE_SHIFT);
        vma.zap_page_range_single(user_page_addr, PAGE_SIZE);
    }

    drop(mmap_read);
    drop(mm_mutex);
    drop(mm);
    drop(page);

    // SAFETY: We just unlocked the lru lock, but it should be locked when we return.
    unsafe { bindings::spin_lock(&raw mut (*lru).lock) };

    LRU_REMOVED_ENTRY
}
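
// Reclaim flow, summarized (editorial note): the kernel calls `rust_shrink_count` to learn how
// many pages are currently on the lru (allocated and mapped, but not in use), then
// `rust_shrink_scan` walks up to `nr_to_scan` of them. For each entry, `rust_shrink_free_page`
// isolates it from the lru, takes the page out of its `PageInfo`, and unmaps it from the owning
// vma while holding the mmap lock, returning `LRU_REMOVED_ENTRY` to signal that the entry was
// removed and that the lru lock was dropped and re-taken in the meantime.
// `rust_shrink_free_page_wrap` is assumed to be a thin C shim defined outside this module that
// adapts the `list_lru_walk` callback signature to `rust_shrink_free_page`.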