1 // SPDX-License-Identifier: GPL-2.0 2 3 // Copyright (C) 2025 Google LLC. 4 5 //! This module has utilities for managing a page range where unused pages may be reclaimed by a 6 //! vma shrinker. 7 8 // To avoid deadlocks, locks are taken in the order: 9 // 10 // 1. mmap lock 11 // 2. spinlock 12 // 3. lru spinlock 13 // 14 // The shrinker will use trylock methods because it locks them in a different order. 15 16 use core::{ 17 marker::PhantomPinned, 18 mem::{size_of, size_of_val, MaybeUninit}, 19 ptr, 20 }; 21 22 use kernel::{ 23 bindings, 24 error::Result, 25 ffi::{c_ulong, c_void}, 26 mm::{virt, Mm, MmWithUser}, 27 new_mutex, new_spinlock, 28 page::{Page, PAGE_SHIFT, PAGE_SIZE}, 29 prelude::*, 30 str::CStr, 31 sync::{aref::ARef, Mutex, SpinLock}, 32 task::Pid, 33 transmute::FromBytes, 34 types::Opaque, 35 uaccess::UserSliceReader, 36 }; 37 38 /// Represents a shrinker that can be registered with the kernel. 39 /// 40 /// Each shrinker can be used by many `ShrinkablePageRange` objects. 41 #[repr(C)] 42 pub(crate) struct Shrinker { 43 inner: Opaque<*mut bindings::shrinker>, 44 list_lru: Opaque<bindings::list_lru>, 45 } 46 47 // SAFETY: The shrinker and list_lru are thread safe. 48 unsafe impl Send for Shrinker {} 49 // SAFETY: The shrinker and list_lru are thread safe. 50 unsafe impl Sync for Shrinker {} 51 52 impl Shrinker { 53 /// Create a new shrinker. 54 /// 55 /// # Safety 56 /// 57 /// Before using this shrinker with a `ShrinkablePageRange`, the `register` method must have 58 /// been called exactly once, and it must not have returned an error. 59 pub(crate) const unsafe fn new() -> Self { 60 Self { 61 inner: Opaque::uninit(), 62 list_lru: Opaque::uninit(), 63 } 64 } 65 66 /// Register this shrinker with the kernel. 67 pub(crate) fn register(&'static self, name: &CStr) -> Result<()> { 68 // SAFETY: These fields are not yet used, so it's okay to zero them. 
69 unsafe { 70 self.inner.get().write(ptr::null_mut()); 71 self.list_lru.get().write_bytes(0, 1); 72 } 73 74 // SAFETY: The field is not yet used, so we can initialize it. 75 let ret = unsafe { bindings::__list_lru_init(self.list_lru.get(), false, ptr::null_mut()) }; 76 if ret != 0 { 77 return Err(Error::from_errno(ret)); 78 } 79 80 // SAFETY: The `name` points at a valid c string. 81 let shrinker = unsafe { bindings::shrinker_alloc(0, name.as_char_ptr()) }; 82 if shrinker.is_null() { 83 // SAFETY: We initialized it, so its okay to destroy it. 84 unsafe { bindings::list_lru_destroy(self.list_lru.get()) }; 85 return Err(Error::from_errno(ret)); 86 } 87 88 // SAFETY: We're about to register the shrinker, and these are the fields we need to 89 // initialize. (All other fields are already zeroed.) 90 unsafe { 91 (&raw mut (*shrinker).count_objects).write(Some(rust_shrink_count)); 92 (&raw mut (*shrinker).scan_objects).write(Some(rust_shrink_scan)); 93 (&raw mut (*shrinker).private_data).write(self.list_lru.get().cast()); 94 } 95 96 // SAFETY: The new shrinker has been fully initialized, so we can register it. 97 unsafe { bindings::shrinker_register(shrinker) }; 98 99 // SAFETY: This initializes the pointer to the shrinker so that we can use it. 100 unsafe { self.inner.get().write(shrinker) }; 101 102 Ok(()) 103 } 104 } 105 106 /// A container that manages a page range in a vma. 107 /// 108 /// The pages can be thought of as an array of booleans of whether the pages are usable. The 109 /// methods `use_range` and `stop_using_range` set all booleans in a range to true or false 110 /// respectively. Initially, no pages are allocated. When a page is not used, it is not freed 111 /// immediately. Instead, it is made available to the memory shrinker to free it if the device is 112 /// under memory pressure. 
///
/// It's okay for `use_range` and `stop_using_range` to race with each other, although there's no
/// way to know whether an index ends up with true or false if a call to `use_range` races with
/// another call to `stop_using_range` on a given index.
///
/// It's also okay for the two methods to race with themselves, e.g. if two threads call
/// `use_range` on the same index, then that's fine and neither call will return until the page is
/// allocated and mapped.
///
/// The methods that read or write to a range require that the page is marked as in use. So it is
/// _not_ okay to call `stop_using_range` on a page that is in use by the methods that read or
/// write to the page.
#[pin_data(PinnedDrop)]
pub(crate) struct ShrinkablePageRange {
    /// Shrinker object registered with the kernel.
    shrinker: &'static Shrinker,
    /// Pid using this page range. Only used as debugging information.
    pid: Pid,
    /// The mm for the relevant process.
    mm: ARef<Mm>,
    /// Used to synchronize calls to `vm_insert_page` and `zap_page_range_single`.
    #[pin]
    mm_lock: Mutex<()>,
    /// Spinlock protecting changes to pages.
    #[pin]
    lock: SpinLock<Inner>,

    /// Must not move, since page info has pointers back.
    #[pin]
    _pin: PhantomPinned,
}

// We do not define any ops. For now, used only to check identity of vmas.
static BINDER_VM_OPS: bindings::vm_operations_struct = pin_init::zeroed();

// To ensure that we do not accidentally install pages into or zap pages from the wrong vma, we
// check its vm_ops and private data before using it.
//
// Returns the vma as a mixed-map vma on success, or `None` if it is not a vma owned by this
// `ShrinkablePageRange`.
fn check_vma(vma: &virt::VmaRef, owner: *const ShrinkablePageRange) -> Option<&virt::VmaMixedMap> {
    // SAFETY: Just reading the vm_ops pointer of any active vma is safe.
    let vm_ops = unsafe { (*vma.as_ptr()).vm_ops };
    if !ptr::eq(vm_ops, &BINDER_VM_OPS) {
        return None;
    }

    // SAFETY: Reading the vm_private_data pointer of a binder-owned vma is safe.
    let vm_private_data = unsafe { (*vma.as_ptr()).vm_private_data };
    // The ShrinkablePageRange is only dropped when the Process is dropped, which only happens once
    // the file's ->release handler is invoked, which means the ShrinkablePageRange outlives any
    // VMA associated with it, so there can't be any false positives due to pointer reuse here.
    if !ptr::eq(vm_private_data, owner.cast()) {
        return None;
    }

    vma.as_mixedmap_vma()
}

/// State protected by the `ShrinkablePageRange` spinlock.
struct Inner {
    /// Array of pages.
    ///
    /// Since this is also accessed by the shrinker, we can't use a `Box`, which asserts exclusive
    /// ownership. To deal with that, we manage it using raw pointers.
    pages: *mut PageInfo,
    /// Length of the `pages` array.
    size: usize,
    /// The address of the vma to insert the pages into.
    vma_addr: usize,
}

// SAFETY: proper locking is in place for `Inner`
unsafe impl Send for Inner {}

/// Guard type returned by `stable_trylock_mm`. The `'static` lifetime is justified by the
/// destructor of `ShrinkablePageRange` taking `mm_lock` before freeing anything.
type StableMmGuard =
    kernel::sync::lock::Guard<'static, (), kernel::sync::lock::mutex::MutexBackend>;

/// An array element that describes the current state of a page.
///
/// There are three states:
///
/// * Free. The page is None. The `lru` element is not queued.
/// * Available. The page is Some. The `lru` element is queued to the shrinker's lru.
/// * Used. The page is Some. The `lru` element is not queued.
///
/// When an element is available, the shrinker is able to free the page.
#[repr(C)]
struct PageInfo {
    // The `lru` field must stay first: the shrinker callback casts the `list_head` pointer it
    // receives back to a `*mut PageInfo` (see `rust_shrink_free_page`).
    lru: bindings::list_head,
    page: Option<Page>,
    range: *const ShrinkablePageRange,
}

impl PageInfo {
    /// # Safety
    ///
    /// The caller ensures that writing to `me.page` is ok, and that the page is not currently set.
    unsafe fn set_page(me: *mut PageInfo, page: Page) {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // Defensive check: the caller promised the slot is empty. If it is not, log and drop
        // the old page rather than leak it.
        //
        // SAFETY: The pointer is valid for writing, so also valid for reading.
        if unsafe { (*ptr).is_some() } {
            pr_err!("set_page called when there is already a page");
            // SAFETY: We will initialize the page again below.
            unsafe { ptr::drop_in_place(ptr) };
        }

        // SAFETY: The pointer is valid for writing.
        unsafe { ptr::write(ptr, Some(page)) };
    }

    /// Returns a reference to the page in this slot, if any.
    ///
    /// # Safety
    ///
    /// The caller ensures that reading from `me.page` is ok for the duration of 'a.
    unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw const (*me).page };

        // SAFETY: The pointer is valid for reading.
        unsafe { (*ptr).as_ref() }
    }

    /// Moves the page out of this slot, leaving `None` behind.
    ///
    /// # Safety
    ///
    /// The caller ensures that writing to `me.page` is ok for the duration of 'a.
    unsafe fn take_page(me: *mut PageInfo) -> Option<Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // SAFETY: The pointer is valid for reading.
        unsafe { (*ptr).take() }
    }

    /// Add this page to the lru list, if not already in the list.
    ///
    /// This moves the page into the "available" state where the shrinker may reclaim it.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_add(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_add(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }

    /// Remove this page from the lru list, if it is in the list.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_del(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_del(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }
}

impl ShrinkablePageRange {
    /// Create a new `ShrinkablePageRange` using the given shrinker.
    ///
    /// Takes the `mm` of the current task; fails with `ESRCH` if the current task has no mm
    /// (e.g. a kernel thread).
    pub(crate) fn new(shrinker: &'static Shrinker) -> impl PinInit<Self, Error> {
        try_pin_init!(Self {
            shrinker,
            pid: kernel::current!().pid(),
            mm: ARef::from(&**kernel::current!().mm().ok_or(ESRCH)?),
            mm_lock <- new_mutex!((), "ShrinkablePageRange::mm"),
            lock <- new_spinlock!(Inner {
                pages: ptr::null_mut(),
                size: 0,
                vma_addr: 0,
            }, "ShrinkablePageRange"),
            _pin: PhantomPinned,
        })
    }

    /// Try to take `mm_lock`, returning a guard whose lifetime is not tied to `self`.
    ///
    /// Used by the shrinker callback, which must not block and cannot hold a borrow of the
    /// range across the lru unlock.
    pub(crate) fn stable_trylock_mm(&self) -> Option<StableMmGuard> {
        // SAFETY: This extends the duration of the reference. Since this call happens before
        // `mm_lock` is taken in the destructor of `ShrinkablePageRange`, the destructor will block
        // until the returned guard is dropped. This ensures that the guard is valid until dropped.
        let mm_lock = unsafe { &*ptr::from_ref(&self.mm_lock) };

        mm_lock.try_lock()
    }

    /// Register a vma with this page range. Returns the size of the region.
    pub(crate) fn register_with_vma(&self, vma: &virt::VmaNew) -> Result<usize> {
        // The mapped region is capped at 4 MiB regardless of the vma's actual size.
        let num_bytes = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize);
        let num_pages = num_bytes >> PAGE_SHIFT;

        if !ptr::eq::<Mm>(&*self.mm, &**vma.mm()) {
            pr_debug!("Failed to register with vma: invalid vma->vm_mm");
            return Err(EINVAL);
        }
        if num_pages == 0 {
            pr_debug!("Failed to register with vma: size zero");
            return Err(EINVAL);
        }

        let mut pages = KVVec::<PageInfo>::with_capacity(num_pages, GFP_KERNEL)?;

        // Initialize each element in place: no page, back-pointer to this range, and a
        // self-linked (i.e. unqueued) lru list head.
        //
        // SAFETY: This just initializes the pages array.
        unsafe {
            let self_ptr = self as *const ShrinkablePageRange;
            for i in 0..num_pages {
                let info = pages.as_mut_ptr().add(i);
                (&raw mut (*info).range).write(self_ptr);
                (&raw mut (*info).page).write(None);
                let lru = &raw mut (*info).lru;
                (&raw mut (*lru).next).write(lru);
                (&raw mut (*lru).prev).write(lru);
            }
        }

        let mut inner = self.lock.lock();
        if inner.size > 0 {
            pr_debug!("Failed to register with vma: already registered");
            drop(inner);
            return Err(EBUSY);
        }

        // Hand ownership of the array to `inner`; it is reconstructed as a `KVVec` in `drop`.
        inner.pages = pages.into_raw_parts().0;
        inner.size = num_pages;
        inner.vma_addr = vma.start();

        // This pointer is only used for comparison - it's not dereferenced.
        //
        // SAFETY: We own the vma, and we don't use any methods on VmaNew that rely on
        // `vm_private_data`.
        unsafe {
            (*vma.as_ptr()).vm_private_data = ptr::from_ref(self).cast_mut().cast::<c_void>()
        };

        // SAFETY: We own the vma, and we don't use any methods on VmaNew that rely on
        // `vm_ops`.
        unsafe { (*vma.as_ptr()).vm_ops = &BINDER_VM_OPS };

        Ok(num_pages)
    }

    /// Make sure that the given pages are allocated and mapped.
    ///
    /// Must not be called from an atomic context.
    pub(crate) fn use_range(&self, start: usize, end: usize) -> Result<()> {
        if start >= end {
            return Ok(());
        }
        let mut inner = self.lock.lock();
        assert!(end <= inner.size);

        for i in start..end {
            // SAFETY: This pointer offset is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // Since we're going to use the page, we should remove it from the lru list so that
                // the shrinker will not free it.
                //
                // SAFETY: The pointer is valid, and this is the right shrinker.
                //
                // The shrinker can't free the page between the check and this call to
                // `list_lru_del` because we hold the lock.
                unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            } else {
                // We have to allocate a new page. Use the slow path.
                drop(inner);
                // SAFETY: `i < end <= inner.size` so `i` is in bounds.
                match unsafe { self.use_page_slow(i) } {
                    Ok(()) => {}
                    Err(err) => {
                        pr_warn!("Error in use_page_slow: {:?}", err);
                        return Err(err);
                    }
                }
                // Reacquire the spinlock for the next iteration.
                inner = self.lock.lock();
            }
        }
        Ok(())
    }

    /// Mark the given page as in use, slow path.
    ///
    /// Allocates a fresh zeroed page and inserts it into the vma at the right offset.
    ///
    /// Must not be called from an atomic context.
    ///
    /// # Safety
    ///
    /// Assumes that `i` is in bounds.
    #[cold]
    unsafe fn use_page_slow(&self, i: usize) -> Result<()> {
        let new_page = Page::alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO)?;

        // Lock order: mm_lock mutex before the spinlock (see lock ordering at the top of this
        // file).
        let mm_mutex = self.mm_lock.lock();
        let inner = self.lock.lock();

        // SAFETY: This pointer offset is in bounds.
        let page_info = unsafe { inner.pages.add(i) };

        // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
        if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
            // The page was already there, or someone else added the page while we didn't hold the
            // spinlock.
            //
            // SAFETY: The pointer is valid, and this is the right shrinker.
            //
            // The shrinker can't free the page between the check and this call to
            // `list_lru_del` because we hold the lock.
            unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            return Ok(());
        }

        let vma_addr = inner.vma_addr;
        // Release the spinlock while we insert the page into the vma.
        drop(inner);

        // No overflow since we stay in bounds of the vma.
        let user_page_addr = vma_addr + (i << PAGE_SHIFT);

        // We use `mmput_async` when dropping the `mm` because `use_page_slow` is usually used from
        // a remote process. If the call to `mmput` races with the process shutting down, then the
        // caller of `use_page_slow` becomes responsible for cleaning up the `mm`, which doesn't
        // happen until it returns to userspace. However, the caller might instead go to sleep and
        // wait for the owner of the `mm` to wake it up, which doesn't happen because it's in the
        // middle of a shutdown process that won't complete until the `mm` is dropped. This can
        // amount to a deadlock.
        //
        // Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a
        // workqueue.
        let mm = MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?);
        {
            // Prefer the per-vma lock; fall back to the full mmap read lock if the rcu lookup
            // fails. The unused guard variable keeps whichever lock we took alive for the
            // duration of the insert.
            let vma_read;
            let mmap_read;
            let vma = if let Some(ret) = mm.lock_vma_under_rcu(vma_addr) {
                vma_read = ret;
                check_vma(&vma_read, self)
            } else {
                mmap_read = mm.mmap_read_lock();
                mmap_read
                    .vma_lookup(vma_addr)
                    .and_then(|vma| check_vma(vma, self))
            };

            match vma {
                Some(vma) => vma.vm_insert_page(user_page_addr, &new_page)?,
                None => return Err(ESRCH),
            }
        }

        let inner = self.lock.lock();

        // SAFETY: The `page_info` pointer is valid and currently does not have a page. The page
        // can be written to since we hold the lock.
        //
        // We released and reacquired the spinlock since we checked that the page is null, but we
        // always hold the mm_lock mutex when setting the page to a non-null value, so it's not
        // possible for someone else to have changed it since our check.
        unsafe { PageInfo::set_page(page_info, new_page) };

        drop(inner);
        drop(mm_mutex);

        Ok(())
    }

    /// If the given page is in use, then mark it as available so that the shrinker can free it.
    ///
    /// May be called from an atomic context.
    pub(crate) fn stop_using_range(&self, start: usize, end: usize) {
        if start >= end {
            return;
        }
        let inner = self.lock.lock();
        assert!(end <= inner.size);

        for i in (start..end).rev() {
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: Okay for reading since we have the lock.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // SAFETY: The pointer is valid, and it's the right shrinker.
                unsafe { PageInfo::list_lru_add(page_info, page.nid(), self.shrinker) };
            }
        }
    }

    /// Helper for reading or writing to a range of bytes that may overlap with several pages.
    ///
    /// Calls `cb(page, offset_in_page, length)` once per touched page, in order.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    unsafe fn iterate<T>(&self, mut offset: usize, mut size: usize, mut cb: T) -> Result
    where
        T: FnMut(&Page, usize, usize) -> Result,
    {
        if size == 0 {
            return Ok(());
        }

        let (pages, num_pages) = {
            let inner = self.lock.lock();
            (inner.pages, inner.size)
        };
        let num_bytes = num_pages << PAGE_SHIFT;

        // Check that the request is within the buffer.
        if offset.checked_add(size).ok_or(EFAULT)? > num_bytes {
            return Err(EFAULT);
        }

        let mut page_index = offset >> PAGE_SHIFT;
        offset &= PAGE_SIZE - 1;
        while size > 0 {
            // Number of bytes to process in the current page.
            let available = usize::min(size, PAGE_SIZE - offset);
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { pages.add(page_index) };
            // SAFETY: The caller guarantees that this page is in the "in use" state for the
            // duration of this call to `iterate`, so nobody will change the page.
            let page = unsafe { PageInfo::get_page(page_info) };
            if page.is_none() {
                pr_warn!("Page is null!");
            }
            let page = page.ok_or(EFAULT)?;
            cb(page, offset, available)?;
            size -= available;
            page_index += 1;
            // Only the first page starts at a non-zero in-page offset.
            offset = 0;
        }
        Ok(())
    }

    /// Copy from userspace into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn copy_from_user_slice(
        &self,
        reader: &mut UserSliceReader,
        offset: usize,
        size: usize,
    ) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `copy_from_user_slice`.
        unsafe {
            self.iterate(offset, size, |page, offset, to_copy| {
                page.copy_from_user_slice_raw(reader, offset, to_copy)
            })
        }
    }

    /// Copy from this page range into kernel space.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
        let mut out = MaybeUninit::<T>::uninit();
        let mut out_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `read`.
        unsafe {
            self.iterate(offset, size_of::<T>(), |page, offset, to_copy| {
                // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
                let obj_ptr = (out.as_mut_ptr() as *mut u8).add(out_offset);
                // SAFETY: The pointer is in-bounds of the `out` variable, so it is valid.
                page.read_raw(obj_ptr, offset, to_copy)?;
                out_offset += to_copy;
                Ok(())
            })?;
        }
        // SAFETY: We just initialised the data.
        Ok(unsafe { out.assume_init() })
    }

    /// Copy from kernel space into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result {
        let mut obj_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `write`.
        unsafe {
            self.iterate(offset, size_of_val(obj), |page, offset, to_copy| {
                // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
                let obj_ptr = (obj as *const T as *const u8).add(obj_offset);
                // SAFETY: We have a reference to the object, so the pointer is valid.
                page.write_raw(obj_ptr, offset, to_copy)?;
                obj_offset += to_copy;
                Ok(())
            })
        }
    }

    /// Write zeroes to the given range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn fill_zero(&self, offset: usize, size: usize) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `fill_zero`.
        unsafe {
            self.iterate(offset, size, |page, offset, len| {
                page.fill_zero_raw(offset, len)
            })
        }
    }
}

#[pinned_drop]
impl PinnedDrop for ShrinkablePageRange {
    fn drop(self: Pin<&mut Self>) {
        let (pages, size) = {
            let lock = self.lock.lock();
            (lock.pages, lock.size)
        };

        // If no vma was ever registered, there is nothing to free.
        if size == 0 {
            return;
        }

        // Note: This call is also necessary for the safety of `stable_trylock_mm`.
        let mm_lock = self.mm_lock.lock();

        // This is the destructor, so unlike the other methods, we only need to worry about races
        // with the shrinker here. Since we hold the `mm_lock`, we also can't race with the
        // shrinker, and after this loop, the shrinker will not access any of our pages since we
        // removed them from the lru list.
        for i in 0..size {
            // SAFETY: Loop is in-bounds of the size.
            let p_ptr = unsafe { pages.add(i) };
            // SAFETY: No other readers, so we can read.
            if let Some(p) = unsafe { PageInfo::get_page(p_ptr) } {
                // SAFETY: The pointer is valid and it's the right shrinker.
                unsafe { PageInfo::list_lru_del(p_ptr, p.nid(), self.shrinker) };
            }
        }

        drop(mm_lock);

        // SAFETY: `pages` was allocated as an `KVVec<PageInfo>` with capacity `size`. Furthermore,
        // all `size` elements are initialized. Also, the array is no longer shared with the
        // shrinker due to the above loop.
        drop(unsafe { KVVec::from_raw_parts(pages, size, size) });
    }
}

/// Shrinker `count_objects` callback: reports how many pages are reclaimable.
///
/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_count(
    shrink: *mut bindings::shrinker,
    _sc: *mut bindings::shrink_control,
) -> c_ulong {
    // SAFETY: We can access our own private data.
    let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
    // SAFETY: Accessing the lru list is okay. Just an FFI call.
    unsafe { bindings::list_lru_count(list_lru) }
}

/// Shrinker `scan_objects` callback: walks the lru list freeing up to `nr_to_scan` pages.
///
/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_scan(
    shrink: *mut bindings::shrinker,
    sc: *mut bindings::shrink_control,
) -> c_ulong {
    // SAFETY: We can access our own private data.
    let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
    // SAFETY: Caller guarantees that it is safe to read this field.
    let nr_to_scan = unsafe { (*sc).nr_to_scan };
    // SAFETY: Accessing the lru list is okay. Just an FFI call.
    unsafe {
        bindings::list_lru_walk(
            list_lru,
            Some(bindings::rust_shrink_free_page_wrap),
            ptr::null_mut(),
            nr_to_scan,
        )
    }
}

const LRU_SKIP: bindings::lru_status = bindings::lru_status_LRU_SKIP;
// We report LRU_REMOVED_RETRY because we drop the lru lock below while zapping the page.
const LRU_REMOVED_ENTRY: bindings::lru_status = bindings::lru_status_LRU_REMOVED_RETRY;

/// Per-item lru walk callback: tries to reclaim one available page.
///
/// Returns `LRU_SKIP` if any of the required locks cannot be taken without blocking (the lru
/// lock is held on entry, so only trylock variants may be used — see the lock ordering note at
/// the top of this file).
///
/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_free_page(
    item: *mut bindings::list_head,
    lru: *mut bindings::list_lru_one,
    _cb_arg: *mut c_void,
) -> bindings::lru_status {
    // Fields that should survive after unlocking the lru lock.
    let page;
    let page_index;
    let mm;
    let mmap_read;
    let mm_mutex;
    let vma_addr;
    let range_ptr;

    {
        // CAST: The `list_head` field is first in `PageInfo`.
        let info = item as *mut PageInfo;
        // SAFETY: The `range` field of `PageInfo` is immutable.
        range_ptr = unsafe { (*info).range };
        // SAFETY: The `range` outlives its `PageInfo` values.
        let range = unsafe { &*range_ptr };

        mm = match range.mm.mmget_not_zero() {
            Some(mm) => MmWithUser::into_mmput_async(mm),
            None => return LRU_SKIP,
        };

        mm_mutex = match range.stable_trylock_mm() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        mmap_read = match mm.mmap_read_trylock() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        // We can't lock it normally here, since we hold the lru lock.
        let inner = match range.lock.try_lock() {
            Some(inner) => inner,
            None => return LRU_SKIP,
        };

        // SAFETY: The item is in this lru list, so it's okay to remove it.
        unsafe { bindings::list_lru_isolate(lru, item) };

        // SAFETY: Both pointers are in bounds of the same allocation.
        page_index = unsafe { info.offset_from(inner.pages) } as usize;

        // SAFETY: We hold the spinlock, so we can take the page.
        //
        // This sets the page pointer to zero before we unmap it from the vma. However, we call
        // `zap_page_range` before we release the mmap lock, so `use_page_slow` will not be able to
        // insert a new page until after our call to `zap_page_range`.
        page = unsafe { PageInfo::take_page(info) };
        vma_addr = inner.vma_addr;

        // From this point on, we don't access this PageInfo or ShrinkablePageRange again, because
        // they can be freed at any point after we unlock `lru_lock`. This is with the exception of
        // `mm_mutex` which is kept alive by holding the lock.
    }

    // SAFETY: The lru lock is locked when this method is called.
    unsafe { bindings::spin_unlock(&raw mut (*lru).lock) };

    if let Some(unchecked_vma) = mmap_read.vma_lookup(vma_addr) {
        if let Some(vma) = check_vma(unchecked_vma, range_ptr) {
            let user_page_addr = vma_addr + (page_index << PAGE_SHIFT);
            vma.zap_page_range_single(user_page_addr, PAGE_SIZE);
        }
    }

    drop(mmap_read);
    drop(mm_mutex);
    drop(mm);
    drop(page);

    LRU_REMOVED_ENTRY
}