1 // SPDX-License-Identifier: GPL-2.0 2 3 // Copyright (C) 2025 Google LLC. 4 5 //! This module has utilities for managing a page range where unused pages may be reclaimed by a 6 //! vma shrinker. 7 8 // To avoid deadlocks, locks are taken in the order: 9 // 10 // 1. mmap lock 11 // 2. spinlock 12 // 3. lru spinlock 13 // 14 // The shrinker will use trylock methods because it locks them in a different order. 15 16 use crate::AssertSync; 17 18 use core::{ 19 marker::PhantomPinned, 20 mem::{size_of, size_of_val, MaybeUninit}, 21 ptr, 22 }; 23 24 use kernel::{ 25 bindings, 26 error::Result, 27 ffi::{c_ulong, c_void}, 28 mm::{virt, Mm, MmWithUser}, 29 new_mutex, new_spinlock, 30 page::{Page, PAGE_SHIFT, PAGE_SIZE}, 31 prelude::*, 32 str::CStr, 33 sync::{aref::ARef, Mutex, SpinLock}, 34 task::Pid, 35 transmute::FromBytes, 36 types::Opaque, 37 uaccess::UserSliceReader, 38 }; 39 40 /// Represents a shrinker that can be registered with the kernel. 41 /// 42 /// Each shrinker can be used by many `ShrinkablePageRange` objects. 43 #[repr(C)] 44 pub(crate) struct Shrinker { 45 inner: Opaque<*mut bindings::shrinker>, 46 list_lru: Opaque<bindings::list_lru>, 47 } 48 49 // SAFETY: The shrinker and list_lru are thread safe. 50 unsafe impl Send for Shrinker {} 51 // SAFETY: The shrinker and list_lru are thread safe. 52 unsafe impl Sync for Shrinker {} 53 54 impl Shrinker { 55 /// Create a new shrinker. 56 /// 57 /// # Safety 58 /// 59 /// Before using this shrinker with a `ShrinkablePageRange`, the `register` method must have 60 /// been called exactly once, and it must not have returned an error. 61 pub(crate) const unsafe fn new() -> Self { 62 Self { 63 inner: Opaque::uninit(), 64 list_lru: Opaque::uninit(), 65 } 66 } 67 68 /// Register this shrinker with the kernel. 69 pub(crate) fn register(&'static self, name: &CStr) -> Result<()> { 70 // SAFETY: These fields are not yet used, so it's okay to zero them. 
71 unsafe { 72 self.inner.get().write(ptr::null_mut()); 73 self.list_lru.get().write_bytes(0, 1); 74 } 75 76 // SAFETY: The field is not yet used, so we can initialize it. 77 let ret = unsafe { bindings::__list_lru_init(self.list_lru.get(), false, ptr::null_mut()) }; 78 if ret != 0 { 79 return Err(Error::from_errno(ret)); 80 } 81 82 // SAFETY: The `name` points at a valid c string. 83 let shrinker = unsafe { bindings::shrinker_alloc(0, name.as_char_ptr()) }; 84 if shrinker.is_null() { 85 // SAFETY: We initialized it, so its okay to destroy it. 86 unsafe { bindings::list_lru_destroy(self.list_lru.get()) }; 87 return Err(Error::from_errno(ret)); 88 } 89 90 // SAFETY: We're about to register the shrinker, and these are the fields we need to 91 // initialize. (All other fields are already zeroed.) 92 unsafe { 93 (&raw mut (*shrinker).count_objects).write(Some(rust_shrink_count)); 94 (&raw mut (*shrinker).scan_objects).write(Some(rust_shrink_scan)); 95 (&raw mut (*shrinker).private_data).write(self.list_lru.get().cast()); 96 } 97 98 // SAFETY: The new shrinker has been fully initialized, so we can register it. 99 unsafe { bindings::shrinker_register(shrinker) }; 100 101 // SAFETY: This initializes the pointer to the shrinker so that we can use it. 102 unsafe { self.inner.get().write(shrinker) }; 103 104 Ok(()) 105 } 106 } 107 108 /// A container that manages a page range in a vma. 109 /// 110 /// The pages can be thought of as an array of booleans of whether the pages are usable. The 111 /// methods `use_range` and `stop_using_range` set all booleans in a range to true or false 112 /// respectively. Initially, no pages are allocated. When a page is not used, it is not freed 113 /// immediately. Instead, it is made available to the memory shrinker to free it if the device is 114 /// under memory pressure. 
///
/// It's okay for `use_range` and `stop_using_range` to race with each other, although there's no
/// way to know whether an index ends up with true or false if a call to `use_range` races with
/// another call to `stop_using_range` on a given index.
///
/// It's also okay for the two methods to race with themselves, e.g. if two threads call
/// `use_range` on the same index, then that's fine and neither call will return until the page is
/// allocated and mapped.
///
/// The methods that read or write to a range require that the page is marked as in use. So it is
/// _not_ okay to call `stop_using_range` on a page that is in use by the methods that read or
/// write to the page.
#[pin_data(PinnedDrop)]
pub(crate) struct ShrinkablePageRange {
    /// Shrinker object registered with the kernel.
    shrinker: &'static Shrinker,
    /// Pid using this page range. Only used as debugging information.
    pid: Pid,
    /// The mm for the relevant process.
    mm: ARef<Mm>,
    /// Used to synchronize calls to `vm_insert_page` and `zap_page_range_single`.
    #[pin]
    mm_lock: Mutex<()>,
    /// Spinlock protecting changes to pages.
    #[pin]
    lock: SpinLock<Inner>,

    /// Must not move, since page info has pointers back.
    #[pin]
    _pin: PhantomPinned,
}

// We do not define any ops. For now, used only to check identity of vmas.
static BINDER_VM_OPS: AssertSync<bindings::vm_operations_struct> = AssertSync(pin_init::zeroed());

// To ensure that we do not accidentally install pages into or zap pages from the wrong vma, we
// check its vm_ops and private data before using it.
//
// Returns the vma as a mixed-map vma only when both identity checks pass.
fn check_vma(vma: &virt::VmaRef, owner: *const ShrinkablePageRange) -> Option<&virt::VmaMixedMap> {
    // SAFETY: Just reading the vm_ops pointer of any active vma is safe.
    let vm_ops = unsafe { (*vma.as_ptr()).vm_ops };
    if !ptr::eq(vm_ops, &BINDER_VM_OPS.0) {
        return None;
    }

    // SAFETY: Reading the vm_private_data pointer of a binder-owned vma is safe.
    let vm_private_data = unsafe { (*vma.as_ptr()).vm_private_data };
    // The ShrinkablePageRange is only dropped when the Process is dropped, which only happens once
    // the file's ->release handler is invoked, which means the ShrinkablePageRange outlives any
    // VMA associated with it, so there can't be any false positives due to pointer reuse here.
    if !ptr::eq(vm_private_data, owner.cast()) {
        return None;
    }

    vma.as_mixedmap_vma()
}

/// State protected by the `ShrinkablePageRange::lock` spinlock.
struct Inner {
    /// Array of pages.
    ///
    /// Since this is also accessed by the shrinker, we can't use a `Box`, which asserts exclusive
    /// ownership. To deal with that, we manage it using raw pointers.
    pages: *mut PageInfo,
    /// Length of the `pages` array.
    size: usize,
    /// The address of the vma to insert the pages into.
    vma_addr: usize,
}

// SAFETY: proper locking is in place for `Inner`
unsafe impl Send for Inner {}

// Guard type returned by `stable_trylock_mm`; the `'static` lifetime is justified there.
type StableMmGuard =
    kernel::sync::lock::Guard<'static, (), kernel::sync::lock::mutex::MutexBackend>;

/// An array element that describes the current state of a page.
///
/// There are three states:
///
/// * Free. The page is None. The `lru` element is not queued.
/// * Available. The page is Some. The `lru` element is queued to the shrinker's lru.
/// * Used. The page is Some. The `lru` element is not queued.
///
/// When an element is available, the shrinker is able to free the page.
#[repr(C)]
struct PageInfo {
    // Must stay the first field: the shrinker callback casts a `list_head` pointer back to
    // `PageInfo` (see `rust_shrink_free_page`).
    lru: bindings::list_head,
    page: Option<Page>,
    range: *const ShrinkablePageRange,
}

impl PageInfo {
    /// Store `page` in this `PageInfo`.
    ///
    /// # Safety
    ///
    /// The caller ensures that writing to `me.page` is ok, and that the page is not currently set.
    unsafe fn set_page(me: *mut PageInfo, page: Page) {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // Defensive check: the safety contract says the page must not be set, so this branch
        // indicates a bug elsewhere; log it and drop the stale page rather than leak it.
        // SAFETY: The pointer is valid for writing, so also valid for reading.
        if unsafe { (*ptr).is_some() } {
            pr_err!("set_page called when there is already a page");
            // SAFETY: We will initialize the page again below.
            unsafe { ptr::drop_in_place(ptr) };
        }

        // SAFETY: The pointer is valid for writing.
        unsafe { ptr::write(ptr, Some(page)) };
    }

    /// Borrow the page stored in this `PageInfo`, if any.
    ///
    /// # Safety
    ///
    /// The caller ensures that reading from `me.page` is ok for the duration of 'a.
    unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw const (*me).page };

        // SAFETY: The pointer is valid for reading.
        unsafe { (*ptr).as_ref() }
    }

    /// Move the page out of this `PageInfo`, leaving `None` behind.
    ///
    /// # Safety
    ///
    /// The caller ensures that writing to `me.page` is ok for the duration of 'a.
    unsafe fn take_page(me: *mut PageInfo) -> Option<Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // SAFETY: The pointer is valid for reading.
        unsafe { (*ptr).take() }
    }

    /// Add this page to the lru list, if not already in the list.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_add(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_add(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }

    /// Remove this page from the lru list, if it is in the list.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_del(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_del(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }
}

impl ShrinkablePageRange {
    /// Create a new `ShrinkablePageRange` using the given shrinker.
    ///
    /// Fails with `ESRCH` if the current task has no mm (e.g. a kernel thread).
    pub(crate) fn new(shrinker: &'static Shrinker) -> impl PinInit<Self, Error> {
        try_pin_init!(Self {
            shrinker,
            pid: kernel::current!().pid(),
            mm: ARef::from(&**kernel::current!().mm().ok_or(ESRCH)?),
            mm_lock <- new_mutex!((), "ShrinkablePageRange::mm"),
            lock <- new_spinlock!(Inner {
                pages: ptr::null_mut(),
                size: 0,
                vma_addr: 0,
            }, "ShrinkablePageRange"),
            _pin: PhantomPinned,
        })
    }

    /// Try to acquire `mm_lock`, returning a guard whose lifetime is independent of `&self`.
    ///
    /// Used by the shrinker callback, which cannot name a lifetime tied to `self`.
    pub(crate) fn stable_trylock_mm(&self) -> Option<StableMmGuard> {
        // SAFETY: This extends the duration of the reference. Since this call happens before
        // `mm_lock` is taken in the destructor of `ShrinkablePageRange`, the destructor will block
        // until the returned guard is dropped. This ensures that the guard is valid until dropped.
        let mm_lock = unsafe { &*ptr::from_ref(&self.mm_lock) };

        mm_lock.try_lock()
    }

    /// Register a vma with this page range. Returns the size of the region.
    ///
    /// The registered size is capped at `SZ_4M` bytes. Fails with `EINVAL` if the vma belongs to
    /// a different mm or is empty, and with `EBUSY` if a vma was already registered.
    pub(crate) fn register_with_vma(&self, vma: &virt::VmaNew) -> Result<usize> {
        let num_bytes = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize);
        let num_pages = num_bytes >> PAGE_SHIFT;

        if !ptr::eq::<Mm>(&*self.mm, &**vma.mm()) {
            pr_debug!("Failed to register with vma: invalid vma->vm_mm");
            return Err(EINVAL);
        }
        if num_pages == 0 {
            pr_debug!("Failed to register with vma: size zero");
            return Err(EINVAL);
        }

        let mut pages = KVVec::<PageInfo>::with_capacity(num_pages, GFP_KERNEL)?;

        // SAFETY: This just initializes the pages array.
        unsafe {
            let self_ptr = self as *const ShrinkablePageRange;
            for i in 0..num_pages {
                let info = pages.as_mut_ptr().add(i);
                (&raw mut (*info).range).write(self_ptr);
                (&raw mut (*info).page).write(None);
                // The lru list_head points at itself, i.e. the entry starts out unqueued.
                let lru = &raw mut (*info).lru;
                (&raw mut (*lru).next).write(lru);
                (&raw mut (*lru).prev).write(lru);
            }
        }

        let mut inner = self.lock.lock();
        if inner.size > 0 {
            pr_debug!("Failed to register with vma: already registered");
            drop(inner);
            return Err(EBUSY);
        }

        // Ownership of the array moves into `inner.pages`; it is reconstituted as a `KVVec` and
        // freed in the `PinnedDrop` implementation.
        inner.pages = pages.into_raw_parts().0;
        inner.size = num_pages;
        inner.vma_addr = vma.start();

        // This pointer is only used for comparison - it's not dereferenced.
        //
        // SAFETY: We own the vma, and we don't use any methods on VmaNew that rely on
        // `vm_private_data`.
        unsafe {
            (*vma.as_ptr()).vm_private_data = ptr::from_ref(self).cast_mut().cast::<c_void>()
        };

        // SAFETY: We own the vma, and we don't use any methods on VmaNew that rely on
        // `vm_ops`.
        unsafe { (*vma.as_ptr()).vm_ops = &BINDER_VM_OPS.0 };

        Ok(num_pages)
    }

    /// Make sure that the given pages are allocated and mapped.
    ///
    /// Must not be called from an atomic context.
    pub(crate) fn use_range(&self, start: usize, end: usize) -> Result<()> {
        if start >= end {
            return Ok(());
        }
        let mut inner = self.lock.lock();
        assert!(end <= inner.size);

        for i in start..end {
            // SAFETY: This pointer offset is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // Since we're going to use the page, we should remove it from the lru list so that
                // the shrinker will not free it.
                //
                // SAFETY: The pointer is valid, and this is the right shrinker.
                //
                // The shrinker can't free the page between the check and this call to
                // `list_lru_del` because we hold the lock.
                unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            } else {
                // We have to allocate a new page. Use the slow path.
                // Allocation may sleep, so the spinlock must be released first.
                drop(inner);
                // SAFETY: `i < end <= inner.size` so `i` is in bounds.
                match unsafe { self.use_page_slow(i) } {
                    Ok(()) => {}
                    Err(err) => {
                        pr_warn!("Error in use_page_slow: {:?}", err);
                        return Err(err);
                    }
                }
                inner = self.lock.lock();
            }
        }
        Ok(())
    }

    /// Mark the given page as in use, slow path.
    ///
    /// Allocates a page and maps it into the registered vma at index `i`.
    ///
    /// Must not be called from an atomic context.
    ///
    /// # Safety
    ///
    /// Assumes that `i` is in bounds.
    #[cold]
    unsafe fn use_page_slow(&self, i: usize) -> Result<()> {
        let new_page = Page::alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO)?;

        // Lock order: `mm_lock` before the spinlock (see the lock-order comment at the top of
        // this file).
        let mm_mutex = self.mm_lock.lock();
        let inner = self.lock.lock();

        // SAFETY: This pointer offset is in bounds.
        let page_info = unsafe { inner.pages.add(i) };

        // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
        if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
            // The page was already there, or someone else added the page while we didn't hold the
            // spinlock.
            //
            // SAFETY: The pointer is valid, and this is the right shrinker.
            //
            // The shrinker can't free the page between the check and this call to
            // `list_lru_del` because we hold the lock.
            unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            // `new_page` is dropped here; the freshly allocated page was not needed.
            return Ok(());
        }

        let vma_addr = inner.vma_addr;
        // Release the spinlock while we insert the page into the vma.
        drop(inner);

        // No overflow since we stay in bounds of the vma.
        let user_page_addr = vma_addr + (i << PAGE_SHIFT);

        // We use `mmput_async` when dropping the `mm` because `use_page_slow` is usually used from
        // a remote process. If the call to `mmput` races with the process shutting down, then the
        // caller of `use_page_slow` becomes responsible for cleaning up the `mm`, which doesn't
        // happen until it returns to userspace. However, the caller might instead go to sleep and
        // wait for the owner of the `mm` to wake it up, which doesn't happen because it's in the
        // middle of a shutdown process that won't complete until the `mm` is dropped. This can
        // amount to a deadlock.
        //
        // Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a
        // workqueue.
        let mm = MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?);
        {
            let vma_read;
            let mmap_read;
            // Fast path: per-vma lock under RCU; fall back to taking the mmap read lock.
            let vma = if let Some(ret) = mm.lock_vma_under_rcu(vma_addr) {
                vma_read = ret;
                check_vma(&vma_read, self)
            } else {
                mmap_read = mm.mmap_read_lock();
                mmap_read
                    .vma_lookup(vma_addr)
                    .and_then(|vma| check_vma(vma, self))
            };

            match vma {
                Some(vma) => vma.vm_insert_page(user_page_addr, &new_page)?,
                None => return Err(ESRCH),
            }
        }

        let inner = self.lock.lock();

        // SAFETY: The `page_info` pointer is valid and currently does not have a page. The page
        // can be written to since we hold the lock.
        //
        // We released and reacquired the spinlock since we checked that the page is null, but we
        // always hold the mm_lock mutex when setting the page to a non-null value, so it's not
        // possible for someone else to have changed it since our check.
        unsafe { PageInfo::set_page(page_info, new_page) };

        drop(inner);
        drop(mm_mutex);

        Ok(())
    }

    /// If the given page is in use, then mark it as available so that the shrinker can free it.
    ///
    /// May be called from an atomic context.
    pub(crate) fn stop_using_range(&self, start: usize, end: usize) {
        if start >= end {
            return;
        }
        let inner = self.lock.lock();
        assert!(end <= inner.size);

        for i in (start..end).rev() {
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: Okay for reading since we have the lock.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // SAFETY: The pointer is valid, and it's the right shrinker.
                unsafe { PageInfo::list_lru_add(page_info, page.nid(), self.shrinker) };
            }
        }
    }

    /// Helper for reading or writing to a range of bytes that may overlap with several pages.
    ///
    /// The callback is invoked as `cb(page, offset_within_page, length)` once per touched page.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    unsafe fn iterate<T>(&self, mut offset: usize, mut size: usize, mut cb: T) -> Result
    where
        T: FnMut(&Page, usize, usize) -> Result,
    {
        if size == 0 {
            return Ok(());
        }

        // Only snapshot the array pointer and length under the lock; the array itself is stable
        // until the destructor runs, and the caller guarantees the touched pages stay in use.
        let (pages, num_pages) = {
            let inner = self.lock.lock();
            (inner.pages, inner.size)
        };
        let num_bytes = num_pages << PAGE_SHIFT;

        // Check that the request is within the buffer.
        if offset.checked_add(size).ok_or(EFAULT)? > num_bytes {
            return Err(EFAULT);
        }

        let mut page_index = offset >> PAGE_SHIFT;
        offset &= PAGE_SIZE - 1;
        while size > 0 {
            let available = usize::min(size, PAGE_SIZE - offset);
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { pages.add(page_index) };
            // SAFETY: The caller guarantees that this page is in the "in use" state for the
            // duration of this call to `iterate`, so nobody will change the page.
            let page = unsafe { PageInfo::get_page(page_info) };
            if page.is_none() {
                pr_warn!("Page is null!");
            }
            let page = page.ok_or(EFAULT)?;
            cb(page, offset, available)?;
            size -= available;
            page_index += 1;
            // Only the first page can start at a non-zero offset.
            offset = 0;
        }
        Ok(())
    }

    /// Copy from userspace into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn copy_from_user_slice(
        &self,
        reader: &mut UserSliceReader,
        offset: usize,
        size: usize,
    ) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `copy_from_user_slice`.
        unsafe {
            self.iterate(offset, size, |page, offset, to_copy| {
                page.copy_from_user_slice_raw(reader, offset, to_copy)
            })
        }
    }

    /// Copy from this page range into kernel space.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
        let mut out = MaybeUninit::<T>::uninit();
        let mut out_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `read`.
        unsafe {
            self.iterate(offset, size_of::<T>(), |page, offset, to_copy| {
                // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
                let obj_ptr = (out.as_mut_ptr() as *mut u8).add(out_offset);
                // SAFETY: The pointer is in-bounds of the `out` variable, so it is valid.
                page.read_raw(obj_ptr, offset, to_copy)?;
                out_offset += to_copy;
                Ok(())
            })?;
        }
        // SAFETY: We just initialised the data.
        Ok(unsafe { out.assume_init() })
    }

    /// Copy from kernel space into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result {
        let mut obj_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `write`.
        unsafe {
            self.iterate(offset, size_of_val(obj), |page, offset, to_copy| {
                // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
                let obj_ptr = (obj as *const T as *const u8).add(obj_offset);
                // SAFETY: We have a reference to the object, so the pointer is valid.
                page.write_raw(obj_ptr, offset, to_copy)?;
                obj_offset += to_copy;
                Ok(())
            })
        }
    }

    /// Write zeroes to the given range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn fill_zero(&self, offset: usize, size: usize) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `fill_zero`.
        unsafe {
            self.iterate(offset, size, |page, offset, len| {
                page.fill_zero_raw(offset, len)
            })
        }
    }
}

#[pinned_drop]
impl PinnedDrop for ShrinkablePageRange {
    fn drop(self: Pin<&mut Self>) {
        let (pages, size) = {
            let lock = self.lock.lock();
            (lock.pages, lock.size)
        };

        // `size == 0` means `register_with_vma` never succeeded, so there is nothing to free.
        if size == 0 {
            return;
        }

        // Note: This call is also necessary for the safety of `stable_trylock_mm`.
        let mm_lock = self.mm_lock.lock();

        // This is the destructor, so unlike the other methods, we only need to worry about races
        // with the shrinker here. Since we hold the `mm_lock`, we also can't race with the
        // shrinker, and after this loop, the shrinker will not access any of our pages since we
        // removed them from the lru list.
        for i in 0..size {
            // SAFETY: Loop is in-bounds of the size.
            let p_ptr = unsafe { pages.add(i) };
            // SAFETY: No other readers, so we can read.
            if let Some(p) = unsafe { PageInfo::get_page(p_ptr) } {
                // SAFETY: The pointer is valid and it's the right shrinker.
                unsafe { PageInfo::list_lru_del(p_ptr, p.nid(), self.shrinker) };
            }
        }

        drop(mm_lock);

        // SAFETY: `pages` was allocated as an `KVVec<PageInfo>` with capacity `size`. Furthermore,
        // all `size` elements are initialized. Also, the array is no longer shared with the
        // shrinker due to the above loop.
        drop(unsafe { KVVec::from_raw_parts(pages, size, size) });
    }
}

/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_count(
    shrink: *mut bindings::shrinker,
    _sc: *mut bindings::shrink_control,
) -> c_ulong {
    // SAFETY: We can access our own private data.
    let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
    // SAFETY: Accessing the lru list is okay. Just an FFI call.
    unsafe { bindings::list_lru_count(list_lru) }
}

/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_scan(
    shrink: *mut bindings::shrinker,
    sc: *mut bindings::shrink_control,
) -> c_ulong {
    // SAFETY: We can access our own private data.
    let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
    // SAFETY: Caller guarantees that it is safe to read this field.
    let nr_to_scan = unsafe { (*sc).nr_to_scan };
    // SAFETY: Accessing the lru list is okay. Just an FFI call.
    unsafe {
        bindings::list_lru_walk(
            list_lru,
            Some(rust_shrink_free_page),
            ptr::null_mut(),
            nr_to_scan,
        )
    }
}

const LRU_SKIP: bindings::lru_status = bindings::lru_status::LRU_SKIP;
// `LRU_REMOVED_RETRY` rather than `LRU_REMOVED`: `rust_shrink_free_page` drops the lru spinlock,
// so the walker must be told to re-take it.
const LRU_REMOVED_ENTRY: bindings::lru_status = bindings::lru_status::LRU_REMOVED_RETRY;

/// Per-item callback for `list_lru_walk`: reclaim one "available" page.
///
/// Because the lru spinlock is held on entry (last in the lock order), every other lock is
/// acquired with trylock; any contention makes us skip the item instead of deadlocking.
///
/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_free_page(
    item: *mut bindings::list_head,
    lru: *mut bindings::list_lru_one,
    _cb_arg: *mut c_void,
) -> bindings::lru_status {
    // Fields that should survive after unlocking the lru lock.
    let page;
    let page_index;
    let mm;
    let mmap_read;
    let mm_mutex;
    let vma_addr;
    let range_ptr;

    {
        // CAST: The `list_head` field is first in `PageInfo`.
        let info = item as *mut PageInfo;
        // SAFETY: The `range` field of `PageInfo` is immutable.
        range_ptr = unsafe { (*info).range };
        // SAFETY: The `range` outlives its `PageInfo` values.
        let range = unsafe { &*range_ptr };

        mm = match range.mm.mmget_not_zero() {
            Some(mm) => MmWithUser::into_mmput_async(mm),
            None => return LRU_SKIP,
        };

        mm_mutex = match range.stable_trylock_mm() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        mmap_read = match mm.mmap_read_trylock() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        // We can't lock it normally here, since we hold the lru lock.
        let inner = match range.lock.try_lock() {
            Some(inner) => inner,
            None => return LRU_SKIP,
        };

        // SAFETY: The item is in this lru list, so it's okay to remove it.
        unsafe { bindings::list_lru_isolate(lru, item) };

        // SAFETY: Both pointers are in bounds of the same allocation.
        page_index = unsafe { info.offset_from(inner.pages) } as usize;

        // SAFETY: We hold the spinlock, so we can take the page.
        //
        // This sets the page pointer to zero before we unmap it from the vma. However, we call
        // `zap_page_range` before we release the mmap lock, so `use_page_slow` will not be able to
        // insert a new page until after our call to `zap_page_range`.
        page = unsafe { PageInfo::take_page(info) };
        vma_addr = inner.vma_addr;

        // From this point on, we don't access this PageInfo or ShrinkablePageRange again, because
        // they can be freed at any point after we unlock `lru_lock`. This is with the exception of
        // `mm_mutex` which is kept alive by holding the lock.
        // (`inner` is released here at the end of this scope.)
    }

    // SAFETY: The lru lock is locked when this method is called.
    unsafe { bindings::spin_unlock(&raw mut (*lru).lock) };

    if let Some(unchecked_vma) = mmap_read.vma_lookup(vma_addr) {
        if let Some(vma) = check_vma(unchecked_vma, range_ptr) {
            let user_page_addr = vma_addr + (page_index << PAGE_SHIFT);
            vma.zap_page_range_single(user_page_addr, PAGE_SIZE);
        }
    }

    drop(mmap_read);
    drop(mm_mutex);
    drop(mm);
    drop(page);

    LRU_REMOVED_ENTRY
}