// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2025 Google LLC.

//! This module has utilities for managing a page range where unused pages may be reclaimed by a
//! vma shrinker.

// To avoid deadlocks, locks are taken in the order:
//
// 1. mmap lock
// 2. spinlock
// 3. lru spinlock
//
// The shrinker will use trylock methods because it locks them in a different order.

use core::{
    marker::PhantomPinned,
    mem::{size_of, size_of_val, MaybeUninit},
    ptr,
};

use kernel::{
    bindings,
    error::Result,
    ffi::{c_ulong, c_void},
    mm::{virt, Mm, MmWithUser},
    new_mutex, new_spinlock,
    page::{Page, PAGE_SHIFT, PAGE_SIZE},
    prelude::*,
    str::CStr,
    sync::{aref::ARef, Mutex, SpinLock},
    task::Pid,
    transmute::FromBytes,
    types::Opaque,
    uaccess::UserSliceReader,
};

/// Represents a shrinker that can be registered with the kernel.
///
/// Each shrinker can be used by many `ShrinkablePageRange` objects.
#[repr(C)]
pub(crate) struct Shrinker {
    inner: Opaque<*mut bindings::shrinker>,
    list_lru: Opaque<bindings::list_lru>,
}

// SAFETY: The shrinker and list_lru are thread safe.
unsafe impl Send for Shrinker {}
// SAFETY: The shrinker and list_lru are thread safe.
unsafe impl Sync for Shrinker {}

impl Shrinker {
    /// Create a new shrinker.
    ///
    /// # Safety
    ///
    /// Before using this shrinker with a `ShrinkablePageRange`, the `register` method must have
    /// been called exactly once, and it must not have returned an error.
    pub(crate) const unsafe fn new() -> Self {
        Self {
            inner: Opaque::uninit(),
            list_lru: Opaque::uninit(),
        }
    }

    /// Register this shrinker with the kernel.
    pub(crate) fn register(&'static self, name: &CStr) -> Result<()> {
        // SAFETY: These fields are not yet used, so it's okay to zero them.
        unsafe {
            self.inner.get().write(ptr::null_mut());
            self.list_lru.get().write_bytes(0, 1);
        }

        // SAFETY: The field is not yet used, so we can initialize it.
        let ret = unsafe { bindings::__list_lru_init(self.list_lru.get(), false, ptr::null_mut()) };
        if ret != 0 {
            return Err(Error::from_errno(ret));
        }

        // SAFETY: The `name` points at a valid c string.
        let shrinker = unsafe { bindings::shrinker_alloc(0, name.as_char_ptr()) };
        if shrinker.is_null() {
            // SAFETY: We initialized it, so it's okay to destroy it.
            unsafe { bindings::list_lru_destroy(self.list_lru.get()) };
            // A null return from `shrinker_alloc` means allocation failure; `ret` is zero at
            // this point, so report `ENOMEM` rather than `Error::from_errno(0)`.
            return Err(ENOMEM);
        }

        // SAFETY: We're about to register the shrinker, and these are the fields we need to
        // initialize. (All other fields are already zeroed.)
        unsafe {
            (&raw mut (*shrinker).count_objects).write(Some(rust_shrink_count));
            (&raw mut (*shrinker).scan_objects).write(Some(rust_shrink_scan));
            (&raw mut (*shrinker).private_data).write(self.list_lru.get().cast());
        }

        // SAFETY: The new shrinker has been fully initialized, so we can register it.
        unsafe { bindings::shrinker_register(shrinker) };

        // SAFETY: This initializes the pointer to the shrinker so that we can use it.
        unsafe { self.inner.get().write(shrinker) };

        Ok(())
    }
}
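// Illustrative only: the expected wiring for this type. A driver would declare the shrinker as
// a `static` (which satisfies the `&'static self` requirement of `register`) and register it
// once during module init. The names `SHRINKER` and `module_init`, and the shrinker name string,
// are hypothetical.
//
//     // SAFETY: `register` is called exactly once in `module_init` below.
//     static SHRINKER: Shrinker = unsafe { Shrinker::new() };
//
//     fn module_init() -> Result<()> {
//         // Must succeed before any `ShrinkablePageRange::new(&SHRINKER)`.
//         SHRINKER.register(kernel::c_str!("binder_shrinker"))?;
//         Ok(())
//     }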
/// A container that manages a page range in a vma.
///
/// The pages can be thought of as an array of booleans of whether the pages are usable. The
/// methods `use_range` and `stop_using_range` set all booleans in a range to true or false
/// respectively. Initially, no pages are allocated. When a page is not used, it is not freed
/// immediately. Instead, it is made available to the memory shrinker to free it if the device is
/// under memory pressure.
///
/// It's okay for `use_range` and `stop_using_range` to race with each other, although there's no
/// way to know whether an index ends up with true or false if a call to `use_range` races with
/// another call to `stop_using_range` on a given index.
///
/// It's also okay for the two methods to race with themselves, e.g. if two threads call
/// `use_range` on the same index, then that's fine and neither call will return until the page is
/// allocated and mapped.
///
/// The methods that read or write to a range require that the page is marked as in use. So it is
/// _not_ okay to call `stop_using_range` on a page that is in use by the methods that read or
/// write to the page.
#[pin_data(PinnedDrop)]
pub(crate) struct ShrinkablePageRange {
    /// Shrinker object registered with the kernel.
    shrinker: &'static Shrinker,
    /// Pid using this page range. Only used as debugging information.
    pid: Pid,
    /// The mm for the relevant process.
    mm: ARef<Mm>,
    /// Used to synchronize calls to `vm_insert_page` and `zap_page_range_single`.
    #[pin]
    mm_lock: Mutex<()>,
    /// Spinlock protecting changes to pages.
    #[pin]
    lock: SpinLock<Inner>,

    /// Must not move, since page info has pointers back.
    #[pin]
    _pin: PhantomPinned,
}

struct Inner {
    /// Array of pages.
    ///
    /// Since this is also accessed by the shrinker, we can't use a `Box`, which asserts exclusive
    /// ownership. To deal with that, we manage it using raw pointers.
    pages: *mut PageInfo,
    /// Length of the `pages` array.
    size: usize,
    /// The address of the vma to insert the pages into.
    vma_addr: usize,
}

// SAFETY: Proper locking is in place for `Inner`.
unsafe impl Send for Inner {}

type StableMmGuard =
    kernel::sync::lock::Guard<'static, (), kernel::sync::lock::mutex::MutexBackend>;

/// An array element that describes the current state of a page.
///
/// There are three states:
///
/// * Free. The page is None. The `lru` element is not queued.
/// * Available. The page is Some. The `lru` element is queued to the shrinker's lru.
/// * Used. The page is Some. The `lru` element is not queued.
///
/// When an element is available, the shrinker is able to free the page.
#[repr(C)]
struct PageInfo {
    lru: bindings::list_head,
    page: Option<Page>,
    range: *const ShrinkablePageRange,
}

impl PageInfo {
    /// # Safety
    ///
    /// The caller ensures that writing to `me.page` is ok, and that the page is not currently set.
    unsafe fn set_page(me: *mut PageInfo, page: Page) {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // SAFETY: The pointer is valid for writing, so also valid for reading.
        if unsafe { (*ptr).is_some() } {
            pr_err!("set_page called when there is already a page");
            // SAFETY: We will initialize the page again below.
            unsafe { ptr::drop_in_place(ptr) };
        }

        // SAFETY: The pointer is valid for writing.
        unsafe { ptr::write(ptr, Some(page)) };
    }

    /// # Safety
    ///
    /// The caller ensures that reading from `me.page` is ok for the duration of 'a.
    unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw const (*me).page };

        // SAFETY: The pointer is valid for reading.
        unsafe { (*ptr).as_ref() }
    }

    /// # Safety
    ///
    /// The caller ensures that reading from and writing to `me.page` is ok for the duration of
    /// this call.
    unsafe fn take_page(me: *mut PageInfo) -> Option<Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // SAFETY: The pointer is valid for reading and writing.
        unsafe { (*ptr).take() }
    }

    /// Add this page to the lru list, if not already in the list.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_add(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_add(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }

    /// Remove this page from the lru list, if it is in the list.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_del(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_del(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }
}
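// Illustrative only: constructing a `ShrinkablePageRange` with pin-init. The constructor reads
// `current!()`, so this must run in the context of the process that will mmap the range. The
// static `SHRINKER` is assumed to be a registered `Shrinker` as sketched above.
//
//     let range = KBox::pin_init(ShrinkablePageRange::new(&SHRINKER), GFP_KERNEL)?;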
impl ShrinkablePageRange {
    /// Create a new `ShrinkablePageRange` using the given shrinker.
    pub(crate) fn new(shrinker: &'static Shrinker) -> impl PinInit<Self, Error> {
        try_pin_init!(Self {
            shrinker,
            pid: kernel::current!().pid(),
            mm: ARef::from(&**kernel::current!().mm().ok_or(ESRCH)?),
            mm_lock <- new_mutex!((), "ShrinkablePageRange::mm"),
            lock <- new_spinlock!(Inner {
                pages: ptr::null_mut(),
                size: 0,
                vma_addr: 0,
            }, "ShrinkablePageRange"),
            _pin: PhantomPinned,
        })
    }

    pub(crate) fn stable_trylock_mm(&self) -> Option<StableMmGuard> {
        // SAFETY: This extends the duration of the reference. Since this call happens before
        // `mm_lock` is taken in the destructor of `ShrinkablePageRange`, the destructor will block
        // until the returned guard is dropped. This ensures that the guard is valid until dropped.
        let mm_lock = unsafe { &*ptr::from_ref(&self.mm_lock) };

        mm_lock.try_lock()
    }

    /// Register a vma with this page range. Returns the size of the region.
    pub(crate) fn register_with_vma(&self, vma: &virt::VmaNew) -> Result<usize> {
        let num_bytes = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize);
        let num_pages = num_bytes >> PAGE_SHIFT;

        if !ptr::eq::<Mm>(&*self.mm, &**vma.mm()) {
            pr_debug!("Failed to register with vma: invalid vma->vm_mm");
            return Err(EINVAL);
        }
        if num_pages == 0 {
            pr_debug!("Failed to register with vma: size zero");
            return Err(EINVAL);
        }

        let mut pages = KVVec::<PageInfo>::with_capacity(num_pages, GFP_KERNEL)?;

        // SAFETY: This just initializes the pages array.
        unsafe {
            let self_ptr = self as *const ShrinkablePageRange;
            for i in 0..num_pages {
                let info = pages.as_mut_ptr().add(i);
                (&raw mut (*info).range).write(self_ptr);
                (&raw mut (*info).page).write(None);
                let lru = &raw mut (*info).lru;
                (&raw mut (*lru).next).write(lru);
                (&raw mut (*lru).prev).write(lru);
            }
        }

        let mut inner = self.lock.lock();
        if inner.size > 0 {
            pr_debug!("Failed to register with vma: already registered");
            drop(inner);
            return Err(EBUSY);
        }

        inner.pages = pages.into_raw_parts().0;
        inner.size = num_pages;
        inner.vma_addr = vma.start();

        Ok(num_pages)
    }

    /// Make sure that the given pages are allocated and mapped.
    ///
    /// Must not be called from an atomic context.
    pub(crate) fn use_range(&self, start: usize, end: usize) -> Result<()> {
        if start >= end {
            return Ok(());
        }
        let mut inner = self.lock.lock();
        assert!(end <= inner.size);

        for i in start..end {
            // SAFETY: This pointer offset is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // Since we're going to use the page, we should remove it from the lru list so that
                // the shrinker will not free it.
                //
                // SAFETY: The pointer is valid, and this is the right shrinker.
                //
                // The shrinker can't free the page between the check and this call to
                // `list_lru_del` because we hold the lock.
                unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            } else {
                // We have to allocate a new page. Use the slow path.
                drop(inner);
                // SAFETY: `i < end <= inner.size` so `i` is in bounds.
                match unsafe { self.use_page_slow(i) } {
                    Ok(()) => {}
                    Err(err) => {
                        pr_warn!("Error in use_page_slow: {:?}", err);
                        return Err(err);
                    }
                }
                inner = self.lock.lock();
            }
        }
        Ok(())
    }
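    // Illustrative only: how a caller might drive `use_range` and `stop_using_range`. Indices
    // are page indices within the range registered via `register_with_vma`; the bounds below
    // are hypothetical.
    //
    //     // Allocate and map pages 2..5, pinning them against the shrinker.
    //     range.use_range(2, 5)?;
    //     // ... read from / write to those pages here ...
    //     // Make them reclaimable again under memory pressure.
    //     range.stop_using_range(2, 5);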
    /// Mark the given page as in use, slow path.
    ///
    /// Must not be called from an atomic context.
    ///
    /// # Safety
    ///
    /// Assumes that `i` is in bounds.
    #[cold]
    unsafe fn use_page_slow(&self, i: usize) -> Result<()> {
        let new_page = Page::alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO)?;

        let mm_mutex = self.mm_lock.lock();
        let inner = self.lock.lock();

        // SAFETY: This pointer offset is in bounds.
        let page_info = unsafe { inner.pages.add(i) };

        // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
        if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
            // The page was already there, or someone else added the page while we didn't hold the
            // spinlock.
            //
            // SAFETY: The pointer is valid, and this is the right shrinker.
            //
            // The shrinker can't free the page between the check and this call to
            // `list_lru_del` because we hold the lock.
            unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            return Ok(());
        }

        let vma_addr = inner.vma_addr;
        // Release the spinlock while we insert the page into the vma.
        drop(inner);

        // No overflow since we stay in bounds of the vma.
        let user_page_addr = vma_addr + (i << PAGE_SHIFT);

        // We use `mmput_async` when dropping the `mm` because `use_page_slow` is usually used from
        // a remote process. If the call to `mmput` races with the process shutting down, then the
        // caller of `use_page_slow` becomes responsible for cleaning up the `mm`, which doesn't
        // happen until it returns to userspace. However, the caller might instead go to sleep and
        // wait for the owner of the `mm` to wake it up, which doesn't happen because it's in the
        // middle of a shutdown process that won't complete until the `mm` is dropped. This can
        // amount to a deadlock.
        //
        // Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a
        // workqueue.
        MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?)
            .mmap_read_lock()
            .vma_lookup(vma_addr)
            .ok_or(ESRCH)?
            .as_mixedmap_vma()
            .ok_or(ESRCH)?
            .vm_insert_page(user_page_addr, &new_page)
            .inspect_err(|err| {
                pr_warn!(
                    "Failed to vm_insert_page({}): vma_addr:{} i:{} err:{:?}",
                    user_page_addr,
                    vma_addr,
                    i,
                    err
                )
            })?;

        let inner = self.lock.lock();

        // SAFETY: The `page_info` pointer is valid and currently does not have a page. The page
        // can be written to since we hold the lock.
        //
        // We released and reacquired the spinlock since we checked that the page is null, but we
        // always hold the mm_lock mutex when setting the page to a non-null value, so it's not
        // possible for someone else to have changed it since our check.
        unsafe { PageInfo::set_page(page_info, new_page) };

        drop(inner);
        drop(mm_mutex);

        Ok(())
    }

    /// If the given pages are in use, then mark them as available so that the shrinker can free
    /// them.
    ///
    /// May be called from an atomic context.
    pub(crate) fn stop_using_range(&self, start: usize, end: usize) {
        if start >= end {
            return;
        }
        let inner = self.lock.lock();
        assert!(end <= inner.size);

        for i in (start..end).rev() {
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: Okay for reading since we have the lock.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // SAFETY: The pointer is valid, and it's the right shrinker.
                unsafe { PageInfo::list_lru_add(page_info, page.nid(), self.shrinker) };
            }
        }
    }
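    // Worked example (illustrative) for `iterate` below: with 4 KiB pages, a call with
    // `offset = 4000` and `size = 200` is split into two callbacks: `cb(page[0], 4000, 96)`
    // for the tail of the first page, then `cb(page[1], 0, 104)` for the head of the second.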
    /// Helper for reading or writing to a range of bytes that may overlap with several pages.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    unsafe fn iterate<T>(&self, mut offset: usize, mut size: usize, mut cb: T) -> Result
    where
        T: FnMut(&Page, usize, usize) -> Result,
    {
        if size == 0 {
            return Ok(());
        }

        let (pages, num_pages) = {
            let inner = self.lock.lock();
            (inner.pages, inner.size)
        };
        let num_bytes = num_pages << PAGE_SHIFT;

        // Check that the request is within the buffer.
        if offset.checked_add(size).ok_or(EFAULT)? > num_bytes {
            return Err(EFAULT);
        }

        let mut page_index = offset >> PAGE_SHIFT;
        offset &= PAGE_SIZE - 1;
        while size > 0 {
            let available = usize::min(size, PAGE_SIZE - offset);
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { pages.add(page_index) };
            // SAFETY: The caller guarantees that this page is in the "in use" state for the
            // duration of this call to `iterate`, so nobody will change the page.
            let page = unsafe { PageInfo::get_page(page_info) };
            if page.is_none() {
                pr_warn!("Page is null!");
            }
            let page = page.ok_or(EFAULT)?;
            cb(page, offset, available)?;
            size -= available;
            page_index += 1;
            offset = 0;
        }
        Ok(())
    }

    /// Copy from userspace into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn copy_from_user_slice(
        &self,
        reader: &mut UserSliceReader,
        offset: usize,
        size: usize,
    ) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `copy_from_user_slice`.
        unsafe {
            self.iterate(offset, size, |page, offset, to_copy| {
                page.copy_from_user_slice_raw(reader, offset, to_copy)
            })
        }
    }

    /// Copy from this page range into kernel space.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
        let mut out = MaybeUninit::<T>::uninit();
        let mut out_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `read`.
        unsafe {
            self.iterate(offset, size_of::<T>(), |page, offset, to_copy| {
                // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
                let obj_ptr = (out.as_mut_ptr() as *mut u8).add(out_offset);
                // SAFETY: The pointer is in-bounds of the `out` variable, so it is valid.
                page.read_raw(obj_ptr, offset, to_copy)?;
                out_offset += to_copy;
                Ok(())
            })?;
        }
        // SAFETY: We just initialised the data.
        Ok(unsafe { out.assume_init() })
    }

    /// Copy from kernel space into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result {
        let mut obj_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `write`.
        unsafe {
            self.iterate(offset, size_of_val(obj), |page, offset, to_copy| {
                // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
                let obj_ptr = (obj as *const T as *const u8).add(obj_offset);
                // SAFETY: We have a reference to the object, so the pointer is valid.
                page.write_raw(obj_ptr, offset, to_copy)?;
                obj_offset += to_copy;
                Ok(())
            })
        }
    }

    /// Write zeroes to the given range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn fill_zero(&self, offset: usize, size: usize) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `fill_zero`.
        unsafe {
            self.iterate(offset, size, |page, offset, len| {
                page.fill_zero_raw(offset, len)
            })
        }
    }
}
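// Illustrative only: reading and writing a plain-old-data value through the range. The caller
// must keep the touched pages in use (via `use_range`) for the duration of the calls, which is
// what discharges the `unsafe` contracts. The `Header` type and the offsets are hypothetical;
// assume `Header: FromBytes` and `#[repr(C)]`.
//
//     struct Header { size: u64 }
//
//     range.use_range(0, 1)?;
//     // SAFETY: Page 0 stays in use for the duration of both calls.
//     let hdr: Header = unsafe { range.read(0)? };
//     unsafe { range.write(8, &hdr)? };
//     range.stop_using_range(0, 1);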
#[pinned_drop]
impl PinnedDrop for ShrinkablePageRange {
    fn drop(self: Pin<&mut Self>) {
        let (pages, size) = {
            let lock = self.lock.lock();
            (lock.pages, lock.size)
        };

        if size == 0 {
            return;
        }

        // Note: This call is also necessary for the safety of `stable_trylock_mm`.
        let mm_lock = self.mm_lock.lock();

        // This is the destructor, so unlike the other methods, we only need to worry about races
        // with the shrinker here. Since we hold the `mm_lock`, we also can't race with the
        // shrinker, and after this loop, the shrinker will not access any of our pages since we
        // removed them from the lru list.
        for i in 0..size {
            // SAFETY: The loop stays in bounds of `size`.
            let p_ptr = unsafe { pages.add(i) };
            // SAFETY: No other readers, so we can read.
            if let Some(p) = unsafe { PageInfo::get_page(p_ptr) } {
                // SAFETY: The pointer is valid and it's the right shrinker.
                unsafe { PageInfo::list_lru_del(p_ptr, p.nid(), self.shrinker) };
            }
        }

        drop(mm_lock);

        // SAFETY: `pages` was allocated as a `KVVec<PageInfo>` with capacity `size`. Furthermore,
        // all `size` elements are initialized. Also, the array is no longer shared with the
        // shrinker due to the above loop.
        drop(unsafe { KVVec::from_raw_parts(pages, size, size) });
    }
}

/// # Safety
///
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_count(
    shrink: *mut bindings::shrinker,
    _sc: *mut bindings::shrink_control,
) -> c_ulong {
    // SAFETY: We can access our own private data.
    let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
    // SAFETY: Accessing the lru list is okay. Just an FFI call.
    unsafe { bindings::list_lru_count(list_lru) }
}

/// # Safety
///
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_scan(
    shrink: *mut bindings::shrinker,
    sc: *mut bindings::shrink_control,
) -> c_ulong {
    // SAFETY: We can access our own private data.
    let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
    // SAFETY: Caller guarantees that it is safe to read this field.
    let nr_to_scan = unsafe { (*sc).nr_to_scan };
    // SAFETY: Accessing the lru list is okay. Just an FFI call.
    unsafe {
        bindings::list_lru_walk(
            list_lru,
            Some(bindings::rust_shrink_free_page_wrap),
            ptr::null_mut(),
            nr_to_scan,
        )
    }
}

const LRU_SKIP: bindings::lru_status = bindings::lru_status_LRU_SKIP;
const LRU_REMOVED_ENTRY: bindings::lru_status = bindings::lru_status_LRU_REMOVED_RETRY;
/// # Safety
///
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_free_page(
    item: *mut bindings::list_head,
    lru: *mut bindings::list_lru_one,
    _cb_arg: *mut c_void,
) -> bindings::lru_status {
    // Fields that should survive after unlocking the lru lock.
    let page;
    let page_index;
    let mm;
    let mmap_read;
    let mm_mutex;
    let vma_addr;

    {
        // CAST: The `list_head` field is first in `PageInfo`.
        let info = item as *mut PageInfo;
        // SAFETY: The `range` field of `PageInfo` is immutable.
        let range = unsafe { &*((*info).range) };

        mm = match range.mm.mmget_not_zero() {
            Some(mm) => MmWithUser::into_mmput_async(mm),
            None => return LRU_SKIP,
        };

        mm_mutex = match range.stable_trylock_mm() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        mmap_read = match mm.mmap_read_trylock() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        // We can't lock it normally here, since we hold the lru lock.
        let inner = match range.lock.try_lock() {
            Some(inner) => inner,
            None => return LRU_SKIP,
        };

        // SAFETY: The item is in this lru list, so it's okay to remove it.
        unsafe { bindings::list_lru_isolate(lru, item) };

        // SAFETY: Both pointers are in bounds of the same allocation.
        page_index = unsafe { info.offset_from(inner.pages) } as usize;

        // SAFETY: We hold the spinlock, so we can take the page.
        //
        // This sets the page pointer to zero before we unmap it from the vma. However, we call
        // `zap_page_range_single` before we release the mmap lock, so `use_page_slow` will not
        // be able to insert a new page until after our call to `zap_page_range_single`.
        page = unsafe { PageInfo::take_page(info) };
        vma_addr = inner.vma_addr;

        // From this point on, we don't access this PageInfo or ShrinkablePageRange again, because
        // they can be freed at any point after we unlock `lru_lock`. This is with the exception of
        // `mm_mutex` which is kept alive by holding the lock.
    }

    // SAFETY: The lru lock is locked when this method is called.
    unsafe { bindings::spin_unlock(&raw mut (*lru).lock) };

    if let Some(vma) = mmap_read.vma_lookup(vma_addr) {
        let user_page_addr = vma_addr + (page_index << PAGE_SHIFT);
        vma.zap_page_range_single(user_page_addr, PAGE_SIZE);
    }

    drop(mmap_read);
    drop(mm_mutex);
    drop(mm);
    drop(page);

    // SAFETY: We just unlocked the lru lock, but it should be locked when we return.
    unsafe { bindings::spin_lock(&raw mut (*lru).lock) };

    LRU_REMOVED_ENTRY
}
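// Illustrative lifecycle sketch (not part of this module): how the pieces above fit together
// end to end. The names `SHRINKER`, `range`, `vma`, `lo`, and `hi` are hypothetical.
//
//     // Module init: register the static shrinker exactly once.
//     SHRINKER.register(kernel::c_str!("binder"))?;
//
//     // Process open: create one range per process, in that process's context.
//     let range = KBox::pin_init(ShrinkablePageRange::new(&SHRINKER), GFP_KERNEL)?;
//
//     // mmap handler: bind the vma and learn the number of pages.
//     let num_pages = range.register_with_vma(vma)?;
//
//     // Hot path: pin pages while a buffer lives in them, then release them.
//     range.use_range(lo, hi)?;
//     range.stop_using_range(lo, hi);
//
//     // Under memory pressure, the kernel calls `rust_shrink_scan`, which walks the lru and
//     // frees available pages via `rust_shrink_free_page`.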