// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2025 Google LLC.

//! This module has utilities for managing a page range where unused pages may be reclaimed by a
//! vma shrinker.

// To avoid deadlocks, locks are taken in the order:
//
//  1. mmap lock
//  2. spinlock
//  3. lru spinlock
//
// The shrinker will use trylock methods because it locks them in a different order.
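//
// Here, "spinlock" refers to `ShrinkablePageRange::lock`, and "lru spinlock" to the spinlock
// inside the shrinker's `list_lru`. The shrinker callback already holds the lru spinlock when
// it runs, so it `try_lock`s the remaining locks and skips the page on contention instead of
// sleeping (see `rust_shrink_free_page` below).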

use core::{
    marker::PhantomPinned,
    mem::{size_of, size_of_val, MaybeUninit},
    ptr,
};

use kernel::{
    bindings,
    error::Result,
    ffi::{c_ulong, c_void},
    mm::{virt, Mm, MmWithUser},
    new_mutex, new_spinlock,
    page::{Page, PAGE_SHIFT, PAGE_SIZE},
    prelude::*,
    str::CStr,
    sync::{aref::ARef, Mutex, SpinLock},
    task::Pid,
    transmute::FromBytes,
    types::Opaque,
    uaccess::UserSliceReader,
};

/// Represents a shrinker that can be registered with the kernel.
///
/// Each shrinker can be used by many `ShrinkablePageRange` objects.
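///
/// # Examples
///
/// A minimal usage sketch; the static's name, the shrinker name, and `module_init` are
/// illustrative, not part of this file:
///
/// ```ignore
/// static PAGE_SHRINKER: Shrinker =
///     // SAFETY: `register` is called exactly once, in `module_init` below.
///     unsafe { Shrinker::new() };
///
/// fn module_init() -> Result<()> {
///     PAGE_SHRINKER.register(kernel::c_str!("example-shrinker"))?;
///     Ok(())
/// }
/// ```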
#[repr(C)]
pub(crate) struct Shrinker {
    inner: Opaque<*mut bindings::shrinker>,
    list_lru: Opaque<bindings::list_lru>,
}

// SAFETY: The shrinker and list_lru are thread safe.
unsafe impl Send for Shrinker {}
// SAFETY: The shrinker and list_lru are thread safe.
unsafe impl Sync for Shrinker {}

impl Shrinker {
    /// Create a new shrinker.
    ///
    /// # Safety
    ///
    /// Before using this shrinker with a `ShrinkablePageRange`, the `register` method must have
    /// been called exactly once, and it must not have returned an error.
    pub(crate) const unsafe fn new() -> Self {
        Self {
            inner: Opaque::uninit(),
            list_lru: Opaque::uninit(),
        }
    }

    /// Register this shrinker with the kernel.
    pub(crate) fn register(&'static self, name: &CStr) -> Result<()> {
        // SAFETY: These fields are not yet used, so it's okay to zero them.
        unsafe {
            self.inner.get().write(ptr::null_mut());
            self.list_lru.get().write_bytes(0, 1);
        }

        // SAFETY: The field is not yet used, so we can initialize it.
        let ret = unsafe { bindings::__list_lru_init(self.list_lru.get(), false, ptr::null_mut()) };
        if ret != 0 {
            return Err(Error::from_errno(ret));
        }

        // SAFETY: The `name` points at a valid c string.
        let shrinker = unsafe { bindings::shrinker_alloc(0, name.as_char_ptr()) };
        if shrinker.is_null() {
            // SAFETY: We initialized it, so it's okay to destroy it.
            unsafe { bindings::list_lru_destroy(self.list_lru.get()) };
            return Err(ENOMEM);
        }

        // SAFETY: We're about to register the shrinker, and these are the fields we need to
        // initialize. (All other fields are already zeroed.)
        unsafe {
            (&raw mut (*shrinker).count_objects).write(Some(rust_shrink_count));
            (&raw mut (*shrinker).scan_objects).write(Some(rust_shrink_scan));
            (&raw mut (*shrinker).private_data).write(self.list_lru.get().cast());
        }

        // SAFETY: The new shrinker has been fully initialized, so we can register it.
        unsafe { bindings::shrinker_register(shrinker) };

        // SAFETY: This initializes the pointer to the shrinker so that we can use it.
        unsafe { self.inner.get().write(shrinker) };

        Ok(())
    }
}

/// A container that manages a page range in a vma.
///
/// The pages can be thought of as an array of booleans that track whether each page is usable.
/// The methods `use_range` and `stop_using_range` set all booleans in a range to true or false,
/// respectively. Initially, no pages are allocated. When a page stops being used, it is not freed
/// immediately. Instead, it is made available to the memory shrinker, which may free it if the
/// device is under memory pressure.
///
/// It's okay for `use_range` and `stop_using_range` to race with each other, although there's no
/// way to know whether an index ends up with true or false if a call to `use_range` races with
/// another call to `stop_using_range` on a given index.
///
/// It's also okay for the two methods to race with themselves, e.g. if two threads call
/// `use_range` on the same index, then that's fine and neither call will return until the page is
/// allocated and mapped.
///
/// The methods that read or write to a range require that the page is marked as in use. So it is
/// _not_ okay to call `stop_using_range` on a page that is in use by the methods that read or
/// write to the page.
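///
/// # Examples
///
/// A minimal sketch of the intended flow, assuming `range` is a pinned `ShrinkablePageRange`
/// whose vma was already registered via `register_with_vma`:
///
/// ```ignore
/// // Mark pages 0..4 as in use; missing pages are allocated and mapped on demand.
/// range.use_range(0, 4)?;
/// // SAFETY: Pages 0..4 stay in use for the duration of this call.
/// unsafe { range.fill_zero(0, 4 * PAGE_SIZE)? };
/// // Hand the pages back to the shrinker; they may now be freed under memory pressure.
/// range.stop_using_range(0, 4);
/// ```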
#[pin_data(PinnedDrop)]
pub(crate) struct ShrinkablePageRange {
    /// Shrinker object registered with the kernel.
    shrinker: &'static Shrinker,
    /// Pid using this page range. Only used as debugging information.
    pid: Pid,
    /// The mm for the relevant process.
    mm: ARef<Mm>,
    /// Used to synchronize calls to `vm_insert_page` and `zap_page_range_single`.
    #[pin]
    mm_lock: Mutex<()>,
    /// Spinlock protecting changes to pages.
    #[pin]
    lock: SpinLock<Inner>,

    /// Must not move, since page info has pointers back.
    #[pin]
    _pin: PhantomPinned,
}

struct Inner {
    /// Array of pages.
    ///
    /// Since this is also accessed by the shrinker, we can't use a `Box`, which asserts exclusive
    /// ownership. To deal with that, we manage it using raw pointers.
    pages: *mut PageInfo,
    /// Length of the `pages` array.
    size: usize,
    /// The address of the vma to insert the pages into.
    vma_addr: usize,
}

// SAFETY: Proper locking is in place for `Inner`.
unsafe impl Send for Inner {}

type StableMmGuard =
    kernel::sync::lock::Guard<'static, (), kernel::sync::lock::mutex::MutexBackend>;

/// An array element that describes the current state of a page.
///
/// There are three states:
///
///  * Free. The page is None. The `lru` element is not queued.
///  * Available. The page is Some. The `lru` element is queued to the shrinker's lru.
///  * Used. The page is Some. The `lru` element is not queued.
///
/// When an element is available, the shrinker is able to free the page.
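///
/// Transitions: `use_range` moves Available to Used (allocating Free pages via `use_page_slow`),
/// `stop_using_range` moves Used to Available, and the shrinker moves Available to Free.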
#[repr(C)]
struct PageInfo {
    lru: bindings::list_head,
    page: Option<Page>,
    range: *const ShrinkablePageRange,
}

impl PageInfo {
    /// # Safety
    ///
    /// The caller ensures that writing to `me.page` is ok, and that the page is not currently set.
    unsafe fn set_page(me: *mut PageInfo, page: Page) {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // SAFETY: The pointer is valid for writing, so also valid for reading.
        if unsafe { (*ptr).is_some() } {
            pr_err!("set_page called when there is already a page");
            // SAFETY: We will initialize the page again below.
            unsafe { ptr::drop_in_place(ptr) };
        }

        // SAFETY: The pointer is valid for writing.
        unsafe { ptr::write(ptr, Some(page)) };
    }

    /// # Safety
    ///
    /// The caller ensures that reading from `me.page` is ok for the duration of 'a.
    unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw const (*me).page };

        // SAFETY: The pointer is valid for reading.
        unsafe { (*ptr).as_ref() }
    }

    /// # Safety
    ///
    /// The caller ensures that reading from and writing to `me.page` is ok.
    unsafe fn take_page(me: *mut PageInfo) -> Option<Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // SAFETY: The pointer is valid for reading and writing.
        unsafe { (*ptr).take() }
    }

    /// Add this page to the lru list, if not already in the list.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_add(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_add(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }

    /// Remove this page from the lru list, if it is in the list.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_del(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_del(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }
}

impl ShrinkablePageRange {
    /// Create a new `ShrinkablePageRange` using the given shrinker.
    pub(crate) fn new(shrinker: &'static Shrinker) -> impl PinInit<Self, Error> {
        try_pin_init!(Self {
            shrinker,
            pid: kernel::current!().pid(),
            mm: ARef::from(&**kernel::current!().mm().ok_or(ESRCH)?),
            mm_lock <- new_mutex!((), "ShrinkablePageRange::mm"),
            lock <- new_spinlock!(Inner {
                pages: ptr::null_mut(),
                size: 0,
                vma_addr: 0,
            }, "ShrinkablePageRange"),
            _pin: PhantomPinned,
        })
    }

    pub(crate) fn stable_trylock_mm(&self) -> Option<StableMmGuard> {
        // SAFETY: This extends the duration of the reference. Since this call happens before
        // `mm_lock` is taken in the destructor of `ShrinkablePageRange`, the destructor will block
        // until the returned guard is dropped. This ensures that the guard is valid until dropped.
        let mm_lock = unsafe { &*ptr::from_ref(&self.mm_lock) };

        mm_lock.try_lock()
    }

    /// Register a vma with this page range. Returns the size of the region, in pages.
    pub(crate) fn register_with_vma(&self, vma: &virt::VmaNew) -> Result<usize> {
        let num_bytes = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize);
        let num_pages = num_bytes >> PAGE_SHIFT;

        if !ptr::eq::<Mm>(&*self.mm, &**vma.mm()) {
            pr_debug!("Failed to register with vma: invalid vma->vm_mm");
            return Err(EINVAL);
        }
        if num_pages == 0 {
            pr_debug!("Failed to register with vma: size zero");
            return Err(EINVAL);
        }

        let mut pages = KVVec::<PageInfo>::with_capacity(num_pages, GFP_KERNEL)?;

        // SAFETY: This just initializes the pages array.
        unsafe {
            let self_ptr = self as *const ShrinkablePageRange;
            for i in 0..num_pages {
                let info = pages.as_mut_ptr().add(i);
                (&raw mut (*info).range).write(self_ptr);
                (&raw mut (*info).page).write(None);
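                // A `list_head` whose `next` and `prev` point at itself is not queued on
                // any list, matching the Free state of `PageInfo`.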
                let lru = &raw mut (*info).lru;
                (&raw mut (*lru).next).write(lru);
                (&raw mut (*lru).prev).write(lru);
            }
        }

        let mut inner = self.lock.lock();
        if inner.size > 0 {
            pr_debug!("Failed to register with vma: already registered");
            drop(inner);
            return Err(EBUSY);
        }

        inner.pages = pages.into_raw_parts().0;
        inner.size = num_pages;
        inner.vma_addr = vma.start();

        Ok(num_pages)
    }

    /// Make sure that the given pages are allocated and mapped.
    ///
    /// Must not be called from an atomic context.
    pub(crate) fn use_range(&self, start: usize, end: usize) -> Result<()> {
        if start >= end {
            return Ok(());
        }
        let mut inner = self.lock.lock();
        assert!(end <= inner.size);

        for i in start..end {
            // SAFETY: This pointer offset is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // Since we're going to use the page, we should remove it from the lru list so that
                // the shrinker will not free it.
                //
                // SAFETY: The pointer is valid, and this is the right shrinker.
                //
                // The shrinker can't free the page between the check and this call to
                // `list_lru_del` because we hold the lock.
                unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            } else {
                // We have to allocate a new page. Use the slow path.
                drop(inner);
                // SAFETY: `i < end <= inner.size` so `i` is in bounds.
                match unsafe { self.use_page_slow(i) } {
                    Ok(()) => {}
                    Err(err) => {
                        pr_warn!("Error in use_page_slow: {:?}", err);
                        return Err(err);
                    }
                }
                inner = self.lock.lock();
            }
        }
        Ok(())
    }

    /// Mark the given page as in use, slow path.
    ///
    /// Must not be called from an atomic context.
    ///
    /// # Safety
    ///
    /// Assumes that `i` is in bounds.
    #[cold]
    unsafe fn use_page_slow(&self, i: usize) -> Result<()> {
        let new_page = Page::alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO)?;

        let mm_mutex = self.mm_lock.lock();
        let inner = self.lock.lock();

        // SAFETY: This pointer offset is in bounds.
        let page_info = unsafe { inner.pages.add(i) };

        // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
        if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
            // The page was already there, or someone else added the page while we didn't hold the
            // spinlock.
            //
            // SAFETY: The pointer is valid, and this is the right shrinker.
            //
            // The shrinker can't free the page between the check and this call to
            // `list_lru_del` because we hold the lock.
            unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            return Ok(());
        }

        let vma_addr = inner.vma_addr;
        // Release the spinlock while we insert the page into the vma.
        drop(inner);

        // No overflow since we stay in bounds of the vma.
        let user_page_addr = vma_addr + (i << PAGE_SHIFT);

        // We use `mmput_async` when dropping the `mm` because `use_page_slow` is usually used from
        // a remote process. If the call to `mmput` races with the process shutting down, then the
        // caller of `use_page_slow` becomes responsible for cleaning up the `mm`, which doesn't
        // happen until it returns to userspace. However, the caller might instead go to sleep and
        // wait for the owner of the `mm` to wake it up, which doesn't happen because it's in the
        // middle of a shutdown process that won't complete until the `mm` is dropped. This can
        // amount to a deadlock.
        //
        // Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a
        // workqueue.
        MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?)
            .mmap_read_lock()
            .vma_lookup(vma_addr)
            .ok_or(ESRCH)?
            .as_mixedmap_vma()
            .ok_or(ESRCH)?
            .vm_insert_page(user_page_addr, &new_page)
            .inspect_err(|err| {
                pr_warn!(
                    "Failed to vm_insert_page({}): vma_addr:{} i:{} err:{:?}",
                    user_page_addr,
                    vma_addr,
                    i,
                    err
                )
            })?;

        let inner = self.lock.lock();

        // SAFETY: The `page_info` pointer is valid and currently does not have a page. The page
        // can be written to since we hold the lock.
        //
        // We released and reacquired the spinlock since we checked that the page is null, but we
        // always hold the mm_lock mutex when setting the page to a non-null value, so it's not
        // possible for someone else to have changed it since our check.
        unsafe { PageInfo::set_page(page_info, new_page) };

        drop(inner);
        drop(mm_mutex);

        Ok(())
    }

    /// If the given page is in use, then mark it as available so that the shrinker can free it.
    ///
    /// May be called from an atomic context.
    pub(crate) fn stop_using_range(&self, start: usize, end: usize) {
        if start >= end {
            return;
        }
        let inner = self.lock.lock();
        assert!(end <= inner.size);

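        // Iterating in reverse makes lower-index pages the most recent lru entries,
        // presumably so they are reclaimed last; correctness does not depend on the order.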
        for i in (start..end).rev() {
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: Okay for reading since we have the lock.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // SAFETY: The pointer is valid, and it's the right shrinker.
                unsafe { PageInfo::list_lru_add(page_info, page.nid(), self.shrinker) };
            }
        }
    }

    /// Helper for reading or writing to a range of bytes that may overlap with several pages.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
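    ///
    /// For example, with `PAGE_SIZE == 4096`, `iterate(4000, 200, cb)` decomposes the byte range
    /// into `cb(page0, 4000, 96)` followed by `cb(page1, 0, 104)`, where `page0` and `page1` are
    /// the pages at index 0 and 1.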
    unsafe fn iterate<T>(&self, mut offset: usize, mut size: usize, mut cb: T) -> Result
    where
        T: FnMut(&Page, usize, usize) -> Result,
    {
        if size == 0 {
            return Ok(());
        }

        let (pages, num_pages) = {
            let inner = self.lock.lock();
            (inner.pages, inner.size)
        };
        let num_bytes = num_pages << PAGE_SHIFT;

        // Check that the request is within the buffer.
        if offset.checked_add(size).ok_or(EFAULT)? > num_bytes {
            return Err(EFAULT);
        }

        let mut page_index = offset >> PAGE_SHIFT;
        offset &= PAGE_SIZE - 1;
        while size > 0 {
            let available = usize::min(size, PAGE_SIZE - offset);
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { pages.add(page_index) };
            // SAFETY: The caller guarantees that this page is in the "in use" state for the
            // duration of this call to `iterate`, so nobody will change the page.
            let page = unsafe { PageInfo::get_page(page_info) };
            if page.is_none() {
                pr_warn!("Page is null!");
            }
            let page = page.ok_or(EFAULT)?;
            cb(page, offset, available)?;
            size -= available;
            page_index += 1;
            offset = 0;
        }
        Ok(())
    }

    /// Copy from userspace into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn copy_from_user_slice(
        &self,
        reader: &mut UserSliceReader,
        offset: usize,
        size: usize,
    ) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `copy_from_user_slice`.
        unsafe {
            self.iterate(offset, size, |page, offset, to_copy| {
                page.copy_from_user_slice_raw(reader, offset, to_copy)
            })
        }
    }

    /// Copy from this page range into kernel space.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
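    ///
    /// A minimal usage sketch, assuming the pages backing the read are in use:
    ///
    /// ```ignore
    /// // SAFETY: The pages covering `offset..offset + size_of::<u64>()` are in use.
    /// let value: u64 = unsafe { range.read(offset)? };
    /// ```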
    pub(crate) unsafe fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
        let mut out = MaybeUninit::<T>::uninit();
        let mut out_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `read`.
        unsafe {
            self.iterate(offset, size_of::<T>(), |page, offset, to_copy| {
                // SAFETY: The sum of `out_offset` and `to_copy` is bounded by the size of T.
                let obj_ptr = (out.as_mut_ptr() as *mut u8).add(out_offset);
                // SAFETY: The pointer is in-bounds of the `out` variable, so it is valid.
                page.read_raw(obj_ptr, offset, to_copy)?;
                out_offset += to_copy;
                Ok(())
            })?;
        }
        // SAFETY: We just initialised the data.
        Ok(unsafe { out.assume_init() })
    }

    /// Copy from kernel space into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
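    ///
    /// A minimal usage sketch, assuming the destination pages are in use:
    ///
    /// ```ignore
    /// // SAFETY: The pages covering the written range are in use.
    /// unsafe { range.write(offset, &42u32)? };
    /// ```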
    pub(crate) unsafe fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result {
        let mut obj_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `write`.
        unsafe {
            self.iterate(offset, size_of_val(obj), |page, offset, to_copy| {
                // SAFETY: The sum of `obj_offset` and `to_copy` is bounded by the size of T.
                let obj_ptr = (obj as *const T as *const u8).add(obj_offset);
                // SAFETY: We have a reference to the object, so the pointer is valid.
                page.write_raw(obj_ptr, offset, to_copy)?;
                obj_offset += to_copy;
                Ok(())
            })
        }
    }

    /// Write zeroes to the given range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn fill_zero(&self, offset: usize, size: usize) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `fill_zero`.
        unsafe {
            self.iterate(offset, size, |page, offset, len| {
                page.fill_zero_raw(offset, len)
            })
        }
    }
}

#[pinned_drop]
impl PinnedDrop for ShrinkablePageRange {
    fn drop(self: Pin<&mut Self>) {
        let (pages, size) = {
            let lock = self.lock.lock();
            (lock.pages, lock.size)
        };

        if size == 0 {
            return;
        }

        // Note: This call is also necessary for the safety of `stable_trylock_mm`.
        let mm_lock = self.mm_lock.lock();

        // This is the destructor, so unlike the other methods, we only need to worry about races
        // with the shrinker here. Since we hold the `mm_lock`, we also can't race with the
        // shrinker, and after this loop, the shrinker will not access any of our pages since we
        // removed them from the lru list.
        for i in 0..size {
            // SAFETY: Loop is in-bounds of the size.
            let p_ptr = unsafe { pages.add(i) };
            // SAFETY: No other readers, so we can read.
            if let Some(p) = unsafe { PageInfo::get_page(p_ptr) } {
                // SAFETY: The pointer is valid and it's the right shrinker.
                unsafe { PageInfo::list_lru_del(p_ptr, p.nid(), self.shrinker) };
            }
        }

        drop(mm_lock);

        // SAFETY: `pages` was allocated as a `KVVec<PageInfo>` with capacity `size`. Furthermore,
        // all `size` elements are initialized. Also, the array is no longer shared with the
        // shrinker due to the above loop.
        drop(unsafe { KVVec::from_raw_parts(pages, size, size) });
    }
}

/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_count(
    shrink: *mut bindings::shrinker,
    _sc: *mut bindings::shrink_control,
) -> c_ulong {
    // SAFETY: We can access our own private data.
    let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
    // SAFETY: Accessing the lru list is okay. Just an FFI call.
    unsafe { bindings::list_lru_count(list_lru) }
}

/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_scan(
    shrink: *mut bindings::shrinker,
    sc: *mut bindings::shrink_control,
) -> c_ulong {
    // SAFETY: We can access our own private data.
    let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
    // SAFETY: Caller guarantees that it is safe to read this field.
    let nr_to_scan = unsafe { (*sc).nr_to_scan };
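    // `rust_shrink_free_page_wrap` appears to be a thin C-side adapter (defined outside this
    // file) that forwards to `rust_shrink_free_page` below.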
    // SAFETY: Accessing the lru list is okay. Just an FFI call.
    unsafe {
        bindings::list_lru_walk(
            list_lru,
            Some(bindings::rust_shrink_free_page_wrap),
            ptr::null_mut(),
            nr_to_scan,
        )
    }
}

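// `LRU_REMOVED_ENTRY` maps to `LRU_REMOVED_RETRY`: `rust_shrink_free_page` drops and retakes
// the lru lock around the page teardown, so the walker must revalidate its position after the
// callback returns.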
const LRU_SKIP: bindings::lru_status = bindings::lru_status_LRU_SKIP;
const LRU_REMOVED_ENTRY: bindings::lru_status = bindings::lru_status_LRU_REMOVED_RETRY;

/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_free_page(
    item: *mut bindings::list_head,
    lru: *mut bindings::list_lru_one,
    _cb_arg: *mut c_void,
) -> bindings::lru_status {
    // Fields that should survive after unlocking the lru lock.
    let page;
    let page_index;
    let mm;
    let mmap_read;
    let mm_mutex;
    let vma_addr;

    {
        // CAST: The `list_head` field is first in `PageInfo`.
        let info = item as *mut PageInfo;
        // SAFETY: The `range` field of `PageInfo` is immutable.
        let range = unsafe { &*((*info).range) };

        mm = match range.mm.mmget_not_zero() {
            Some(mm) => MmWithUser::into_mmput_async(mm),
            None => return LRU_SKIP,
        };

        mm_mutex = match range.stable_trylock_mm() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        mmap_read = match mm.mmap_read_trylock() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        // We can't lock it normally here, since we hold the lru lock.
        let inner = match range.lock.try_lock() {
            Some(inner) => inner,
            None => return LRU_SKIP,
        };

        // SAFETY: The item is in this lru list, so it's okay to remove it.
        unsafe { bindings::list_lru_isolate(lru, item) };

        // SAFETY: Both pointers are in bounds of the same allocation.
        page_index = unsafe { info.offset_from(inner.pages) } as usize;

        // SAFETY: We hold the spinlock, so we can take the page.
        //
        // This sets the page pointer to zero before we unmap it from the vma. However, we call
        // `zap_page_range` before we release the mmap lock, so `use_page_slow` will not be able to
        // insert a new page until after our call to `zap_page_range`.
        page = unsafe { PageInfo::take_page(info) };
        vma_addr = inner.vma_addr;

        // From this point on, we don't access this PageInfo or ShrinkablePageRange again, because
        // they can be freed at any point after we unlock `lru_lock`. This is with the exception of
        // `mm_mutex` which is kept alive by holding the lock.
    }

    // SAFETY: The lru lock is locked when this method is called.
    unsafe { bindings::spin_unlock(&raw mut (*lru).lock) };

    if let Some(vma) = mmap_read.vma_lookup(vma_addr) {
        let user_page_addr = vma_addr + (page_index << PAGE_SHIFT);
        vma.zap_page_range_single(user_page_addr, PAGE_SIZE);
    }

    drop(mmap_read);
    drop(mm_mutex);
    drop(mm);
    drop(page);

    // SAFETY: We just unlocked the lru lock, but it should be locked when we return.
    unsafe { bindings::spin_lock(&raw mut (*lru).lock) };

    LRU_REMOVED_ENTRY
}