// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2025 Google LLC.

//! This module has utilities for managing a page range where unused pages may be reclaimed by a
//! memory shrinker.

// To avoid deadlocks, locks are taken in the order:
//
// 1. mmap lock
// 2. spinlock
// 3. lru spinlock
//
// The shrinker will use trylock methods because it locks them in a different order.
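//
// In this file, the only place that takes them in a different order is the shrinker callback
// `rust_shrink_free_page`: it is entered with the lru spinlock already held by `list_lru_walk`,
// so it only ever trylocks `mm_lock`, the mmap lock and the `Inner` spinlock, and bails out with
// `LRU_SKIP` if any of them is contended.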

use core::{
    marker::PhantomPinned,
    mem::{size_of, size_of_val, MaybeUninit},
    ptr,
};

use kernel::{
    bindings,
    error::Result,
    ffi::{c_ulong, c_void},
    mm::{virt, Mm, MmWithUser},
    new_mutex, new_spinlock,
    page::{Page, PAGE_SHIFT, PAGE_SIZE},
    prelude::*,
    str::CStr,
    sync::{aref::ARef, Mutex, SpinLock},
    task::Pid,
    transmute::FromBytes,
    types::Opaque,
    uaccess::UserSliceReader,
};

/// Represents a shrinker that can be registered with the kernel.
///
/// Each shrinker can be used by many `ShrinkablePageRange` objects.
#[repr(C)]
pub(crate) struct Shrinker {
    inner: Opaque<*mut bindings::shrinker>,
    list_lru: Opaque<bindings::list_lru>,
}

// SAFETY: The shrinker and list_lru are thread safe.
unsafe impl Send for Shrinker {}
// SAFETY: The shrinker and list_lru are thread safe.
unsafe impl Sync for Shrinker {}

impl Shrinker {
    /// Create a new shrinker.
    ///
    /// # Safety
    ///
    /// Before using this shrinker with a `ShrinkablePageRange`, the `register` method must have
    /// been called exactly once, and it must not have returned an error.
    pub(crate) const unsafe fn new() -> Self {
        Self {
            inner: Opaque::uninit(),
            list_lru: Opaque::uninit(),
        }
    }

    /// Register this shrinker with the kernel.
    pub(crate) fn register(&'static self, name: &CStr) -> Result<()> {
        // SAFETY: These fields are not yet used, so it's okay to zero them.
        unsafe {
            self.inner.get().write(ptr::null_mut());
            self.list_lru.get().write_bytes(0, 1);
        }

        // SAFETY: The field is not yet used, so we can initialize it.
        let ret = unsafe { bindings::__list_lru_init(self.list_lru.get(), false, ptr::null_mut()) };
        if ret != 0 {
            return Err(Error::from_errno(ret));
        }

        // SAFETY: The `name` points at a valid c string.
        let shrinker = unsafe { bindings::shrinker_alloc(0, name.as_char_ptr()) };
        if shrinker.is_null() {
            // SAFETY: We initialized it, so it's okay to destroy it.
            unsafe { bindings::list_lru_destroy(self.list_lru.get()) };
            // `ret` is zero here, so report the allocation failure explicitly.
            return Err(ENOMEM);
        }

        // SAFETY: We're about to register the shrinker, and these are the fields we need to
        // initialize. (All other fields are already zeroed.)
        unsafe {
            (&raw mut (*shrinker).count_objects).write(Some(rust_shrink_count));
            (&raw mut (*shrinker).scan_objects).write(Some(rust_shrink_scan));
            (&raw mut (*shrinker).private_data).write(self.list_lru.get().cast());
        }

        // SAFETY: The new shrinker has been fully initialized, so we can register it.
        unsafe { bindings::shrinker_register(shrinker) };

        // SAFETY: This initializes the pointer to the shrinker so that we can use it.
        unsafe { self.inner.get().write(shrinker) };

        Ok(())
    }
}
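//
// A minimal usage sketch for `Shrinker` (illustrative only; the static's name, the shrinker name
// string and the module-init hook are assumptions, not part of this file). The shrinker lives in
// a static and is registered exactly once before any `ShrinkablePageRange` uses it:
//
//     // SAFETY: `register` is called exactly once in `my_module_init`, before first use.
//     static MY_SHRINKER: Shrinker = unsafe { Shrinker::new() };
//
//     fn my_module_init() -> Result<()> {
//         MY_SHRINKER.register(kernel::c_str!("my-driver-pages"))?;
//         Ok(())
//     }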

/// A container that manages a page range in a vma.
///
/// The pages can be thought of as an array of booleans, one per page, that tracks whether each
/// page is usable. The methods `use_range` and `stop_using_range` set all booleans in a range to
/// true or false respectively. Initially, no pages are allocated. When a page is not used, it is
/// not freed immediately. Instead, it is made available to the memory shrinker to free it if the
/// device is under memory pressure.
///
/// It's okay for `use_range` and `stop_using_range` to race with each other, although there's no
/// way to know whether an index ends up with true or false if a call to `use_range` races with
/// another call to `stop_using_range` on a given index.
///
/// It's also okay for the two methods to race with themselves, e.g. if two threads call
/// `use_range` on the same index, then that's fine and neither call will return until the page is
/// allocated and mapped.
///
/// The methods that read or write to a range require that the page is marked as in use. So it is
/// _not_ okay to call `stop_using_range` on a page that is in use by the methods that read or
/// write to the page.
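///
/// A minimal usage sketch (illustrative only; `MY_SHRINKER`, the mmap hook and the chosen page
/// indices are assumptions about the surrounding driver, not part of this module):
///
/// ```ignore
/// let range = KBox::pin_init(ShrinkablePageRange::new(&MY_SHRINKER), GFP_KERNEL)?;
///
/// // In the driver's mmap handler:
/// let num_pages = range.register_with_vma(vma)?;
///
/// // Mark pages 0..4 as in use, write to them, then hand them back to the shrinker.
/// range.use_range(0, 4)?;
/// // SAFETY: The pages are in use and nothing calls `stop_using_range` on them concurrently.
/// unsafe { range.write(0, &0u64)? };
/// range.stop_using_range(0, 4);
/// ```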
#[pin_data(PinnedDrop)]
pub(crate) struct ShrinkablePageRange {
    /// Shrinker object registered with the kernel.
    shrinker: &'static Shrinker,
    /// Pid using this page range. Only used as debugging information.
    pid: Pid,
    /// The mm for the relevant process.
    mm: ARef<Mm>,
    /// Used to synchronize calls to `vm_insert_page` and `zap_page_range_single`.
    #[pin]
    mm_lock: Mutex<()>,
    /// Spinlock protecting changes to pages.
    #[pin]
    lock: SpinLock<Inner>,

    /// Must not move, since page info has pointers back.
    #[pin]
    _pin: PhantomPinned,
}

struct Inner {
    /// Array of pages.
    ///
    /// Since this is also accessed by the shrinker, we can't use a `Box`, which asserts exclusive
    /// ownership. To deal with that, we manage it using raw pointers.
    pages: *mut PageInfo,
    /// Length of the `pages` array.
    size: usize,
    /// The address of the vma to insert the pages into.
    vma_addr: usize,
}

// SAFETY: proper locking is in place for `Inner`
unsafe impl Send for Inner {}

type StableMmGuard =
    kernel::sync::lock::Guard<'static, (), kernel::sync::lock::mutex::MutexBackend>;

/// An array element that describes the current state of a page.
///
/// There are three states:
///
/// * Free. The page is None. The `lru` element is not queued.
/// * Available. The page is Some. The `lru` element is queued to the shrinker's lru.
/// * Used. The page is Some. The `lru` element is not queued.
///
/// When an element is available, the shrinker is able to free the page.
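///
/// The transitions, as implemented below: `use_page_slow` takes a page from Free to Used,
/// `use_range` moves an Available page back to Used, `stop_using_range` moves a Used page to
/// Available, and the shrinker callback `rust_shrink_free_page` frees an Available page.
///
/// The struct is `#[repr(C)]` with `lru` as the first field so that the shrinker callback can
/// cast a `list_head` pointer back into a `PageInfo` pointer.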
#[repr(C)]
struct PageInfo {
    lru: bindings::list_head,
    page: Option<Page>,
    range: *const ShrinkablePageRange,
}

impl PageInfo {
    /// # Safety
    ///
    /// The caller ensures that writing to `me.page` is ok, and that the page is not currently set.
    unsafe fn set_page(me: *mut PageInfo, page: Page) {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // SAFETY: The pointer is valid for writing, so also valid for reading.
        if unsafe { (*ptr).is_some() } {
            pr_err!("set_page called when there is already a page");
            // SAFETY: We will initialize the page again below.
            unsafe { ptr::drop_in_place(ptr) };
        }

        // SAFETY: The pointer is valid for writing.
        unsafe { ptr::write(ptr, Some(page)) };
    }

    /// # Safety
    ///
    /// The caller ensures that reading from `me.page` is ok for the duration of 'a.
    unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw const (*me).page };

        // SAFETY: The pointer is valid for reading.
        unsafe { (*ptr).as_ref() }
    }

    /// # Safety
    ///
    /// The caller ensures that writing to `me.page` is ok.
    unsafe fn take_page(me: *mut PageInfo) -> Option<Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // SAFETY: The pointer is valid for writing.
        unsafe { (*ptr).take() }
    }

    /// Add this page to the lru list, if not already in the list.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_add(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_add(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }

    /// Remove this page from the lru list, if it is in the list.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_del(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_del(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }
}

impl ShrinkablePageRange {
    /// Create a new `ShrinkablePageRange` using the given shrinker.
    pub(crate) fn new(shrinker: &'static Shrinker) -> impl PinInit<Self, Error> {
        try_pin_init!(Self {
            shrinker,
            pid: kernel::current!().pid(),
            mm: ARef::from(&**kernel::current!().mm().ok_or(ESRCH)?),
            mm_lock <- new_mutex!((), "ShrinkablePageRange::mm"),
            lock <- new_spinlock!(Inner {
                pages: ptr::null_mut(),
                size: 0,
                vma_addr: 0,
            }, "ShrinkablePageRange"),
            _pin: PhantomPinned,
        })
    }

    /// Try to take the `mm_lock`, returning a guard whose lifetime is not tied to `self`.
    pub(crate) fn stable_trylock_mm(&self) -> Option<StableMmGuard> {
        // SAFETY: This extends the duration of the reference. Since this call happens before
        // `mm_lock` is taken in the destructor of `ShrinkablePageRange`, the destructor will block
        // until the returned guard is dropped. This ensures that the guard is valid until dropped.
        let mm_lock = unsafe { &*ptr::from_ref(&self.mm_lock) };

        mm_lock.try_lock()
    }

    /// Register a vma with this page range. Returns the size of the region.
    pub(crate) fn register_with_vma(&self, vma: &virt::VmaNew) -> Result<usize> {
        let num_bytes = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize);
        let num_pages = num_bytes >> PAGE_SHIFT;

        if !ptr::eq::<Mm>(&*self.mm, &**vma.mm()) {
            pr_debug!("Failed to register with vma: invalid vma->vm_mm");
            return Err(EINVAL);
        }
        if num_pages == 0 {
            pr_debug!("Failed to register with vma: size zero");
            return Err(EINVAL);
        }

        let mut pages = KVVec::<PageInfo>::with_capacity(num_pages, GFP_KERNEL)?;

        // SAFETY: This just initializes the pages array.
        unsafe {
            let self_ptr = self as *const ShrinkablePageRange;
            for i in 0..num_pages {
                let info = pages.as_mut_ptr().add(i);
                (&raw mut (*info).range).write(self_ptr);
                (&raw mut (*info).page).write(None);
                let lru = &raw mut (*info).lru;
                (&raw mut (*lru).next).write(lru);
                (&raw mut (*lru).prev).write(lru);
            }
        }

        let mut inner = self.lock.lock();
        if inner.size > 0 {
            pr_debug!("Failed to register with vma: already registered");
            drop(inner);
            return Err(EBUSY);
        }

        inner.pages = pages.into_raw_parts().0;
        inner.size = num_pages;
        inner.vma_addr = vma.start();

        Ok(num_pages)
    }

    /// Make sure that the given pages are allocated and mapped.
    ///
    /// Must not be called from an atomic context.
    pub(crate) fn use_range(&self, start: usize, end: usize) -> Result<()> {
        if start >= end {
            return Ok(());
        }
        let mut inner = self.lock.lock();
        assert!(end <= inner.size);

        for i in start..end {
            // SAFETY: This pointer offset is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // Since we're going to use the page, we should remove it from the lru list so that
                // the shrinker will not free it.
                //
                // SAFETY: The pointer is valid, and this is the right shrinker.
                //
                // The shrinker can't free the page between the check and this call to
                // `list_lru_del` because we hold the lock.
                unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            } else {
                // We have to allocate a new page. Use the slow path.
                drop(inner);
                // SAFETY: `i < end <= inner.size` so `i` is in bounds.
                match unsafe { self.use_page_slow(i) } {
                    Ok(()) => {}
                    Err(err) => {
                        pr_warn!("Error in use_page_slow: {:?}", err);
                        return Err(err);
                    }
                }
                inner = self.lock.lock();
            }
        }
        Ok(())
    }

    /// Mark the given page as in use, slow path.
    ///
    /// Must not be called from an atomic context.
    ///
    /// # Safety
    ///
    /// Assumes that `i` is in bounds.
    #[cold]
    unsafe fn use_page_slow(&self, i: usize) -> Result<()> {
        let new_page = Page::alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO)?;

        let mm_mutex = self.mm_lock.lock();
        let inner = self.lock.lock();

        // SAFETY: This pointer offset is in bounds.
        let page_info = unsafe { inner.pages.add(i) };

        // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
        if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
            // The page was already there, or someone else added the page while we didn't hold the
            // spinlock.
            //
            // SAFETY: The pointer is valid, and this is the right shrinker.
            //
            // The shrinker can't free the page between the check and this call to
            // `list_lru_del` because we hold the lock.
            unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            return Ok(());
        }

        let vma_addr = inner.vma_addr;
        // Release the spinlock while we insert the page into the vma.
        drop(inner);

        // No overflow since we stay in bounds of the vma.
        let user_page_addr = vma_addr + (i << PAGE_SHIFT);

        // We use `mmput_async` when dropping the `mm` because `use_page_slow` is usually used from
        // a remote process. If the call to `mmput` races with the process shutting down, then the
        // caller of `use_page_slow` becomes responsible for cleaning up the `mm`, which doesn't
        // happen until it returns to userspace. However, the caller might instead go to sleep and
        // wait for the owner of the `mm` to wake it up, which doesn't happen because it's in the
        // middle of a shutdown process that won't complete until the `mm` is dropped. This can
        // amount to a deadlock.
        //
        // Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a
        // workqueue.
        MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?)
            .mmap_read_lock()
            .vma_lookup(vma_addr)
            .ok_or(ESRCH)?
            .as_mixedmap_vma()
            .ok_or(ESRCH)?
            .vm_insert_page(user_page_addr, &new_page)
            .inspect_err(|err| {
                pr_warn!(
                    "Failed to vm_insert_page({}): vma_addr:{} i:{} err:{:?}",
                    user_page_addr,
                    vma_addr,
                    i,
                    err
                )
            })?;

        let inner = self.lock.lock();

        // SAFETY: The `page_info` pointer is valid and currently does not have a page. The page
        // can be written to since we hold the lock.
        //
        // We released and reacquired the spinlock since we checked that the page is null, but we
        // always hold the mm_lock mutex when setting the page to a non-null value, so it's not
        // possible for someone else to have changed it since our check.
        unsafe { PageInfo::set_page(page_info, new_page) };

        drop(inner);
        drop(mm_mutex);

        Ok(())
    }

    /// If the given page is in use, then mark it as available so that the shrinker can free it.
    ///
    /// May be called from an atomic context.
    pub(crate) fn stop_using_range(&self, start: usize, end: usize) {
        if start >= end {
            return;
        }
        let inner = self.lock.lock();
        assert!(end <= inner.size);

        for i in (start..end).rev() {
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: Okay for reading since we have the lock.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // SAFETY: The pointer is valid, and it's the right shrinker.
                unsafe { PageInfo::list_lru_add(page_info, page.nid(), self.shrinker) };
            }
        }
    }

    /// Helper for reading or writing to a range of bytes that may overlap with several pages.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
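    ///
    /// For example, with 4 KiB pages, a request at `offset = 0xff0` with `size = 0x20` straddles
    /// a page boundary and results in two callback invocations: `(page 0, 0xff0, 0x10)` followed
    /// by `(page 1, 0x0, 0x10)`.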
    unsafe fn iterate<T>(&self, mut offset: usize, mut size: usize, mut cb: T) -> Result
    where
        T: FnMut(&Page, usize, usize) -> Result,
    {
        if size == 0 {
            return Ok(());
        }

        let (pages, num_pages) = {
            let inner = self.lock.lock();
            (inner.pages, inner.size)
        };
        let num_bytes = num_pages << PAGE_SHIFT;

        // Check that the request is within the buffer.
        if offset.checked_add(size).ok_or(EFAULT)? > num_bytes {
            return Err(EFAULT);
        }

        let mut page_index = offset >> PAGE_SHIFT;
        offset &= PAGE_SIZE - 1;
        while size > 0 {
            let available = usize::min(size, PAGE_SIZE - offset);
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { pages.add(page_index) };
            // SAFETY: The caller guarantees that this page is in the "in use" state for the
            // duration of this call to `iterate`, so nobody will change the page.
            let page = unsafe { PageInfo::get_page(page_info) };
            if page.is_none() {
                pr_warn!("Page is null!");
            }
            let page = page.ok_or(EFAULT)?;
            cb(page, offset, available)?;
            size -= available;
            page_index += 1;
            offset = 0;
        }
        Ok(())
    }

    /// Copy from userspace into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
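    ///
    /// A usage sketch (illustrative only; `uptr`, `len` and `offset` are assumptions):
    ///
    /// ```ignore
    /// let mut reader = UserSlice::new(uptr, len).reader();
    /// // SAFETY: The pages covering `offset..offset + len` were marked in use via `use_range`
    /// // and are not released while this call runs.
    /// unsafe { range.copy_from_user_slice(&mut reader, offset, len)? };
    /// ```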
    pub(crate) unsafe fn copy_from_user_slice(
        &self,
        reader: &mut UserSliceReader,
        offset: usize,
        size: usize,
    ) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `copy_from_user_slice`.
        unsafe {
            self.iterate(offset, size, |page, offset, to_copy| {
                page.copy_from_user_slice_raw(reader, offset, to_copy)
            })
        }
    }

    /// Copy from this page range into kernel space.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
        let mut out = MaybeUninit::<T>::uninit();
        let mut out_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `read`.
        unsafe {
            self.iterate(offset, size_of::<T>(), |page, offset, to_copy| {
                // SAFETY: The sum of `out_offset` and `to_copy` is bounded by the size of T.
                let obj_ptr = (out.as_mut_ptr() as *mut u8).add(out_offset);
                // SAFETY: The pointer is in-bounds of the `out` variable, so it is valid.
                page.read_raw(obj_ptr, offset, to_copy)?;
                out_offset += to_copy;
                Ok(())
            })?;
        }
        // SAFETY: We just initialised the data.
        Ok(unsafe { out.assume_init() })
    }

    /// Copy from kernel space into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result {
        let mut obj_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `write`.
        unsafe {
            self.iterate(offset, size_of_val(obj), |page, offset, to_copy| {
                // SAFETY: The sum of `obj_offset` and `to_copy` is bounded by the size of the
                // object.
                let obj_ptr = (obj as *const T as *const u8).add(obj_offset);
                // SAFETY: We have a reference to the object, so the pointer is valid.
                page.write_raw(obj_ptr, offset, to_copy)?;
                obj_offset += to_copy;
                Ok(())
            })
        }
    }

    /// Write zeroes to the given range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn fill_zero(&self, offset: usize, size: usize) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `fill_zero`.
        unsafe {
            self.iterate(offset, size, |page, offset, len| {
                page.fill_zero_raw(offset, len)
            })
        }
    }
}

#[pinned_drop]
impl PinnedDrop for ShrinkablePageRange {
    fn drop(self: Pin<&mut Self>) {
        let (pages, size) = {
            let lock = self.lock.lock();
            (lock.pages, lock.size)
        };

        if size == 0 {
            return;
        }

        // Note: This call is also necessary for the safety of `stable_trylock_mm`.
        let mm_lock = self.mm_lock.lock();

        // This is the destructor, so unlike the other methods, we only need to worry about races
        // with the shrinker here. Since we hold the `mm_lock`, we also can't race with the
        // shrinker, and after this loop, the shrinker will not access any of our pages since we
        // removed them from the lru list.
        for i in 0..size {
            // SAFETY: Loop is in-bounds of the size.
            let p_ptr = unsafe { pages.add(i) };
            // SAFETY: No other readers, so we can read.
            if let Some(p) = unsafe { PageInfo::get_page(p_ptr) } {
                // SAFETY: The pointer is valid and it's the right shrinker.
                unsafe { PageInfo::list_lru_del(p_ptr, p.nid(), self.shrinker) };
            }
        }

        drop(mm_lock);

        // SAFETY: `pages` was allocated as a `KVVec<PageInfo>` with capacity `size`. Furthermore,
        // all `size` elements are initialized. Also, the array is no longer shared with the
        // shrinker due to the above loop.
        drop(unsafe { KVVec::from_raw_parts(pages, size, size) });
    }
}

/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_count(
    shrink: *mut bindings::shrinker,
    _sc: *mut bindings::shrink_control,
) -> c_ulong {
    // SAFETY: We can access our own private data.
    let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
    // SAFETY: Accessing the lru list is okay. Just an FFI call.
    unsafe { bindings::list_lru_count(list_lru) }
}

/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_scan(
    shrink: *mut bindings::shrinker,
    sc: *mut bindings::shrink_control,
) -> c_ulong {
    // SAFETY: We can access our own private data.
    let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
    // SAFETY: Caller guarantees that it is safe to read this field.
    let nr_to_scan = unsafe { (*sc).nr_to_scan };
    // SAFETY: Accessing the lru list is okay. Just an FFI call.
    unsafe {
        bindings::list_lru_walk(
            list_lru,
            Some(bindings::rust_shrink_free_page_wrap),
            ptr::null_mut(),
            nr_to_scan,
        )
    }
}

const LRU_SKIP: bindings::lru_status = bindings::lru_status_LRU_SKIP;
const LRU_REMOVED_ENTRY: bindings::lru_status = bindings::lru_status_LRU_REMOVED_RETRY;
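// `LRU_REMOVED_ENTRY` maps to `LRU_REMOVED_RETRY` rather than `LRU_REMOVED` because
// `rust_shrink_free_page` drops the lru spinlock before returning, and `LRU_REMOVED_RETRY` tells
// the lru walker that the lock was dropped while the entry was removed.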

/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_free_page(
    item: *mut bindings::list_head,
    lru: *mut bindings::list_lru_one,
    _cb_arg: *mut c_void,
) -> bindings::lru_status {
    // Fields that should survive after unlocking the lru lock.
    let page;
    let page_index;
    let mm;
    let mmap_read;
    let mm_mutex;
    let vma_addr;

    {
        // CAST: The `list_head` field is first in `PageInfo`.
        let info = item as *mut PageInfo;
        // SAFETY: The `range` field of `PageInfo` is immutable.
        let range = unsafe { &*((*info).range) };

        mm = match range.mm.mmget_not_zero() {
            Some(mm) => MmWithUser::into_mmput_async(mm),
            None => return LRU_SKIP,
        };

        mm_mutex = match range.stable_trylock_mm() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        mmap_read = match mm.mmap_read_trylock() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        // We can't lock it normally here, since we hold the lru lock.
        let inner = match range.lock.try_lock() {
            Some(inner) => inner,
            None => return LRU_SKIP,
        };

        // SAFETY: The item is in this lru list, so it's okay to remove it.
        unsafe { bindings::list_lru_isolate(lru, item) };

        // SAFETY: Both pointers are in bounds of the same allocation.
        page_index = unsafe { info.offset_from(inner.pages) } as usize;

        // SAFETY: We hold the spinlock, so we can take the page.
        //
        // This sets the page pointer to zero before we unmap it from the vma. However, we call
        // `zap_page_range` before we release the mmap lock, so `use_page_slow` will not be able to
        // insert a new page until after our call to `zap_page_range`.
        page = unsafe { PageInfo::take_page(info) };
        vma_addr = inner.vma_addr;

        // From this point on, we don't access this PageInfo or ShrinkablePageRange again, because
        // they can be freed at any point after we unlock `lru_lock`. This is with the exception of
        // `mm_mutex` which is kept alive by holding the lock.
    }

    // SAFETY: The lru lock is locked when this method is called.
    unsafe { bindings::spin_unlock(&raw mut (*lru).lock) };

    if let Some(vma) = mmap_read.vma_lookup(vma_addr) {
        let user_page_addr = vma_addr + (page_index << PAGE_SHIFT);
        vma.zap_page_range_single(user_page_addr, PAGE_SIZE);
    }

    drop(mmap_read);
    drop(mm_mutex);
    drop(mm);
    drop(page);

    LRU_REMOVED_ENTRY
}