1 // SPDX-License-Identifier: GPL-2.0
2
3 // Copyright (C) 2025 Google LLC.
4
5 //! This module has utilities for managing a page range where unused pages may be reclaimed by a
6 //! vma shrinker.
7
8 // To avoid deadlocks, locks are taken in the order:
9 //
10 // 1. mmap lock
11 // 2. spinlock
12 // 3. lru spinlock
13 //
14 // The shrinker will use trylock methods because it locks them in a different order.
15
16 use core::{
17 marker::PhantomPinned,
18 mem::{size_of, size_of_val, MaybeUninit},
19 ptr,
20 };
21
22 use kernel::{
23 bindings,
24 error::Result,
25 ffi::{c_ulong, c_void},
26 mm::{virt, Mm, MmWithUser},
27 new_mutex, new_spinlock,
28 page::{Page, PAGE_SHIFT, PAGE_SIZE},
29 prelude::*,
30 str::CStr,
31 sync::{aref::ARef, Mutex, SpinLock},
32 task::Pid,
33 transmute::FromBytes,
34 types::Opaque,
35 uaccess::UserSliceReader,
36 };
37
38 /// Represents a shrinker that can be registered with the kernel.
39 ///
40 /// Each shrinker can be used by many `ShrinkablePageRange` objects.
#[repr(C)]
pub(crate) struct Shrinker {
    /// Pointer to the kernel `struct shrinker` allocated in `register`. Null until
    /// registration succeeds.
    inner: Opaque<*mut bindings::shrinker>,
    /// The lru list holding the `PageInfo` entries that are currently reclaimable.
    list_lru: Opaque<bindings::list_lru>,
}
46
// SAFETY: The shrinker and list_lru are thread safe; the underlying C structures do their own
// internal locking (e.g. the lru list's spinlock), so it is okay to send a `Shrinker` to
// another thread.
unsafe impl Send for Shrinker {}
// SAFETY: As above — the C side synchronizes concurrent access to the shrinker and lru list, so
// shared references may be used from several threads at once.
unsafe impl Sync for Shrinker {}
51
52 impl Shrinker {
53 /// Create a new shrinker.
54 ///
55 /// # Safety
56 ///
57 /// Before using this shrinker with a `ShrinkablePageRange`, the `register` method must have
58 /// been called exactly once, and it must not have returned an error.
new() -> Self59 pub(crate) const unsafe fn new() -> Self {
60 Self {
61 inner: Opaque::uninit(),
62 list_lru: Opaque::uninit(),
63 }
64 }
65
66 /// Register this shrinker with the kernel.
register(&'static self, name: &CStr) -> Result<()>67 pub(crate) fn register(&'static self, name: &CStr) -> Result<()> {
68 // SAFETY: These fields are not yet used, so it's okay to zero them.
69 unsafe {
70 self.inner.get().write(ptr::null_mut());
71 self.list_lru.get().write_bytes(0, 1);
72 }
73
74 // SAFETY: The field is not yet used, so we can initialize it.
75 let ret = unsafe { bindings::__list_lru_init(self.list_lru.get(), false, ptr::null_mut()) };
76 if ret != 0 {
77 return Err(Error::from_errno(ret));
78 }
79
80 // SAFETY: The `name` points at a valid c string.
81 let shrinker = unsafe { bindings::shrinker_alloc(0, name.as_char_ptr()) };
82 if shrinker.is_null() {
83 // SAFETY: We initialized it, so its okay to destroy it.
84 unsafe { bindings::list_lru_destroy(self.list_lru.get()) };
85 return Err(Error::from_errno(ret));
86 }
87
88 // SAFETY: We're about to register the shrinker, and these are the fields we need to
89 // initialize. (All other fields are already zeroed.)
90 unsafe {
91 (&raw mut (*shrinker).count_objects).write(Some(rust_shrink_count));
92 (&raw mut (*shrinker).scan_objects).write(Some(rust_shrink_scan));
93 (&raw mut (*shrinker).private_data).write(self.list_lru.get().cast());
94 }
95
96 // SAFETY: The new shrinker has been fully initialized, so we can register it.
97 unsafe { bindings::shrinker_register(shrinker) };
98
99 // SAFETY: This initializes the pointer to the shrinker so that we can use it.
100 unsafe { self.inner.get().write(shrinker) };
101
102 Ok(())
103 }
104 }
105
106 /// A container that manages a page range in a vma.
107 ///
108 /// The pages can be thought of as an array of booleans of whether the pages are usable. The
109 /// methods `use_range` and `stop_using_range` set all booleans in a range to true or false
110 /// respectively. Initially, no pages are allocated. When a page is not used, it is not freed
111 /// immediately. Instead, it is made available to the memory shrinker to free it if the device is
112 /// under memory pressure.
113 ///
114 /// It's okay for `use_range` and `stop_using_range` to race with each other, although there's no
115 /// way to know whether an index ends up with true or false if a call to `use_range` races with
116 /// another call to `stop_using_range` on a given index.
117 ///
118 /// It's also okay for the two methods to race with themselves, e.g. if two threads call
119 /// `use_range` on the same index, then that's fine and neither call will return until the page is
120 /// allocated and mapped.
121 ///
122 /// The methods that read or write to a range require that the page is marked as in use. So it is
123 /// _not_ okay to call `stop_using_range` on a page that is in use by the methods that read or
124 /// write to the page.
#[pin_data(PinnedDrop)]
pub(crate) struct ShrinkablePageRange {
    /// Shrinker object registered with the kernel.
    shrinker: &'static Shrinker,
    /// Pid using this page range. Only used as debugging information.
    pid: Pid,
    /// The mm for the relevant process.
    mm: ARef<Mm>,
    /// Used to synchronize calls to `vm_insert_page` and `zap_page_range_single`.
    ///
    /// Also taken in the destructor before the pages are freed, which is what makes the
    /// lifetime extension in `stable_trylock_mm` sound.
    #[pin]
    mm_lock: Mutex<()>,
    /// Spinlock protecting changes to pages.
    #[pin]
    lock: SpinLock<Inner>,

    /// Must not move, since page info has pointers back.
    #[pin]
    _pin: PhantomPinned,
}
144
// We do not define any ops. For now, used only to check identity of vmas.
//
// The address of this static is stored into `vm_ops` of every vma registered via
// `register_with_vma`, and `check_vma` compares against it to recognize our vmas before
// inserting or zapping pages.
static BINDER_VM_OPS: bindings::vm_operations_struct = pin_init::zeroed();
147
// To ensure that we do not accidentally install pages into or zap pages from the wrong vma, we
// check its vm_ops and private data before using it.
//
// Returns the vma as a mixed-map vma only if both identity checks pass; otherwise `None`.
fn check_vma(vma: &virt::VmaRef, owner: *const ShrinkablePageRange) -> Option<&virt::VmaMixedMap> {
    // SAFETY: Just reading the vm_ops pointer of any active vma is safe.
    let vm_ops = unsafe { (*vma.as_ptr()).vm_ops };
    if !ptr::eq(vm_ops, &BINDER_VM_OPS) {
        return None;
    }

    // SAFETY: Reading the vm_private_data pointer of a binder-owned vma is safe.
    let vm_private_data = unsafe { (*vma.as_ptr()).vm_private_data };
    // The ShrinkablePageRange is only dropped when the Process is dropped, which only happens once
    // the file's ->release handler is invoked, which means the ShrinkablePageRange outlives any
    // VMA associated with it, so there can't be any false positives due to pointer reuse here.
    if !ptr::eq(vm_private_data, owner.cast()) {
        return None;
    }

    vma.as_mixedmap_vma()
}
168
struct Inner {
    /// Array of pages.
    ///
    /// Since this is also accessed by the shrinker, we can't use a `Box`, which asserts exclusive
    /// ownership. To deal with that, we manage it using raw pointers.
    pages: *mut PageInfo,
    /// Length of the `pages` array. Zero until `register_with_vma` has been called.
    size: usize,
    /// The address of the vma to insert the pages into.
    vma_addr: usize,
}

// SAFETY: proper locking is in place for `Inner` — it is only accessed under the
// `ShrinkablePageRange::lock` spinlock.
unsafe impl Send for Inner {}

/// Guard for `ShrinkablePageRange::mm_lock` with an extended `'static` lifetime.
///
/// Used by the shrinker callback, which cannot express a shorter borrow of the range; see
/// `stable_trylock_mm` for why extending the lifetime is sound.
type StableMmGuard =
    kernel::sync::lock::Guard<'static, (), kernel::sync::lock::mutex::MutexBackend>;
186
187 /// An array element that describes the current state of a page.
188 ///
189 /// There are three states:
190 ///
191 /// * Free. The page is None. The `lru` element is not queued.
192 /// * Available. The page is Some. The `lru` element is queued to the shrinker's lru.
193 /// * Used. The page is Some. The `lru` element is not queued.
194 ///
195 /// When an element is available, the shrinker is able to free the page.
#[repr(C)]
struct PageInfo {
    /// Node in the shrinker's lru list; queued only while the page is in the "available" state.
    ///
    /// Must stay the first field of this `#[repr(C)]` struct: `rust_shrink_free_page` casts the
    /// `list_head` pointer it receives back to a `PageInfo` pointer.
    lru: bindings::list_head,
    /// The page itself, or `None` in the "free" state.
    page: Option<Page>,
    /// Back-pointer to the owning range; written once during initialization.
    range: *const ShrinkablePageRange,
}
202
impl PageInfo {
    /// Store `page` in this slot.
    ///
    /// # Safety
    ///
    /// The caller ensures that writing to `me.page` is ok, and that the page is not currently set.
    unsafe fn set_page(me: *mut PageInfo, page: Page) {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // SAFETY: The pointer is valid for writing, so also valid for reading.
        if unsafe { (*ptr).is_some() } {
            // This is a caller bug; recover by dropping the existing page rather than leaking it.
            pr_err!("set_page called when there is already a page");
            // SAFETY: We will initialize the page again below.
            unsafe { ptr::drop_in_place(ptr) };
        }

        // SAFETY: The pointer is valid for writing.
        unsafe { ptr::write(ptr, Some(page)) };
    }

    /// # Safety
    ///
    /// The caller ensures that reading from `me.page` is ok for the duration of 'a.
    unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw const (*me).page };

        // SAFETY: The pointer is valid for reading.
        unsafe { (*ptr).as_ref() }
    }

    /// Move the page out of this slot, leaving `None` behind.
    ///
    /// # Safety
    ///
    /// The caller ensures that writing to `me.page` is ok.
    unsafe fn take_page(me: *mut PageInfo) -> Option<Page> {
        // SAFETY: This pointer offset is in bounds.
        let ptr = unsafe { &raw mut (*me).page };

        // SAFETY: The pointer is valid for writing (take replaces the value with `None`).
        unsafe { (*ptr).take() }
    }

    /// Add this page to the lru list, if not already in the list.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_add(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_add(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }

    /// Remove this page from the lru list, if it is in the list.
    ///
    /// # Safety
    ///
    /// The pointer must be valid, and it must be the right shrinker and nid.
    unsafe fn list_lru_del(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
        // SAFETY: This pointer offset is in bounds.
        let lru_ptr = unsafe { &raw mut (*me).lru };
        // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
        unsafe { bindings::list_lru_del(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
    }
}
268
impl ShrinkablePageRange {
    /// Create a new `ShrinkablePageRange` using the given shrinker.
    ///
    /// Captures the calling task's `mm`, so this must run in the context of the process that
    /// will own the vma. Fails with `ESRCH` if the current task has no mm.
    pub(crate) fn new(shrinker: &'static Shrinker) -> impl PinInit<Self, Error> {
        try_pin_init!(Self {
            shrinker,
            pid: kernel::current!().pid(),
            mm: ARef::from(&**kernel::current!().mm().ok_or(ESRCH)?),
            mm_lock <- new_mutex!((), "ShrinkablePageRange::mm"),
            lock <- new_spinlock!(Inner {
                pages: ptr::null_mut(),
                size: 0,
                vma_addr: 0,
            }, "ShrinkablePageRange"),
            _pin: PhantomPinned,
        })
    }

    /// Try to take `mm_lock`, returning a guard with an extended (`'static`) lifetime.
    ///
    /// Used by the shrinker callback, which cannot hold a shorter-lived borrow of `self`.
    pub(crate) fn stable_trylock_mm(&self) -> Option<StableMmGuard> {
        // SAFETY: This extends the duration of the reference. Since this call happens before
        // `mm_lock` is taken in the destructor of `ShrinkablePageRange`, the destructor will block
        // until the returned guard is dropped. This ensures that the guard is valid until dropped.
        let mm_lock = unsafe { &*ptr::from_ref(&self.mm_lock) };

        mm_lock.try_lock()
    }

    /// Register a vma with this page range. Returns the size of the region.
    ///
    /// The managed region is capped at 4 MiB, the vma must belong to the mm captured in `new`,
    /// and registration can happen at most once (EBUSY otherwise).
    pub(crate) fn register_with_vma(&self, vma: &virt::VmaNew) -> Result<usize> {
        let num_bytes = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize);
        let num_pages = num_bytes >> PAGE_SHIFT;

        if !ptr::eq::<Mm>(&*self.mm, &**vma.mm()) {
            pr_debug!("Failed to register with vma: invalid vma->vm_mm");
            return Err(EINVAL);
        }
        if num_pages == 0 {
            pr_debug!("Failed to register with vma: size zero");
            return Err(EINVAL);
        }

        let mut pages = KVVec::<PageInfo>::with_capacity(num_pages, GFP_KERNEL)?;

        // SAFETY: This just initializes the pages array: each entry gets a back-pointer to this
        // range, no page, and a self-referential (i.e. empty) lru node.
        unsafe {
            let self_ptr = self as *const ShrinkablePageRange;
            for i in 0..num_pages {
                let info = pages.as_mut_ptr().add(i);
                (&raw mut (*info).range).write(self_ptr);
                (&raw mut (*info).page).write(None);
                let lru = &raw mut (*info).lru;
                (&raw mut (*lru).next).write(lru);
                (&raw mut (*lru).prev).write(lru);
            }
        }

        let mut inner = self.lock.lock();
        if inner.size > 0 {
            pr_debug!("Failed to register with vma: already registered");
            drop(inner);
            return Err(EBUSY);
        }

        // Ownership of the array is transferred to `inner`; it is reconstructed and freed in
        // the destructor.
        inner.pages = pages.into_raw_parts().0;
        inner.size = num_pages;
        inner.vma_addr = vma.start();

        // This pointer is only used for comparison - it's not dereferenced.
        //
        // SAFETY: We own the vma, and we don't use any methods on VmaNew that rely on
        // `vm_private_data`.
        unsafe {
            (*vma.as_ptr()).vm_private_data = ptr::from_ref(self).cast_mut().cast::<c_void>()
        };

        // SAFETY: We own the vma, and we don't use any methods on VmaNew that rely on
        // `vm_ops`.
        unsafe { (*vma.as_ptr()).vm_ops = &BINDER_VM_OPS };

        Ok(num_pages)
    }

    /// Make sure that the given pages are allocated and mapped.
    ///
    /// Must not be called from an atomic context.
    ///
    /// Panics if `end` exceeds the registered size.
    pub(crate) fn use_range(&self, start: usize, end: usize) -> Result<()> {
        if start >= end {
            return Ok(());
        }
        let mut inner = self.lock.lock();
        assert!(end <= inner.size);

        for i in start..end {
            // SAFETY: This pointer offset is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // Since we're going to use the page, we should remove it from the lru list so that
                // the shrinker will not free it.
                //
                // SAFETY: The pointer is valid, and this is the right shrinker.
                //
                // The shrinker can't free the page between the check and this call to
                // `list_lru_del` because we hold the lock.
                unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            } else {
                // We have to allocate a new page. Use the slow path.
                drop(inner);
                // SAFETY: `i < end <= inner.size` so `i` is in bounds.
                match unsafe { self.use_page_slow(i) } {
                    Ok(()) => {}
                    Err(err) => {
                        pr_warn!("Error in use_page_slow: {:?}", err);
                        return Err(err);
                    }
                }
                // The slow path dropped the spinlock, so reacquire it before continuing.
                inner = self.lock.lock();
            }
        }
        Ok(())
    }

    /// Mark the given page as in use, slow path.
    ///
    /// Allocates a page without holding any locks, then installs it into the vma if it is still
    /// needed once the locks are reacquired.
    ///
    /// Must not be called from an atomic context.
    ///
    /// # Safety
    ///
    /// Assumes that `i` is in bounds.
    #[cold]
    unsafe fn use_page_slow(&self, i: usize) -> Result<()> {
        let new_page = Page::alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO)?;

        // Lock ordering: mm_lock (mutex) before the spinlock, matching the file-level comment.
        let mm_mutex = self.mm_lock.lock();
        let inner = self.lock.lock();

        // SAFETY: This pointer offset is in bounds.
        let page_info = unsafe { inner.pages.add(i) };

        // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
        if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
            // The page was already there, or someone else added the page while we didn't hold the
            // spinlock.
            //
            // SAFETY: The pointer is valid, and this is the right shrinker.
            //
            // The shrinker can't free the page between the check and this call to
            // `list_lru_del` because we hold the lock.
            unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            // `new_page` is dropped here; the speculative allocation was unnecessary.
            return Ok(());
        }

        let vma_addr = inner.vma_addr;
        // Release the spinlock while we insert the page into the vma.
        drop(inner);

        // No overflow since we stay in bounds of the vma.
        let user_page_addr = vma_addr + (i << PAGE_SHIFT);

        // We use `mmput_async` when dropping the `mm` because `use_page_slow` is usually used from
        // a remote process. If the call to `mmput` races with the process shutting down, then the
        // caller of `use_page_slow` becomes responsible for cleaning up the `mm`, which doesn't
        // happen until it returns to userspace. However, the caller might instead go to sleep and
        // wait for the owner of the `mm` to wake it up, which doesn't happen because it's in the
        // middle of a shutdown process that won't complete until the `mm` is dropped. This can
        // amount to a deadlock.
        //
        // Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a
        // workqueue.
        let mm = MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?);
        {
            let vma_read;
            let mmap_read;
            // Fast path: per-vma locking under rcu; fall back to the mmap read lock.
            let vma = if let Some(ret) = mm.lock_vma_under_rcu(vma_addr) {
                vma_read = ret;
                check_vma(&vma_read, self)
            } else {
                mmap_read = mm.mmap_read_lock();
                mmap_read
                    .vma_lookup(vma_addr)
                    .and_then(|vma| check_vma(vma, self))
            };

            match vma {
                Some(vma) => vma.vm_insert_page(user_page_addr, &new_page)?,
                None => return Err(ESRCH),
            }
        }

        let inner = self.lock.lock();

        // SAFETY: The `page_info` pointer is valid and currently does not have a page. The page
        // can be written to since we hold the lock.
        //
        // We released and reacquired the spinlock since we checked that the page is null, but we
        // always hold the mm_lock mutex when setting the page to a non-null value, so it's not
        // possible for someone else to have changed it since our check.
        unsafe { PageInfo::set_page(page_info, new_page) };

        drop(inner);
        drop(mm_mutex);

        Ok(())
    }

    /// If the given page is in use, then mark it as available so that the shrinker can free it.
    ///
    /// May be called from an atomic context.
    ///
    /// Panics if `end` exceeds the registered size.
    pub(crate) fn stop_using_range(&self, start: usize, end: usize) {
        if start >= end {
            return;
        }
        let inner = self.lock.lock();
        assert!(end <= inner.size);

        // NOTE(review): iterating in reverse means higher-index pages are queued to the lru
        // first — presumably so the shrinker reclaims the far end of the range first; confirm.
        for i in (start..end).rev() {
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { inner.pages.add(i) };

            // SAFETY: Okay for reading since we have the lock.
            if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
                // SAFETY: The pointer is valid, and it's the right shrinker.
                unsafe { PageInfo::list_lru_add(page_info, page.nid(), self.shrinker) };
            }
        }
    }

    /// Helper for reading or writing to a range of bytes that may overlap with several pages.
    ///
    /// Calls `cb(page, offset_in_page, length)` once per touched page, in ascending order.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    unsafe fn iterate<T>(&self, mut offset: usize, mut size: usize, mut cb: T) -> Result
    where
        T: FnMut(&Page, usize, usize) -> Result,
    {
        if size == 0 {
            return Ok(());
        }

        // Snapshot the array pointer and size; both are immutable once registered.
        let (pages, num_pages) = {
            let inner = self.lock.lock();
            (inner.pages, inner.size)
        };
        let num_bytes = num_pages << PAGE_SHIFT;

        // Check that the request is within the buffer.
        if offset.checked_add(size).ok_or(EFAULT)? > num_bytes {
            return Err(EFAULT);
        }

        let mut page_index = offset >> PAGE_SHIFT;
        offset &= PAGE_SIZE - 1;
        while size > 0 {
            let available = usize::min(size, PAGE_SIZE - offset);
            // SAFETY: The pointer is in bounds.
            let page_info = unsafe { pages.add(page_index) };
            // SAFETY: The caller guarantees that this page is in the "in use" state for the
            // duration of this call to `iterate`, so nobody will change the page.
            let page = unsafe { PageInfo::get_page(page_info) };
            if page.is_none() {
                pr_warn!("Page is null!");
            }
            let page = page.ok_or(EFAULT)?;
            cb(page, offset, available)?;
            size -= available;
            page_index += 1;
            // Only the first page may start at a non-zero offset.
            offset = 0;
        }
        Ok(())
    }

    /// Copy from userspace into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn copy_from_user_slice(
        &self,
        reader: &mut UserSliceReader,
        offset: usize,
        size: usize,
    ) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `copy_from_user_slice`.
        unsafe {
            self.iterate(offset, size, |page, offset, to_copy| {
                page.copy_from_user_slice_raw(reader, offset, to_copy)
            })
        }
    }

    /// Copy from this page range into kernel space.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
        let mut out = MaybeUninit::<T>::uninit();
        let mut out_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `read`.
        unsafe {
            self.iterate(offset, size_of::<T>(), |page, offset, to_copy| {
                // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
                let obj_ptr = (out.as_mut_ptr() as *mut u8).add(out_offset);
                // SAFETY: The pointer points is in-bounds of the `out` variable, so it is valid.
                page.read_raw(obj_ptr, offset, to_copy)?;
                out_offset += to_copy;
                Ok(())
            })?;
        }
        // SAFETY: We just initialised the data: `iterate` only returns `Ok` after the callback
        // has covered all `size_of::<T>()` bytes.
        Ok(unsafe { out.assume_init() })
    }

    /// Copy from kernel space into this page range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result {
        let mut obj_offset = 0;
        // SAFETY: `self.iterate` has the same safety requirements as `write`.
        unsafe {
            self.iterate(offset, size_of_val(obj), |page, offset, to_copy| {
                // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
                let obj_ptr = (obj as *const T as *const u8).add(obj_offset);
                // SAFETY: We have a reference to the object, so the pointer is valid.
                page.write_raw(obj_ptr, offset, to_copy)?;
                obj_offset += to_copy;
                Ok(())
            })
        }
    }

    /// Write zeroes to the given range.
    ///
    /// # Safety
    ///
    /// All pages touched by this operation must be in use for the duration of this call.
    pub(crate) unsafe fn fill_zero(&self, offset: usize, size: usize) -> Result {
        // SAFETY: `self.iterate` has the same safety requirements as `fill_zero`.
        unsafe {
            self.iterate(offset, size, |page, offset, len| {
                page.fill_zero_raw(offset, len)
            })
        }
    }
}
617
#[pinned_drop]
impl PinnedDrop for ShrinkablePageRange {
    fn drop(self: Pin<&mut Self>) {
        let (pages, size) = {
            let lock = self.lock.lock();
            (lock.pages, lock.size)
        };

        // `register_with_vma` was never called, so there is nothing to free.
        if size == 0 {
            return;
        }

        // Note: This call is also necessary for the safety of `stable_trylock_mm`.
        let mm_lock = self.mm_lock.lock();

        // This is the destructor, so unlike the other methods, we only need to worry about races
        // with the shrinker here. Since we hold the `mm_lock`, we also can't race with the
        // shrinker, and after this loop, the shrinker will not access any of our pages since we
        // removed them from the lru list.
        for i in 0..size {
            // SAFETY: Loop is in-bounds of the size.
            let p_ptr = unsafe { pages.add(i) };
            // SAFETY: No other readers, so we can read.
            if let Some(p) = unsafe { PageInfo::get_page(p_ptr) } {
                // SAFETY: The pointer is valid and it's the right shrinker.
                unsafe { PageInfo::list_lru_del(p_ptr, p.nid(), self.shrinker) };
            }
        }

        drop(mm_lock);

        // SAFETY: `pages` was allocated as an `KVVec<PageInfo>` with capacity `size`. Furthermore,
        // all `size` elements are initialized. Also, the array is no longer shared with the
        // shrinker due to the above loop.
        drop(unsafe { KVVec::from_raw_parts(pages, size, size) });
    }
}
655
656 /// # Safety
657 /// Called by the shrinker.
658 #[no_mangle]
rust_shrink_count( shrink: *mut bindings::shrinker, _sc: *mut bindings::shrink_control, ) -> c_ulong659 unsafe extern "C" fn rust_shrink_count(
660 shrink: *mut bindings::shrinker,
661 _sc: *mut bindings::shrink_control,
662 ) -> c_ulong {
663 // SAFETY: We can access our own private data.
664 let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
665 // SAFETY: Accessing the lru list is okay. Just an FFI call.
666 unsafe { bindings::list_lru_count(list_lru) }
667 }
668
669 /// # Safety
670 /// Called by the shrinker.
671 #[no_mangle]
rust_shrink_scan( shrink: *mut bindings::shrinker, sc: *mut bindings::shrink_control, ) -> c_ulong672 unsafe extern "C" fn rust_shrink_scan(
673 shrink: *mut bindings::shrinker,
674 sc: *mut bindings::shrink_control,
675 ) -> c_ulong {
676 // SAFETY: We can access our own private data.
677 let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
678 // SAFETY: Caller guarantees that it is safe to read this field.
679 let nr_to_scan = unsafe { (*sc).nr_to_scan };
680 // SAFETY: Accessing the lru list is okay. Just an FFI call.
681 unsafe {
682 bindings::list_lru_walk(
683 list_lru,
684 Some(bindings::rust_shrink_free_page_wrap),
685 ptr::null_mut(),
686 nr_to_scan,
687 )
688 }
689 }
690
/// Tell the lru walk to skip this entry and move on; used when a trylock fails.
const LRU_SKIP: bindings::lru_status = bindings::lru_status_LRU_SKIP;
/// Tell the lru walk that the entry was removed and that we dropped the lru lock, so the walk
/// must revalidate its position before continuing.
const LRU_REMOVED_ENTRY: bindings::lru_status = bindings::lru_status_LRU_REMOVED_RETRY;
693
/// Free one reclaimable page: remove it from the lru, unmap it from the vma, and drop it.
///
/// Runs with the lru spinlock held, which inverts the usual lock order (see the comment at the
/// top of this file), so every other lock is taken with a trylock and the walk is told to skip
/// this entry on contention.
///
/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_free_page(
    item: *mut bindings::list_head,
    lru: *mut bindings::list_lru_one,
    _cb_arg: *mut c_void,
) -> bindings::lru_status {
    // Fields that should survive after unlocking the lru lock.
    let page;
    let page_index;
    let mm;
    let mmap_read;
    let mm_mutex;
    let vma_addr;
    let range_ptr;

    {
        // CAST: The `list_head` field is first in `PageInfo`.
        let info = item as *mut PageInfo;
        // SAFETY: The `range` field of `PageInfo` is immutable.
        range_ptr = unsafe { (*info).range };
        // SAFETY: The `range` outlives its `PageInfo` values.
        let range = unsafe { &*range_ptr };

        // If the owning process is already gone, there is nothing to unmap; skip.
        mm = match range.mm.mmget_not_zero() {
            Some(mm) => MmWithUser::into_mmput_async(mm),
            None => return LRU_SKIP,
        };

        // Trylock because we hold the lru lock, which is normally taken last.
        mm_mutex = match range.stable_trylock_mm() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        mmap_read = match mm.mmap_read_trylock() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };

        // We can't lock it normally here, since we hold the lru lock.
        let inner = match range.lock.try_lock() {
            Some(inner) => inner,
            None => return LRU_SKIP,
        };

        // SAFETY: The item is in this lru list, so it's okay to remove it.
        unsafe { bindings::list_lru_isolate(lru, item) };

        // SAFETY: Both pointers are in bounds of the same allocation.
        page_index = unsafe { info.offset_from(inner.pages) } as usize;

        // SAFETY: We hold the spinlock, so we can take the page.
        //
        // This sets the page pointer to zero before we unmap it from the vma. However, we call
        // `zap_page_range` before we release the mmap lock, so `use_page_slow` will not be able to
        // insert a new page until after our call to `zap_page_range`.
        page = unsafe { PageInfo::take_page(info) };
        vma_addr = inner.vma_addr;

        // From this point on, we don't access this PageInfo or ShrinkablePageRange again, because
        // they can be freed at any point after we unlock `lru_lock`. This is with the exception of
        // `mm_mutex` which is kept alive by holding the lock.
    }

    // SAFETY: The lru lock is locked when this method is called.
    unsafe { bindings::spin_unlock(&raw mut (*lru).lock) };

    // Unmap the page from userspace, but only after re-verifying that the vma is still ours.
    if let Some(unchecked_vma) = mmap_read.vma_lookup(vma_addr) {
        if let Some(vma) = check_vma(unchecked_vma, range_ptr) {
            let user_page_addr = vma_addr + (page_index << PAGE_SHIFT);
            vma.zap_page_range_single(user_page_addr, PAGE_SIZE);
        }
    }

    drop(mmap_read);
    drop(mm_mutex);
    drop(mm);
    drop(page);

    // We dropped the lru lock above, so the walk must retry; see `LRU_REMOVED_ENTRY`.
    LRU_REMOVED_ENTRY
}
776