xref: /linux/rust/kernel/page.rs (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 //! Kernel page allocation and management.
4 
5 use crate::{
6     alloc::{
7         AllocError,
8         Flags, //
9     },
10     bindings,
11     error::{
12         code::*,
13         Result, //
14     },
15     uaccess::UserSliceReader, //
16 };
17 use core::{
18     marker::PhantomData,
19     mem::ManuallyDrop,
20     ops::Deref,
21     ptr::{
22         self,
23         NonNull, //
24     }, //
25 };
26 
/// A bitwise shift for the page size.
///
/// This is the C-side `PAGE_SHIFT` value from `bindings`, converted to `usize` so that it can be
/// used directly in address arithmetic.
pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize;

/// The number of bytes in a page.
///
/// Re-exported from `bindings::PAGE_SIZE`.
pub const PAGE_SIZE: usize = bindings::PAGE_SIZE;

/// A bitmask that gives the page containing a given address.
///
/// All bits are set except the low bits selected by `PAGE_SIZE - 1`, so `addr & PAGE_MASK` clears
/// the offset of `addr` within its page.
pub const PAGE_MASK: usize = !(PAGE_SIZE - 1);
35 
36 /// Rounds up to the next multiple of [`PAGE_SIZE`].
37 ///
38 /// Returns [`None`] on integer overflow.
39 ///
40 /// # Examples
41 ///
42 /// ```
43 /// use kernel::page::{
44 ///     page_align,
45 ///     PAGE_SIZE,
46 /// };
47 ///
48 /// // Requested address is already aligned.
49 /// assert_eq!(page_align(0x0), Some(0x0));
50 /// assert_eq!(page_align(PAGE_SIZE), Some(PAGE_SIZE));
51 ///
52 /// // Requested address needs alignment up.
53 /// assert_eq!(page_align(0x1), Some(PAGE_SIZE));
54 /// assert_eq!(page_align(PAGE_SIZE + 1), Some(2 * PAGE_SIZE));
55 ///
56 /// // Requested address causes overflow (returns `None`).
57 /// let overflow_addr = usize::MAX - (PAGE_SIZE / 2);
58 /// assert_eq!(page_align(overflow_addr), None);
59 /// ```
60 #[inline(always)]
61 pub const fn page_align(addr: usize) -> Option<usize> {
62     let Some(sum) = addr.checked_add(PAGE_SIZE - 1) else {
63         return None;
64     };
65     Some(sum & PAGE_MASK)
66 }
67 
/// Representation of a non-owning reference to a [`Page`].
///
/// This type provides a borrowed version of a [`Page`] that is owned by some other entity, e.g. a
/// [`Vmalloc`] allocation such as [`VBox`].
///
/// The inner [`Page`] is wrapped in [`ManuallyDrop`], so dropping a [`BorrowedPage`] never runs
/// the destructor of [`Page`]; the page stays owned by whoever handed it out.
///
/// # Examples
///
/// ```
/// # use kernel::{bindings, prelude::*};
/// use kernel::page::{BorrowedPage, Page, PAGE_SIZE};
/// # use core::{mem::MaybeUninit, ptr, ptr::NonNull };
///
/// fn borrow_page<'a>(vbox: &'a mut VBox<MaybeUninit<[u8; PAGE_SIZE]>>) -> BorrowedPage<'a> {
///     let ptr = ptr::from_ref(&**vbox);
///
///     // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory.
///     let page = unsafe { bindings::vmalloc_to_page(ptr.cast()) };
///
///     // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid
///     // pointer to `Vmalloc` memory.
///     let page = unsafe { NonNull::new_unchecked(page) };
///
///     // SAFETY:
///     // - `page` is a valid pointer to a `struct page`.
///     // - `page` backs the memory of `vbox`, which is borrowed for `'a`, so it remains valid
///     //   for the entire lifetime `'a`.
///     unsafe { BorrowedPage::from_raw(page) }
/// }
///
/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?;
/// let page = borrow_page(&mut vbox);
///
/// // SAFETY: There is no concurrent read or write to this page.
/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
/// # Ok::<(), Error>(())
/// ```
///
/// # Invariants
///
/// The borrowed underlying pointer to a `struct page` is valid for the entire lifetime `'a`.
///
/// [`VBox`]: kernel::alloc::VBox
/// [`Vmalloc`]: kernel::alloc::allocator::Vmalloc
pub struct BorrowedPage<'a>(ManuallyDrop<Page>, PhantomData<&'a Page>);
111 
112 impl<'a> BorrowedPage<'a> {
113     /// Constructs a [`BorrowedPage`] from a raw pointer to a `struct page`.
114     ///
115     /// # Safety
116     ///
117     /// - `ptr` must point to a valid `bindings::page`.
118     /// - `ptr` must remain valid for the entire lifetime `'a`.
119     pub unsafe fn from_raw(ptr: NonNull<bindings::page>) -> Self {
120         let page = Page { page: ptr };
121 
122         // INVARIANT: The safety requirements guarantee that `ptr` is valid for the entire lifetime
123         // `'a`.
124         Self(ManuallyDrop::new(page), PhantomData)
125     }
126 }
127 
128 impl<'a> Deref for BorrowedPage<'a> {
129     type Target = Page;
130 
131     fn deref(&self) -> &Self::Target {
132         &self.0
133     }
134 }
135 
/// Trait to be implemented by types which provide an [`Iterator`] implementation of
/// [`BorrowedPage`] items, such as [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
pub trait AsPageIter {
    /// The [`Iterator`] type, e.g. [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
    ///
    /// The lifetime `'a` is the lifetime of the [`BorrowedPage`] items it yields; `Self` must
    /// outlive it.
    type Iter<'a>: Iterator<Item = BorrowedPage<'a>>
    where
        Self: 'a;

    /// Returns an [`Iterator`] of [`BorrowedPage`] items over all pages owned by `self`.
    ///
    /// The iterator borrows `self` mutably for as long as it, or any page it yielded, is alive.
    fn page_iter(&mut self) -> Self::Iter<'_>;
}
147 
/// A pointer to a page that owns the page allocation.
///
/// Dropping a [`Page`] frees the underlying page again.
///
/// # Invariants
///
/// The pointer is valid, and has ownership over the page.
pub struct Page {
    // Non-null pointer to the underlying `struct page`.
    page: NonNull<bindings::page>,
}

// SAFETY: Pages have no logic that relies on them staying on a given thread, so moving them across
// threads is safe.
unsafe impl Send for Page {}

// SAFETY: Pages have no logic that relies on them not being accessed concurrently, so accessing
// them concurrently is safe.
unsafe impl Sync for Page {}
164 
165 impl Page {
166     /// Allocates a new page.
167     ///
168     /// # Examples
169     ///
170     /// Allocate memory for a page.
171     ///
172     /// ```
173     /// use kernel::page::Page;
174     ///
175     /// let page = Page::alloc_page(GFP_KERNEL)?;
176     /// # Ok::<(), kernel::alloc::AllocError>(())
177     /// ```
178     ///
179     /// Allocate memory for a page and zero its contents.
180     ///
181     /// ```
182     /// use kernel::page::Page;
183     ///
184     /// let page = Page::alloc_page(GFP_KERNEL | __GFP_ZERO)?;
185     /// # Ok::<(), kernel::alloc::AllocError>(())
186     /// ```
187     #[inline]
188     pub fn alloc_page(flags: Flags) -> Result<Self, AllocError> {
189         // SAFETY: Depending on the value of `gfp_flags`, this call may sleep. Other than that, it
190         // is always safe to call this method.
191         let page = unsafe { bindings::alloc_pages(flags.as_raw(), 0) };
192         let page = NonNull::new(page).ok_or(AllocError)?;
193         // INVARIANT: We just successfully allocated a page, so we now have ownership of the newly
194         // allocated page. We transfer that ownership to the new `Page` object.
195         Ok(Self { page })
196     }
197 
198     /// Returns a raw pointer to the page.
199     pub fn as_ptr(&self) -> *mut bindings::page {
200         self.page.as_ptr()
201     }
202 
203     /// Get the node id containing this page.
204     #[inline]
205     pub fn nid(&self) -> i32 {
206         // SAFETY: Always safe to call with a valid page.
207         unsafe { bindings::page_to_nid(self.as_ptr()) }
208     }
209 
210     /// Runs a piece of code with this page mapped to an address.
211     ///
212     /// The page is unmapped when this call returns.
213     ///
214     /// # Using the raw pointer
215     ///
216     /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
217     /// `PAGE_SIZE` bytes and for the duration in which the closure is called. The pointer might
218     /// only be mapped on the current thread, and when that is the case, dereferencing it on other
219     /// threads is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't
220     /// cause data races, the memory may be uninitialized, and so on.
221     ///
222     /// If multiple threads map the same page at the same time, then they may reference with
223     /// different addresses. However, even if the addresses are different, the underlying memory is
224     /// still the same for these purposes (e.g., it's still a data race if they both write to the
225     /// same underlying byte at the same time).
226     fn with_page_mapped<T>(&self, f: impl FnOnce(*mut u8) -> T) -> T {
227         // SAFETY: `page` is valid due to the type invariants on `Page`.
228         let mapped_addr = unsafe { bindings::kmap_local_page(self.as_ptr()) };
229 
230         let res = f(mapped_addr.cast());
231 
232         // This unmaps the page mapped above.
233         //
234         // SAFETY: Since this API takes the user code as a closure, it can only be used in a manner
235         // where the pages are unmapped in reverse order. This is as required by `kunmap_local`.
236         //
237         // In other words, if this call to `kunmap_local` happens when a different page should be
238         // unmapped first, then there must necessarily be a call to `kmap_local_page` other than the
239         // call just above in `with_page_mapped` that made that possible. In this case, it is the
240         // unsafe block that wraps that other call that is incorrect.
241         unsafe { bindings::kunmap_local(mapped_addr) };
242 
243         res
244     }
245 
246     /// Runs a piece of code with a raw pointer to a slice of this page, with bounds checking.
247     ///
248     /// If `f` is called, then it will be called with a pointer that points at `off` bytes into the
249     /// page, and the pointer will be valid for at least `len` bytes. The pointer is only valid on
250     /// this task, as this method uses a local mapping.
251     ///
252     /// If `off` and `len` refers to a region outside of this page, then this method returns
253     /// [`EINVAL`] and does not call `f`.
254     ///
255     /// # Using the raw pointer
256     ///
257     /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
258     /// `len` bytes and for the duration in which the closure is called. The pointer might only be
259     /// mapped on the current thread, and when that is the case, dereferencing it on other threads
260     /// is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't cause
261     /// data races, the memory may be uninitialized, and so on.
262     ///
263     /// If multiple threads map the same page at the same time, then they may reference with
264     /// different addresses. However, even if the addresses are different, the underlying memory is
265     /// still the same for these purposes (e.g., it's still a data race if they both write to the
266     /// same underlying byte at the same time).
267     fn with_pointer_into_page<T>(
268         &self,
269         off: usize,
270         len: usize,
271         f: impl FnOnce(*mut u8) -> Result<T>,
272     ) -> Result<T> {
273         let bounds_ok = off <= PAGE_SIZE && len <= PAGE_SIZE && (off + len) <= PAGE_SIZE;
274 
275         if bounds_ok {
276             self.with_page_mapped(move |page_addr| {
277                 // SAFETY: The `off` integer is at most `PAGE_SIZE`, so this pointer offset will
278                 // result in a pointer that is in bounds or one off the end of the page.
279                 f(unsafe { page_addr.add(off) })
280             })
281         } else {
282             Err(EINVAL)
283         }
284     }
285 
286     /// Maps the page and reads from it into the given buffer.
287     ///
288     /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
289     /// outside of the page, then this call returns [`EINVAL`].
290     ///
291     /// # Safety
292     ///
293     /// * Callers must ensure that `dst` is valid for writing `len` bytes.
294     /// * Callers must ensure that this call does not race with a write to the same page that
295     ///   overlaps with this read.
296     pub unsafe fn read_raw(&self, dst: *mut u8, offset: usize, len: usize) -> Result {
297         self.with_pointer_into_page(offset, len, move |src| {
298             // SAFETY: If `with_pointer_into_page` calls into this closure, then
299             // it has performed a bounds check and guarantees that `src` is
300             // valid for `len` bytes.
301             //
302             // There caller guarantees that there is no data race.
303             unsafe { ptr::copy_nonoverlapping(src, dst, len) };
304             Ok(())
305         })
306     }
307 
308     /// Maps the page and writes into it from the given buffer.
309     ///
310     /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
311     /// outside of the page, then this call returns [`EINVAL`].
312     ///
313     /// # Safety
314     ///
315     /// * Callers must ensure that `src` is valid for reading `len` bytes.
316     /// * Callers must ensure that this call does not race with a read or write to the same page
317     ///   that overlaps with this write.
318     pub unsafe fn write_raw(&self, src: *const u8, offset: usize, len: usize) -> Result {
319         self.with_pointer_into_page(offset, len, move |dst| {
320             // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
321             // bounds check and guarantees that `dst` is valid for `len` bytes.
322             //
323             // There caller guarantees that there is no data race.
324             unsafe { ptr::copy_nonoverlapping(src, dst, len) };
325             Ok(())
326         })
327     }
328 
329     /// Maps the page and zeroes the given slice.
330     ///
331     /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
332     /// outside of the page, then this call returns [`EINVAL`].
333     ///
334     /// # Safety
335     ///
336     /// Callers must ensure that this call does not race with a read or write to the same page that
337     /// overlaps with this write.
338     pub unsafe fn fill_zero_raw(&self, offset: usize, len: usize) -> Result {
339         self.with_pointer_into_page(offset, len, move |dst| {
340             // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
341             // bounds check and guarantees that `dst` is valid for `len` bytes.
342             //
343             // There caller guarantees that there is no data race.
344             unsafe { ptr::write_bytes(dst, 0u8, len) };
345             Ok(())
346         })
347     }
348 
349     /// Copies data from userspace into this page.
350     ///
351     /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
352     /// outside of the page, then this call returns [`EINVAL`].
353     ///
354     /// Like the other `UserSliceReader` methods, data races are allowed on the userspace address.
355     /// However, they are not allowed on the page you are copying into.
356     ///
357     /// # Safety
358     ///
359     /// Callers must ensure that this call does not race with a read or write to the same page that
360     /// overlaps with this write.
361     pub unsafe fn copy_from_user_slice_raw(
362         &self,
363         reader: &mut UserSliceReader,
364         offset: usize,
365         len: usize,
366     ) -> Result {
367         self.with_pointer_into_page(offset, len, move |dst| {
368             // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
369             // bounds check and guarantees that `dst` is valid for `len` bytes. Furthermore, we have
370             // exclusive access to the slice since the caller guarantees that there are no races.
371             reader.read_raw(unsafe { core::slice::from_raw_parts_mut(dst.cast(), len) })
372         })
373     }
374 }
375 
376 impl Drop for Page {
377     #[inline]
378     fn drop(&mut self) {
379         // SAFETY: By the type invariants, we have ownership of the page and can free it.
380         unsafe { bindings::__free_pages(self.page.as_ptr(), 0) };
381     }
382 }
383