xref: /linux/rust/kernel/page.rs (revision 654826aa4a8f25cf825ad9254f37e6cb5092098f)
// SPDX-License-Identifier: GPL-2.0

//! Kernel page allocation and management.

use crate::{
    alloc::{AllocError, Flags},
    bindings,
    error::code::*,
    error::Result,
    uaccess::UserSliceReader,
};
use core::{
    marker::PhantomData,
    mem::ManuallyDrop,
    ops::Deref,
    ptr::{self, NonNull},
};

/// A bitwise shift for the page size.
pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize;

/// The number of bytes in a page.
pub const PAGE_SIZE: usize = bindings::PAGE_SIZE;

/// A bitmask that gives the base address of the page containing a given address.
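///
/// # Examples
///
/// A brief illustration of how the three page constants relate; masking an address with
/// [`PAGE_MASK`] rounds it down to the base of its page.
///
/// ```
/// use kernel::page::{PAGE_MASK, PAGE_SHIFT, PAGE_SIZE};
///
/// assert_eq!(PAGE_SIZE, 1usize << PAGE_SHIFT);
/// assert_eq!((PAGE_SIZE + 1) & PAGE_MASK, PAGE_SIZE);
/// assert_eq!((PAGE_SIZE - 1) & PAGE_MASK, 0);
/// ```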
pub const PAGE_MASK: usize = !(PAGE_SIZE - 1);

/// Rounds up to the next multiple of [`PAGE_SIZE`].
///
/// Returns [`None`] on integer overflow.
///
/// # Examples
///
/// ```
/// use kernel::page::{
///     page_align,
///     PAGE_SIZE,
/// };
///
/// // Requested address is already aligned.
/// assert_eq!(page_align(0x0), Some(0x0));
/// assert_eq!(page_align(PAGE_SIZE), Some(PAGE_SIZE));
///
/// // Requested address needs alignment up.
/// assert_eq!(page_align(0x1), Some(PAGE_SIZE));
/// assert_eq!(page_align(PAGE_SIZE + 1), Some(2 * PAGE_SIZE));
///
/// // Requested address causes overflow (returns `None`).
/// let overflow_addr = usize::MAX - (PAGE_SIZE / 2);
/// assert_eq!(page_align(overflow_addr), None);
/// ```
#[inline(always)]
pub const fn page_align(addr: usize) -> Option<usize> {
    let Some(sum) = addr.checked_add(PAGE_SIZE - 1) else {
        return None;
    };
    Some(sum & PAGE_MASK)
}

/// Representation of a non-owning reference to a [`Page`].
///
/// This type provides a borrowed version of a [`Page`] that is owned by some other entity, e.g. a
/// [`Vmalloc`] allocation such as [`VBox`].
///
/// # Examples
///
/// ```
/// # use kernel::{bindings, prelude::*};
/// use kernel::page::{BorrowedPage, Page, PAGE_SIZE};
/// # use core::{mem::MaybeUninit, ptr, ptr::NonNull};
///
/// fn borrow_page<'a>(vbox: &'a mut VBox<MaybeUninit<[u8; PAGE_SIZE]>>) -> BorrowedPage<'a> {
///     let ptr = ptr::from_ref(&**vbox);
///
///     // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory.
///     let page = unsafe { bindings::vmalloc_to_page(ptr.cast()) };
///
///     // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid
///     // pointer to `Vmalloc` memory.
///     let page = unsafe { NonNull::new_unchecked(page) };
///
///     // SAFETY:
///     // - `page` is a valid pointer to a `struct page`.
///     // - `page` is valid for the entire lifetime `'a`.
///     unsafe { BorrowedPage::from_raw(page) }
/// }
///
/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?;
/// let page = borrow_page(&mut vbox);
///
/// // SAFETY: There is no concurrent read or write to this page.
/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
/// # Ok::<(), Error>(())
/// ```
///
/// # Invariants
///
/// The underlying pointer to the borrowed `struct page` is valid for the entire lifetime `'a`.
///
/// [`VBox`]: kernel::alloc::VBox
/// [`Vmalloc`]: kernel::alloc::allocator::Vmalloc
pub struct BorrowedPage<'a>(ManuallyDrop<Page>, PhantomData<&'a Page>);

impl<'a> BorrowedPage<'a> {
    /// Constructs a [`BorrowedPage`] from a raw pointer to a `struct page`.
    ///
    /// # Safety
    ///
    /// - `ptr` must point to a valid `bindings::page`.
    /// - `ptr` must remain valid for the entire lifetime `'a`.
    pub unsafe fn from_raw(ptr: NonNull<bindings::page>) -> Self {
        let page = Page { page: ptr };

        // INVARIANT: The safety requirements guarantee that `ptr` is valid for the entire lifetime
        // `'a`.
        Self(ManuallyDrop::new(page), PhantomData)
    }
}

impl<'a> Deref for BorrowedPage<'a> {
    type Target = Page;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

/// Trait to be implemented by types which provide an [`Iterator`] over
/// [`BorrowedPage`] items, such as [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
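///
/// # Examples
///
/// A minimal sketch of consuming an implementation of this trait; the `zero_pages` helper below
/// is hypothetical and not part of this module.
///
/// ```
/// use kernel::page::{AsPageIter, PAGE_SIZE};
///
/// fn zero_pages(alloc: &mut impl AsPageIter) -> Result {
///     for page in alloc.page_iter() {
///         // SAFETY: `alloc` is borrowed mutably, so no other code accesses its pages through
///         // it; this sketch assumes the pages are not accessed through any other path either.
///         unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
///     }
///     Ok(())
/// }
/// ```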
pub trait AsPageIter {
    /// The [`Iterator`] type, e.g. [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
    type Iter<'a>: Iterator<Item = BorrowedPage<'a>>
    where
        Self: 'a;

    /// Returns an [`Iterator`] of [`BorrowedPage`] items over all pages owned by `self`.
    fn page_iter(&mut self) -> Self::Iter<'_>;
}

/// A pointer to a page that owns the page allocation.
///
/// # Invariants
///
/// The pointer is valid, and has ownership over the page.
pub struct Page {
    page: NonNull<bindings::page>,
}

// SAFETY: Pages have no logic that relies on them staying on a given thread, so moving them across
// threads is safe.
unsafe impl Send for Page {}

// SAFETY: Pages have no logic that relies on them not being accessed concurrently, so accessing
// them concurrently is safe.
unsafe impl Sync for Page {}

impl Page {
    /// Allocates a new page.
    ///
    /// # Examples
    ///
    /// Allocate memory for a page.
    ///
    /// ```
    /// use kernel::page::Page;
    ///
    /// let page = Page::alloc_page(GFP_KERNEL)?;
    /// # Ok::<(), kernel::alloc::AllocError>(())
    /// ```
    ///
    /// Allocate memory for a page and zero its contents.
    ///
    /// ```
    /// use kernel::page::Page;
    ///
    /// let page = Page::alloc_page(GFP_KERNEL | __GFP_ZERO)?;
    /// # Ok::<(), kernel::alloc::AllocError>(())
    /// ```
    #[inline]
    pub fn alloc_page(flags: Flags) -> Result<Self, AllocError> {
        // SAFETY: Depending on the value of `flags`, this call may sleep. Other than that, it
        // is always safe to call this method.
        let page = unsafe { bindings::alloc_pages(flags.as_raw(), 0) };
        let page = NonNull::new(page).ok_or(AllocError)?;
        // INVARIANT: We just successfully allocated a page, so we now have ownership of the newly
        // allocated page. We transfer that ownership to the new `Page` object.
        Ok(Self { page })
    }

    /// Returns a raw pointer to the page.
    pub fn as_ptr(&self) -> *mut bindings::page {
        self.page.as_ptr()
    }

    /// Returns the ID of the NUMA node containing this page.
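    ///
    /// # Examples
    ///
    /// Every successfully allocated page lives on some NUMA node, so its ID is non-negative.
    ///
    /// ```
    /// use kernel::page::Page;
    ///
    /// let page = Page::alloc_page(GFP_KERNEL)?;
    /// assert!(page.nid() >= 0);
    /// # Ok::<(), kernel::alloc::AllocError>(())
    /// ```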
    pub fn nid(&self) -> i32 {
        // SAFETY: Always safe to call with a valid page.
        unsafe { bindings::page_to_nid(self.as_ptr()) }
    }

    /// Runs a piece of code with this page mapped to an address.
    ///
    /// The page is unmapped when this call returns.
    ///
    /// # Using the raw pointer
    ///
    /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
    /// `PAGE_SIZE` bytes and for the duration in which the closure is called. The pointer might
    /// only be mapped on the current thread, and when that is the case, dereferencing it on other
    /// threads is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't
    /// cause data races, the memory may be uninitialized, and so on.
    ///
    /// If multiple threads map the same page at the same time, then they may be given different
    /// addresses. However, even if the addresses are different, the underlying memory is
    /// still the same for these purposes (e.g., it's still a data race if they both write to the
    /// same underlying byte at the same time).
    fn with_page_mapped<T>(&self, f: impl FnOnce(*mut u8) -> T) -> T {
        // SAFETY: `page` is valid due to the type invariants on `Page`.
        let mapped_addr = unsafe { bindings::kmap_local_page(self.as_ptr()) };

        let res = f(mapped_addr.cast());

        // This unmaps the page mapped above.
        //
        // SAFETY: Since this API takes the user code as a closure, it can only be used in a manner
        // where the pages are unmapped in reverse order. This is as required by `kunmap_local`.
        //
        // In other words, if this call to `kunmap_local` happens when a different page should be
        // unmapped first, then there must necessarily be a call to `kmap_local_page` other than the
        // call just above in `with_page_mapped` that made that possible. In this case, it is the
        // unsafe block that wraps that other call that is incorrect.
        unsafe { bindings::kunmap_local(mapped_addr) };

        res
    }

    /// Runs a piece of code with a raw pointer to a slice of this page, with bounds checking.
    ///
    /// If `f` is called, then it will be called with a pointer that points at `off` bytes into the
    /// page, and the pointer will be valid for at least `len` bytes. The pointer is only valid on
    /// this task, as this method uses a local mapping.
    ///
    /// If `off` and `len` refer to a region outside of this page, then this method returns
    /// [`EINVAL`] and does not call `f`.
    ///
    /// # Using the raw pointer
    ///
    /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
    /// `len` bytes and for the duration in which the closure is called. The pointer might only be
    /// mapped on the current thread, and when that is the case, dereferencing it on other threads
    /// is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't cause
    /// data races, the memory may be uninitialized, and so on.
    ///
    /// If multiple threads map the same page at the same time, then they may be given different
    /// addresses. However, even if the addresses are different, the underlying memory is
    /// still the same for these purposes (e.g., it's still a data race if they both write to the
    /// same underlying byte at the same time).
    fn with_pointer_into_page<T>(
        &self,
        off: usize,
        len: usize,
        f: impl FnOnce(*mut u8) -> Result<T>,
    ) -> Result<T> {
        let bounds_ok = off <= PAGE_SIZE && len <= PAGE_SIZE && (off + len) <= PAGE_SIZE;

        if bounds_ok {
            self.with_page_mapped(move |page_addr| {
                // SAFETY: The `off` integer is at most `PAGE_SIZE`, so this pointer offset will
                // result in a pointer that is in bounds or one off the end of the page.
                f(unsafe { page_addr.add(off) })
            })
        } else {
            Err(EINVAL)
        }
    }

    /// Maps the page and reads from it into the given buffer.
    ///
    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
    /// outside of the page, then this call returns [`EINVAL`].
    ///
    /// # Safety
    ///
    /// * Callers must ensure that `dst` is valid for writing `len` bytes.
    /// * Callers must ensure that this call does not race with a write to the same page that
    ///   overlaps with this read.
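    ///
    /// # Examples
    ///
    /// A sketch of reading the start of a freshly zeroed page into a local buffer; the page is
    /// newly allocated and not shared, so the safety requirements are met.
    ///
    /// ```
    /// use kernel::page::Page;
    ///
    /// let page = Page::alloc_page(GFP_KERNEL | __GFP_ZERO)?;
    /// let mut buf = [0xffu8; 8];
    ///
    /// // SAFETY: `buf` is valid for writing 8 bytes, and nothing else accesses this page.
    /// unsafe { page.read_raw(buf.as_mut_ptr(), 0, buf.len())? };
    /// assert_eq!(buf, [0u8; 8]);
    /// # Ok::<(), Error>(())
    /// ```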
    pub unsafe fn read_raw(&self, dst: *mut u8, offset: usize, len: usize) -> Result {
        self.with_pointer_into_page(offset, len, move |src| {
            // SAFETY: If `with_pointer_into_page` calls into this closure, then
            // it has performed a bounds check and guarantees that `src` is
            // valid for `len` bytes.
            //
            // The caller guarantees that there is no data race.
            unsafe { ptr::copy_nonoverlapping(src, dst, len) };
            Ok(())
        })
    }

    /// Maps the page and writes into it from the given buffer.
    ///
    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
    /// outside of the page, then this call returns [`EINVAL`].
    ///
    /// # Safety
    ///
    /// * Callers must ensure that `src` is valid for reading `len` bytes.
    /// * Callers must ensure that this call does not race with a read or write to the same page
    ///   that overlaps with this write.
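    ///
    /// # Examples
    ///
    /// A sketch of writing a small buffer into a page and reading it back; the page is newly
    /// allocated and not shared, so the safety requirements are met.
    ///
    /// ```
    /// use kernel::page::Page;
    ///
    /// let page = Page::alloc_page(GFP_KERNEL)?;
    /// let src = [1u8, 2, 3, 4];
    /// let mut dst = [0u8; 4];
    ///
    /// // SAFETY: `src` is valid for reading 4 bytes, and nothing else accesses this page.
    /// unsafe { page.write_raw(src.as_ptr(), 0, src.len())? };
    /// // SAFETY: `dst` is valid for writing 4 bytes, and nothing else accesses this page.
    /// unsafe { page.read_raw(dst.as_mut_ptr(), 0, dst.len())? };
    /// assert_eq!(src, dst);
    /// # Ok::<(), Error>(())
    /// ```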
    pub unsafe fn write_raw(&self, src: *const u8, offset: usize, len: usize) -> Result {
        self.with_pointer_into_page(offset, len, move |dst| {
            // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
            // bounds check and guarantees that `dst` is valid for `len` bytes.
            //
            // The caller guarantees that there is no data race.
            unsafe { ptr::copy_nonoverlapping(src, dst, len) };
            Ok(())
        })
    }

    /// Maps the page and zeroes the given slice.
    ///
    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
    /// outside of the page, then this call returns [`EINVAL`].
    ///
    /// # Safety
    ///
    /// Callers must ensure that this call does not race with a read or write to the same page that
    /// overlaps with this write.
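    ///
    /// # Examples
    ///
    /// A sketch of zeroing an entire page; the page is newly allocated and not shared, so the
    /// safety requirement is met.
    ///
    /// ```
    /// use kernel::page::{Page, PAGE_SIZE};
    ///
    /// let page = Page::alloc_page(GFP_KERNEL)?;
    ///
    /// // SAFETY: Nothing else accesses this freshly allocated page, so there is no race.
    /// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
    /// # Ok::<(), Error>(())
    /// ```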
    pub unsafe fn fill_zero_raw(&self, offset: usize, len: usize) -> Result {
        self.with_pointer_into_page(offset, len, move |dst| {
            // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
            // bounds check and guarantees that `dst` is valid for `len` bytes.
            //
            // The caller guarantees that there is no data race.
            unsafe { ptr::write_bytes(dst, 0u8, len) };
            Ok(())
        })
    }

    /// Copies data from userspace into this page.
    ///
    /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
    /// outside of the page, then this call returns [`EINVAL`].
    ///
    /// Like the other `UserSliceReader` methods, data races are allowed on the userspace address.
    /// However, they are not allowed on the page you are copying into.
    ///
    /// # Safety
    ///
    /// Callers must ensure that this call does not race with a read or write to the same page that
    /// overlaps with this write.
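    ///
    /// # Examples
    ///
    /// A sketch of filling a page from a user slice, e.g. in an `ioctl` handler; the
    /// `fill_from_user` helper is hypothetical, and it assumes exclusive access to the page.
    ///
    /// ```
    /// use kernel::page::{Page, PAGE_SIZE};
    /// use kernel::uaccess::UserSliceReader;
    ///
    /// fn fill_from_user(page: &mut Page, mut reader: UserSliceReader) -> Result {
    ///     let len = reader.len().min(PAGE_SIZE);
    ///     // SAFETY: `page` is borrowed mutably and (by assumption) not shared, so this write
    ///     // cannot race with any other access to the page.
    ///     unsafe { page.copy_from_user_slice_raw(&mut reader, 0, len)? };
    ///     Ok(())
    /// }
    /// ```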
    pub unsafe fn copy_from_user_slice_raw(
        &self,
        reader: &mut UserSliceReader,
        offset: usize,
        len: usize,
    ) -> Result {
        self.with_pointer_into_page(offset, len, move |dst| {
            // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
            // bounds check and guarantees that `dst` is valid for `len` bytes. Furthermore, we have
            // exclusive access to the slice since the caller guarantees that there are no races.
            reader.read_raw(unsafe { core::slice::from_raw_parts_mut(dst.cast(), len) })
        })
    }
}

impl Drop for Page {
    #[inline]
    fn drop(&mut self) {
        // SAFETY: By the type invariants, we have ownership of the page and can free it.
        unsafe { bindings::__free_pages(self.page.as_ptr(), 0) };
    }
}