// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2024 Google LLC.

//! Virtual memory.
//!
//! This module deals with managing a single VMA in the address space of a userspace process. Each
//! VMA corresponds to a region of memory that the userspace process can access, and the VMA lets
//! you control what happens when userspace reads or writes to that region of memory.
//!
//! The module has several different Rust types that all correspond to the C type called
//! `vm_area_struct`. The different structs represent what kind of access you have to the VMA, e.g.
//! [`VmaRef`] is used when you hold the mmap or vma read lock. Using the appropriate struct
//! ensures that you can't, for example, accidentally call a function that requires holding the
//! write lock when you only hold the read lock.

use crate::{
    bindings,
    error::{code::EINVAL, to_result, Result},
    mm::MmWithUser,
    page::Page,
    types::Opaque,
};

use core::ops::Deref;

/// A wrapper for the kernel's `struct vm_area_struct` with read access.
///
/// It represents an area of virtual memory.
///
/// # Invariants
///
/// The caller must hold the mmap read lock or the vma read lock.
#[repr(transparent)]
pub struct VmaRef {
    vma: Opaque<bindings::vm_area_struct>,
}

// Methods you can call when holding the mmap or vma read lock (or stronger). They must be usable
// no matter what the vma flags are.
impl VmaRef {
    /// Access a virtual memory area given a raw pointer.
    ///
    /// # Safety
    ///
    /// Callers must ensure that `vma` is valid for the duration of 'a, and that the mmap or vma
    /// read lock (or stronger) is held for at least the duration of 'a.
    #[inline]
    pub unsafe fn from_raw<'a>(vma: *const bindings::vm_area_struct) -> &'a Self {
        // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
        unsafe { &*vma.cast() }
    }

    /// Returns a raw pointer to this area.
    #[inline]
    pub fn as_ptr(&self) -> *mut bindings::vm_area_struct {
        self.vma.get()
    }

    /// Access the underlying `mm_struct`.
    #[inline]
    pub fn mm(&self) -> &MmWithUser {
        // SAFETY: By the type invariants, this `vm_area_struct` is valid and we hold the mmap/vma
        // read lock or stronger. This implies that the underlying mm has a non-zero value of
        // `mm_users`.
        unsafe { MmWithUser::from_raw((*self.as_ptr()).vm_mm) }
    }

    /// Returns the flags associated with the virtual memory area.
    ///
    /// The possible flags are a combination of the constants in [`flags`].
    #[inline]
    pub fn flags(&self) -> vm_flags_t {
        // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this
        // access is not a data race.
        unsafe { (*self.as_ptr()).__bindgen_anon_2.vm_flags }
    }

    /// Returns the (inclusive) start address of the virtual memory area.
    #[inline]
    pub fn start(&self) -> usize {
        // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this
        // access is not a data race.
        unsafe { (*self.as_ptr()).__bindgen_anon_1.__bindgen_anon_1.vm_start }
    }

    /// Returns the (exclusive) end address of the virtual memory area.
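    ///
    /// # Examples
    ///
    /// Together with [`Self::start`], this can be used to check whether a range of addresses
    /// falls entirely inside the area. A minimal sketch, assuming `vma: &VmaRef`, `addr: usize`
    /// and `size: usize` are in scope; it mirrors the bounds check performed by
    /// [`Self::zap_page_range_single`]:
    ///
    /// ```ignore
    /// // Sketch only: `vma`, `addr` and `size` are assumed to be defined elsewhere.
    /// let (end, overflow) = addr.overflowing_add(size);
    /// let in_bounds = !overflow && vma.start() <= addr && end <= vma.end();
    /// ```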
    #[inline]
    pub fn end(&self) -> usize {
        // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this
        // access is not a data race.
        unsafe { (*self.as_ptr()).__bindgen_anon_1.__bindgen_anon_1.vm_end }
    }

    /// Zap pages in the given page range.
    ///
    /// This clears page table mappings for the range at the leaf level, leaving all other page
    /// tables intact, and freeing any memory referenced by the VMA in this range. That is,
    /// anonymous memory is completely freed, file-backed memory has its reference count on page
    /// cache folios dropped, and any dirty data will still be written back to disk as usual.
    ///
    /// It may seem odd that we clear at the leaf level; this is, however, a product of the page
    /// table structure used to map physical memory into a virtual address space - each virtual
    /// address actually consists of a sequence of array indices into page tables, which form a
    /// hierarchical page table level structure.
    ///
    /// As a result, each page table level maps a multiple of the range covered by the level below
    /// it, and thus spans ever-increasing ranges of pages. At the leaf or PTE level, we map the
    /// actual physical memory.
    ///
    /// It is here that a zap operates, as it is the only place where we can be certain of
    /// clearing without impacting any other virtual mappings. It is an implementation detail as
    /// to whether the kernel goes further in freeing unused page tables, but for the purposes of
    /// this operation we must only assume that the leaf level is cleared.
    #[inline]
    pub fn zap_page_range_single(&self, address: usize, size: usize) {
        let (end, did_overflow) = address.overflowing_add(size);
        if did_overflow || address < self.start() || self.end() < end {
            // TODO: call WARN_ONCE once Rust version of it is added
            return;
        }

        // SAFETY: By the type invariants, the caller has read access to this VMA, which is
        // sufficient for this method call. This method has no requirements on the vma flags. The
        // address range is checked to be within the vma.
        unsafe {
            bindings::zap_page_range_single(self.as_ptr(), address, size, core::ptr::null_mut())
        };
    }

    /// If the [`VM_MIXEDMAP`] flag is set, returns a [`VmaMixedMap`] to this VMA, otherwise
    /// returns `None`.
    ///
    /// This can be used to access methods that require [`VM_MIXEDMAP`] to be set.
    ///
    /// [`VM_MIXEDMAP`]: flags::MIXEDMAP
    #[inline]
    pub fn as_mixedmap_vma(&self) -> Option<&VmaMixedMap> {
        if self.flags() & flags::MIXEDMAP != 0 {
            // SAFETY: We just checked that `VM_MIXEDMAP` is set. All other requirements are
            // satisfied by the type invariants of `VmaRef`.
            Some(unsafe { VmaMixedMap::from_raw(self.as_ptr()) })
        } else {
            None
        }
    }
}

/// A wrapper for the kernel's `struct vm_area_struct` with read access and [`VM_MIXEDMAP`] set.
///
/// It represents an area of virtual memory.
///
/// This struct is identical to [`VmaRef`] except that it must only be used when the
/// [`VM_MIXEDMAP`] flag is set on the vma.
///
/// # Invariants
///
/// The caller must hold the mmap read lock or the vma read lock. The `VM_MIXEDMAP` flag must be
/// set.
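///
/// # Examples
///
/// A minimal sketch of inserting a page into a mixedmap vma, assuming that `vma: &VmaRef`,
/// `addr: usize` and `page: &Page` are already in scope:
///
/// ```ignore
/// // Sketch only: `vma`, `addr` and `page` are assumed to exist and the address to be valid.
/// if let Some(mixedmap) = vma.as_mixedmap_vma() {
///     mixedmap.vm_insert_page(addr, page)?;
/// }
/// ```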
///
/// [`VM_MIXEDMAP`]: flags::MIXEDMAP
#[repr(transparent)]
pub struct VmaMixedMap {
    vma: VmaRef,
}

// Make all `VmaRef` methods available on `VmaMixedMap`.
impl Deref for VmaMixedMap {
    type Target = VmaRef;

    #[inline]
    fn deref(&self) -> &VmaRef {
        &self.vma
    }
}

impl VmaMixedMap {
    /// Access a virtual memory area given a raw pointer.
    ///
    /// # Safety
    ///
    /// Callers must ensure that `vma` is valid for the duration of 'a, and that the mmap read lock
    /// (or stronger) is held for at least the duration of 'a. The `VM_MIXEDMAP` flag must be set.
    #[inline]
    pub unsafe fn from_raw<'a>(vma: *const bindings::vm_area_struct) -> &'a Self {
        // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
        unsafe { &*vma.cast() }
    }

    /// Maps a single page at the given address within the virtual memory area.
    ///
    /// This operation does not take ownership of the page.
    #[inline]
    pub fn vm_insert_page(&self, address: usize, page: &Page) -> Result {
        // SAFETY: By the type invariant of `Self`, the caller has read access and has verified
        // that `VM_MIXEDMAP` is set. By the invariant on `Page`, the page has order 0.
        to_result(unsafe { bindings::vm_insert_page(self.as_ptr(), address, page.as_ptr()) })
    }
}

/// A configuration object for setting up a VMA in an `f_ops->mmap()` hook.
///
/// The `f_ops->mmap()` hook is called when a new VMA is being created, and the hook is able to
/// configure the VMA in various ways to fit the driver that owns it. Using `VmaNew` indicates that
/// you are allowed to perform operations on the VMA that can only be performed before the VMA is
/// fully initialized.
///
/// # Invariants
///
/// For the duration of 'a, the referenced vma must be undergoing initialization in an
/// `f_ops->mmap()` hook.
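///
/// # Examples
///
/// A minimal sketch of the body of an `f_ops->mmap()` hook that configures the new vma, assuming
/// `vma: &VmaNew` was provided to the hook:
///
/// ```ignore
/// // Sketch only: configure a device mapping that must not expand or appear in core dumps.
/// vma.set_io();
/// vma.set_dontexpand();
/// vma.set_dontdump();
/// ```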
pub struct VmaNew {
    vma: VmaRef,
}

// Make all `VmaRef` methods available on `VmaNew`.
impl Deref for VmaNew {
    type Target = VmaRef;

    #[inline]
    fn deref(&self) -> &VmaRef {
        &self.vma
    }
}

impl VmaNew {
    /// Access a virtual memory area given a raw pointer.
    ///
    /// # Safety
    ///
    /// Callers must ensure that `vma` is undergoing initial vma setup for the duration of 'a.
    #[inline]
    pub unsafe fn from_raw<'a>(vma: *mut bindings::vm_area_struct) -> &'a Self {
        // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
        unsafe { &*vma.cast() }
    }

    /// Internal method for updating the vma flags.
    ///
    /// # Safety
    ///
    /// This must not be used to set the flags to an invalid value.
    #[inline]
    unsafe fn update_flags(&self, set: vm_flags_t, unset: vm_flags_t) {
        let mut flags = self.flags();
        flags |= set;
        flags &= !unset;

        // SAFETY: This is not a data race: the vma is undergoing initial setup, so it's not yet
        // shared. Additionally, `VmaNew` is `!Sync`, so it cannot be used to write in parallel.
        // The caller promises that this does not set the flags to an invalid value.
        unsafe { (*self.as_ptr()).__bindgen_anon_2.__vm_flags = flags };
    }

    /// Set the `VM_MIXEDMAP` flag on this vma.
    ///
    /// This enables the vma to contain both `struct page` and pure PFN pages. Returns a reference
    /// that can be used to call `vm_insert_page` on the vma.
    #[inline]
    pub fn set_mixedmap(&self) -> &VmaMixedMap {
        // SAFETY: We don't yet provide a way to set VM_PFNMAP, so this cannot put the flags in an
        // invalid state.
        unsafe { self.update_flags(flags::MIXEDMAP, 0) };

        // SAFETY: We just set `VM_MIXEDMAP` on the vma.
        unsafe { VmaMixedMap::from_raw(self.vma.as_ptr()) }
    }

    /// Set the `VM_IO` flag on this vma.
    ///
    /// This is used for memory mapped IO and similar. The flag tells other parts of the kernel to
    /// avoid looking at the pages. For memory mapped IO this is useful, as accesses to the pages
    /// could have side effects.
    #[inline]
    pub fn set_io(&self) {
        // SAFETY: Setting the VM_IO flag is always okay.
        unsafe { self.update_flags(flags::IO, 0) };
    }

    /// Set the `VM_DONTEXPAND` flag on this vma.
    ///
    /// This prevents the vma from being expanded with `mremap()`.
    #[inline]
    pub fn set_dontexpand(&self) {
        // SAFETY: Setting the VM_DONTEXPAND flag is always okay.
        unsafe { self.update_flags(flags::DONTEXPAND, 0) };
    }

    /// Set the `VM_DONTCOPY` flag on this vma.
    ///
    /// This prevents the vma from being copied on fork. This option is only permanent if `VM_IO`
    /// is set.
    #[inline]
    pub fn set_dontcopy(&self) {
        // SAFETY: Setting the VM_DONTCOPY flag is always okay.
        unsafe { self.update_flags(flags::DONTCOPY, 0) };
    }

    /// Set the `VM_DONTDUMP` flag on this vma.
    ///
    /// This prevents the vma from being included in core dumps. This option is only permanent if
    /// `VM_IO` is set.
    #[inline]
    pub fn set_dontdump(&self) {
        // SAFETY: Setting the VM_DONTDUMP flag is always okay.
        unsafe { self.update_flags(flags::DONTDUMP, 0) };
    }

    /// Returns whether `VM_READ` is set.
    ///
    /// This flag indicates whether userspace is mapping this vma as readable.
    #[inline]
    pub fn readable(&self) -> bool {
        (self.flags() & flags::READ) != 0
    }

    /// Try to clear the `VM_MAYREAD` flag, failing if `VM_READ` is set.
    ///
    /// This flag indicates whether userspace is allowed to make this vma readable with
    /// `mprotect()`.
    ///
    /// Note that this operation is irreversible. Once `VM_MAYREAD` has been cleared, it can never
    /// be set again.
    #[inline]
    pub fn try_clear_mayread(&self) -> Result {
        if self.readable() {
            return Err(EINVAL);
        }
        // SAFETY: Clearing `VM_MAYREAD` is okay when `VM_READ` is not set.
        unsafe { self.update_flags(0, flags::MAYREAD) };
        Ok(())
    }

    /// Returns whether `VM_WRITE` is set.
    ///
    /// This flag indicates whether userspace is mapping this vma as writable.
    #[inline]
    pub fn writable(&self) -> bool {
        (self.flags() & flags::WRITE) != 0
    }

    /// Try to clear the `VM_MAYWRITE` flag, failing if `VM_WRITE` is set.
    ///
    /// This flag indicates whether userspace is allowed to make this vma writable with
    /// `mprotect()`.
    ///
    /// Note that this operation is irreversible. Once `VM_MAYWRITE` has been cleared, it can
    /// never be set again.
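    ///
    /// # Examples
    ///
    /// A minimal sketch of an `f_ops->mmap()` hook enforcing that the mapping can never become
    /// writable, assuming `vma: &VmaNew` is in scope. The writability check is built into the
    /// method itself:
    ///
    /// ```ignore
    /// // Sketch only: fails with `EINVAL` if the mapping is currently writable.
    /// vma.try_clear_maywrite()?;
    /// ```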
    #[inline]
    pub fn try_clear_maywrite(&self) -> Result {
        if self.writable() {
            return Err(EINVAL);
        }
        // SAFETY: Clearing `VM_MAYWRITE` is okay when `VM_WRITE` is not set.
        unsafe { self.update_flags(0, flags::MAYWRITE) };
        Ok(())
    }

    /// Returns whether `VM_EXEC` is set.
    ///
    /// This flag indicates whether userspace is mapping this vma as executable.
    #[inline]
    pub fn executable(&self) -> bool {
        (self.flags() & flags::EXEC) != 0
    }

    /// Try to clear the `VM_MAYEXEC` flag, failing if `VM_EXEC` is set.
    ///
    /// This flag indicates whether userspace is allowed to make this vma executable with
    /// `mprotect()`.
    ///
    /// Note that this operation is irreversible. Once `VM_MAYEXEC` has been cleared, it can never
    /// be set again.
    #[inline]
    pub fn try_clear_mayexec(&self) -> Result {
        if self.executable() {
            return Err(EINVAL);
        }
        // SAFETY: Clearing `VM_MAYEXEC` is okay when `VM_EXEC` is not set.
        unsafe { self.update_flags(0, flags::MAYEXEC) };
        Ok(())
    }
}

/// The integer type used for vma flags.
#[doc(inline)]
pub use bindings::vm_flags_t;

/// All possible flags for [`VmaRef`].
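///
/// # Examples
///
/// A minimal sketch of testing a combination of flags, assuming `vma: &VmaRef` is in scope:
///
/// ```ignore
/// // Sketch only: check whether the mapping is both shared and writable.
/// let wanted = flags::SHARED | flags::WRITE;
/// let shared_writable = (vma.flags() & wanted) == wanted;
/// ```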
pub mod flags {
    use super::vm_flags_t;
    use crate::bindings;

    /// No flags are set.
    pub const NONE: vm_flags_t = bindings::VM_NONE as _;

    /// Mapping allows reads.
    pub const READ: vm_flags_t = bindings::VM_READ as _;

    /// Mapping allows writes.
    pub const WRITE: vm_flags_t = bindings::VM_WRITE as _;

    /// Mapping allows execution.
    pub const EXEC: vm_flags_t = bindings::VM_EXEC as _;

    /// Mapping is shared.
    pub const SHARED: vm_flags_t = bindings::VM_SHARED as _;

    /// Mapping may be updated to allow reads.
    pub const MAYREAD: vm_flags_t = bindings::VM_MAYREAD as _;

    /// Mapping may be updated to allow writes.
    pub const MAYWRITE: vm_flags_t = bindings::VM_MAYWRITE as _;

    /// Mapping may be updated to allow execution.
    pub const MAYEXEC: vm_flags_t = bindings::VM_MAYEXEC as _;

    /// Mapping may be updated to be shared.
    pub const MAYSHARE: vm_flags_t = bindings::VM_MAYSHARE as _;

    /// Page-ranges managed without `struct page`, just pure PFN.
    pub const PFNMAP: vm_flags_t = bindings::VM_PFNMAP as _;

    /// Memory mapped I/O or similar.
    pub const IO: vm_flags_t = bindings::VM_IO as _;

    /// Do not copy this vma on fork.
    pub const DONTCOPY: vm_flags_t = bindings::VM_DONTCOPY as _;

    /// Cannot expand with mremap().
    pub const DONTEXPAND: vm_flags_t = bindings::VM_DONTEXPAND as _;

    /// Lock the pages covered when they are faulted in.
    pub const LOCKONFAULT: vm_flags_t = bindings::VM_LOCKONFAULT as _;

    /// Is a VM accounted object.
    pub const ACCOUNT: vm_flags_t = bindings::VM_ACCOUNT as _;

    /// Should the VM suppress accounting.
    pub const NORESERVE: vm_flags_t = bindings::VM_NORESERVE as _;

    /// Huge TLB Page VM.
    pub const HUGETLB: vm_flags_t = bindings::VM_HUGETLB as _;

    /// Synchronous page faults. (DAX-specific)
    pub const SYNC: vm_flags_t = bindings::VM_SYNC as _;

    /// Architecture-specific flag.
    pub const ARCH_1: vm_flags_t = bindings::VM_ARCH_1 as _;

    /// Wipe VMA contents in child on fork.
    pub const WIPEONFORK: vm_flags_t = bindings::VM_WIPEONFORK as _;

    /// Do not include in the core dump.
    pub const DONTDUMP: vm_flags_t = bindings::VM_DONTDUMP as _;

    /// Not soft dirty clean area.
    pub const SOFTDIRTY: vm_flags_t = bindings::VM_SOFTDIRTY as _;

    /// Can contain `struct page` and pure PFN pages.
    pub const MIXEDMAP: vm_flags_t = bindings::VM_MIXEDMAP as _;

    /// MADV_HUGEPAGE marked this vma.
    pub const HUGEPAGE: vm_flags_t = bindings::VM_HUGEPAGE as _;

    /// MADV_NOHUGEPAGE marked this vma.
    pub const NOHUGEPAGE: vm_flags_t = bindings::VM_NOHUGEPAGE as _;

    /// KSM may merge identical pages.
    pub const MERGEABLE: vm_flags_t = bindings::VM_MERGEABLE as _;
}