1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * include/linux/userfaultfd_k.h 4 * 5 * Copyright (C) 2015 Red Hat, Inc. 6 * 7 */ 8 9 #ifndef _LINUX_USERFAULTFD_K_H 10 #define _LINUX_USERFAULTFD_K_H 11 12 #ifdef CONFIG_USERFAULTFD 13 14 #include <linux/userfaultfd.h> /* linux/include/uapi/linux/userfaultfd.h */ 15 16 #include <linux/fcntl.h> 17 #include <linux/mm.h> 18 #include <linux/swap.h> 19 #include <linux/swapops.h> 20 #include <asm-generic/pgtable_uffd.h> 21 #include <linux/hugetlb_inline.h> 22 23 /* The set of all possible UFFD-related VM flags. */ 24 #define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR) 25 26 /* 27 * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining 28 * new flags, since they might collide with O_* ones. We want 29 * to re-use O_* flags that couldn't possibly have a meaning 30 * from userfaultfd, in order to leave a free define-space for 31 * shared O_* flags. 32 */ 33 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) 34 35 /* 36 * Start with fault_pending_wqh and fault_wqh so they're more likely 37 * to be in the same cacheline. 38 * 39 * Locking order: 40 * fd_wqh.lock 41 * fault_pending_wqh.lock 42 * fault_wqh.lock 43 * event_wqh.lock 44 * 45 * To avoid deadlocks, IRQs must be disabled when taking any of the above locks, 46 * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's 47 * also taken in IRQ context. 48 */ 49 struct userfaultfd_ctx { 50 /* waitqueue head for the pending (i.e. not read) userfaults */ 51 wait_queue_head_t fault_pending_wqh; 52 /* waitqueue head for the userfaults */ 53 wait_queue_head_t fault_wqh; 54 /* waitqueue head for the pseudo fd to wakeup poll/read */ 55 wait_queue_head_t fd_wqh; 56 /* waitqueue head for events */ 57 wait_queue_head_t event_wqh; 58 /* a refile sequence protected by fault_pending_wqh lock */ 59 seqcount_spinlock_t refile_seq; 60 /* pseudo fd refcounting */ 61 refcount_t refcount; 62 /* userfaultfd syscall flags */ 63 unsigned int flags; 64 /* features requested from the userspace */ 65 unsigned int features; 66 /* released */ 67 bool released; 68 /* 69 * Prevents userfaultfd operations (fill/move/wp) from happening while 70 * some non-cooperative event(s) is taking place. Increments are done 71 * in write-mode. Whereas, userfaultfd operations, which includes 72 * reading mmap_changing, is done under read-mode. 73 */ 74 struct rw_semaphore map_changing_lock; 75 /* memory mappings are changing because of non-cooperative event */ 76 atomic_t mmap_changing; 77 /* mm with one ore more vmas attached to this userfaultfd_ctx */ 78 struct mm_struct *mm; 79 }; 80 81 extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); 82 83 /* A combined operation mode + behavior flags. */ 84 typedef unsigned int __bitwise uffd_flags_t; 85 86 /* Mutually exclusive modes of operation. */ 87 enum mfill_atomic_mode { 88 MFILL_ATOMIC_COPY, 89 MFILL_ATOMIC_ZEROPAGE, 90 MFILL_ATOMIC_CONTINUE, 91 MFILL_ATOMIC_POISON, 92 NR_MFILL_ATOMIC_MODES, 93 }; 94 95 #define MFILL_ATOMIC_MODE_BITS (const_ilog2(NR_MFILL_ATOMIC_MODES - 1) + 1) 96 #define MFILL_ATOMIC_BIT(nr) BIT(MFILL_ATOMIC_MODE_BITS + (nr)) 97 #define MFILL_ATOMIC_FLAG(nr) ((__force uffd_flags_t) MFILL_ATOMIC_BIT(nr)) 98 #define MFILL_ATOMIC_MODE_MASK ((__force uffd_flags_t) (MFILL_ATOMIC_BIT(0) - 1)) 99 100 static inline bool uffd_flags_mode_is(uffd_flags_t flags, enum mfill_atomic_mode expected) 101 { 102 return (flags & MFILL_ATOMIC_MODE_MASK) == ((__force uffd_flags_t) expected); 103 } 104 105 static inline uffd_flags_t uffd_flags_set_mode(uffd_flags_t flags, enum mfill_atomic_mode mode) 106 { 107 flags &= ~MFILL_ATOMIC_MODE_MASK; 108 return flags | ((__force uffd_flags_t) mode); 109 } 110 111 /* Flags controlling behavior. These behavior changes are mode-independent. */ 112 #define MFILL_ATOMIC_WP MFILL_ATOMIC_FLAG(0) 113 114 extern int mfill_atomic_install_pte(pmd_t *dst_pmd, 115 struct vm_area_struct *dst_vma, 116 unsigned long dst_addr, struct page *page, 117 bool newly_allocated, uffd_flags_t flags); 118 119 extern ssize_t mfill_atomic_copy(struct userfaultfd_ctx *ctx, unsigned long dst_start, 120 unsigned long src_start, unsigned long len, 121 uffd_flags_t flags); 122 extern ssize_t mfill_atomic_zeropage(struct userfaultfd_ctx *ctx, 123 unsigned long dst_start, 124 unsigned long len); 125 extern ssize_t mfill_atomic_continue(struct userfaultfd_ctx *ctx, unsigned long dst_start, 126 unsigned long len, uffd_flags_t flags); 127 extern ssize_t mfill_atomic_poison(struct userfaultfd_ctx *ctx, unsigned long start, 128 unsigned long len, uffd_flags_t flags); 129 extern int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start, 130 unsigned long len, bool enable_wp); 131 extern long uffd_wp_range(struct vm_area_struct *vma, 132 unsigned long start, unsigned long len, bool enable_wp); 133 134 /* move_pages */ 135 void double_pt_lock(spinlock_t *ptl1, spinlock_t *ptl2); 136 void double_pt_unlock(spinlock_t *ptl1, spinlock_t *ptl2); 137 ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, 138 unsigned long src_start, unsigned long len, __u64 flags); 139 int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pmd_t dst_pmdval, 140 struct vm_area_struct *dst_vma, 141 struct vm_area_struct *src_vma, 142 unsigned long dst_addr, unsigned long src_addr); 143 144 /* mm helpers */ 145 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, 146 struct vm_userfaultfd_ctx vm_ctx) 147 { 148 return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx; 149 } 150 151 /* 152 * Never enable huge pmd sharing on some uffd registered vmas: 153 * 154 * - VM_UFFD_WP VMAs, because write protect information is per pgtable entry. 155 * 156 * - VM_UFFD_MINOR VMAs, because otherwise we would never get minor faults for 157 * VMAs which share huge pmds. (If you have two mappings to the same 158 * underlying pages, and fault in the non-UFFD-registered one with a write, 159 * with huge pmd sharing this would *also* setup the second UFFD-registered 160 * mapping, and we'd not get minor faults.) 161 */ 162 static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma) 163 { 164 return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); 165 } 166 167 /* 168 * Don't do fault around for either WP or MINOR registered uffd range. For 169 * MINOR registered range, fault around will be a total disaster and ptes can 170 * be installed without notifications; for WP it should mostly be fine as long 171 * as the fault around checks for pte_none() before the installation, however 172 * to be super safe we just forbid it. 173 */ 174 static inline bool uffd_disable_fault_around(struct vm_area_struct *vma) 175 { 176 return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); 177 } 178 179 static inline bool userfaultfd_missing(struct vm_area_struct *vma) 180 { 181 return vma->vm_flags & VM_UFFD_MISSING; 182 } 183 184 static inline bool userfaultfd_wp(struct vm_area_struct *vma) 185 { 186 return vma->vm_flags & VM_UFFD_WP; 187 } 188 189 static inline bool userfaultfd_minor(struct vm_area_struct *vma) 190 { 191 return vma->vm_flags & VM_UFFD_MINOR; 192 } 193 194 static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, 195 pte_t pte) 196 { 197 return userfaultfd_wp(vma) && pte_uffd_wp(pte); 198 } 199 200 static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, 201 pmd_t pmd) 202 { 203 return userfaultfd_wp(vma) && pmd_uffd_wp(pmd); 204 } 205 206 static inline bool userfaultfd_armed(struct vm_area_struct *vma) 207 { 208 return vma->vm_flags & __VM_UFFD_FLAGS; 209 } 210 211 static inline bool vma_can_userfault(struct vm_area_struct *vma, 212 vm_flags_t vm_flags, 213 bool wp_async) 214 { 215 vm_flags &= __VM_UFFD_FLAGS; 216 217 if (vma->vm_flags & VM_DROPPABLE) 218 return false; 219 220 if ((vm_flags & VM_UFFD_MINOR) && 221 (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) 222 return false; 223 224 /* 225 * If wp async enabled, and WP is the only mode enabled, allow any 226 * memory type. 227 */ 228 if (wp_async && (vm_flags == VM_UFFD_WP)) 229 return true; 230 231 #ifndef CONFIG_PTE_MARKER_UFFD_WP 232 /* 233 * If user requested uffd-wp but not enabled pte markers for 234 * uffd-wp, then shmem & hugetlbfs are not supported but only 235 * anonymous. 236 */ 237 if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) 238 return false; 239 #endif 240 241 /* By default, allow any of anon|shmem|hugetlb */ 242 return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) || 243 vma_is_shmem(vma); 244 } 245 246 static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) 247 { 248 struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx; 249 250 return uffd_ctx && (uffd_ctx->features & UFFD_FEATURE_EVENT_REMAP) == 0; 251 } 252 253 extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); 254 extern void dup_userfaultfd_complete(struct list_head *); 255 void dup_userfaultfd_fail(struct list_head *); 256 257 extern void mremap_userfaultfd_prep(struct vm_area_struct *, 258 struct vm_userfaultfd_ctx *); 259 extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, 260 unsigned long from, unsigned long to, 261 unsigned long len); 262 void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *); 263 264 extern bool userfaultfd_remove(struct vm_area_struct *vma, 265 unsigned long start, 266 unsigned long end); 267 268 extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, 269 unsigned long start, unsigned long end, struct list_head *uf); 270 extern void userfaultfd_unmap_complete(struct mm_struct *mm, 271 struct list_head *uf); 272 extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); 273 extern bool userfaultfd_wp_async(struct vm_area_struct *vma); 274 275 void userfaultfd_reset_ctx(struct vm_area_struct *vma); 276 277 struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, 278 struct vm_area_struct *prev, 279 struct vm_area_struct *vma, 280 unsigned long start, 281 unsigned long end); 282 283 int userfaultfd_register_range(struct userfaultfd_ctx *ctx, 284 struct vm_area_struct *vma, 285 vm_flags_t vm_flags, 286 unsigned long start, unsigned long end, 287 bool wp_async); 288 289 void userfaultfd_release_new(struct userfaultfd_ctx *ctx); 290 291 void userfaultfd_release_all(struct mm_struct *mm, 292 struct userfaultfd_ctx *ctx); 293 294 #else /* CONFIG_USERFAULTFD */ 295 296 /* mm helpers */ 297 static inline vm_fault_t handle_userfault(struct vm_fault *vmf, 298 unsigned long reason) 299 { 300 return VM_FAULT_SIGBUS; 301 } 302 303 static inline long uffd_wp_range(struct vm_area_struct *vma, 304 unsigned long start, unsigned long len, 305 bool enable_wp) 306 { 307 return false; 308 } 309 310 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, 311 struct vm_userfaultfd_ctx vm_ctx) 312 { 313 return true; 314 } 315 316 static inline bool userfaultfd_missing(struct vm_area_struct *vma) 317 { 318 return false; 319 } 320 321 static inline bool userfaultfd_wp(struct vm_area_struct *vma) 322 { 323 return false; 324 } 325 326 static inline bool userfaultfd_minor(struct vm_area_struct *vma) 327 { 328 return false; 329 } 330 331 static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, 332 pte_t pte) 333 { 334 return false; 335 } 336 337 static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, 338 pmd_t pmd) 339 { 340 return false; 341 } 342 343 344 static inline bool userfaultfd_armed(struct vm_area_struct *vma) 345 { 346 return false; 347 } 348 349 static inline int dup_userfaultfd(struct vm_area_struct *vma, 350 struct list_head *l) 351 { 352 return 0; 353 } 354 355 static inline void dup_userfaultfd_complete(struct list_head *l) 356 { 357 } 358 359 static inline void dup_userfaultfd_fail(struct list_head *l) 360 { 361 } 362 363 static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma, 364 struct vm_userfaultfd_ctx *ctx) 365 { 366 } 367 368 static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, 369 unsigned long from, 370 unsigned long to, 371 unsigned long len) 372 { 373 } 374 375 static inline void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *ctx) 376 { 377 } 378 379 static inline bool userfaultfd_remove(struct vm_area_struct *vma, 380 unsigned long start, 381 unsigned long end) 382 { 383 return true; 384 } 385 386 static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, 387 unsigned long start, unsigned long end, 388 struct list_head *uf) 389 { 390 return 0; 391 } 392 393 static inline void userfaultfd_unmap_complete(struct mm_struct *mm, 394 struct list_head *uf) 395 { 396 } 397 398 static inline bool uffd_disable_fault_around(struct vm_area_struct *vma) 399 { 400 return false; 401 } 402 403 static inline bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma) 404 { 405 return false; 406 } 407 408 static inline bool userfaultfd_wp_async(struct vm_area_struct *vma) 409 { 410 return false; 411 } 412 413 static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) 414 { 415 return false; 416 } 417 418 #endif /* CONFIG_USERFAULTFD */ 419 420 static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) 421 { 422 /* Only wr-protect mode uses pte markers */ 423 if (!userfaultfd_wp(vma)) 424 return false; 425 426 /* File-based uffd-wp always need markers */ 427 if (!vma_is_anonymous(vma)) 428 return true; 429 430 /* 431 * Anonymous uffd-wp only needs the markers if WP_UNPOPULATED 432 * enabled (to apply markers on zero pages). 433 */ 434 return userfaultfd_wp_unpopulated(vma); 435 } 436 437 static inline bool pte_marker_entry_uffd_wp(swp_entry_t entry) 438 { 439 #ifdef CONFIG_PTE_MARKER_UFFD_WP 440 return is_pte_marker_entry(entry) && 441 (pte_marker_get(entry) & PTE_MARKER_UFFD_WP); 442 #else 443 return false; 444 #endif 445 } 446 447 static inline bool pte_marker_uffd_wp(pte_t pte) 448 { 449 #ifdef CONFIG_PTE_MARKER_UFFD_WP 450 swp_entry_t entry; 451 452 if (!is_swap_pte(pte)) 453 return false; 454 455 entry = pte_to_swp_entry(pte); 456 457 return pte_marker_entry_uffd_wp(entry); 458 #else 459 return false; 460 #endif 461 } 462 463 /* 464 * Returns true if this is a swap pte and was uffd-wp wr-protected in either 465 * forms (pte marker or a normal swap pte), false otherwise. 466 */ 467 static inline bool pte_swp_uffd_wp_any(pte_t pte) 468 { 469 #ifdef CONFIG_PTE_MARKER_UFFD_WP 470 if (!is_swap_pte(pte)) 471 return false; 472 473 if (pte_swp_uffd_wp(pte)) 474 return true; 475 476 if (pte_marker_uffd_wp(pte)) 477 return true; 478 #endif 479 return false; 480 } 481 482 #endif /* _LINUX_USERFAULTFD_K_H */ 483