/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_HUGE_MM_H
#define _LINUX_HUGE_MM_H

#include <linux/sched/coredump.h>
#include <linux/mm_types.h>

#include <linux/fs.h> /* only for vma_is_dax() */

vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
		  struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
void huge_pmd_set_accessed(struct vm_fault *vmf);
int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		  pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
		  struct vm_area_struct *vma);

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
#else
static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
{
}
#endif

vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
			   pmd_t *pmd, unsigned long addr, unsigned long next);
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
		 unsigned long addr);
int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud,
		 unsigned long addr);
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
		   unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd);
int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
		    pmd_t *pmd, unsigned long addr, pgprot_t newprot,
		    unsigned long cp_flags);

vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);

enum transparent_hugepage_flag {
	TRANSPARENT_HUGEPAGE_UNSUPPORTED,
	TRANSPARENT_HUGEPAGE_FLAG,
	TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
	TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
	TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
	TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
	TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
	TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
	TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
};

struct kobject;
struct kobj_attribute;

ssize_t single_hugepage_flag_store(struct kobject *kobj,
				   struct kobj_attribute *attr,
				   const char *buf, size_t count,
				   enum transparent_hugepage_flag flag);
ssize_t single_hugepage_flag_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *buf,
				  enum transparent_hugepage_flag flag);
extern struct kobj_attribute shmem_enabled_attr;

/*
 * Mask of all large folio orders supported for anonymous THP; all orders up to
 * and including PMD_ORDER, except order-0 (which is not "huge") and order-1
 * (which is a limitation of the THP implementation).
 */
#define THP_ORDERS_ALL_ANON	((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1)))

/*
 * Mask of all large folio orders supported for file THP.
 */
#define THP_ORDERS_ALL_FILE	(BIT(PMD_ORDER) | BIT(PUD_ORDER))

/*
 * Mask of all large folio orders supported for THP.
 */
#define THP_ORDERS_ALL	(THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE)
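
/*
 * Worked example (illustrative): with 4KiB pages and PMD_ORDER == 9, as on
 * x86-64, BIT(PMD_ORDER + 1) - 1 is 0x3ff (orders 0-9); clearing BIT(0) and
 * BIT(1) leaves THP_ORDERS_ALL_ANON == 0x3fc, i.e. orders 2-9 inclusive.
 * A caller asking "is order 4 an allowed anon THP order?" would test:
 *
 *	if (THP_ORDERS_ALL_ANON & BIT(4))
 *		...
 */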

#define TVA_SMAPS		(1 << 0)	/* Will be used for procfs */
#define TVA_IN_PF		(1 << 1)	/* Page fault handler */
#define TVA_ENFORCE_SYSFS	(1 << 2)	/* Obey sysfs configuration */

#define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \
	(!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order)))

#ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PUD_SHIFT PUD_SHIFT
#else
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
#endif

#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
#define HPAGE_PMD_MASK	(~(HPAGE_PMD_SIZE - 1))
#define HPAGE_PMD_SIZE	((1UL) << HPAGE_PMD_SHIFT)

#define HPAGE_PUD_ORDER (HPAGE_PUD_SHIFT-PAGE_SHIFT)
#define HPAGE_PUD_NR (1<<HPAGE_PUD_ORDER)
#define HPAGE_PUD_MASK	(~(HPAGE_PUD_SIZE - 1))
#define HPAGE_PUD_SIZE	((1UL) << HPAGE_PUD_SHIFT)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

extern unsigned long transparent_hugepage_flags;
extern unsigned long huge_anon_orders_always;
extern unsigned long huge_anon_orders_madvise;
extern unsigned long huge_anon_orders_inherit;

static inline bool hugepage_global_enabled(void)
{
	return transparent_hugepage_flags &
			((1<<TRANSPARENT_HUGEPAGE_FLAG) |
			 (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG));
}

static inline bool hugepage_global_always(void)
{
	return transparent_hugepage_flags &
			(1<<TRANSPARENT_HUGEPAGE_FLAG);
}

static inline bool hugepage_flags_enabled(void)
{
	/*
	 * We cover both the anon and the file-backed case here; we must return
	 * true if globally enabled, even when all anon sizes are set to never.
	 * So we don't need to look at huge_anon_orders_inherit.
	 */
	return hugepage_global_enabled() ||
	       huge_anon_orders_always ||
	       huge_anon_orders_madvise;
}

static inline int highest_order(unsigned long orders)
{
	return fls_long(orders) - 1;
}

static inline int next_order(unsigned long *orders, int prev)
{
	*orders &= ~BIT(prev);
	return highest_order(*orders);
}

/*
 * Do the below checks:
 * - For file vma, check if the linear page offset of vma is
 *   order-aligned within the file. The hugepage is
 *   guaranteed to be order-aligned within the file, but we must
 *   check that the order-aligned addresses in the VMA map to
 *   order-aligned offsets within the file, else the hugepage will
 *   not be mappable.
 * - For all vmas, check if the haddr is in an aligned hugepage
 *   area.
 */
static inline bool thp_vma_suitable_order(struct vm_area_struct *vma,
		unsigned long addr, int order)
{
	unsigned long hpage_size = PAGE_SIZE << order;
	unsigned long haddr;

	/* Don't have to check pgoff for anonymous vma */
	if (!vma_is_anonymous(vma)) {
		if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
				hpage_size >> PAGE_SHIFT))
			return false;
	}

	haddr = ALIGN_DOWN(addr, hpage_size);

	if (haddr < vma->vm_start || haddr + hpage_size > vma->vm_end)
		return false;
	return true;
}
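
/*
 * Example for the pgoff check above (illustrative, 4KiB pages, order == 9,
 * i.e. a 2MiB hugepage): a file VMA with vm_start == 0x600000 and
 * vm_pgoff == 0x200 is suitable, since (0x600 - 0x200) pages is a multiple
 * of the 512-page hugepage size, so 2MiB-aligned addresses map to
 * 2MiB-aligned file offsets. With vm_pgoff == 0x201 the difference is 0x3ff
 * pages and every aligned address would map to a misaligned file offset, so
 * the order is rejected.
 */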

/*
 * Filter the bitfield of input orders to the ones suitable for use in the vma.
 * See thp_vma_suitable_order().
 * All orders that pass the checks are returned as a bitfield.
 */
static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
		unsigned long addr, unsigned long orders)
{
	int order;

	/*
	 * Iterate over orders, highest to lowest, removing orders that don't
	 * meet alignment requirements from the set. Exit loop at first order
	 * that meets requirements, since all lower orders must also meet
	 * requirements.
	 */

	order = highest_order(orders);

	while (orders) {
		if (thp_vma_suitable_order(vma, addr, order))
			break;
		order = next_order(&orders, order);
	}

	return orders;
}

static inline bool file_thp_enabled(struct vm_area_struct *vma)
{
	struct inode *inode;

	if (!vma->vm_file)
		return false;

	inode = vma->vm_file->f_inode;

	return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) &&
	       !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode);
}

unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
					 unsigned long vm_flags,
					 unsigned long tva_flags,
					 unsigned long orders);
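
/*
 * Sketch of how a caller might consume an orders bitfield (illustrative;
 * try_order() stands in for the caller's own per-order attempt and is not
 * part of this API). Highest orders are tried first, mirroring the helpers
 * above:
 *
 *	orders = thp_vma_suitable_orders(vma, addr, THP_ORDERS_ALL_ANON);
 *	for (order = highest_order(orders); orders;
 *	     order = next_order(&orders, order)) {
 *		if (try_order(order))
 *			break;
 *	}
 */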

/**
 * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma
 * @vma: the vm area to check
 * @vm_flags: use these vm_flags instead of vma->vm_flags
 * @tva_flags: Which TVA flags to honour
 * @orders: bitfield of all orders to consider
 *
 * Calculates the intersection of the requested hugepage orders and the allowed
 * hugepage orders for the provided vma. Permitted orders are encoded as a set
 * bit at the corresponding bit position (bit-2 corresponds to order-2, bit-3
 * corresponds to order-3, etc). Order-0 is never considered a hugepage order.
 *
 * Return: bitfield of orders allowed for hugepage in the vma. 0 if no hugepage
 * orders are allowed.
 */
static inline
unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
				       unsigned long vm_flags,
				       unsigned long tva_flags,
				       unsigned long orders)
{
	/* Optimization to check if required orders are enabled early. */
	if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) {
		unsigned long mask = READ_ONCE(huge_anon_orders_always);

		if (vm_flags & VM_HUGEPAGE)
			mask |= READ_ONCE(huge_anon_orders_madvise);
		if (hugepage_global_always() ||
		    ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled()))
			mask |= READ_ONCE(huge_anon_orders_inherit);

		orders &= mask;
		if (!orders)
			return 0;
	}

	return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders);
}

enum mthp_stat_item {
	MTHP_STAT_ANON_FAULT_ALLOC,
	MTHP_STAT_ANON_FAULT_FALLBACK,
	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
	MTHP_STAT_ANON_SWPOUT,
	MTHP_STAT_ANON_SWPOUT_FALLBACK,
	__MTHP_STAT_COUNT
};

struct mthp_stat {
	unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT];
};

DECLARE_PER_CPU(struct mthp_stat, mthp_stats);

static inline void count_mthp_stat(int order, enum mthp_stat_item item)
{
	if (order <= 0 || order > PMD_ORDER)
		return;

	this_cpu_inc(mthp_stats.stats[order][item]);
}

#define transparent_hugepage_use_zero_page()				\
	(transparent_hugepage_flags &					\
	 (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))

unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags);
unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags,
		vm_flags_t vm_flags);

bool can_split_folio(struct folio *folio, int *pextra_pins);
int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
		unsigned int new_order);
static inline int split_huge_page(struct page *page)
{
	return split_huge_page_to_list_to_order(page, NULL, 0);
}
void deferred_split_folio(struct folio *folio);

void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long address, bool freeze, struct folio *folio);

#define split_huge_pmd(__vma, __pmd, __address)				\
	do {								\
		pmd_t *____pmd = (__pmd);				\
		if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)	\
					|| pmd_devmap(*____pmd))	\
			__split_huge_pmd(__vma, __pmd, __address,	\
						false, NULL);		\
	} while (0)

void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
		bool freeze, struct folio *folio);

void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
		unsigned long address);

#define split_huge_pud(__vma, __pud, __address)				\
	do {								\
		pud_t *____pud = (__pud);				\
		if (pud_trans_huge(*____pud)				\
					|| pud_devmap(*____pud))	\
			__split_huge_pud(__vma, __pud, __address);	\
	} while (0)

int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags,
		     int advice);
int madvise_collapse(struct vm_area_struct *vma,
		     struct vm_area_struct **prev,
		     unsigned long start, unsigned long end);
void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start,
			   unsigned long end, long adjust_next);
spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma);
spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma);

static inline int is_swap_pmd(pmd_t pmd)
{
	return !pmd_none(pmd) && !pmd_present(pmd);
}

/* mmap_lock must be held on entry */
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
		struct vm_area_struct *vma)
{
	if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
		return __pmd_trans_huge_lock(pmd, vma);
	else
		return NULL;
}
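
/*
 * Typical calling pattern (illustrative sketch): only treat the PMD as huge
 * if a spinlock was actually returned; a NULL return means the entry is (or
 * became) a regular page table and must be handled at PTE granularity.
 *
 *	ptl = pmd_trans_huge_lock(pmd, vma);
 *	if (ptl) {
 *		...operate on the huge PMD...
 *		spin_unlock(ptl);
 *	} else {
 *		...fall back to the PTE-level path...
 *	}
 */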

static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
		struct vm_area_struct *vma)
{
	if (pud_trans_huge(*pud) || pud_devmap(*pud))
		return __pud_trans_huge_lock(pud, vma);
	else
		return NULL;
}

/**
 * folio_test_pmd_mappable - Can we map this folio with a PMD?
 * @folio: The folio to test
 */
static inline bool folio_test_pmd_mappable(struct folio *folio)
{
	return folio_order(folio) >= HPAGE_PMD_ORDER;
}

struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
		pmd_t *pmd, int flags, struct dev_pagemap **pgmap);

vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);

extern struct folio *huge_zero_folio;
extern unsigned long huge_zero_pfn;

static inline bool is_huge_zero_folio(const struct folio *folio)
{
	return READ_ONCE(huge_zero_folio) == folio;
}

static inline bool is_huge_zero_pmd(pmd_t pmd)
{
	return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd);
}

static inline bool is_huge_zero_pud(pud_t pud)
{
	return false;
}

struct folio *mm_get_huge_zero_folio(struct mm_struct *mm);
void mm_put_huge_zero_folio(struct mm_struct *mm);
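
/*
 * Usage sketch (illustrative): a process that wants to map the shared huge
 * zero folio takes a per-mm reference first, and that reference is dropped
 * once per mm, typically at teardown:
 *
 *	folio = mm_get_huge_zero_folio(mm);
 *	if (!folio)
 *		...fall back, e.g. allocate a private huge folio...
 *	...map the huge zero folio read-only...
 *
 *	mm_put_huge_zero_folio(mm);
 */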

#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot))

static inline bool thp_migration_supported(void)
{
	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
}

#else /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline bool folio_test_pmd_mappable(struct folio *folio)
{
	return false;
}

static inline bool thp_vma_suitable_order(struct vm_area_struct *vma,
		unsigned long addr, int order)
{
	return false;
}

static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
		unsigned long addr, unsigned long orders)
{
	return 0;
}

static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
					unsigned long vm_flags,
					unsigned long tva_flags,
					unsigned long orders)
{
	return 0;
}

#define transparent_hugepage_flags 0UL

#define thp_get_unmapped_area	NULL

static inline unsigned long
thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr,
			      unsigned long len, unsigned long pgoff,
			      unsigned long flags, vm_flags_t vm_flags)
{
	return 0;
}

static inline bool
can_split_folio(struct folio *folio, int *pextra_pins)
{
	return false;
}
static inline int
split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
		unsigned int new_order)
{
	return 0;
}
static inline int split_huge_page(struct page *page)
{
	return 0;
}
static inline void deferred_split_folio(struct folio *folio) {}
#define split_huge_pmd(__vma, __pmd, __address)	\
	do { } while (0)

static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long address, bool freeze, struct folio *folio) {}
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
		unsigned long address, bool freeze, struct folio *folio) {}

#define split_huge_pud(__vma, __pmd, __address)	\
	do { } while (0)

static inline int hugepage_madvise(struct vm_area_struct *vma,
				   unsigned long *vm_flags, int advice)
{
	return -EINVAL;
}

static inline int madvise_collapse(struct vm_area_struct *vma,
				   struct vm_area_struct **prev,
				   unsigned long start, unsigned long end)
{
	return -EINVAL;
}

static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
					 unsigned long start,
					 unsigned long end,
					 long adjust_next)
{
}
static inline int is_swap_pmd(pmd_t pmd)
{
	return 0;
}
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
		struct vm_area_struct *vma)
{
	return NULL;
}
static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
		struct vm_area_struct *vma)
{
	return NULL;
}

static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
{
	return 0;
}

static inline bool is_huge_zero_folio(const struct folio *folio)
{
	return false;
}

static inline bool is_huge_zero_pmd(pmd_t pmd)
{
	return false;
}

static inline bool is_huge_zero_pud(pud_t pud)
{
	return false;
}

static inline void mm_put_huge_zero_folio(struct mm_struct *mm)
{
	return;
}

static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
	unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
{
	return NULL;
}

static inline bool thp_migration_supported(void)
{
	return false;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline int split_folio_to_list_to_order(struct folio *folio,
		struct list_head *list, int new_order)
{
	return split_huge_page_to_list_to_order(&folio->page, list, new_order);
}

static inline int split_folio_to_order(struct folio *folio, int new_order)
{
	return split_folio_to_list_to_order(folio, NULL, new_order);
}

#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0)
#define split_folio(f) split_folio_to_order(f, 0)

#endif /* _LINUX_HUGE_MM_H */