/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_HUGETLB_H
#define _LINUX_HUGETLB_H

#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/fs.h>
#include <linux/hugetlb_inline.h>
#include <linux/cgroup.h>
#include <linux/page_ref.h>
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/pgtable.h>
#include <linux/gfp.h>
#include <linux/userfaultfd_k.h>

struct ctl_table;
struct user_struct;
struct mmu_gather;
struct node;

#ifndef CONFIG_ARCH_HAS_HUGEPD
typedef struct { unsigned long pd; } hugepd_t;
#define is_hugepd(hugepd) (0)
#define __hugepd(x) ((hugepd_t) { (x) })
#endif

void free_huge_folio(struct folio *folio);

#ifdef CONFIG_HUGETLB_PAGE

#include <linux/mempolicy.h>
#include <linux/shm.h>
#include <asm/tlbflush.h>

/*
 * For a HugeTLB page, there is more metadata to save in the struct page
 * than the head struct page can hold, so we have to reuse tail struct
 * pages to store the metadata.
 */
#define __NR_USED_SUBPAGE 3

struct hugepage_subpool {
	spinlock_t lock;
	long count;
	long max_hpages;	/* Maximum huge pages or -1 if no maximum. */
	long used_hpages;	/* Used count against maximum, includes */
				/* both allocated and reserved pages. */
	struct hstate *hstate;
	long min_hpages;	/* Minimum huge pages or -1 if no minimum. */
	long rsv_hpages;	/* Pages reserved against global pool to */
				/* satisfy minimum size. */
};

struct resv_map {
	struct kref refs;
	spinlock_t lock;
	struct list_head regions;
	long adds_in_progress;
	struct list_head region_cache;
	long region_cache_count;
#ifdef CONFIG_CGROUP_HUGETLB
	/*
	 * On private mappings, the counter to uncharge reservations is stored
	 * here. If these fields are 0, then either the mapping is shared, or
	 * cgroup accounting is disabled for this resv_map.
	 */
	struct page_counter *reservation_counter;
	unsigned long pages_per_hpage;
	struct cgroup_subsys_state *css;
#endif
};

/*
 * Region tracking -- allows tracking of reservations and instantiated pages
 * across the pages in a mapping.
 *
 * The region data structures are embedded into a resv_map and protected
 * by a resv_map's lock. The set of regions within the resv_map represent
 * reservations for huge pages, or huge pages that have already been
 * instantiated within the map. The from and to elements are huge page
 * indices into the associated mapping. from indicates the starting index
 * of the region. to represents the first index past the end of the region.
 *
 * For example, a file region structure with from == 0 and to == 4 represents
 * four huge pages in a mapping. It is important to note that the to element
 * represents the first element past the end of the region. This is used in
 * arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
 *
 * Interval notation of the form [from, to) will be used to indicate that
 * the endpoint from is inclusive and to is exclusive.
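 *
 * As an illustrative sketch (not kernel code), a mapping with pages 0-1
 * and 3-4 reserved carries two regions in its resv_map:
 *
 *	[0, 2)  ->  2 huge pages
 *	[3, 5)  ->  2 huge pages
 *
 * and reservation counts fall out of the (to - from) arithmetic above.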
 */
struct file_region {
	struct list_head link;
	long from;
	long to;
#ifdef CONFIG_CGROUP_HUGETLB
	/*
	 * On shared mappings, each reserved region appears as a struct
	 * file_region in resv_map. These fields hold the info needed to
	 * uncharge each reservation.
	 */
	struct page_counter *reservation_counter;
	struct cgroup_subsys_state *css;
#endif
};

struct hugetlb_vma_lock {
	struct kref refs;
	struct rw_semaphore rw_sema;
	struct vm_area_struct *vma;
};

extern struct resv_map *resv_map_alloc(void);
void resv_map_release(struct kref *ref);

extern spinlock_t hugetlb_lock;
extern int hugetlb_max_hstate __read_mostly;
#define for_each_hstate(h) \
	for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++)

struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
					      long min_hpages);
void hugepage_put_subpool(struct hugepage_subpool *spool);

void hugetlb_dup_vma_private(struct vm_area_struct *vma);
void clear_vma_resv_huge_pages(struct vm_area_struct *vma);
int move_hugetlb_page_tables(struct vm_area_struct *vma,
			     struct vm_area_struct *new_vma,
			     unsigned long old_addr, unsigned long new_addr,
			     unsigned long len);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
			    struct vm_area_struct *, struct vm_area_struct *);
struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
				      unsigned long address, unsigned int flags,
				      unsigned int *page_mask);
void unmap_hugepage_range(struct vm_area_struct *,
			  unsigned long, unsigned long, struct page *,
			  zap_flags_t);
void __unmap_hugepage_range_final(struct mmu_gather *tlb,
			  struct vm_area_struct *vma,
			  unsigned long start, unsigned long end,
			  struct page *ref_page, zap_flags_t zap_flags);
void hugetlb_report_meminfo(struct seq_file *);
int hugetlb_report_node_meminfo(char *buf, int len, int nid);
void hugetlb_show_meminfo_node(int nid);
unsigned long hugetlb_total_pages(void);
vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, unsigned int flags);
#ifdef CONFIG_USERFAULTFD
int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
			     struct vm_area_struct *dst_vma,
			     unsigned long dst_addr,
			     unsigned long src_addr,
			     uffd_flags_t flags,
			     struct folio **foliop);
#endif /* CONFIG_USERFAULTFD */
bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
			   struct vm_area_struct *vma,
			   vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
			     long freed);
bool isolate_hugetlb(struct folio *folio, struct list_head *list);
int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
			       bool *migratable_cleared);
void folio_putback_active_hugetlb(struct folio *folio);
void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
void hugetlb_fix_reserve_counts(struct inode *inode);
extern struct mutex *hugetlb_fault_mutex_table;
u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);
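
/*
 * Illustrative use of the fault mutex table (a sketch of the pattern used
 * around hugetlb faults, not a new interface): serialize on the per-file
 * page index before instantiating a page at that index.
 *
 *	u32 hash = hugetlb_fault_mutex_hash(mapping, idx);
 *
 *	mutex_lock(&hugetlb_fault_mutex_table[hash]);
 *	... allocate or look up the page at idx ...
 *	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 */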

pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, pud_t *pud);

struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);

extern int sysctl_hugetlb_shm_group;
extern struct list_head huge_boot_pages;

/* arch callbacks */

#ifndef CONFIG_HIGHPTE
/*
 * pte_offset_huge() and pte_alloc_huge() are helpers for those architectures
 * which may go down to the lowest PTE level in their huge_pte_offset() and
 * huge_pte_alloc(): to avoid reliance on pte_offset_map() without pte_unmap().
 */
static inline pte_t *pte_offset_huge(pmd_t *pmd, unsigned long address)
{
	return pte_offset_kernel(pmd, address);
}
static inline pte_t *pte_alloc_huge(struct mm_struct *mm, pmd_t *pmd,
				    unsigned long address)
{
	return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address);
}
#endif

pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, unsigned long sz);
/*
 * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE.
 * Returns the pte_t* if found, or NULL if the address is not mapped.
 *
 * IMPORTANT: we should normally not call this function directly; it is
 * only a common interface for implementing the arch-specific walker.
 * Please use hugetlb_walk() instead, because that will attempt to verify
 * the locking for you.
 *
 * Since this function will walk all the pgtable pages (including not only
 * the high-level pgtable pages, but also PUD entries that can be unshared
 * concurrently for VM_SHARED), the caller of this function is responsible
 * for its thread safety. One can follow this rule:
 *
 * (1) For private mappings: pmd unsharing is not possible, so holding the
 *     mmap_lock for either read or write is sufficient. Most callers
 *     already hold the mmap_lock, so normally, no special action is
 *     required.
 *
 * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged
 *     pgtable page can go away from under us! It can be done by a pmd
 *     unshare with a follow-up munmap() on the other process), then we
 *     need either:
 *
 *     (2.1) hugetlb vma lock read or write held, to make sure pmd unshare
 *           won't happen upon the range (it also makes sure the pte_t we
 *           read is the right and stable one), or,
 *
 *     (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make
 *           sure even if unshare happened the racy unmap() will wait until
 *           i_mmap_rwsem is released.
 *
 * Option (2.1) is the safest, which guarantees pte stability from the pmd
 * sharing pov, until the vma lock is released. Option (2.2) doesn't protect
 * against a concurrent pmd unshare, but it makes sure the pgtable page is
 * safe to access.
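 *
 * An illustrative lookup on a shared mapping following option (2.1) (a
 * sketch of the expected calling pattern, not a new interface):
 *
 *	hugetlb_vma_lock_read(vma);
 *	ptep = hugetlb_walk(vma, addr, huge_page_size(h));
 *	if (ptep)
 *		entry = huge_ptep_get(ptep);
 *	hugetlb_vma_unlock_read(vma);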
 */
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz);
unsigned long hugetlb_mask_last_page(struct hstate *h);
int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
		     unsigned long addr, pte_t *ptep);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
					  unsigned long *start, unsigned long *end);

void hugetlb_vma_lock_read(struct vm_area_struct *vma);
void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
void hugetlb_vma_lock_write(struct vm_area_struct *vma);
void hugetlb_vma_unlock_write(struct vm_area_struct *vma);
int hugetlb_vma_trylock_write(struct vm_area_struct *vma);
void hugetlb_vma_assert_locked(struct vm_area_struct *vma);
void hugetlb_vma_lock_release(struct kref *kref);

int pmd_huge(pmd_t pmd);
int pud_huge(pud_t pud);
long hugetlb_change_protection(struct vm_area_struct *vma,
		unsigned long address, unsigned long end, pgprot_t newprot,
		unsigned long cp_flags);

bool is_hugetlb_entry_migration(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);

#else /* !CONFIG_HUGETLB_PAGE */

static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma)
{
}

static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma)
{
}

static inline unsigned long hugetlb_total_pages(void)
{
	return 0;
}

static inline struct address_space *hugetlb_page_mapping_lock_write(
							struct page *hpage)
{
	return NULL;
}

static inline int huge_pmd_unshare(struct mm_struct *mm,
				   struct vm_area_struct *vma,
				   unsigned long addr, pte_t *ptep)
{
	return 0;
}

static inline void adjust_range_if_pmd_sharing_possible(
				struct vm_area_struct *vma,
				unsigned long *start, unsigned long *end)
{
}

static inline struct page *hugetlb_follow_page_mask(
    struct vm_area_struct *vma, unsigned long address, unsigned int flags,
    unsigned int *page_mask)
{
	BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE */
}

static inline int copy_hugetlb_page_range(struct mm_struct *dst,
					  struct mm_struct *src,
					  struct vm_area_struct *dst_vma,
					  struct vm_area_struct *src_vma)
{
	BUG();
	return 0;
}

static inline int move_hugetlb_page_tables(struct vm_area_struct *vma,
					   struct vm_area_struct *new_vma,
					   unsigned long old_addr,
					   unsigned long new_addr,
					   unsigned long len)
{
	BUG();
	return 0;
}

static inline void hugetlb_report_meminfo(struct seq_file *m)
{
}

static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid)
{
	return 0;
}

static inline void hugetlb_show_meminfo_node(int nid)
{
}

static inline int prepare_hugepage_range(struct file *file,
					 unsigned long addr, unsigned long len)
{
	return -EINVAL;
}

static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
{
}

static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
	return 1;
}
static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
}

static inline int pmd_huge(pmd_t pmd)
{
	return 0;
}

static inline int pud_huge(pud_t pud)
{
	return 0;
}

static inline int is_hugepage_only_range(struct mm_struct *mm,
					 unsigned long addr, unsigned long len)
{
	return 0;
}

static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
				unsigned long addr, unsigned long end,
				unsigned long floor, unsigned long ceiling)
{
	BUG();
}

#ifdef CONFIG_USERFAULTFD
static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
					   struct vm_area_struct *dst_vma,
					   unsigned long dst_addr,
					   unsigned long src_addr,
					   uffd_flags_t flags,
					   struct folio **foliop)
{
	BUG();
	return 0;
}
#endif /* CONFIG_USERFAULTFD */

static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
				     unsigned long sz)
{
	return NULL;
}

static inline bool isolate_hugetlb(struct folio *folio, struct list_head *list)
{
	return false;
}

static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison)
{
	return 0;
}

static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
					     bool *migratable_cleared)
{
	return 0;
}

static inline void folio_putback_active_hugetlb(struct folio *folio)
{
}

static inline void move_hugetlb_state(struct folio *old_folio,
				      struct folio *new_folio, int reason)
{
}

static inline long hugetlb_change_protection(
			struct vm_area_struct *vma, unsigned long address,
			unsigned long end, pgprot_t newprot,
			unsigned long cp_flags)
{
	return 0;
}

static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
			struct vm_area_struct *vma, unsigned long start,
			unsigned long end, struct page *ref_page,
			zap_flags_t zap_flags)
{
	BUG();
}

static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int flags)
{
	BUG();
	return 0;
}

static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }

#endif /* !CONFIG_HUGETLB_PAGE */
/*
 * Hugepages at the page global directory: if an architecture supports
 * hugepages at the PGD level, it needs to define pgd_huge(); otherwise
 * it defaults to 0 below.
 */
#ifndef pgd_huge
#define pgd_huge(x)	0
#endif
#ifndef p4d_huge
#define p4d_huge(x)	0
#endif

#ifndef pgd_write
static inline int pgd_write(pgd_t pgd)
{
	BUG();
	return 0;
}
#endif

#define HUGETLB_ANON_FILE "anon_hugepage"

enum {
	/*
	 * The file will be used as a shm file so shmfs accounting rules
	 * apply
	 */
	HUGETLB_SHMFS_INODE = 1,
	/*
	 * The file is being created on the internal vfs mount and shmfs
	 * accounting rules do not apply
	 */
	HUGETLB_ANONHUGE_INODE = 2,
};

#ifdef CONFIG_HUGETLBFS
struct hugetlbfs_sb_info {
	long	max_inodes;	/* inodes allowed */
	long	free_inodes;	/* inodes free */
	spinlock_t	stat_lock;
	struct hstate *hstate;
	struct hugepage_subpool *spool;
	kuid_t	uid;
	kgid_t	gid;
	umode_t	mode;
};

static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}

struct hugetlbfs_inode_info {
	struct shared_policy policy;
	struct inode vfs_inode;
	unsigned int seals;
};

static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
{
	return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
}

extern const struct file_operations hugetlbfs_file_operations;
extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
				int creat_flags, int page_size_log);

static inline bool is_file_hugepages(struct file *file)
{
	if (file->f_op == &hugetlbfs_file_operations)
		return true;

	return is_file_shm_hugepages(file);
}

static inline struct hstate *hstate_inode(struct inode *i)
{
	return HUGETLBFS_SB(i->i_sb)->hstate;
}
#else /* !CONFIG_HUGETLBFS */

#define is_file_hugepages(file)		false
static inline struct file *
hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
		   int creat_flags, int page_size_log)
{
	return ERR_PTR(-ENOSYS);
}

static inline struct hstate *hstate_inode(struct inode *i)
{
	return NULL;
}
#endif /* !CONFIG_HUGETLBFS */

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags);
#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */

unsigned long
generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
				  unsigned long len, unsigned long pgoff,
				  unsigned long flags);

/*
 * hugetlb page specific state flags. These flags are located in page.private
 * of the hugetlb head page. Functions created via the below macros should be
 * used to manipulate these flags.
 *
 * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at
 *	allocation time. Cleared when page is fully instantiated. Free
 *	routine checks flag to restore a reservation on error paths.
 *	Synchronization: Examined or modified by code that knows it has
 *	the only reference to page, i.e. after allocation but before use
 *	or when the page is being freed.
 * HPG_migratable - Set after a newly allocated page is added to the page
 *	cache and/or page tables. Indicates the page is a candidate for
 *	migration.
 *	Synchronization: Initially set after new page allocation with no
 *	locking. When examined and modified during migration processing
 *	(isolate, migrate, putback) the hugetlb_lock is held.
 * HPG_temporary - Set on a page that is temporarily allocated from the buddy
 *	allocator. Typically used for migration target pages when no pages
 *	are available in the pool. The hugetlb free page path will
 *	immediately free pages with this flag set to the buddy allocator.
 *	Synchronization: Can be set after huge page allocation from buddy when
 *	code knows it has the only reference. All other examinations and
 *	modifications require hugetlb_lock.
 * HPG_freed - Set when page is on the free lists.
 *	Synchronization: hugetlb_lock held for examination and modification.
 * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed.
 * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page
 *	that is not tracked by raw_hwp_page list.
 */
enum hugetlb_page_flags {
	HPG_restore_reserve = 0,
	HPG_migratable,
	HPG_temporary,
	HPG_freed,
	HPG_vmemmap_optimized,
	HPG_raw_hwp_unreliable,
	__NR_HPAGEFLAGS,
};

/*
 * Macros to create test, set and clear function definitions for
 * hugetlb specific page flags.
 */
#ifdef CONFIG_HUGETLB_PAGE
#define TESTHPAGEFLAG(uname, flname)				\
static __always_inline						\
bool folio_test_hugetlb_##flname(struct folio *folio)		\
	{	void *private = &folio->private;		\
		return test_bit(HPG_##flname, private);		\
	}							\
static inline int HPage##uname(struct page *page)		\
	{ return test_bit(HPG_##flname, &(page->private)); }

#define SETHPAGEFLAG(uname, flname)				\
static __always_inline						\
void folio_set_hugetlb_##flname(struct folio *folio)		\
	{	void *private = &folio->private;		\
		set_bit(HPG_##flname, private);			\
	}							\
static inline void SetHPage##uname(struct page *page)		\
	{ set_bit(HPG_##flname, &(page->private)); }

#define CLEARHPAGEFLAG(uname, flname)				\
static __always_inline						\
void folio_clear_hugetlb_##flname(struct folio *folio)		\
	{	void *private = &folio->private;		\
		clear_bit(HPG_##flname, private);		\
	}							\
static inline void ClearHPage##uname(struct page *page)	\
	{ clear_bit(HPG_##flname, &(page->private)); }
#else
#define TESTHPAGEFLAG(uname, flname)				\
static inline bool						\
folio_test_hugetlb_##flname(struct folio *folio)		\
	{ return 0; }						\
static inline int HPage##uname(struct page *page)		\
	{ return 0; }

#define SETHPAGEFLAG(uname, flname)				\
static inline void						\
folio_set_hugetlb_##flname(struct folio *folio)			\
	{ }							\
static inline void SetHPage##uname(struct page *page)		\
	{ }

#define CLEARHPAGEFLAG(uname, flname)				\
static inline void						\
folio_clear_hugetlb_##flname(struct folio *folio)		\
	{ }							\
static inline void ClearHPage##uname(struct page *page)	\
	{ }
#endif

#define HPAGEFLAG(uname, flname)				\
	TESTHPAGEFLAG(uname, flname)				\
	SETHPAGEFLAG(uname, flname)				\
	CLEARHPAGEFLAG(uname, flname)				\

/*
 * Create functions associated with hugetlb page flags
 */
HPAGEFLAG(RestoreReserve, restore_reserve)
HPAGEFLAG(Migratable, migratable)
HPAGEFLAG(Temporary, temporary)
HPAGEFLAG(Freed, freed)
HPAGEFLAG(VmemmapOptimized, vmemmap_optimized)
HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
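
/*
 * Illustrative expansion (a sketch, not additional definitions): with
 * CONFIG_HUGETLB_PAGE enabled, HPAGEFLAG(Freed, freed) above generates
 * folio_test_hugetlb_freed(), folio_set_hugetlb_freed() and
 * folio_clear_hugetlb_freed(), plus the HPageFreed()/SetHPageFreed()/
 * ClearHPageFreed() page variants, all operating on bit HPG_freed of the
 * head page's page.private word.
 */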

#ifdef CONFIG_HUGETLB_PAGE

#define HSTATE_NAME_LEN 32
/* Defines one hugetlb page size */
struct hstate {
	struct mutex resize_lock;
	int next_nid_to_alloc;
	int next_nid_to_free;
	unsigned int order;
	unsigned int demote_order;
	unsigned long mask;
	unsigned long max_huge_pages;
	unsigned long nr_huge_pages;
	unsigned long free_huge_pages;
	unsigned long resv_huge_pages;
	unsigned long surplus_huge_pages;
	unsigned long nr_overcommit_huge_pages;
	struct list_head hugepage_activelist;
	struct list_head hugepage_freelists[MAX_NUMNODES];
	unsigned int max_huge_pages_node[MAX_NUMNODES];
	unsigned int nr_huge_pages_node[MAX_NUMNODES];
	unsigned int free_huge_pages_node[MAX_NUMNODES];
	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
#ifdef CONFIG_CGROUP_HUGETLB
	/* cgroup control files */
	struct cftype cgroup_files_dfl[8];
	struct cftype cgroup_files_legacy[10];
#endif
	char name[HSTATE_NAME_LEN];
};

struct huge_bootmem_page {
	struct list_head list;
	struct hstate *hstate;
};

int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
				  unsigned long addr, int avoid_reserve);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
					   nodemask_t *nmask, gfp_t gfp_mask);
struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma,
				      unsigned long address);
int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
			      pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
			      unsigned long address, struct folio *folio);

/* arch callback */
int __init __alloc_bootmem_huge_page(struct hstate *h, int nid);
int __init alloc_bootmem_huge_page(struct hstate *h, int nid);
bool __init hugetlb_node_alloc_supported(void);

void __init hugetlb_add_hstate(unsigned order);
bool __init arch_hugetlb_valid_size(unsigned long size);
struct hstate *size_to_hstate(unsigned long size);

#ifndef HUGE_MAX_HSTATE
#define HUGE_MAX_HSTATE 1
#endif

extern struct hstate hstates[HUGE_MAX_HSTATE];
extern unsigned int default_hstate_idx;

#define default_hstate (hstates[default_hstate_idx])

static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
	return folio->_hugetlb_subpool;
}

static inline void hugetlb_set_folio_subpool(struct folio *folio,
					     struct hugepage_subpool *subpool)
{
	folio->_hugetlb_subpool = subpool;
}

static inline struct hstate *hstate_file(struct file *f)
{
	return hstate_inode(file_inode(f));
}

static inline struct hstate *hstate_sizelog(int page_size_log)
{
	if (!page_size_log)
		return &default_hstate;

	if (page_size_log < BITS_PER_LONG)
		return size_to_hstate(1UL << page_size_log);

	return NULL;
}

static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
	return hstate_file(vma->vm_file);
}

static inline unsigned long huge_page_size(const struct hstate *h)
{
	return (unsigned long)PAGE_SIZE << h->order;
}

extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma);

extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);

static inline unsigned long huge_page_mask(struct hstate *h)
{
	return h->mask;
}

static inline unsigned int huge_page_order(struct hstate *h)
{
	return h->order;
}

static inline unsigned huge_page_shift(struct hstate *h)
{
	return h->order + PAGE_SHIFT;
}

static inline bool hstate_is_gigantic(struct hstate *h)
{
	return huge_page_order(h) > MAX_ORDER;
}

static inline unsigned int pages_per_huge_page(const struct hstate *h)
{
	return 1 << h->order;
}

static inline unsigned int blocks_per_huge_page(struct hstate *h)
{
	return huge_page_size(h) / 512;
}
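
/*
 * Worked example (illustrative): a 2 MiB hstate on a 4 KiB base-page
 * kernel has order 9, so huge_page_size() returns 4 KiB << 9 = 2 MiB,
 * huge_page_shift() returns 21, pages_per_huge_page() returns 512,
 * huge_page_mask() is ~(2 MiB - 1) and blocks_per_huge_page() counts
 * 4096 512-byte sectors.
 */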

#include <asm/hugetlb.h>

#ifndef is_hugepage_only_range
static inline int is_hugepage_only_range(struct mm_struct *mm,
					 unsigned long addr, unsigned long len)
{
	return 0;
}
#define is_hugepage_only_range is_hugepage_only_range
#endif

#ifndef arch_clear_hugepage_flags
static inline void arch_clear_hugepage_flags(struct page *page) { }
#define arch_clear_hugepage_flags arch_clear_hugepage_flags
#endif

#ifndef arch_make_huge_pte
static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
				       vm_flags_t flags)
{
	return pte_mkhuge(entry);
}
#endif

static inline struct hstate *folio_hstate(struct folio *folio)
{
	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
	return size_to_hstate(folio_size(folio));
}

static inline unsigned hstate_index_to_shift(unsigned index)
{
	return hstates[index].order + PAGE_SHIFT;
}

static inline int hstate_index(struct hstate *h)
{
	return h - hstates;
}

extern int dissolve_free_huge_page(struct page *page);
extern int dissolve_free_huge_pages(unsigned long start_pfn,
				    unsigned long end_pfn);

#ifdef CONFIG_MEMORY_FAILURE
extern void folio_clear_hugetlb_hwpoison(struct folio *folio);
#else
static inline void folio_clear_hugetlb_hwpoison(struct folio *folio)
{
}
#endif

#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
#ifndef arch_hugetlb_migration_supported
static inline bool arch_hugetlb_migration_supported(struct hstate *h)
{
	if ((huge_page_shift(h) == PMD_SHIFT) ||
	    (huge_page_shift(h) == PUD_SHIFT) ||
	    (huge_page_shift(h) == PGDIR_SHIFT))
		return true;
	else
		return false;
}
#endif
#else
static inline bool arch_hugetlb_migration_supported(struct hstate *h)
{
	return false;
}
#endif

static inline bool hugepage_migration_supported(struct hstate *h)
{
	return arch_hugetlb_migration_supported(h);
}

/*
 * The movability check is different from the migration check: it
 * determines whether or not a huge page should be placed in a movable
 * zone. Movability of any huge page is only meaningful if the huge page
 * size is supported for migration; there is no reason for a huge page
 * to be movable if it is not migratable to start with. The huge page
 * size must also be small enough that migrating it remains feasible;
 * mere presence in a movable zone does not make migration feasible.
 *
 * So even though large huge page sizes like the gigantic ones are
 * migratable, they should not be movable because it is not feasible to
 * migrate them out of a movable zone.
 */
static inline bool hugepage_movable_supported(struct hstate *h)
{
	if (!hugepage_migration_supported(h))
		return false;

	if (hstate_is_gigantic(h))
		return false;
	return true;
}
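
/*
 * Consequence of the above (illustrative): a 1 GiB gigantic hstate may be
 * migratable per hugepage_migration_supported(), yet
 * hugepage_movable_supported() returns false for it, so htlb_alloc_mask()
 * below picks GFP_HIGHUSER rather than GFP_HIGHUSER_MOVABLE and keeps
 * such pages out of ZONE_MOVABLE.
 */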

/* Movability of hugepages depends on migration support. */
static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
	if (hugepage_movable_supported(h))
		return GFP_HIGHUSER_MOVABLE;
	else
		return GFP_HIGHUSER;
}

static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
{
	gfp_t modified_mask = htlb_alloc_mask(h);

	/* Some callers might want to enforce node */
	modified_mask |= (gfp_mask & __GFP_THISNODE);

	modified_mask |= (gfp_mask & __GFP_NOWARN);

	return modified_mask;
}

static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	if (huge_page_size(h) == PMD_SIZE)
		return pmd_lockptr(mm, (pmd_t *) pte);
	VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
	return &mm->page_table_lock;
}

#ifndef hugepages_supported
/*
 * Some platforms decide whether they support huge pages at boot time.
 * Some of them, such as powerpc, set HPAGE_SHIFT to 0 when there is no
 * such support.
 */
#define hugepages_supported() (HPAGE_SHIFT != 0)
#endif

void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm);

static inline void hugetlb_count_init(struct mm_struct *mm)
{
	atomic_long_set(&mm->hugetlb_usage, 0);
}

static inline void hugetlb_count_add(long l, struct mm_struct *mm)
{
	atomic_long_add(l, &mm->hugetlb_usage);
}

static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
	atomic_long_sub(l, &mm->hugetlb_usage);
}

#ifndef huge_ptep_modify_prot_start
#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
						unsigned long addr, pte_t *ptep)
{
	return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
}
#endif

#ifndef huge_ptep_modify_prot_commit
#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
						unsigned long addr, pte_t *ptep,
						pte_t old_pte, pte_t pte)
{
	unsigned long psize = huge_page_size(hstate_vma(vma));

	set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize);
}
#endif
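
/*
 * The two helpers above form a transactional pair when changing the
 * protection of a hugetlb PTE (an illustrative sketch of the pattern,
 * mirroring what hugetlb_change_protection() does):
 *
 *	pte_t old_pte = huge_ptep_modify_prot_start(vma, addr, ptep);
 *	pte_t new_pte = huge_pte_modify(old_pte, newprot);
 *
 *	huge_ptep_modify_prot_commit(vma, addr, ptep, old_pte, new_pte);
 */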

#ifdef CONFIG_NUMA
void hugetlb_register_node(struct node *node);
void hugetlb_unregister_node(struct node *node);
#endif

/*
 * Check if a given raw @page in a hugepage is HWPOISON.
 */
bool is_raw_hwpoison_page_in_hugepage(struct page *page);

#else	/* CONFIG_HUGETLB_PAGE */
struct hstate {};

static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
	return NULL;
}

static inline int isolate_or_dissolve_huge_page(struct page *page,
						struct list_head *list)
{
	return -ENOMEM;
}

static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
						unsigned long addr,
						int avoid_reserve)
{
	return NULL;
}

static inline struct folio *
alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
			     nodemask_t *nmask, gfp_t gfp_mask)
{
	return NULL;
}

static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h,
						    struct vm_area_struct *vma,
						    unsigned long address)
{
	return NULL;
}

static inline int __alloc_bootmem_huge_page(struct hstate *h)
{
	return 0;
}

static inline struct hstate *hstate_file(struct file *f)
{
	return NULL;
}

static inline struct hstate *hstate_sizelog(int page_size_log)
{
	return NULL;
}

static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
	return NULL;
}

static inline struct hstate *folio_hstate(struct folio *folio)
{
	return NULL;
}

static inline struct hstate *size_to_hstate(unsigned long size)
{
	return NULL;
}

static inline unsigned long huge_page_size(struct hstate *h)
{
	return PAGE_SIZE;
}

static inline unsigned long huge_page_mask(struct hstate *h)
{
	return PAGE_MASK;
}

static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
{
	return PAGE_SIZE;
}

static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
	return PAGE_SIZE;
}

static inline unsigned int huge_page_order(struct hstate *h)
{
	return 0;
}

static inline unsigned int huge_page_shift(struct hstate *h)
{
	return PAGE_SHIFT;
}

static inline bool hstate_is_gigantic(struct hstate *h)
{
	return false;
}

static inline unsigned int pages_per_huge_page(struct hstate *h)
{
	return 1;
}

static inline unsigned hstate_index_to_shift(unsigned index)
{
	return 0;
}

static inline int hstate_index(struct hstate *h)
{
	return 0;
}

static inline int dissolve_free_huge_page(struct page *page)
{
	return 0;
}

static inline int dissolve_free_huge_pages(unsigned long start_pfn,
					   unsigned long end_pfn)
{
	return 0;
}

static inline bool hugepage_migration_supported(struct hstate *h)
{
	return false;
}

static inline bool hugepage_movable_supported(struct hstate *h)
{
	return false;
}

static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
	return 0;
}

static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
{
	return 0;
}

static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	return &mm->page_table_lock;
}

static inline void hugetlb_count_init(struct mm_struct *mm)
{
}

static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
{
}

static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
}

static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep)
{
#ifdef CONFIG_MMU
	return ptep_get(ptep);
#else
	return *ptep;
#endif
}

static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, pte_t pte, unsigned long sz)
{
}

static inline void hugetlb_register_node(struct node *node)
{
}

static inline void hugetlb_unregister_node(struct node *node)
{
}
#endif	/* CONFIG_HUGETLB_PAGE */

static inline spinlock_t *huge_pte_lock(struct hstate *h,
					struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl;

	ptl = huge_pte_lockptr(h, mm, pte);
	spin_lock(ptl);
	return ptl;
}
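
/*
 * Typical locking pattern around a hugetlb PTE (an illustrative sketch of
 * how callers pair huge_pte_lock() with spin_unlock()):
 *
 *	ptl = huge_pte_lock(h, mm, ptep);
 *	... examine or update the entry at ptep ...
 *	spin_unlock(ptl);
 */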

#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
extern void __init hugetlb_cma_reserve(int order);
#else
static inline __init void hugetlb_cma_reserve(int order)
{
}
#endif

#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
	return page_count(virt_to_page(pte)) > 1;
}
#else
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
	return false;
}
#endif

bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);

#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
/*
 * Architectures with special requirements for evicting hugetlb-backing
 * TLB entries can implement this.
 */
#define flush_hugetlb_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#endif

static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
{
	return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
}

/*
 * Safe version of huge_pte_offset() that additionally checks the locks.
 * See the comments above huge_pte_offset().
 */
static inline pte_t *
hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz)
{
#if defined(CONFIG_HUGETLB_PAGE) && \
	defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP)
	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

	/*
	 * If pmd sharing is possible, locking is needed to safely walk the
	 * hugetlb pgtables. More information can be found at the comment
	 * above huge_pte_offset() in the same file.
	 *
	 * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP.
	 */
	if (__vma_shareable_lock(vma))
		WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) &&
			     !lockdep_is_held(
				 &vma->vm_file->f_mapping->i_mmap_rwsem));
#endif
	return huge_pte_offset(vma->vm_mm, addr, sz);
}

#endif /* _LINUX_HUGETLB_H */