1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _LINUX_HUGETLB_H 3 #define _LINUX_HUGETLB_H 4 5 #include <linux/mm.h> 6 #include <linux/mm_types.h> 7 #include <linux/mmdebug.h> 8 #include <linux/fs.h> 9 #include <linux/hugetlb_inline.h> 10 #include <linux/cgroup.h> 11 #include <linux/page_ref.h> 12 #include <linux/list.h> 13 #include <linux/kref.h> 14 #include <linux/pgtable.h> 15 #include <linux/gfp.h> 16 #include <linux/userfaultfd_k.h> 17 #include <linux/nodemask.h> 18 19 struct mmu_gather; 20 struct node; 21 22 void free_huge_folio(struct folio *folio); 23 24 #ifdef CONFIG_HUGETLB_PAGE 25 26 #include <linux/pagemap.h> 27 #include <linux/shm.h> 28 #include <asm/tlbflush.h> 29 30 /* 31 * For HugeTLB page, there are more metadata to save in the struct page. But 32 * the head struct page cannot meet our needs, so we have to abuse other tail 33 * struct page to store the metadata. 34 */ 35 #define __NR_USED_SUBPAGE 3 36 37 struct hugepage_subpool { 38 spinlock_t lock; 39 long count; 40 long max_hpages; /* Maximum huge pages or -1 if no maximum. */ 41 long used_hpages; /* Used count against maximum, includes */ 42 /* both allocated and reserved pages. */ 43 struct hstate *hstate; 44 long min_hpages; /* Minimum huge pages or -1 if no minimum. */ 45 long rsv_hpages; /* Pages reserved against global pool to */ 46 /* satisfy minimum size. */ 47 }; 48 49 struct resv_map { 50 struct kref refs; 51 spinlock_t lock; 52 struct list_head regions; 53 long adds_in_progress; 54 struct list_head region_cache; 55 long region_cache_count; 56 struct rw_semaphore rw_sema; 57 #ifdef CONFIG_CGROUP_HUGETLB 58 /* 59 * On private mappings, the counter to uncharge reservations is stored 60 * here. If these fields are 0, then either the mapping is shared, or 61 * cgroup accounting is disabled for this resv_map. 62 */ 63 struct page_counter *reservation_counter; 64 unsigned long pages_per_hpage; 65 struct cgroup_subsys_state *css; 66 #endif 67 }; 68 69 /* 70 * Region tracking -- allows tracking of reservations and instantiated pages 71 * across the pages in a mapping. 72 * 73 * The region data structures are embedded into a resv_map and protected 74 * by a resv_map's lock. The set of regions within the resv_map represent 75 * reservations for huge pages, or huge pages that have already been 76 * instantiated within the map. The from and to elements are huge page 77 * indices into the associated mapping. from indicates the starting index 78 * of the region. to represents the first index past the end of the region. 79 * 80 * For example, a file region structure with from == 0 and to == 4 represents 81 * four huge pages in a mapping. It is important to note that the to element 82 * represents the first element past the end of the region. This is used in 83 * arithmetic as 4(to) - 0(from) = 4 huge pages in the region. 84 * 85 * Interval notation of the form [from, to) will be used to indicate that 86 * the endpoint from is inclusive and to is exclusive. 87 */ 88 struct file_region { 89 struct list_head link; 90 long from; 91 long to; 92 #ifdef CONFIG_CGROUP_HUGETLB 93 /* 94 * On shared mappings, each reserved region appears as a struct 95 * file_region in resv_map. These fields hold the info needed to 96 * uncharge each reservation. 97 */ 98 struct page_counter *reservation_counter; 99 struct cgroup_subsys_state *css; 100 #endif 101 }; 102 103 struct hugetlb_vma_lock { 104 struct kref refs; 105 struct rw_semaphore rw_sema; 106 struct vm_area_struct *vma; 107 }; 108 109 extern struct resv_map *resv_map_alloc(void); 110 void resv_map_release(struct kref *ref); 111 112 extern spinlock_t hugetlb_lock; 113 extern int hugetlb_max_hstate __read_mostly; 114 #define for_each_hstate(h) \ 115 for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++) 116 117 struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, 118 long min_hpages); 119 void hugepage_put_subpool(struct hugepage_subpool *spool); 120 121 void hugetlb_dup_vma_private(struct vm_area_struct *vma); 122 void clear_vma_resv_huge_pages(struct vm_area_struct *vma); 123 int move_hugetlb_page_tables(struct vm_area_struct *vma, 124 struct vm_area_struct *new_vma, 125 unsigned long old_addr, unsigned long new_addr, 126 unsigned long len); 127 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, 128 struct vm_area_struct *, struct vm_area_struct *); 129 void unmap_hugepage_range(struct vm_area_struct *, 130 unsigned long start, unsigned long end, 131 struct folio *, zap_flags_t); 132 void __unmap_hugepage_range(struct mmu_gather *tlb, 133 struct vm_area_struct *vma, 134 unsigned long start, unsigned long end, 135 struct folio *, zap_flags_t zap_flags); 136 void hugetlb_report_meminfo(struct seq_file *); 137 int hugetlb_report_node_meminfo(char *buf, int len, int nid); 138 void hugetlb_show_meminfo_node(int nid); 139 unsigned long hugetlb_total_pages(void); 140 vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, 141 unsigned long address, unsigned int flags); 142 #ifdef CONFIG_USERFAULTFD 143 int hugetlb_mfill_atomic_pte(pte_t *dst_pte, 144 struct vm_area_struct *dst_vma, 145 unsigned long dst_addr, 146 unsigned long src_addr, 147 uffd_flags_t flags, 148 struct folio **foliop); 149 #endif /* CONFIG_USERFAULTFD */ 150 long hugetlb_reserve_pages(struct inode *inode, long from, long to, 151 struct vm_area_struct *vma, vma_flags_t vma_flags); 152 long hugetlb_unreserve_pages(struct inode *inode, long start, long end, 153 long freed); 154 bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list); 155 int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); 156 void folio_putback_hugetlb(struct folio *folio); 157 void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); 158 void hugetlb_fix_reserve_counts(struct inode *inode); 159 extern struct mutex *hugetlb_fault_mutex_table; 160 u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); 161 162 pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, 163 unsigned long addr, pud_t *pud); 164 bool hugetlbfs_pagecache_present(struct hstate *h, 165 struct vm_area_struct *vma, 166 unsigned long address); 167 168 struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio); 169 170 extern int movable_gigantic_pages __read_mostly; 171 extern int sysctl_hugetlb_shm_group __read_mostly; 172 extern struct list_head huge_boot_pages[MAX_NUMNODES]; 173 174 void hugetlb_bootmem_alloc(void); 175 extern nodemask_t hugetlb_bootmem_nodes; 176 void hugetlb_bootmem_set_nodes(void); 177 178 /* arch callbacks */ 179 180 #ifndef CONFIG_HIGHPTE 181 /* 182 * pte_offset_huge() and pte_alloc_huge() are helpers for those architectures 183 * which may go down to the lowest PTE level in their huge_pte_offset() and 184 * huge_pte_alloc(): to avoid reliance on pte_offset_map() without pte_unmap(). 185 */ 186 static inline pte_t *pte_offset_huge(pmd_t *pmd, unsigned long address) 187 { 188 return pte_offset_kernel(pmd, address); 189 } 190 static inline pte_t *pte_alloc_huge(struct mm_struct *mm, pmd_t *pmd, 191 unsigned long address) 192 { 193 return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address); 194 } 195 #endif 196 197 pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 198 unsigned long addr, unsigned long sz); 199 /* 200 * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE. 201 * Returns the pte_t* if found, or NULL if the address is not mapped. 202 * 203 * IMPORTANT: we should normally not directly call this function, instead 204 * this is only a common interface to implement arch-specific 205 * walker. Please use hugetlb_walk() instead, because that will attempt to 206 * verify the locking for you. 207 * 208 * Since this function will walk all the pgtable pages (including not only 209 * high-level pgtable page, but also PUD entry that can be unshared 210 * concurrently for VM_SHARED), the caller of this function should be 211 * responsible of its thread safety. One can follow this rule: 212 * 213 * (1) For private mappings: pmd unsharing is not possible, so holding the 214 * mmap_lock for either read or write is sufficient. Most callers 215 * already hold the mmap_lock, so normally, no special action is 216 * required. 217 * 218 * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged 219 * pgtable page can go away from under us! It can be done by a pmd 220 * unshare with a follow up munmap() on the other process), then we 221 * need either: 222 * 223 * (2.1) hugetlb vma lock read or write held, to make sure pmd unshare 224 * won't happen upon the range (it also makes sure the pte_t we 225 * read is the right and stable one), or, 226 * 227 * (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make 228 * sure even if unshare happened the racy unmap() will wait until 229 * i_mmap_rwsem is released. 230 * 231 * Option (2.1) is the safest, which guarantees pte stability from pmd 232 * sharing pov, until the vma lock released. Option (2.2) doesn't protect 233 * a concurrent pmd unshare, but it makes sure the pgtable page is safe to 234 * access. 235 */ 236 pte_t *huge_pte_offset(struct mm_struct *mm, 237 unsigned long addr, unsigned long sz); 238 unsigned long hugetlb_mask_last_page(struct hstate *h); 239 int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma, 240 unsigned long addr, pte_t *ptep); 241 void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma); 242 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, 243 unsigned long *start, unsigned long *end); 244 245 extern void __hugetlb_zap_begin(struct vm_area_struct *vma, 246 unsigned long *begin, unsigned long *end); 247 extern void __hugetlb_zap_end(struct vm_area_struct *vma, 248 struct zap_details *details); 249 250 static inline void hugetlb_zap_begin(struct vm_area_struct *vma, 251 unsigned long *start, unsigned long *end) 252 { 253 if (is_vm_hugetlb_page(vma)) 254 __hugetlb_zap_begin(vma, start, end); 255 } 256 257 static inline void hugetlb_zap_end(struct vm_area_struct *vma, 258 struct zap_details *details) 259 { 260 if (is_vm_hugetlb_page(vma)) 261 __hugetlb_zap_end(vma, details); 262 } 263 264 void hugetlb_vma_lock_read(struct vm_area_struct *vma); 265 void hugetlb_vma_unlock_read(struct vm_area_struct *vma); 266 void hugetlb_vma_lock_write(struct vm_area_struct *vma); 267 void hugetlb_vma_unlock_write(struct vm_area_struct *vma); 268 int hugetlb_vma_trylock_write(struct vm_area_struct *vma); 269 void hugetlb_vma_assert_locked(struct vm_area_struct *vma); 270 void hugetlb_vma_lock_release(struct kref *kref); 271 long hugetlb_change_protection(struct vm_area_struct *vma, 272 unsigned long address, unsigned long end, pgprot_t newprot, 273 unsigned long cp_flags); 274 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); 275 void fixup_hugetlb_reservations(struct vm_area_struct *vma); 276 void hugetlb_split(struct vm_area_struct *vma, unsigned long addr); 277 278 unsigned int arch_hugetlb_cma_order(void); 279 280 #else /* !CONFIG_HUGETLB_PAGE */ 281 282 static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma) 283 { 284 } 285 286 static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma) 287 { 288 } 289 290 static inline unsigned long hugetlb_total_pages(void) 291 { 292 return 0; 293 } 294 295 static inline struct address_space *hugetlb_folio_mapping_lock_write( 296 struct folio *folio) 297 { 298 return NULL; 299 } 300 301 static inline int huge_pmd_unshare(struct mmu_gather *tlb, 302 struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) 303 { 304 return 0; 305 } 306 307 static inline void huge_pmd_unshare_flush(struct mmu_gather *tlb, 308 struct vm_area_struct *vma) 309 { 310 } 311 312 static inline void adjust_range_if_pmd_sharing_possible( 313 struct vm_area_struct *vma, 314 unsigned long *start, unsigned long *end) 315 { 316 } 317 318 static inline void hugetlb_zap_begin( 319 struct vm_area_struct *vma, 320 unsigned long *start, unsigned long *end) 321 { 322 } 323 324 static inline void hugetlb_zap_end( 325 struct vm_area_struct *vma, 326 struct zap_details *details) 327 { 328 } 329 330 static inline int copy_hugetlb_page_range(struct mm_struct *dst, 331 struct mm_struct *src, 332 struct vm_area_struct *dst_vma, 333 struct vm_area_struct *src_vma) 334 { 335 BUG(); 336 return 0; 337 } 338 339 static inline int move_hugetlb_page_tables(struct vm_area_struct *vma, 340 struct vm_area_struct *new_vma, 341 unsigned long old_addr, 342 unsigned long new_addr, 343 unsigned long len) 344 { 345 BUG(); 346 return 0; 347 } 348 349 static inline void hugetlb_report_meminfo(struct seq_file *m) 350 { 351 } 352 353 static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid) 354 { 355 return 0; 356 } 357 358 static inline void hugetlb_show_meminfo_node(int nid) 359 { 360 } 361 362 static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma) 363 { 364 } 365 366 static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma) 367 { 368 } 369 370 static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma) 371 { 372 } 373 374 static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma) 375 { 376 } 377 378 static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma) 379 { 380 return 1; 381 } 382 383 static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma) 384 { 385 } 386 387 static inline int is_hugepage_only_range(struct mm_struct *mm, 388 unsigned long addr, unsigned long len) 389 { 390 return 0; 391 } 392 393 #ifdef CONFIG_USERFAULTFD 394 static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte, 395 struct vm_area_struct *dst_vma, 396 unsigned long dst_addr, 397 unsigned long src_addr, 398 uffd_flags_t flags, 399 struct folio **foliop) 400 { 401 BUG(); 402 return 0; 403 } 404 #endif /* CONFIG_USERFAULTFD */ 405 406 static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, 407 unsigned long sz) 408 { 409 return NULL; 410 } 411 412 static inline bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list) 413 { 414 return false; 415 } 416 417 static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison) 418 { 419 return 0; 420 } 421 422 static inline void folio_putback_hugetlb(struct folio *folio) 423 { 424 } 425 426 static inline void move_hugetlb_state(struct folio *old_folio, 427 struct folio *new_folio, int reason) 428 { 429 } 430 431 static inline long hugetlb_change_protection( 432 struct vm_area_struct *vma, unsigned long address, 433 unsigned long end, pgprot_t newprot, 434 unsigned long cp_flags) 435 { 436 return 0; 437 } 438 439 static inline void __unmap_hugepage_range(struct mmu_gather *tlb, 440 struct vm_area_struct *vma, unsigned long start, 441 unsigned long end, struct folio *folio, 442 zap_flags_t zap_flags) 443 { 444 BUG(); 445 } 446 447 static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, 448 struct vm_area_struct *vma, unsigned long address, 449 unsigned int flags) 450 { 451 BUG(); 452 return 0; 453 } 454 455 static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } 456 457 static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) 458 { 459 } 460 461 static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {} 462 463 #endif /* !CONFIG_HUGETLB_PAGE */ 464 465 #ifndef pgd_write 466 static inline int pgd_write(pgd_t pgd) 467 { 468 BUG(); 469 return 0; 470 } 471 #endif 472 473 #define HUGETLB_ANON_FILE "anon_hugepage" 474 475 enum { 476 /* 477 * The file will be used as an shm file so shmfs accounting rules 478 * apply 479 */ 480 HUGETLB_SHMFS_INODE = 1, 481 /* 482 * The file is being created on the internal vfs mount and shmfs 483 * accounting rules do not apply 484 */ 485 HUGETLB_ANONHUGE_INODE = 2, 486 }; 487 488 #ifdef CONFIG_HUGETLBFS 489 struct hugetlbfs_sb_info { 490 long max_inodes; /* inodes allowed */ 491 long free_inodes; /* inodes free */ 492 spinlock_t stat_lock; 493 struct hstate *hstate; 494 struct hugepage_subpool *spool; 495 kuid_t uid; 496 kgid_t gid; 497 umode_t mode; 498 }; 499 500 static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) 501 { 502 return sb->s_fs_info; 503 } 504 505 struct hugetlbfs_inode_info { 506 struct inode vfs_inode; 507 struct resv_map *resv_map; 508 unsigned int seals; 509 }; 510 511 static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) 512 { 513 return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); 514 } 515 516 extern const struct vm_operations_struct hugetlb_vm_ops; 517 struct file *hugetlb_file_setup(const char *name, size_t size, vma_flags_t acct, 518 int creat_flags, int page_size_log); 519 520 static inline bool is_file_hugepages(const struct file *file) 521 { 522 return file->f_op->fop_flags & FOP_HUGE_PAGES; 523 } 524 525 static inline struct hstate *hstate_inode(struct inode *i) 526 { 527 return HUGETLBFS_SB(i->i_sb)->hstate; 528 } 529 #else /* !CONFIG_HUGETLBFS */ 530 531 #define is_file_hugepages(file) false 532 static inline struct file * 533 hugetlb_file_setup(const char *name, size_t size, vma_flags_t acctflag, 534 int creat_flags, int page_size_log) 535 { 536 return ERR_PTR(-ENOSYS); 537 } 538 539 static inline struct hstate *hstate_inode(struct inode *i) 540 { 541 return NULL; 542 } 543 #endif /* !CONFIG_HUGETLBFS */ 544 545 unsigned long 546 hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 547 unsigned long len, unsigned long pgoff, 548 unsigned long flags); 549 550 /* 551 * huegtlb page specific state flags. These flags are located in page.private 552 * of the hugetlb head page. Functions created via the below macros should be 553 * used to manipulate these flags. 554 * 555 * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at 556 * allocation time. Cleared when page is fully instantiated. Free 557 * routine checks flag to restore a reservation on error paths. 558 * Synchronization: Examined or modified by code that knows it has 559 * the only reference to page. i.e. After allocation but before use 560 * or when the page is being freed. 561 * HPG_migratable - Set after a newly allocated page is added to the page 562 * cache and/or page tables. Indicates the page is a candidate for 563 * migration. 564 * Synchronization: Initially set after new page allocation with no 565 * locking. When examined and modified during migration processing 566 * (isolate, migrate, putback) the hugetlb_lock is held. 567 * HPG_temporary - Set on a page that is temporarily allocated from the buddy 568 * allocator. Typically used for migration target pages when no pages 569 * are available in the pool. The hugetlb free page path will 570 * immediately free pages with this flag set to the buddy allocator. 571 * Synchronization: Can be set after huge page allocation from buddy when 572 * code knows it has only reference. All other examinations and 573 * modifications require hugetlb_lock. 574 * HPG_freed - Set when page is on the free lists. 575 * Synchronization: hugetlb_lock held for examination and modification. 576 * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed. 577 * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page 578 * that is not tracked by raw_hwp_page list. 579 */ 580 enum hugetlb_page_flags { 581 HPG_restore_reserve = 0, 582 HPG_migratable, 583 HPG_temporary, 584 HPG_freed, 585 HPG_vmemmap_optimized, 586 HPG_raw_hwp_unreliable, 587 HPG_cma, 588 __NR_HPAGEFLAGS, 589 }; 590 591 /* 592 * Macros to create test, set and clear function definitions for 593 * hugetlb specific page flags. 594 */ 595 #ifdef CONFIG_HUGETLB_PAGE 596 #define TESTHPAGEFLAG(uname, flname) \ 597 static __always_inline \ 598 bool folio_test_hugetlb_##flname(struct folio *folio) \ 599 { void *private = &folio->private; \ 600 return test_bit(HPG_##flname, private); \ 601 } 602 603 #define SETHPAGEFLAG(uname, flname) \ 604 static __always_inline \ 605 void folio_set_hugetlb_##flname(struct folio *folio) \ 606 { void *private = &folio->private; \ 607 set_bit(HPG_##flname, private); \ 608 } 609 610 #define CLEARHPAGEFLAG(uname, flname) \ 611 static __always_inline \ 612 void folio_clear_hugetlb_##flname(struct folio *folio) \ 613 { void *private = &folio->private; \ 614 clear_bit(HPG_##flname, private); \ 615 } 616 #else 617 #define TESTHPAGEFLAG(uname, flname) \ 618 static inline bool \ 619 folio_test_hugetlb_##flname(struct folio *folio) \ 620 { return 0; } 621 622 #define SETHPAGEFLAG(uname, flname) \ 623 static inline void \ 624 folio_set_hugetlb_##flname(struct folio *folio) \ 625 { } 626 627 #define CLEARHPAGEFLAG(uname, flname) \ 628 static inline void \ 629 folio_clear_hugetlb_##flname(struct folio *folio) \ 630 { } 631 #endif 632 633 #define HPAGEFLAG(uname, flname) \ 634 TESTHPAGEFLAG(uname, flname) \ 635 SETHPAGEFLAG(uname, flname) \ 636 CLEARHPAGEFLAG(uname, flname) \ 637 638 /* 639 * Create functions associated with hugetlb page flags 640 */ 641 HPAGEFLAG(RestoreReserve, restore_reserve) 642 HPAGEFLAG(Migratable, migratable) 643 HPAGEFLAG(Temporary, temporary) 644 HPAGEFLAG(Freed, freed) 645 HPAGEFLAG(VmemmapOptimized, vmemmap_optimized) 646 HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable) 647 HPAGEFLAG(Cma, cma) 648 649 #ifdef CONFIG_HUGETLB_PAGE 650 651 #define HSTATE_NAME_LEN 32 652 /* Defines one hugetlb page size */ 653 struct hstate { 654 struct mutex resize_lock; 655 struct lock_class_key resize_key; 656 int next_nid_to_alloc; 657 int next_nid_to_free; 658 unsigned int order; 659 unsigned int demote_order; 660 unsigned long mask; 661 unsigned long max_huge_pages; 662 unsigned long nr_huge_pages; 663 unsigned long free_huge_pages; 664 unsigned long resv_huge_pages; 665 unsigned long surplus_huge_pages; 666 unsigned long nr_overcommit_huge_pages; 667 struct list_head hugepage_activelist; 668 struct list_head hugepage_freelists[MAX_NUMNODES]; 669 unsigned int max_huge_pages_node[MAX_NUMNODES]; 670 unsigned int nr_huge_pages_node[MAX_NUMNODES]; 671 unsigned int free_huge_pages_node[MAX_NUMNODES]; 672 unsigned int surplus_huge_pages_node[MAX_NUMNODES]; 673 char name[HSTATE_NAME_LEN]; 674 }; 675 676 struct cma; 677 678 struct huge_bootmem_page { 679 struct list_head list; 680 struct hstate *hstate; 681 unsigned long flags; 682 struct cma *cma; 683 }; 684 685 #define HUGE_BOOTMEM_HVO 0x0001 686 #define HUGE_BOOTMEM_ZONES_VALID 0x0002 687 #define HUGE_BOOTMEM_CMA 0x0004 688 689 bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m); 690 691 int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list); 692 int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn); 693 void wait_for_freed_hugetlb_folios(void); 694 struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, 695 unsigned long addr, bool cow_from_owner); 696 struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, 697 nodemask_t *nmask, gfp_t gfp_mask, 698 bool allow_alloc_fallback); 699 struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, 700 nodemask_t *nmask, gfp_t gfp_mask); 701 702 int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, 703 pgoff_t idx); 704 void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, 705 unsigned long address, struct folio *folio); 706 707 /* arch callback */ 708 int __init __alloc_bootmem_huge_page(struct hstate *h, int nid); 709 int __init alloc_bootmem_huge_page(struct hstate *h, int nid); 710 bool __init hugetlb_node_alloc_supported(void); 711 712 void __init hugetlb_add_hstate(unsigned order); 713 bool __init arch_hugetlb_valid_size(unsigned long size); 714 struct hstate *size_to_hstate(unsigned long size); 715 716 #ifndef HUGE_MAX_HSTATE 717 #define HUGE_MAX_HSTATE 1 718 #endif 719 720 extern struct hstate hstates[HUGE_MAX_HSTATE]; 721 extern unsigned int default_hstate_idx; 722 723 #define default_hstate (hstates[default_hstate_idx]) 724 725 static inline struct hugepage_subpool *subpool_inode(struct inode *inode) 726 { 727 return HUGETLBFS_SB(inode->i_sb)->spool; 728 } 729 730 static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) 731 { 732 return folio->_hugetlb_subpool; 733 } 734 735 static inline void hugetlb_set_folio_subpool(struct folio *folio, 736 struct hugepage_subpool *subpool) 737 { 738 folio->_hugetlb_subpool = subpool; 739 } 740 741 static inline struct hstate *hstate_file(struct file *f) 742 { 743 return hstate_inode(file_inode(f)); 744 } 745 746 static inline struct hstate *hstate_sizelog(int page_size_log) 747 { 748 if (!page_size_log) 749 return &default_hstate; 750 751 if (page_size_log < BITS_PER_LONG) 752 return size_to_hstate(1UL << page_size_log); 753 754 return NULL; 755 } 756 757 static inline struct hstate *hstate_vma(struct vm_area_struct *vma) 758 { 759 return hstate_file(vma->vm_file); 760 } 761 762 static inline unsigned long huge_page_size(const struct hstate *h) 763 { 764 return (unsigned long)PAGE_SIZE << h->order; 765 } 766 767 static inline unsigned long huge_page_mask(struct hstate *h) 768 { 769 return h->mask; 770 } 771 772 static inline unsigned int huge_page_order(struct hstate *h) 773 { 774 return h->order; 775 } 776 777 static inline unsigned huge_page_shift(struct hstate *h) 778 { 779 return h->order + PAGE_SHIFT; 780 } 781 782 /** 783 * hugetlb_linear_page_index() - linear_page_index() but in hugetlb 784 * page size granularity. 785 * @vma: the hugetlb VMA 786 * @address: the virtual address within the VMA 787 * 788 * Return: the page offset within the mapping in huge page units. 789 */ 790 static inline pgoff_t hugetlb_linear_page_index(struct vm_area_struct *vma, 791 unsigned long address) 792 { 793 struct hstate *h = hstate_vma(vma); 794 795 return ((address - vma->vm_start) >> huge_page_shift(h)) + 796 (vma->vm_pgoff >> huge_page_order(h)); 797 } 798 799 static inline bool order_is_gigantic(unsigned int order) 800 { 801 return order > MAX_PAGE_ORDER; 802 } 803 804 static inline bool hstate_is_gigantic(struct hstate *h) 805 { 806 return order_is_gigantic(huge_page_order(h)); 807 } 808 809 static inline unsigned int pages_per_huge_page(const struct hstate *h) 810 { 811 return 1 << h->order; 812 } 813 814 static inline unsigned int blocks_per_huge_page(struct hstate *h) 815 { 816 return huge_page_size(h) / 512; 817 } 818 819 static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, 820 struct address_space *mapping, pgoff_t idx) 821 { 822 return filemap_lock_folio(mapping, idx << huge_page_order(h)); 823 } 824 825 #include <asm/hugetlb.h> 826 827 #ifndef is_hugepage_only_range 828 static inline int is_hugepage_only_range(struct mm_struct *mm, 829 unsigned long addr, unsigned long len) 830 { 831 return 0; 832 } 833 #define is_hugepage_only_range is_hugepage_only_range 834 #endif 835 836 #ifndef arch_clear_hugetlb_flags 837 static inline void arch_clear_hugetlb_flags(struct folio *folio) { } 838 #define arch_clear_hugetlb_flags arch_clear_hugetlb_flags 839 #endif 840 841 #ifndef arch_make_huge_pte 842 static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, 843 vm_flags_t flags) 844 { 845 return pte_mkhuge(entry); 846 } 847 #endif 848 849 #ifndef arch_has_huge_bootmem_alloc 850 /* 851 * Some architectures do their own bootmem allocation, so they can't use 852 * early CMA allocation. 853 */ 854 static inline bool arch_has_huge_bootmem_alloc(void) 855 { 856 return false; 857 } 858 #endif 859 860 static inline struct hstate *folio_hstate(struct folio *folio) 861 { 862 VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); 863 return size_to_hstate(folio_size(folio)); 864 } 865 866 static inline unsigned hstate_index_to_shift(unsigned index) 867 { 868 return hstates[index].order + PAGE_SHIFT; 869 } 870 871 static inline int hstate_index(struct hstate *h) 872 { 873 return h - hstates; 874 } 875 876 int dissolve_free_hugetlb_folio(struct folio *folio); 877 int dissolve_free_hugetlb_folios(unsigned long start_pfn, 878 unsigned long end_pfn); 879 880 #ifdef CONFIG_MEMORY_FAILURE 881 extern void folio_clear_hugetlb_hwpoison(struct folio *folio); 882 #else 883 static inline void folio_clear_hugetlb_hwpoison(struct folio *folio) 884 { 885 } 886 #endif 887 888 #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION 889 #ifndef arch_hugetlb_migration_supported 890 static inline bool arch_hugetlb_migration_supported(struct hstate *h) 891 { 892 if ((huge_page_shift(h) == PMD_SHIFT) || 893 (huge_page_shift(h) == PUD_SHIFT) || 894 (huge_page_shift(h) == PGDIR_SHIFT)) 895 return true; 896 else 897 return false; 898 } 899 #endif 900 #else 901 static inline bool arch_hugetlb_migration_supported(struct hstate *h) 902 { 903 return false; 904 } 905 #endif 906 907 static inline bool hugepage_migration_supported(struct hstate *h) 908 { 909 return arch_hugetlb_migration_supported(h); 910 } 911 912 /* 913 * Movability check is different as compared to migration check. 914 * It determines whether or not a huge page should be placed on 915 * movable zone or not. Movability of any huge page should be 916 * required only if huge page size is supported for migration. 917 * There won't be any reason for the huge page to be movable if 918 * it is not migratable to start with. Also the size of the huge 919 * page should be large enough to be placed under a movable zone 920 * and still feasible enough to be migratable. Just the presence 921 * in movable zone does not make the migration feasible. 922 * 923 * So even though large huge page sizes like the gigantic ones 924 * are migratable they should not be movable because its not 925 * feasible to migrate them from movable zone. 926 */ 927 static inline bool hugepage_movable_supported(struct hstate *h) 928 { 929 if (!hugepage_migration_supported(h)) 930 return false; 931 932 if (hstate_is_gigantic(h) && !movable_gigantic_pages) 933 return false; 934 return true; 935 } 936 937 /* Movability of hugepages depends on migration support. */ 938 static inline gfp_t htlb_alloc_mask(struct hstate *h) 939 { 940 gfp_t gfp = __GFP_COMP | __GFP_NOWARN; 941 942 gfp |= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER; 943 944 return gfp; 945 } 946 947 static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) 948 { 949 gfp_t modified_mask = htlb_alloc_mask(h); 950 951 /* Some callers might want to enforce node */ 952 modified_mask |= (gfp_mask & __GFP_THISNODE); 953 954 modified_mask |= (gfp_mask & __GFP_NOWARN); 955 956 return modified_mask; 957 } 958 959 static inline bool htlb_allow_alloc_fallback(int reason) 960 { 961 bool allowed_fallback = false; 962 963 /* 964 * Note: the memory offline, memory failure and migration syscalls will 965 * be allowed to fallback to other nodes due to lack of a better chioce, 966 * that might break the per-node hugetlb pool. While other cases will 967 * set the __GFP_THISNODE to avoid breaking the per-node hugetlb pool. 968 */ 969 switch (reason) { 970 case MR_MEMORY_HOTPLUG: 971 case MR_MEMORY_FAILURE: 972 case MR_SYSCALL: 973 case MR_MEMPOLICY_MBIND: 974 allowed_fallback = true; 975 break; 976 default: 977 break; 978 } 979 980 return allowed_fallback; 981 } 982 983 static inline spinlock_t *huge_pte_lockptr(struct hstate *h, 984 struct mm_struct *mm, pte_t *pte) 985 { 986 const unsigned long size = huge_page_size(h); 987 988 VM_WARN_ON(size == PAGE_SIZE); 989 990 /* 991 * hugetlb must use the exact same PT locks as core-mm page table 992 * walkers would. When modifying a PTE table, hugetlb must take the 993 * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD 994 * PT lock etc. 995 * 996 * The expectation is that any hugetlb folio smaller than a PMD is 997 * always mapped into a single PTE table and that any hugetlb folio 998 * smaller than a PUD (but at least as big as a PMD) is always mapped 999 * into a single PMD table. 1000 * 1001 * If that does not hold for an architecture, then that architecture 1002 * must disable split PT locks such that all *_lockptr() functions 1003 * will give us the same result: the per-MM PT lock. 1004 * 1005 * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where 1006 * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr() 1007 * and core-mm would use pmd_lockptr(). However, in such configurations 1008 * split PMD locks are disabled -- they don't make sense on a single 1009 * PGDIR page table -- and the end result is the same. 1010 */ 1011 if (size >= PUD_SIZE) 1012 return pud_lockptr(mm, (pud_t *) pte); 1013 else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE)) 1014 return pmd_lockptr(mm, (pmd_t *) pte); 1015 /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */ 1016 return ptep_lockptr(mm, pte); 1017 } 1018 1019 #ifndef hugepages_supported 1020 /* 1021 * Some platform decide whether they support huge pages at boot 1022 * time. Some of them, such as powerpc, set HPAGE_SHIFT to 0 1023 * when there is no such support 1024 */ 1025 #define hugepages_supported() (HPAGE_SHIFT != 0) 1026 #endif 1027 1028 void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm); 1029 1030 static inline void hugetlb_count_init(struct mm_struct *mm) 1031 { 1032 atomic_long_set(&mm->hugetlb_usage, 0); 1033 } 1034 1035 static inline void hugetlb_count_add(long l, struct mm_struct *mm) 1036 { 1037 atomic_long_add(l, &mm->hugetlb_usage); 1038 } 1039 1040 static inline void hugetlb_count_sub(long l, struct mm_struct *mm) 1041 { 1042 atomic_long_sub(l, &mm->hugetlb_usage); 1043 } 1044 1045 #ifndef huge_ptep_modify_prot_start 1046 #define huge_ptep_modify_prot_start huge_ptep_modify_prot_start 1047 static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, 1048 unsigned long addr, pte_t *ptep) 1049 { 1050 unsigned long psize = huge_page_size(hstate_vma(vma)); 1051 1052 return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize); 1053 } 1054 #endif 1055 1056 #ifndef huge_ptep_modify_prot_commit 1057 #define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit 1058 static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, 1059 unsigned long addr, pte_t *ptep, 1060 pte_t old_pte, pte_t pte) 1061 { 1062 unsigned long psize = huge_page_size(hstate_vma(vma)); 1063 1064 set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); 1065 } 1066 #endif 1067 1068 #ifdef CONFIG_NUMA 1069 void hugetlb_register_node(struct node *node); 1070 void hugetlb_unregister_node(struct node *node); 1071 #endif 1072 1073 /* 1074 * Check if a given raw @page in a hugepage is HWPOISON. 1075 */ 1076 bool is_raw_hwpoison_page_in_hugepage(struct page *page); 1077 1078 static inline unsigned long huge_page_mask_align(struct file *file) 1079 { 1080 return PAGE_MASK & ~huge_page_mask(hstate_file(file)); 1081 } 1082 1083 #else /* CONFIG_HUGETLB_PAGE */ 1084 struct hstate {}; 1085 1086 static inline unsigned long huge_page_mask_align(struct file *file) 1087 { 1088 return 0; 1089 } 1090 1091 static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) 1092 { 1093 return NULL; 1094 } 1095 1096 static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, 1097 struct address_space *mapping, pgoff_t idx) 1098 { 1099 return NULL; 1100 } 1101 1102 static inline int isolate_or_dissolve_huge_folio(struct folio *folio, 1103 struct list_head *list) 1104 { 1105 return -ENOMEM; 1106 } 1107 1108 static inline int replace_free_hugepage_folios(unsigned long start_pfn, 1109 unsigned long end_pfn) 1110 { 1111 return 0; 1112 } 1113 1114 static inline void wait_for_freed_hugetlb_folios(void) 1115 { 1116 } 1117 1118 static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, 1119 unsigned long addr, 1120 bool cow_from_owner) 1121 { 1122 return NULL; 1123 } 1124 1125 static inline struct folio * 1126 alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, 1127 nodemask_t *nmask, gfp_t gfp_mask) 1128 { 1129 return NULL; 1130 } 1131 1132 static inline struct folio * 1133 alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, 1134 nodemask_t *nmask, gfp_t gfp_mask, 1135 bool allow_alloc_fallback) 1136 { 1137 return NULL; 1138 } 1139 1140 static inline int __alloc_bootmem_huge_page(struct hstate *h) 1141 { 1142 return 0; 1143 } 1144 1145 static inline struct hstate *hstate_file(struct file *f) 1146 { 1147 return NULL; 1148 } 1149 1150 static inline struct hstate *hstate_sizelog(int page_size_log) 1151 { 1152 return NULL; 1153 } 1154 1155 static inline struct hstate *hstate_vma(struct vm_area_struct *vma) 1156 { 1157 return NULL; 1158 } 1159 1160 static inline struct hstate *folio_hstate(struct folio *folio) 1161 { 1162 return NULL; 1163 } 1164 1165 static inline struct hstate *size_to_hstate(unsigned long size) 1166 { 1167 return NULL; 1168 } 1169 1170 static inline unsigned long huge_page_size(struct hstate *h) 1171 { 1172 return PAGE_SIZE; 1173 } 1174 1175 static inline unsigned long huge_page_mask(struct hstate *h) 1176 { 1177 return PAGE_MASK; 1178 } 1179 1180 static inline unsigned int huge_page_order(struct hstate *h) 1181 { 1182 return 0; 1183 } 1184 1185 static inline unsigned int huge_page_shift(struct hstate *h) 1186 { 1187 return PAGE_SHIFT; 1188 } 1189 1190 static inline bool hstate_is_gigantic(struct hstate *h) 1191 { 1192 return false; 1193 } 1194 1195 static inline unsigned int pages_per_huge_page(struct hstate *h) 1196 { 1197 return 1; 1198 } 1199 1200 static inline unsigned hstate_index_to_shift(unsigned index) 1201 { 1202 return 0; 1203 } 1204 1205 static inline int hstate_index(struct hstate *h) 1206 { 1207 return 0; 1208 } 1209 1210 static inline int dissolve_free_hugetlb_folio(struct folio *folio) 1211 { 1212 return 0; 1213 } 1214 1215 static inline int dissolve_free_hugetlb_folios(unsigned long start_pfn, 1216 unsigned long end_pfn) 1217 { 1218 return 0; 1219 } 1220 1221 static inline bool hugepage_migration_supported(struct hstate *h) 1222 { 1223 return false; 1224 } 1225 1226 static inline bool hugepage_movable_supported(struct hstate *h) 1227 { 1228 return false; 1229 } 1230 1231 static inline gfp_t htlb_alloc_mask(struct hstate *h) 1232 { 1233 return 0; 1234 } 1235 1236 static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) 1237 { 1238 return 0; 1239 } 1240 1241 static inline bool htlb_allow_alloc_fallback(int reason) 1242 { 1243 return false; 1244 } 1245 1246 static inline spinlock_t *huge_pte_lockptr(struct hstate *h, 1247 struct mm_struct *mm, pte_t *pte) 1248 { 1249 return &mm->page_table_lock; 1250 } 1251 1252 static inline void hugetlb_count_init(struct mm_struct *mm) 1253 { 1254 } 1255 1256 static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m) 1257 { 1258 } 1259 1260 static inline void hugetlb_count_sub(long l, struct mm_struct *mm) 1261 { 1262 } 1263 1264 static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 1265 unsigned long addr, pte_t *ptep) 1266 { 1267 #ifdef CONFIG_MMU 1268 return ptep_get(ptep); 1269 #else 1270 return *ptep; 1271 #endif 1272 } 1273 1274 static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 1275 pte_t *ptep, pte_t pte, unsigned long sz) 1276 { 1277 } 1278 1279 static inline void hugetlb_register_node(struct node *node) 1280 { 1281 } 1282 1283 static inline void hugetlb_unregister_node(struct node *node) 1284 { 1285 } 1286 1287 static inline bool hugetlbfs_pagecache_present( 1288 struct hstate *h, struct vm_area_struct *vma, unsigned long address) 1289 { 1290 return false; 1291 } 1292 1293 static inline void hugetlb_bootmem_alloc(void) 1294 { 1295 } 1296 #endif /* CONFIG_HUGETLB_PAGE */ 1297 1298 static inline spinlock_t *huge_pte_lock(struct hstate *h, 1299 struct mm_struct *mm, pte_t *pte) 1300 { 1301 spinlock_t *ptl; 1302 1303 ptl = huge_pte_lockptr(h, mm, pte); 1304 spin_lock(ptl); 1305 return ptl; 1306 } 1307 1308 #if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA) 1309 extern void __init hugetlb_cma_reserve(void); 1310 #else 1311 static inline __init void hugetlb_cma_reserve(void) 1312 { 1313 } 1314 #endif 1315 1316 #ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING 1317 static inline bool hugetlb_pmd_shared(pte_t *pte) 1318 { 1319 return ptdesc_pmd_is_shared(virt_to_ptdesc(pte)); 1320 } 1321 #else 1322 static inline bool hugetlb_pmd_shared(pte_t *pte) 1323 { 1324 return false; 1325 } 1326 #endif 1327 1328 bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); 1329 1330 #ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE 1331 /* 1332 * ARCHes with special requirements for evicting HUGETLB backing TLB entries can 1333 * implement this. 1334 */ 1335 #define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) 1336 #endif 1337 1338 static inline bool __vma_shareable_lock(struct vm_area_struct *vma) 1339 { 1340 return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; 1341 } 1342 1343 bool __vma_private_lock(struct vm_area_struct *vma); 1344 1345 /* 1346 * Safe version of huge_pte_offset() to check the locks. See comments 1347 * above huge_pte_offset(). 1348 */ 1349 static inline pte_t * 1350 hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz) 1351 { 1352 #if defined(CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING) && defined(CONFIG_LOCKDEP) 1353 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; 1354 1355 /* 1356 * If pmd sharing possible, locking needed to safely walk the 1357 * hugetlb pgtables. More information can be found at the comment 1358 * above huge_pte_offset() in the same file. 1359 * 1360 * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP. 1361 */ 1362 if (__vma_shareable_lock(vma)) 1363 WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) && 1364 !lockdep_is_held( 1365 &vma->vm_file->f_mapping->i_mmap_rwsem)); 1366 #endif 1367 return huge_pte_offset(vma->vm_mm, addr, sz); 1368 } 1369 1370 #endif /* _LINUX_HUGETLB_H */ 1371