/*
 * PPC Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/mm.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/export.h>
#include <linux/of_fdt.h>
#include <linux/memblock.h>
#include <linux/bootmem.h>
#include <linux/moduleparam.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/setup.h>

#define PAGE_SHIFT_64K	16
#define PAGE_SHIFT_16M	24
#define PAGE_SHIFT_16G	34

unsigned int HPAGE_SHIFT;

/*
 * Tracks gpages after the device tree is scanned and before the
 * huge_boot_pages list is ready.  On non-Freescale implementations, this is
 * just used to track 16G pages and so is a single array.  FSL-based
 * implementations may have more than one gpage size, so we need multiple
 * arrays.
 */
#ifdef CONFIG_PPC_FSL_BOOK3E
#define MAX_NUMBER_GPAGES	128
struct psize_gpages {
	u64 gpage_list[MAX_NUMBER_GPAGES];
	unsigned int nr_gpages;
};
static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
#else
#define MAX_NUMBER_GPAGES	1024
static u64 gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;
#endif

static inline int shift_to_mmu_psize(unsigned int shift)
{
	int psize;

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
		if (mmu_psize_defs[psize].shift == shift)
			return psize;
	return -1;
}

static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
{
	if (mmu_psize_defs[mmu_psize].shift)
		return mmu_psize_defs[mmu_psize].shift;
	BUG();
}

#define hugepd_none(hpd)	((hpd).pd == 0)

pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	unsigned pdshift = PGDIR_SHIFT;

	if (shift)
		*shift = 0;

	pg = pgdir + pgd_index(ea);
	if (is_hugepd(pg)) {
		hpdp = (hugepd_t *)pg;
	} else if (!pgd_none(*pg)) {
		pdshift = PUD_SHIFT;
		pu = pud_offset(pg, ea);
		if (is_hugepd(pu))
			hpdp = (hugepd_t *)pu;
		else if (!pud_none(*pu)) {
			pdshift = PMD_SHIFT;
			pm = pmd_offset(pu, ea);
			if (is_hugepd(pm))
				hpdp = (hugepd_t *)pm;
			else if (!pmd_none(*pm)) {
				return pte_offset_kernel(pm, ea);
			}
		}
	}

	if (!hpdp)
		return NULL;

	if (shift)
		*shift = hugepd_shift(*hpdp);
	return hugepte_offset(hpdp, ea, pdshift);
}
EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
}
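/*
 * Allocate a hugepte table and hook it up below the hugepd entry at *hpdp.
 *
 * Note (illustrative): on FSL Book3E a huge page may span more than one
 * directory entry, so 1 << (pshift - pdshift) consecutive entries are all
 * pointed at the same table (e.g. pshift = 22, pdshift = 21 fills two).
 * Elsewhere a single entry is used and the table size is determined by
 * (pdshift - pshift).
 */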
static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
			   unsigned long address, unsigned pdshift, unsigned pshift)
{
	struct kmem_cache *cachep;
	pte_t *new;

#ifdef CONFIG_PPC_FSL_BOOK3E
	int i;
	int num_hugepd = 1 << (pshift - pdshift);
	cachep = hugepte_cache;
#else
	cachep = PGT_CACHE(pdshift - pshift);
#endif

	new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);

	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);

	if (!new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
#ifdef CONFIG_PPC_FSL_BOOK3E
	/*
	 * We have multiple higher-level entries that point to the same
	 * actual pte location.  Fill in each as we go and backtrack on error.
	 * We need all of these so the DTLB pgtable walk code can find the
	 * right higher-level entry without knowing if it's a hugepage or not.
	 */
	for (i = 0; i < num_hugepd; i++, hpdp++) {
		if (unlikely(!hugepd_none(*hpdp)))
			break;
		else
			hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
	}
	/* If we bailed out of the for loop early, an error occurred; clean up. */
	if (i < num_hugepd) {
		for (i = i - 1; i >= 0; i--, hpdp--)
			hpdp->pd = 0;
		kmem_cache_free(cachep, new);
	}
#else
	if (!hugepd_none(*hpdp))
		kmem_cache_free(cachep, new);
	else
		hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#endif
	spin_unlock(&mm->page_table_lock);
	return 0;
}

/*
 * These macros define how to determine which level of the page table holds
 * the hpdp.
 */
#ifdef CONFIG_PPC_FSL_BOOK3E
#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
#define HUGEPD_PUD_SHIFT PUD_SHIFT
#else
#define HUGEPD_PGD_SHIFT PUD_SHIFT
#define HUGEPD_PUD_SHIFT PMD_SHIFT
#endif

pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	unsigned pshift = __ffs(sz);
	unsigned pdshift = PGDIR_SHIFT;

	addr &= ~(sz-1);

	pg = pgd_offset(mm, addr);

	if (pshift >= HUGEPD_PGD_SHIFT) {
		hpdp = (hugepd_t *)pg;
	} else {
		pdshift = PUD_SHIFT;
		pu = pud_alloc(mm, pg, addr);
		if (pshift >= HUGEPD_PUD_SHIFT) {
			hpdp = (hugepd_t *)pu;
		} else {
			pdshift = PMD_SHIFT;
			pm = pmd_alloc(mm, pu, addr);
			hpdp = (hugepd_t *)pm;
		}
	}

	if (!hpdp)
		return NULL;

	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));

	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
		return NULL;

	return hugepte_offset(hpdp, addr, pdshift);
}
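/*
 * Illustrative only: a typical fault-path caller does roughly
 *
 *	ptep = huge_pte_alloc(mm, address, huge_page_size(h));
 *	if (!ptep)
 *		return VM_FAULT_OOM;
 *
 * where 'h' is the hstate of the mapping; the hugepd and hugepte table
 * covering 'address' are created on first use.
 */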
#ifdef CONFIG_PPC_FSL_BOOK3E
/* Build list of addresses of gigantic pages.  This function is used in early
 * boot before the buddy or bootmem allocator is set up.
 */
void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
	unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
	int i;

	if (addr == 0)
		return;

	gpage_freearray[idx].nr_gpages = number_of_pages;

	for (i = 0; i < number_of_pages; i++) {
		gpage_freearray[idx].gpage_list[i] = addr;
		addr += page_size;
	}
}

/*
 * Moves the gigantic page addresses from the temporary list to the
 * huge_boot_pages list.
 */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
	struct huge_bootmem_page *m;
	int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT);
	int nr_gpages = gpage_freearray[idx].nr_gpages;

	if (nr_gpages == 0)
		return 0;

#ifdef CONFIG_HIGHMEM
	/*
	 * If gpages can be in highmem we can't use the trick of storing the
	 * data structure in the page; allocate space for this.
	 */
	m = alloc_bootmem(sizeof(struct huge_bootmem_page));
	m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
#else
	m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
#endif

	list_add(&m->list, &huge_boot_pages);
	gpage_freearray[idx].nr_gpages = nr_gpages;
	gpage_freearray[idx].gpage_list[nr_gpages] = 0;
	m->hstate = hstate;

	return 1;
}

/*
 * Scan the command line hugepagesz= options for gigantic pages; store those in
 * a list that we use to allocate the memory once all options are parsed.
 */

unsigned long gpage_npages[MMU_PAGE_COUNT];

static int __init do_gpage_early_setup(char *param, char *val,
				       const char *unused)
{
	static phys_addr_t size;
	unsigned long npages;

	/*
	 * The hugepagesz and hugepages cmdline options are interleaved.  We
	 * use the size variable to keep track of whether or not this was done
	 * properly and skip over instances where it is incorrect.  Other
	 * command-line parsing code will issue warnings, so we don't need to.
	 */
	if ((strcmp(param, "default_hugepagesz") == 0) ||
	    (strcmp(param, "hugepagesz") == 0)) {
		size = memparse(val, NULL);
	} else if (strcmp(param, "hugepages") == 0) {
		if (size != 0) {
			if (sscanf(val, "%lu", &npages) <= 0)
				npages = 0;
			gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
			size = 0;
		}
	}
	return 0;
}
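/*
 * Illustrative only: a command line such as
 *
 *	hugepagesz=4G hugepages=2 hugepagesz=1G hugepages=16
 *
 * leaves gpage_npages[] asking for two 4G and sixteen 1G gigantic pages
 * (assuming the MMU supports both sizes); reserve_hugetlb_gpages() below
 * then carves the sizes the buddy allocator cannot handle out of memblock.
 */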
/*
 * This function allocates physical space for pages that are larger than the
 * buddy allocator can handle.  We want to allocate these in highmem because
 * the amount of lowmem is limited.  This means that this function MUST be
 * called before lowmem_end_addr is set up in MMU_init() in order for the
 * memblock allocator to grab highmem.
 */
void __init reserve_hugetlb_gpages(void)
{
	static __initdata char cmdline[COMMAND_LINE_SIZE];
	phys_addr_t size, base;
	int i;

	strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
	parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
		   &do_gpage_early_setup);

	/*
	 * Walk gpage list in reverse, allocating larger page sizes first.
	 * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
	 * When we reach the point in the list where pages are no longer
	 * considered gpages, we're done.
	 */
	for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
		if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
			continue;
		else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
			break;

		size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
		base = memblock_alloc_base(size * gpage_npages[i], size,
					   MEMBLOCK_ALLOC_ANYWHERE);
		add_gpage(base, size, gpage_npages[i]);
	}
}

#else /* !PPC_FSL_BOOK3E */

/* Build list of addresses of gigantic pages.  This function is used in early
 * boot before the buddy or bootmem allocator is set up.
 */
void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
	if (!addr)
		return;
	while (number_of_pages > 0) {
		gpage_freearray[nr_gpages] = addr;
		nr_gpages++;
		number_of_pages--;
		addr += page_size;
	}
}

/* Moves the gigantic page addresses from the temporary list to the
 * huge_boot_pages list.
 */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
	struct huge_bootmem_page *m;
	if (nr_gpages == 0)
		return 0;
	m = phys_to_virt(gpage_freearray[--nr_gpages]);
	gpage_freearray[nr_gpages] = 0;
	list_add(&m->list, &huge_boot_pages);
	m->hstate = hstate;
	return 1;
}
#endif

int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	return 0;
}

#ifdef CONFIG_PPC_FSL_BOOK3E
#define HUGEPD_FREELIST_SIZE \
	((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))

struct hugepd_freelist {
	struct rcu_head	rcu;
	unsigned int index;
	void *ptes[0];
};

static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);

static void hugepd_free_rcu_callback(struct rcu_head *head)
{
	struct hugepd_freelist *batch =
		container_of(head, struct hugepd_freelist, rcu);
	unsigned int i;

	for (i = 0; i < batch->index; i++)
		kmem_cache_free(hugepte_cache, batch->ptes[i]);

	free_page((unsigned long)batch);
}

static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
{
	struct hugepd_freelist **batchp;

	batchp = &__get_cpu_var(hugepd_freelist_cur);

	if (atomic_read(&tlb->mm->mm_users) < 2 ||
	    cpumask_equal(mm_cpumask(tlb->mm),
			  cpumask_of(smp_processor_id()))) {
		kmem_cache_free(hugepte_cache, hugepte);
		return;
	}

	if (*batchp == NULL) {
		*batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
		(*batchp)->index = 0;
	}

	(*batchp)->ptes[(*batchp)->index++] = hugepte;
	if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
		call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
		*batchp = NULL;
	}
}
#endif
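/*
 * Free the hugepte table referenced by *hpdp and clear the hugepd entry
 * (on FSL Book3E, the whole run of identical entries) pointing at it,
 * subject to the usual floor/ceiling limits.  The table itself goes through
 * hugepd_free() or pgtable_free_tlb(), i.e. it is freed only after the TLB
 * flush, so concurrent lockless walkers do not see it vanish underneath them.
 */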
static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
			      unsigned long start, unsigned long end,
			      unsigned long floor, unsigned long ceiling)
{
	pte_t *hugepte = hugepd_page(*hpdp);
	int i;

	unsigned long pdmask = ~((1UL << pdshift) - 1);
	unsigned int num_hugepd = 1;

#ifdef CONFIG_PPC_FSL_BOOK3E
	/* Note: On fsl the hpdp may be the first of several */
	num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
#else
	unsigned int shift = hugepd_shift(*hpdp);
#endif

	start &= pdmask;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= pdmask;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	for (i = 0; i < num_hugepd; i++, hpdp++)
		hpdp->pd = 0;

	tlb->need_flush = 1;

#ifdef CONFIG_PPC_FSL_BOOK3E
	hugepd_free(tlb, hugepte);
#else
	pgtable_free_tlb(tlb, hugepte, pdshift - shift);
#endif
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	do {
		pmd = pmd_offset(pud, addr);
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd))
			continue;
#ifdef CONFIG_PPC_FSL_BOOK3E
		/*
		 * Increment next by the size of the huge mapping since
		 * there may be more than one entry at this level for a
		 * single hugepage, but all of them point to
		 * the same kmem cache that holds the hugepte.
		 */
		next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
#endif
		free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
				  addr, next, floor, ceiling);
	} while (addr = next, addr != end);

	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd, start);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	do {
		pud = pud_offset(pgd, addr);
		next = pud_addr_end(addr, end);
		if (!is_hugepd(pud)) {
			if (pud_none_or_clear_bad(pud))
				continue;
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling);
		} else {
#ifdef CONFIG_PPC_FSL_BOOK3E
			/*
			 * Increment next by the size of the huge mapping since
			 * there may be more than one entry at this level for a
			 * single hugepage, but all of them point to
			 * the same kmem cache that holds the hugepte.
			 */
			next = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
#endif
			free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (addr = next, addr != end);

	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud, start);
}

/*
 * This function frees user-level page tables of a process.
 *
 * Must be called with pagetable lock held.
 */
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;

	/*
	 * Because there are a number of different possible pagetable
	 * layouts for hugepage ranges, we limit knowledge of how
	 * things should be laid out to the allocation path
	 * (huge_pte_alloc(), above).  Everything else works out the
	 * structure as it goes from information in the hugepd
	 * pointers.  That means that we can't use the optimization
	 * that the normal free_pgd_range() uses, of checking whether
	 * we're actually covering a large enough range to have to do
	 * anything at the top level of the walk instead of at the
	 * bottom.
	 *
	 * To make sense of this, you should probably go read the big
	 * block comment at the top of the normal free_pgd_range(),
	 * too.
	 */

	do {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset(tlb->mm, addr);
		if (!is_hugepd(pgd)) {
			if (pgd_none_or_clear_bad(pgd))
				continue;
			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
		} else {
#ifdef CONFIG_PPC_FSL_BOOK3E
			/*
			 * Increment next by the size of the huge mapping since
			 * there may be more than one entry at the pgd level
			 * for a single hugepage, but all of them point to the
			 * same kmem cache that holds the hugepte.
			 */
			next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
#endif
			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (addr = next, addr != end);
}
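/*
 * Return the struct page backing 'address' when it is mapped by a huge
 * page, adjusted to the exact PAGE_SIZE subpage within the huge page;
 * otherwise return ERR_PTR(-EINVAL) so the caller falls back to the
 * normal page-table walk.
 */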
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	pte_t *ptep;
	struct page *page;
	unsigned shift;
	unsigned long mask;

	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);

	/* Verify it is a huge page else bail. */
	if (!ptep || !shift)
		return ERR_PTR(-EINVAL);

	mask = (1UL << shift) - 1;
	page = pte_page(*ptep);
	if (page)
		page += (address & mask) / PAGE_SIZE;

	return page;
}

int pmd_huge(pmd_t pmd)
{
	return 0;
}

int pud_huge(pud_t pud)
{
	return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	BUG();
	return NULL;
}

static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
		       unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long mask;
	unsigned long pte_end;
	struct page *head, *page, *tail;
	pte_t pte;
	int refs;

	pte_end = (addr + sz) & ~(sz-1);
	if (pte_end < end)
		end = pte_end;

	pte = *ptep;
	mask = _PAGE_PRESENT | _PAGE_USER;
	if (write)
		mask |= _PAGE_RW;

	if ((pte_val(pte) & mask) != mask)
		return 0;

	/* hugepages are never "special" */
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);

	page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
	tail = page;
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
		/* Could be optimized better */
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	/*
	 * Any tail pages need their mapcount reference taken before we
	 * return.
	 */
	while (refs--) {
		if (PageTail(tail))
			get_huge_page_tail(tail);
		tail++;
	}

	return 1;
}

static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
				      unsigned long sz)
{
	unsigned long __boundary = (addr + sz) & ~(sz-1);
	return (__boundary - 1 < end - 1) ? __boundary : end;
}
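/*
 * Lockless get_user_pages_fast() helper: walk every hugepte under one
 * hugepd, taking speculative references page by page via gup_hugepte().
 * Returns 0 as soon as any hugepte cannot be handled, which tells the
 * caller to fall back to the slow GUP path.
 */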
int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
	       unsigned long addr, unsigned long end,
	       int write, struct page **pages, int *nr)
{
	pte_t *ptep;
	unsigned long sz = 1UL << hugepd_shift(*hugepd);
	unsigned long next;

	ptep = hugepte_offset(hugepd, addr, pdshift);
	do {
		next = hugepte_addr_end(addr, end, sz);
		if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
			return 0;
	} while (ptep++, addr = next, addr != end);

	return 1;
}

#ifdef CONFIG_PPC_MM_SLICES
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags)
{
	struct hstate *hstate = hstate_file(file);
	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));

	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
}
#endif

unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
#ifdef CONFIG_PPC_MM_SLICES
	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);

	return 1UL << mmu_psize_to_shift(psize);
#else
	if (!is_vm_hugetlb_page(vma))
		return PAGE_SIZE;

	return huge_page_size(hstate_vma(vma));
#endif
}

static inline bool is_power_of_4(unsigned long x)
{
	if (is_power_of_2(x))
		return (__ilog2(x) % 2) ? false : true;
	return false;
}

static int __init add_huge_page_size(unsigned long long size)
{
	int shift = __ffs(size);
	int mmu_psize;

	/* Check that it is a page size supported by the hardware and
	 * that it fits within pagetable and slice limits.
	 */
#ifdef CONFIG_PPC_FSL_BOOK3E
	if ((size < PAGE_SIZE) || !is_power_of_4(size))
		return -EINVAL;
#else
	if (!is_power_of_2(size)
	    || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
		return -EINVAL;
#endif

	if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
		return -EINVAL;

#ifdef CONFIG_SPU_FS_64K_LS
	/* Disable support for 64K huge pages when 64K SPU local store
	 * support is enabled as the current implementation conflicts.
	 */
	if (shift == PAGE_SHIFT_64K)
		return -EINVAL;
#endif /* CONFIG_SPU_FS_64K_LS */

	BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);

	/* Return if huge page size has already been set up. */
	if (size_to_hstate(size))
		return 0;

	hugetlb_add_hstate(shift - PAGE_SHIFT);

	return 0;
}

static int __init hugepage_setup_sz(char *str)
{
	unsigned long long size;

	size = memparse(str, &str);

	if (add_huge_page_size(size) != 0)
		printk(KERN_WARNING "Invalid huge page size specified (%llu)\n",
		       size);

	return 1;
}
__setup("hugepagesz=", hugepage_setup_sz);
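/*
 * hugetlbpage_init(): register an hstate for every huge page size the MMU
 * reports, create the kmem caches that back the hugepte tables (a single
 * hugepte_cache on FSL Book3E, per-size pgtable caches elsewhere), and
 * choose the default huge page size (HPAGE_SHIFT).
 */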
#ifdef CONFIG_PPC_FSL_BOOK3E
struct kmem_cache *hugepte_cache;
static int __init hugetlbpage_init(void)
{
	int psize;

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
		unsigned shift;

		if (!mmu_psize_defs[psize].shift)
			continue;

		shift = mmu_psize_to_shift(psize);

		/* Don't treat normal page sizes as huge... */
		if (shift != PAGE_SHIFT)
			if (add_huge_page_size(1ULL << shift) < 0)
				continue;
	}

	/*
	 * Create a kmem cache for hugeptes.  The bottom bits in the pte have
	 * size information encoded in them, so align them to allow this.
	 */
	hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
					  HUGEPD_SHIFT_MASK + 1, 0, NULL);
	if (hugepte_cache == NULL)
		panic("%s: Unable to create kmem cache for hugeptes\n",
		      __func__);

	/* Default hpage size = 4M */
	if (mmu_psize_defs[MMU_PAGE_4M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
	else
		panic("%s: Unable to set default huge page size\n", __func__);

	return 0;
}
#else
static int __init hugetlbpage_init(void)
{
	int psize;

	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
		return -ENODEV;

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
		unsigned shift;
		unsigned pdshift;

		if (!mmu_psize_defs[psize].shift)
			continue;

		shift = mmu_psize_to_shift(psize);

		if (add_huge_page_size(1ULL << shift) < 0)
			continue;

		if (shift < PMD_SHIFT)
			pdshift = PMD_SHIFT;
		else if (shift < PUD_SHIFT)
			pdshift = PUD_SHIFT;
		else
			pdshift = PGDIR_SHIFT;

		pgtable_cache_add(pdshift - shift, NULL);
		if (!PGT_CACHE(pdshift - shift))
			panic("hugetlbpage_init(): could not create "
			      "pgtable cache for %d bit pagesize\n", shift);
	}

	/* Set default large page size.  Currently, we pick 16M or 1M
	 * depending on what is available.
	 */
	if (mmu_psize_defs[MMU_PAGE_16M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;

	return 0;
}
#endif
module_init(hugetlbpage_init);

void flush_dcache_icache_hugepage(struct page *page)
{
	int i;
	void *start;

	BUG_ON(!PageCompound(page));

	for (i = 0; i < (1UL << compound_order(page)); i++) {
		if (!PageHighMem(page)) {
			__flush_dcache_icache(page_address(page+i));
		} else {
			start = kmap_atomic(page+i);
			__flush_dcache_icache(start);
			kunmap_atomic(start);
		}
	}
}