/*
 * PPC Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/mm.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/of_fdt.h>
#include <linux/memblock.h>
#include <linux/bootmem.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/setup.h>

#define PAGE_SHIFT_64K	16
#define PAGE_SHIFT_16M	24
#define PAGE_SHIFT_16G	34

unsigned int HPAGE_SHIFT;

/*
 * Tracks gpages after the device tree is scanned and before the
 * huge_boot_pages list is ready.  On 64-bit implementations, this is
 * just used to track 16G pages and so is a single array.  32-bit
 * implementations may have more than one gpage size due to limitations
 * of the memory allocators, so we need multiple arrays.
 */
#ifdef CONFIG_PPC64
#define MAX_NUMBER_GPAGES	1024
static u64 gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;
#else
#define MAX_NUMBER_GPAGES	128
struct psize_gpages {
	u64 gpage_list[MAX_NUMBER_GPAGES];
	unsigned int nr_gpages;
};
static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
#endif

static inline int shift_to_mmu_psize(unsigned int shift)
{
	int psize;

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
		if (mmu_psize_defs[psize].shift == shift)
			return psize;
	return -1;
}

static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
{
	if (mmu_psize_defs[mmu_psize].shift)
		return mmu_psize_defs[mmu_psize].shift;
	BUG();
}

#define hugepd_none(hpd)	((hpd).pd == 0)

/*
 * Walk the page tables for @ea, stepping into hugepd directories where
 * they appear.  Returns a pointer to the PTE mapping the address, or NULL
 * if nothing is mapped there.  If @shift is non-NULL it is set to the huge
 * page shift for a hugepage mapping, or to 0 for a normal page.
 */
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	unsigned pdshift = PGDIR_SHIFT;

	if (shift)
		*shift = 0;

	pg = pgdir + pgd_index(ea);
	if (is_hugepd(pg)) {
		hpdp = (hugepd_t *)pg;
	} else if (!pgd_none(*pg)) {
		pdshift = PUD_SHIFT;
		pu = pud_offset(pg, ea);
		if (is_hugepd(pu))
			hpdp = (hugepd_t *)pu;
		else if (!pud_none(*pu)) {
			pdshift = PMD_SHIFT;
			pm = pmd_offset(pu, ea);
			if (is_hugepd(pm))
				hpdp = (hugepd_t *)pm;
			else if (!pmd_none(*pm)) {
				return pte_offset_kernel(pm, ea);
			}
		}
	}

	if (!hpdp)
		return NULL;

	if (shift)
		*shift = hugepd_shift(*hpdp);
	return hugepte_offset(hpdp, ea, pdshift);
}

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
}
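
/*
 * A hugepd entry does not point at a normal page-table page; it holds the
 * address of a separately allocated table of huge PTEs, with the huge page
 * shift packed into its low bits.  That is why __hugepte_alloc() below
 * checks pshift against HUGEPD_SHIFT_MASK and requires the table to be
 * allocated with at least HUGEPD_SHIFT_MASK + 1 alignment; hugepd_shift()
 * recovers the shift and hugepte_offset() indexes into the table.
 */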

static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
			   unsigned long address, unsigned pdshift, unsigned pshift)
{
	struct kmem_cache *cachep;
	pte_t *new;

#ifdef CONFIG_PPC64
	cachep = PGT_CACHE(pdshift - pshift);
#else
	int i;
	int num_hugepd = 1 << (pshift - pdshift);
	cachep = hugepte_cache;
#endif

	new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);

	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);

	if (!new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
#ifdef CONFIG_PPC64
	if (!hugepd_none(*hpdp))
		kmem_cache_free(cachep, new);
	else
		hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#else
	/*
	 * We have multiple higher-level entries that point to the same
	 * actual pte location.  Fill in each as we go and backtrack on error.
	 * We need all of these so the DTLB pgtable walk code can find the
	 * right higher-level entry without knowing if it's a hugepage or not.
	 */
	for (i = 0; i < num_hugepd; i++, hpdp++) {
		if (unlikely(!hugepd_none(*hpdp)))
			break;
		else
			hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
	}
	/* If we bailed from the for loop early, an error occurred, clean up */
	if (i < num_hugepd) {
		for (i = i - 1; i >= 0; i--, hpdp--)
			hpdp->pd = 0;
		kmem_cache_free(cachep, new);
	}
#endif
	spin_unlock(&mm->page_table_lock);
	return 0;
}

pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	unsigned pshift = __ffs(sz);
	unsigned pdshift = PGDIR_SHIFT;

	addr &= ~(sz-1);

	pg = pgd_offset(mm, addr);
	if (pshift >= PUD_SHIFT) {
		hpdp = (hugepd_t *)pg;
	} else {
		pdshift = PUD_SHIFT;
		pu = pud_alloc(mm, pg, addr);
		if (pshift >= PMD_SHIFT) {
			hpdp = (hugepd_t *)pu;
		} else {
			pdshift = PMD_SHIFT;
			pm = pmd_alloc(mm, pu, addr);
			hpdp = (hugepd_t *)pm;
		}
	}

	if (!hpdp)
		return NULL;

	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));

	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
		return NULL;

	return hugepte_offset(hpdp, addr, pdshift);
}

#ifdef CONFIG_PPC32
/* Build list of addresses of gigantic pages.  This function is used in early
 * boot before the buddy or bootmem allocator is set up.
 */
void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
	unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
	int i;

	if (addr == 0)
		return;

	gpage_freearray[idx].nr_gpages = number_of_pages;

	for (i = 0; i < number_of_pages; i++) {
		gpage_freearray[idx].gpage_list[i] = addr;
		addr += page_size;
	}
}

/*
 * Moves the gigantic page addresses from the temporary list to the
 * huge_boot_pages list.
 */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
	struct huge_bootmem_page *m;
	int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT);
	int nr_gpages = gpage_freearray[idx].nr_gpages;

	if (nr_gpages == 0)
		return 0;

#ifdef CONFIG_HIGHMEM
	/*
	 * If gpages can be in highmem we can't use the trick of storing the
	 * data structure in the page; allocate space for this
	 */
	m = alloc_bootmem(sizeof(struct huge_bootmem_page));
	m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
#else
	m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
#endif

	list_add(&m->list, &huge_boot_pages);
	gpage_freearray[idx].nr_gpages = nr_gpages;
	gpage_freearray[idx].gpage_list[nr_gpages] = 0;
	m->hstate = hstate;

	return 1;
}
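
/*
 * For example, a command line such as "hugepagesz=256m hugepages=4
 * hugepagesz=1g hugepages=2" (the sizes here are only illustrative) is
 * handled by do_gpage_early_setup() below pairing each hugepagesz= value
 * with the hugepages= count that follows it, so gpage_npages[] records 4
 * pages for the 256M size and 2 for the 1G size; reserve_hugetlb_gpages()
 * then reserves that memory from memblock before the normal allocators
 * exist.
 */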

/*
 * Scan the command line hugepagesz= options for gigantic pages; store those in
 * a list that we use to allocate the memory once all options are parsed.
 */

unsigned long gpage_npages[MMU_PAGE_COUNT];

static int __init do_gpage_early_setup(char *param, char *val)
{
	static phys_addr_t size;
	unsigned long npages;

	/*
	 * The hugepagesz and hugepages cmdline options are interleaved.  We
	 * use the size variable to keep track of whether or not this was done
	 * properly and skip over instances where it is incorrect.  Other
	 * command-line parsing code will issue warnings, so we don't need to.
	 */
	if ((strcmp(param, "default_hugepagesz") == 0) ||
	    (strcmp(param, "hugepagesz") == 0)) {
		size = memparse(val, NULL);
	} else if (strcmp(param, "hugepages") == 0) {
		if (size != 0) {
			if (sscanf(val, "%lu", &npages) <= 0)
				npages = 0;
			gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
			size = 0;
		}
	}
	return 0;
}

/*
 * This function allocates physical space for pages that are larger than the
 * buddy allocator can handle.  We want to allocate these in highmem because
 * the amount of lowmem is limited.  This means that this function MUST be
 * called before lowmem_end_addr is set up in MMU_init() in order for the lmb
 * allocator to grab highmem.
 */
void __init reserve_hugetlb_gpages(void)
{
	static __initdata char cmdline[COMMAND_LINE_SIZE];
	phys_addr_t size, base;
	int i;

	strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
	parse_args("hugetlb gpages", cmdline, NULL, 0, &do_gpage_early_setup);

	/*
	 * Walk gpage list in reverse, allocating larger page sizes first.
	 * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
	 * When we reach the point in the list where pages are no longer
	 * considered gpages, we're done.
	 */
	for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
		if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
			continue;
		else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
			break;

		size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
		base = memblock_alloc_base(size * gpage_npages[i], size,
					   MEMBLOCK_ALLOC_ANYWHERE);
		add_gpage(base, size, gpage_npages[i]);
	}
}

#else /* PPC64 */

/* Build list of addresses of gigantic pages.  This function is used in early
 * boot before the buddy or bootmem allocator is set up.
 */
void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
	if (!addr)
		return;
	while (number_of_pages > 0) {
		gpage_freearray[nr_gpages] = addr;
		nr_gpages++;
		number_of_pages--;
		addr += page_size;
	}
}
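
/*
 * On 64-bit the gigantic page itself is always directly addressable, so
 * alloc_bootmem_huge_page() below reuses the start of the page (via
 * phys_to_virt()) to hold the struct huge_bootmem_page that is linked onto
 * huge_boot_pages, instead of allocating a separate tracking structure.
 */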

/* Moves the gigantic page addresses from the temporary list to the
 * huge_boot_pages list.
 */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
	struct huge_bootmem_page *m;
	if (nr_gpages == 0)
		return 0;
	m = phys_to_virt(gpage_freearray[--nr_gpages]);
	gpage_freearray[nr_gpages] = 0;
	list_add(&m->list, &huge_boot_pages);
	m->hstate = hstate;
	return 1;
}
#endif

int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	return 0;
}

#ifdef CONFIG_PPC32
#define HUGEPD_FREELIST_SIZE \
	((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))

struct hugepd_freelist {
	struct rcu_head rcu;
	unsigned int index;
	void *ptes[0];
};

static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);

static void hugepd_free_rcu_callback(struct rcu_head *head)
{
	struct hugepd_freelist *batch =
		container_of(head, struct hugepd_freelist, rcu);
	unsigned int i;

	for (i = 0; i < batch->index; i++)
		kmem_cache_free(hugepte_cache, batch->ptes[i]);

	free_page((unsigned long)batch);
}

static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
{
	struct hugepd_freelist **batchp;

	batchp = &__get_cpu_var(hugepd_freelist_cur);

	if (atomic_read(&tlb->mm->mm_users) < 2 ||
	    cpumask_equal(mm_cpumask(tlb->mm),
			  cpumask_of(smp_processor_id()))) {
		kmem_cache_free(hugepte_cache, hugepte);
		return;
	}

	if (*batchp == NULL) {
		*batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
		(*batchp)->index = 0;
	}

	(*batchp)->ptes[(*batchp)->index++] = hugepte;
	if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
		call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
		*batchp = NULL;
	}
}
#endif
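
/*
 * free_hugepd_range() below clears a hugepd entry (on 32-bit, the whole
 * run of entries that alias one hugepte table) once the floor/ceiling
 * checks show the covering region is no longer needed, then hands the
 * hugepte table to pgtable_free_tlb() on 64-bit or hugepd_free() above on
 * 32-bit, so it is not freed while lockless walkers may still be using it.
 */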

static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
			      unsigned long start, unsigned long end,
			      unsigned long floor, unsigned long ceiling)
{
	pte_t *hugepte = hugepd_page(*hpdp);
	int i;

	unsigned long pdmask = ~((1UL << pdshift) - 1);
	unsigned int num_hugepd = 1;

#ifdef CONFIG_PPC64
	unsigned int shift = hugepd_shift(*hpdp);
#else
	/* Note: On 32-bit the hpdp may be the first of several */
	num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
#endif

	start &= pdmask;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= pdmask;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	for (i = 0; i < num_hugepd; i++, hpdp++)
		hpdp->pd = 0;

	tlb->need_flush = 1;
#ifdef CONFIG_PPC64
	pgtable_free_tlb(tlb, hugepte, pdshift - shift);
#else
	hugepd_free(tlb, hugepte);
#endif
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd))
			continue;
		free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
				  addr, next, floor, ceiling);
	} while (pmd++, addr = next, addr != end);

	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd, start);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (!is_hugepd(pud)) {
			if (pud_none_or_clear_bad(pud))
				continue;
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling);
		} else {
			free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (pud++, addr = next, addr != end);

	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud, start);
}

/*
 * This function frees user-level page tables of a process.
 *
 * Must be called with pagetable lock held.
 */
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;

	/*
	 * Because there are a number of different possible pagetable
	 * layouts for hugepage ranges, we limit knowledge of how
	 * things should be laid out to the allocation path
	 * (huge_pte_alloc(), above).  Everything else works out the
	 * structure as it goes from information in the hugepd
	 * pointers.  That means that, unlike the normal page
	 * free_pgd_range(), we can't use the optimization of checking
	 * whether we're actually covering a large enough range to have
	 * to do anything at the top level of the walk instead of at
	 * the bottom.
	 *
	 * To make sense of this, you should probably go read the big
	 * block comment at the top of the normal free_pgd_range(),
	 * too.
	 */

	do {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset(tlb->mm, addr);
		if (!is_hugepd(pgd)) {
			if (pgd_none_or_clear_bad(pgd))
				continue;
			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
		} else {
#ifdef CONFIG_PPC32
			/*
			 * Increment next by the size of the huge mapping since
			 * on 32-bit there may be more than one entry at the pgd
			 * level for a single hugepage, but all of them point to
			 * the same kmem cache that holds the hugepte.
			 */
			next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
#endif
			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (addr = next, addr != end);
}

struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	pte_t *ptep;
	struct page *page;
	unsigned shift;
	unsigned long mask;

	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);

	/* Verify it is a huge page else bail. */
	if (!ptep || !shift)
		return ERR_PTR(-EINVAL);

	mask = (1UL << shift) - 1;
	page = pte_page(*ptep);
	if (page)
		page += (address & mask) / PAGE_SIZE;

	return page;
}

int pmd_huge(pmd_t pmd)
{
	return 0;
}

int pud_huge(pud_t pud)
{
	return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	BUG();
	return NULL;
}

static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
				unsigned long end, int write,
				struct page **pages, int *nr)
{
	unsigned long mask;
	unsigned long pte_end;
	struct page *head, *page, *tail;
	pte_t pte;
	int refs;

	pte_end = (addr + sz) & ~(sz-1);
	if (pte_end < end)
		end = pte_end;

	pte = *ptep;
	mask = _PAGE_PRESENT | _PAGE_USER;
	if (write)
		mask |= _PAGE_RW;

	if ((pte_val(pte) & mask) != mask)
		return 0;

	/* hugepages are never "special" */
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);

	page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
	tail = page;
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
		/* Could be optimized better */
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	/*
	 * Any tail pages need their mapcount reference taken before we
	 * return.
	 */
	while (refs--) {
		if (PageTail(tail))
			get_huge_page_tail(tail);
		tail++;
	}

	return 1;
}
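
/*
 * Clamp to whichever comes first: the end of the current huge page of size
 * sz, or the end of the range being walked.  This is the huge-page
 * equivalent of pmd_addr_end(), written with the "- 1" comparison so it
 * stays correct when the boundary wraps to 0.
 */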

static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
				      unsigned long sz)
{
	unsigned long __boundary = (addr + sz) & ~(sz-1);
	return (__boundary - 1 < end - 1) ? __boundary : end;
}

int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
	       unsigned long addr, unsigned long end,
	       int write, struct page **pages, int *nr)
{
	pte_t *ptep;
	unsigned long sz = 1UL << hugepd_shift(*hugepd);
	unsigned long next;

	ptep = hugepte_offset(hugepd, addr, pdshift);
	do {
		next = hugepte_addr_end(addr, end, sz);
		if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
			return 0;
	} while (ptep++, addr = next, addr != end);

	return 1;
}

unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags)
{
#ifdef CONFIG_PPC_MM_SLICES
	struct hstate *hstate = hstate_file(file);
	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));

	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
#else
	return get_unmapped_area(file, addr, len, pgoff, flags);
#endif
}

unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
#ifdef CONFIG_PPC_MM_SLICES
	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);

	return 1UL << mmu_psize_to_shift(psize);
#else
	if (!is_vm_hugetlb_page(vma))
		return PAGE_SIZE;

	return huge_page_size(hstate_vma(vma));
#endif
}

static inline bool is_power_of_4(unsigned long x)
{
	if (is_power_of_2(x))
		return (__ilog2(x) % 2) ? false : true;
	return false;
}

static int __init add_huge_page_size(unsigned long long size)
{
	int shift = __ffs(size);
	int mmu_psize;

	/* Check that it is a page size supported by the hardware and
	 * that it fits within pagetable and slice limits. */
#ifdef CONFIG_PPC_FSL_BOOK3E
	if ((size < PAGE_SIZE) || !is_power_of_4(size))
		return -EINVAL;
#else
	if (!is_power_of_2(size)
	    || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
		return -EINVAL;
#endif

	if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
		return -EINVAL;

#ifdef CONFIG_SPU_FS_64K_LS
	/* Disable support for 64K huge pages when 64K SPU local store
	 * support is enabled as the current implementation conflicts.
	 */
	if (shift == PAGE_SHIFT_64K)
		return -EINVAL;
#endif /* CONFIG_SPU_FS_64K_LS */

	BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);

	/* Return if huge page size has already been set up */
	if (size_to_hstate(size))
		return 0;

	hugetlb_add_hstate(shift - PAGE_SHIFT);

	return 0;
}

static int __init hugepage_setup_sz(char *str)
{
	unsigned long long size;

	size = memparse(str, &str);

	if (add_huge_page_size(size) != 0)
		printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);

	return 1;
}
__setup("hugepagesz=", hugepage_setup_sz);
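
/*
 * Two flavours of init follow: FSL Book3E parts use a single kmem cache
 * for hugepte tables, aligned to HUGEPD_SHIFT_MASK + 1 so the page-size
 * bits can live in the low bits of the hugepd entry, while the other
 * (64-bit server) path creates one pgtable cache per (pdshift - shift)
 * combination via pgtable_cache_add().
 */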

#ifdef CONFIG_FSL_BOOKE
struct kmem_cache *hugepte_cache;
static int __init hugetlbpage_init(void)
{
	int psize;

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
		unsigned shift;

		if (!mmu_psize_defs[psize].shift)
			continue;

		shift = mmu_psize_to_shift(psize);

		/* Don't treat normal page sizes as huge... */
		if (shift != PAGE_SHIFT)
			if (add_huge_page_size(1ULL << shift) < 0)
				continue;
	}

	/*
	 * Create a kmem cache for hugeptes.  The bottom bits in the pte have
	 * size information encoded in them, so align them to allow this
	 */
	hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
					  HUGEPD_SHIFT_MASK + 1, 0, NULL);
	if (hugepte_cache == NULL)
		panic("%s: Unable to create kmem cache for hugeptes\n",
		      __func__);

	/* Default hpage size = 4M */
	if (mmu_psize_defs[MMU_PAGE_4M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
	else
		panic("%s: Unable to set default huge page size\n", __func__);

	return 0;
}
#else
static int __init hugetlbpage_init(void)
{
	int psize;

	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
		return -ENODEV;

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
		unsigned shift;
		unsigned pdshift;

		if (!mmu_psize_defs[psize].shift)
			continue;

		shift = mmu_psize_to_shift(psize);

		if (add_huge_page_size(1ULL << shift) < 0)
			continue;

		if (shift < PMD_SHIFT)
			pdshift = PMD_SHIFT;
		else if (shift < PUD_SHIFT)
			pdshift = PUD_SHIFT;
		else
			pdshift = PGDIR_SHIFT;

		pgtable_cache_add(pdshift - shift, NULL);
		if (!PGT_CACHE(pdshift - shift))
			panic("hugetlbpage_init(): could not create "
			      "pgtable cache for %d bit pagesize\n", shift);
	}

	/* Set default large page size. Currently, we pick 16M or 1M
	 * depending on what is available
	 */
	if (mmu_psize_defs[MMU_PAGE_16M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;

	return 0;
}
#endif
module_init(hugetlbpage_init);

void flush_dcache_icache_hugepage(struct page *page)
{
	int i;
	void *start;

	BUG_ON(!PageCompound(page));

	for (i = 0; i < (1UL << compound_order(page)); i++) {
		if (!PageHighMem(page)) {
			__flush_dcache_icache(page_address(page+i));
		} else {
			start = kmap_atomic(page+i, KM_PPC_SYNC_ICACHE);
			__flush_dcache_icache(start);
			kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
		}
	}
}