/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>

#define NUM_LOW_AREAS   (0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS  (PGTABLE_RANGE >> HTLB_AREA_SHIFT)

/* Modelled after find_linux_pte() */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;
        pte_t *pt;

        BUG_ON(!in_hugepage_area(mm->context, addr));

        addr &= HPAGE_MASK;

        pg = pgd_offset(mm, addr);
        if (!pgd_none(*pg)) {
                pu = pud_offset(pg, addr);
                if (!pud_none(*pu)) {
                        pm = pmd_offset(pu, addr);
#ifdef CONFIG_PPC_64K_PAGES
                        /* Currently, we use the normal PTE offset within full
                         * size PTE pages, thus our huge PTEs are scattered in
                         * the PTE page and we do waste some. We may change
                         * that in the future, but the current mechanism keeps
                         * things much simpler.
                         */
                        if (!pmd_none(*pm)) {
                                /* Note: pte_offset_* are all equivalent on
                                 * ppc64 as we don't have HIGHMEM
                                 */
                                pt = pte_offset_kernel(pm, addr);
                                return pt;
                        }
#else /* CONFIG_PPC_64K_PAGES */
                        /* On 4k pages, we put huge PTEs in the PMD page */
                        pt = (pte_t *)pm;
                        return pt;
#endif /* CONFIG_PPC_64K_PAGES */
                }
        }

        return NULL;
}

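/* Find the PTE slot for a huge page at @addr, allocating any missing
 * intermediate page table levels on the way down.  Returns NULL if an
 * allocation fails. */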
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;
        pte_t *pt;

        BUG_ON(!in_hugepage_area(mm->context, addr));

        addr &= HPAGE_MASK;

        pg = pgd_offset(mm, addr);
        pu = pud_alloc(mm, pg, addr);

        if (pu) {
                pm = pmd_alloc(mm, pu, addr);
                if (pm) {
#ifdef CONFIG_PPC_64K_PAGES
                        /* See comment in huge_pte_offset.  Note that if we
                         * ever want to put the page size in the PMD, we would
                         * have to open code our own pte_alloc* function in
                         * order to populate and set the size atomically
                         */
                        pt = pte_alloc_map(mm, pm, addr);
#else /* CONFIG_PPC_64K_PAGES */
                        pt = (pte_t *)pm;
#endif /* CONFIG_PPC_64K_PAGES */
                        return pt;
                }
        }

        return NULL;
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pte)
{
        if (pte_present(*ptep)) {
                /* We open-code pte_clear because we need to pass the right
                 * argument to hpte_update (huge / !huge)
                 */
                unsigned long old = pte_update(ptep, ~0UL);
                if (old & _PAGE_HASHPTE)
                        hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
                flush_tlb_pending();
        }
        *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
{
        unsigned long old = pte_update(ptep, ~0UL);

        if (old & _PAGE_HASHPTE)
                hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
        *ptep = __pte(0);

        return __pte(old);
}

/* Argument block for the cross-CPU SLB flushes below */
struct slb_flush_info {
        struct mm_struct *mm;
        u16 newareas;
};

static void flush_low_segments(void *parm)
{
        struct slb_flush_info *fi = parm;
        unsigned long i;

        BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS);

        /* Only need to do anything if this CPU is working in the same
         * mm as the one which has changed */
        if (current->active_mm != fi->mm)
                return;

        /* update the paca copy of the context struct */
        get_paca()->context = current->active_mm->context;

        asm volatile("isync" : : : "memory");
        for (i = 0; i < NUM_LOW_AREAS; i++) {
                if (!(fi->newareas & (1U << i)))
                        continue;
                asm volatile("slbie %0"
                             : : "r" ((i << SID_SHIFT) | SLBIE_C));
        }
        asm volatile("isync" : : : "memory");
}

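/* As flush_low_segments(), but for the high hugepage areas above 4GB;
 * each high area covers several SLB segments, so every segment within a
 * newly-opened area must be invalidated. */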
static void flush_high_segments(void *parm)
{
        struct slb_flush_info *fi = parm;
        unsigned long i, j;

        BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS);

        /* Only need to do anything if this CPU is working in the same
         * mm as the one which has changed */
        if (current->active_mm != fi->mm)
                return;

        /* update the paca copy of the context struct */
        get_paca()->context = current->active_mm->context;

        asm volatile("isync" : : : "memory");
        for (i = 0; i < NUM_HIGH_AREAS; i++) {
                if (!(fi->newareas & (1U << i)))
                        continue;
                for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
                        asm volatile("slbie %0"
                                     : : "r" (((i << HTLB_AREA_SHIFT)
                                               + (j << SID_SHIFT)) | SLBIE_C));
        }
        asm volatile("isync" : : : "memory");
}

static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
        unsigned long start = area << SID_SHIFT;
        unsigned long end = (area+1) << SID_SHIFT;
        struct vm_area_struct *vma;

        BUG_ON(area >= NUM_LOW_AREAS);

        /* Check no VMAs are in the region */
        vma = find_vma(mm, start);
        if (vma && (vma->vm_start < end))
                return -EBUSY;

        return 0;
}

static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
        unsigned long start = area << HTLB_AREA_SHIFT;
        unsigned long end = (area+1) << HTLB_AREA_SHIFT;
        struct vm_area_struct *vma;

        BUG_ON(area >= NUM_HIGH_AREAS);

        /* Hack, so that each address is controlled by exactly one
         * of the high or low area bitmaps, the first high area starts
         * at 4GB, not 0 */
        if (start == 0)
                start = 0x100000000UL;

        /* Check no VMAs are in the region */
        vma = find_vma(mm, start);
        if (vma && (vma->vm_start < end))
                return -EBUSY;

        return 0;
}

/* Mark the given low areas as available for hugepage mappings, after
 * checking that they contain no normal VMAs and flushing any stale SLB
 * entries on all CPUs running this mm. */
static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
{
        unsigned long i;
        struct slb_flush_info fi;

        BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
        BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);

        newareas &= ~(mm->context.low_htlb_areas);
        if (!newareas)
                return 0; /* The segments we want are already open */

        for (i = 0; i < NUM_LOW_AREAS; i++)
                if ((1 << i) & newareas)
                        if (prepare_low_area_for_htlb(mm, i) != 0)
                                return -EBUSY;

        mm->context.low_htlb_areas |= newareas;

        /* the context change must make it to memory before the flush,
         * so that further SLB misses do the right thing. */
        mb();

        fi.mm = mm;
        fi.newareas = newareas;
        on_each_cpu(flush_low_segments, &fi, 0, 1);

        return 0;
}

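/* As open_low_hpage_areas(), but for the high hugepage areas above 4GB. */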
static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
{
        struct slb_flush_info fi;
        unsigned long i;

        BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
        BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
                     != NUM_HIGH_AREAS);

        newareas &= ~(mm->context.high_htlb_areas);
        if (!newareas)
                return 0; /* The areas we want are already open */

        for (i = 0; i < NUM_HIGH_AREAS; i++)
                if ((1 << i) & newareas)
                        if (prepare_high_area_for_htlb(mm, i) != 0)
                                return -EBUSY;

        mm->context.high_htlb_areas |= newareas;

        /* update the paca copy of the context struct */
        get_paca()->context = mm->context;

        /* the context change must make it to memory before the flush,
         * so that further SLB misses do the right thing. */
        mb();

        fi.mm = mm;
        fi.newareas = newareas;
        on_each_cpu(flush_high_segments, &fi, 0, 1);

        return 0;
}

int prepare_hugepage_range(unsigned long addr, unsigned long len)
{
        int err = 0;

        if ((addr + len) < addr)
                return -EINVAL;

        if (addr < 0x100000000UL)
                err = open_low_hpage_areas(current->mm,
                                           LOW_ESID_MASK(addr, len));
        if ((addr + len) > 0x100000000UL)
                err = open_high_hpage_areas(current->mm,
                                            HTLB_AREA_MASK(addr, len));
        if (err) {
                printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
                       " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
                       addr, len,
                       LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
                return err;
        }

        return 0;
}

struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
        pte_t *ptep;
        struct page *page;

        if (!in_hugepage_area(mm->context, address))
                return ERR_PTR(-EINVAL);

        ptep = huge_pte_offset(mm, address);
        page = pte_page(*ptep);
        if (page)
                page += (address % HPAGE_SIZE) / PAGE_SIZE;

        return page;
}

int pmd_huge(pmd_t pmd)
{
        return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
{
        BUG();
        return NULL;
}

/* Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions. */
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
                                     unsigned long len, unsigned long pgoff,
                                     unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long start_addr;

        if (len > TASK_SIZE)
                return -ENOMEM;

        if (addr) {
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (((TASK_SIZE - len) >= addr)
                    && (!vma || (addr+len) <= vma->vm_start)
                    && !is_hugepage_only_range(mm, addr, len))
                        return addr;
        }
        if (len > mm->cached_hole_size) {
                start_addr = addr = mm->free_area_cache;
        } else {
                start_addr = addr = TASK_UNMAPPED_BASE;
                mm->cached_hole_size = 0;
        }

full_search:
        vma = find_vma(mm, addr);
        while (TASK_SIZE - len >= addr) {
                BUG_ON(vma && (addr >= vma->vm_end));

                if (touches_hugepage_low_range(mm, addr, len)) {
                        addr = ALIGN(addr+1, 1<<SID_SHIFT);
                        vma = find_vma(mm, addr);
                        continue;
                }
                if (touches_hugepage_high_range(mm, addr, len)) {
                        addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
                        vma = find_vma(mm, addr);
                        continue;
                }
                if (!vma || addr + len <= vma->vm_start) {
                        /*
                         * Remember the place where we stopped the search:
                         */
                        mm->free_area_cache = addr + len;
                        return addr;
                }
                if (addr + mm->cached_hole_size < vma->vm_start)
                        mm->cached_hole_size = vma->vm_start - addr;
                addr = vma->vm_end;
                vma = vma->vm_next;
        }

        /* Make sure we didn't miss any holes */
        if (start_addr != TASK_UNMAPPED_BASE) {
                start_addr = addr = TASK_UNMAPPED_BASE;
                mm->cached_hole_size = 0;
                goto full_search;
        }
        return -ENOMEM;
}

/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base):
 *
 * Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions.
 */
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                               const unsigned long len, const unsigned long pgoff,
                               const unsigned long flags)
{
        struct vm_area_struct *vma, *prev_vma;
        struct mm_struct *mm = current->mm;
        unsigned long base = mm->mmap_base, addr = addr0;
        unsigned long largest_hole = mm->cached_hole_size;
        int first_time = 1;

        /* requested length too big for entire address space */
        if (len > TASK_SIZE)
                return -ENOMEM;

        /* don't allow allocations above current base */
        if (mm->free_area_cache > base)
                mm->free_area_cache = base;

        /* requesting a specific address */
        if (addr) {
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
                    (!vma || addr + len <= vma->vm_start)
                    && !is_hugepage_only_range(mm, addr, len))
                        return addr;
        }

        if (len <= largest_hole) {
                largest_hole = 0;
                mm->free_area_cache = base;
        }
try_again:
        /* make sure it can fit in the remaining address space */
        if (mm->free_area_cache < len)
                goto fail;

        /* either no address requested or can't fit in requested address hole */
        addr = (mm->free_area_cache - len) & PAGE_MASK;
        do {
hugepage_recheck:
                if (touches_hugepage_low_range(mm, addr, len)) {
                        addr = (addr & ((~0) << SID_SHIFT)) - len;
                        goto hugepage_recheck;
                } else if (touches_hugepage_high_range(mm, addr, len)) {
                        addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
                        goto hugepage_recheck;
                }

                /*
                 * Lookup failure means no vma is above this address,
                 * i.e. return with success:
                 */
                if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
                        return addr;

                /*
                 * new region fits between prev_vma->vm_end and
                 * vma->vm_start, use it:
                 */
                if (addr+len <= vma->vm_start &&
                    (!prev_vma || (addr >= prev_vma->vm_end))) {
                        /* remember the address as a hint for next time */
                        mm->cached_hole_size = largest_hole;
                        return (mm->free_area_cache = addr);
                } else {
                        /* pull free_area_cache down to the first hole */
                        if (mm->free_area_cache == vma->vm_end) {
                                mm->free_area_cache = vma->vm_start;
                                mm->cached_hole_size = largest_hole;
                        }
                }

                /* remember the largest hole we saw so far */
                if (addr + largest_hole < vma->vm_start)
                        largest_hole = vma->vm_start - addr;

                /* try just below the current vma->vm_start */
                addr = vma->vm_start-len;
        } while (len <= vma->vm_start);

fail:
        /*
         * if hint left us with no space for the requested
         * mapping then try again:
         */
        if (first_time) {
                mm->free_area_cache = base;
                largest_hole = 0;
                first_time = 0;
                goto try_again;
        }
        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
        addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
        /*
         * Restore the topdown base:
         */
        mm->free_area_cache = base;
        mm->cached_hole_size = ~0UL;

        return addr;
}

static int htlb_check_hinted_area(unsigned long addr, unsigned long len)
{
        struct vm_area_struct *vma;

        vma = find_vma(current->mm, addr);
        if (!vma || ((addr + len) <= vma->vm_start))
                return 0;

        return -ENOMEM;
}

static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
{
        unsigned long addr = 0;
        struct vm_area_struct *vma;

        vma = find_vma(current->mm, addr);
        while (addr + len <= 0x100000000UL) {
                BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */

                if (!__within_hugepage_low_range(addr, len, segmask)) {
                        addr = ALIGN(addr+1, 1<<SID_SHIFT);
                        vma = find_vma(current->mm, addr);
                        continue;
                }

                if (!vma || (addr + len) <= vma->vm_start)
                        return addr;
                addr = ALIGN(vma->vm_end, HPAGE_SIZE);
                /* Depending on segmask this might not be a confirmed
                 * hugepage region, so the ALIGN could have skipped
                 * some VMAs */
                vma = find_vma(current->mm, addr);
        }

        return -ENOMEM;
}

static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
{
        unsigned long addr = 0x100000000UL;
        struct vm_area_struct *vma;

        vma = find_vma(current->mm, addr);
        while (addr + len <= TASK_SIZE_USER64) {
                BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */

                if (!__within_hugepage_high_range(addr, len, areamask)) {
                        addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
                        vma = find_vma(current->mm, addr);
                        continue;
                }

                if (!vma || (addr + len) <= vma->vm_start)
                        return addr;
                addr = ALIGN(vma->vm_end, HPAGE_SIZE);
                /* Depending on areamask this might not be a confirmed
                 * hugepage region, so the ALIGN could have skipped
                 * some VMAs */
                vma = find_vma(current->mm, addr);
        }

        return -ENOMEM;
}

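/* Pick an address for a hugepage mapping.  Try, in order: the caller's
 * hint address, a fit within the areas already open for hugepages, and
 * finally opening additional areas, working down from the top of the
 * 32-bit or 64-bit address space as appropriate. */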
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                                        unsigned long len, unsigned long pgoff,
                                        unsigned long flags)
{
        int lastshift;
        u16 areamask, curareas;

        if (HPAGE_SHIFT == 0)
                return -EINVAL;
        if (len & ~HPAGE_MASK)
                return -EINVAL;

        if (!cpu_has_feature(CPU_FTR_16M_PAGE))
                return -EINVAL;

        /* Paranoia, caller should have dealt with this */
        BUG_ON((addr + len) < addr);

        if (test_thread_flag(TIF_32BIT)) {
                /* Paranoia, caller should have dealt with this */
                BUG_ON((addr + len) > 0x100000000UL);

                curareas = current->mm->context.low_htlb_areas;

                /* First see if we can use the hint address */
                if (addr && (htlb_check_hinted_area(addr, len) == 0)) {
                        areamask = LOW_ESID_MASK(addr, len);
                        if (open_low_hpage_areas(current->mm, areamask) == 0)
                                return addr;
                }

                /* Next see if we can map in the existing low areas */
                addr = htlb_get_low_area(len, curareas);
                if (addr != -ENOMEM)
                        return addr;

                /* Finally go looking for areas to open */
                lastshift = 0;
                for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
                     !lastshift; areamask >>= 1) {
                        if (areamask & 1)
                                lastshift = 1;

                        addr = htlb_get_low_area(len, curareas | areamask);
                        if ((addr != -ENOMEM)
                            && open_low_hpage_areas(current->mm, areamask) == 0)
                                return addr;
                }
        } else {
                curareas = current->mm->context.high_htlb_areas;

                /* First see if we can use the hint address */
                /* We discourage 64-bit processes from doing hugepage
                 * mappings below 4GB (must use MAP_FIXED) */
                if ((addr >= 0x100000000UL)
                    && (htlb_check_hinted_area(addr, len) == 0)) {
                        areamask = HTLB_AREA_MASK(addr, len);
                        if (open_high_hpage_areas(current->mm, areamask) == 0)
                                return addr;
                }

                /* Next see if we can map in the existing high areas */
                addr = htlb_get_high_area(len, curareas);
                if (addr != -ENOMEM)
                        return addr;

                /* Finally go looking for areas to open */
                lastshift = 0;
                for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
                     !lastshift; areamask >>= 1) {
                        if (areamask & 1)
                                lastshift = 1;

                        addr = htlb_get_high_area(len, curareas | areamask);
                        if ((addr != -ENOMEM)
                            && open_high_hpage_areas(current->mm, areamask) == 0)
                                return addr;
                }
        }
        printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
               " enough areas\n");
        return -ENOMEM;
}

/*
 * Called by asm hashtable.S for doing lazy icache flush
 */
static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
                                                  pte_t pte, int trap)
{
        struct page *page;
        int i;

        if (!pfn_valid(pte_pfn(pte)))
                return rflags;

        page = pte_page(pte);

        /* page is dirty */
        if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
                if (trap == 0x400) {
                        for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++)
                                __flush_dcache_icache(page_address(page+i));
                        set_bit(PG_arch_1, &page->flags);
                } else {
                        rflags |= HPTE_R_N;
                }
        }
        return rflags;
}

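/* Called from the low-level hash fault path when the faulting address
 * lies in the hugepage region: build (or update) the hash PTE for a
 * huge page.  Returns 0 on success, or 1 to pass the fault up to
 * do_page_fault(). */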
751 */ 752 753 754 do { 755 old_pte = pte_val(*ptep); 756 if (old_pte & _PAGE_BUSY) 757 goto out; 758 new_pte = old_pte | _PAGE_BUSY | 759 _PAGE_ACCESSED | _PAGE_HASHPTE; 760 } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, 761 old_pte, new_pte)); 762 763 rflags = 0x2 | (!(new_pte & _PAGE_RW)); 764 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ 765 rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); 766 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) 767 /* No CPU has hugepages but lacks no execute, so we 768 * don't need to worry about that case */ 769 rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), 770 trap); 771 772 /* Check if pte already has an hpte (case 2) */ 773 if (unlikely(old_pte & _PAGE_HASHPTE)) { 774 /* There MIGHT be an HPTE for this pte */ 775 unsigned long hash, slot; 776 777 hash = hpt_hash(va, HPAGE_SHIFT); 778 if (old_pte & _PAGE_F_SECOND) 779 hash = ~hash; 780 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 781 slot += (old_pte & _PAGE_F_GIX) >> 12; 782 783 if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, 784 local) == -1) 785 old_pte &= ~_PAGE_HPTEFLAGS; 786 } 787 788 if (likely(!(old_pte & _PAGE_HASHPTE))) { 789 unsigned long hash = hpt_hash(va, HPAGE_SHIFT); 790 unsigned long hpte_group; 791 792 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; 793 794 repeat: 795 hpte_group = ((hash & htab_hash_mask) * 796 HPTES_PER_GROUP) & ~0x7UL; 797 798 /* clear HPTE slot informations in new PTE */ 799 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; 800 801 /* Add in WIMG bits */ 802 /* XXX We should store these in the pte */ 803 /* --BenH: I think they are ... */ 804 rflags |= _PAGE_COHERENT; 805 806 /* Insert into the hash table, primary slot */ 807 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, 808 mmu_huge_psize); 809 810 /* Primary is full, try the secondary */ 811 if (unlikely(slot == -1)) { 812 new_pte |= _PAGE_F_SECOND; 813 hpte_group = ((~hash & htab_hash_mask) * 814 HPTES_PER_GROUP) & ~0x7UL; 815 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 816 HPTE_V_SECONDARY, 817 mmu_huge_psize); 818 if (slot == -1) { 819 if (mftb() & 0x1) 820 hpte_group = ((hash & htab_hash_mask) * 821 HPTES_PER_GROUP)&~0x7UL; 822 823 ppc_md.hpte_remove(hpte_group); 824 goto repeat; 825 } 826 } 827 828 if (unlikely(slot == -2)) 829 panic("hash_huge_page: pte_insert failed\n"); 830 831 new_pte |= (slot << 12) & _PAGE_F_GIX; 832 } 833 834 /* 835 * No need to use ldarx/stdcx here 836 */ 837 *ptep = __pte(new_pte & ~_PAGE_BUSY); 838 839 err = 0; 840 841 out: 842 return err; 843 } 844