/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>

#define NUM_LOW_AREAS           (0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS          (PGTABLE_RANGE >> HTLB_AREA_SHIFT)

/* Modelled after find_linux_pte() */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;
        pte_t *pt;

        BUG_ON(! in_hugepage_area(mm->context, addr));

        addr &= HPAGE_MASK;

        pg = pgd_offset(mm, addr);
        if (!pgd_none(*pg)) {
                pu = pud_offset(pg, addr);
                if (!pud_none(*pu)) {
                        pm = pmd_offset(pu, addr);
#ifdef CONFIG_PPC_64K_PAGES
                        /* Currently, we use the normal PTE offset within full
                         * size PTE pages, thus our huge PTEs are scattered in
                         * the PTE page and we do waste some. We may change
                         * that in the future, but the current mechanism keeps
                         * things much simpler
                         */
                        if (!pmd_none(*pm)) {
                                /* Note: pte_offset_* are all equivalent on
                                 * ppc64 as we don't have HIGHMEM
                                 */
                                pt = pte_offset_kernel(pm, addr);
                                return pt;
                        }
#else /* CONFIG_PPC_64K_PAGES */
                        /* On 4k pages, we put huge PTEs in the PMD page */
                        pt = (pte_t *)pm;
                        return pt;
#endif /* CONFIG_PPC_64K_PAGES */
                }
        }

        return NULL;
}

pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;
        pte_t *pt;

        BUG_ON(! in_hugepage_area(mm->context, addr));

        addr &= HPAGE_MASK;

        pg = pgd_offset(mm, addr);
        pu = pud_alloc(mm, pg, addr);

        if (pu) {
                pm = pmd_alloc(mm, pu, addr);
                if (pm) {
#ifdef CONFIG_PPC_64K_PAGES
                        /* See comment in huge_pte_offset. Note that if we ever
                         * want to put the page size in the PMD, we would have
                         * to open code our own pte_alloc* function in order
                         * to populate and set the size atomically
                         */
                        pt = pte_alloc_map(mm, pm, addr);
#else /* CONFIG_PPC_64K_PAGES */
                        pt = (pte_t *)pm;
#endif /* CONFIG_PPC_64K_PAGES */
                        return pt;
                }
        }

        return NULL;
}
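
/* Install a huge-page PTE.  Any existing translation is torn down
 * first, including its hash PTE (via hpte_update()), before the new
 * value is written. */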
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pte)
{
        if (pte_present(*ptep)) {
                /* We open-code pte_clear because we need to pass the right
                 * argument to hpte_update (huge / !huge)
                 */
                unsigned long old = pte_update(ptep, ~0UL);
                if (old & _PAGE_HASHPTE)
                        hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
                flush_tlb_pending();
        }
        *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
{
        unsigned long old = pte_update(ptep, ~0UL);

        if (old & _PAGE_HASHPTE)
                hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
        *ptep = __pte(0);

        return __pte(old);
}

/*
 * This function checks for proper alignment of input addr and len parameters.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
        if (len & ~HPAGE_MASK)
                return -EINVAL;
        if (addr & ~HPAGE_MASK)
                return -EINVAL;
        if (! (within_hugepage_low_range(addr, len)
               || within_hugepage_high_range(addr, len)) )
                return -EINVAL;
        return 0;
}

static void flush_low_segments(void *parm)
{
        u16 areas = (unsigned long) parm;
        unsigned long i;

        asm volatile("isync" : : : "memory");

        BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS);

        for (i = 0; i < NUM_LOW_AREAS; i++) {
                if (! (areas & (1U << i)))
                        continue;
                asm volatile("slbie %0"
                             : : "r" ((i << SID_SHIFT) | SLBIE_C));
        }

        asm volatile("isync" : : : "memory");
}

static void flush_high_segments(void *parm)
{
        u16 areas = (unsigned long) parm;
        unsigned long i, j;

        asm volatile("isync" : : : "memory");

        BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS);

        for (i = 0; i < NUM_HIGH_AREAS; i++) {
                if (! (areas & (1U << i)))
                        continue;
                for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
                        asm volatile("slbie %0"
                                     :: "r" (((i << HTLB_AREA_SHIFT)
                                              + (j << SID_SHIFT)) | SLBIE_C));
        }

        asm volatile("isync" : : : "memory");
}

static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
        unsigned long start = area << SID_SHIFT;
        unsigned long end = (area+1) << SID_SHIFT;
        struct vm_area_struct *vma;

        BUG_ON(area >= NUM_LOW_AREAS);

        /* Check no VMAs are in the region */
        vma = find_vma(mm, start);
        if (vma && (vma->vm_start < end))
                return -EBUSY;

        return 0;
}

static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
        unsigned long start = area << HTLB_AREA_SHIFT;
        unsigned long end = (area+1) << HTLB_AREA_SHIFT;
        struct vm_area_struct *vma;

        BUG_ON(area >= NUM_HIGH_AREAS);

        /* Hack, so that each address is controlled by exactly one
         * of the high or low area bitmaps, the first high area starts
         * at 4GB, not 0 */
        if (start == 0)
                start = 0x100000000UL;

        /* Check no VMAs are in the region */
        vma = find_vma(mm, start);
        if (vma && (vma->vm_start < end))
                return -EBUSY;

        return 0;
}
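
/* open_low_hpage_areas() and open_high_hpage_areas() reserve the
 * requested areas for hugepages in this mm's context (provided they
 * contain no existing mappings), propagate the change to the PACA
 * copy of the context, and then flush any stale SLB entries for those
 * segments on all CPUs. */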
static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
{
        unsigned long i;

        BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
        BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);

        newareas &= ~(mm->context.low_htlb_areas);
        if (! newareas)
                return 0; /* The segments we want are already open */

        for (i = 0; i < NUM_LOW_AREAS; i++)
                if ((1 << i) & newareas)
                        if (prepare_low_area_for_htlb(mm, i) != 0)
                                return -EBUSY;

        mm->context.low_htlb_areas |= newareas;

        /* update the paca copy of the context struct */
        get_paca()->context = mm->context;

        /* the context change must make it to memory before the flush,
         * so that further SLB misses do the right thing. */
        mb();
        on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1);

        return 0;
}

static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
{
        unsigned long i;

        BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
        BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
                     != NUM_HIGH_AREAS);

        newareas &= ~(mm->context.high_htlb_areas);
        if (! newareas)
                return 0; /* The areas we want are already open */

        for (i = 0; i < NUM_HIGH_AREAS; i++)
                if ((1 << i) & newareas)
                        if (prepare_high_area_for_htlb(mm, i) != 0)
                                return -EBUSY;

        mm->context.high_htlb_areas |= newareas;

        /* update the paca copy of the context struct */
        get_paca()->context = mm->context;

        /* the context change must make it to memory before the flush,
         * so that further SLB misses do the right thing. */
        mb();
        on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1);

        return 0;
}

int prepare_hugepage_range(unsigned long addr, unsigned long len)
{
        int err;

        if ( (addr+len) < addr )
                return -EINVAL;

        if ((addr + len) < 0x100000000UL)
                err = open_low_hpage_areas(current->mm,
                                           LOW_ESID_MASK(addr, len));
        else
                err = open_high_hpage_areas(current->mm,
                                            HTLB_AREA_MASK(addr, len));
        if (err) {
                printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
                       " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
                       addr, len,
                       LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
                return err;
        }

        return 0;
}

struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
        pte_t *ptep;
        struct page *page;

        if (! in_hugepage_area(mm->context, address))
                return ERR_PTR(-EINVAL);

        ptep = huge_pte_offset(mm, address);
        page = pte_page(*ptep);
        if (page)
                page += (address % HPAGE_SIZE) / PAGE_SIZE;

        return page;
}
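
/* Hugepage translations are found via follow_huge_addr() above rather
 * than through a huge PMD entry, so these two hooks are effectively
 * unused on ppc64: pmd_huge() always reports "not huge" and the
 * generic follow_huge_pmd() path should never be reached. */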
int pmd_huge(pmd_t pmd)
{
        return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
{
        BUG();
        return NULL;
}

/* Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions.
 */
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
                                     unsigned long len, unsigned long pgoff,
                                     unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long start_addr;

        if (len > TASK_SIZE)
                return -ENOMEM;

        if (addr) {
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (((TASK_SIZE - len) >= addr)
                    && (!vma || (addr+len) <= vma->vm_start)
                    && !is_hugepage_only_range(mm, addr,len))
                        return addr;
        }
        if (len > mm->cached_hole_size) {
                start_addr = addr = mm->free_area_cache;
        } else {
                start_addr = addr = TASK_UNMAPPED_BASE;
                mm->cached_hole_size = 0;
        }

full_search:
        vma = find_vma(mm, addr);
        while (TASK_SIZE - len >= addr) {
                BUG_ON(vma && (addr >= vma->vm_end));

                if (touches_hugepage_low_range(mm, addr, len)) {
                        addr = ALIGN(addr+1, 1<<SID_SHIFT);
                        vma = find_vma(mm, addr);
                        continue;
                }
                if (touches_hugepage_high_range(mm, addr, len)) {
                        addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
                        vma = find_vma(mm, addr);
                        continue;
                }
                if (!vma || addr + len <= vma->vm_start) {
                        /*
                         * Remember the place where we stopped the search:
                         */
                        mm->free_area_cache = addr + len;
                        return addr;
                }
                if (addr + mm->cached_hole_size < vma->vm_start)
                        mm->cached_hole_size = vma->vm_start - addr;
                addr = vma->vm_end;
                vma = vma->vm_next;
        }

        /* Make sure we didn't miss any holes */
        if (start_addr != TASK_UNMAPPED_BASE) {
                start_addr = addr = TASK_UNMAPPED_BASE;
                mm->cached_hole_size = 0;
                goto full_search;
        }
        return -ENOMEM;
}

/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base):
 *
 * Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions.
 */
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                               const unsigned long len, const unsigned long pgoff,
                               const unsigned long flags)
{
        struct vm_area_struct *vma, *prev_vma;
        struct mm_struct *mm = current->mm;
        unsigned long base = mm->mmap_base, addr = addr0;
        unsigned long largest_hole = mm->cached_hole_size;
        int first_time = 1;

        /* requested length too big for entire address space */
        if (len > TASK_SIZE)
                return -ENOMEM;

        /* don't allow allocations above current base */
        if (mm->free_area_cache > base)
                mm->free_area_cache = base;

        /* requesting a specific address */
        if (addr) {
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
                    (!vma || addr + len <= vma->vm_start)
                    && !is_hugepage_only_range(mm, addr,len))
                        return addr;
        }

        if (len <= largest_hole) {
                largest_hole = 0;
                mm->free_area_cache = base;
        }
try_again:
        /* make sure it can fit in the remaining address space */
        if (mm->free_area_cache < len)
                goto fail;

        /* either no address requested or can't fit in requested address hole */
        addr = (mm->free_area_cache - len) & PAGE_MASK;
        do {
hugepage_recheck:
                if (touches_hugepage_low_range(mm, addr, len)) {
                        addr = (addr & ((~0) << SID_SHIFT)) - len;
                        goto hugepage_recheck;
                } else if (touches_hugepage_high_range(mm, addr, len)) {
                        addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
                        goto hugepage_recheck;
                }

                /*
                 * Lookup failure means no vma is above this address,
                 * i.e. return with success:
                 */
                if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
                        return addr;

                /*
                 * new region fits between prev_vma->vm_end and
                 * vma->vm_start, use it:
                 */
                if (addr+len <= vma->vm_start &&
                    (!prev_vma || (addr >= prev_vma->vm_end))) {
                        /* remember the address as a hint for next time */
                        mm->cached_hole_size = largest_hole;
                        return (mm->free_area_cache = addr);
                } else {
                        /* pull free_area_cache down to the first hole */
                        if (mm->free_area_cache == vma->vm_end) {
                                mm->free_area_cache = vma->vm_start;
                                mm->cached_hole_size = largest_hole;
                        }
                }

                /* remember the largest hole we saw so far */
                if (addr + largest_hole < vma->vm_start)
                        largest_hole = vma->vm_start - addr;

                /* try just below the current vma->vm_start */
                addr = vma->vm_start-len;
        } while (len <= vma->vm_start);

fail:
        /*
         * if hint left us with no space for the requested
         * mapping then try again:
         */
        if (first_time) {
                mm->free_area_cache = base;
                largest_hole = 0;
                first_time = 0;
                goto try_again;
        }
        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
        addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
        /*
         * Restore the topdown base:
         */
        mm->free_area_cache = base;
        mm->cached_hole_size = ~0UL;

        return addr;
}
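
/* Scan the low (below 4GB) hugepage areas selected by segmask for a
 * free range of the requested length, skipping segments not in the
 * mask.  Returns an address on success, or -ENOMEM.
 * htlb_get_high_area() below does the same for the areas above 4GB. */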
static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
{
        unsigned long addr = 0;
        struct vm_area_struct *vma;

        vma = find_vma(current->mm, addr);
        while (addr + len <= 0x100000000UL) {
                BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */

                if (! __within_hugepage_low_range(addr, len, segmask)) {
                        addr = ALIGN(addr+1, 1<<SID_SHIFT);
                        vma = find_vma(current->mm, addr);
                        continue;
                }

                if (!vma || (addr + len) <= vma->vm_start)
                        return addr;
                addr = ALIGN(vma->vm_end, HPAGE_SIZE);
                /* Depending on segmask this might not be a confirmed
                 * hugepage region, so the ALIGN could have skipped
                 * some VMAs */
                vma = find_vma(current->mm, addr);
        }

        return -ENOMEM;
}

static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
{
        unsigned long addr = 0x100000000UL;
        struct vm_area_struct *vma;

        vma = find_vma(current->mm, addr);
        while (addr + len <= TASK_SIZE_USER64) {
                BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */

                if (! __within_hugepage_high_range(addr, len, areamask)) {
                        addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
                        vma = find_vma(current->mm, addr);
                        continue;
                }

                if (!vma || (addr + len) <= vma->vm_start)
                        return addr;
                addr = ALIGN(vma->vm_end, HPAGE_SIZE);
                /* Depending on areamask this might not be a confirmed
                 * hugepage region, so the ALIGN could have skipped
                 * some VMAs */
                vma = find_vma(current->mm, addr);
        }

        return -ENOMEM;
}

unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                                        unsigned long len, unsigned long pgoff,
                                        unsigned long flags)
{
        int lastshift;
        u16 areamask, curareas;

        if (HPAGE_SHIFT == 0)
                return -EINVAL;
        if (len & ~HPAGE_MASK)
                return -EINVAL;

        if (!cpu_has_feature(CPU_FTR_16M_PAGE))
                return -EINVAL;

        if (test_thread_flag(TIF_32BIT)) {
                curareas = current->mm->context.low_htlb_areas;

                /* First see if we can do the mapping in the existing
                 * low areas */
                addr = htlb_get_low_area(len, curareas);
                if (addr != -ENOMEM)
                        return addr;

                lastshift = 0;
                for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
                     ! lastshift; areamask >>=1) {
                        if (areamask & 1)
                                lastshift = 1;

                        addr = htlb_get_low_area(len, curareas | areamask);
                        if ((addr != -ENOMEM)
                            && open_low_hpage_areas(current->mm, areamask) == 0)
                                return addr;
                }
        } else {
                curareas = current->mm->context.high_htlb_areas;

                /* First see if we can do the mapping in the existing
                 * high areas */
                addr = htlb_get_high_area(len, curareas);
                if (addr != -ENOMEM)
                        return addr;

                lastshift = 0;
                for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
                     ! lastshift; areamask >>=1) {
                        if (areamask & 1)
                                lastshift = 1;

                        addr = htlb_get_high_area(len, curareas | areamask);
                        if ((addr != -ENOMEM)
                            && open_high_hpage_areas(current->mm, areamask) == 0)
                                return addr;
                }
        }
        printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
               " enough areas\n");
        return -ENOMEM;
}
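
/* Handle a hash fault for a hugepage: build or update the hash PTE for
 * the faulting address.  Returns 0 on success, or 1 to pass the fault
 * up to do_page_fault() (no PTE, access not permitted, or the PTE is
 * busy). */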
int hash_huge_page(struct mm_struct *mm, unsigned long access,
                   unsigned long ea, unsigned long vsid, int local)
{
        pte_t *ptep;
        unsigned long old_pte, new_pte;
        unsigned long va, rflags, pa;
        long slot;
        int err = 1;

        ptep = huge_pte_offset(mm, ea);

        /* Search the Linux page table for a match with va */
        va = (vsid << 28) | (ea & 0x0fffffff);

        /*
         * If no pte found or not present, send the problem up to
         * do_page_fault
         */
        if (unlikely(!ptep || pte_none(*ptep)))
                goto out;

        /*
         * Check the user's access rights to the page. If access should be
         * prevented then send the problem up to do_page_fault.
         */
        if (unlikely(access & ~pte_val(*ptep)))
                goto out;
        /*
         * At this point, we have a pte (old_pte) which can be used to build
         * or update an HPTE. There are 2 cases:
         *
         * 1. There is a valid (present) pte with no associated HPTE (this is
         *      the most common case)
         * 2. There is a valid (present) pte with an associated HPTE. The
         *      current values of the pp bits in the HPTE prevent access
         *      because we are doing software DIRTY bit management and the
         *      page is currently not DIRTY.
         */

        do {
                old_pte = pte_val(*ptep);
                if (old_pte & _PAGE_BUSY)
                        goto out;
                new_pte = old_pte | _PAGE_BUSY |
                        _PAGE_ACCESSED | _PAGE_HASHPTE;
        } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
                                         old_pte, new_pte));

        rflags = 0x2 | (!(new_pte & _PAGE_RW));
        /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
        rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);

        /* Check if pte already has an hpte (case 2) */
        if (unlikely(old_pte & _PAGE_HASHPTE)) {
                /* There MIGHT be an HPTE for this pte */
                unsigned long hash, slot;

                hash = hpt_hash(va, HPAGE_SHIFT);
                if (old_pte & _PAGE_F_SECOND)
                        hash = ~hash;
                slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                slot += (old_pte & _PAGE_F_GIX) >> 12;

                if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
                        old_pte &= ~_PAGE_HPTEFLAGS;
        }
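
        /* If there was no HPTE (case 1), or hpte_updatepp() found no
         * matching entry above, insert a fresh HPTE: try the primary
         * group first, then the secondary, and if both are full evict
         * an entry from a pseudo-randomly chosen group and retry. */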
        if (likely(!(old_pte & _PAGE_HASHPTE))) {
                unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
                unsigned long hpte_group;

                pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;

repeat:
                hpte_group = ((hash & htab_hash_mask) *
                              HPTES_PER_GROUP) & ~0x7UL;

                /* clear HPTE slot information in new PTE */
                new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;

                /* Add in WIMG bits */
                /* XXX We should store these in the pte */
                /* --BenH: I think they are ... */
                rflags |= _PAGE_COHERENT;

                /* Insert into the hash table, primary slot */
                slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
                                          mmu_huge_psize);

                /* Primary is full, try the secondary */
                if (unlikely(slot == -1)) {
                        new_pte |= _PAGE_F_SECOND;
                        hpte_group = ((~hash & htab_hash_mask) *
                                      HPTES_PER_GROUP) & ~0x7UL;
                        slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
                                                  HPTE_V_SECONDARY,
                                                  mmu_huge_psize);
                        if (slot == -1) {
                                if (mftb() & 0x1)
                                        hpte_group = ((hash & htab_hash_mask) *
                                                      HPTES_PER_GROUP)&~0x7UL;

                                ppc_md.hpte_remove(hpte_group);
                                goto repeat;
                        }
                }

                if (unlikely(slot == -2))
                        panic("hash_huge_page: pte_insert failed\n");

                new_pte |= (slot << 12) & _PAGE_F_GIX;
        }

        /*
         * No need to use ldarx/stdcx here because all who
         * might be updating the pte will hold the
         * page_table_lock
         */
        *ptep = __pte(new_pte & ~_PAGE_BUSY);

        err = 0;

 out:
        return err;
}