1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <vm/hat.h> 28 #include <vm/hat_sfmmu.h> 29 #include <vm/page.h> 30 #include <sys/pte.h> 31 #include <sys/systm.h> 32 #include <sys/mman.h> 33 #include <sys/sysmacros.h> 34 #include <sys/machparam.h> 35 #include <sys/vtrace.h> 36 #include <sys/kmem.h> 37 #include <sys/mmu.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/cpuvar.h> 41 #include <sys/debug.h> 42 #include <sys/lgrp.h> 43 #include <sys/archsystm.h> 44 #include <sys/machsystm.h> 45 #include <sys/vmsystm.h> 46 #include <sys/bitmap.h> 47 #include <vm/as.h> 48 #include <vm/seg.h> 49 #include <vm/seg_kmem.h> 50 #include <vm/seg_kp.h> 51 #include <vm/seg_kpm.h> 52 #include <vm/rm.h> 53 #include <vm/vm_dep.h> 54 #include <sys/t_lock.h> 55 #include <sys/vm_machparam.h> 56 #include <sys/promif.h> 57 #include <sys/prom_isa.h> 58 #include <sys/prom_plat.h> 59 #include <sys/prom_debug.h> 60 #include <sys/privregs.h> 61 #include <sys/bootconf.h> 62 #include <sys/memlist.h> 63 #include <sys/memlist_plat.h> 64 
#include <sys/cpu_module.h>
#include <sys/reboot.h>
#include <sys/kdi.h>

/*
 * Forward declarations of the file-local routines.
 */
static struct translation *read_prom_mappings(size_t *);
static void sfmmu_map_prom_mappings(struct translation *, size_t);
static void sfmmu_reloc_trap_handler(void *, void *, size_t);

/*
 * Routines provided by other files.
 */
extern void sfmmu_patch_utsb(void);
extern void sfmmu_remap_kernel(void);

/*
 * Data provided by other files.
 */
extern caddr_t textva;
extern caddr_t datava;
extern tte_t ktext_tte;		/* tte for kernel text */
extern tte_t kdata_tte;		/* tte for kernel data */
extern int enable_bigktsb;
extern int kmem64_smchunks;
extern int page_relocate_ready;

/*
 * Data defined by this file.
 */

/* memsegs physical linkage */
uint64_t memsegspa = (uintptr_t)MSEG_NULLPTR_PA;

/* use physical memseg addresses */
uint64_t memseg_phash[N_MEM_SLOTS];

/* Set to 1 at the end of hat_kern_setup(). */
int sfmmu_kern_mapped = 0;

/*
 * DMMU primary context register for the kernel context. Machine specific code
 * inserts correct page size codes when necessary
 */
uint64_t kcontextreg = KCONTEXT;

#ifdef DEBUG
/* Set by ndata_extra_base() when the last free chunk does not end at endaddr */
static int ndata_middle_hole_detected = 0;
#endif

/*
 * Controls the logic which enables the use of the
 * QUAD_LDD_PHYS ASI for TSB accesses.
111 */ 112 extern int ktsb_phys; 113 114 /* 115 * Global Routines called from within: 116 * usr/src/uts/sun4u 117 * usr/src/uts/sfmmu 118 * usr/src/uts/sun 119 */ 120 121 pfn_t 122 va_to_pfn(void *vaddr) 123 { 124 u_longlong_t physaddr; 125 int mode, valid; 126 127 if (tba_taken_over) 128 return (hat_getpfnum(kas.a_hat, (caddr_t)vaddr)); 129 130 #if !defined(C_OBP) 131 if (!kmem64_smchunks && 132 (caddr_t)vaddr >= kmem64_base && (caddr_t)vaddr < kmem64_end) { 133 if (kmem64_pabase == (uint64_t)-1) 134 prom_panic("va_to_pfn: kmem64_pabase not init"); 135 physaddr = kmem64_pabase + ((caddr_t)vaddr - kmem64_base); 136 return ((pfn_t)physaddr >> MMU_PAGESHIFT); 137 } 138 #endif /* !C_OBP */ 139 140 if ((prom_translate_virt(vaddr, &valid, &physaddr, &mode) != -1) && 141 (valid == -1)) { 142 return ((pfn_t)(physaddr >> MMU_PAGESHIFT)); 143 } 144 return (PFN_INVALID); 145 } 146 147 uint64_t 148 va_to_pa(void *vaddr) 149 { 150 pfn_t pfn; 151 152 if ((pfn = va_to_pfn(vaddr)) == PFN_INVALID) 153 return ((uint64_t)-1); 154 return (((uint64_t)pfn << MMU_PAGESHIFT) | 155 ((uint64_t)vaddr & MMU_PAGEOFFSET)); 156 } 157 158 void 159 hat_kern_setup(void) 160 { 161 struct translation *trans_root; 162 size_t ntrans_root; 163 extern void startup_fixup_physavail(void); 164 165 /* 166 * These are the steps we take to take over the mmu from the prom. 167 * 168 * (1) Read the prom's mappings through the translation property. 169 * (2) Remap the kernel text and kernel data with 2 locked 4MB ttes. 170 * Create the the hmeblks for these 2 ttes at this time. 171 * (3) Create hat structures for all other prom mappings. Since the 172 * kernel text and data hme_blks have already been created we 173 * skip the equivalent prom's mappings. 174 * (4) Initialize the tsb and its corresponding hardware regs. 175 * (5) Take over the trap table (currently in startup). 176 * (6) Up to this point it is possible the prom required some of its 177 * locked tte's. Now that we own the trap table we remove them. 
178 */ 179 180 ktsb_pbase = va_to_pa(ktsb_base); 181 ktsb4m_pbase = va_to_pa(ktsb4m_base); 182 PRM_DEBUG(ktsb_pbase); 183 PRM_DEBUG(ktsb4m_pbase); 184 185 sfmmu_patch_ktsb(); 186 sfmmu_patch_utsb(); 187 sfmmu_patch_mmu_asi(ktsb_phys); 188 189 sfmmu_init_tsbs(); 190 191 if (kpm_enable) { 192 sfmmu_kpm_patch_tlbm(); 193 if (kpm_smallpages == 0) { 194 sfmmu_kpm_patch_tsbm(); 195 } 196 } 197 198 if (!shctx_on) { 199 sfmmu_patch_shctx(); 200 } 201 202 /* 203 * The 8K-indexed kernel TSB space is used to hold 204 * translations below... 205 */ 206 trans_root = read_prom_mappings(&ntrans_root); 207 sfmmu_remap_kernel(); 208 startup_fixup_physavail(); 209 mmu_init_kernel_pgsz(kas.a_hat); 210 sfmmu_map_prom_mappings(trans_root, ntrans_root); 211 212 /* 213 * We invalidate 8K kernel TSB because we used it in 214 * sfmmu_map_prom_mappings() 215 */ 216 sfmmu_inv_tsb(ktsb_base, ktsb_sz); 217 sfmmu_inv_tsb(ktsb4m_base, ktsb4m_sz); 218 219 sfmmu_init_ktsbinfo(); 220 221 222 sfmmu_kern_mapped = 1; 223 224 /* 225 * hments have been created for mapped pages, and thus we're ready 226 * for kmdb to start using its own trap table. It walks the hments 227 * to resolve TLB misses, and can't be used until they're ready. 228 */ 229 if (boothowto & RB_DEBUG) 230 kdi_dvec_vmready(); 231 } 232 233 /* 234 * Macro used below to convert the prom's 32-bit high and low fields into 235 * a value appropriate for the 64-bit kernel. 236 */ 237 238 #define COMBINE(hi, lo) (((uint64_t)(uint32_t)(hi) << 32) | (uint32_t)(lo)) 239 240 /* 241 * Track larges pages used. 242 * Provides observability for this feature on non-debug kernels. 243 */ 244 ulong_t map_prom_lpcount[MMU_PAGE_SIZES]; 245 246 /* 247 * This function traverses the prom mapping list and creates equivalent 248 * mappings in the sfmmu mapping hash. 
249 */ 250 static void 251 sfmmu_map_prom_mappings(struct translation *trans_root, size_t ntrans_root) 252 { 253 struct translation *promt; 254 tte_t tte, oldtte, *ttep; 255 pfn_t pfn, oldpfn, basepfn; 256 caddr_t vaddr; 257 size_t size, offset; 258 unsigned long i; 259 uint_t attr; 260 page_t *pp; 261 extern struct memlist *virt_avail; 262 char buf[256]; 263 264 ttep = &tte; 265 for (i = 0, promt = trans_root; i < ntrans_root; i++, promt++) { 266 ASSERT(promt->tte_hi != 0); 267 ASSERT32(promt->virt_hi == 0 && promt->size_hi == 0); 268 269 vaddr = (caddr_t)COMBINE(promt->virt_hi, promt->virt_lo); 270 271 /* 272 * hack until we get rid of map-for-unix 273 */ 274 if (vaddr < (caddr_t)KERNELBASE) 275 continue; 276 277 ttep->tte_inthi = promt->tte_hi; 278 ttep->tte_intlo = promt->tte_lo; 279 attr = PROC_DATA | HAT_NOSYNC; 280 #if defined(TTE_IS_GLOBAL) 281 if (TTE_IS_GLOBAL(ttep)) { 282 /* 283 * The prom better not use global translations 284 * because a user process might use the same 285 * virtual addresses 286 */ 287 prom_panic("sfmmu_map_prom_mappings: global" 288 " translation"); 289 TTE_SET_LOFLAGS(ttep, TTE_GLB_INT, 0); 290 } 291 #endif 292 if (TTE_IS_LOCKED(ttep)) { 293 /* clear the lock bits */ 294 TTE_CLR_LOCKED(ttep); 295 } 296 attr |= (TTE_IS_VCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEVTTE; 297 attr |= (TTE_IS_PCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEPTTE; 298 attr |= (TTE_IS_SIDEFFECT(ttep)) ? SFMMU_SIDEFFECT : 0; 299 attr |= (TTE_IS_IE(ttep)) ? 
HAT_STRUCTURE_LE : 0; 300 301 size = COMBINE(promt->size_hi, promt->size_lo); 302 offset = 0; 303 basepfn = TTE_TO_PFN((caddr_t)COMBINE(promt->virt_hi, 304 promt->virt_lo), ttep); 305 while (size) { 306 vaddr = (caddr_t)(COMBINE(promt->virt_hi, 307 promt->virt_lo) + offset); 308 309 /* 310 * make sure address is not in virt-avail list 311 */ 312 if (address_in_memlist(virt_avail, (uint64_t)vaddr, 313 size)) { 314 prom_panic("sfmmu_map_prom_mappings:" 315 " inconsistent translation/avail lists"); 316 } 317 318 pfn = basepfn + mmu_btop(offset); 319 if (pf_is_memory(pfn)) { 320 if (attr & SFMMU_UNCACHEPTTE) { 321 prom_panic("sfmmu_map_prom_mappings:" 322 " uncached prom memory page"); 323 } 324 } else { 325 if (!(attr & SFMMU_SIDEFFECT)) { 326 prom_panic("sfmmu_map_prom_mappings:" 327 " prom i/o page without" 328 " side-effect"); 329 } 330 } 331 332 /* 333 * skip kmem64 area 334 */ 335 if (!kmem64_smchunks && 336 vaddr >= kmem64_base && 337 vaddr < kmem64_aligned_end) { 338 #if !defined(C_OBP) 339 prom_panic("sfmmu_map_prom_mappings:" 340 " unexpected kmem64 prom mapping"); 341 #else /* !C_OBP */ 342 size_t mapsz; 343 344 if (ptob(pfn) != 345 kmem64_pabase + (vaddr - kmem64_base)) { 346 prom_panic("sfmmu_map_prom_mappings:" 347 " unexpected kmem64 prom mapping"); 348 } 349 350 mapsz = kmem64_aligned_end - vaddr; 351 if (mapsz >= size) { 352 break; 353 } 354 size -= mapsz; 355 offset += mapsz; 356 continue; 357 #endif /* !C_OBP */ 358 } 359 360 oldpfn = sfmmu_vatopfn(vaddr, KHATID, &oldtte); 361 ASSERT(oldpfn != PFN_SUSPENDED); 362 ASSERT(page_relocate_ready == 0); 363 364 if (oldpfn != PFN_INVALID) { 365 /* 366 * mapping already exists. 
367 * Verify they are equal 368 */ 369 if (pfn != oldpfn) { 370 (void) snprintf(buf, sizeof (buf), 371 "sfmmu_map_prom_mappings: mapping" 372 " conflict (va = 0x%p, pfn = 0x%p," 373 " oldpfn = 0x%p)", (void *)vaddr, 374 (void *)pfn, (void *)oldpfn); 375 prom_panic(buf); 376 } 377 size -= MMU_PAGESIZE; 378 offset += MMU_PAGESIZE; 379 continue; 380 } 381 382 pp = page_numtopp_nolock(pfn); 383 if ((pp != NULL) && PP_ISFREE((page_t *)pp)) { 384 (void) snprintf(buf, sizeof (buf), 385 "sfmmu_map_prom_mappings: prom-mapped" 386 " page (va = 0x%p, pfn = 0x%p) on free list", 387 (void *)vaddr, (void *)pfn); 388 prom_panic(buf); 389 } 390 391 sfmmu_memtte(ttep, pfn, attr, TTE8K); 392 sfmmu_tteload(kas.a_hat, ttep, vaddr, pp, 393 HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD); 394 size -= MMU_PAGESIZE; 395 offset += MMU_PAGESIZE; 396 } 397 } 398 399 /* 400 * We claimed kmem64 from prom, so now we need to load tte. 401 */ 402 if (!kmem64_smchunks && kmem64_base != NULL) { 403 pgcnt_t pages; 404 size_t psize; 405 int pszc; 406 407 pszc = kmem64_szc; 408 #ifdef sun4u 409 if (pszc > TTE8K) { 410 pszc = segkmem_lpszc; 411 } 412 #endif /* sun4u */ 413 psize = TTEBYTES(pszc); 414 pages = btop(psize); 415 basepfn = kmem64_pabase >> MMU_PAGESHIFT; 416 vaddr = kmem64_base; 417 while (vaddr < kmem64_end) { 418 sfmmu_memtte(ttep, basepfn, 419 PROC_DATA | HAT_NOSYNC, pszc); 420 sfmmu_tteload(kas.a_hat, ttep, vaddr, NULL, 421 HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD); 422 vaddr += psize; 423 basepfn += pages; 424 } 425 map_prom_lpcount[pszc] = 426 ((caddr_t)P2ROUNDUP((uintptr_t)kmem64_end, psize) - 427 kmem64_base) >> TTE_PAGE_SHIFT(pszc); 428 } 429 } 430 431 #undef COMBINE /* local to previous routine */ 432 433 /* 434 * This routine reads in the "translations" property in to a buffer and 435 * returns a pointer to this buffer and the number of translations. 
436 */ 437 static struct translation * 438 read_prom_mappings(size_t *ntransrootp) 439 { 440 char *prop = "translations"; 441 size_t translen; 442 pnode_t node; 443 struct translation *transroot; 444 445 /* 446 * the "translations" property is associated with the mmu node 447 */ 448 node = (pnode_t)prom_getphandle(prom_mmu_ihandle()); 449 450 /* 451 * We use the TSB space to read in the prom mappings. This space 452 * is currently not being used because we haven't taken over the 453 * trap table yet. It should be big enough to hold the mappings. 454 */ 455 if ((translen = prom_getproplen(node, prop)) == -1) 456 cmn_err(CE_PANIC, "no translations property"); 457 *ntransrootp = translen / sizeof (*transroot); 458 translen = roundup(translen, MMU_PAGESIZE); 459 PRM_DEBUG(translen); 460 if (translen > TSB_BYTES(ktsb_szcode)) 461 cmn_err(CE_PANIC, "not enough space for translations"); 462 463 transroot = (struct translation *)ktsb_base; 464 ASSERT(transroot); 465 if (prom_getprop(node, prop, (caddr_t)transroot) == -1) { 466 cmn_err(CE_PANIC, "translations getprop failed"); 467 } 468 return (transroot); 469 } 470 471 /* 472 * Init routine of the nucleus data memory allocator. 473 * 474 * The nucleus data memory allocator is organized in ecache_alignsize'd 475 * memory chunks. Memory allocated by ndata_alloc() will never be freed. 476 * 477 * The ndata argument is used as header of the ndata freelist. 478 * Other freelist nodes are placed in the nucleus memory itself 479 * at the beginning of a free memory chunk. Therefore a freelist 480 * node (struct memlist) must fit into the smallest allocatable 481 * memory chunk (ecache_alignsize bytes). 482 * 483 * The memory interval [base, end] passed to ndata_alloc_init() must be 484 * bzero'd to allow the allocator to return bzero'd memory easily. 
485 */ 486 void 487 ndata_alloc_init(struct memlist *ndata, uintptr_t base, uintptr_t end) 488 { 489 ASSERT(sizeof (struct memlist) <= ecache_alignsize); 490 491 base = roundup(base, ecache_alignsize); 492 end = end - end % ecache_alignsize; 493 494 ASSERT(base < end); 495 496 ndata->address = base; 497 ndata->size = end - base; 498 ndata->next = NULL; 499 ndata->prev = NULL; 500 } 501 502 /* 503 * Deliver the size of the largest free memory chunk. 504 */ 505 size_t 506 ndata_maxsize(struct memlist *ndata) 507 { 508 size_t chunksize = ndata->size; 509 510 while ((ndata = ndata->next) != NULL) { 511 if (chunksize < ndata->size) 512 chunksize = ndata->size; 513 } 514 515 return (chunksize); 516 } 517 518 519 /* 520 * Allocate the last properly aligned memory chunk. 521 * This function is called when no more large nucleus memory chunks 522 * will be allocated. The remaining free nucleus memory at the end 523 * of the nucleus can be added to the phys_avail list. 524 */ 525 void * 526 ndata_extra_base(struct memlist *ndata, size_t alignment, caddr_t endaddr) 527 { 528 uintptr_t base; 529 size_t wasteage = 0; 530 #ifdef DEBUG 531 static int called = 0; 532 533 if (called++ > 0) 534 cmn_err(CE_PANIC, "ndata_extra_base() called more than once"); 535 #endif /* DEBUG */ 536 537 /* 538 * The alignment needs to be a multiple of ecache_alignsize. 
539 */ 540 ASSERT((alignment % ecache_alignsize) == 0); 541 542 while (ndata->next != NULL) { 543 wasteage += ndata->size; 544 ndata = ndata->next; 545 } 546 547 base = roundup(ndata->address, alignment); 548 549 if (base >= ndata->address + ndata->size) 550 return (NULL); 551 552 if ((caddr_t)(ndata->address + ndata->size) != endaddr) { 553 #ifdef DEBUG 554 ndata_middle_hole_detected = 1; /* see if we hit this again */ 555 #endif 556 return (NULL); 557 } 558 559 if (base == ndata->address) { 560 if (ndata->prev != NULL) 561 ndata->prev->next = NULL; 562 else 563 ndata->size = 0; 564 565 bzero((void *)base, sizeof (struct memlist)); 566 567 } else { 568 ndata->size = base - ndata->address; 569 wasteage += ndata->size; 570 } 571 PRM_DEBUG(wasteage); 572 573 return ((void *)base); 574 } 575 576 /* 577 * Select the best matching buffer, avoid memory fragmentation. 578 */ 579 static struct memlist * 580 ndata_select_chunk(struct memlist *ndata, size_t wanted, size_t alignment) 581 { 582 struct memlist *fnd_below = NULL; 583 struct memlist *fnd_above = NULL; 584 struct memlist *fnd_unused = NULL; 585 struct memlist *frlist; 586 uintptr_t base; 587 uintptr_t end; 588 size_t below; 589 size_t above; 590 size_t unused; 591 size_t best_below = ULONG_MAX; 592 size_t best_above = ULONG_MAX; 593 size_t best_unused = ULONG_MAX; 594 595 ASSERT(ndata != NULL); 596 597 /* 598 * Look for the best matching buffer, avoid memory fragmentation. 599 * The following strategy is used, try to find 600 * 1. an exact fitting buffer 601 * 2. avoid wasting any space below the buffer, take first 602 * fitting buffer 603 * 3. avoid wasting any space above the buffer, take first 604 * fitting buffer 605 * 4. avoid wasting space, take first fitting buffer 606 * 5. 
take the last buffer in chain 607 */ 608 for (frlist = ndata; frlist != NULL; frlist = frlist->next) { 609 base = roundup(frlist->address, alignment); 610 end = roundup(base + wanted, ecache_alignsize); 611 612 if (end > frlist->address + frlist->size) 613 continue; 614 615 below = (base - frlist->address) / ecache_alignsize; 616 above = (frlist->address + frlist->size - end) / 617 ecache_alignsize; 618 unused = below + above; 619 620 if (unused == 0) 621 return (frlist); 622 623 if (frlist->next == NULL) 624 break; 625 626 if (below < best_below) { 627 best_below = below; 628 fnd_below = frlist; 629 } 630 631 if (above < best_above) { 632 best_above = above; 633 fnd_above = frlist; 634 } 635 636 if (unused < best_unused) { 637 best_unused = unused; 638 fnd_unused = frlist; 639 } 640 } 641 642 if (best_below == 0) 643 return (fnd_below); 644 if (best_above == 0) 645 return (fnd_above); 646 if (best_unused < ULONG_MAX) 647 return (fnd_unused); 648 649 return (frlist); 650 } 651 652 /* 653 * Nucleus data memory allocator. 654 * The granularity of the allocator is ecache_alignsize. 655 * See also comment for ndata_alloc_init(). 656 */ 657 void * 658 ndata_alloc(struct memlist *ndata, size_t wanted, size_t alignment) 659 { 660 struct memlist *found; 661 struct memlist *fnd_above; 662 uintptr_t base; 663 uintptr_t end; 664 size_t below; 665 size_t above; 666 667 /* 668 * Look for the best matching buffer, avoid memory fragmentation. 669 */ 670 if ((found = ndata_select_chunk(ndata, wanted, alignment)) == NULL) 671 return (NULL); 672 673 /* 674 * Allocate the nucleus data buffer. 
675 */ 676 base = roundup(found->address, alignment); 677 end = roundup(base + wanted, ecache_alignsize); 678 ASSERT(end <= found->address + found->size); 679 680 below = base - found->address; 681 above = found->address + found->size - end; 682 ASSERT(above == 0 || (above % ecache_alignsize) == 0); 683 684 if (below >= ecache_alignsize) { 685 /* 686 * There is free memory below the allocated memory chunk. 687 */ 688 found->size = below - below % ecache_alignsize; 689 690 if (above) { 691 fnd_above = (struct memlist *)end; 692 fnd_above->address = end; 693 fnd_above->size = above; 694 695 if ((fnd_above->next = found->next) != NULL) 696 found->next->prev = fnd_above; 697 fnd_above->prev = found; 698 found->next = fnd_above; 699 } 700 701 return ((void *)base); 702 } 703 704 if (found->prev == NULL) { 705 /* 706 * The first chunk (ndata) is selected. 707 */ 708 ASSERT(found == ndata); 709 if (above) { 710 found->address = end; 711 found->size = above; 712 } else if (found->next != NULL) { 713 found->address = found->next->address; 714 found->size = found->next->size; 715 if ((found->next = found->next->next) != NULL) 716 found->next->prev = found; 717 718 bzero((void *)found->address, sizeof (struct memlist)); 719 } else { 720 found->address = end; 721 found->size = 0; 722 } 723 724 return ((void *)base); 725 } 726 727 /* 728 * Not the first chunk. 729 */ 730 if (above) { 731 fnd_above = (struct memlist *)end; 732 fnd_above->address = end; 733 fnd_above->size = above; 734 735 if ((fnd_above->next = found->next) != NULL) 736 fnd_above->next->prev = fnd_above; 737 fnd_above->prev = found->prev; 738 found->prev->next = fnd_above; 739 740 } else { 741 if ((found->prev->next = found->next) != NULL) 742 found->next->prev = found->prev; 743 } 744 745 bzero((void *)found->address, sizeof (struct memlist)); 746 747 return ((void *)base); 748 } 749 750 /* 751 * Size the kernel TSBs based upon the amount of physical 752 * memory in the system. 
753 */ 754 static void 755 calc_tsb_sizes(pgcnt_t npages) 756 { 757 PRM_DEBUG(npages); 758 759 if (npages <= TSB_FREEMEM_MIN) { 760 ktsb_szcode = TSB_128K_SZCODE; 761 enable_bigktsb = 0; 762 } else if (npages <= TSB_FREEMEM_LARGE / 2) { 763 ktsb_szcode = TSB_256K_SZCODE; 764 enable_bigktsb = 0; 765 } else if (npages <= TSB_FREEMEM_LARGE) { 766 ktsb_szcode = TSB_512K_SZCODE; 767 enable_bigktsb = 0; 768 } else if (npages <= TSB_FREEMEM_LARGE * 2 || 769 enable_bigktsb == 0) { 770 ktsb_szcode = TSB_1M_SZCODE; 771 enable_bigktsb = 0; 772 } else { 773 ktsb_szcode = highbit(npages - 1); 774 ktsb_szcode -= TSB_START_SIZE; 775 ktsb_szcode = MAX(ktsb_szcode, MIN_BIGKTSB_SZCODE); 776 ktsb_szcode = MIN(ktsb_szcode, MAX_BIGKTSB_SZCODE); 777 } 778 779 /* 780 * We choose the TSB to hold kernel 4M mappings to have twice 781 * the reach as the primary kernel TSB since this TSB will 782 * potentially (currently) be shared by both mappings to all of 783 * physical memory plus user TSBs. If this TSB has to be in nucleus 784 * (only for Spitfire and Cheetah) limit its size to 64K. 785 */ 786 ktsb4m_szcode = highbit((2 * npages) / TTEPAGES(TTE4M) - 1); 787 ktsb4m_szcode -= TSB_START_SIZE; 788 ktsb4m_szcode = MAX(ktsb4m_szcode, TSB_MIN_SZCODE); 789 ktsb4m_szcode = MIN(ktsb4m_szcode, TSB_SOFTSZ_MASK); 790 if ((enable_bigktsb == 0 || ktsb_phys == 0) && ktsb4m_szcode > 791 TSB_64K_SZCODE) { 792 ktsb4m_szcode = TSB_64K_SZCODE; 793 max_bootlp_tteszc = TTE8K; 794 } 795 796 ktsb_sz = TSB_BYTES(ktsb_szcode); /* kernel 8K tsb size */ 797 ktsb4m_sz = TSB_BYTES(ktsb4m_szcode); /* kernel 4M tsb size */ 798 } 799 800 /* 801 * Allocate kernel TSBs from nucleus data memory. 802 * The function return 0 on success and -1 on failure. 803 */ 804 int 805 ndata_alloc_tsbs(struct memlist *ndata, pgcnt_t npages) 806 { 807 /* 808 * Set ktsb_phys to 1 if the processor supports ASI_QUAD_LDD_PHYS. 
809 */ 810 sfmmu_setup_4lp(); 811 812 /* 813 * Size the kernel TSBs based upon the amount of physical 814 * memory in the system. 815 */ 816 calc_tsb_sizes(npages); 817 818 /* 819 * Allocate the 8K kernel TSB if it belongs inside the nucleus. 820 */ 821 if (enable_bigktsb == 0) { 822 if ((ktsb_base = ndata_alloc(ndata, ktsb_sz, ktsb_sz)) == NULL) 823 return (-1); 824 ASSERT(!((uintptr_t)ktsb_base & (ktsb_sz - 1))); 825 826 PRM_DEBUG(ktsb_base); 827 PRM_DEBUG(ktsb_sz); 828 PRM_DEBUG(ktsb_szcode); 829 } 830 831 /* 832 * Next, allocate 4M kernel TSB from the nucleus since it's small. 833 */ 834 if (ktsb4m_szcode <= TSB_64K_SZCODE) { 835 836 ktsb4m_base = ndata_alloc(ndata, ktsb4m_sz, ktsb4m_sz); 837 if (ktsb4m_base == NULL) 838 return (-1); 839 ASSERT(!((uintptr_t)ktsb4m_base & (ktsb4m_sz - 1))); 840 841 PRM_DEBUG(ktsb4m_base); 842 PRM_DEBUG(ktsb4m_sz); 843 PRM_DEBUG(ktsb4m_szcode); 844 } 845 846 return (0); 847 } 848 849 size_t 850 calc_hmehash_sz(pgcnt_t npages) 851 { 852 ulong_t hme_buckets; 853 854 /* 855 * The number of buckets in the hme hash tables 856 * is a power of 2 such that the average hash chain length is 857 * HMENT_HASHAVELEN. The number of buckets for the user hash is 858 * a function of physical memory and a predefined overmapping factor. 859 * The number of buckets for the kernel hash is a function of 860 * physical memory only. 861 */ 862 hme_buckets = (npages * HMEHASH_FACTOR) / 863 (HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT)); 864 865 uhmehash_num = (int)MIN(hme_buckets, MAX_UHME_BUCKETS); 866 867 if (uhmehash_num > USER_BUCKETS_THRESHOLD) { 868 /* 869 * if uhmehash_num is not power of 2 round it down to the 870 * next power of 2. 
871 */ 872 uint_t align = 1 << (highbit(uhmehash_num - 1) - 1); 873 uhmehash_num = P2ALIGN(uhmehash_num, align); 874 } else 875 uhmehash_num = 1 << highbit(uhmehash_num - 1); 876 877 hme_buckets = npages / (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT); 878 khmehash_num = (int)MIN(hme_buckets, MAX_KHME_BUCKETS); 879 khmehash_num = 1 << highbit(khmehash_num - 1); 880 khmehash_num = MAX(khmehash_num, MIN_KHME_BUCKETS); 881 882 return ((uhmehash_num + khmehash_num) * sizeof (struct hmehash_bucket)); 883 } 884 885 caddr_t 886 alloc_hmehash(caddr_t alloc_base) 887 { 888 size_t khmehash_sz, uhmehash_sz; 889 890 khme_hash = (struct hmehash_bucket *)alloc_base; 891 khmehash_sz = khmehash_num * sizeof (struct hmehash_bucket); 892 alloc_base += khmehash_sz; 893 894 uhme_hash = (struct hmehash_bucket *)alloc_base; 895 uhmehash_sz = uhmehash_num * sizeof (struct hmehash_bucket); 896 alloc_base += uhmehash_sz; 897 898 PRM_DEBUG(khme_hash); 899 PRM_DEBUG(uhme_hash); 900 901 return (alloc_base); 902 } 903 904 /* 905 * Allocate hat structs from the nucleus data memory. 906 */ 907 int 908 ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages) 909 { 910 size_t mml_alloc_sz; 911 size_t cb_alloc_sz; 912 913 /* 914 * For the page mapping list mutex array we allocate one mutex 915 * for every 128 pages (1 MB) with a minimum of 64 entries and 916 * a maximum of 8K entries. For the initial computation npages 917 * is rounded up (ie. 
1 << highbit(npages * 1.5 / 128)) 918 * 919 * mml_shift is roughly log2(mml_table_sz) + 3 for MLIST_HASH 920 */ 921 mml_table_sz = 1 << highbit((npages * 3) / 256); 922 if (mml_table_sz < 64) 923 mml_table_sz = 64; 924 else if (mml_table_sz > 8192) 925 mml_table_sz = 8192; 926 mml_shift = highbit(mml_table_sz) + 3; 927 928 PRM_DEBUG(mml_table_sz); 929 PRM_DEBUG(mml_shift); 930 931 mml_alloc_sz = mml_table_sz * sizeof (kmutex_t); 932 933 mml_table = ndata_alloc(ndata, mml_alloc_sz, ecache_alignsize); 934 if (mml_table == NULL) 935 return (-1); 936 PRM_DEBUG(mml_table); 937 938 cb_alloc_sz = sfmmu_max_cb_id * sizeof (struct sfmmu_callback); 939 PRM_DEBUG(cb_alloc_sz); 940 sfmmu_cb_table = ndata_alloc(ndata, cb_alloc_sz, ecache_alignsize); 941 if (sfmmu_cb_table == NULL) 942 return (-1); 943 PRM_DEBUG(sfmmu_cb_table); 944 945 return (0); 946 } 947 948 int 949 ndata_alloc_kpm(struct memlist *ndata, pgcnt_t kpm_npages) 950 { 951 size_t kpmp_alloc_sz; 952 953 /* 954 * For the kpm_page mutex array we allocate one mutex every 16 955 * kpm pages (64MB). In smallpage mode we allocate one mutex 956 * every 8K pages. The minimum is set to 64 entries and the 957 * maximum to 8K entries. 958 */ 959 if (kpm_smallpages == 0) { 960 kpmp_shift = highbit(sizeof (kpm_page_t)) - 1; 961 kpmp_table_sz = 1 << highbit(kpm_npages / 16); 962 kpmp_table_sz = (kpmp_table_sz < 64) ? 64 : 963 ((kpmp_table_sz > 8192) ? 8192 : kpmp_table_sz); 964 kpmp_alloc_sz = kpmp_table_sz * sizeof (kpm_hlk_t); 965 966 kpmp_table = ndata_alloc(ndata, kpmp_alloc_sz, 967 ecache_alignsize); 968 if (kpmp_table == NULL) 969 return (-1); 970 971 PRM_DEBUG(kpmp_table); 972 PRM_DEBUG(kpmp_table_sz); 973 974 kpmp_stable_sz = 0; 975 kpmp_stable = NULL; 976 } else { 977 ASSERT(kpm_pgsz == PAGESIZE); 978 kpmp_shift = highbit(sizeof (kpm_shlk_t)) + 1; 979 kpmp_stable_sz = 1 << highbit(kpm_npages / 8192); 980 kpmp_stable_sz = (kpmp_stable_sz < 64) ? 64 : 981 ((kpmp_stable_sz > 8192) ? 
8192 : kpmp_stable_sz); 982 kpmp_alloc_sz = kpmp_stable_sz * sizeof (kpm_shlk_t); 983 984 kpmp_stable = ndata_alloc(ndata, kpmp_alloc_sz, 985 ecache_alignsize); 986 if (kpmp_stable == NULL) 987 return (-1); 988 989 PRM_DEBUG(kpmp_stable); 990 PRM_DEBUG(kpmp_stable_sz); 991 992 kpmp_table_sz = 0; 993 kpmp_table = NULL; 994 } 995 PRM_DEBUG(kpmp_shift); 996 997 return (0); 998 } 999 1000 /* 1001 * This function bop allocs kernel TSBs. 1002 */ 1003 caddr_t 1004 sfmmu_ktsb_alloc(caddr_t tsbbase) 1005 { 1006 caddr_t vaddr; 1007 1008 if (enable_bigktsb) { 1009 ktsb_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb_sz); 1010 vaddr = prom_alloc(ktsb_base, ktsb_sz, ktsb_sz); 1011 if (vaddr != ktsb_base) 1012 cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc" 1013 " 8K bigktsb"); 1014 ktsb_base = vaddr; 1015 tsbbase = ktsb_base + ktsb_sz; 1016 PRM_DEBUG(ktsb_base); 1017 PRM_DEBUG(tsbbase); 1018 } 1019 1020 if (ktsb4m_szcode > TSB_64K_SZCODE) { 1021 ASSERT(ktsb_phys && enable_bigktsb); 1022 ktsb4m_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb4m_sz); 1023 vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb4m_base, ktsb4m_sz, 1024 ktsb4m_sz); 1025 if (vaddr != ktsb4m_base) 1026 cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc" 1027 " 4M bigktsb"); 1028 ktsb4m_base = vaddr; 1029 tsbbase = ktsb4m_base + ktsb4m_sz; 1030 PRM_DEBUG(ktsb4m_base); 1031 PRM_DEBUG(tsbbase); 1032 } 1033 return (tsbbase); 1034 } 1035 1036 /* 1037 * Moves code assembled outside of the trap table into the trap 1038 * table taking care to relocate relative branches to code outside 1039 * of the trap handler. 
1040 */ 1041 static void 1042 sfmmu_reloc_trap_handler(void *tablep, void *start, size_t count) 1043 { 1044 size_t i; 1045 uint32_t *src; 1046 uint32_t *dst; 1047 uint32_t inst; 1048 int op, op2; 1049 int32_t offset; 1050 int disp; 1051 1052 src = start; 1053 dst = tablep; 1054 offset = src - dst; 1055 for (src = start, i = 0; i < count; i++, src++, dst++) { 1056 inst = *dst = *src; 1057 op = (inst >> 30) & 0x2; 1058 if (op == 1) { 1059 /* call */ 1060 disp = ((int32_t)inst << 2) >> 2; /* sign-extend */ 1061 if (disp + i >= 0 && disp + i < count) 1062 continue; 1063 disp += offset; 1064 inst = 0x40000000u | (disp & 0x3fffffffu); 1065 *dst = inst; 1066 } else if (op == 0) { 1067 /* branch or sethi */ 1068 op2 = (inst >> 22) & 0x7; 1069 1070 switch (op2) { 1071 case 0x3: /* BPr */ 1072 disp = (((inst >> 20) & 0x3) << 14) | 1073 (inst & 0x3fff); 1074 disp = (disp << 16) >> 16; /* sign-extend */ 1075 if (disp + i >= 0 && disp + i < count) 1076 continue; 1077 disp += offset; 1078 if (((disp << 16) >> 16) != disp) 1079 cmn_err(CE_PANIC, "bad reloc"); 1080 inst &= ~0x303fff; 1081 inst |= (disp & 0x3fff); 1082 inst |= (disp & 0xc000) << 6; 1083 break; 1084 1085 case 0x2: /* Bicc */ 1086 disp = ((int32_t)inst << 10) >> 10; 1087 if (disp + i >= 0 && disp + i < count) 1088 continue; 1089 disp += offset; 1090 if (((disp << 10) >> 10) != disp) 1091 cmn_err(CE_PANIC, "bad reloc"); 1092 inst &= ~0x3fffff; 1093 inst |= (disp & 0x3fffff); 1094 break; 1095 1096 case 0x1: /* Bpcc */ 1097 disp = ((int32_t)inst << 13) >> 13; 1098 if (disp + i >= 0 && disp + i < count) 1099 continue; 1100 disp += offset; 1101 if (((disp << 13) >> 13) != disp) 1102 cmn_err(CE_PANIC, "bad reloc"); 1103 inst &= ~0x7ffff; 1104 inst |= (disp & 0x7ffffu); 1105 break; 1106 } 1107 *dst = inst; 1108 } 1109 } 1110 flush_instr_mem(tablep, count * sizeof (uint32_t)); 1111 } 1112 1113 /* 1114 * Routine to allocate a large page to use in the TSB caches. 
1115 */ 1116 /*ARGSUSED*/ 1117 static page_t * 1118 sfmmu_tsb_page_create(void *addr, size_t size, int vmflag, void *arg) 1119 { 1120 int pgflags; 1121 1122 pgflags = PG_EXCL; 1123 if ((vmflag & VM_NOSLEEP) == 0) 1124 pgflags |= PG_WAIT; 1125 if (vmflag & VM_PANIC) 1126 pgflags |= PG_PANIC; 1127 if (vmflag & VM_PUSHPAGE) 1128 pgflags |= PG_PUSHPAGE; 1129 1130 return (page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size, 1131 pgflags, &kvseg, addr, arg)); 1132 } 1133 1134 /* 1135 * Allocate a large page to back the virtual address range 1136 * [addr, addr + size). If addr is NULL, allocate the virtual address 1137 * space as well. 1138 */ 1139 static void * 1140 sfmmu_tsb_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, 1141 uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *), 1142 void *pcarg) 1143 { 1144 page_t *ppl; 1145 page_t *rootpp; 1146 caddr_t addr = inaddr; 1147 pgcnt_t npages = btopr(size); 1148 page_t **ppa; 1149 int i = 0; 1150 1151 /* 1152 * Assuming that only TSBs will call this with size > PAGESIZE 1153 * There is no reason why this couldn't be expanded to 8k pages as 1154 * well, or other page sizes in the future .... but for now, we 1155 * only support fixed sized page requests. 
1156 */ 1157 if ((inaddr == NULL) && ((addr = vmem_xalloc(vmp, size, size, 0, 0, 1158 NULL, NULL, vmflag)) == NULL)) 1159 return (NULL); 1160 1161 if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) { 1162 if (inaddr == NULL) 1163 vmem_xfree(vmp, addr, size); 1164 return (NULL); 1165 } 1166 1167 ppl = page_create_func(addr, size, vmflag, pcarg); 1168 if (ppl == NULL) { 1169 if (inaddr == NULL) 1170 vmem_xfree(vmp, addr, size); 1171 page_unresv(npages); 1172 return (NULL); 1173 } 1174 1175 rootpp = ppl; 1176 ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP); 1177 while (ppl != NULL) { 1178 page_t *pp = ppl; 1179 ppa[i++] = pp; 1180 page_sub(&ppl, pp); 1181 ASSERT(page_iolock_assert(pp)); 1182 page_io_unlock(pp); 1183 } 1184 1185 /* 1186 * Load the locked entry. It's OK to preload the entry into 1187 * the TSB since we now support large mappings in the kernel TSB. 1188 */ 1189 hat_memload_array(kas.a_hat, (caddr_t)rootpp->p_offset, size, 1190 ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr, HAT_LOAD_LOCK); 1191 1192 for (--i; i >= 0; --i) { 1193 (void) page_pp_lock(ppa[i], 0, 1); 1194 page_unlock(ppa[i]); 1195 } 1196 1197 kmem_free(ppa, npages * sizeof (page_t *)); 1198 return (addr); 1199 } 1200 1201 /* Called to import new spans into the TSB vmem arenas */ 1202 void * 1203 sfmmu_tsb_segkmem_alloc(vmem_t *vmp, size_t size, int vmflag) 1204 { 1205 lgrp_id_t lgrpid = LGRP_NONE; 1206 1207 if (tsb_lgrp_affinity) { 1208 /* 1209 * Search for the vmp->lgrpid mapping by brute force; 1210 * some day vmp will have an lgrp, until then we have 1211 * to do this the hard way. 1212 */ 1213 for (lgrpid = 0; lgrpid < NLGRPS_MAX && 1214 vmp != kmem_tsb_default_arena[lgrpid]; lgrpid++) 1215 ; 1216 if (lgrpid == NLGRPS_MAX) 1217 lgrpid = LGRP_NONE; 1218 } 1219 1220 return (sfmmu_tsb_xalloc(vmp, NULL, size, vmflag, 0, 1221 sfmmu_tsb_page_create, lgrpid != LGRP_NONE? 
&lgrpid : NULL)); 1222 } 1223 1224 /* Called to free spans from the TSB vmem arenas */ 1225 void 1226 sfmmu_tsb_segkmem_free(vmem_t *vmp, void *inaddr, size_t size) 1227 { 1228 page_t *pp; 1229 caddr_t addr = inaddr; 1230 caddr_t eaddr; 1231 pgcnt_t npages = btopr(size); 1232 pgcnt_t pgs_left = npages; 1233 page_t *rootpp = NULL; 1234 1235 hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK); 1236 1237 for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) { 1238 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL); 1239 if (pp == NULL) 1240 panic("sfmmu_tsb_segkmem_free: page not found"); 1241 1242 ASSERT(PAGE_EXCL(pp)); 1243 page_pp_unlock(pp, 0, 1); 1244 1245 if (rootpp == NULL) 1246 rootpp = pp; 1247 if (--pgs_left == 0) { 1248 /* 1249 * similar logic to segspt_free_pages, but we know we 1250 * have one large page. 1251 */ 1252 page_destroy_pages(rootpp); 1253 } 1254 } 1255 page_unresv(npages); 1256 1257 if (vmp != NULL) 1258 vmem_xfree(vmp, inaddr, size); 1259 } 1260