1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <vm/hat.h> 29 #include <vm/hat_sfmmu.h> 30 #include <vm/page.h> 31 #include <sys/pte.h> 32 #include <sys/systm.h> 33 #include <sys/mman.h> 34 #include <sys/sysmacros.h> 35 #include <sys/machparam.h> 36 #include <sys/vtrace.h> 37 #include <sys/kmem.h> 38 #include <sys/mmu.h> 39 #include <sys/cmn_err.h> 40 #include <sys/cpu.h> 41 #include <sys/cpuvar.h> 42 #include <sys/debug.h> 43 #include <sys/lgrp.h> 44 #include <sys/archsystm.h> 45 #include <sys/machsystm.h> 46 #include <sys/vmsystm.h> 47 #include <sys/bitmap.h> 48 #include <vm/as.h> 49 #include <vm/seg.h> 50 #include <vm/seg_kmem.h> 51 #include <vm/seg_kp.h> 52 #include <vm/seg_kpm.h> 53 #include <vm/rm.h> 54 #include <vm/vm_dep.h> 55 #include <sys/t_lock.h> 56 #include <sys/vm_machparam.h> 57 #include <sys/promif.h> 58 #include <sys/prom_isa.h> 59 #include <sys/prom_plat.h> 60 #include <sys/prom_debug.h> 61 #include <sys/privregs.h> 62 #include <sys/bootconf.h> 63 #include <sys/memlist.h> 64 #include <sys/memlist_plat.h> 65 #include <sys/cpu_module.h> 66 #include <sys/reboot.h> 67 #include <sys/kdi.h> 68 69 /* 70 * Static routines 71 */ 72 static void sfmmu_map_prom_mappings(struct translation *, size_t); 73 static struct translation *read_prom_mappings(size_t *); 74 static void sfmmu_reloc_trap_handler(void *, void *, size_t); 75 76 /* 77 * External routines 78 */ 79 extern void sfmmu_remap_kernel(void); 80 extern void sfmmu_patch_utsb(void); 81 82 /* 83 * Global Data: 84 */ 85 extern caddr_t textva, datava; 86 extern tte_t ktext_tte, kdata_tte; /* ttes for kernel text and data */ 87 extern int enable_bigktsb; 88 extern int kmem64_smchunks; 89 90 uint64_t memsegspa = (uintptr_t)MSEG_NULLPTR_PA; /* memsegs physical linkage */ 91 uint64_t memseg_phash[N_MEM_SLOTS]; /* use physical memseg addresses */ 92 93 int sfmmu_kern_mapped = 0; 94 95 /* 96 * DMMU primary context register for the kernel context. Machine specific code 97 * inserts correct page size codes when necessary 98 */ 99 uint64_t kcontextreg = KCONTEXT; 100 101 #ifdef DEBUG 102 static int ndata_middle_hole_detected = 0; 103 #endif 104 105 /* Extern Global Data */ 106 107 extern int page_relocate_ready; 108 109 /* 110 * Controls the logic which enables the use of the 111 * QUAD_LDD_PHYS ASI for TSB accesses. 112 */ 113 extern int ktsb_phys; 114 115 /* 116 * Global Routines called from within: 117 * usr/src/uts/sun4u 118 * usr/src/uts/sfmmu 119 * usr/src/uts/sun 120 */ 121 122 pfn_t 123 va_to_pfn(void *vaddr) 124 { 125 u_longlong_t physaddr; 126 int mode, valid; 127 128 if (tba_taken_over) 129 return (hat_getpfnum(kas.a_hat, (caddr_t)vaddr)); 130 131 #if !defined(C_OBP) 132 if (!kmem64_smchunks && 133 (caddr_t)vaddr >= kmem64_base && (caddr_t)vaddr < kmem64_end) { 134 if (kmem64_pabase == (uint64_t)-1) 135 prom_panic("va_to_pfn: kmem64_pabase not init"); 136 physaddr = kmem64_pabase + ((caddr_t)vaddr - kmem64_base); 137 return ((pfn_t)physaddr >> MMU_PAGESHIFT); 138 } 139 #endif /* !C_OBP */ 140 141 if ((prom_translate_virt(vaddr, &valid, &physaddr, &mode) != -1) && 142 (valid == -1)) { 143 return ((pfn_t)(physaddr >> MMU_PAGESHIFT)); 144 } 145 return (PFN_INVALID); 146 } 147 148 uint64_t 149 va_to_pa(void *vaddr) 150 { 151 pfn_t pfn; 152 153 if ((pfn = va_to_pfn(vaddr)) == PFN_INVALID) 154 return ((uint64_t)-1); 155 return (((uint64_t)pfn << MMU_PAGESHIFT) | 156 ((uint64_t)vaddr & MMU_PAGEOFFSET)); 157 } 158 159 void 160 hat_kern_setup(void) 161 { 162 struct translation *trans_root; 163 size_t ntrans_root; 164 extern void startup_fixup_physavail(void); 165 166 /* 167 * These are the steps we take to take over the mmu from the prom. 168 * 169 * (1) Read the prom's mappings through the translation property. 170 * (2) Remap the kernel text and kernel data with 2 locked 4MB ttes. 171 * Create the the hmeblks for these 2 ttes at this time. 172 * (3) Create hat structures for all other prom mappings. Since the 173 * kernel text and data hme_blks have already been created we 174 * skip the equivalent prom's mappings. 175 * (4) Initialize the tsb and its corresponding hardware regs. 176 * (5) Take over the trap table (currently in startup). 177 * (6) Up to this point it is possible the prom required some of its 178 * locked tte's. Now that we own the trap table we remove them. 179 */ 180 181 ktsb_pbase = va_to_pa(ktsb_base); 182 ktsb4m_pbase = va_to_pa(ktsb4m_base); 183 PRM_DEBUG(ktsb_pbase); 184 PRM_DEBUG(ktsb4m_pbase); 185 186 sfmmu_patch_ktsb(); 187 sfmmu_patch_utsb(); 188 sfmmu_patch_mmu_asi(ktsb_phys); 189 190 sfmmu_init_tsbs(); 191 192 if (kpm_enable) { 193 sfmmu_kpm_patch_tlbm(); 194 if (kpm_smallpages == 0) { 195 sfmmu_kpm_patch_tsbm(); 196 } 197 } 198 199 if (!shctx_on) { 200 sfmmu_patch_shctx(); 201 } 202 203 /* 204 * The 8K-indexed kernel TSB space is used to hold 205 * translations below... 206 */ 207 trans_root = read_prom_mappings(&ntrans_root); 208 sfmmu_remap_kernel(); 209 startup_fixup_physavail(); 210 mmu_init_kernel_pgsz(kas.a_hat); 211 sfmmu_map_prom_mappings(trans_root, ntrans_root); 212 213 /* 214 * We invalidate 8K kernel TSB because we used it in 215 * sfmmu_map_prom_mappings() 216 */ 217 sfmmu_inv_tsb(ktsb_base, ktsb_sz); 218 sfmmu_inv_tsb(ktsb4m_base, ktsb4m_sz); 219 220 sfmmu_init_ktsbinfo(); 221 222 223 sfmmu_kern_mapped = 1; 224 225 /* 226 * hments have been created for mapped pages, and thus we're ready 227 * for kmdb to start using its own trap table. It walks the hments 228 * to resolve TLB misses, and can't be used until they're ready. 229 */ 230 if (boothowto & RB_DEBUG) 231 kdi_dvec_vmready(); 232 } 233 234 /* 235 * Macro used below to convert the prom's 32-bit high and low fields into 236 * a value appropriate for the 64-bit kernel. 237 */ 238 239 #define COMBINE(hi, lo) (((uint64_t)(uint32_t)(hi) << 32) | (uint32_t)(lo)) 240 241 /* 242 * Track larges pages used. 243 * Provides observability for this feature on non-debug kernels. 244 */ 245 ulong_t map_prom_lpcount[MMU_PAGE_SIZES]; 246 247 /* 248 * This function traverses the prom mapping list and creates equivalent 249 * mappings in the sfmmu mapping hash. 250 */ 251 static void 252 sfmmu_map_prom_mappings(struct translation *trans_root, size_t ntrans_root) 253 { 254 struct translation *promt; 255 tte_t tte, oldtte, *ttep; 256 pfn_t pfn, oldpfn, basepfn; 257 caddr_t vaddr; 258 size_t size, offset; 259 unsigned long i; 260 uint_t attr; 261 page_t *pp; 262 extern struct memlist *virt_avail; 263 char buf[256]; 264 265 ttep = &tte; 266 for (i = 0, promt = trans_root; i < ntrans_root; i++, promt++) { 267 ASSERT(promt->tte_hi != 0); 268 ASSERT32(promt->virt_hi == 0 && promt->size_hi == 0); 269 270 vaddr = (caddr_t)COMBINE(promt->virt_hi, promt->virt_lo); 271 272 /* 273 * hack until we get rid of map-for-unix 274 */ 275 if (vaddr < (caddr_t)KERNELBASE) 276 continue; 277 278 ttep->tte_inthi = promt->tte_hi; 279 ttep->tte_intlo = promt->tte_lo; 280 attr = PROC_DATA | HAT_NOSYNC; 281 #if defined(TTE_IS_GLOBAL) 282 if (TTE_IS_GLOBAL(ttep)) { 283 /* 284 * The prom better not use global translations 285 * because a user process might use the same 286 * virtual addresses 287 */ 288 prom_panic("sfmmu_map_prom_mappings: global" 289 " translation"); 290 TTE_SET_LOFLAGS(ttep, TTE_GLB_INT, 0); 291 } 292 #endif 293 if (TTE_IS_LOCKED(ttep)) { 294 /* clear the lock bits */ 295 TTE_CLR_LOCKED(ttep); 296 } 297 attr |= (TTE_IS_VCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEVTTE; 298 attr |= (TTE_IS_PCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEPTTE; 299 attr |= (TTE_IS_SIDEFFECT(ttep)) ? SFMMU_SIDEFFECT : 0; 300 attr |= (TTE_IS_IE(ttep)) ? HAT_STRUCTURE_LE : 0; 301 302 size = COMBINE(promt->size_hi, promt->size_lo); 303 offset = 0; 304 basepfn = TTE_TO_PFN((caddr_t)COMBINE(promt->virt_hi, 305 promt->virt_lo), ttep); 306 while (size) { 307 vaddr = (caddr_t)(COMBINE(promt->virt_hi, 308 promt->virt_lo) + offset); 309 310 /* 311 * make sure address is not in virt-avail list 312 */ 313 if (address_in_memlist(virt_avail, (uint64_t)vaddr, 314 size)) { 315 prom_panic("sfmmu_map_prom_mappings:" 316 " inconsistent translation/avail lists"); 317 } 318 319 pfn = basepfn + mmu_btop(offset); 320 if (pf_is_memory(pfn)) { 321 if (attr & SFMMU_UNCACHEPTTE) { 322 prom_panic("sfmmu_map_prom_mappings:" 323 " uncached prom memory page"); 324 } 325 } else { 326 if (!(attr & SFMMU_SIDEFFECT)) { 327 prom_panic("sfmmu_map_prom_mappings:" 328 " prom i/o page without" 329 " side-effect"); 330 } 331 } 332 333 /* 334 * skip kmem64 area 335 */ 336 if (!kmem64_smchunks && 337 vaddr >= kmem64_base && 338 vaddr < kmem64_aligned_end) { 339 #if !defined(C_OBP) 340 prom_panic("sfmmu_map_prom_mappings:" 341 " unexpected kmem64 prom mapping"); 342 #else /* !C_OBP */ 343 size_t mapsz; 344 345 if (ptob(pfn) != 346 kmem64_pabase + (vaddr - kmem64_base)) { 347 prom_panic("sfmmu_map_prom_mappings:" 348 " unexpected kmem64 prom mapping"); 349 } 350 351 mapsz = kmem64_aligned_end - vaddr; 352 if (mapsz >= size) { 353 break; 354 } 355 size -= mapsz; 356 offset += mapsz; 357 continue; 358 #endif /* !C_OBP */ 359 } 360 361 oldpfn = sfmmu_vatopfn(vaddr, KHATID, &oldtte); 362 ASSERT(oldpfn != PFN_SUSPENDED); 363 ASSERT(page_relocate_ready == 0); 364 365 if (oldpfn != PFN_INVALID) { 366 /* 367 * mapping already exists. 368 * Verify they are equal 369 */ 370 if (pfn != oldpfn) { 371 (void) snprintf(buf, sizeof (buf), 372 "sfmmu_map_prom_mappings: mapping" 373 " conflict (va = 0x%p, pfn = 0x%p," 374 " oldpfn = 0x%p)", (void *)vaddr, 375 (void *)pfn, (void *)oldpfn); 376 prom_panic(buf); 377 } 378 size -= MMU_PAGESIZE; 379 offset += MMU_PAGESIZE; 380 continue; 381 } 382 383 pp = page_numtopp_nolock(pfn); 384 if ((pp != NULL) && PP_ISFREE((page_t *)pp)) { 385 (void) snprintf(buf, sizeof (buf), 386 "sfmmu_map_prom_mappings: prom-mapped" 387 " page (va = 0x%p, pfn = 0x%p) on free list", 388 (void *)vaddr, (void *)pfn); 389 prom_panic(buf); 390 } 391 392 sfmmu_memtte(ttep, pfn, attr, TTE8K); 393 sfmmu_tteload(kas.a_hat, ttep, vaddr, pp, 394 HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD); 395 size -= MMU_PAGESIZE; 396 offset += MMU_PAGESIZE; 397 } 398 } 399 400 /* 401 * We claimed kmem64 from prom, so now we need to load tte. 402 */ 403 if (!kmem64_smchunks && kmem64_base != NULL) { 404 pgcnt_t pages; 405 size_t psize; 406 int pszc; 407 408 pszc = kmem64_szc; 409 #ifdef sun4u 410 if (pszc > TTE8K) { 411 pszc = segkmem_lpszc; 412 } 413 #endif /* sun4u */ 414 psize = TTEBYTES(pszc); 415 pages = btop(psize); 416 basepfn = kmem64_pabase >> MMU_PAGESHIFT; 417 vaddr = kmem64_base; 418 while (vaddr < kmem64_end) { 419 sfmmu_memtte(ttep, basepfn, 420 PROC_DATA | HAT_NOSYNC, pszc); 421 sfmmu_tteload(kas.a_hat, ttep, vaddr, NULL, 422 HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD); 423 vaddr += psize; 424 basepfn += pages; 425 } 426 map_prom_lpcount[pszc] = 427 ((caddr_t)P2ROUNDUP((uintptr_t)kmem64_end, psize) - 428 kmem64_base) >> TTE_PAGE_SHIFT(pszc); 429 } 430 } 431 432 #undef COMBINE /* local to previous routine */ 433 434 /* 435 * This routine reads in the "translations" property in to a buffer and 436 * returns a pointer to this buffer and the number of translations. 437 */ 438 static struct translation * 439 read_prom_mappings(size_t *ntransrootp) 440 { 441 char *prop = "translations"; 442 size_t translen; 443 pnode_t node; 444 struct translation *transroot; 445 446 /* 447 * the "translations" property is associated with the mmu node 448 */ 449 node = (pnode_t)prom_getphandle(prom_mmu_ihandle()); 450 451 /* 452 * We use the TSB space to read in the prom mappings. This space 453 * is currently not being used because we haven't taken over the 454 * trap table yet. It should be big enough to hold the mappings. 455 */ 456 if ((translen = prom_getproplen(node, prop)) == -1) 457 cmn_err(CE_PANIC, "no translations property"); 458 *ntransrootp = translen / sizeof (*transroot); 459 translen = roundup(translen, MMU_PAGESIZE); 460 PRM_DEBUG(translen); 461 if (translen > TSB_BYTES(ktsb_szcode)) 462 cmn_err(CE_PANIC, "not enough space for translations"); 463 464 transroot = (struct translation *)ktsb_base; 465 ASSERT(transroot); 466 if (prom_getprop(node, prop, (caddr_t)transroot) == -1) { 467 cmn_err(CE_PANIC, "translations getprop failed"); 468 } 469 return (transroot); 470 } 471 472 /* 473 * Init routine of the nucleus data memory allocator. 474 * 475 * The nucleus data memory allocator is organized in ecache_alignsize'd 476 * memory chunks. Memory allocated by ndata_alloc() will never be freed. 477 * 478 * The ndata argument is used as header of the ndata freelist. 479 * Other freelist nodes are placed in the nucleus memory itself 480 * at the beginning of a free memory chunk. Therefore a freelist 481 * node (struct memlist) must fit into the smallest allocatable 482 * memory chunk (ecache_alignsize bytes). 483 * 484 * The memory interval [base, end] passed to ndata_alloc_init() must be 485 * bzero'd to allow the allocator to return bzero'd memory easily. 486 */ 487 void 488 ndata_alloc_init(struct memlist *ndata, uintptr_t base, uintptr_t end) 489 { 490 ASSERT(sizeof (struct memlist) <= ecache_alignsize); 491 492 base = roundup(base, ecache_alignsize); 493 end = end - end % ecache_alignsize; 494 495 ASSERT(base < end); 496 497 ndata->address = base; 498 ndata->size = end - base; 499 ndata->next = NULL; 500 ndata->prev = NULL; 501 } 502 503 /* 504 * Deliver the size of the largest free memory chunk. 505 */ 506 size_t 507 ndata_maxsize(struct memlist *ndata) 508 { 509 size_t chunksize = ndata->size; 510 511 while ((ndata = ndata->next) != NULL) { 512 if (chunksize < ndata->size) 513 chunksize = ndata->size; 514 } 515 516 return (chunksize); 517 } 518 519 520 /* 521 * Allocate the last properly aligned memory chunk. 522 * This function is called when no more large nucleus memory chunks 523 * will be allocated. The remaining free nucleus memory at the end 524 * of the nucleus can be added to the phys_avail list. 525 */ 526 void * 527 ndata_extra_base(struct memlist *ndata, size_t alignment, caddr_t endaddr) 528 { 529 uintptr_t base; 530 size_t wasteage = 0; 531 #ifdef DEBUG 532 static int called = 0; 533 534 if (called++ > 0) 535 cmn_err(CE_PANIC, "ndata_extra_base() called more than once"); 536 #endif /* DEBUG */ 537 538 /* 539 * The alignment needs to be a multiple of ecache_alignsize. 540 */ 541 ASSERT((alignment % ecache_alignsize) == 0); 542 543 while (ndata->next != NULL) { 544 wasteage += ndata->size; 545 ndata = ndata->next; 546 } 547 548 base = roundup(ndata->address, alignment); 549 550 if (base >= ndata->address + ndata->size) 551 return (NULL); 552 553 if ((caddr_t)(ndata->address + ndata->size) != endaddr) { 554 #ifdef DEBUG 555 ndata_middle_hole_detected = 1; /* see if we hit this again */ 556 #endif 557 return (NULL); 558 } 559 560 if (base == ndata->address) { 561 if (ndata->prev != NULL) 562 ndata->prev->next = NULL; 563 else 564 ndata->size = 0; 565 566 bzero((void *)base, sizeof (struct memlist)); 567 568 } else { 569 ndata->size = base - ndata->address; 570 wasteage += ndata->size; 571 } 572 PRM_DEBUG(wasteage); 573 574 return ((void *)base); 575 } 576 577 /* 578 * Select the best matching buffer, avoid memory fragmentation. 579 */ 580 static struct memlist * 581 ndata_select_chunk(struct memlist *ndata, size_t wanted, size_t alignment) 582 { 583 struct memlist *fnd_below = NULL; 584 struct memlist *fnd_above = NULL; 585 struct memlist *fnd_unused = NULL; 586 struct memlist *frlist; 587 uintptr_t base; 588 uintptr_t end; 589 size_t below; 590 size_t above; 591 size_t unused; 592 size_t best_below = ULONG_MAX; 593 size_t best_above = ULONG_MAX; 594 size_t best_unused = ULONG_MAX; 595 596 ASSERT(ndata != NULL); 597 598 /* 599 * Look for the best matching buffer, avoid memory fragmentation. 600 * The following strategy is used, try to find 601 * 1. an exact fitting buffer 602 * 2. avoid wasting any space below the buffer, take first 603 * fitting buffer 604 * 3. avoid wasting any space above the buffer, take first 605 * fitting buffer 606 * 4. avoid wasting space, take first fitting buffer 607 * 5. take the last buffer in chain 608 */ 609 for (frlist = ndata; frlist != NULL; frlist = frlist->next) { 610 base = roundup(frlist->address, alignment); 611 end = roundup(base + wanted, ecache_alignsize); 612 613 if (end > frlist->address + frlist->size) 614 continue; 615 616 below = (base - frlist->address) / ecache_alignsize; 617 above = (frlist->address + frlist->size - end) / 618 ecache_alignsize; 619 unused = below + above; 620 621 if (unused == 0) 622 return (frlist); 623 624 if (frlist->next == NULL) 625 break; 626 627 if (below < best_below) { 628 best_below = below; 629 fnd_below = frlist; 630 } 631 632 if (above < best_above) { 633 best_above = above; 634 fnd_above = frlist; 635 } 636 637 if (unused < best_unused) { 638 best_unused = unused; 639 fnd_unused = frlist; 640 } 641 } 642 643 if (best_below == 0) 644 return (fnd_below); 645 if (best_above == 0) 646 return (fnd_above); 647 if (best_unused < ULONG_MAX) 648 return (fnd_unused); 649 650 return (frlist); 651 } 652 653 /* 654 * Nucleus data memory allocator. 655 * The granularity of the allocator is ecache_alignsize. 656 * See also comment for ndata_alloc_init(). 657 */ 658 void * 659 ndata_alloc(struct memlist *ndata, size_t wanted, size_t alignment) 660 { 661 struct memlist *found; 662 struct memlist *fnd_above; 663 uintptr_t base; 664 uintptr_t end; 665 size_t below; 666 size_t above; 667 668 /* 669 * Look for the best matching buffer, avoid memory fragmentation. 670 */ 671 if ((found = ndata_select_chunk(ndata, wanted, alignment)) == NULL) 672 return (NULL); 673 674 /* 675 * Allocate the nucleus data buffer. 676 */ 677 base = roundup(found->address, alignment); 678 end = roundup(base + wanted, ecache_alignsize); 679 ASSERT(end <= found->address + found->size); 680 681 below = base - found->address; 682 above = found->address + found->size - end; 683 ASSERT(above == 0 || (above % ecache_alignsize) == 0); 684 685 if (below >= ecache_alignsize) { 686 /* 687 * There is free memory below the allocated memory chunk. 688 */ 689 found->size = below - below % ecache_alignsize; 690 691 if (above) { 692 fnd_above = (struct memlist *)end; 693 fnd_above->address = end; 694 fnd_above->size = above; 695 696 if ((fnd_above->next = found->next) != NULL) 697 found->next->prev = fnd_above; 698 fnd_above->prev = found; 699 found->next = fnd_above; 700 } 701 702 return ((void *)base); 703 } 704 705 if (found->prev == NULL) { 706 /* 707 * The first chunk (ndata) is selected. 708 */ 709 ASSERT(found == ndata); 710 if (above) { 711 found->address = end; 712 found->size = above; 713 } else if (found->next != NULL) { 714 found->address = found->next->address; 715 found->size = found->next->size; 716 if ((found->next = found->next->next) != NULL) 717 found->next->prev = found; 718 719 bzero((void *)found->address, sizeof (struct memlist)); 720 } else { 721 found->address = end; 722 found->size = 0; 723 } 724 725 return ((void *)base); 726 } 727 728 /* 729 * Not the first chunk. 730 */ 731 if (above) { 732 fnd_above = (struct memlist *)end; 733 fnd_above->address = end; 734 fnd_above->size = above; 735 736 if ((fnd_above->next = found->next) != NULL) 737 fnd_above->next->prev = fnd_above; 738 fnd_above->prev = found->prev; 739 found->prev->next = fnd_above; 740 741 } else { 742 if ((found->prev->next = found->next) != NULL) 743 found->next->prev = found->prev; 744 } 745 746 bzero((void *)found->address, sizeof (struct memlist)); 747 748 return ((void *)base); 749 } 750 751 /* 752 * Size the kernel TSBs based upon the amount of physical 753 * memory in the system. 754 */ 755 static void 756 calc_tsb_sizes(pgcnt_t npages) 757 { 758 PRM_DEBUG(npages); 759 760 if (npages <= TSB_FREEMEM_MIN) { 761 ktsb_szcode = TSB_128K_SZCODE; 762 enable_bigktsb = 0; 763 } else if (npages <= TSB_FREEMEM_LARGE / 2) { 764 ktsb_szcode = TSB_256K_SZCODE; 765 enable_bigktsb = 0; 766 } else if (npages <= TSB_FREEMEM_LARGE) { 767 ktsb_szcode = TSB_512K_SZCODE; 768 enable_bigktsb = 0; 769 } else if (npages <= TSB_FREEMEM_LARGE * 2 || 770 enable_bigktsb == 0) { 771 ktsb_szcode = TSB_1M_SZCODE; 772 enable_bigktsb = 0; 773 } else { 774 ktsb_szcode = highbit(npages - 1); 775 ktsb_szcode -= TSB_START_SIZE; 776 ktsb_szcode = MAX(ktsb_szcode, MIN_BIGKTSB_SZCODE); 777 ktsb_szcode = MIN(ktsb_szcode, MAX_BIGKTSB_SZCODE); 778 } 779 780 /* 781 * We choose the TSB to hold kernel 4M mappings to have twice 782 * the reach as the primary kernel TSB since this TSB will 783 * potentially (currently) be shared by both mappings to all of 784 * physical memory plus user TSBs. If this TSB has to be in nucleus 785 * (only for Spitfire and Cheetah) limit its size to 64K. 786 */ 787 ktsb4m_szcode = highbit((2 * npages) / TTEPAGES(TTE4M) - 1); 788 ktsb4m_szcode -= TSB_START_SIZE; 789 ktsb4m_szcode = MAX(ktsb4m_szcode, TSB_MIN_SZCODE); 790 ktsb4m_szcode = MIN(ktsb4m_szcode, TSB_SOFTSZ_MASK); 791 if ((enable_bigktsb == 0 || ktsb_phys == 0) && ktsb4m_szcode > 792 TSB_64K_SZCODE) { 793 ktsb4m_szcode = TSB_64K_SZCODE; 794 max_bootlp_tteszc = TTE8K; 795 } 796 797 ktsb_sz = TSB_BYTES(ktsb_szcode); /* kernel 8K tsb size */ 798 ktsb4m_sz = TSB_BYTES(ktsb4m_szcode); /* kernel 4M tsb size */ 799 } 800 801 /* 802 * Allocate kernel TSBs from nucleus data memory. 803 * The function return 0 on success and -1 on failure. 804 */ 805 int 806 ndata_alloc_tsbs(struct memlist *ndata, pgcnt_t npages) 807 { 808 /* 809 * Set ktsb_phys to 1 if the processor supports ASI_QUAD_LDD_PHYS. 810 */ 811 (void) sfmmu_setup_4lp(); 812 813 /* 814 * Size the kernel TSBs based upon the amount of physical 815 * memory in the system. 816 */ 817 calc_tsb_sizes(npages); 818 819 /* 820 * Allocate the 8K kernel TSB if it belongs inside the nucleus. 821 */ 822 if (enable_bigktsb == 0) { 823 if ((ktsb_base = ndata_alloc(ndata, ktsb_sz, ktsb_sz)) == NULL) 824 return (-1); 825 ASSERT(!((uintptr_t)ktsb_base & (ktsb_sz - 1))); 826 827 PRM_DEBUG(ktsb_base); 828 PRM_DEBUG(ktsb_sz); 829 PRM_DEBUG(ktsb_szcode); 830 } 831 832 /* 833 * Next, allocate 4M kernel TSB from the nucleus since it's small. 834 */ 835 if (ktsb4m_szcode <= TSB_64K_SZCODE) { 836 837 ktsb4m_base = ndata_alloc(ndata, ktsb4m_sz, ktsb4m_sz); 838 if (ktsb4m_base == NULL) 839 return (-1); 840 ASSERT(!((uintptr_t)ktsb4m_base & (ktsb4m_sz - 1))); 841 842 PRM_DEBUG(ktsb4m_base); 843 PRM_DEBUG(ktsb4m_sz); 844 PRM_DEBUG(ktsb4m_szcode); 845 } 846 847 return (0); 848 } 849 850 size_t 851 calc_hmehash_sz(pgcnt_t npages) 852 { 853 ulong_t hme_buckets; 854 855 /* 856 * The number of buckets in the hme hash tables 857 * is a power of 2 such that the average hash chain length is 858 * HMENT_HASHAVELEN. The number of buckets for the user hash is 859 * a function of physical memory and a predefined overmapping factor. 860 * The number of buckets for the kernel hash is a function of 861 * physical memory only. 862 */ 863 hme_buckets = (npages * HMEHASH_FACTOR) / 864 (HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT)); 865 866 uhmehash_num = (int)MIN(hme_buckets, MAX_UHME_BUCKETS); 867 868 if (uhmehash_num > USER_BUCKETS_THRESHOLD) { 869 /* 870 * if uhmehash_num is not power of 2 round it down to the 871 * next power of 2. 872 */ 873 uint_t align = 1 << (highbit(uhmehash_num - 1) - 1); 874 uhmehash_num = P2ALIGN(uhmehash_num, align); 875 } else 876 uhmehash_num = 1 << highbit(uhmehash_num - 1); 877 878 hme_buckets = npages / (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT); 879 khmehash_num = (int)MIN(hme_buckets, MAX_KHME_BUCKETS); 880 khmehash_num = 1 << highbit(khmehash_num - 1); 881 khmehash_num = MAX(khmehash_num, MIN_KHME_BUCKETS); 882 883 return ((uhmehash_num + khmehash_num) * sizeof (struct hmehash_bucket)); 884 } 885 886 caddr_t 887 alloc_hmehash(caddr_t alloc_base) 888 { 889 size_t khmehash_sz, uhmehash_sz; 890 891 khme_hash = (struct hmehash_bucket *)alloc_base; 892 khmehash_sz = khmehash_num * sizeof (struct hmehash_bucket); 893 alloc_base += khmehash_sz; 894 895 uhme_hash = (struct hmehash_bucket *)alloc_base; 896 uhmehash_sz = uhmehash_num * sizeof (struct hmehash_bucket); 897 alloc_base += uhmehash_sz; 898 899 PRM_DEBUG(khme_hash); 900 PRM_DEBUG(uhme_hash); 901 902 return (alloc_base); 903 } 904 905 /* 906 * Allocate hat structs from the nucleus data memory. 907 */ 908 int 909 ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages) 910 { 911 size_t mml_alloc_sz; 912 size_t cb_alloc_sz; 913 914 /* 915 * For the page mapping list mutex array we allocate one mutex 916 * for every 128 pages (1 MB) with a minimum of 64 entries and 917 * a maximum of 8K entries. For the initial computation npages 918 * is rounded up (ie. 1 << highbit(npages * 1.5 / 128)) 919 * 920 * mml_shift is roughly log2(mml_table_sz) + 3 for MLIST_HASH 921 */ 922 mml_table_sz = 1 << highbit((npages * 3) / 256); 923 if (mml_table_sz < 64) 924 mml_table_sz = 64; 925 else if (mml_table_sz > 8192) 926 mml_table_sz = 8192; 927 mml_shift = highbit(mml_table_sz) + 3; 928 929 PRM_DEBUG(mml_table_sz); 930 PRM_DEBUG(mml_shift); 931 932 mml_alloc_sz = mml_table_sz * sizeof (kmutex_t); 933 934 mml_table = ndata_alloc(ndata, mml_alloc_sz, ecache_alignsize); 935 if (mml_table == NULL) 936 return (-1); 937 PRM_DEBUG(mml_table); 938 939 cb_alloc_sz = sfmmu_max_cb_id * sizeof (struct sfmmu_callback); 940 PRM_DEBUG(cb_alloc_sz); 941 sfmmu_cb_table = ndata_alloc(ndata, cb_alloc_sz, ecache_alignsize); 942 if (sfmmu_cb_table == NULL) 943 return (-1); 944 PRM_DEBUG(sfmmu_cb_table); 945 946 return (0); 947 } 948 949 int 950 ndata_alloc_kpm(struct memlist *ndata, pgcnt_t kpm_npages) 951 { 952 size_t kpmp_alloc_sz; 953 954 /* 955 * For the kpm_page mutex array we allocate one mutex every 16 956 * kpm pages (64MB). In smallpage mode we allocate one mutex 957 * every 8K pages. The minimum is set to 64 entries and the 958 * maximum to 8K entries. 959 */ 960 if (kpm_smallpages == 0) { 961 kpmp_shift = highbit(sizeof (kpm_page_t)) - 1; 962 kpmp_table_sz = 1 << highbit(kpm_npages / 16); 963 kpmp_table_sz = (kpmp_table_sz < 64) ? 64 : 964 ((kpmp_table_sz > 8192) ? 8192 : kpmp_table_sz); 965 kpmp_alloc_sz = kpmp_table_sz * sizeof (kpm_hlk_t); 966 967 kpmp_table = ndata_alloc(ndata, kpmp_alloc_sz, 968 ecache_alignsize); 969 if (kpmp_table == NULL) 970 return (-1); 971 972 PRM_DEBUG(kpmp_table); 973 PRM_DEBUG(kpmp_table_sz); 974 975 kpmp_stable_sz = 0; 976 kpmp_stable = NULL; 977 } else { 978 ASSERT(kpm_pgsz == PAGESIZE); 979 kpmp_shift = highbit(sizeof (kpm_shlk_t)) + 1; 980 kpmp_stable_sz = 1 << highbit(kpm_npages / 8192); 981 kpmp_stable_sz = (kpmp_stable_sz < 64) ? 64 : 982 ((kpmp_stable_sz > 8192) ? 8192 : kpmp_stable_sz); 983 kpmp_alloc_sz = kpmp_stable_sz * sizeof (kpm_shlk_t); 984 985 kpmp_stable = ndata_alloc(ndata, kpmp_alloc_sz, 986 ecache_alignsize); 987 if (kpmp_stable == NULL) 988 return (-1); 989 990 PRM_DEBUG(kpmp_stable); 991 PRM_DEBUG(kpmp_stable_sz); 992 993 kpmp_table_sz = 0; 994 kpmp_table = NULL; 995 } 996 PRM_DEBUG(kpmp_shift); 997 998 return (0); 999 } 1000 1001 /* 1002 * This function bop allocs kernel TSBs. 1003 */ 1004 caddr_t 1005 sfmmu_ktsb_alloc(caddr_t tsbbase) 1006 { 1007 caddr_t vaddr; 1008 1009 if (enable_bigktsb) { 1010 ktsb_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb_sz); 1011 vaddr = prom_alloc(ktsb_base, ktsb_sz, ktsb_sz); 1012 if (vaddr != ktsb_base) 1013 cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc" 1014 " 8K bigktsb"); 1015 ktsb_base = vaddr; 1016 tsbbase = ktsb_base + ktsb_sz; 1017 PRM_DEBUG(ktsb_base); 1018 PRM_DEBUG(tsbbase); 1019 } 1020 1021 if (ktsb4m_szcode > TSB_64K_SZCODE) { 1022 ASSERT(ktsb_phys && enable_bigktsb); 1023 ktsb4m_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb4m_sz); 1024 vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb4m_base, ktsb4m_sz, 1025 ktsb4m_sz); 1026 if (vaddr != ktsb4m_base) 1027 cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc" 1028 " 4M bigktsb"); 1029 ktsb4m_base = vaddr; 1030 tsbbase = ktsb4m_base + ktsb4m_sz; 1031 PRM_DEBUG(ktsb4m_base); 1032 PRM_DEBUG(tsbbase); 1033 } 1034 return (tsbbase); 1035 } 1036 1037 /* 1038 * Moves code assembled outside of the trap table into the trap 1039 * table taking care to relocate relative branches to code outside 1040 * of the trap handler. 1041 */ 1042 static void 1043 sfmmu_reloc_trap_handler(void *tablep, void *start, size_t count) 1044 { 1045 size_t i; 1046 uint32_t *src; 1047 uint32_t *dst; 1048 uint32_t inst; 1049 int op, op2; 1050 int32_t offset; 1051 int disp; 1052 1053 src = start; 1054 dst = tablep; 1055 offset = src - dst; 1056 for (src = start, i = 0; i < count; i++, src++, dst++) { 1057 inst = *dst = *src; 1058 op = (inst >> 30) & 0x2; 1059 if (op == 1) { 1060 /* call */ 1061 disp = ((int32_t)inst << 2) >> 2; /* sign-extend */ 1062 if (disp + i >= 0 && disp + i < count) 1063 continue; 1064 disp += offset; 1065 inst = 0x40000000u | (disp & 0x3fffffffu); 1066 *dst = inst; 1067 } else if (op == 0) { 1068 /* branch or sethi */ 1069 op2 = (inst >> 22) & 0x7; 1070 1071 switch (op2) { 1072 case 0x3: /* BPr */ 1073 disp = (((inst >> 20) & 0x3) << 14) | 1074 (inst & 0x3fff); 1075 disp = (disp << 16) >> 16; /* sign-extend */ 1076 if (disp + i >= 0 && disp + i < count) 1077 continue; 1078 disp += offset; 1079 if (((disp << 16) >> 16) != disp) 1080 cmn_err(CE_PANIC, "bad reloc"); 1081 inst &= ~0x303fff; 1082 inst |= (disp & 0x3fff); 1083 inst |= (disp & 0xc000) << 6; 1084 break; 1085 1086 case 0x2: /* Bicc */ 1087 disp = ((int32_t)inst << 10) >> 10; 1088 if (disp + i >= 0 && disp + i < count) 1089 continue; 1090 disp += offset; 1091 if (((disp << 10) >> 10) != disp) 1092 cmn_err(CE_PANIC, "bad reloc"); 1093 inst &= ~0x3fffff; 1094 inst |= (disp & 0x3fffff); 1095 break; 1096 1097 case 0x1: /* Bpcc */ 1098 disp = ((int32_t)inst << 13) >> 13; 1099 if (disp + i >= 0 && disp + i < count) 1100 continue; 1101 disp += offset; 1102 if (((disp << 13) >> 13) != disp) 1103 cmn_err(CE_PANIC, "bad reloc"); 1104 inst &= ~0x7ffff; 1105 inst |= (disp & 0x7ffffu); 1106 break; 1107 } 1108 *dst = inst; 1109 } 1110 } 1111 flush_instr_mem(tablep, count * sizeof (uint32_t)); 1112 } 1113 1114 /* 1115 * Routine to allocate a large page to use in the TSB caches. 1116 */ 1117 /*ARGSUSED*/ 1118 static page_t * 1119 sfmmu_tsb_page_create(void *addr, size_t size, int vmflag, void *arg) 1120 { 1121 int pgflags; 1122 1123 pgflags = PG_EXCL; 1124 if ((vmflag & VM_NOSLEEP) == 0) 1125 pgflags |= PG_WAIT; 1126 if (vmflag & VM_PANIC) 1127 pgflags |= PG_PANIC; 1128 if (vmflag & VM_PUSHPAGE) 1129 pgflags |= PG_PUSHPAGE; 1130 1131 return (page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size, 1132 pgflags, &kvseg, addr, arg)); 1133 } 1134 1135 /* 1136 * Allocate a large page to back the virtual address range 1137 * [addr, addr + size). If addr is NULL, allocate the virtual address 1138 * space as well. 1139 */ 1140 static void * 1141 sfmmu_tsb_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, 1142 uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *), 1143 void *pcarg) 1144 { 1145 page_t *ppl; 1146 page_t *rootpp; 1147 caddr_t addr = inaddr; 1148 pgcnt_t npages = btopr(size); 1149 page_t **ppa; 1150 int i = 0; 1151 1152 /* 1153 * Assuming that only TSBs will call this with size > PAGESIZE 1154 * There is no reason why this couldn't be expanded to 8k pages as 1155 * well, or other page sizes in the future .... but for now, we 1156 * only support fixed sized page requests. 1157 */ 1158 if ((inaddr == NULL) && ((addr = vmem_xalloc(vmp, size, size, 0, 0, 1159 NULL, NULL, vmflag)) == NULL)) 1160 return (NULL); 1161 1162 if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) { 1163 if (inaddr == NULL) 1164 vmem_xfree(vmp, addr, size); 1165 return (NULL); 1166 } 1167 1168 ppl = page_create_func(addr, size, vmflag, pcarg); 1169 if (ppl == NULL) { 1170 if (inaddr == NULL) 1171 vmem_xfree(vmp, addr, size); 1172 page_unresv(npages); 1173 return (NULL); 1174 } 1175 1176 rootpp = ppl; 1177 ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP); 1178 while (ppl != NULL) { 1179 page_t *pp = ppl; 1180 ppa[i++] = pp; 1181 page_sub(&ppl, pp); 1182 ASSERT(page_iolock_assert(pp)); 1183 page_io_unlock(pp); 1184 } 1185 1186 /* 1187 * Load the locked entry. It's OK to preload the entry into 1188 * the TSB since we now support large mappings in the kernel TSB. 1189 */ 1190 hat_memload_array(kas.a_hat, (caddr_t)rootpp->p_offset, size, 1191 ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr, HAT_LOAD_LOCK); 1192 1193 for (--i; i >= 0; --i) { 1194 (void) page_pp_lock(ppa[i], 0, 1); 1195 page_unlock(ppa[i]); 1196 } 1197 1198 kmem_free(ppa, npages * sizeof (page_t *)); 1199 return (addr); 1200 } 1201 1202 /* Called to import new spans into the TSB vmem arenas */ 1203 void * 1204 sfmmu_tsb_segkmem_alloc(vmem_t *vmp, size_t size, int vmflag) 1205 { 1206 lgrp_id_t lgrpid = LGRP_NONE; 1207 1208 if (tsb_lgrp_affinity) { 1209 /* 1210 * Search for the vmp->lgrpid mapping by brute force; 1211 * some day vmp will have an lgrp, until then we have 1212 * to do this the hard way. 1213 */ 1214 for (lgrpid = 0; lgrpid < NLGRPS_MAX && 1215 vmp != kmem_tsb_default_arena[lgrpid]; lgrpid++) 1216 ; 1217 if (lgrpid == NLGRPS_MAX) 1218 lgrpid = LGRP_NONE; 1219 } 1220 1221 return (sfmmu_tsb_xalloc(vmp, NULL, size, vmflag, 0, 1222 sfmmu_tsb_page_create, lgrpid != LGRP_NONE? &lgrpid : NULL)); 1223 } 1224 1225 /* Called to free spans from the TSB vmem arenas */ 1226 void 1227 sfmmu_tsb_segkmem_free(vmem_t *vmp, void *inaddr, size_t size) 1228 { 1229 page_t *pp; 1230 caddr_t addr = inaddr; 1231 caddr_t eaddr; 1232 pgcnt_t npages = btopr(size); 1233 pgcnt_t pgs_left = npages; 1234 page_t *rootpp = NULL; 1235 1236 hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK); 1237 1238 for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) { 1239 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL); 1240 if (pp == NULL) 1241 panic("sfmmu_tsb_segkmem_free: page not found"); 1242 1243 ASSERT(PAGE_EXCL(pp)); 1244 page_pp_unlock(pp, 0, 1); 1245 1246 if (rootpp == NULL) 1247 rootpp = pp; 1248 if (--pgs_left == 0) { 1249 /* 1250 * similar logic to segspt_free_pages, but we know we 1251 * have one large page. 1252 */ 1253 page_destroy_pages(rootpp); 1254 } 1255 } 1256 page_unresv(npages); 1257 1258 if (vmp != NULL) 1259 vmem_xfree(vmp, inaddr, size); 1260 } 1261