/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <vm/hat.h>
#include <vm/hat_sfmmu.h>
#include <vm/page.h>
#include <sys/pte.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/sysmacros.h>
#include <sys/machparam.h>
#include <sys/vtrace.h>
#include <sys/kmem.h>
#include <sys/mmu.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/cpuvar.h>
#include <sys/debug.h>
#include <sys/lgrp.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/vmsystm.h>
#include <sys/bitmap.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/rm.h>
#include <vm/vm_dep.h>
#include <sys/t_lock.h>
#include <sys/vm_machparam.h>
#include <sys/promif.h>
#include <sys/prom_isa.h>
#include <sys/prom_plat.h>
#include <sys/prom_debug.h>
#include <sys/privregs.h>
#include <sys/bootconf.h>
#include <sys/memlist.h>
#include <sys/memlist_plat.h>
#include <sys/cpu_module.h>
#include <sys/reboot.h>
#include <sys/kdi.h>

/*
 * Static routines
 */
static void	sfmmu_map_prom_mappings(struct translation *, size_t);
static struct translation *read_prom_mappings(size_t *);
static void	sfmmu_reloc_trap_handler(void *, void *, size_t);

/*
 * External routines
 */
extern void	sfmmu_remap_kernel(void);
extern void	sfmmu_patch_utsb(void);

/*
 * Global Data:
 */
extern caddr_t	textva, datava;
extern tte_t	ktext_tte, kdata_tte;	/* ttes for kernel text and data */
extern int	enable_bigktsb;

uint64_t memsegspa = (uintptr_t)MSEG_NULLPTR_PA; /* memsegs physical linkage */
uint64_t memseg_phash[N_MEM_SLOTS];	/* use physical memseg addresses */

int	sfmmu_kern_mapped = 0;

/*
 * DMMU primary context register for the kernel context.  Machine specific
 * code inserts correct page size codes when necessary.
 */
uint64_t kcontextreg = KCONTEXT;

#ifdef DEBUG
static int ndata_middle_hole_detected = 0;
#endif

/* Extern Global Data */

extern int page_relocate_ready;

/*
 * Controls the logic which enables the use of the
 * QUAD_LDD_PHYS ASI for TSB accesses.
 */
extern int ktsb_phys;
/*
 * Global Routines called from within:
 *	usr/src/uts/sun4u
 *	usr/src/uts/sfmmu
 *	usr/src/uts/sun
 */

pfn_t
va_to_pfn(void *vaddr)
{
	u_longlong_t physaddr;
	int mode, valid;

	if (tba_taken_over)
		return (hat_getpfnum(kas.a_hat, (caddr_t)vaddr));

#if !defined(C_OBP)
	if ((caddr_t)vaddr >= kmem64_base && (caddr_t)vaddr < kmem64_end) {
		if (kmem64_pabase == (uint64_t)-1)
			prom_panic("va_to_pfn: kmem64_pabase not init");
		physaddr = kmem64_pabase + ((caddr_t)vaddr - kmem64_base);
		return ((pfn_t)(physaddr >> MMU_PAGESHIFT));
	}
#endif	/* !C_OBP */

	if ((prom_translate_virt(vaddr, &valid, &physaddr, &mode) != -1) &&
	    (valid == -1)) {
		return ((pfn_t)(physaddr >> MMU_PAGESHIFT));
	}
	return (PFN_INVALID);
}

uint64_t
va_to_pa(void *vaddr)
{
	pfn_t pfn;

	if ((pfn = va_to_pfn(vaddr)) == PFN_INVALID)
		return ((uint64_t)-1);
	return (((uint64_t)pfn << MMU_PAGESHIFT) |
	    ((uint64_t)vaddr & MMU_PAGEOFFSET));
}

void
hat_kern_setup(void)
{
	struct translation *trans_root;
	size_t ntrans_root;
	extern void startup_fixup_physavail(void);

	/*
	 * These are the steps we take to take over the mmu from the prom.
	 *
	 * (1)	Read the prom's mappings through the translation property.
	 * (2)	Remap the kernel text and kernel data with 2 locked 4MB ttes.
	 *	Create the hmeblks for these 2 ttes at this time.
	 * (3)	Create hat structures for all other prom mappings.  Since the
	 *	kernel text and data hme_blks have already been created we
	 *	skip the equivalent prom's mappings.
	 * (4)	Initialize the tsb and its corresponding hardware regs.
	 * (5)	Take over the trap table (currently in startup).
	 * (6)	Up to this point it is possible the prom required some of its
	 *	locked tte's.  Now that we own the trap table we remove them.
	 */

	ktsb_pbase = va_to_pa(ktsb_base);
	ktsb4m_pbase = va_to_pa(ktsb4m_base);
	PRM_DEBUG(ktsb_pbase);
	PRM_DEBUG(ktsb4m_pbase);

	sfmmu_patch_ktsb();
	sfmmu_patch_utsb();
	sfmmu_patch_mmu_asi(ktsb_phys);

	sfmmu_init_tsbs();

	if (kpm_enable) {
		sfmmu_kpm_patch_tlbm();
		if (kpm_smallpages == 0) {
			sfmmu_kpm_patch_tsbm();
		}
	}

	/*
	 * The 8K-indexed kernel TSB space is used to hold the prom
	 * translations below.
	 */
	trans_root = read_prom_mappings(&ntrans_root);
	sfmmu_remap_kernel();
	startup_fixup_physavail();
	mmu_init_kernel_pgsz(kas.a_hat);
	sfmmu_map_prom_mappings(trans_root, ntrans_root);

	/*
	 * We invalidate the 8K kernel TSB because we used it in
	 * sfmmu_map_prom_mappings().
	 */
	sfmmu_inv_tsb(ktsb_base, ktsb_sz);
	sfmmu_inv_tsb(ktsb4m_base, ktsb4m_sz);

	sfmmu_init_ktsbinfo();

	sfmmu_kern_mapped = 1;

	/*
	 * hments have been created for mapped pages, and thus we're ready
	 * for kmdb to start using its own trap table.  It walks the hments
	 * to resolve TLB misses, and can't be used until they're ready.
	 */
	if (boothowto & RB_DEBUG)
		kdi_dvec_vmready();
}

/*
 * Macro used below to convert the prom's 32-bit high and low fields into
 * a value appropriate for the 64-bit kernel.
 */

#define	COMBINE(hi, lo)	(((uint64_t)(uint32_t)(hi) << 32) | (uint32_t)(lo))
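
/*
 * For example (values purely illustrative): a prom entry with
 * virt_hi == 0x1 and virt_lo == 0x2000 combines to the 64-bit
 * virtual address 0x100002000.
 */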
/*
 * Track large pages used.
 * Provides observability for this feature on non-debug kernels.
 */
ulong_t map_prom_lpcount[MMU_PAGE_SIZES];

/*
 * This function traverses the prom mapping list and creates equivalent
 * mappings in the sfmmu mapping hash.
 */
static void
sfmmu_map_prom_mappings(struct translation *trans_root, size_t ntrans_root)
{
	struct translation *promt;
	tte_t	tte, oldtte, *ttep;
	pfn_t	pfn, oldpfn, basepfn;
	caddr_t vaddr;
	size_t	size, offset;
	unsigned long i;
	uint_t	attr;
	page_t *pp;
	extern struct memlist *virt_avail;

	ttep = &tte;
	for (i = 0, promt = trans_root; i < ntrans_root; i++, promt++) {
		ASSERT(promt->tte_hi != 0);
		ASSERT32(promt->virt_hi == 0 && promt->size_hi == 0);

		vaddr = (caddr_t)COMBINE(promt->virt_hi, promt->virt_lo);

		/*
		 * hack until we get rid of map-for-unix
		 */
		if (vaddr < (caddr_t)KERNELBASE)
			continue;

		ttep->tte_inthi = promt->tte_hi;
		ttep->tte_intlo = promt->tte_lo;
		attr = PROC_DATA | HAT_NOSYNC;
#if defined(TTE_IS_GLOBAL)
		if (TTE_IS_GLOBAL(ttep)) {
			/*
			 * The prom better not use global translations
			 * because a user process might use the same
			 * virtual addresses.
			 */
			cmn_err(CE_PANIC, "map_prom: global translation");
			TTE_SET_LOFLAGS(ttep, TTE_GLB_INT, 0);
		}
#endif
		if (TTE_IS_LOCKED(ttep)) {
			/* clear the lock bits */
			TTE_CLR_LOCKED(ttep);
		}
		attr |= (TTE_IS_VCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEVTTE;
		attr |= (TTE_IS_PCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEPTTE;
		attr |= (TTE_IS_SIDEFFECT(ttep)) ? SFMMU_SIDEFFECT : 0;
		attr |= (TTE_IS_IE(ttep)) ? HAT_STRUCTURE_LE : 0;
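
		/*
		 * As an illustrative example of the flag derivation
		 * above: an I/O translation whose tte has the cv and cp
		 * (cacheability) bits clear and the e (side-effect) bit
		 * set picks up SFMMU_UNCACHEVTTE | SFMMU_UNCACHEPTTE |
		 * SFMMU_SIDEFFECT on top of PROC_DATA | HAT_NOSYNC.
		 */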
		size = COMBINE(promt->size_hi, promt->size_lo);
		offset = 0;
		basepfn = TTE_TO_PFN((caddr_t)COMBINE(promt->virt_hi,
		    promt->virt_lo), ttep);
		while (size) {
			vaddr = (caddr_t)(COMBINE(promt->virt_hi,
			    promt->virt_lo) + offset);

			/*
			 * make sure address is not in virt-avail list
			 */
			if (address_in_memlist(virt_avail, (uint64_t)vaddr,
			    size)) {
				cmn_err(CE_PANIC, "map_prom: inconsistent "
				    "translation/avail lists");
			}

			pfn = basepfn + mmu_btop(offset);
			if (pf_is_memory(pfn)) {
				if (attr & SFMMU_UNCACHEPTTE) {
					cmn_err(CE_PANIC, "map_prom: "
					    "uncached prom memory page");
				}
			} else {
				if (!(attr & SFMMU_SIDEFFECT)) {
					cmn_err(CE_PANIC, "map_prom: prom "
					    "i/o page without side-effect");
				}
			}

			/*
			 * skip kmem64 area
			 */
			if (vaddr >= kmem64_base &&
			    vaddr < kmem64_aligned_end) {
#if !defined(C_OBP)
				cmn_err(CE_PANIC,
				    "unexpected kmem64 prom mapping\n");
#else	/* !C_OBP */
				size_t mapsz;

				if (ptob(pfn) !=
				    kmem64_pabase + (vaddr - kmem64_base)) {
					cmn_err(CE_PANIC,
					    "unexpected kmem64 prom mapping\n");
				}

				mapsz = kmem64_aligned_end - vaddr;
				if (mapsz >= size) {
					break;
				}
				size -= mapsz;
				offset += mapsz;
				continue;
#endif	/* !C_OBP */
			}

			oldpfn = sfmmu_vatopfn(vaddr, KHATID, &oldtte);
			ASSERT(oldpfn != PFN_SUSPENDED);
			ASSERT(page_relocate_ready == 0);

			if (oldpfn != PFN_INVALID) {
				/*
				 * mapping already exists.
				 * Verify they are equal.
				 */
				if (pfn != oldpfn) {
					cmn_err(CE_PANIC, "map_prom: mapping "
					    "conflict (va=0x%p pfn=%p, "
					    "oldpfn=%p)",
					    (void *)vaddr, (void *)pfn,
					    (void *)oldpfn);
				}
				size -= MMU_PAGESIZE;
				offset += MMU_PAGESIZE;
				continue;
			}

			pp = page_numtopp_nolock(pfn);
			if ((pp != NULL) && PP_ISFREE((page_t *)pp)) {
				cmn_err(CE_PANIC, "map_prom: "
				    "prom-mapped page (va 0x%p, pfn 0x%p) "
				    "on free list", (void *)vaddr, (void *)pfn);
			}

			sfmmu_memtte(ttep, pfn, attr, TTE8K);
			sfmmu_tteload(kas.a_hat, ttep, vaddr, pp,
			    HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
			size -= MMU_PAGESIZE;
			offset += MMU_PAGESIZE;
		}
	}

	/*
	 * We claimed kmem64 from the prom, so now we need to load the ttes.
	 */
	if (kmem64_base != NULL) {
		pgcnt_t pages;
		size_t psize;
		int pszc;

		pszc = kmem64_szc;
#ifdef sun4u
		if (pszc > TTE8K) {
			pszc = segkmem_lpszc;
		}
#endif	/* sun4u */
		psize = TTEBYTES(pszc);
		pages = btop(psize);
		basepfn = kmem64_pabase >> MMU_PAGESHIFT;
		vaddr = kmem64_base;
		while (vaddr < kmem64_end) {
			sfmmu_memtte(ttep, basepfn,
			    PROC_DATA | HAT_NOSYNC, pszc);
			sfmmu_tteload(kas.a_hat, ttep, vaddr, NULL,
			    HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
			vaddr += psize;
			basepfn += pages;
		}
		map_prom_lpcount[pszc] =
		    ((caddr_t)P2ROUNDUP((uintptr_t)kmem64_end, psize) -
		    kmem64_base) >> TTE_PAGE_SHIFT(pszc);
	}
}

#undef COMBINE	/* local to previous routine */

/*
 * This routine reads in the "translations" property into a buffer and
 * returns a pointer to this buffer and the number of translations.
 */
static struct translation *
read_prom_mappings(size_t *ntransrootp)
{
	char *prop = "translations";
	size_t translen;
	pnode_t node;
	struct translation *transroot;

	/*
	 * the "translations" property is associated with the mmu node
	 */
	node = (pnode_t)prom_getphandle(prom_mmu_ihandle());

	/*
	 * We use the TSB space to read in the prom mappings.  This space
	 * is currently not being used because we haven't taken over the
	 * trap table yet.  It should be big enough to hold the mappings.
	 */
	if ((translen = prom_getproplen(node, prop)) == -1)
		cmn_err(CE_PANIC, "no translations property");
	*ntransrootp = translen / sizeof (*transroot);
	translen = roundup(translen, MMU_PAGESIZE);
	PRM_DEBUG(translen);
	if (translen > TSB_BYTES(ktsb_szcode))
		cmn_err(CE_PANIC, "not enough space for translations");

	transroot = (struct translation *)ktsb_base;
	ASSERT(transroot);
	if (prom_getprop(node, prop, (caddr_t)transroot) == -1) {
		cmn_err(CE_PANIC, "translations getprop failed");
	}
	return (transroot);
}
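
/*
 * Each entry in the buffer returned above packs one mapping as three
 * 32-bit hi/lo pairs: virtual address, size, and tte (the fields
 * consumed by sfmmu_map_prom_mappings()).  For instance (illustrative
 * values only), an entry with virt == 0xf0000000 and size == 0x4000
 * describes two 8K pages starting at 0xf0000000.
 */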
/*
 * Init routine of the nucleus data memory allocator.
 *
 * The nucleus data memory allocator is organized in ecache_alignsize'd
 * memory chunks.  Memory allocated by ndata_alloc() will never be freed.
 *
 * The ndata argument is used as header of the ndata freelist.
 * Other freelist nodes are placed in the nucleus memory itself
 * at the beginning of a free memory chunk.  Therefore a freelist
 * node (struct memlist) must fit into the smallest allocatable
 * memory chunk (ecache_alignsize bytes).
 *
 * The memory interval [base, end] passed to ndata_alloc_init() must be
 * bzero'd to allow the allocator to return bzero'd memory easily.
 */
void
ndata_alloc_init(struct memlist *ndata, uintptr_t base, uintptr_t end)
{
	ASSERT(sizeof (struct memlist) <= ecache_alignsize);

	base = roundup(base, ecache_alignsize);
	end = end - end % ecache_alignsize;

	ASSERT(base < end);

	ndata->address = base;
	ndata->size = end - base;
	ndata->next = NULL;
	ndata->prev = NULL;
}

/*
 * Deliver the size of the largest free memory chunk.
 */
size_t
ndata_maxsize(struct memlist *ndata)
{
	size_t chunksize = ndata->size;

	while ((ndata = ndata->next) != NULL) {
		if (chunksize < ndata->size)
			chunksize = ndata->size;
	}

	return (chunksize);
}

/*
 * This is a special function to figure out whether the memory chunk
 * needed for the page structs can fit in the nucleus or not.  If it
 * fits, the function calculates and returns the size that would remain
 * in the last free chunk of the ndata freelist if the page struct
 * memory were allocated from the nucleus.
 */
size_t
ndata_spare(struct memlist *ndata, size_t wanted, size_t alignment)
{
	struct memlist *frlist;
	uintptr_t base;
	uintptr_t end;

	for (frlist = ndata; frlist != NULL; frlist = frlist->next) {
		base = roundup(frlist->address, alignment);
		end = roundup(base + wanted, ecache_alignsize);

		if (end <= frlist->address + frlist->size) {
			if (frlist->next == NULL)
				return (frlist->address + frlist->size - end);

			while (frlist->next != NULL)
				frlist = frlist->next;

			return (frlist->size);
		}
	}

	return (0);
}

/*
 * Allocate the last properly aligned memory chunk.
 * This function is called when no more large nucleus memory chunks
 * will be allocated.  The remaining free nucleus memory at the end
 * of the nucleus can be added to the phys_avail list.
 */
void *
ndata_extra_base(struct memlist *ndata, size_t alignment, caddr_t endaddr)
{
	uintptr_t base;
	size_t wasteage = 0;
#ifdef DEBUG
	static int called = 0;

	if (called++ > 0)
		cmn_err(CE_PANIC, "ndata_extra_base() called more than once");
#endif /* DEBUG */

	/*
	 * The alignment needs to be a multiple of ecache_alignsize.
	 */
	ASSERT((alignment % ecache_alignsize) == 0);

	while (ndata->next != NULL) {
		wasteage += ndata->size;
		ndata = ndata->next;
	}

	base = roundup(ndata->address, alignment);

	if (base >= ndata->address + ndata->size)
		return (NULL);

	if ((caddr_t)(ndata->address + ndata->size) != endaddr) {
#ifdef DEBUG
		ndata_middle_hole_detected = 1;	/* see if we hit this again */
#endif
		return (NULL);
	}

	if (base == ndata->address) {
		if (ndata->prev != NULL)
			ndata->prev->next = NULL;
		else
			ndata->size = 0;

		bzero((void *)base, sizeof (struct memlist));

	} else {
		ndata->size = base - ndata->address;
		wasteage += ndata->size;
	}
	PRM_DEBUG(wasteage);

	return ((void *)base);
}
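
/*
 * A worked example for the chunk arithmetic used above and below
 * (sizes purely illustrative): with ecache_alignsize == 64, a request
 * for wanted == 200 bytes at alignment == 64 from a chunk starting at
 * an aligned address consumes roundup(200, 64) == 256 bytes, leaving
 * the remainder of the chunk on the freelist.
 */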
/*
 * Select the best matching buffer, avoid memory fragmentation.
 */
static struct memlist *
ndata_select_chunk(struct memlist *ndata, size_t wanted, size_t alignment)
{
	struct memlist *fnd_below = NULL;
	struct memlist *fnd_above = NULL;
	struct memlist *fnd_unused = NULL;
	struct memlist *frlist;
	uintptr_t base;
	uintptr_t end;
	size_t below;
	size_t above;
	size_t unused;
	size_t best_below = ULONG_MAX;
	size_t best_above = ULONG_MAX;
	size_t best_unused = ULONG_MAX;

	ASSERT(ndata != NULL);

	/*
	 * Look for the best matching buffer, avoid memory fragmentation.
	 * The following strategy is used; try to find:
	 * 1. an exact fitting buffer
	 * 2. avoid wasting any space below the buffer, take first
	 *    fitting buffer
	 * 3. avoid wasting any space above the buffer, take first
	 *    fitting buffer
	 * 4. avoid wasting space, take first fitting buffer
	 * 5. take the last buffer in chain
	 */
	for (frlist = ndata; frlist != NULL; frlist = frlist->next) {
		base = roundup(frlist->address, alignment);
		end = roundup(base + wanted, ecache_alignsize);

		if (end > frlist->address + frlist->size)
			continue;

		below = (base - frlist->address) / ecache_alignsize;
		above = (frlist->address + frlist->size - end) /
		    ecache_alignsize;
		unused = below + above;

		if (unused == 0)
			return (frlist);

		if (frlist->next == NULL)
			break;

		if (below < best_below) {
			best_below = below;
			fnd_below = frlist;
		}

		if (above < best_above) {
			best_above = above;
			fnd_above = frlist;
		}

		if (unused < best_unused) {
			best_unused = unused;
			fnd_unused = frlist;
		}
	}

	if (best_below == 0)
		return (fnd_below);
	if (best_above == 0)
		return (fnd_above);
	if (best_unused < ULONG_MAX)
		return (fnd_unused);

	return (frlist);
}

/*
 * Nucleus data memory allocator.
 * The granularity of the allocator is ecache_alignsize.
 * See also comment for ndata_alloc_init().
 */
void *
ndata_alloc(struct memlist *ndata, size_t wanted, size_t alignment)
{
	struct memlist *found;
	struct memlist *fnd_above;
	uintptr_t base;
	uintptr_t end;
	size_t below;
	size_t above;

	/*
	 * Look for the best matching buffer, avoid memory fragmentation.
	 */
	if ((found = ndata_select_chunk(ndata, wanted, alignment)) == NULL)
		return (NULL);

	/*
	 * Allocate the nucleus data buffer.
	 */
	base = roundup(found->address, alignment);
	end = roundup(base + wanted, ecache_alignsize);
	ASSERT(end <= found->address + found->size);

	below = base - found->address;
	above = found->address + found->size - end;
	ASSERT(above == 0 || (above % ecache_alignsize) == 0);

	if (below >= ecache_alignsize) {
		/*
		 * There is free memory below the allocated memory chunk.
		 */
		found->size = below - below % ecache_alignsize;

		if (above) {
			fnd_above = (struct memlist *)end;
			fnd_above->address = end;
			fnd_above->size = above;

			if ((fnd_above->next = found->next) != NULL)
				found->next->prev = fnd_above;
			fnd_above->prev = found;
			found->next = fnd_above;
		}

		return ((void *)base);
	}

	if (found->prev == NULL) {
		/*
		 * The first chunk (ndata) is selected.
		 */
		ASSERT(found == ndata);
		if (above) {
			found->address = end;
			found->size = above;
		} else if (found->next != NULL) {
			found->address = found->next->address;
			found->size = found->next->size;
			if ((found->next = found->next->next) != NULL)
				found->next->prev = found;

			bzero((void *)found->address, sizeof (struct memlist));
		} else {
			found->address = end;
			found->size = 0;
		}

		return ((void *)base);
	}

	/*
	 * Not the first chunk.
	 */
	if (above) {
		fnd_above = (struct memlist *)end;
		fnd_above->address = end;
		fnd_above->size = above;

		if ((fnd_above->next = found->next) != NULL)
			fnd_above->next->prev = fnd_above;
		fnd_above->prev = found->prev;
		found->prev->next = fnd_above;

	} else {
		if ((found->prev->next = found->next) != NULL)
			found->next->prev = found->prev;
	}

	bzero((void *)found->address, sizeof (struct memlist));

	return ((void *)base);
}
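
/*
 * A minimal usage sketch of the allocator above (boot-time caller,
 * names illustrative): the caller seeds the freelist once and then
 * carves ecache_alignsize-granular buffers out of the nucleus:
 *
 *	struct memlist ndata;
 *
 *	ndata_alloc_init(&ndata, (uintptr_t)nucleus_start,
 *	    (uintptr_t)nucleus_end);
 *	buf = ndata_alloc(&ndata, sizeof (struct foo), ecache_alignsize);
 *
 * Allocations are permanent; there is no corresponding free routine.
 */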
/*
 * Size the kernel TSBs based upon the amount of physical
 * memory in the system.
 */
static void
calc_tsb_sizes(pgcnt_t npages)
{
	PRM_DEBUG(npages);

	if (npages <= TSB_FREEMEM_MIN) {
		ktsb_szcode = TSB_128K_SZCODE;
		enable_bigktsb = 0;
	} else if (npages <= TSB_FREEMEM_LARGE / 2) {
		ktsb_szcode = TSB_256K_SZCODE;
		enable_bigktsb = 0;
	} else if (npages <= TSB_FREEMEM_LARGE) {
		ktsb_szcode = TSB_512K_SZCODE;
		enable_bigktsb = 0;
	} else if (npages <= TSB_FREEMEM_LARGE * 2 ||
	    enable_bigktsb == 0) {
		ktsb_szcode = TSB_1M_SZCODE;
		enable_bigktsb = 0;
	} else {
		ktsb_szcode = highbit(npages - 1);
		ktsb_szcode -= TSB_START_SIZE;
		ktsb_szcode = MAX(ktsb_szcode, MIN_BIGKTSB_SZCODE);
		ktsb_szcode = MIN(ktsb_szcode, MAX_BIGKTSB_SZCODE);
	}

	/*
	 * We choose the TSB that holds kernel 4M mappings to have twice
	 * the reach of the primary kernel TSB, since this TSB will
	 * potentially (currently) be shared by both mappings to all of
	 * physical memory plus user TSBs.  If this TSB has to be in nucleus
	 * (only for Spitfire and Cheetah) limit its size to 64K.
	 */
	ktsb4m_szcode = highbit((2 * npages) / TTEPAGES(TTE4M) - 1);
	ktsb4m_szcode -= TSB_START_SIZE;
	ktsb4m_szcode = MAX(ktsb4m_szcode, TSB_MIN_SZCODE);
	ktsb4m_szcode = MIN(ktsb4m_szcode, TSB_SOFTSZ_MASK);
	if ((enable_bigktsb == 0 || ktsb_phys == 0) && ktsb4m_szcode >
	    TSB_64K_SZCODE) {
		ktsb4m_szcode = TSB_64K_SZCODE;
		max_bootlp_tteszc = TTE8K;
	}

	ktsb_sz = TSB_BYTES(ktsb_szcode);	/* kernel 8K tsb size */
	ktsb4m_sz = TSB_BYTES(ktsb4m_szcode);	/* kernel 4M tsb size */
}
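
/*
 * Worked example for the 4M-TSB sizing above (numbers illustrative):
 * TTEPAGES(TTE4M) is 512 (4 MB / 8 KB), so for npages == 4194304
 * (32 GB of 8K pages) the code sizes the TSB for
 * 2 * npages / TTEPAGES(TTE4M) == 16384 entries before the size code
 * is clamped to the supported [TSB_MIN_SZCODE, TSB_SOFTSZ_MASK] range.
 */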
/*
 * Allocate kernel TSBs from the nucleus data memory.
 * The function returns 0 on success and -1 on failure.
 */
int
ndata_alloc_tsbs(struct memlist *ndata, pgcnt_t npages)
{
	/*
	 * Set ktsb_phys to 1 if the processor supports ASI_QUAD_LDD_PHYS.
	 */
	sfmmu_setup_4lp();

	/*
	 * Size the kernel TSBs based upon the amount of physical
	 * memory in the system.
	 */
	calc_tsb_sizes(npages);

	/*
	 * Allocate the 8K kernel TSB if it belongs inside the nucleus.
	 */
	if (enable_bigktsb == 0) {
		if ((ktsb_base = ndata_alloc(ndata, ktsb_sz, ktsb_sz)) == NULL)
			return (-1);
		ASSERT(!((uintptr_t)ktsb_base & (ktsb_sz - 1)));

		PRM_DEBUG(ktsb_base);
		PRM_DEBUG(ktsb_sz);
		PRM_DEBUG(ktsb_szcode);
	}

	/*
	 * Next, allocate the 4M kernel TSB from the nucleus since it's small.
	 */
	if (ktsb4m_szcode <= TSB_64K_SZCODE) {
		ktsb4m_base = ndata_alloc(ndata, ktsb4m_sz, ktsb4m_sz);
		if (ktsb4m_base == NULL)
			return (-1);
		ASSERT(!((uintptr_t)ktsb4m_base & (ktsb4m_sz - 1)));

		PRM_DEBUG(ktsb4m_base);
		PRM_DEBUG(ktsb4m_sz);
		PRM_DEBUG(ktsb4m_szcode);
	}

	return (0);
}
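
/*
 * As a rough worked example for the hme hash sizing in
 * ndata_alloc_hat() below (constants illustrative; see hat_sfmmu.h for
 * the real values): if HMEHASH_FACTOR were 2, HMENT_HASHAVELEN were 4,
 * and an 8K hmeblk spanned 8 pages, the user hash would get
 * npages * 2 / (4 * 8) buckets, i.e. one bucket per 16 physical pages.
 */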
/*
 * Allocate hat structs from the nucleus data memory.
 */
int
ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages, pgcnt_t kpm_npages)
{
	size_t	mml_alloc_sz;
	size_t	cb_alloc_sz;
	int	max_nucuhme_buckets = MAX_NUCUHME_BUCKETS;
	int	max_nuckhme_buckets = MAX_NUCKHME_BUCKETS;
	ulong_t hme_buckets;

	if (enable_bigktsb) {
		ASSERT((max_nucuhme_buckets + max_nuckhme_buckets) *
		    sizeof (struct hmehash_bucket) <=
		    TSB_BYTES(TSB_1M_SZCODE));

		max_nucuhme_buckets *= 2;
		max_nuckhme_buckets *= 2;
	}

	/*
	 * The number of buckets in the hme hash tables
	 * is a power of 2 such that the average hash chain length is
	 * HMENT_HASHAVELEN.  The number of buckets for the user hash is
	 * a function of physical memory and a predefined overmapping factor.
	 * The number of buckets for the kernel hash is a function of
	 * physical memory only.
	 */
	hme_buckets = (npages * HMEHASH_FACTOR) /
	    (HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT));

	uhmehash_num = (int)MIN(hme_buckets, MAX_UHME_BUCKETS);

	if (uhmehash_num > USER_BUCKETS_THRESHOLD) {
		/*
		 * if uhmehash_num is not a power of 2 round it down to the
		 * next power of 2.
		 */
		uint_t align = 1 << (highbit(uhmehash_num - 1) - 1);
		uhmehash_num = P2ALIGN(uhmehash_num, align);
	} else
		uhmehash_num = 1 << highbit(uhmehash_num - 1);

	hme_buckets = npages / (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT);
	khmehash_num = (int)MIN(hme_buckets, MAX_KHME_BUCKETS);
	khmehash_num = 1 << highbit(khmehash_num - 1);
	khmehash_num = MAX(khmehash_num, MIN_KHME_BUCKETS);

	if ((khmehash_num > max_nuckhme_buckets) ||
	    (uhmehash_num > max_nucuhme_buckets)) {
		khme_hash = NULL;
		uhme_hash = NULL;
	} else {
		size_t hmehash_sz = (uhmehash_num + khmehash_num) *
		    sizeof (struct hmehash_bucket);

		if ((khme_hash = ndata_alloc(ndata, hmehash_sz,
		    ecache_alignsize)) != NULL)
			uhme_hash = &khme_hash[khmehash_num];
		else
			uhme_hash = NULL;

		PRM_DEBUG(hmehash_sz);
	}

	PRM_DEBUG(khme_hash);
	PRM_DEBUG(khmehash_num);
	PRM_DEBUG(uhme_hash);
	PRM_DEBUG(uhmehash_num);

	/*
	 * For the page mapping list mutex array we allocate one mutex
	 * for every 128 pages (1 MB) with a minimum of 64 entries and
	 * a maximum of 8K entries.  For the initial computation npages
	 * is rounded up (i.e. 1 << highbit(npages * 1.5 / 128)).
	 *
	 * mml_shift is roughly log2(mml_table_sz) + 3 for MLIST_HASH.
	 *
	 * It is not required that this be allocated from the nucleus,
	 * but it is desirable.  So we first allocate from the nucleus
	 * everything that must be there.  Having done so, if mml_table
	 * will fit within what remains of the nucleus then it will be
	 * allocated here.  If not, set mml_table to NULL, which will cause
	 * startup_memlist() to BOP_ALLOC() space for it after our return...
	 */
	mml_table_sz = 1 << highbit((npages * 3) / 256);
	if (mml_table_sz < 64)
		mml_table_sz = 64;
	else if (mml_table_sz > 8192)
		mml_table_sz = 8192;
	mml_shift = highbit(mml_table_sz) + 3;

	PRM_DEBUG(mml_table_sz);
	PRM_DEBUG(mml_shift);

	mml_alloc_sz = mml_table_sz * sizeof (kmutex_t);

	mml_table = ndata_alloc(ndata, mml_alloc_sz, ecache_alignsize);

	PRM_DEBUG(mml_table);

	cb_alloc_sz = sfmmu_max_cb_id * sizeof (struct sfmmu_callback);
	PRM_DEBUG(cb_alloc_sz);
	sfmmu_cb_table = ndata_alloc(ndata, cb_alloc_sz, ecache_alignsize);
	PRM_DEBUG(sfmmu_cb_table);

	/*
	 * For the kpm_page mutex array we allocate one mutex every 16
	 * kpm pages (64MB).  In smallpage mode we allocate one mutex
	 * every 8K pages.  The minimum is set to 64 entries and the
	 * maximum to 8K entries.
	 *
	 * It is not required that this be allocated from the nucleus,
	 * but it is desirable.  So we first allocate from the nucleus
	 * everything that must be there.  Having done so, if kpmp_table
	 * or kpmp_stable will fit within what remains of the nucleus
	 * then it will be allocated here.  If not, startup_memlist()
	 * will use BOP_ALLOC() space for it after our return...
	 */
	if (kpm_enable) {
		size_t	kpmp_alloc_sz;

		if (kpm_smallpages == 0) {
			kpmp_shift = highbit(sizeof (kpm_page_t)) - 1;
			kpmp_table_sz = 1 << highbit(kpm_npages / 16);
			kpmp_table_sz = (kpmp_table_sz < 64) ? 64 :
			    ((kpmp_table_sz > 8192) ? 8192 : kpmp_table_sz);
			kpmp_alloc_sz = kpmp_table_sz * sizeof (kpm_hlk_t);

			kpmp_table = ndata_alloc(ndata, kpmp_alloc_sz,
			    ecache_alignsize);

			PRM_DEBUG(kpmp_table);
			PRM_DEBUG(kpmp_table_sz);

			kpmp_stable_sz = 0;
			kpmp_stable = NULL;
		} else {
			ASSERT(kpm_pgsz == PAGESIZE);
			kpmp_shift = highbit(sizeof (kpm_shlk_t)) + 1;
			kpmp_stable_sz = 1 << highbit(kpm_npages / 8192);
			kpmp_stable_sz = (kpmp_stable_sz < 64) ? 64 :
			    ((kpmp_stable_sz > 8192) ? 8192 : kpmp_stable_sz);
			kpmp_alloc_sz = kpmp_stable_sz * sizeof (kpm_shlk_t);

			kpmp_stable = ndata_alloc(ndata, kpmp_alloc_sz,
			    ecache_alignsize);

			PRM_DEBUG(kpmp_stable);
			PRM_DEBUG(kpmp_stable_sz);

			kpmp_table_sz = 0;
			kpmp_table = NULL;
		}
		PRM_DEBUG(kpmp_shift);
	}

	return (0);
}
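
/*
 * Worked example for the mml_table sizing above (npages illustrative):
 * with npages == 131072 (1 GB of 8K pages), (npages * 3) / 256 == 1536,
 * so mml_table_sz == 1 << highbit(1536) == 2048 mutexes, and
 * mml_shift == highbit(2048) + 3 == 15.
 */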
/*
 * Allocate virtual addresses at base with given alignment.
 * Note that there is no physical memory behind the address yet.
 */
caddr_t
alloc_hme_buckets(caddr_t base, int alignsize)
{
	size_t hmehash_sz = (uhmehash_num + khmehash_num) *
	    sizeof (struct hmehash_bucket);

	ASSERT(khme_hash == NULL);
	ASSERT(uhme_hash == NULL);

	base = (caddr_t)roundup((uintptr_t)base, alignsize);
	hmehash_sz = roundup(hmehash_sz, alignsize);

	khme_hash = (struct hmehash_bucket *)base;
	uhme_hash = (struct hmehash_bucket *)((caddr_t)khme_hash +
	    khmehash_num * sizeof (struct hmehash_bucket));
	base += hmehash_sz;
	return (base);
}

/*
 * This function BOP_ALLOC()s the kernel TSBs.
 */
caddr_t
sfmmu_ktsb_alloc(caddr_t tsbbase)
{
	caddr_t vaddr;

	if (enable_bigktsb) {
		ktsb_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb_sz);
		vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb_base, ktsb_sz,
		    ktsb_sz);
		if (vaddr != ktsb_base)
			cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc"
			    " 8K bigktsb");
		ktsb_base = vaddr;
		tsbbase = ktsb_base + ktsb_sz;
		PRM_DEBUG(ktsb_base);
		PRM_DEBUG(tsbbase);
	}

	if (ktsb4m_szcode > TSB_64K_SZCODE) {
		ASSERT(ktsb_phys && enable_bigktsb);
		ktsb4m_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb4m_sz);
		vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb4m_base, ktsb4m_sz,
		    ktsb4m_sz);
		if (vaddr != ktsb4m_base)
			cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc"
			    " 4M bigktsb");
		ktsb4m_base = vaddr;
		tsbbase = ktsb4m_base + ktsb4m_sz;
		PRM_DEBUG(ktsb4m_base);
		PRM_DEBUG(tsbbase);
	}
	return (tsbbase);
}

/*
 * Moves code assembled outside of the trap table into the trap
 * table taking care to relocate relative branches to code outside
 * of the trap handler.
 */
static void
sfmmu_reloc_trap_handler(void *tablep, void *start, size_t count)
{
	size_t i;
	uint32_t *src;
	uint32_t *dst;
	uint32_t inst;
	int op, op2;
	int32_t offset;
	int disp;

	src = start;
	dst = tablep;
	offset = src - dst;
	for (src = start, i = 0; i < count; i++, src++, dst++) {
		inst = *dst = *src;
		op = (inst >> 30) & 0x3;
		if (op == 1) {
			/* call */
			disp = ((int32_t)inst << 2) >> 2; /* sign-extend */
			if (disp + i >= 0 && disp + i < count)
				continue;
			disp += offset;
			inst = 0x40000000u | (disp & 0x3fffffffu);
			*dst = inst;
		} else if (op == 0) {
			/* branch or sethi */
			op2 = (inst >> 22) & 0x7;

			switch (op2) {
			case 0x3: /* BPr */
				disp = (((inst >> 20) & 0x3) << 14) |
				    (inst & 0x3fff);
				disp = (disp << 16) >> 16; /* sign-extend */
				if (disp + i >= 0 && disp + i < count)
					continue;
				disp += offset;
				if (((disp << 16) >> 16) != disp)
					cmn_err(CE_PANIC, "bad reloc");
				inst &= ~0x303fff;
				inst |= (disp & 0x3fff);
				inst |= (disp & 0xc000) << 6;
				break;

			case 0x2: /* Bicc */
				disp = ((int32_t)inst << 10) >> 10;
				if (disp + i >= 0 && disp + i < count)
					continue;
				disp += offset;
				if (((disp << 10) >> 10) != disp)
					cmn_err(CE_PANIC, "bad reloc");
				inst &= ~0x3fffff;
				inst |= (disp & 0x3fffff);
				break;

			case 0x1: /* Bpcc */
				disp = ((int32_t)inst << 13) >> 13;
				if (disp + i >= 0 && disp + i < count)
					continue;
				disp += offset;
				if (((disp << 13) >> 13) != disp)
					cmn_err(CE_PANIC, "bad reloc");
				inst &= ~0x7ffff;
				inst |= (disp & 0x7ffffu);
				break;
			}
			*dst = inst;
		}
	}
	flush_instr_mem(tablep, count * sizeof (uint32_t));
}
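
/*
 * Worked example for the relocation above (word offsets illustrative):
 * a SPARC "call" at table word i == 10 with disp30 == -4 targets word
 * 6, which is still inside the copied region, so it is left alone.
 * If the displacement instead pointed outside [0, count), it is
 * rebiased by "offset" (the distance in words between the original
 * code at "start" and its new home in the trap table) so the target
 * keeps its original absolute address.
 */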
/*
 * Routine to allocate a large page to use in the TSB caches.
 */
/*ARGSUSED*/
static page_t *
sfmmu_tsb_page_create(void *addr, size_t size, int vmflag, void *arg)
{
	int pgflags;

	pgflags = PG_EXCL;
	if ((vmflag & VM_NOSLEEP) == 0)
		pgflags |= PG_WAIT;
	if (vmflag & VM_PANIC)
		pgflags |= PG_PANIC;
	if (vmflag & VM_PUSHPAGE)
		pgflags |= PG_PUSHPAGE;

	return (page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size,
	    pgflags, &kvseg, addr, arg));
}

/*
 * Allocate a large page to back the virtual address range
 * [addr, addr + size).  If addr is NULL, allocate the virtual address
 * space as well.
 */
static void *
sfmmu_tsb_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
    uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
    void *pcarg)
{
	page_t *ppl;
	page_t *rootpp;
	caddr_t addr = inaddr;
	pgcnt_t npages = btopr(size);
	page_t **ppa;
	int i = 0;

	/*
	 * Assuming that only TSBs will call this with size > PAGESIZE.
	 * There is no reason why this couldn't be expanded to 8k pages as
	 * well, or other page sizes in the future....  But for now, we
	 * only support fixed sized page requests.
	 */
	if ((inaddr == NULL) && ((addr = vmem_xalloc(vmp, size, size, 0, 0,
	    NULL, NULL, vmflag)) == NULL))
		return (NULL);

	/* If we ever don't want TSB slab-sized pages, this will panic */
	ASSERT(((uintptr_t)addr & (tsb_slab_size - 1)) == 0);

	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
		if (inaddr == NULL)
			vmem_xfree(vmp, addr, size);
		return (NULL);
	}

	ppl = page_create_func(addr, size, vmflag, pcarg);
	if (ppl == NULL) {
		if (inaddr == NULL)
			vmem_xfree(vmp, addr, size);
		page_unresv(npages);
		return (NULL);
	}

	rootpp = ppl;
	ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
	while (ppl != NULL) {
		page_t *pp = ppl;
		ppa[i++] = pp;
		page_sub(&ppl, pp);
		ASSERT(page_iolock_assert(pp));
		page_io_unlock(pp);
	}

	/*
	 * Load the locked entry.  It's OK to preload the entry into
	 * the TSB since we now support large mappings in the kernel TSB.
	 */
	hat_memload_array(kas.a_hat, (caddr_t)rootpp->p_offset, size,
	    ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr, HAT_LOAD_LOCK);

	for (--i; i >= 0; --i) {
		(void) page_pp_lock(ppa[i], 0, 1);
		page_unlock(ppa[i]);
	}

	kmem_free(ppa, npages * sizeof (page_t *));
	return (addr);
}

/* Called to import new spans into the TSB vmem arenas */
void *
sfmmu_tsb_segkmem_alloc(vmem_t *vmp, size_t size, int vmflag)
{
	lgrp_id_t lgrpid = LGRP_NONE;

	if (tsb_lgrp_affinity) {
		/*
		 * Search for the vmp->lgrpid mapping by brute force;
		 * some day vmp will have an lgrp, until then we have
		 * to do this the hard way.
		 */
		for (lgrpid = 0; lgrpid < NLGRPS_MAX &&
		    vmp != kmem_tsb_default_arena[lgrpid]; lgrpid++)
			;
		if (lgrpid == NLGRPS_MAX)
			lgrpid = LGRP_NONE;
	}

	return (sfmmu_tsb_xalloc(vmp, NULL, size, vmflag, 0,
	    sfmmu_tsb_page_create, lgrpid != LGRP_NONE ? &lgrpid : NULL));
}
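
/*
 * A sketch of how these hooks are typically wired up (arguments
 * illustrative, not the exact arena configuration): the import routine
 * above and the release routine below are passed to vmem_create() when
 * a TSB arena is built, e.g.
 *
 *	arena = vmem_create("kmem_tsb_default", NULL, 0, tsb_slab_size,
 *	    sfmmu_tsb_segkmem_alloc, sfmmu_tsb_segkmem_free,
 *	    source_arena, 0, VM_SLEEP);
 *
 * so that vmem_alloc() against the arena pulls in slab-sized,
 * large-page-backed spans on demand.
 */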
/* Called to free spans from the TSB vmem arenas */
void
sfmmu_tsb_segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
{
	page_t *pp;
	caddr_t addr = inaddr;
	caddr_t eaddr;
	pgcnt_t npages = btopr(size);
	pgcnt_t pgs_left = npages;
	page_t *rootpp = NULL;

	ASSERT(((uintptr_t)addr & (tsb_slab_size - 1)) == 0);

	hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);

	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
		pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
		if (pp == NULL)
			panic("sfmmu_tsb_segkmem_free: page not found");

		ASSERT(PAGE_EXCL(pp));
		page_pp_unlock(pp, 0, 1);

		if (rootpp == NULL)
			rootpp = pp;
		if (--pgs_left == 0) {
			/*
			 * similar logic to segspt_free_pages, but we know we
			 * have one large page.
			 */
			page_destroy_pages(rootpp);
		}
	}
	page_unresv(npages);

	if (vmp != NULL)
		vmem_xfree(vmp, inaddr, size);
}