/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <vm/hat.h>
#include <vm/hat_sfmmu.h>
#include <vm/page.h>
#include <sys/pte.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/sysmacros.h>
#include <sys/machparam.h>
#include <sys/vtrace.h>
#include <sys/kmem.h>
#include <sys/mmu.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/cpuvar.h>
#include <sys/debug.h>
#include <sys/lgrp.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/vmsystm.h>
#include <sys/bitmap.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/rm.h>
#include <vm/vm_dep.h>
#include <sys/t_lock.h>
#include <sys/vm_machparam.h>
#include <sys/promif.h>
#include <sys/prom_isa.h>
#include <sys/prom_plat.h>
#include <sys/prom_debug.h>
#include <sys/privregs.h>
#include <sys/bootconf.h>
#include <sys/memlist.h>
#include <sys/memlist_plat.h>
#include <sys/cpu_module.h>
#include <sys/reboot.h>
#include <sys/kdi.h>

/*
 * Static routines
 */
static void	sfmmu_map_prom_mappings(struct translation *, size_t);
static struct translation *read_prom_mappings(size_t *);
static void	sfmmu_reloc_trap_handler(void *, void *, size_t);

/*
 * External routines
 */
extern void	sfmmu_remap_kernel(void);
extern void	sfmmu_patch_utsb(void);

/*
 * Global Data:
 */
extern caddr_t	textva, datava;
extern tte_t	ktext_tte, kdata_tte;	/* ttes for kernel text and data */
extern int	enable_bigktsb;

uint64_t memsegspa = (uintptr_t)MSEG_NULLPTR_PA; /* memsegs physical linkage */
uint64_t memseg_phash[N_MEM_SLOTS];	/* use physical memseg addresses */

int	sfmmu_kern_mapped = 0;

/*
 * DMMU primary context register for the kernel context.  Machine specific
 * code inserts correct page size codes when necessary.
 */
uint64_t kcontextreg = KCONTEXT;

/* Extern Global Data */

extern int page_relocate_ready;
/*
 * Controls the logic which enables the use of the
 * QUAD_LDD_PHYS ASI for TSB accesses.
 */
extern int	ktsb_phys;

/*
 * Global Routines called from within:
 *	usr/src/uts/sun4u
 *	usr/src/uts/sfmmu
 *	usr/src/uts/sun
 */

pfn_t
va_to_pfn(void *vaddr)
{
	u_longlong_t physaddr;
	int mode, valid;

	if (tba_taken_over)
		return (hat_getpfnum(kas.a_hat, (caddr_t)vaddr));

#if !defined(C_OBP)
	if ((caddr_t)vaddr >= kmem64_base && (caddr_t)vaddr < kmem64_end) {
		if (kmem64_pabase == (uint64_t)-1)
			prom_panic("va_to_pfn: kmem64_pabase not init");
		physaddr = kmem64_pabase + ((caddr_t)vaddr - kmem64_base);
		return ((pfn_t)(physaddr >> MMU_PAGESHIFT));
	}
#endif	/* !C_OBP */

	/*
	 * The prom sets valid to OBP "true" (-1) when the translation
	 * exists.
	 */
	if ((prom_translate_virt(vaddr, &valid, &physaddr, &mode) != -1) &&
	    (valid == -1)) {
		return ((pfn_t)(physaddr >> MMU_PAGESHIFT));
	}
	return (PFN_INVALID);
}

uint64_t
va_to_pa(void *vaddr)
{
	pfn_t pfn;

	if ((pfn = va_to_pfn(vaddr)) == PFN_INVALID)
		return ((uint64_t)-1);
	return (((uint64_t)pfn << MMU_PAGESHIFT) |
	    ((uint64_t)vaddr & MMU_PAGEOFFSET));
}

void
hat_kern_setup(void)
{
	struct translation *trans_root;
	size_t ntrans_root;
	extern void startup_fixup_physavail(void);

	/*
	 * These are the steps we take to take over the mmu from the prom.
	 *
	 * (1)	Read the prom's mappings through the translation property.
	 * (2)	Remap the kernel text and kernel data with 2 locked 4MB ttes.
	 *	Create the hmeblks for these 2 ttes at this time.
	 * (3)	Create hat structures for all other prom mappings.  Since the
	 *	kernel text and data hme_blks have already been created we
	 *	skip the equivalent prom's mappings.
	 * (4)	Initialize the tsb and its corresponding hardware regs.
	 * (5)	Take over the trap table (currently in startup).
	 * (6)	Up to this point it is possible the prom required some of its
	 *	locked tte's.  Now that we own the trap table we remove them.
	 */

	ktsb_pbase = va_to_pa(ktsb_base);
	ktsb4m_pbase = va_to_pa(ktsb4m_base);
	PRM_DEBUG(ktsb_pbase);
	PRM_DEBUG(ktsb4m_pbase);

	sfmmu_patch_ktsb();
	sfmmu_patch_utsb();
	sfmmu_patch_mmu_asi(ktsb_phys);

	sfmmu_init_tsbs();

	if (kpm_enable) {
		sfmmu_kpm_patch_tlbm();
		if (kpm_smallpages == 0) {
			sfmmu_kpm_patch_tsbm();
		}
	}

	/*
	 * The 8K-indexed kernel TSB space is used to hold
	 * translations below...
	 */
	trans_root = read_prom_mappings(&ntrans_root);
	sfmmu_remap_kernel();
	startup_fixup_physavail();
	mmu_init_kernel_pgsz(kas.a_hat);
	sfmmu_map_prom_mappings(trans_root, ntrans_root);

	/*
	 * We invalidate 8K kernel TSB because we used it in
	 * sfmmu_map_prom_mappings()
	 */
	sfmmu_inv_tsb(ktsb_base, ktsb_sz);
	sfmmu_inv_tsb(ktsb4m_base, ktsb4m_sz);

	sfmmu_init_ktsbinfo();


	sfmmu_kern_mapped = 1;

	/*
	 * hments have been created for mapped pages, and thus we're ready
	 * for kmdb to start using its own trap table.  It walks the hments
	 * to resolve TLB misses, and can't be used until they're ready.
	 */
	if (boothowto & RB_DEBUG)
		kdi_dvec_vmready();
}

/*
 * Macro used below to convert the prom's 32-bit high and low fields into
 * a value appropriate for the 64-bit kernel.
 */
#define	COMBINE(hi, lo)	(((uint64_t)(uint32_t)(hi) << 32) | (uint32_t)(lo))
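/*
 * For example, COMBINE(0x1, 0x80000000) yields 0x180000000.  The
 * inner (uint32_t) casts keep a sign-extended 32-bit field from
 * smearing into the upper half of the 64-bit result.
 */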
/*
 * Track large pages used.
 * Provides observability for this feature on non-debug kernels.
 */
ulong_t map_prom_lpcount[MMU_PAGE_SIZES];

/*
 * This function traverses the prom mapping list and creates equivalent
 * mappings in the sfmmu mapping hash.
 */
static void
sfmmu_map_prom_mappings(struct translation *trans_root, size_t ntrans_root)
{
	struct translation *promt;
	tte_t	tte, oldtte, *ttep;
	pfn_t	pfn, oldpfn, basepfn;
	caddr_t vaddr;
	size_t	size, offset;
	unsigned long i;
	uint_t	attr;
	page_t *pp;
	extern struct memlist *virt_avail;

	ttep = &tte;
	for (i = 0, promt = trans_root; i < ntrans_root; i++, promt++) {
		ASSERT(promt->tte_hi != 0);
		ASSERT32(promt->virt_hi == 0 && promt->size_hi == 0);

		vaddr = (caddr_t)COMBINE(promt->virt_hi, promt->virt_lo);

		/*
		 * hack until we get rid of map-for-unix
		 */
		if (vaddr < (caddr_t)KERNELBASE)
			continue;

		ttep->tte_inthi = promt->tte_hi;
		ttep->tte_intlo = promt->tte_lo;
		attr = PROC_DATA | HAT_NOSYNC;
#if defined(TTE_IS_GLOBAL)
		if (TTE_IS_GLOBAL(ttep)) {
			/*
			 * The prom better not use global translations
			 * because a user process might use the same
			 * virtual addresses
			 */
			cmn_err(CE_PANIC, "map_prom: global translation");
			TTE_SET_LOFLAGS(ttep, TTE_GLB_INT, 0);
		}
#endif
		if (TTE_IS_LOCKED(ttep)) {
			/* clear the lock bits */
			TTE_CLR_LOCKED(ttep);
		}
		attr |= (TTE_IS_VCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEVTTE;
		attr |= (TTE_IS_PCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEPTTE;
		attr |= (TTE_IS_SIDEFFECT(ttep)) ? SFMMU_SIDEFFECT : 0;
		attr |= (TTE_IS_IE(ttep)) ? HAT_STRUCTURE_LE : 0;

		size = COMBINE(promt->size_hi, promt->size_lo);
		offset = 0;
		basepfn = TTE_TO_PFN((caddr_t)COMBINE(promt->virt_hi,
		    promt->virt_lo), ttep);
		while (size) {
			vaddr = (caddr_t)(COMBINE(promt->virt_hi,
			    promt->virt_lo) + offset);

			/*
			 * make sure address is not in virt-avail list
			 */
			if (address_in_memlist(virt_avail, (uint64_t)vaddr,
			    size)) {
				cmn_err(CE_PANIC, "map_prom: inconsistent "
				    "translation/avail lists");
			}

			pfn = basepfn + mmu_btop(offset);
			if (pf_is_memory(pfn)) {
				if (attr & SFMMU_UNCACHEPTTE) {
					cmn_err(CE_PANIC, "map_prom: "
					    "uncached prom memory page");
				}
			} else {
				if (!(attr & SFMMU_SIDEFFECT)) {
					cmn_err(CE_PANIC, "map_prom: prom "
					    "i/o page without side-effect");
				}
			}

			/*
			 * skip kmem64 area
			 */
			if (vaddr >= kmem64_base &&
			    vaddr < kmem64_aligned_end) {
#if !defined(C_OBP)
				cmn_err(CE_PANIC,
				    "unexpected kmem64 prom mapping\n");
#else /* !C_OBP */
				size_t mapsz;

				if (ptob(pfn) !=
				    kmem64_pabase + (vaddr - kmem64_base)) {
					cmn_err(CE_PANIC,
					    "unexpected kmem64 prom mapping\n");
				}

				mapsz = kmem64_aligned_end - vaddr;
				if (mapsz >= size) {
					break;
				}
				size -= mapsz;
				offset += mapsz;
				continue;
#endif /* !C_OBP */
			}

			oldpfn = sfmmu_vatopfn(vaddr, KHATID, &oldtte);
			ASSERT(oldpfn != PFN_SUSPENDED);
			ASSERT(page_relocate_ready == 0);

			if (oldpfn != PFN_INVALID) {
				/*
				 * mapping already exists.
				 * Verify they are equal.
				 */
				if (pfn != oldpfn) {
					cmn_err(CE_PANIC, "map_prom: mapping "
					    "conflict (va=0x%p pfn=%p, "
					    "oldpfn=%p)",
					    (void *)vaddr, (void *)pfn,
					    (void *)oldpfn);
				}
				size -= MMU_PAGESIZE;
				offset += MMU_PAGESIZE;
				continue;
			}

			pp = page_numtopp_nolock(pfn);
			if ((pp != NULL) && PP_ISFREE((page_t *)pp)) {
				cmn_err(CE_PANIC, "map_prom: "
				    "prom-mapped page (va 0x%p, pfn 0x%p) "
				    "on free list", (void *)vaddr, (void *)pfn);
			}

			sfmmu_memtte(ttep, pfn, attr, TTE8K);
			sfmmu_tteload(kas.a_hat, ttep, vaddr, pp,
			    HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
			size -= MMU_PAGESIZE;
			offset += MMU_PAGESIZE;
		}
	}

	/*
	 * We claimed kmem64 from prom, so now we need to load tte.
	 */
	if (kmem64_base != NULL) {
		pgcnt_t pages;
		size_t psize;
		int pszc;

		pszc = kmem64_szc;
#ifdef sun4u
		if (pszc > TTE8K) {
			pszc = segkmem_lpszc;
		}
#endif /* sun4u */
		psize = TTEBYTES(pszc);
		pages = btop(psize);
		basepfn = kmem64_pabase >> MMU_PAGESHIFT;
		vaddr = kmem64_base;
		while (vaddr < kmem64_end) {
			sfmmu_memtte(ttep, basepfn,
			    PROC_DATA | HAT_NOSYNC, pszc);
			sfmmu_tteload(kas.a_hat, ttep, vaddr, NULL,
			    HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
			vaddr += psize;
			basepfn += pages;
		}
		map_prom_lpcount[pszc] =
		    ((caddr_t)P2ROUNDUP((uintptr_t)kmem64_end, psize) -
		    kmem64_base) >> TTE_PAGE_SHIFT(pszc);
	}
}

#undef COMBINE	/* local to previous routine */

/*
 * This routine reads the "translations" property into a buffer and
 * returns a pointer to this buffer and the number of translations.
 */
static struct translation *
read_prom_mappings(size_t *ntransrootp)
{
	char *prop = "translations";
	size_t translen;
	pnode_t node;
	struct translation *transroot;

	/*
	 * the "translations" property is associated with the mmu node
	 */
	node = (pnode_t)prom_getphandle(prom_mmu_ihandle());

	/*
	 * We use the TSB space to read in the prom mappings.  This space
	 * is currently not being used because we haven't taken over the
	 * trap table yet.  It should be big enough to hold the mappings.
	 */
	if ((translen = prom_getproplen(node, prop)) == -1)
		cmn_err(CE_PANIC, "no translations property");
	*ntransrootp = translen / sizeof (*transroot);
	translen = roundup(translen, MMU_PAGESIZE);
	PRM_DEBUG(translen);
	if (translen > TSB_BYTES(ktsb_szcode))
		cmn_err(CE_PANIC, "not enough space for translations");

	transroot = (struct translation *)ktsb_base;
	ASSERT(transroot);
	if (prom_getprop(node, prop, (caddr_t)transroot) == -1) {
		cmn_err(CE_PANIC, "translations getprop failed");
	}
	return (transroot);
}
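/*
 * Note that each entry returned above is a (virt, size, tte) triple
 * whose 64-bit values arrive split into 32-bit hi/lo halves;
 * sfmmu_map_prom_mappings() reassembles them with COMBINE().
 */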
/*
 * Init routine of the nucleus data memory allocator.
 *
 * The nucleus data memory allocator is organized in ecache_alignsize'd
 * memory chunks.  Memory allocated by ndata_alloc() will never be freed.
 *
 * The ndata argument is used as header of the ndata freelist.
 * Other freelist nodes are placed in the nucleus memory itself
 * at the beginning of a free memory chunk.  Therefore a freelist
 * node (struct memlist) must fit into the smallest allocatable
 * memory chunk (ecache_alignsize bytes).
 *
 * The memory interval [base, end] passed to ndata_alloc_init() must be
 * bzero'd to allow the allocator to return bzero'd memory easily.
 */
void
ndata_alloc_init(struct memlist *ndata, uintptr_t base, uintptr_t end)
{
	ASSERT(sizeof (struct memlist) <= ecache_alignsize);

	base = roundup(base, ecache_alignsize);
	end = end - end % ecache_alignsize;

	ASSERT(base < end);

	ndata->address = base;
	ndata->size = end - base;
	ndata->next = NULL;
	ndata->prev = NULL;
}

/*
 * Deliver the size of the largest free memory chunk.
 */
size_t
ndata_maxsize(struct memlist *ndata)
{
	size_t chunksize = ndata->size;

	while ((ndata = ndata->next) != NULL) {
		if (chunksize < ndata->size)
			chunksize = ndata->size;
	}

	return (chunksize);
}

/*
 * This is a special function to figure out if the memory chunk needed
 * for the page structs can fit in the nucleus or not.  If it fits, the
 * function returns the amount of free nucleus memory that would remain
 * in the last chunk, assuming the page structs were carved from the
 * first chunk that can hold them.
 */
size_t
ndata_spare(struct memlist *ndata, size_t wanted, size_t alignment)
{
	struct memlist *frlist;
	uintptr_t base;
	uintptr_t end;

	for (frlist = ndata; frlist != NULL; frlist = frlist->next) {
		base = roundup(frlist->address, alignment);
		end = roundup(base + wanted, ecache_alignsize);

		if (end <= frlist->address + frlist->size) {
			if (frlist->next == NULL)
				return (frlist->address + frlist->size - end);

			while (frlist->next != NULL)
				frlist = frlist->next;

			return (frlist->size);
		}
	}

	return (0);
}

/*
 * Allocate the last properly aligned memory chunk.
 * This function is called when no more large nucleus memory chunks
 * will be allocated.  The remaining free nucleus memory at the end
 * of the nucleus can be added to the phys_avail list.
 */
void *
ndata_extra_base(struct memlist *ndata, size_t alignment)
{
	uintptr_t base;
	size_t wasteage = 0;
#ifdef	DEBUG
	static int called = 0;

	if (called++ > 0)
		cmn_err(CE_PANIC, "ndata_extra_base() called more than once");
#endif /* DEBUG */

	/*
	 * The alignment needs to be a multiple of ecache_alignsize.
	 */
	ASSERT((alignment % ecache_alignsize) == 0);

	while (ndata->next != NULL) {
		wasteage += ndata->size;
		ndata = ndata->next;
	}

	base = roundup(ndata->address, alignment);

	if (base >= ndata->address + ndata->size)
		return (NULL);

	if (base == ndata->address) {
		if (ndata->prev != NULL)
			ndata->prev->next = NULL;
		else
			ndata->size = 0;

		bzero((void *)base, sizeof (struct memlist));

	} else {
		ndata->size = base - ndata->address;
		wasteage += ndata->size;
	}
	PRM_DEBUG(wasteage);

	return ((void *)base);
}
/*
 * Select the best matching buffer, avoid memory fragmentation.
 */
static struct memlist *
ndata_select_chunk(struct memlist *ndata, size_t wanted, size_t alignment)
{
	struct memlist *fnd_below = NULL;
	struct memlist *fnd_above = NULL;
	struct memlist *fnd_unused = NULL;
	struct memlist *frlist;
	uintptr_t base;
	uintptr_t end;
	size_t below;
	size_t above;
	size_t unused;
	size_t best_below = ULONG_MAX;
	size_t best_above = ULONG_MAX;
	size_t best_unused = ULONG_MAX;

	ASSERT(ndata != NULL);

	/*
	 * Look for the best matching buffer, avoid memory fragmentation.
	 * The following strategy is used, try to find
	 * 1. an exact fitting buffer
	 * 2. avoid wasting any space below the buffer, take first
	 *    fitting buffer
	 * 3. avoid wasting any space above the buffer, take first
	 *    fitting buffer
	 * 4. avoid wasting space, take first fitting buffer
	 * 5. take the last buffer in chain
	 */
	for (frlist = ndata; frlist != NULL; frlist = frlist->next) {
		base = roundup(frlist->address, alignment);
		end = roundup(base + wanted, ecache_alignsize);

		if (end > frlist->address + frlist->size)
			continue;

		below = (base - frlist->address) / ecache_alignsize;
		above = (frlist->address + frlist->size - end) /
		    ecache_alignsize;
		unused = below + above;

		if (unused == 0)
			return (frlist);

		if (frlist->next == NULL)
			break;

		if (below < best_below) {
			best_below = below;
			fnd_below = frlist;
		}

		if (above < best_above) {
			best_above = above;
			fnd_above = frlist;
		}

		if (unused < best_unused) {
			best_unused = unused;
			fnd_unused = frlist;
		}
	}

	if (best_below == 0)
		return (fnd_below);
	if (best_above == 0)
		return (fnd_above);
	if (best_unused < ULONG_MAX)
		return (fnd_unused);

	return (frlist);
}

/*
 * Nucleus data memory allocator.
 * The granularity of the allocator is ecache_alignsize.
 * See also comment for ndata_alloc_init().
 */
void *
ndata_alloc(struct memlist *ndata, size_t wanted, size_t alignment)
{
	struct memlist *found;
	struct memlist *fnd_above;
	uintptr_t base;
	uintptr_t end;
	size_t below;
	size_t above;

	/*
	 * Look for the best matching buffer, avoid memory fragmentation.
	 */
	if ((found = ndata_select_chunk(ndata, wanted, alignment)) == NULL)
		return (NULL);

	/*
	 * Allocate the nucleus data buffer.
	 */
	base = roundup(found->address, alignment);
	end = roundup(base + wanted, ecache_alignsize);
	ASSERT(end <= found->address + found->size);

	below = base - found->address;
	above = found->address + found->size - end;
	ASSERT(above == 0 || (above % ecache_alignsize) == 0);

	if (below >= ecache_alignsize) {
		/*
		 * There is free memory below the allocated memory chunk.
		 */
		found->size = below - below % ecache_alignsize;

		if (above) {
			fnd_above = (struct memlist *)end;
			fnd_above->address = end;
			fnd_above->size = above;

			if ((fnd_above->next = found->next) != NULL)
				found->next->prev = fnd_above;
			fnd_above->prev = found;
			found->next = fnd_above;
		}

		return ((void *)base);
	}

	if (found->prev == NULL) {
		/*
		 * The first chunk (ndata) is selected.
		 */
		ASSERT(found == ndata);
		if (above) {
			found->address = end;
			found->size = above;
		} else if (found->next != NULL) {
			found->address = found->next->address;
			found->size = found->next->size;
			if ((found->next = found->next->next) != NULL)
				found->next->prev = found;

			bzero((void *)found->address, sizeof (struct memlist));
		} else {
			found->address = end;
			found->size = 0;
		}

		return ((void *)base);
	}

	/*
	 * Not the first chunk.
	 */
	if (above) {
		fnd_above = (struct memlist *)end;
		fnd_above->address = end;
		fnd_above->size = above;

		if ((fnd_above->next = found->next) != NULL)
			fnd_above->next->prev = fnd_above;
		fnd_above->prev = found->prev;
		found->prev->next = fnd_above;

	} else {
		if ((found->prev->next = found->next) != NULL)
			found->next->prev = found->prev;
	}

	bzero((void *)found->address, sizeof (struct memlist));

	return ((void *)base);
}
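/*
 * Minimal usage sketch (illustrative only; the real callers below and
 * in startup pass the actual nucleus bounds and sizes):
 *
 *	struct memlist ndata;
 *	void *buf;
 *
 *	ndata_alloc_init(&ndata, base, end);
 *	buf = ndata_alloc(&ndata, wanted, ecache_alignsize);
 *
 * Because [base, end] was bzero'd up front and freelist nodes are
 * bzero'd as they are consumed, the returned buffer is always zeroed.
 */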
/*
 * Size the kernel TSBs based upon the amount of physical
 * memory in the system.
 */
static void
calc_tsb_sizes(pgcnt_t npages)
{
	PRM_DEBUG(npages);

	if (npages <= TSB_FREEMEM_MIN) {
		ktsb_szcode = TSB_128K_SZCODE;
		enable_bigktsb = 0;
	} else if (npages <= TSB_FREEMEM_LARGE / 2) {
		ktsb_szcode = TSB_256K_SZCODE;
		enable_bigktsb = 0;
	} else if (npages <= TSB_FREEMEM_LARGE) {
		ktsb_szcode = TSB_512K_SZCODE;
		enable_bigktsb = 0;
	} else if (npages <= TSB_FREEMEM_LARGE * 2 ||
	    enable_bigktsb == 0) {
		ktsb_szcode = TSB_1M_SZCODE;
		enable_bigktsb = 0;
	} else {
		ktsb_szcode = highbit(npages - 1);
		ktsb_szcode -= TSB_START_SIZE;
		ktsb_szcode = MAX(ktsb_szcode, MIN_BIGKTSB_SZCODE);
		ktsb_szcode = MIN(ktsb_szcode, MAX_BIGKTSB_SZCODE);
	}

	/*
	 * We choose the TSB to hold kernel 4M mappings to have twice
	 * the reach as the primary kernel TSB since this TSB will
	 * potentially (currently) be shared by both mappings to all of
	 * physical memory plus user TSBs.  If this TSB has to be in nucleus
	 * (only for Spitfire and Cheetah) limit its size to 64K.
	 */
	ktsb4m_szcode = highbit((2 * npages) / TTEPAGES(TTE4M) - 1);
	ktsb4m_szcode -= TSB_START_SIZE;
	ktsb4m_szcode = MAX(ktsb4m_szcode, TSB_MIN_SZCODE);
	ktsb4m_szcode = MIN(ktsb4m_szcode, TSB_SOFTSZ_MASK);
	if ((enable_bigktsb == 0 || ktsb_phys == 0) && ktsb4m_szcode >
	    TSB_64K_SZCODE) {
		ktsb4m_szcode = TSB_64K_SZCODE;
		max_bootlp_tteszc = TTE8K;
	}

	ktsb_sz = TSB_BYTES(ktsb_szcode);	/* kernel 8K tsb size */
	ktsb4m_sz = TSB_BYTES(ktsb4m_szcode);	/* kernel 4M tsb size */
}

/*
 * Allocate kernel TSBs from nucleus data memory.
 * The function returns 0 on success and -1 on failure.
 */
int
ndata_alloc_tsbs(struct memlist *ndata, pgcnt_t npages)
{
	/*
	 * Set ktsb_phys to 1 if the processor supports ASI_QUAD_LDD_PHYS.
	 */
	sfmmu_setup_4lp();

	/*
	 * Size the kernel TSBs based upon the amount of physical
	 * memory in the system.
	 */
	calc_tsb_sizes(npages);

	/*
	 * Allocate the 8K kernel TSB if it belongs inside the nucleus.
	 */
	if (enable_bigktsb == 0) {
		if ((ktsb_base = ndata_alloc(ndata, ktsb_sz, ktsb_sz)) == NULL)
			return (-1);
		ASSERT(!((uintptr_t)ktsb_base & (ktsb_sz - 1)));

		PRM_DEBUG(ktsb_base);
		PRM_DEBUG(ktsb_sz);
		PRM_DEBUG(ktsb_szcode);
	}

	/*
	 * Next, allocate 4M kernel TSB from the nucleus since it's small.
	 */
	if (ktsb4m_szcode <= TSB_64K_SZCODE) {

		ktsb4m_base = ndata_alloc(ndata, ktsb4m_sz, ktsb4m_sz);
		if (ktsb4m_base == NULL)
			return (-1);
		ASSERT(!((uintptr_t)ktsb4m_base & (ktsb4m_sz - 1)));

		PRM_DEBUG(ktsb4m_base);
		PRM_DEBUG(ktsb4m_sz);
		PRM_DEBUG(ktsb4m_szcode);
	}

	return (0);
}
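/*
 * Note that both TSBs above are requested with alignment equal to
 * their size: a TSB must be size-aligned because the TSB miss
 * handlers form an entry pointer by merging virtual address index
 * bits into the base, hence the ASSERTs on the low bits.
 */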
/*
 * Allocate hat structs from the nucleus data memory.
 */
int
ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages, pgcnt_t kpm_npages)
{
	size_t	mml_alloc_sz;
	size_t	cb_alloc_sz;
	int	max_nucuhme_buckets = MAX_NUCUHME_BUCKETS;
	int	max_nuckhme_buckets = MAX_NUCKHME_BUCKETS;
	ulong_t hme_buckets;

	if (enable_bigktsb) {
		ASSERT((max_nucuhme_buckets + max_nuckhme_buckets) *
		    sizeof (struct hmehash_bucket) <=
		    TSB_BYTES(TSB_1M_SZCODE));

		max_nucuhme_buckets *= 2;
		max_nuckhme_buckets *= 2;
	}

	/*
	 * The number of buckets in the hme hash tables
	 * is a power of 2 such that the average hash chain length is
	 * HMENT_HASHAVELEN.  The number of buckets for the user hash is
	 * a function of physical memory and a predefined overmapping factor.
	 * The number of buckets for the kernel hash is a function of
	 * physical memory only.
	 */
	hme_buckets = (npages * HMEHASH_FACTOR) /
	    (HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT));

	uhmehash_num = (int)MIN(hme_buckets, MAX_UHME_BUCKETS);

	if (uhmehash_num > USER_BUCKETS_THRESHOLD) {
		/*
		 * if uhmehash_num is not a power of 2, round it down to
		 * the nearest power of 2.
		 */
		uint_t align = 1 << (highbit(uhmehash_num - 1) - 1);
		uhmehash_num = P2ALIGN(uhmehash_num, align);
	} else
		uhmehash_num = 1 << highbit(uhmehash_num - 1);

	hme_buckets = npages / (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT);
	khmehash_num = (int)MIN(hme_buckets, MAX_KHME_BUCKETS);
	khmehash_num = 1 << highbit(khmehash_num - 1);
	khmehash_num = MAX(khmehash_num, MIN_KHME_BUCKETS);

	if ((khmehash_num > max_nuckhme_buckets) ||
	    (uhmehash_num > max_nucuhme_buckets)) {
		khme_hash = NULL;
		uhme_hash = NULL;
	} else {
		size_t hmehash_sz = (uhmehash_num + khmehash_num) *
		    sizeof (struct hmehash_bucket);

		if ((khme_hash = ndata_alloc(ndata, hmehash_sz,
		    ecache_alignsize)) != NULL)
			uhme_hash = &khme_hash[khmehash_num];
		else
			uhme_hash = NULL;

		PRM_DEBUG(hmehash_sz);
	}

	PRM_DEBUG(khme_hash);
	PRM_DEBUG(khmehash_num);
	PRM_DEBUG(uhme_hash);
	PRM_DEBUG(uhmehash_num);
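	/*
	 * Illustrative example (the actual constants live in the sfmmu
	 * headers and may differ): with HMEHASH_FACTOR == 2,
	 * HMENT_HASHAVELEN == 4 and an 8-page (64K) hmeblk span, a
	 * machine with 2^20 pages would request 2^20 * 2 / (4 * 8) ==
	 * 64K user buckets, subject to the MAX_UHME_BUCKETS cap and
	 * the power-of-2 rounding applied above.
	 */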
	/*
	 * For the page mapping list mutex array we allocate one mutex
	 * for every 128 pages (1 MB) with a minimum of 64 entries and
	 * a maximum of 8K entries.  For the initial computation npages
	 * is rounded up (ie. 1 << highbit(npages * 1.5 / 128))
	 *
	 * mml_shift is roughly log2(mml_table_sz) + 3 for MLIST_HASH
	 *
	 * It is not required that this be allocated from the nucleus,
	 * but it is desirable.  So we first allocate from the nucleus
	 * everything that must be there.  Having done so, if mml_table
	 * will fit within what remains of the nucleus then it will be
	 * allocated here.  If not, set mml_table to NULL, which will cause
	 * startup_memlist() to BOP_ALLOC() space for it after our return...
	 */
	mml_table_sz = 1 << highbit((npages * 3) / 256);
	if (mml_table_sz < 64)
		mml_table_sz = 64;
	else if (mml_table_sz > 8192)
		mml_table_sz = 8192;
	mml_shift = highbit(mml_table_sz) + 3;

	PRM_DEBUG(mml_table_sz);
	PRM_DEBUG(mml_shift);

	mml_alloc_sz = mml_table_sz * sizeof (kmutex_t);

	mml_table = ndata_alloc(ndata, mml_alloc_sz, ecache_alignsize);

	PRM_DEBUG(mml_table);

	cb_alloc_sz = sfmmu_max_cb_id * sizeof (struct sfmmu_callback);
	PRM_DEBUG(cb_alloc_sz);
	sfmmu_cb_table = ndata_alloc(ndata, cb_alloc_sz, ecache_alignsize);
	PRM_DEBUG(sfmmu_cb_table);

	/*
	 * For the kpm_page mutex array we allocate one mutex every 16
	 * kpm pages (64MB).  In smallpage mode we allocate one mutex
	 * every 8K pages.  The minimum is set to 64 entries and the
	 * maximum to 8K entries.
	 *
	 * It is not required that this be allocated from the nucleus,
	 * but it is desirable.  So we first allocate from the nucleus
	 * everything that must be there.  Having done so, if kpmp_table
	 * or kpmp_stable will fit within what remains of the nucleus
	 * then it will be allocated here.  If not, startup_memlist()
	 * will use BOP_ALLOC() space for it after our return...
	 */
	if (kpm_enable) {
		size_t	kpmp_alloc_sz;

		if (kpm_smallpages == 0) {
			kpmp_shift = highbit(sizeof (kpm_page_t)) - 1;
			kpmp_table_sz = 1 << highbit(kpm_npages / 16);
			kpmp_table_sz = (kpmp_table_sz < 64) ? 64 :
			    ((kpmp_table_sz > 8192) ? 8192 : kpmp_table_sz);
			kpmp_alloc_sz = kpmp_table_sz * sizeof (kpm_hlk_t);

			kpmp_table = ndata_alloc(ndata, kpmp_alloc_sz,
			    ecache_alignsize);

			PRM_DEBUG(kpmp_table);
			PRM_DEBUG(kpmp_table_sz);

			kpmp_stable_sz = 0;
			kpmp_stable = NULL;
		} else {
			ASSERT(kpm_pgsz == PAGESIZE);
			kpmp_shift = highbit(sizeof (kpm_shlk_t)) + 1;
			kpmp_stable_sz = 1 << highbit(kpm_npages / 8192);
			kpmp_stable_sz = (kpmp_stable_sz < 64) ? 64 :
			    ((kpmp_stable_sz > 8192) ? 8192 : kpmp_stable_sz);
			kpmp_alloc_sz = kpmp_stable_sz * sizeof (kpm_shlk_t);

			kpmp_stable = ndata_alloc(ndata, kpmp_alloc_sz,
			    ecache_alignsize);

			PRM_DEBUG(kpmp_stable);
			PRM_DEBUG(kpmp_stable_sz);

			kpmp_table_sz = 0;
			kpmp_table = NULL;
		}
		PRM_DEBUG(kpmp_shift);
	}

	return (0);
}

/*
 * Allocate virtual addresses at base with given alignment.
 * Note that there is no physical memory behind the address yet.
 */
caddr_t
alloc_hme_buckets(caddr_t base, int alignsize)
{
	size_t hmehash_sz = (uhmehash_num + khmehash_num) *
	    sizeof (struct hmehash_bucket);

	ASSERT(khme_hash == NULL);
	ASSERT(uhme_hash == NULL);

	base = (caddr_t)roundup((uintptr_t)base, alignsize);
	hmehash_sz = roundup(hmehash_sz, alignsize);

	khme_hash = (struct hmehash_bucket *)base;
	uhme_hash = (struct hmehash_bucket *)((caddr_t)khme_hash +
	    khmehash_num * sizeof (struct hmehash_bucket));
	base += hmehash_sz;
	return (base);
}
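/*
 * alloc_hme_buckets() covers the case above in which the hash tables
 * were too big for the nucleus (khme_hash/uhme_hash left NULL): the
 * caller lays the bucket arrays out in a virtual range here and is
 * responsible for arranging physical backing later.
 */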
/*
 * This function bop allocs kernel TSBs.
 */
caddr_t
sfmmu_ktsb_alloc(caddr_t tsbbase)
{
	caddr_t vaddr;

	if (enable_bigktsb) {
		ktsb_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb_sz);
		vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb_base, ktsb_sz,
		    ktsb_sz);
		if (vaddr != ktsb_base)
			cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc"
			    " 8K bigktsb");
		ktsb_base = vaddr;
		tsbbase = ktsb_base + ktsb_sz;
		PRM_DEBUG(ktsb_base);
		PRM_DEBUG(tsbbase);
	}

	if (ktsb4m_szcode > TSB_64K_SZCODE) {
		ASSERT(ktsb_phys && enable_bigktsb);
		ktsb4m_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb4m_sz);
		vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb4m_base, ktsb4m_sz,
		    ktsb4m_sz);
		if (vaddr != ktsb4m_base)
			cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc"
			    " 4M bigktsb");
		ktsb4m_base = vaddr;
		tsbbase = ktsb4m_base + ktsb4m_sz;
		PRM_DEBUG(ktsb4m_base);
		PRM_DEBUG(tsbbase);
	}
	return (tsbbase);
}

/*
 * Moves code assembled outside of the trap table into the trap
 * table taking care to relocate relative branches to code outside
 * of the trap handler.
 */
static void
sfmmu_reloc_trap_handler(void *tablep, void *start, size_t count)
{
	size_t i;
	uint32_t *src;
	uint32_t *dst;
	uint32_t inst;
	int op, op2;
	int32_t offset;
	int disp;

	src = start;
	dst = tablep;
	offset = src - dst;
	for (src = start, i = 0; i < count; i++, src++, dst++) {
		inst = *dst = *src;
		op = (inst >> 30) & 0x3;	/* SPARC op field, bits 31:30 */
		if (op == 1) {
			/* call: disp30 in bits 29:0 */
			disp = ((int32_t)inst << 2) >> 2; /* sign-extend */
			if (disp + i >= 0 && disp + i < count)
				continue;
			disp += offset;
			inst = 0x40000000u | (disp & 0x3fffffffu);
			*dst = inst;
		} else if (op == 0) {
			/* branch or sethi */
			op2 = (inst >> 22) & 0x7;

			switch (op2) {
			case 0x3: /* BPr: d16hi in bits 21:20, d16lo in 13:0 */
				disp = (((inst >> 20) & 0x3) << 14) |
				    (inst & 0x3fff);
				disp = (disp << 16) >> 16; /* sign-extend */
				if (disp + i >= 0 && disp + i < count)
					continue;
				disp += offset;
				if (((disp << 16) >> 16) != disp)
					cmn_err(CE_PANIC, "bad reloc");
				inst &= ~0x303fff;
				inst |= (disp & 0x3fff);
				inst |= (disp & 0xc000) << 6;
				break;

			case 0x2: /* Bicc: disp22 in bits 21:0 */
				disp = ((int32_t)inst << 10) >> 10;
				if (disp + i >= 0 && disp + i < count)
					continue;
				disp += offset;
				if (((disp << 10) >> 10) != disp)
					cmn_err(CE_PANIC, "bad reloc");
				inst &= ~0x3fffff;
				inst |= (disp & 0x3fffff);
				break;

			case 0x1: /* BPcc: disp19 in bits 18:0 */
				disp = ((int32_t)inst << 13) >> 13;
				if (disp + i >= 0 && disp + i < count)
					continue;
				disp += offset;
				if (((disp << 13) >> 13) != disp)
					cmn_err(CE_PANIC, "bad reloc");
				inst &= ~0x7ffff;
				inst |= (disp & 0x7ffffu);
				break;
			}
			*dst = inst;
		}
	}
	flush_instr_mem(tablep, count * sizeof (uint32_t));
}
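/*
 * To illustrate sfmmu_reloc_trap_handler()'s fixups: a branch at
 * table slot i whose target lies inside [0, count) needs no change
 * after the copy, while one that targets code outside the handler
 * gets (src - dst) added to its displacement so that it still
 * reaches the original out-of-table target from its new home in
 * the trap table.
 */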
/*
 * Routine to allocate a large page to use in the TSB caches.
 */
/*ARGSUSED*/
static page_t *
sfmmu_tsb_page_create(void *addr, size_t size, int vmflag, void *arg)
{
	int pgflags;

	pgflags = PG_EXCL;
	if ((vmflag & VM_NOSLEEP) == 0)
		pgflags |= PG_WAIT;
	if (vmflag & VM_PANIC)
		pgflags |= PG_PANIC;
	if (vmflag & VM_PUSHPAGE)
		pgflags |= PG_PUSHPAGE;

	return (page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size,
	    pgflags, &kvseg, addr, arg));
}

/*
 * Allocate a large page to back the virtual address range
 * [addr, addr + size).  If addr is NULL, allocate the virtual address
 * space as well.
 */
static void *
sfmmu_tsb_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
    uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
    void *pcarg)
{
	page_t *ppl;
	page_t *rootpp;
	caddr_t addr = inaddr;
	pgcnt_t npages = btopr(size);
	page_t **ppa;
	int i = 0;

	/*
	 * Assuming that only TSBs will call this with size > PAGESIZE.
	 * There is no reason why this couldn't be expanded to 8k pages as
	 * well, or other page sizes in the future .... but for now, we
	 * only support fixed sized page requests.
	 */
	if ((inaddr == NULL) && ((addr = vmem_xalloc(vmp, size, size, 0, 0,
	    NULL, NULL, vmflag)) == NULL))
		return (NULL);

	/* If we ever don't want TSB slab-sized pages, this will panic */
	ASSERT(((uintptr_t)addr & (tsb_slab_size - 1)) == 0);

	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
		if (inaddr == NULL)
			vmem_xfree(vmp, addr, size);
		return (NULL);
	}

	ppl = page_create_func(addr, size, vmflag, pcarg);
	if (ppl == NULL) {
		if (inaddr == NULL)
			vmem_xfree(vmp, addr, size);
		page_unresv(npages);
		return (NULL);
	}

	rootpp = ppl;
	ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
	while (ppl != NULL) {
		page_t *pp = ppl;
		ppa[i++] = pp;
		page_sub(&ppl, pp);
		ASSERT(page_iolock_assert(pp));
		page_io_unlock(pp);
	}

	/*
	 * Load the locked entry.  It's OK to preload the entry into
	 * the TSB since we now support large mappings in the kernel TSB.
	 */
	hat_memload_array(kas.a_hat, (caddr_t)rootpp->p_offset, size,
	    ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr, HAT_LOAD_LOCK);

	for (--i; i >= 0; --i) {
		(void) page_pp_lock(ppa[i], 0, 1);
		page_unlock(ppa[i]);
	}

	kmem_free(ppa, npages * sizeof (page_t *));
	return (addr);
}
/* Called to import new spans into the TSB vmem arenas */
void *
sfmmu_tsb_segkmem_alloc(vmem_t *vmp, size_t size, int vmflag)
{
	lgrp_id_t lgrpid = LGRP_NONE;

	if (tsb_lgrp_affinity) {
		/*
		 * Search for the vmp->lgrpid mapping by brute force;
		 * some day vmp will have an lgrp, until then we have
		 * to do this the hard way.
		 */
		for (lgrpid = 0; lgrpid < NLGRPS_MAX &&
		    vmp != kmem_tsb_default_arena[lgrpid]; lgrpid++)
			;
		if (lgrpid == NLGRPS_MAX)
			lgrpid = LGRP_NONE;
	}

	return (sfmmu_tsb_xalloc(vmp, NULL, size, vmflag, 0,
	    sfmmu_tsb_page_create, lgrpid != LGRP_NONE ? &lgrpid : NULL));
}

/* Called to free spans from the TSB vmem arenas */
void
sfmmu_tsb_segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
{
	page_t *pp;
	caddr_t addr = inaddr;
	caddr_t eaddr;
	pgcnt_t npages = btopr(size);
	pgcnt_t pgs_left = npages;
	page_t *rootpp = NULL;

	ASSERT(((uintptr_t)addr & (tsb_slab_size - 1)) == 0);

	hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);

	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
		pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
		if (pp == NULL)
			panic("sfmmu_tsb_segkmem_free: page not found");

		ASSERT(PAGE_EXCL(pp));
		page_pp_unlock(pp, 0, 1);

		if (rootpp == NULL)
			rootpp = pp;
		if (--pgs_left == 0) {
			/*
			 * similar logic to segspt_free_pages, but we know we
			 * have one large page.
			 */
			page_destroy_pages(rootpp);
		}
	}
	page_unresv(npages);

	if (vmp != NULL)
		vmem_xfree(vmp, inaddr, size);
}