1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <vm/hat.h> 30 #include <vm/hat_sfmmu.h> 31 #include <vm/page.h> 32 #include <sys/pte.h> 33 #include <sys/systm.h> 34 #include <sys/mman.h> 35 #include <sys/sysmacros.h> 36 #include <sys/machparam.h> 37 #include <sys/vtrace.h> 38 #include <sys/kmem.h> 39 #include <sys/mmu.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/cpuvar.h> 43 #include <sys/debug.h> 44 #include <sys/lgrp.h> 45 #include <sys/archsystm.h> 46 #include <sys/machsystm.h> 47 #include <sys/vmsystm.h> 48 #include <sys/bitmap.h> 49 #include <vm/as.h> 50 #include <vm/seg.h> 51 #include <vm/seg_kmem.h> 52 #include <vm/seg_kp.h> 53 #include <vm/seg_kpm.h> 54 #include <vm/rm.h> 55 #include <vm/vm_dep.h> 56 #include <sys/t_lock.h> 57 #include <sys/vm_machparam.h> 58 #include <sys/promif.h> 59 #include <sys/prom_isa.h> 60 #include <sys/prom_plat.h> 61 #include <sys/prom_debug.h> 62 #include <sys/privregs.h> 63 #include <sys/bootconf.h> 64 #include 
<sys/memlist.h>
#include <sys/memlist_plat.h>
#include <sys/cpu_module.h>
#include <sys/reboot.h>
#include <sys/kdi.h>

/*
 * Static routines
 */
static void	sfmmu_map_prom_mappings(struct translation *, size_t);
static struct translation *read_prom_mappings(size_t *);
static void	sfmmu_reloc_trap_handler(void *, void *, size_t);

/*
 * External routines
 */
extern void sfmmu_remap_kernel(void);
extern void sfmmu_patch_utsb(void);

/*
 * Global Data:
 */
extern caddr_t	textva, datava;
extern tte_t	ktext_tte, kdata_tte;	/* ttes for kernel text and data */
extern int	enable_bigktsb;
extern int	kmem64_smchunks;

uint64_t memsegspa = (uintptr_t)MSEG_NULLPTR_PA; /* memsegs physical linkage */
uint64_t memseg_phash[N_MEM_SLOTS];	/* use physical memseg addresses */

/* Set to 1 by hat_kern_setup() once the prom mappings have been taken over */
int	sfmmu_kern_mapped = 0;

/*
 * DMMU primary context register for the kernel context. Machine specific code
 * inserts correct page size codes when necessary
 */
uint64_t kcontextreg = KCONTEXT;

#ifdef DEBUG
/*
 * Set by ndata_extra_base() when the last free nucleus chunk does not end
 * at the address the caller expected.
 */
static int ndata_middle_hole_detected = 0;
#endif

/* Extern Global Data */

extern int page_relocate_ready;

/*
 * Controls the logic which enables the use of the
 * QUAD_LDD_PHYS ASI for TSB accesses.
113 */ 114 extern int ktsb_phys; 115 116 /* 117 * Global Routines called from within: 118 * usr/src/uts/sun4u 119 * usr/src/uts/sfmmu 120 * usr/src/uts/sun 121 */ 122 123 pfn_t 124 va_to_pfn(void *vaddr) 125 { 126 u_longlong_t physaddr; 127 int mode, valid; 128 129 if (tba_taken_over) 130 return (hat_getpfnum(kas.a_hat, (caddr_t)vaddr)); 131 132 #if !defined(C_OBP) 133 if (!kmem64_smchunks && 134 (caddr_t)vaddr >= kmem64_base && (caddr_t)vaddr < kmem64_end) { 135 if (kmem64_pabase == (uint64_t)-1) 136 prom_panic("va_to_pfn: kmem64_pabase not init"); 137 physaddr = kmem64_pabase + ((caddr_t)vaddr - kmem64_base); 138 return ((pfn_t)physaddr >> MMU_PAGESHIFT); 139 } 140 #endif /* !C_OBP */ 141 142 if ((prom_translate_virt(vaddr, &valid, &physaddr, &mode) != -1) && 143 (valid == -1)) { 144 return ((pfn_t)(physaddr >> MMU_PAGESHIFT)); 145 } 146 return (PFN_INVALID); 147 } 148 149 uint64_t 150 va_to_pa(void *vaddr) 151 { 152 pfn_t pfn; 153 154 if ((pfn = va_to_pfn(vaddr)) == PFN_INVALID) 155 return ((uint64_t)-1); 156 return (((uint64_t)pfn << MMU_PAGESHIFT) | 157 ((uint64_t)vaddr & MMU_PAGEOFFSET)); 158 } 159 160 void 161 hat_kern_setup(void) 162 { 163 struct translation *trans_root; 164 size_t ntrans_root; 165 extern void startup_fixup_physavail(void); 166 167 /* 168 * These are the steps we take to take over the mmu from the prom. 169 * 170 * (1) Read the prom's mappings through the translation property. 171 * (2) Remap the kernel text and kernel data with 2 locked 4MB ttes. 172 * Create the the hmeblks for these 2 ttes at this time. 173 * (3) Create hat structures for all other prom mappings. Since the 174 * kernel text and data hme_blks have already been created we 175 * skip the equivalent prom's mappings. 176 * (4) Initialize the tsb and its corresponding hardware regs. 177 * (5) Take over the trap table (currently in startup). 178 * (6) Up to this point it is possible the prom required some of its 179 * locked tte's. Now that we own the trap table we remove them. 
180 */ 181 182 ktsb_pbase = va_to_pa(ktsb_base); 183 ktsb4m_pbase = va_to_pa(ktsb4m_base); 184 PRM_DEBUG(ktsb_pbase); 185 PRM_DEBUG(ktsb4m_pbase); 186 187 sfmmu_patch_ktsb(); 188 sfmmu_patch_utsb(); 189 sfmmu_patch_mmu_asi(ktsb_phys); 190 191 sfmmu_init_tsbs(); 192 193 if (kpm_enable) { 194 sfmmu_kpm_patch_tlbm(); 195 if (kpm_smallpages == 0) { 196 sfmmu_kpm_patch_tsbm(); 197 } 198 } 199 200 if (!shctx_on) { 201 sfmmu_patch_shctx(); 202 } 203 204 /* 205 * The 8K-indexed kernel TSB space is used to hold 206 * translations below... 207 */ 208 trans_root = read_prom_mappings(&ntrans_root); 209 sfmmu_remap_kernel(); 210 startup_fixup_physavail(); 211 mmu_init_kernel_pgsz(kas.a_hat); 212 sfmmu_map_prom_mappings(trans_root, ntrans_root); 213 214 /* 215 * We invalidate 8K kernel TSB because we used it in 216 * sfmmu_map_prom_mappings() 217 */ 218 sfmmu_inv_tsb(ktsb_base, ktsb_sz); 219 sfmmu_inv_tsb(ktsb4m_base, ktsb4m_sz); 220 221 sfmmu_init_ktsbinfo(); 222 223 224 sfmmu_kern_mapped = 1; 225 226 /* 227 * hments have been created for mapped pages, and thus we're ready 228 * for kmdb to start using its own trap table. It walks the hments 229 * to resolve TLB misses, and can't be used until they're ready. 230 */ 231 if (boothowto & RB_DEBUG) 232 kdi_dvec_vmready(); 233 } 234 235 /* 236 * Macro used below to convert the prom's 32-bit high and low fields into 237 * a value appropriate for the 64-bit kernel. 238 */ 239 240 #define COMBINE(hi, lo) (((uint64_t)(uint32_t)(hi) << 32) | (uint32_t)(lo)) 241 242 /* 243 * Track larges pages used. 244 * Provides observability for this feature on non-debug kernels. 245 */ 246 ulong_t map_prom_lpcount[MMU_PAGE_SIZES]; 247 248 /* 249 * This function traverses the prom mapping list and creates equivalent 250 * mappings in the sfmmu mapping hash. 
*/
/*
 * trans_root points at ntrans_root prom translation entries.  Entries
 * whose virtual address lies below KERNELBASE are skipped.  Every other
 * entry is walked one MMU_PAGESIZE page at a time; each page is sanity
 * checked (prom_panic() on any inconsistency) and, unless a mapping for
 * it already exists in the kernel hat, loaded as a locked 8K mapping.
 *
 * After the list has been walked, the kmem64 range is loaded separately
 * when kmem64_smchunks is 0 and kmem64_base is set, and the number of
 * pages used is recorded in map_prom_lpcount[].
 */
static void
sfmmu_map_prom_mappings(struct translation *trans_root, size_t ntrans_root)
{
	struct translation *promt;
	tte_t tte, oldtte, *ttep;
	pfn_t pfn, oldpfn, basepfn;
	caddr_t vaddr;
	size_t size, offset;
	unsigned long i;
	uint_t attr;
	page_t *pp;
	extern struct memlist *virt_avail;
	char buf[256];		/* scratch space for formatted panic messages */

	ttep = &tte;
	for (i = 0, promt = trans_root; i < ntrans_root; i++, promt++) {
		ASSERT(promt->tte_hi != 0);
		ASSERT32(promt->virt_hi == 0 && promt->size_hi == 0);

		vaddr = (caddr_t)COMBINE(promt->virt_hi, promt->virt_lo);

		/*
		 * hack until we get rid of map-for-unix
		 */
		if (vaddr < (caddr_t)KERNELBASE)
			continue;

		/* Start from the prom's tte and derive hat attributes from it */
		ttep->tte_inthi = promt->tte_hi;
		ttep->tte_intlo = promt->tte_lo;
		attr = PROC_DATA | HAT_NOSYNC;
#if defined(TTE_IS_GLOBAL)
		if (TTE_IS_GLOBAL(ttep)) {
			/*
			 * The prom better not use global translations
			 * because a user process might use the same
			 * virtual addresses
			 */
			prom_panic("sfmmu_map_prom_mappings: global"
			    " translation");
			/*
			 * NOTE(review): presumably prom_panic() does not
			 * return, which would make the statement below
			 * unreachable -- confirm.
			 */
			TTE_SET_LOFLAGS(ttep, TTE_GLB_INT, 0);
		}
#endif
		if (TTE_IS_LOCKED(ttep)) {
			/* clear the lock bits */
			TTE_CLR_LOCKED(ttep);
		}
		attr |= (TTE_IS_VCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEVTTE;
		attr |= (TTE_IS_PCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEPTTE;
		attr |= (TTE_IS_SIDEFFECT(ttep)) ? SFMMU_SIDEFFECT : 0;
		attr |= (TTE_IS_IE(ttep)) ? HAT_STRUCTURE_LE : 0;

		size = COMBINE(promt->size_hi, promt->size_lo);
		offset = 0;
		basepfn = TTE_TO_PFN((caddr_t)COMBINE(promt->virt_hi,
		    promt->virt_lo), ttep);
		/* size counts down and offset counts up as pages are handled */
		while (size) {
			vaddr = (caddr_t)(COMBINE(promt->virt_hi,
			    promt->virt_lo) + offset);

			/*
			 * make sure address is not in virt-avail list
			 */
			if (address_in_memlist(virt_avail, (uint64_t)vaddr,
			    size)) {
				prom_panic("sfmmu_map_prom_mappings:"
				    " inconsistent translation/avail lists");
			}

			/*
			 * Memory pages must be physically cacheable and
			 * non-memory (i/o) pages must carry the side-effect
			 * bit.
			 */
			pfn = basepfn + mmu_btop(offset);
			if (pf_is_memory(pfn)) {
				if (attr & SFMMU_UNCACHEPTTE) {
					prom_panic("sfmmu_map_prom_mappings:"
					    " uncached prom memory page");
				}
			} else {
				if (!(attr & SFMMU_SIDEFFECT)) {
					prom_panic("sfmmu_map_prom_mappings:"
					    " prom i/o page without"
					    " side-effect");
				}
			}

			/*
			 * skip kmem64 area
			 */
			if (!kmem64_smchunks &&
			    vaddr >= kmem64_base &&
			    vaddr < kmem64_aligned_end) {
#if !defined(C_OBP)
				prom_panic("sfmmu_map_prom_mappings:"
				    " unexpected kmem64 prom mapping");
#else	/* C_OBP */
				size_t mapsz;

				/* the prom mapping must match kmem64_pabase */
				if (ptob(pfn) !=
				    kmem64_pabase + (vaddr - kmem64_base)) {
					prom_panic("sfmmu_map_prom_mappings:"
					    " unexpected kmem64 prom mapping");
				}

				/* jump past the rest of the kmem64 range */
				mapsz = kmem64_aligned_end - vaddr;
				if (mapsz >= size) {
					break;
				}
				size -= mapsz;
				offset += mapsz;
				continue;
#endif	/* !C_OBP */
			}

			oldpfn = sfmmu_vatopfn(vaddr, KHATID, &oldtte);
			ASSERT(oldpfn != PFN_SUSPENDED);
			ASSERT(page_relocate_ready == 0);

			if (oldpfn != PFN_INVALID) {
				/*
				 * mapping already exists.
				 * Verify they are equal
				 */
				if (pfn != oldpfn) {
					(void) snprintf(buf, sizeof (buf),
					    "sfmmu_map_prom_mappings: mapping"
					    " conflict (va = 0x%p, pfn = 0x%p,"
					    " oldpfn = 0x%p)", (void *)vaddr,
					    (void *)pfn, (void *)oldpfn);
					prom_panic(buf);
				}
				size -= MMU_PAGESIZE;
				offset += MMU_PAGESIZE;
				continue;
			}

			/* a page the prom has mapped must not be free */
			pp = page_numtopp_nolock(pfn);
			if ((pp != NULL) && PP_ISFREE((page_t *)pp)) {
				(void) snprintf(buf, sizeof (buf),
				    "sfmmu_map_prom_mappings: prom-mapped"
				    " page (va = 0x%p, pfn = 0x%p) on free list",
				    (void *)vaddr, (void *)pfn);
				prom_panic(buf);
			}

			/*
			 * NOTE(review): SFMMU_NO_TSBLOAD is presumably needed
			 * because the translation list being walked is kept
			 * in the kernel TSB space -- confirm.
			 */
			sfmmu_memtte(ttep, pfn, attr, TTE8K);
			sfmmu_tteload(kas.a_hat, ttep, vaddr, pp,
			    HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
			size -= MMU_PAGESIZE;
			offset += MMU_PAGESIZE;
		}
	}

	/*
	 * We claimed kmem64 from prom, so now we need to load tte.
	 */
	if (!kmem64_smchunks && kmem64_base != NULL) {
		pgcnt_t pages;
		size_t psize;
		int pszc;

		pszc = kmem64_szc;
#ifdef sun4u
		if (pszc > TTE8K) {
			pszc = segkmem_lpszc;
		}
#endif	/* sun4u */
		psize = TTEBYTES(pszc);
		pages = btop(psize);
		basepfn = kmem64_pabase >> MMU_PAGESHIFT;
		vaddr = kmem64_base;
		/* one locked tte of size code pszc per psize bytes */
		while (vaddr < kmem64_end) {
			sfmmu_memtte(ttep, basepfn,
			    PROC_DATA | HAT_NOSYNC, pszc);
			sfmmu_tteload(kas.a_hat, ttep, vaddr, NULL,
			    HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
			vaddr += psize;
			basepfn += pages;
		}
		/* number of pszc-sized pages covering the kmem64 range */
		map_prom_lpcount[pszc] =
		    ((caddr_t)P2ROUNDUP((uintptr_t)kmem64_end, psize) -
		    kmem64_base) >> TTE_PAGE_SHIFT(pszc);
	}
}

#undef COMBINE	/* local to previous routine */

/*
 * This routine reads in the "translations" property in to a buffer and
 * returns a pointer to this buffer and the number of translations.
438 */ 439 static struct translation * 440 read_prom_mappings(size_t *ntransrootp) 441 { 442 char *prop = "translations"; 443 size_t translen; 444 pnode_t node; 445 struct translation *transroot; 446 447 /* 448 * the "translations" property is associated with the mmu node 449 */ 450 node = (pnode_t)prom_getphandle(prom_mmu_ihandle()); 451 452 /* 453 * We use the TSB space to read in the prom mappings. This space 454 * is currently not being used because we haven't taken over the 455 * trap table yet. It should be big enough to hold the mappings. 456 */ 457 if ((translen = prom_getproplen(node, prop)) == -1) 458 cmn_err(CE_PANIC, "no translations property"); 459 *ntransrootp = translen / sizeof (*transroot); 460 translen = roundup(translen, MMU_PAGESIZE); 461 PRM_DEBUG(translen); 462 if (translen > TSB_BYTES(ktsb_szcode)) 463 cmn_err(CE_PANIC, "not enough space for translations"); 464 465 transroot = (struct translation *)ktsb_base; 466 ASSERT(transroot); 467 if (prom_getprop(node, prop, (caddr_t)transroot) == -1) { 468 cmn_err(CE_PANIC, "translations getprop failed"); 469 } 470 return (transroot); 471 } 472 473 /* 474 * Init routine of the nucleus data memory allocator. 475 * 476 * The nucleus data memory allocator is organized in ecache_alignsize'd 477 * memory chunks. Memory allocated by ndata_alloc() will never be freed. 478 * 479 * The ndata argument is used as header of the ndata freelist. 480 * Other freelist nodes are placed in the nucleus memory itself 481 * at the beginning of a free memory chunk. Therefore a freelist 482 * node (struct memlist) must fit into the smallest allocatable 483 * memory chunk (ecache_alignsize bytes). 484 * 485 * The memory interval [base, end] passed to ndata_alloc_init() must be 486 * bzero'd to allow the allocator to return bzero'd memory easily. 
487 */ 488 void 489 ndata_alloc_init(struct memlist *ndata, uintptr_t base, uintptr_t end) 490 { 491 ASSERT(sizeof (struct memlist) <= ecache_alignsize); 492 493 base = roundup(base, ecache_alignsize); 494 end = end - end % ecache_alignsize; 495 496 ASSERT(base < end); 497 498 ndata->address = base; 499 ndata->size = end - base; 500 ndata->next = NULL; 501 ndata->prev = NULL; 502 } 503 504 /* 505 * Deliver the size of the largest free memory chunk. 506 */ 507 size_t 508 ndata_maxsize(struct memlist *ndata) 509 { 510 size_t chunksize = ndata->size; 511 512 while ((ndata = ndata->next) != NULL) { 513 if (chunksize < ndata->size) 514 chunksize = ndata->size; 515 } 516 517 return (chunksize); 518 } 519 520 521 /* 522 * Allocate the last properly aligned memory chunk. 523 * This function is called when no more large nucleus memory chunks 524 * will be allocated. The remaining free nucleus memory at the end 525 * of the nucleus can be added to the phys_avail list. 526 */ 527 void * 528 ndata_extra_base(struct memlist *ndata, size_t alignment, caddr_t endaddr) 529 { 530 uintptr_t base; 531 size_t wasteage = 0; 532 #ifdef DEBUG 533 static int called = 0; 534 535 if (called++ > 0) 536 cmn_err(CE_PANIC, "ndata_extra_base() called more than once"); 537 #endif /* DEBUG */ 538 539 /* 540 * The alignment needs to be a multiple of ecache_alignsize. 
541 */ 542 ASSERT((alignment % ecache_alignsize) == 0); 543 544 while (ndata->next != NULL) { 545 wasteage += ndata->size; 546 ndata = ndata->next; 547 } 548 549 base = roundup(ndata->address, alignment); 550 551 if (base >= ndata->address + ndata->size) 552 return (NULL); 553 554 if ((caddr_t)(ndata->address + ndata->size) != endaddr) { 555 #ifdef DEBUG 556 ndata_middle_hole_detected = 1; /* see if we hit this again */ 557 #endif 558 return (NULL); 559 } 560 561 if (base == ndata->address) { 562 if (ndata->prev != NULL) 563 ndata->prev->next = NULL; 564 else 565 ndata->size = 0; 566 567 bzero((void *)base, sizeof (struct memlist)); 568 569 } else { 570 ndata->size = base - ndata->address; 571 wasteage += ndata->size; 572 } 573 PRM_DEBUG(wasteage); 574 575 return ((void *)base); 576 } 577 578 /* 579 * Select the best matching buffer, avoid memory fragmentation. 580 */ 581 static struct memlist * 582 ndata_select_chunk(struct memlist *ndata, size_t wanted, size_t alignment) 583 { 584 struct memlist *fnd_below = NULL; 585 struct memlist *fnd_above = NULL; 586 struct memlist *fnd_unused = NULL; 587 struct memlist *frlist; 588 uintptr_t base; 589 uintptr_t end; 590 size_t below; 591 size_t above; 592 size_t unused; 593 size_t best_below = ULONG_MAX; 594 size_t best_above = ULONG_MAX; 595 size_t best_unused = ULONG_MAX; 596 597 ASSERT(ndata != NULL); 598 599 /* 600 * Look for the best matching buffer, avoid memory fragmentation. 601 * The following strategy is used, try to find 602 * 1. an exact fitting buffer 603 * 2. avoid wasting any space below the buffer, take first 604 * fitting buffer 605 * 3. avoid wasting any space above the buffer, take first 606 * fitting buffer 607 * 4. avoid wasting space, take first fitting buffer 608 * 5. 
take the last buffer in chain 609 */ 610 for (frlist = ndata; frlist != NULL; frlist = frlist->next) { 611 base = roundup(frlist->address, alignment); 612 end = roundup(base + wanted, ecache_alignsize); 613 614 if (end > frlist->address + frlist->size) 615 continue; 616 617 below = (base - frlist->address) / ecache_alignsize; 618 above = (frlist->address + frlist->size - end) / 619 ecache_alignsize; 620 unused = below + above; 621 622 if (unused == 0) 623 return (frlist); 624 625 if (frlist->next == NULL) 626 break; 627 628 if (below < best_below) { 629 best_below = below; 630 fnd_below = frlist; 631 } 632 633 if (above < best_above) { 634 best_above = above; 635 fnd_above = frlist; 636 } 637 638 if (unused < best_unused) { 639 best_unused = unused; 640 fnd_unused = frlist; 641 } 642 } 643 644 if (best_below == 0) 645 return (fnd_below); 646 if (best_above == 0) 647 return (fnd_above); 648 if (best_unused < ULONG_MAX) 649 return (fnd_unused); 650 651 return (frlist); 652 } 653 654 /* 655 * Nucleus data memory allocator. 656 * The granularity of the allocator is ecache_alignsize. 657 * See also comment for ndata_alloc_init(). 658 */ 659 void * 660 ndata_alloc(struct memlist *ndata, size_t wanted, size_t alignment) 661 { 662 struct memlist *found; 663 struct memlist *fnd_above; 664 uintptr_t base; 665 uintptr_t end; 666 size_t below; 667 size_t above; 668 669 /* 670 * Look for the best matching buffer, avoid memory fragmentation. 671 */ 672 if ((found = ndata_select_chunk(ndata, wanted, alignment)) == NULL) 673 return (NULL); 674 675 /* 676 * Allocate the nucleus data buffer. 
677 */ 678 base = roundup(found->address, alignment); 679 end = roundup(base + wanted, ecache_alignsize); 680 ASSERT(end <= found->address + found->size); 681 682 below = base - found->address; 683 above = found->address + found->size - end; 684 ASSERT(above == 0 || (above % ecache_alignsize) == 0); 685 686 if (below >= ecache_alignsize) { 687 /* 688 * There is free memory below the allocated memory chunk. 689 */ 690 found->size = below - below % ecache_alignsize; 691 692 if (above) { 693 fnd_above = (struct memlist *)end; 694 fnd_above->address = end; 695 fnd_above->size = above; 696 697 if ((fnd_above->next = found->next) != NULL) 698 found->next->prev = fnd_above; 699 fnd_above->prev = found; 700 found->next = fnd_above; 701 } 702 703 return ((void *)base); 704 } 705 706 if (found->prev == NULL) { 707 /* 708 * The first chunk (ndata) is selected. 709 */ 710 ASSERT(found == ndata); 711 if (above) { 712 found->address = end; 713 found->size = above; 714 } else if (found->next != NULL) { 715 found->address = found->next->address; 716 found->size = found->next->size; 717 if ((found->next = found->next->next) != NULL) 718 found->next->prev = found; 719 720 bzero((void *)found->address, sizeof (struct memlist)); 721 } else { 722 found->address = end; 723 found->size = 0; 724 } 725 726 return ((void *)base); 727 } 728 729 /* 730 * Not the first chunk. 731 */ 732 if (above) { 733 fnd_above = (struct memlist *)end; 734 fnd_above->address = end; 735 fnd_above->size = above; 736 737 if ((fnd_above->next = found->next) != NULL) 738 fnd_above->next->prev = fnd_above; 739 fnd_above->prev = found->prev; 740 found->prev->next = fnd_above; 741 742 } else { 743 if ((found->prev->next = found->next) != NULL) 744 found->next->prev = found->prev; 745 } 746 747 bzero((void *)found->address, sizeof (struct memlist)); 748 749 return ((void *)base); 750 } 751 752 /* 753 * Size the kernel TSBs based upon the amount of physical 754 * memory in the system. 
755 */ 756 static void 757 calc_tsb_sizes(pgcnt_t npages) 758 { 759 PRM_DEBUG(npages); 760 761 if (npages <= TSB_FREEMEM_MIN) { 762 ktsb_szcode = TSB_128K_SZCODE; 763 enable_bigktsb = 0; 764 } else if (npages <= TSB_FREEMEM_LARGE / 2) { 765 ktsb_szcode = TSB_256K_SZCODE; 766 enable_bigktsb = 0; 767 } else if (npages <= TSB_FREEMEM_LARGE) { 768 ktsb_szcode = TSB_512K_SZCODE; 769 enable_bigktsb = 0; 770 } else if (npages <= TSB_FREEMEM_LARGE * 2 || 771 enable_bigktsb == 0) { 772 ktsb_szcode = TSB_1M_SZCODE; 773 enable_bigktsb = 0; 774 } else { 775 ktsb_szcode = highbit(npages - 1); 776 ktsb_szcode -= TSB_START_SIZE; 777 ktsb_szcode = MAX(ktsb_szcode, MIN_BIGKTSB_SZCODE); 778 ktsb_szcode = MIN(ktsb_szcode, MAX_BIGKTSB_SZCODE); 779 } 780 781 /* 782 * We choose the TSB to hold kernel 4M mappings to have twice 783 * the reach as the primary kernel TSB since this TSB will 784 * potentially (currently) be shared by both mappings to all of 785 * physical memory plus user TSBs. If this TSB has to be in nucleus 786 * (only for Spitfire and Cheetah) limit its size to 64K. 787 */ 788 ktsb4m_szcode = highbit((2 * npages) / TTEPAGES(TTE4M) - 1); 789 ktsb4m_szcode -= TSB_START_SIZE; 790 ktsb4m_szcode = MAX(ktsb4m_szcode, TSB_MIN_SZCODE); 791 ktsb4m_szcode = MIN(ktsb4m_szcode, TSB_SOFTSZ_MASK); 792 if ((enable_bigktsb == 0 || ktsb_phys == 0) && ktsb4m_szcode > 793 TSB_64K_SZCODE) { 794 ktsb4m_szcode = TSB_64K_SZCODE; 795 max_bootlp_tteszc = TTE8K; 796 } 797 798 ktsb_sz = TSB_BYTES(ktsb_szcode); /* kernel 8K tsb size */ 799 ktsb4m_sz = TSB_BYTES(ktsb4m_szcode); /* kernel 4M tsb size */ 800 } 801 802 /* 803 * Allocate kernel TSBs from nucleus data memory. 804 * The function return 0 on success and -1 on failure. 805 */ 806 int 807 ndata_alloc_tsbs(struct memlist *ndata, pgcnt_t npages) 808 { 809 /* 810 * Set ktsb_phys to 1 if the processor supports ASI_QUAD_LDD_PHYS. 
811 */ 812 sfmmu_setup_4lp(); 813 814 /* 815 * Size the kernel TSBs based upon the amount of physical 816 * memory in the system. 817 */ 818 calc_tsb_sizes(npages); 819 820 /* 821 * Allocate the 8K kernel TSB if it belongs inside the nucleus. 822 */ 823 if (enable_bigktsb == 0) { 824 if ((ktsb_base = ndata_alloc(ndata, ktsb_sz, ktsb_sz)) == NULL) 825 return (-1); 826 ASSERT(!((uintptr_t)ktsb_base & (ktsb_sz - 1))); 827 828 PRM_DEBUG(ktsb_base); 829 PRM_DEBUG(ktsb_sz); 830 PRM_DEBUG(ktsb_szcode); 831 } 832 833 /* 834 * Next, allocate 4M kernel TSB from the nucleus since it's small. 835 */ 836 if (ktsb4m_szcode <= TSB_64K_SZCODE) { 837 838 ktsb4m_base = ndata_alloc(ndata, ktsb4m_sz, ktsb4m_sz); 839 if (ktsb4m_base == NULL) 840 return (-1); 841 ASSERT(!((uintptr_t)ktsb4m_base & (ktsb4m_sz - 1))); 842 843 PRM_DEBUG(ktsb4m_base); 844 PRM_DEBUG(ktsb4m_sz); 845 PRM_DEBUG(ktsb4m_szcode); 846 } 847 848 return (0); 849 } 850 851 size_t 852 calc_hmehash_sz(pgcnt_t npages) 853 { 854 ulong_t hme_buckets; 855 856 /* 857 * The number of buckets in the hme hash tables 858 * is a power of 2 such that the average hash chain length is 859 * HMENT_HASHAVELEN. The number of buckets for the user hash is 860 * a function of physical memory and a predefined overmapping factor. 861 * The number of buckets for the kernel hash is a function of 862 * physical memory only. 863 */ 864 hme_buckets = (npages * HMEHASH_FACTOR) / 865 (HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT)); 866 867 uhmehash_num = (int)MIN(hme_buckets, MAX_UHME_BUCKETS); 868 869 if (uhmehash_num > USER_BUCKETS_THRESHOLD) { 870 /* 871 * if uhmehash_num is not power of 2 round it down to the 872 * next power of 2. 
873 */ 874 uint_t align = 1 << (highbit(uhmehash_num - 1) - 1); 875 uhmehash_num = P2ALIGN(uhmehash_num, align); 876 } else 877 uhmehash_num = 1 << highbit(uhmehash_num - 1); 878 879 hme_buckets = npages / (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT); 880 khmehash_num = (int)MIN(hme_buckets, MAX_KHME_BUCKETS); 881 khmehash_num = 1 << highbit(khmehash_num - 1); 882 khmehash_num = MAX(khmehash_num, MIN_KHME_BUCKETS); 883 884 return ((uhmehash_num + khmehash_num) * sizeof (struct hmehash_bucket)); 885 } 886 887 caddr_t 888 alloc_hmehash(caddr_t alloc_base) 889 { 890 size_t khmehash_sz, uhmehash_sz; 891 892 khme_hash = (struct hmehash_bucket *)alloc_base; 893 khmehash_sz = khmehash_num * sizeof (struct hmehash_bucket); 894 alloc_base += khmehash_sz; 895 896 uhme_hash = (struct hmehash_bucket *)alloc_base; 897 uhmehash_sz = uhmehash_num * sizeof (struct hmehash_bucket); 898 alloc_base += uhmehash_sz; 899 900 PRM_DEBUG(khme_hash); 901 PRM_DEBUG(uhme_hash); 902 903 return (alloc_base); 904 } 905 906 /* 907 * Allocate hat structs from the nucleus data memory. 908 */ 909 int 910 ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages) 911 { 912 size_t mml_alloc_sz; 913 size_t cb_alloc_sz; 914 915 /* 916 * For the page mapping list mutex array we allocate one mutex 917 * for every 128 pages (1 MB) with a minimum of 64 entries and 918 * a maximum of 8K entries. For the initial computation npages 919 * is rounded up (ie. 
1 << highbit(npages * 1.5 / 128)) 920 * 921 * mml_shift is roughly log2(mml_table_sz) + 3 for MLIST_HASH 922 */ 923 mml_table_sz = 1 << highbit((npages * 3) / 256); 924 if (mml_table_sz < 64) 925 mml_table_sz = 64; 926 else if (mml_table_sz > 8192) 927 mml_table_sz = 8192; 928 mml_shift = highbit(mml_table_sz) + 3; 929 930 PRM_DEBUG(mml_table_sz); 931 PRM_DEBUG(mml_shift); 932 933 mml_alloc_sz = mml_table_sz * sizeof (kmutex_t); 934 935 mml_table = ndata_alloc(ndata, mml_alloc_sz, ecache_alignsize); 936 if (mml_table == NULL) 937 return (-1); 938 PRM_DEBUG(mml_table); 939 940 cb_alloc_sz = sfmmu_max_cb_id * sizeof (struct sfmmu_callback); 941 PRM_DEBUG(cb_alloc_sz); 942 sfmmu_cb_table = ndata_alloc(ndata, cb_alloc_sz, ecache_alignsize); 943 if (sfmmu_cb_table == NULL) 944 return (-1); 945 PRM_DEBUG(sfmmu_cb_table); 946 947 return (0); 948 } 949 950 int 951 ndata_alloc_kpm(struct memlist *ndata, pgcnt_t kpm_npages) 952 { 953 size_t kpmp_alloc_sz; 954 955 /* 956 * For the kpm_page mutex array we allocate one mutex every 16 957 * kpm pages (64MB). In smallpage mode we allocate one mutex 958 * every 8K pages. The minimum is set to 64 entries and the 959 * maximum to 8K entries. 960 */ 961 if (kpm_smallpages == 0) { 962 kpmp_shift = highbit(sizeof (kpm_page_t)) - 1; 963 kpmp_table_sz = 1 << highbit(kpm_npages / 16); 964 kpmp_table_sz = (kpmp_table_sz < 64) ? 64 : 965 ((kpmp_table_sz > 8192) ? 8192 : kpmp_table_sz); 966 kpmp_alloc_sz = kpmp_table_sz * sizeof (kpm_hlk_t); 967 968 kpmp_table = ndata_alloc(ndata, kpmp_alloc_sz, 969 ecache_alignsize); 970 if (kpmp_table == NULL) 971 return (-1); 972 973 PRM_DEBUG(kpmp_table); 974 PRM_DEBUG(kpmp_table_sz); 975 976 kpmp_stable_sz = 0; 977 kpmp_stable = NULL; 978 } else { 979 ASSERT(kpm_pgsz == PAGESIZE); 980 kpmp_shift = highbit(sizeof (kpm_shlk_t)) + 1; 981 kpmp_stable_sz = 1 << highbit(kpm_npages / 8192); 982 kpmp_stable_sz = (kpmp_stable_sz < 64) ? 64 : 983 ((kpmp_stable_sz > 8192) ? 
8192 : kpmp_stable_sz); 984 kpmp_alloc_sz = kpmp_stable_sz * sizeof (kpm_shlk_t); 985 986 kpmp_stable = ndata_alloc(ndata, kpmp_alloc_sz, 987 ecache_alignsize); 988 if (kpmp_stable == NULL) 989 return (-1); 990 991 PRM_DEBUG(kpmp_stable); 992 PRM_DEBUG(kpmp_stable_sz); 993 994 kpmp_table_sz = 0; 995 kpmp_table = NULL; 996 } 997 PRM_DEBUG(kpmp_shift); 998 999 return (0); 1000 } 1001 1002 /* 1003 * This function bop allocs kernel TSBs. 1004 */ 1005 caddr_t 1006 sfmmu_ktsb_alloc(caddr_t tsbbase) 1007 { 1008 caddr_t vaddr; 1009 1010 if (enable_bigktsb) { 1011 ktsb_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb_sz); 1012 vaddr = prom_alloc(ktsb_base, ktsb_sz, ktsb_sz); 1013 if (vaddr != ktsb_base) 1014 cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc" 1015 " 8K bigktsb"); 1016 ktsb_base = vaddr; 1017 tsbbase = ktsb_base + ktsb_sz; 1018 PRM_DEBUG(ktsb_base); 1019 PRM_DEBUG(tsbbase); 1020 } 1021 1022 if (ktsb4m_szcode > TSB_64K_SZCODE) { 1023 ASSERT(ktsb_phys && enable_bigktsb); 1024 ktsb4m_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb4m_sz); 1025 vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb4m_base, ktsb4m_sz, 1026 ktsb4m_sz); 1027 if (vaddr != ktsb4m_base) 1028 cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc" 1029 " 4M bigktsb"); 1030 ktsb4m_base = vaddr; 1031 tsbbase = ktsb4m_base + ktsb4m_sz; 1032 PRM_DEBUG(ktsb4m_base); 1033 PRM_DEBUG(tsbbase); 1034 } 1035 return (tsbbase); 1036 } 1037 1038 /* 1039 * Moves code assembled outside of the trap table into the trap 1040 * table taking care to relocate relative branches to code outside 1041 * of the trap handler. 
1042 */ 1043 static void 1044 sfmmu_reloc_trap_handler(void *tablep, void *start, size_t count) 1045 { 1046 size_t i; 1047 uint32_t *src; 1048 uint32_t *dst; 1049 uint32_t inst; 1050 int op, op2; 1051 int32_t offset; 1052 int disp; 1053 1054 src = start; 1055 dst = tablep; 1056 offset = src - dst; 1057 for (src = start, i = 0; i < count; i++, src++, dst++) { 1058 inst = *dst = *src; 1059 op = (inst >> 30) & 0x2; 1060 if (op == 1) { 1061 /* call */ 1062 disp = ((int32_t)inst << 2) >> 2; /* sign-extend */ 1063 if (disp + i >= 0 && disp + i < count) 1064 continue; 1065 disp += offset; 1066 inst = 0x40000000u | (disp & 0x3fffffffu); 1067 *dst = inst; 1068 } else if (op == 0) { 1069 /* branch or sethi */ 1070 op2 = (inst >> 22) & 0x7; 1071 1072 switch (op2) { 1073 case 0x3: /* BPr */ 1074 disp = (((inst >> 20) & 0x3) << 14) | 1075 (inst & 0x3fff); 1076 disp = (disp << 16) >> 16; /* sign-extend */ 1077 if (disp + i >= 0 && disp + i < count) 1078 continue; 1079 disp += offset; 1080 if (((disp << 16) >> 16) != disp) 1081 cmn_err(CE_PANIC, "bad reloc"); 1082 inst &= ~0x303fff; 1083 inst |= (disp & 0x3fff); 1084 inst |= (disp & 0xc000) << 6; 1085 break; 1086 1087 case 0x2: /* Bicc */ 1088 disp = ((int32_t)inst << 10) >> 10; 1089 if (disp + i >= 0 && disp + i < count) 1090 continue; 1091 disp += offset; 1092 if (((disp << 10) >> 10) != disp) 1093 cmn_err(CE_PANIC, "bad reloc"); 1094 inst &= ~0x3fffff; 1095 inst |= (disp & 0x3fffff); 1096 break; 1097 1098 case 0x1: /* Bpcc */ 1099 disp = ((int32_t)inst << 13) >> 13; 1100 if (disp + i >= 0 && disp + i < count) 1101 continue; 1102 disp += offset; 1103 if (((disp << 13) >> 13) != disp) 1104 cmn_err(CE_PANIC, "bad reloc"); 1105 inst &= ~0x7ffff; 1106 inst |= (disp & 0x7ffffu); 1107 break; 1108 } 1109 *dst = inst; 1110 } 1111 } 1112 flush_instr_mem(tablep, count * sizeof (uint32_t)); 1113 } 1114 1115 /* 1116 * Routine to allocate a large page to use in the TSB caches. 
1117 */ 1118 /*ARGSUSED*/ 1119 static page_t * 1120 sfmmu_tsb_page_create(void *addr, size_t size, int vmflag, void *arg) 1121 { 1122 int pgflags; 1123 1124 pgflags = PG_EXCL; 1125 if ((vmflag & VM_NOSLEEP) == 0) 1126 pgflags |= PG_WAIT; 1127 if (vmflag & VM_PANIC) 1128 pgflags |= PG_PANIC; 1129 if (vmflag & VM_PUSHPAGE) 1130 pgflags |= PG_PUSHPAGE; 1131 1132 return (page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size, 1133 pgflags, &kvseg, addr, arg)); 1134 } 1135 1136 /* 1137 * Allocate a large page to back the virtual address range 1138 * [addr, addr + size). If addr is NULL, allocate the virtual address 1139 * space as well. 1140 */ 1141 static void * 1142 sfmmu_tsb_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, 1143 uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *), 1144 void *pcarg) 1145 { 1146 page_t *ppl; 1147 page_t *rootpp; 1148 caddr_t addr = inaddr; 1149 pgcnt_t npages = btopr(size); 1150 page_t **ppa; 1151 int i = 0; 1152 1153 /* 1154 * Assuming that only TSBs will call this with size > PAGESIZE 1155 * There is no reason why this couldn't be expanded to 8k pages as 1156 * well, or other page sizes in the future .... but for now, we 1157 * only support fixed sized page requests. 
1158 */ 1159 if ((inaddr == NULL) && ((addr = vmem_xalloc(vmp, size, size, 0, 0, 1160 NULL, NULL, vmflag)) == NULL)) 1161 return (NULL); 1162 1163 if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) { 1164 if (inaddr == NULL) 1165 vmem_xfree(vmp, addr, size); 1166 return (NULL); 1167 } 1168 1169 ppl = page_create_func(addr, size, vmflag, pcarg); 1170 if (ppl == NULL) { 1171 if (inaddr == NULL) 1172 vmem_xfree(vmp, addr, size); 1173 page_unresv(npages); 1174 return (NULL); 1175 } 1176 1177 rootpp = ppl; 1178 ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP); 1179 while (ppl != NULL) { 1180 page_t *pp = ppl; 1181 ppa[i++] = pp; 1182 page_sub(&ppl, pp); 1183 ASSERT(page_iolock_assert(pp)); 1184 page_io_unlock(pp); 1185 } 1186 1187 /* 1188 * Load the locked entry. It's OK to preload the entry into 1189 * the TSB since we now support large mappings in the kernel TSB. 1190 */ 1191 hat_memload_array(kas.a_hat, (caddr_t)rootpp->p_offset, size, 1192 ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr, HAT_LOAD_LOCK); 1193 1194 for (--i; i >= 0; --i) { 1195 (void) page_pp_lock(ppa[i], 0, 1); 1196 page_unlock(ppa[i]); 1197 } 1198 1199 kmem_free(ppa, npages * sizeof (page_t *)); 1200 return (addr); 1201 } 1202 1203 /* Called to import new spans into the TSB vmem arenas */ 1204 void * 1205 sfmmu_tsb_segkmem_alloc(vmem_t *vmp, size_t size, int vmflag) 1206 { 1207 lgrp_id_t lgrpid = LGRP_NONE; 1208 1209 if (tsb_lgrp_affinity) { 1210 /* 1211 * Search for the vmp->lgrpid mapping by brute force; 1212 * some day vmp will have an lgrp, until then we have 1213 * to do this the hard way. 1214 */ 1215 for (lgrpid = 0; lgrpid < NLGRPS_MAX && 1216 vmp != kmem_tsb_default_arena[lgrpid]; lgrpid++) 1217 ; 1218 if (lgrpid == NLGRPS_MAX) 1219 lgrpid = LGRP_NONE; 1220 } 1221 1222 return (sfmmu_tsb_xalloc(vmp, NULL, size, vmflag, 0, 1223 sfmmu_tsb_page_create, lgrpid != LGRP_NONE? 
&lgrpid : NULL)); 1224 } 1225 1226 /* Called to free spans from the TSB vmem arenas */ 1227 void 1228 sfmmu_tsb_segkmem_free(vmem_t *vmp, void *inaddr, size_t size) 1229 { 1230 page_t *pp; 1231 caddr_t addr = inaddr; 1232 caddr_t eaddr; 1233 pgcnt_t npages = btopr(size); 1234 pgcnt_t pgs_left = npages; 1235 page_t *rootpp = NULL; 1236 1237 hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK); 1238 1239 for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) { 1240 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL); 1241 if (pp == NULL) 1242 panic("sfmmu_tsb_segkmem_free: page not found"); 1243 1244 ASSERT(PAGE_EXCL(pp)); 1245 page_pp_unlock(pp, 0, 1); 1246 1247 if (rootpp == NULL) 1248 rootpp = pp; 1249 if (--pgs_left == 0) { 1250 /* 1251 * similar logic to segspt_free_pages, but we know we 1252 * have one large page. 1253 */ 1254 page_destroy_pages(rootpp); 1255 } 1256 } 1257 page_unresv(npages); 1258 1259 if (vmp != NULL) 1260 vmem_xfree(vmp, inaddr, size); 1261 } 1262