/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <vm/hat.h>
#include <vm/hat_sfmmu.h>
#include <vm/page.h>
#include <sys/pte.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/sysmacros.h>
#include <sys/machparam.h>
#include <sys/vtrace.h>
#include <sys/kmem.h>
#include <sys/mmu.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/cpuvar.h>
#include <sys/debug.h>
#include <sys/lgrp.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/vmsystm.h>
#include <sys/bitmap.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <vm/seg_kpm.h>
#include <vm/rm.h>
#include <vm/vm_dep.h>
#include <sys/t_lock.h>
#include <sys/vm_machparam.h>
#include <sys/promif.h>
#include <sys/prom_isa.h>
#include <sys/prom_plat.h>
#include <sys/prom_debug.h>
#include <sys/privregs.h>
#include <sys/bootconf.h>
#include <sys/memlist.h>
#include <sys/memlist_plat.h>
#include <sys/cpu_module.h>
#include <sys/reboot.h>
#include <sys/kdi.h>

/*
 * Static routines
 */
static void sfmmu_map_prom_mappings(struct translation *, size_t);
static struct translation *read_prom_mappings(size_t *);
static void sfmmu_reloc_trap_handler(void *, void *, size_t);

/*
 * External routines
 */
extern void sfmmu_remap_kernel(void);
extern void sfmmu_patch_utsb(void);

/*
 * Global Data:
 */
extern caddr_t textva, datava;
extern tte_t ktext_tte, kdata_tte;	/* ttes for kernel text and data */
extern int enable_bigktsb;
extern int kmem64_smchunks;

uint64_t memsegspa = (uintptr_t)MSEG_NULLPTR_PA; /* memsegs physical linkage */
uint64_t memseg_phash[N_MEM_SLOTS];	/* use physical memseg addresses */

int sfmmu_kern_mapped = 0;

/*
 * DMMU primary context register for the kernel context. Machine specific code
 * inserts correct page size codes when necessary
 */
uint64_t kcontextreg = KCONTEXT;

#ifdef DEBUG
static int ndata_middle_hole_detected = 0;
#endif

/* Extern Global Data */

extern int page_relocate_ready;

/*
 * Controls the logic which enables the use of the
 * QUAD_LDD_PHYS ASI for TSB accesses.
 */
extern int ktsb_phys;

/*
 * Global Routines called from within:
 *	usr/src/uts/sun4u
 *	usr/src/uts/sfmmu
 *	usr/src/uts/sun
 */

pfn_t
va_to_pfn(void *vaddr)
{
	u_longlong_t physaddr;
	int mode, valid;

	if (tba_taken_over)
		return (hat_getpfnum(kas.a_hat, (caddr_t)vaddr));

#if !defined(C_OBP)
	if (!kmem64_smchunks &&
	    (caddr_t)vaddr >= kmem64_base && (caddr_t)vaddr < kmem64_end) {
		if (kmem64_pabase == (uint64_t)-1)
			prom_panic("va_to_pfn: kmem64_pabase not init");
		physaddr = kmem64_pabase + ((caddr_t)vaddr - kmem64_base);
		return ((pfn_t)(physaddr >> MMU_PAGESHIFT));
	}
#endif	/* !C_OBP */

	if ((prom_translate_virt(vaddr, &valid, &physaddr, &mode) != -1) &&
	    (valid == -1)) {
		return ((pfn_t)(physaddr >> MMU_PAGESHIFT));
	}
	return (PFN_INVALID);
}

uint64_t
va_to_pa(void *vaddr)
{
	pfn_t pfn;

	if ((pfn = va_to_pfn(vaddr)) == PFN_INVALID)
		return ((uint64_t)-1);
	return (((uint64_t)pfn << MMU_PAGESHIFT) |
	    ((uint64_t)vaddr & MMU_PAGEOFFSET));
}

void
hat_kern_setup(void)
{
	struct translation *trans_root;
	size_t ntrans_root;
	extern void startup_fixup_physavail(void);

	/*
	 * These are the steps we take to take over the mmu from the prom.
	 *
	 * (1)	Read the prom's mappings through the translation property.
	 * (2)	Remap the kernel text and kernel data with 2 locked 4MB ttes.
	 *	Create the hmeblks for these 2 ttes at this time.
	 * (3)	Create hat structures for all other prom mappings.  Since the
	 *	kernel text and data hme_blks have already been created we
	 *	skip the equivalent prom's mappings.
	 * (4)	Initialize the tsb and its corresponding hardware regs.
	 * (5)	Take over the trap table (currently in startup).
	 * (6)	Up to this point it is possible the prom required some of its
	 *	locked tte's.  Now that we own the trap table we remove them.
	 */

	ktsb_pbase = va_to_pa(ktsb_base);
	ktsb4m_pbase = va_to_pa(ktsb4m_base);
	PRM_DEBUG(ktsb_pbase);
	PRM_DEBUG(ktsb4m_pbase);

	sfmmu_patch_ktsb();
	sfmmu_patch_utsb();
	sfmmu_patch_mmu_asi(ktsb_phys);

	sfmmu_init_tsbs();

	if (kpm_enable) {
		sfmmu_kpm_patch_tlbm();
		if (kpm_smallpages == 0) {
			sfmmu_kpm_patch_tsbm();
		}
	}

	if (!shctx_on) {
		sfmmu_patch_shctx();
	}

	/*
	 * The 8K-indexed kernel TSB space is used to hold
	 * translations below...
	 */
	trans_root = read_prom_mappings(&ntrans_root);
	sfmmu_remap_kernel();
	startup_fixup_physavail();
	mmu_init_kernel_pgsz(kas.a_hat);
	sfmmu_map_prom_mappings(trans_root, ntrans_root);

	/*
	 * We invalidate 8K kernel TSB because we used it in
	 * sfmmu_map_prom_mappings()
	 */
	sfmmu_inv_tsb(ktsb_base, ktsb_sz);
	sfmmu_inv_tsb(ktsb4m_base, ktsb4m_sz);

	sfmmu_init_ktsbinfo();


	sfmmu_kern_mapped = 1;

	/*
	 * hments have been created for mapped pages, and thus we're ready
	 * for kmdb to start using its own trap table.  It walks the hments
	 * to resolve TLB misses, and can't be used until they're ready.
	 */
	if (boothowto & RB_DEBUG)
		kdi_dvec_vmready();
}

/*
 * Macro used below to convert the prom's 32-bit high and low fields into
 * a value appropriate for the 64-bit kernel.
 */

#define	COMBINE(hi, lo)	(((uint64_t)(uint32_t)(hi) << 32) | (uint32_t)(lo))
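/*
 * Illustrative example (values are hypothetical): a prom mapping at
 * virtual address 0x100002000 arrives as the two 32-bit cells
 * virt_hi == 0x1 and virt_lo == 0x2000, and
 * COMBINE(0x1, 0x2000) reassembles them into 0x100002000ULL.
 */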

/*
 * Track large pages used.
 * Provides observability for this feature on non-debug kernels.
 */
ulong_t map_prom_lpcount[MMU_PAGE_SIZES];

/*
 * This function traverses the prom mapping list and creates equivalent
 * mappings in the sfmmu mapping hash.
 */
static void
sfmmu_map_prom_mappings(struct translation *trans_root, size_t ntrans_root)
{
	struct translation *promt;
	tte_t tte, oldtte, *ttep;
	pfn_t pfn, oldpfn, basepfn;
	caddr_t vaddr;
	size_t size, offset;
	unsigned long i;
	uint_t attr;
	page_t *pp;
	extern struct memlist *virt_avail;
	char buf[256];

	ttep = &tte;
	for (i = 0, promt = trans_root; i < ntrans_root; i++, promt++) {
		ASSERT(promt->tte_hi != 0);
		ASSERT32(promt->virt_hi == 0 && promt->size_hi == 0);

		vaddr = (caddr_t)COMBINE(promt->virt_hi, promt->virt_lo);

		/*
		 * hack until we get rid of map-for-unix
		 */
		if (vaddr < (caddr_t)KERNELBASE)
			continue;

		ttep->tte_inthi = promt->tte_hi;
		ttep->tte_intlo = promt->tte_lo;
		attr = PROC_DATA | HAT_NOSYNC;
#if defined(TTE_IS_GLOBAL)
		if (TTE_IS_GLOBAL(ttep)) {
			/*
			 * The prom better not use global translations
			 * because a user process might use the same
			 * virtual addresses
			 */
			prom_panic("sfmmu_map_prom_mappings: global"
			    " translation");
			TTE_SET_LOFLAGS(ttep, TTE_GLB_INT, 0);
		}
#endif
		if (TTE_IS_LOCKED(ttep)) {
			/* clear the lock bits */
			TTE_CLR_LOCKED(ttep);
		}
		attr |= (TTE_IS_VCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEVTTE;
		attr |= (TTE_IS_PCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEPTTE;
		attr |= (TTE_IS_SIDEFFECT(ttep)) ? SFMMU_SIDEFFECT : 0;
		attr |= (TTE_IS_IE(ttep)) ? HAT_STRUCTURE_LE : 0;

		size = COMBINE(promt->size_hi, promt->size_lo);
		offset = 0;
		basepfn = TTE_TO_PFN((caddr_t)COMBINE(promt->virt_hi,
		    promt->virt_lo), ttep);
		while (size) {
			vaddr = (caddr_t)(COMBINE(promt->virt_hi,
			    promt->virt_lo) + offset);

			/*
			 * make sure address is not in virt-avail list
			 */
			if (address_in_memlist(virt_avail, (uint64_t)vaddr,
			    size)) {
				prom_panic("sfmmu_map_prom_mappings:"
				    " inconsistent translation/avail lists");
			}

			pfn = basepfn + mmu_btop(offset);
			if (pf_is_memory(pfn)) {
				if (attr & SFMMU_UNCACHEPTTE) {
					prom_panic("sfmmu_map_prom_mappings:"
					    " uncached prom memory page");
				}
			} else {
				if (!(attr & SFMMU_SIDEFFECT)) {
					prom_panic("sfmmu_map_prom_mappings:"
					    " prom i/o page without"
					    " side-effect");
				}
			}

			/*
			 * skip kmem64 area
			 */
			if (!kmem64_smchunks &&
			    vaddr >= kmem64_base &&
			    vaddr < kmem64_aligned_end) {
#if !defined(C_OBP)
				prom_panic("sfmmu_map_prom_mappings:"
				    " unexpected kmem64 prom mapping");
#else	/* !C_OBP */
				size_t mapsz;

				if (ptob(pfn) !=
				    kmem64_pabase + (vaddr - kmem64_base)) {
					prom_panic("sfmmu_map_prom_mappings:"
					    " unexpected kmem64 prom mapping");
				}

				mapsz = kmem64_aligned_end - vaddr;
				if (mapsz >= size) {
					break;
				}
				size -= mapsz;
				offset += mapsz;
				continue;
#endif	/* !C_OBP */
			}

			oldpfn = sfmmu_vatopfn(vaddr, KHATID, &oldtte);
			ASSERT(oldpfn != PFN_SUSPENDED);
			ASSERT(page_relocate_ready == 0);

			if (oldpfn != PFN_INVALID) {
				/*
				 * mapping already exists.
				 * Verify they are equal
				 */
				if (pfn != oldpfn) {
					(void) snprintf(buf, sizeof (buf),
					    "sfmmu_map_prom_mappings: mapping"
					    " conflict (va = 0x%p, pfn = 0x%p,"
					    " oldpfn = 0x%p)", (void *)vaddr,
					    (void *)pfn, (void *)oldpfn);
					prom_panic(buf);
				}
				size -= MMU_PAGESIZE;
				offset += MMU_PAGESIZE;
				continue;
			}

			pp = page_numtopp_nolock(pfn);
			if ((pp != NULL) && PP_ISFREE((page_t *)pp)) {
				(void) snprintf(buf, sizeof (buf),
				    "sfmmu_map_prom_mappings: prom-mapped"
				    " page (va = 0x%p, pfn = 0x%p) on free"
				    " list", (void *)vaddr, (void *)pfn);
				prom_panic(buf);
			}

			sfmmu_memtte(ttep, pfn, attr, TTE8K);
			sfmmu_tteload(kas.a_hat, ttep, vaddr, pp,
			    HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
			size -= MMU_PAGESIZE;
			offset += MMU_PAGESIZE;
		}
	}
	/*
	 * We claimed kmem64 from the prom, so now we need to load the ttes.
	 */
	if (!kmem64_smchunks && kmem64_base != NULL) {
		pgcnt_t pages;
		size_t psize;
		int pszc;

		pszc = kmem64_szc;
#ifdef sun4u
		if (pszc > TTE8K) {
			pszc = segkmem_lpszc;
		}
#endif	/* sun4u */
		psize = TTEBYTES(pszc);
		pages = btop(psize);
		basepfn = kmem64_pabase >> MMU_PAGESHIFT;
		vaddr = kmem64_base;
		while (vaddr < kmem64_end) {
			sfmmu_memtte(ttep, basepfn,
			    PROC_DATA | HAT_NOSYNC, pszc);
			sfmmu_tteload(kas.a_hat, ttep, vaddr, NULL,
			    HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
			vaddr += psize;
			basepfn += pages;
		}
		map_prom_lpcount[pszc] =
		    ((caddr_t)P2ROUNDUP((uintptr_t)kmem64_end, psize) -
		    kmem64_base) >> TTE_PAGE_SHIFT(pszc);
	}
}

#undef	COMBINE	/* local to previous routine */

/*
 * This routine reads the "translations" property into a buffer and
 * returns a pointer to this buffer and the number of translations.
 */
static struct translation *
read_prom_mappings(size_t *ntransrootp)
{
	char *prop = "translations";
	size_t translen;
	pnode_t node;
	struct translation *transroot;

	/*
	 * the "translations" property is associated with the mmu node
	 */
	node = (pnode_t)prom_getphandle(prom_mmu_ihandle());

	/*
	 * We use the TSB space to read in the prom mappings.  This space
	 * is currently not being used because we haven't taken over the
	 * trap table yet.  It should be big enough to hold the mappings.
	 */
	if ((translen = prom_getproplen(node, prop)) == -1)
		cmn_err(CE_PANIC, "no translations property");
	*ntransrootp = translen / sizeof (*transroot);
	translen = roundup(translen, MMU_PAGESIZE);
	PRM_DEBUG(translen);
	if (translen > TSB_BYTES(ktsb_szcode))
		cmn_err(CE_PANIC, "not enough space for translations");

	transroot = (struct translation *)ktsb_base;
	ASSERT(transroot);
	if (prom_getprop(node, prop, (caddr_t)transroot) == -1) {
		cmn_err(CE_PANIC, "translations getprop failed");
	}
	return (transroot);
}

/*
 * Init routine of the nucleus data memory allocator.
 *
 * The nucleus data memory allocator is organized in ecache_alignsize'd
 * memory chunks. Memory allocated by ndata_alloc() will never be freed.
 *
 * The ndata argument is used as header of the ndata freelist.
 * Other freelist nodes are placed in the nucleus memory itself
 * at the beginning of a free memory chunk. Therefore a freelist
 * node (struct memlist) must fit into the smallest allocatable
 * memory chunk (ecache_alignsize bytes).
 *
 * The memory interval [base, end] passed to ndata_alloc_init() must be
 * bzero'd to allow the allocator to return bzero'd memory easily.
 */
void
ndata_alloc_init(struct memlist *ndata, uintptr_t base, uintptr_t end)
{
	ASSERT(sizeof (struct memlist) <= ecache_alignsize);

	base = roundup(base, ecache_alignsize);
	end = end - end % ecache_alignsize;

	ASSERT(base < end);

	ndata->ml_address = base;
	ndata->ml_size = end - base;
	ndata->ml_next = NULL;
	ndata->ml_prev = NULL;
}
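
/*
 * Minimal usage sketch (hypothetical callers and sizes, not part of
 * this file's boot flow): startup code seeds the allocator with one
 * bzero'd nucleus range and then carves permanent chunks out of it:
 *
 *	struct memlist ndata;
 *	void *tbl;
 *
 *	ndata_alloc_init(&ndata, nucleus_start, nucleus_end);
 *	tbl = ndata_alloc(&ndata, tbl_sz, ecache_alignsize);
 *
 * Nothing is ever freed; whatever remains at the end of the nucleus is
 * later returned to phys_avail via ndata_extra_base().
 */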

/*
 * Deliver the size of the largest free memory chunk.
 */
size_t
ndata_maxsize(struct memlist *ndata)
{
	size_t chunksize = ndata->ml_size;

	while ((ndata = ndata->ml_next) != NULL) {
		if (chunksize < ndata->ml_size)
			chunksize = ndata->ml_size;
	}

	return (chunksize);
}


/*
 * Allocate the last properly aligned memory chunk.
 * This function is called when no more large nucleus memory chunks
 * will be allocated.  The remaining free nucleus memory at the end
 * of the nucleus can be added to the phys_avail list.
 */
void *
ndata_extra_base(struct memlist *ndata, size_t alignment, caddr_t endaddr)
{
	uintptr_t base;
	size_t wasteage = 0;
#ifdef DEBUG
	static int called = 0;

	if (called++ > 0)
		cmn_err(CE_PANIC, "ndata_extra_base() called more than once");
#endif	/* DEBUG */

	/*
	 * The alignment needs to be a multiple of ecache_alignsize.
	 */
	ASSERT((alignment % ecache_alignsize) == 0);

	while (ndata->ml_next != NULL) {
		wasteage += ndata->ml_size;
		ndata = ndata->ml_next;
	}

	base = roundup(ndata->ml_address, alignment);

	if (base >= ndata->ml_address + ndata->ml_size)
		return (NULL);

	if ((caddr_t)(ndata->ml_address + ndata->ml_size) != endaddr) {
#ifdef DEBUG
		ndata_middle_hole_detected = 1;	/* see if we hit this again */
#endif
		return (NULL);
	}

	if (base == ndata->ml_address) {
		if (ndata->ml_prev != NULL)
			ndata->ml_prev->ml_next = NULL;
		else
			ndata->ml_size = 0;

		bzero((void *)base, sizeof (struct memlist));

	} else {
		ndata->ml_size = base - ndata->ml_address;
		wasteage += ndata->ml_size;
	}
	PRM_DEBUG(wasteage);

	return ((void *)base);
}

/*
 * Select the best matching buffer, avoid memory fragmentation.
 */
static struct memlist *
ndata_select_chunk(struct memlist *ndata, size_t wanted, size_t alignment)
{
	struct memlist *fnd_below = NULL;
	struct memlist *fnd_above = NULL;
	struct memlist *fnd_unused = NULL;
	struct memlist *frlist;
	uintptr_t base;
	uintptr_t end;
	size_t below;
	size_t above;
	size_t unused;
	size_t best_below = ULONG_MAX;
	size_t best_above = ULONG_MAX;
	size_t best_unused = ULONG_MAX;

	ASSERT(ndata != NULL);

	/*
	 * Look for the best matching buffer, avoid memory fragmentation.
	 * The following strategy is used, try to find
	 * 1. an exact fitting buffer
	 * 2. avoid wasting any space below the buffer, take first
	 *	fitting buffer
	 * 3. avoid wasting any space above the buffer, take first
	 *	fitting buffer
	 * 4. avoid wasting space, take first fitting buffer
	 * 5. take the last buffer in chain
	 */
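
	/*
	 * Worked example (hypothetical sizes): with ecache_alignsize == 64
	 * and free chunks of 192, 256 and 320 bytes, a 256-byte request at
	 * 64-byte alignment fits the 256-byte chunk exactly (unused == 0)
	 * and is returned immediately; otherwise the chunk wasting the
	 * least space below the buffer wins, then the least above, then
	 * the least overall.
	 */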
	for (frlist = ndata; frlist != NULL; frlist = frlist->ml_next) {
		base = roundup(frlist->ml_address, alignment);
		end = roundup(base + wanted, ecache_alignsize);

		if (end > frlist->ml_address + frlist->ml_size)
			continue;

		below = (base - frlist->ml_address) / ecache_alignsize;
		above = (frlist->ml_address + frlist->ml_size - end) /
		    ecache_alignsize;
		unused = below + above;

		if (unused == 0)
			return (frlist);

		if (frlist->ml_next == NULL)
			break;

		if (below < best_below) {
			best_below = below;
			fnd_below = frlist;
		}

		if (above < best_above) {
			best_above = above;
			fnd_above = frlist;
		}

		if (unused < best_unused) {
			best_unused = unused;
			fnd_unused = frlist;
		}
	}

	if (best_below == 0)
		return (fnd_below);
	if (best_above == 0)
		return (fnd_above);
	if (best_unused < ULONG_MAX)
		return (fnd_unused);

	return (frlist);
}

/*
 * Nucleus data memory allocator.
 * The granularity of the allocator is ecache_alignsize.
 * See also comment for ndata_alloc_init().
 */
void *
ndata_alloc(struct memlist *ndata, size_t wanted, size_t alignment)
{
	struct memlist *found;
	struct memlist *fnd_above;
	uintptr_t base;
	uintptr_t end;
	size_t below;
	size_t above;

	/*
	 * Look for the best matching buffer, avoid memory fragmentation.
	 */
	if ((found = ndata_select_chunk(ndata, wanted, alignment)) == NULL)
		return (NULL);

	/*
	 * Allocate the nucleus data buffer.
	 */
	base = roundup(found->ml_address, alignment);
	end = roundup(base + wanted, ecache_alignsize);
	ASSERT(end <= found->ml_address + found->ml_size);

	below = base - found->ml_address;
	above = found->ml_address + found->ml_size - end;
	ASSERT(above == 0 || (above % ecache_alignsize) == 0);

	if (below >= ecache_alignsize) {
		/*
		 * There is free memory below the allocated memory chunk.
		 */
		found->ml_size = below - below % ecache_alignsize;

		if (above) {
			fnd_above = (struct memlist *)end;
			fnd_above->ml_address = end;
			fnd_above->ml_size = above;

			if ((fnd_above->ml_next = found->ml_next) != NULL)
				found->ml_next->ml_prev = fnd_above;
			fnd_above->ml_prev = found;
			found->ml_next = fnd_above;
		}

		return ((void *)base);
	}

	if (found->ml_prev == NULL) {
		/*
		 * The first chunk (ndata) is selected.
		 */
		ASSERT(found == ndata);
		if (above) {
			found->ml_address = end;
			found->ml_size = above;
		} else if (found->ml_next != NULL) {
			found->ml_address = found->ml_next->ml_address;
			found->ml_size = found->ml_next->ml_size;
			if ((found->ml_next = found->ml_next->ml_next) != NULL)
				found->ml_next->ml_prev = found;

			bzero((void *)found->ml_address,
			    sizeof (struct memlist));
		} else {
			found->ml_address = end;
			found->ml_size = 0;
		}

		return ((void *)base);
	}

	/*
	 * Not the first chunk.
	 */
	if (above) {
		fnd_above = (struct memlist *)end;
		fnd_above->ml_address = end;
		fnd_above->ml_size = above;

		if ((fnd_above->ml_next = found->ml_next) != NULL)
			fnd_above->ml_next->ml_prev = fnd_above;
		fnd_above->ml_prev = found->ml_prev;
		found->ml_prev->ml_next = fnd_above;

	} else {
		if ((found->ml_prev->ml_next = found->ml_next) != NULL)
			found->ml_next->ml_prev = found->ml_prev;
	}

	bzero((void *)found->ml_address, sizeof (struct memlist));

	return ((void *)base);
}

/*
 * Size the kernel TSBs based upon the amount of physical
 * memory in the system.
 */
static void
calc_tsb_sizes(pgcnt_t npages)
{
	PRM_DEBUG(npages);

	if (npages <= TSB_FREEMEM_MIN) {
		ktsb_szcode = TSB_128K_SZCODE;
		enable_bigktsb = 0;
	} else if (npages <= TSB_FREEMEM_LARGE / 2) {
		ktsb_szcode = TSB_256K_SZCODE;
		enable_bigktsb = 0;
	} else if (npages <= TSB_FREEMEM_LARGE) {
		ktsb_szcode = TSB_512K_SZCODE;
		enable_bigktsb = 0;
	} else if (npages <= TSB_FREEMEM_LARGE * 2 ||
	    enable_bigktsb == 0) {
		ktsb_szcode = TSB_1M_SZCODE;
		enable_bigktsb = 0;
	} else {
		ktsb_szcode = highbit(npages - 1);
		ktsb_szcode -= TSB_START_SIZE;
		ktsb_szcode = MAX(ktsb_szcode, MIN_BIGKTSB_SZCODE);
		ktsb_szcode = MIN(ktsb_szcode, MAX_BIGKTSB_SZCODE);
	}

	/*
	 * We choose the TSB to hold kernel 4M mappings to have twice
	 * the reach as the primary kernel TSB since this TSB will
	 * potentially (currently) be shared by both mappings to all of
	 * physical memory plus user TSBs.  If this TSB has to be in nucleus
	 * (only for Spitfire and Cheetah) limit its size to 64K.
	 */
	ktsb4m_szcode = highbit((2 * npages) / TTEPAGES(TTE4M) - 1);
	ktsb4m_szcode -= TSB_START_SIZE;
	ktsb4m_szcode = MAX(ktsb4m_szcode, TSB_MIN_SZCODE);
	ktsb4m_szcode = MIN(ktsb4m_szcode, TSB_SOFTSZ_MASK);
	if ((enable_bigktsb == 0 || ktsb_phys == 0) && ktsb4m_szcode >
	    TSB_64K_SZCODE) {
		ktsb4m_szcode = TSB_64K_SZCODE;
		max_bootlp_tteszc = TTE8K;
	}

	ktsb_sz = TSB_BYTES(ktsb_szcode);	/* kernel 8K tsb size */
	ktsb4m_sz = TSB_BYTES(ktsb4m_szcode);	/* kernel 4M tsb size */
}
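
/*
 * Worked example for the 4M TSB sizing above (assuming 8K base pages,
 * TTEPAGES(TTE4M) == 512 and TSB_START_SIZE == 9): for npages == 2^22
 * (32GB of memory), highbit((2 * npages) / 512 - 1) == 14, so
 * ktsb4m_szcode becomes 5, i.e. 512 << 5 == 16K entries whose combined
 * 4M reach is 64GB, twice the 32GB of physical memory as intended.
 */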

/*
 * Allocate kernel TSBs from nucleus data memory.
 * The function returns 0 on success and -1 on failure.
 */
int
ndata_alloc_tsbs(struct memlist *ndata, pgcnt_t npages)
{
	/*
	 * Set ktsb_phys to 1 if the processor supports ASI_QUAD_LDD_PHYS.
	 */
	(void) sfmmu_setup_4lp();

	/*
	 * Size the kernel TSBs based upon the amount of physical
	 * memory in the system.
	 */
	calc_tsb_sizes(npages);

	/*
	 * Allocate the 8K kernel TSB if it belongs inside the nucleus.
	 */
	if (enable_bigktsb == 0) {
		if ((ktsb_base = ndata_alloc(ndata, ktsb_sz, ktsb_sz)) == NULL)
			return (-1);
		ASSERT(!((uintptr_t)ktsb_base & (ktsb_sz - 1)));

		PRM_DEBUG(ktsb_base);
		PRM_DEBUG(ktsb_sz);
		PRM_DEBUG(ktsb_szcode);
	}

	/*
	 * Next, allocate 4M kernel TSB from the nucleus since it's small.
	 */
	if (ktsb4m_szcode <= TSB_64K_SZCODE) {

		ktsb4m_base = ndata_alloc(ndata, ktsb4m_sz, ktsb4m_sz);
		if (ktsb4m_base == NULL)
			return (-1);
		ASSERT(!((uintptr_t)ktsb4m_base & (ktsb4m_sz - 1)));

		PRM_DEBUG(ktsb4m_base);
		PRM_DEBUG(ktsb4m_sz);
		PRM_DEBUG(ktsb4m_szcode);
	}

	return (0);
}

size_t
calc_hmehash_sz(pgcnt_t npages)
{
	ulong_t hme_buckets;

	/*
	 * The number of buckets in the hme hash tables
	 * is a power of 2 such that the average hash chain length is
	 * HMENT_HASHAVELEN.  The number of buckets for the user hash is
	 * a function of physical memory and a predefined overmapping factor.
	 * The number of buckets for the kernel hash is a function of
	 * physical memory only.
	 */
	hme_buckets = (npages * HMEHASH_FACTOR) /
	    (HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT));

	uhmehash_num = (int)MIN(hme_buckets, MAX_UHME_BUCKETS);

	if (uhmehash_num > USER_BUCKETS_THRESHOLD) {
		/*
		 * if uhmehash_num is not a power of 2 round it down to the
		 * next power of 2.
		 */
		uint_t align = 1 << (highbit(uhmehash_num - 1) - 1);
		uhmehash_num = P2ALIGN(uhmehash_num, align);
	} else
		uhmehash_num = 1 << highbit(uhmehash_num - 1);

	hme_buckets = npages / (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT);
	khmehash_num = (int)MIN(hme_buckets, MAX_KHME_BUCKETS);
	khmehash_num = 1 << highbit(khmehash_num - 1);
	khmehash_num = MAX(khmehash_num, MIN_KHME_BUCKETS);

	return ((uhmehash_num + khmehash_num) * sizeof (struct hmehash_bucket));
}
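
/*
 * Worked example (assuming HMEHASH_FACTOR == 16, HMENT_HASHAVELEN == 4
 * and HMEBLK_SPAN(TTE8K) covering 8 pages): npages == 2^20 yields
 * hme_buckets = (2^20 * 16) / (4 * 8) == 2^19 user buckets and
 * 2^20 / 8 == 2^17 kernel buckets; both are then clamped and rounded
 * to powers of 2 before being multiplied by
 * sizeof (struct hmehash_bucket).
 */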

caddr_t
alloc_hmehash(caddr_t alloc_base)
{
	size_t khmehash_sz, uhmehash_sz;

	khme_hash = (struct hmehash_bucket *)alloc_base;
	khmehash_sz = khmehash_num * sizeof (struct hmehash_bucket);
	alloc_base += khmehash_sz;

	uhme_hash = (struct hmehash_bucket *)alloc_base;
	uhmehash_sz = uhmehash_num * sizeof (struct hmehash_bucket);
	alloc_base += uhmehash_sz;

	PRM_DEBUG(khme_hash);
	PRM_DEBUG(uhme_hash);

	return (alloc_base);
}

/*
 * Allocate hat structs from the nucleus data memory.
 */
int
ndata_alloc_hat(struct memlist *ndata)
{
	size_t cb_alloc_sz;

	cb_alloc_sz = sfmmu_max_cb_id * sizeof (struct sfmmu_callback);
	PRM_DEBUG(cb_alloc_sz);
	sfmmu_cb_table = ndata_alloc(ndata, cb_alloc_sz, ecache_alignsize);
	if (sfmmu_cb_table == NULL)
		return (-1);
	PRM_DEBUG(sfmmu_cb_table);

	return (0);
}

int
ndata_alloc_kpm(struct memlist *ndata, pgcnt_t kpm_npages)
{
	size_t kpmp_alloc_sz;

	/*
	 * For the kpm_page mutex array we allocate one mutex for every 16
	 * kpm pages (64MB).  In smallpage mode we allocate one mutex for
	 * every 8K pages.  The minimum is set to 64 entries and the
	 * maximum to 8K entries.
	 */
	if (kpm_smallpages == 0) {
		kpmp_shift = highbit(sizeof (kpm_page_t)) - 1;
		kpmp_table_sz = 1 << highbit(kpm_npages / 16);
		kpmp_table_sz = (kpmp_table_sz < 64) ? 64 :
		    ((kpmp_table_sz > 8192) ? 8192 : kpmp_table_sz);
		kpmp_alloc_sz = kpmp_table_sz * sizeof (kpm_hlk_t);

		kpmp_table = ndata_alloc(ndata, kpmp_alloc_sz,
		    ecache_alignsize);
		if (kpmp_table == NULL)
			return (-1);

		PRM_DEBUG(kpmp_table);
		PRM_DEBUG(kpmp_table_sz);

		kpmp_stable_sz = 0;
		kpmp_stable = NULL;
	} else {
		ASSERT(kpm_pgsz == PAGESIZE);
		kpmp_shift = highbit(sizeof (kpm_shlk_t)) + 1;
		kpmp_stable_sz = 1 << highbit(kpm_npages / 8192);
		kpmp_stable_sz = (kpmp_stable_sz < 64) ? 64 :
		    ((kpmp_stable_sz > 8192) ? 8192 : kpmp_stable_sz);
		kpmp_alloc_sz = kpmp_stable_sz * sizeof (kpm_shlk_t);

		kpmp_stable = ndata_alloc(ndata, kpmp_alloc_sz,
		    ecache_alignsize);
		if (kpmp_stable == NULL)
			return (-1);

		PRM_DEBUG(kpmp_stable);
		PRM_DEBUG(kpmp_stable_sz);

		kpmp_table_sz = 0;
		kpmp_table = NULL;
	}
	PRM_DEBUG(kpmp_shift);

	return (0);
}
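
/*
 * Worked example for the sizing above (hypothetical memory size): with
 * 16GB of kpm-mapped memory and 4M kpm pages, kpm_npages == 4096, so
 * kpm_npages / 16 == 256 and kpmp_table_sz == 1 << highbit(256) == 512,
 * which lies inside the [64, 8192] clamp; the table then consumes
 * 512 * sizeof (kpm_hlk_t) bytes of nucleus memory.
 */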

/*
 * This function bop allocs kernel TSBs.
 */
caddr_t
sfmmu_ktsb_alloc(caddr_t tsbbase)
{
	caddr_t vaddr;

	if (enable_bigktsb) {
		ktsb_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb_sz);
		vaddr = prom_alloc(ktsb_base, ktsb_sz, ktsb_sz);
		if (vaddr != ktsb_base)
			cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc"
			    " 8K bigktsb");
		ktsb_base = vaddr;
		tsbbase = ktsb_base + ktsb_sz;
		PRM_DEBUG(ktsb_base);
		PRM_DEBUG(tsbbase);
	}

	if (ktsb4m_szcode > TSB_64K_SZCODE) {
		ASSERT(ktsb_phys && enable_bigktsb);
		ktsb4m_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb4m_sz);
		vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb4m_base, ktsb4m_sz,
		    ktsb4m_sz);
		if (vaddr != ktsb4m_base)
			cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc"
			    " 4M bigktsb");
		ktsb4m_base = vaddr;
		tsbbase = ktsb4m_base + ktsb4m_sz;
		PRM_DEBUG(ktsb4m_base);
		PRM_DEBUG(tsbbase);
	}
	return (tsbbase);
}

/*
 * Moves code assembled outside of the trap table into the trap
 * table taking care to relocate relative branches to code outside
 * of the trap handler.
 */
static void
sfmmu_reloc_trap_handler(void *tablep, void *start, size_t count)
{
	size_t i;
	uint32_t *src;
	uint32_t *dst;
	uint32_t inst;
	int op, op2;
	int32_t offset;
	int disp;

	src = start;
	dst = tablep;
	offset = src - dst;
	for (src = start, i = 0; i < count; i++, src++, dst++) {
		inst = *dst = *src;
		op = (inst >> 30) & 0x2;
		if (op == 1) {
			/* call */
			disp = ((int32_t)inst << 2) >> 2; /* sign-extend */
			if (disp + i >= 0 && disp + i < count)
				continue;
			disp += offset;
			inst = 0x40000000u | (disp & 0x3fffffffu);
			*dst = inst;
		} else if (op == 0) {
			/* branch or sethi */
			op2 = (inst >> 22) & 0x7;

			switch (op2) {
			case 0x3: /* BPr */
				disp = (((inst >> 20) & 0x3) << 14) |
				    (inst & 0x3fff);
				disp = (disp << 16) >> 16; /* sign-extend */
				if (disp + i >= 0 && disp + i < count)
					continue;
				disp += offset;
				if (((disp << 16) >> 16) != disp)
					cmn_err(CE_PANIC, "bad reloc");
				inst &= ~0x303fff;
				inst |= (disp & 0x3fff);
				inst |= (disp & 0xc000) << 6;
				break;

			case 0x2: /* Bicc */
				disp = ((int32_t)inst << 10) >> 10;
				if (disp + i >= 0 && disp + i < count)
					continue;
				disp += offset;
				if (((disp << 10) >> 10) != disp)
					cmn_err(CE_PANIC, "bad reloc");
				inst &= ~0x3fffff;
				inst |= (disp & 0x3fffff);
				break;

			case 0x1: /* Bpcc */
				disp = ((int32_t)inst << 13) >> 13;
				if (disp + i >= 0 && disp + i < count)
					continue;
				disp += offset;
				if (((disp << 13) >> 13) != disp)
					cmn_err(CE_PANIC, "bad reloc");
				inst &= ~0x7ffff;
				inst |= (disp & 0x7ffffu);
				break;
			}
			*dst = inst;
		}
	}
	flush_instr_mem(tablep, count * sizeof (uint32_t));
}
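
/*
 * Relocation example for the "call" case above (hypothetical numbers):
 * a call whose sign-extended 30-bit word displacement is 0x100 targets
 * code outside the copied range, so it cannot be left alone.  With the
 * handler copied 0x2000 words below its assembled address,
 * offset == src - dst == 0x2000, and the instruction is re-encoded as
 * 0x40000000 | ((0x100 + 0x2000) & 0x3fffffff).
 */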

/*
 * Routine to allocate a large page to use in the TSB caches.
 */
/*ARGSUSED*/
static page_t *
sfmmu_tsb_page_create(void *addr, size_t size, int vmflag, void *arg)
{
	int pgflags;

	pgflags = PG_EXCL;
	if ((vmflag & VM_NOSLEEP) == 0)
		pgflags |= PG_WAIT;
	if (vmflag & VM_PANIC)
		pgflags |= PG_PANIC;
	if (vmflag & VM_PUSHPAGE)
		pgflags |= PG_PUSHPAGE;

	return (page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size,
	    pgflags, &kvseg, addr, arg));
}

/*
 * Allocate a large page to back the virtual address range
 * [addr, addr + size).  If addr is NULL, allocate the virtual address
 * space as well.
 */
static void *
sfmmu_tsb_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
    uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
    void *pcarg)
{
	page_t *ppl;
	page_t *rootpp;
	caddr_t addr = inaddr;
	pgcnt_t npages = btopr(size);
	page_t **ppa;
	int i = 0;

	/*
	 * Assuming that only TSBs will call this with size > PAGESIZE.
	 * There is no reason why this couldn't be expanded to 8k pages as
	 * well, or other page sizes in the future .... but for now, we
	 * only support fixed sized page requests.
	 */
	if ((inaddr == NULL) && ((addr = vmem_xalloc(vmp, size, size, 0, 0,
	    NULL, NULL, vmflag)) == NULL))
		return (NULL);

	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
		if (inaddr == NULL)
			vmem_xfree(vmp, addr, size);
		return (NULL);
	}

	ppl = page_create_func(addr, size, vmflag, pcarg);
	if (ppl == NULL) {
		if (inaddr == NULL)
			vmem_xfree(vmp, addr, size);
		page_unresv(npages);
		return (NULL);
	}

	rootpp = ppl;
	ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
	while (ppl != NULL) {
		page_t *pp = ppl;
		ppa[i++] = pp;
		page_sub(&ppl, pp);
		ASSERT(page_iolock_assert(pp));
		page_io_unlock(pp);
	}

	/*
	 * Load the locked entry.  It's OK to preload the entry into
	 * the TSB since we now support large mappings in the kernel TSB.
	 */
	hat_memload_array(kas.a_hat, (caddr_t)rootpp->p_offset, size,
	    ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr, HAT_LOAD_LOCK);

	for (--i; i >= 0; --i) {
		(void) page_pp_lock(ppa[i], 0, 1);
		page_unlock(ppa[i]);
	}

	kmem_free(ppa, npages * sizeof (page_t *));
	return (addr);
}

/* Called to import new spans into the TSB vmem arenas */
void *
sfmmu_tsb_segkmem_alloc(vmem_t *vmp, size_t size, int vmflag)
{
	lgrp_id_t lgrpid = LGRP_NONE;

	if (tsb_lgrp_affinity) {
		/*
		 * Search for the vmp->lgrpid mapping by brute force;
		 * some day vmp will have an lgrp, until then we have
		 * to do this the hard way.
		 */
		for (lgrpid = 0; lgrpid < NLGRPS_MAX &&
		    vmp != kmem_tsb_default_arena[lgrpid]; lgrpid++)
			;
		if (lgrpid == NLGRPS_MAX)
			lgrpid = LGRP_NONE;
	}

	return (sfmmu_tsb_xalloc(vmp, NULL, size, vmflag, 0,
	    sfmmu_tsb_page_create, lgrpid != LGRP_NONE ? &lgrpid : NULL));
}

/* Called to free spans from the TSB vmem arenas */
void
sfmmu_tsb_segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
{
	page_t *pp;
	caddr_t addr = inaddr;
	caddr_t eaddr;
	pgcnt_t npages = btopr(size);
	pgcnt_t pgs_left = npages;
	page_t *rootpp = NULL;

	hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);

	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
		pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
		if (pp == NULL)
			panic("sfmmu_tsb_segkmem_free: page not found");

		ASSERT(PAGE_EXCL(pp));
		page_pp_unlock(pp, 0, 1);

		if (rootpp == NULL)
			rootpp = pp;
		if (--pgs_left == 0) {
			/*
			 * similar logic to segspt_free_pages, but we know we
			 * have one large page.
			 */
			page_destroy_pages(rootpp);
		}
	}
	page_unresv(npages);

	if (vmp != NULL)
		vmem_xfree(vmp, inaddr, size);
}