xref: /titanic_44/usr/src/uts/sun4/vm/sfmmu.c (revision a237e38e9161f0acd6451439d4a7dd597e66291d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <vm/hat.h>
30 #include <vm/hat_sfmmu.h>
31 #include <vm/page.h>
32 #include <sys/pte.h>
33 #include <sys/systm.h>
34 #include <sys/mman.h>
35 #include <sys/sysmacros.h>
36 #include <sys/machparam.h>
37 #include <sys/vtrace.h>
38 #include <sys/kmem.h>
39 #include <sys/mmu.h>
40 #include <sys/cmn_err.h>
41 #include <sys/cpu.h>
42 #include <sys/cpuvar.h>
43 #include <sys/debug.h>
44 #include <sys/lgrp.h>
45 #include <sys/archsystm.h>
46 #include <sys/machsystm.h>
47 #include <sys/vmsystm.h>
48 #include <sys/bitmap.h>
49 #include <vm/as.h>
50 #include <vm/seg.h>
51 #include <vm/seg_kmem.h>
52 #include <vm/seg_kp.h>
53 #include <vm/seg_kpm.h>
54 #include <vm/rm.h>
55 #include <vm/vm_dep.h>
56 #include <sys/t_lock.h>
57 #include <sys/vm_machparam.h>
58 #include <sys/promif.h>
59 #include <sys/prom_isa.h>
60 #include <sys/prom_plat.h>
61 #include <sys/prom_debug.h>
62 #include <sys/privregs.h>
63 #include <sys/bootconf.h>
64 #include <sys/memlist.h>
65 #include <sys/memlist_plat.h>
66 #include <sys/cpu_module.h>
67 #include <sys/reboot.h>
68 #include <sys/kdi.h>
69 
/*
 * Static routines
 *
 * Forward declarations for the file-local helpers defined further down.
 */
static void	sfmmu_map_prom_mappings(struct translation *, size_t);
static struct translation *read_prom_mappings(size_t *);
static void	sfmmu_reloc_trap_handler(void *, void *, size_t);

/*
 * External routines
 *
 * Defined elsewhere; called from hat_kern_setup().
 */
extern void sfmmu_remap_kernel(void);
extern void sfmmu_patch_utsb(void);

/*
 * Global Data:
 */
extern caddr_t	textva, datava;
extern tte_t	ktext_tte, kdata_tte;	/* ttes for kernel text and data */
extern int	enable_bigktsb;

uint64_t memsegspa = (uintptr_t)MSEG_NULLPTR_PA; /* memsegs physical linkage */
uint64_t memseg_phash[N_MEM_SLOTS];	/* use physical memseg addresses */

/* Set to 1 by hat_kern_setup() once the prom mappings have been taken over. */
int	sfmmu_kern_mapped = 0;

/*
 * DMMU primary context register for the kernel context.  Machine specific
 * code inserts correct page size codes when necessary.
 */
uint64_t kcontextreg = KCONTEXT;

/* Extern Global Data */

extern int page_relocate_ready;

/*
 * Controls the logic which enables the use of the
 * QUAD_LDD_PHYS ASI for TSB accesses.
 */
extern int	ktsb_phys;
110 
111 /*
112  * Global Routines called from within:
113  *	usr/src/uts/sun4u
114  *	usr/src/uts/sfmmu
115  *	usr/src/uts/sun
116  */
117 
118 pfn_t
119 va_to_pfn(void *vaddr)
120 {
121 	u_longlong_t physaddr;
122 	int mode, valid;
123 
124 	if (tba_taken_over)
125 		return (hat_getpfnum(kas.a_hat, (caddr_t)vaddr));
126 
127 #if !defined(C_OBP)
128 	if ((caddr_t)vaddr >= kmem64_base && (caddr_t)vaddr < kmem64_end) {
129 		if (kmem64_pabase == (uint64_t)-1)
130 			prom_panic("va_to_pfn: kmem64_pabase not init");
131 		physaddr = kmem64_pabase + ((caddr_t)vaddr - kmem64_base);
132 		return ((pfn_t)physaddr >> MMU_PAGESHIFT);
133 	}
134 #endif	/* !C_OBP */
135 
136 	if ((prom_translate_virt(vaddr, &valid, &physaddr, &mode) != -1) &&
137 	    (valid == -1)) {
138 		return ((pfn_t)(physaddr >> MMU_PAGESHIFT));
139 	}
140 	return (PFN_INVALID);
141 }
142 
143 uint64_t
144 va_to_pa(void *vaddr)
145 {
146 	pfn_t pfn;
147 
148 	if ((pfn = va_to_pfn(vaddr)) == PFN_INVALID)
149 		return ((uint64_t)-1);
150 	return (((uint64_t)pfn << MMU_PAGESHIFT) |
151 		((uint64_t)vaddr & MMU_PAGEOFFSET));
152 }
153 
/*
 * Take over the kernel's address translations from the prom.
 *
 * Records the physical addresses of the two kernel TSBs, runs the patch
 * and TSB initialization routines, re-creates the prom's translations in
 * the kernel hat, invalidates both kernel TSBs and finally sets
 * sfmmu_kern_mapped.  The calls below are order dependent; see the step
 * list in the body.
 */
void
hat_kern_setup(void)
{
	struct translation *trans_root;
	size_t ntrans_root;
	extern void startup_fixup_physavail(void);

	/*
	 * These are the steps we take to take over the mmu from the prom.
	 *
	 * (1)	Read the prom's mappings through the translation property.
	 * (2)	Remap the kernel text and kernel data with 2 locked 4MB ttes.
	 *	Create the hmeblks for these 2 ttes at this time.
	 * (3)	Create hat structures for all other prom mappings.  Since the
	 *	kernel text and data hme_blks have already been created we
	 *	skip the equivalent prom's mappings.
	 * (4)	Initialize the tsb and its corresponding hardware regs.
	 * (5)	Take over the trap table (currently in startup).
	 * (6)	Up to this point it is possible the prom required some of its
	 *	locked tte's.  Now that we own the trap table we remove them.
	 */

	/* Physical base addresses of the 8K-indexed and 4M-indexed TSBs. */
	ktsb_pbase = va_to_pa(ktsb_base);
	ktsb4m_pbase = va_to_pa(ktsb4m_base);
	PRM_DEBUG(ktsb_pbase);
	PRM_DEBUG(ktsb4m_pbase);

	sfmmu_patch_ktsb();
	sfmmu_patch_utsb();
	sfmmu_patch_mmu_asi(ktsb_phys);

	sfmmu_init_tsbs();

	/* kpm patching; the tsbm patch is skipped in kpm smallpage mode. */
	if (kpm_enable) {
		sfmmu_kpm_patch_tlbm();
		if (kpm_smallpages == 0) {
			sfmmu_kpm_patch_tsbm();
		}
	}

	/*
	 * The 8K-indexed kernel TSB space is used to hold
	 * translations below...
	 */
	trans_root = read_prom_mappings(&ntrans_root);
	sfmmu_remap_kernel();
	startup_fixup_physavail();
	mmu_init_kernel_pgsz(kas.a_hat);
	sfmmu_map_prom_mappings(trans_root, ntrans_root);

	/*
	 * We invalidate 8K kernel TSB because we used it in
	 * sfmmu_map_prom_mappings()
	 */
	sfmmu_inv_tsb(ktsb_base, ktsb_sz);
	sfmmu_inv_tsb(ktsb4m_base, ktsb4m_sz);

	sfmmu_init_ktsbinfo();


	sfmmu_kern_mapped = 1;

	/*
	 * hments have been created for mapped pages, and thus we're ready
	 * for kmdb to start using its own trap table.  It walks the hments
	 * to resolve TLB misses, and can't be used until they're ready.
	 */
	if (boothowto & RB_DEBUG)
		kdi_dvec_vmready();
}
224 
/*
 * Glue the prom's 32-bit high and low halves together into one 64-bit
 * value for the 64-bit kernel.  Each half is truncated to 32 bits first
 * so that a sign-extended argument cannot leak into the other half.
 */

#define	COMBINE(hi, lo)	\
	((((uint64_t)(uint32_t)(hi)) << 32) | ((uint64_t)(uint32_t)(lo)))
231 
/*
 * Track large pages used.
 * Provides observability for this feature on non-debug kernels.
 */
236 ulong_t map_prom_lpcount[MMU_PAGE_SIZES];
237 
/*
 * This function traverses the prom mapping list and creates equivalent
 * mappings in the sfmmu mapping hash.
 *
 * trans_root	array of prom translation entries
 * ntrans_root	number of entries in trans_root
 *
 * Every translation at or above KERNELBASE is walked one MMU_PAGESIZE
 * page at a time and loaded into the kernel hat as a locked 8K tte,
 * unless a mapping for that address already exists (in which case it
 * must agree with the prom's).  Any inconsistency found on the way is
 * fatal (cmn_err(CE_PANIC, ...)).  Afterwards, if kmem64_base is set,
 * the kmem64 range is loaded as well.
 */
static void
sfmmu_map_prom_mappings(struct translation *trans_root, size_t ntrans_root)
{
	struct translation *promt;
	tte_t	tte, oldtte, *ttep;
	pfn_t	pfn, oldpfn, basepfn;
	caddr_t vaddr;
	size_t	size, offset;
	unsigned long i;
	uint_t	attr;
	page_t *pp;
	extern struct memlist *virt_avail;

	ttep = &tte;
	for (i = 0, promt = trans_root; i < ntrans_root; i++, promt++) {
		ASSERT(promt->tte_hi != 0);
		ASSERT32(promt->virt_hi == 0 && promt->size_hi == 0);

		vaddr = (caddr_t)COMBINE(promt->virt_hi, promt->virt_lo);

		/*
		 * hack until we get rid of map-for-unix
		 * (translations below KERNELBASE are simply skipped)
		 */
		if (vaddr < (caddr_t)KERNELBASE)
			continue;

		/* Work on a private copy of the prom's tte. */
		ttep->tte_inthi = promt->tte_hi;
		ttep->tte_intlo = promt->tte_lo;
		attr = PROC_DATA | HAT_NOSYNC;
#if defined(TTE_IS_GLOBAL)
		if (TTE_IS_GLOBAL(ttep)) {
			/*
			 * The prom better not use global translations
			 * because a user process might use the same
			 * virtual addresses
			 */
			cmn_err(CE_PANIC, "map_prom: global translation");
			TTE_SET_LOFLAGS(ttep, TTE_GLB_INT, 0);
		}
#endif
		if (TTE_IS_LOCKED(ttep)) {
			/* clear the lock bits */
			TTE_CLR_LOCKED(ttep);
		}
		/* Carry the prom tte's cacheability/side-effect/IE bits over. */
		attr |= (TTE_IS_VCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEVTTE;
		attr |= (TTE_IS_PCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEPTTE;
		attr |= (TTE_IS_SIDEFFECT(ttep)) ? SFMMU_SIDEFFECT : 0;
		attr |= (TTE_IS_IE(ttep)) ? HAT_STRUCTURE_LE : 0;

		/*
		 * size counts the bytes still to be processed and offset the
		 * bytes already processed, both relative to the start of this
		 * translation.
		 */
		size = COMBINE(promt->size_hi, promt->size_lo);
		offset = 0;
		basepfn = TTE_TO_PFN((caddr_t)COMBINE(promt->virt_hi,
		    promt->virt_lo), ttep);
		while (size) {
			vaddr = (caddr_t)(COMBINE(promt->virt_hi,
			    promt->virt_lo) + offset);

			/*
			 * make sure address is not in virt-avail list
			 */
			if (address_in_memlist(virt_avail, (uint64_t)vaddr,
			    size)) {
				cmn_err(CE_PANIC, "map_prom: inconsistent "
				    "translation/avail lists");
			}

			/*
			 * Memory pages must be physically cacheable and
			 * non-memory pages must carry the side-effect bit.
			 */
			pfn = basepfn + mmu_btop(offset);
			if (pf_is_memory(pfn)) {
				if (attr & SFMMU_UNCACHEPTTE) {
					cmn_err(CE_PANIC, "map_prom: "
					    "uncached prom memory page");
				}
			} else {
				if (!(attr & SFMMU_SIDEFFECT)) {
					cmn_err(CE_PANIC, "map_prom: prom "
					    "i/o page without side-effect");
				}
			}

			/*
			 * skip kmem64 area
			 *
			 * Without C_OBP a prom mapping inside kmem64 is
			 * always fatal.  With C_OBP the mapping must match
			 * kmem64_pabase and the rest of the kmem64 range is
			 * stepped over.
			 */
			if (vaddr >= kmem64_base &&
			    vaddr < kmem64_aligned_end) {
#if !defined(C_OBP)
				cmn_err(CE_PANIC,
				    "unexpected kmem64 prom mapping\n");
#else	/* !C_OBP */
				size_t mapsz;

				if (ptob(pfn) !=
				    kmem64_pabase + (vaddr - kmem64_base)) {
					cmn_err(CE_PANIC,
					    "unexpected kmem64 prom mapping\n");
				}

				mapsz = kmem64_aligned_end - vaddr;
				if (mapsz >= size) {
					break;
				}
				size -= mapsz;
				offset += mapsz;
				continue;
#endif	/* !C_OBP */
			}

			oldpfn = sfmmu_vatopfn(vaddr, KHATID, &oldtte);
			ASSERT(oldpfn != PFN_SUSPENDED);
			ASSERT(page_relocate_ready == 0);

			if (oldpfn != PFN_INVALID) {
				/*
				 * mapping already exists.
				 * Verify they are equal
				 */
				if (pfn != oldpfn) {
					cmn_err(CE_PANIC, "map_prom: mapping "
					    "conflict (va=0x%p pfn=%p, "
					    "oldpfn=%p)",
					    (void *)vaddr, (void *)pfn,
					    (void *)oldpfn);
				}
				size -= MMU_PAGESIZE;
				offset += MMU_PAGESIZE;
				continue;
			}

			/* A page the prom has mapped must not be free. */
			pp = page_numtopp_nolock(pfn);
			if ((pp != NULL) && PP_ISFREE((page_t *)pp)) {
				cmn_err(CE_PANIC, "map_prom: "
				    "prom-mapped page (va 0x%p, pfn 0x%p) "
				    "on free list", (void *)vaddr, (void *)pfn);
			}

			/* Build an 8K tte for this page and load it locked. */
			sfmmu_memtte(ttep, pfn, attr, TTE8K);
			sfmmu_tteload(kas.a_hat, ttep, vaddr, pp,
			    HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
			size -= MMU_PAGESIZE;
			offset += MMU_PAGESIZE;
		}
	}

	/*
	 * We claimed kmem64 from prom, so now we need to load tte.
	 */
	if (kmem64_base != NULL) {
		pgcnt_t pages;
		size_t psize;
		int pszc;

		pszc = kmem64_szc;
#ifdef sun4u
		if (pszc > TTE8K) {
			pszc = segkmem_lpszc;
		}
#endif	/* sun4u */
		/* psize is one mapping's size in bytes, pages the same in pages. */
		psize = TTEBYTES(pszc);
		pages = btop(psize);
		basepfn = kmem64_pabase >> MMU_PAGESHIFT;
		vaddr = kmem64_base;
		while (vaddr < kmem64_end) {
			sfmmu_memtte(ttep, basepfn,
			    PROC_DATA | HAT_NOSYNC, pszc);
			sfmmu_tteload(kas.a_hat, ttep, vaddr, NULL,
			    HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
			vaddr += psize;
			basepfn += pages;
		}
		/* Record how many pages of size pszc cover the kmem64 range. */
		map_prom_lpcount[pszc] =
		    ((caddr_t)P2ROUNDUP((uintptr_t)kmem64_end, psize) -
			kmem64_base) >> TTE_PAGE_SHIFT(pszc);
	}
}
415 
416 #undef COMBINE	/* local to previous routine */
417 
418 /*
419  * This routine reads in the "translations" property in to a buffer and
420  * returns a pointer to this buffer and the number of translations.
421  */
422 static struct translation *
423 read_prom_mappings(size_t *ntransrootp)
424 {
425 	char *prop = "translations";
426 	size_t translen;
427 	pnode_t node;
428 	struct translation *transroot;
429 
430 	/*
431 	 * the "translations" property is associated with the mmu node
432 	 */
433 	node = (pnode_t)prom_getphandle(prom_mmu_ihandle());
434 
435 	/*
436 	 * We use the TSB space to read in the prom mappings.  This space
437 	 * is currently not being used because we haven't taken over the
438 	 * trap table yet.  It should be big enough to hold the mappings.
439 	 */
440 	if ((translen = prom_getproplen(node, prop)) == -1)
441 		cmn_err(CE_PANIC, "no translations property");
442 	*ntransrootp = translen / sizeof (*transroot);
443 	translen = roundup(translen, MMU_PAGESIZE);
444 	PRM_DEBUG(translen);
445 	if (translen > TSB_BYTES(ktsb_szcode))
446 		cmn_err(CE_PANIC, "not enough space for translations");
447 
448 	transroot = (struct translation *)ktsb_base;
449 	ASSERT(transroot);
450 	if (prom_getprop(node, prop, (caddr_t)transroot) == -1) {
451 		cmn_err(CE_PANIC, "translations getprop failed");
452 	}
453 	return (transroot);
454 }
455 
456 /*
457  * Init routine of the nucleus data memory allocator.
458  *
459  * The nucleus data memory allocator is organized in ecache_alignsize'd
460  * memory chunks. Memory allocated by ndata_alloc() will never be freed.
461  *
462  * The ndata argument is used as header of the ndata freelist.
463  * Other freelist nodes are placed in the nucleus memory itself
464  * at the beginning of a free memory chunk. Therefore a freelist
465  * node (struct memlist) must fit into the smallest allocatable
466  * memory chunk (ecache_alignsize bytes).
467  *
468  * The memory interval [base, end] passed to ndata_alloc_init() must be
469  * bzero'd to allow the allocator to return bzero'd memory easily.
470  */
471 void
472 ndata_alloc_init(struct memlist *ndata, uintptr_t base, uintptr_t end)
473 {
474 	ASSERT(sizeof (struct memlist) <= ecache_alignsize);
475 
476 	base = roundup(base, ecache_alignsize);
477 	end = end - end % ecache_alignsize;
478 
479 	ASSERT(base < end);
480 
481 	ndata->address = base;
482 	ndata->size = end - base;
483 	ndata->next = NULL;
484 	ndata->prev = NULL;
485 }
486 
487 /*
488  * Deliver the size of the largest free memory chunk.
489  */
490 size_t
491 ndata_maxsize(struct memlist *ndata)
492 {
493 	size_t chunksize = ndata->size;
494 
495 	while ((ndata = ndata->next) != NULL) {
496 		if (chunksize < ndata->size)
497 			chunksize = ndata->size;
498 	}
499 
500 	return (chunksize);
501 }
502 
503 /*
504  * This is a special function to figure out if the memory chunk needed
505  * for the page structs can fit in the nucleus or not. If it fits the
506  * function calculates and returns the possible remaining ndata size
507  * in the last element if the size needed for page structs would be
508  * allocated from the nucleus.
509  */
510 size_t
511 ndata_spare(struct memlist *ndata, size_t wanted, size_t alignment)
512 {
513 	struct memlist *frlist;
514 	uintptr_t base;
515 	uintptr_t end;
516 
517 	for (frlist = ndata; frlist != NULL; frlist = frlist->next) {
518 		base = roundup(frlist->address, alignment);
519 		end = roundup(base + wanted, ecache_alignsize);
520 
521 		if (end <= frlist->address + frlist->size) {
522 			if (frlist->next == NULL)
523 				return (frlist->address + frlist->size - end);
524 
525 			while (frlist->next != NULL)
526 				frlist = frlist->next;
527 
528 			return (frlist->size);
529 		}
530 	}
531 
532 	return (0);
533 }
534 
535 /*
536  * Allocate the last properly aligned memory chunk.
537  * This function is called when no more large nucleus memory chunks
538  * will be allocated.  The remaining free nucleus memory at the end
539  * of the nucleus can be added to the phys_avail list.
540  */
541 void *
542 ndata_extra_base(struct memlist *ndata, size_t alignment)
543 {
544 	uintptr_t base;
545 	size_t wasteage = 0;
546 #ifdef	DEBUG
547 	static int called = 0;
548 
549 	if (called++ > 0)
550 		cmn_err(CE_PANIC, "ndata_extra_base() called more than once");
551 #endif /* DEBUG */
552 
553 	/*
554 	 * The alignment needs to be a multiple of ecache_alignsize.
555 	 */
556 	ASSERT((alignment % ecache_alignsize) ==  0);
557 
558 	while (ndata->next != NULL) {
559 		wasteage += ndata->size;
560 		ndata = ndata->next;
561 	}
562 
563 	base = roundup(ndata->address, alignment);
564 
565 	if (base >= ndata->address + ndata->size)
566 		return (NULL);
567 
568 	if (base == ndata->address) {
569 		if (ndata->prev != NULL)
570 			ndata->prev->next = NULL;
571 		else
572 			ndata->size = 0;
573 
574 		bzero((void *)base, sizeof (struct memlist));
575 
576 	} else {
577 		ndata->size = base - ndata->address;
578 		wasteage += ndata->size;
579 	}
580 	PRM_DEBUG(wasteage);
581 
582 	return ((void *)base);
583 }
584 
585 /*
586  * Select the best matching buffer, avoid memory fragmentation.
587  */
588 static struct memlist *
589 ndata_select_chunk(struct memlist *ndata, size_t wanted, size_t alignment)
590 {
591 	struct memlist *fnd_below = NULL;
592 	struct memlist *fnd_above = NULL;
593 	struct memlist *fnd_unused = NULL;
594 	struct memlist *frlist;
595 	uintptr_t base;
596 	uintptr_t end;
597 	size_t below;
598 	size_t above;
599 	size_t unused;
600 	size_t best_below = ULONG_MAX;
601 	size_t best_above = ULONG_MAX;
602 	size_t best_unused = ULONG_MAX;
603 
604 	ASSERT(ndata != NULL);
605 
606 	/*
607 	 * Look for the best matching buffer, avoid memory fragmentation.
608 	 * The following strategy is used, try to find
609 	 *   1. an exact fitting buffer
610 	 *   2. avoid wasting any space below the buffer, take first
611 	 *	fitting buffer
612 	 *   3. avoid wasting any space above the buffer, take first
613 	 *	fitting buffer
614 	 *   4. avoid wasting space, take first fitting buffer
615 	 *   5. take the last buffer in chain
616 	 */
617 	for (frlist = ndata; frlist != NULL; frlist = frlist->next) {
618 		base = roundup(frlist->address, alignment);
619 		end = roundup(base + wanted, ecache_alignsize);
620 
621 		if (end > frlist->address + frlist->size)
622 			continue;
623 
624 		below = (base - frlist->address) / ecache_alignsize;
625 		above = (frlist->address + frlist->size - end) /
626 		    ecache_alignsize;
627 		unused = below + above;
628 
629 		if (unused == 0)
630 			return (frlist);
631 
632 		if (frlist->next == NULL)
633 			break;
634 
635 		if (below < best_below) {
636 			best_below = below;
637 			fnd_below = frlist;
638 		}
639 
640 		if (above < best_above) {
641 			best_above = above;
642 			fnd_above = frlist;
643 		}
644 
645 		if (unused < best_unused) {
646 			best_unused = unused;
647 			fnd_unused = frlist;
648 		}
649 	}
650 
651 	if (best_below == 0)
652 		return (fnd_below);
653 	if (best_above == 0)
654 		return (fnd_above);
655 	if (best_unused < ULONG_MAX)
656 		return (fnd_unused);
657 
658 	return (frlist);
659 }
660 
661 /*
662  * Nucleus data memory allocator.
663  * The granularity of the allocator is ecache_alignsize.
664  * See also comment for ndata_alloc_init().
665  */
666 void *
667 ndata_alloc(struct memlist *ndata, size_t wanted, size_t alignment)
668 {
669 	struct memlist *found;
670 	struct memlist *fnd_above;
671 	uintptr_t base;
672 	uintptr_t end;
673 	size_t below;
674 	size_t above;
675 
676 	/*
677 	 * Look for the best matching buffer, avoid memory fragmentation.
678 	 */
679 	if ((found = ndata_select_chunk(ndata, wanted, alignment)) == NULL)
680 		return (NULL);
681 
682 	/*
683 	 * Allocate the nucleus data buffer.
684 	 */
685 	base = roundup(found->address, alignment);
686 	end = roundup(base + wanted, ecache_alignsize);
687 	ASSERT(end <= found->address + found->size);
688 
689 	below = base - found->address;
690 	above = found->address + found->size - end;
691 	ASSERT(above == 0 || (above % ecache_alignsize) == 0);
692 
693 	if (below >= ecache_alignsize) {
694 		/*
695 		 * There is free memory below the allocated memory chunk.
696 		 */
697 		found->size = below - below % ecache_alignsize;
698 
699 		if (above) {
700 			fnd_above = (struct memlist *)end;
701 			fnd_above->address = end;
702 			fnd_above->size = above;
703 
704 			if ((fnd_above->next = found->next) != NULL)
705 				found->next->prev = fnd_above;
706 			fnd_above->prev = found;
707 			found->next = fnd_above;
708 		}
709 
710 		return ((void *)base);
711 	}
712 
713 	if (found->prev == NULL) {
714 		/*
715 		 * The first chunk (ndata) is selected.
716 		 */
717 		ASSERT(found == ndata);
718 		if (above) {
719 			found->address = end;
720 			found->size = above;
721 		} else if (found->next != NULL) {
722 			found->address = found->next->address;
723 			found->size = found->next->size;
724 			if ((found->next = found->next->next) != NULL)
725 				found->next->prev = found;
726 
727 			bzero((void *)found->address, sizeof (struct memlist));
728 		} else {
729 			found->address = end;
730 			found->size = 0;
731 		}
732 
733 		return ((void *)base);
734 	}
735 
736 	/*
737 	 * Not the first chunk.
738 	 */
739 	if (above) {
740 		fnd_above = (struct memlist *)end;
741 		fnd_above->address = end;
742 		fnd_above->size = above;
743 
744 		if ((fnd_above->next = found->next) != NULL)
745 			fnd_above->next->prev = fnd_above;
746 		fnd_above->prev = found->prev;
747 		found->prev->next = fnd_above;
748 
749 	} else {
750 		if ((found->prev->next = found->next) != NULL)
751 			found->next->prev = found->prev;
752 	}
753 
754 	bzero((void *)found->address, sizeof (struct memlist));
755 
756 	return ((void *)base);
757 }
758 
759 /*
760  * Size the kernel TSBs based upon the amount of physical
761  * memory in the system.
762  */
763 static void
764 calc_tsb_sizes(pgcnt_t npages)
765 {
766 	PRM_DEBUG(npages);
767 
768 	if (npages <= TSB_FREEMEM_MIN) {
769 		ktsb_szcode = TSB_128K_SZCODE;
770 		enable_bigktsb = 0;
771 	} else if (npages <= TSB_FREEMEM_LARGE / 2) {
772 		ktsb_szcode = TSB_256K_SZCODE;
773 		enable_bigktsb = 0;
774 	} else if (npages <= TSB_FREEMEM_LARGE) {
775 		ktsb_szcode = TSB_512K_SZCODE;
776 		enable_bigktsb = 0;
777 	} else if (npages <= TSB_FREEMEM_LARGE * 2 ||
778 	    enable_bigktsb == 0) {
779 		ktsb_szcode = TSB_1M_SZCODE;
780 		enable_bigktsb = 0;
781 	} else {
782 		ktsb_szcode = highbit(npages - 1);
783 		ktsb_szcode -= TSB_START_SIZE;
784 		ktsb_szcode = MAX(ktsb_szcode, MIN_BIGKTSB_SZCODE);
785 		ktsb_szcode = MIN(ktsb_szcode, MAX_BIGKTSB_SZCODE);
786 	}
787 
788 	/*
789 	 * We choose the TSB to hold kernel 4M mappings to have twice
790 	 * the reach as the primary kernel TSB since this TSB will
791 	 * potentially (currently) be shared by both mappings to all of
792 	 * physical memory plus user TSBs. If this TSB has to be in nucleus
793 	 * (only for Spitfire and Cheetah) limit its size to 64K.
794 	 */
795 	ktsb4m_szcode = highbit((2 * npages) / TTEPAGES(TTE4M) - 1);
796 	ktsb4m_szcode -= TSB_START_SIZE;
797 	ktsb4m_szcode = MAX(ktsb4m_szcode, TSB_MIN_SZCODE);
798 	ktsb4m_szcode = MIN(ktsb4m_szcode, TSB_SOFTSZ_MASK);
799 	if ((enable_bigktsb == 0 || ktsb_phys == 0) && ktsb4m_szcode >
800 	    TSB_64K_SZCODE) {
801 		ktsb4m_szcode = TSB_64K_SZCODE;
802 		max_bootlp_tteszc = TTE8K;
803 	}
804 
805 	ktsb_sz = TSB_BYTES(ktsb_szcode);	/* kernel 8K tsb size */
806 	ktsb4m_sz = TSB_BYTES(ktsb4m_szcode);	/* kernel 4M tsb size */
807 }
808 
809 /*
810  * Allocate kernel TSBs from nucleus data memory.
811  * The function return 0 on success and -1 on failure.
812  */
813 int
814 ndata_alloc_tsbs(struct memlist *ndata, pgcnt_t npages)
815 {
816 	/*
817 	 * Set ktsb_phys to 1 if the processor supports ASI_QUAD_LDD_PHYS.
818 	 */
819 	sfmmu_setup_4lp();
820 
821 	/*
822 	 * Size the kernel TSBs based upon the amount of physical
823 	 * memory in the system.
824 	 */
825 	calc_tsb_sizes(npages);
826 
827 	/*
828 	 * Allocate the 8K kernel TSB if it belongs inside the nucleus.
829 	 */
830 	if (enable_bigktsb == 0) {
831 		if ((ktsb_base = ndata_alloc(ndata, ktsb_sz, ktsb_sz)) == NULL)
832 			return (-1);
833 		ASSERT(!((uintptr_t)ktsb_base & (ktsb_sz - 1)));
834 
835 		PRM_DEBUG(ktsb_base);
836 		PRM_DEBUG(ktsb_sz);
837 		PRM_DEBUG(ktsb_szcode);
838 	}
839 
840 	/*
841 	 * Next, allocate 4M kernel TSB from the nucleus since it's small.
842 	 */
843 	if (ktsb4m_szcode <= TSB_64K_SZCODE) {
844 
845 		ktsb4m_base = ndata_alloc(ndata, ktsb4m_sz, ktsb4m_sz);
846 		if (ktsb4m_base == NULL)
847 			return (-1);
848 		ASSERT(!((uintptr_t)ktsb4m_base & (ktsb4m_sz - 1)));
849 
850 		PRM_DEBUG(ktsb4m_base);
851 		PRM_DEBUG(ktsb4m_sz);
852 		PRM_DEBUG(ktsb4m_szcode);
853 	}
854 
855 	return (0);
856 }
857 
858 /*
859  * Allocate hat structs from the nucleus data memory.
860  */
861 int
862 ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages, pgcnt_t kpm_npages)
863 {
864 	size_t	mml_alloc_sz;
865 	size_t	cb_alloc_sz;
866 	int	max_nucuhme_buckets = MAX_NUCUHME_BUCKETS;
867 	int	max_nuckhme_buckets = MAX_NUCKHME_BUCKETS;
868 	ulong_t hme_buckets;
869 
870 	if (enable_bigktsb) {
871 		ASSERT((max_nucuhme_buckets + max_nuckhme_buckets) *
872 		    sizeof (struct hmehash_bucket) <=
873 			TSB_BYTES(TSB_1M_SZCODE));
874 
875 		max_nucuhme_buckets *= 2;
876 		max_nuckhme_buckets *= 2;
877 	}
878 
879 	/*
880 	 * The number of buckets in the hme hash tables
881 	 * is a power of 2 such that the average hash chain length is
882 	 * HMENT_HASHAVELEN.  The number of buckets for the user hash is
883 	 * a function of physical memory and a predefined overmapping factor.
884 	 * The number of buckets for the kernel hash is a function of
885 	 * physical memory only.
886 	 */
887 	hme_buckets = (npages * HMEHASH_FACTOR) /
888 		(HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT));
889 
890 	uhmehash_num = (int)MIN(hme_buckets, MAX_UHME_BUCKETS);
891 
892 	if (uhmehash_num > USER_BUCKETS_THRESHOLD) {
893 		/*
894 		 * if uhmehash_num is not power of 2 round it down to the
895 		 *  next power of 2.
896 		 */
897 		uint_t align = 1 << (highbit(uhmehash_num - 1) - 1);
898 		uhmehash_num = P2ALIGN(uhmehash_num, align);
899 	} else
900 		uhmehash_num = 1 << highbit(uhmehash_num - 1);
901 
902 	hme_buckets = npages / (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT);
903 	khmehash_num = (int)MIN(hme_buckets, MAX_KHME_BUCKETS);
904 	khmehash_num = 1 << highbit(khmehash_num - 1);
905 	khmehash_num = MAX(khmehash_num, MIN_KHME_BUCKETS);
906 
907 	if ((khmehash_num > max_nuckhme_buckets) ||
908 		(uhmehash_num > max_nucuhme_buckets)) {
909 		khme_hash = NULL;
910 		uhme_hash = NULL;
911 	} else {
912 		size_t hmehash_sz = (uhmehash_num + khmehash_num) *
913 		    sizeof (struct hmehash_bucket);
914 
915 		if ((khme_hash = ndata_alloc(ndata, hmehash_sz,
916 		    ecache_alignsize)) != NULL)
917 			uhme_hash = &khme_hash[khmehash_num];
918 		else
919 			uhme_hash = NULL;
920 
921 		PRM_DEBUG(hmehash_sz);
922 	}
923 
924 	PRM_DEBUG(khme_hash);
925 	PRM_DEBUG(khmehash_num);
926 	PRM_DEBUG(uhme_hash);
927 	PRM_DEBUG(uhmehash_num);
928 
929 	/*
930 	 * For the page mapping list mutex array we allocate one mutex
931 	 * for every 128 pages (1 MB) with a minimum of 64 entries and
932 	 * a maximum of 8K entries. For the initial computation npages
933 	 * is rounded up (ie. 1 << highbit(npages * 1.5 / 128))
934 	 *
935 	 * mml_shift is roughly log2(mml_table_sz) + 3 for MLIST_HASH
936 	 *
937 	 * It is not required that this be allocated from the nucleus,
938 	 * but it is desirable.  So we first allocate from the nucleus
939 	 * everything that must be there.  Having done so, if mml_table
940 	 * will fit within what remains of the nucleus then it will be
941 	 * allocated here.  If not, set mml_table to NULL, which will cause
942 	 * startup_memlist() to BOP_ALLOC() space for it after our return...
943 	 */
944 	mml_table_sz = 1 << highbit((npages * 3) / 256);
945 	if (mml_table_sz < 64)
946 		mml_table_sz = 64;
947 	else if (mml_table_sz > 8192)
948 		mml_table_sz = 8192;
949 	mml_shift = highbit(mml_table_sz) + 3;
950 
951 	PRM_DEBUG(mml_table_sz);
952 	PRM_DEBUG(mml_shift);
953 
954 	mml_alloc_sz = mml_table_sz * sizeof (kmutex_t);
955 
956 	mml_table = ndata_alloc(ndata, mml_alloc_sz, ecache_alignsize);
957 
958 	PRM_DEBUG(mml_table);
959 
960 	cb_alloc_sz = sfmmu_max_cb_id * sizeof (struct sfmmu_callback);
961 	PRM_DEBUG(cb_alloc_sz);
962 	sfmmu_cb_table = ndata_alloc(ndata, cb_alloc_sz, ecache_alignsize);
963 	PRM_DEBUG(sfmmu_cb_table);
964 
965 	/*
966 	 * For the kpm_page mutex array we allocate one mutex every 16
967 	 * kpm pages (64MB). In smallpage mode we allocate one mutex
968 	 * every 8K pages. The minimum is set to 64 entries and the
969 	 * maximum to 8K entries.
970 	 *
971 	 * It is not required that this be allocated from the nucleus,
972 	 * but it is desirable.  So we first allocate from the nucleus
973 	 * everything that must be there.  Having done so, if kpmp_table
974 	 * or kpmp_stable will fit within what remains of the nucleus
975 	 * then it will be allocated here.  If not, startup_memlist()
976 	 * will use BOP_ALLOC() space for it after our return...
977 	 */
978 	if (kpm_enable) {
979 		size_t	kpmp_alloc_sz;
980 
981 		if (kpm_smallpages == 0) {
982 			kpmp_shift = highbit(sizeof (kpm_page_t)) - 1;
983 			kpmp_table_sz = 1 << highbit(kpm_npages / 16);
984 			kpmp_table_sz = (kpmp_table_sz < 64) ? 64 :
985 			    ((kpmp_table_sz > 8192) ? 8192 : kpmp_table_sz);
986 			kpmp_alloc_sz = kpmp_table_sz * sizeof (kpm_hlk_t);
987 
988 			kpmp_table = ndata_alloc(ndata, kpmp_alloc_sz,
989 			    ecache_alignsize);
990 
991 			PRM_DEBUG(kpmp_table);
992 			PRM_DEBUG(kpmp_table_sz);
993 
994 			kpmp_stable_sz = 0;
995 			kpmp_stable = NULL;
996 		} else {
997 			ASSERT(kpm_pgsz == PAGESIZE);
998 			kpmp_shift = highbit(sizeof (kpm_shlk_t)) + 1;
999 			kpmp_stable_sz = 1 << highbit(kpm_npages / 8192);
1000 			kpmp_stable_sz = (kpmp_stable_sz < 64) ? 64 :
1001 			    ((kpmp_stable_sz > 8192) ? 8192 : kpmp_stable_sz);
1002 			kpmp_alloc_sz = kpmp_stable_sz * sizeof (kpm_shlk_t);
1003 
1004 			kpmp_stable = ndata_alloc(ndata, kpmp_alloc_sz,
1005 			    ecache_alignsize);
1006 
1007 			PRM_DEBUG(kpmp_stable);
1008 			PRM_DEBUG(kpmp_stable_sz);
1009 
1010 			kpmp_table_sz = 0;
1011 			kpmp_table = NULL;
1012 		}
1013 		PRM_DEBUG(kpmp_shift);
1014 	}
1015 
1016 	return (0);
1017 }
1018 
1019 /*
1020  * Allocate virtual addresses at base with given alignment.
1021  * Note that there is no physical memory behind the address yet.
1022  */
1023 caddr_t
1024 alloc_hme_buckets(caddr_t base, int alignsize)
1025 {
1026 	size_t hmehash_sz = (uhmehash_num + khmehash_num) *
1027 	    sizeof (struct hmehash_bucket);
1028 
1029 	ASSERT(khme_hash == NULL);
1030 	ASSERT(uhme_hash == NULL);
1031 
1032 	base = (caddr_t)roundup((uintptr_t)base, alignsize);
1033 	hmehash_sz = roundup(hmehash_sz, alignsize);
1034 
1035 	khme_hash = (struct hmehash_bucket *)base;
1036 	uhme_hash = (struct hmehash_bucket *)((caddr_t)khme_hash +
1037 	    khmehash_num * sizeof (struct hmehash_bucket));
1038 	base += hmehash_sz;
1039 	return (base);
1040 }
1041 
1042 /*
1043  * This function bop allocs kernel TSBs.
1044  */
1045 caddr_t
1046 sfmmu_ktsb_alloc(caddr_t tsbbase)
1047 {
1048 	caddr_t vaddr;
1049 
1050 	if (enable_bigktsb) {
1051 		ktsb_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb_sz);
1052 		vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb_base, ktsb_sz,
1053 		    ktsb_sz);
1054 		if (vaddr != ktsb_base)
1055 			cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc"
1056 			    " 8K bigktsb");
1057 		ktsb_base = vaddr;
1058 		tsbbase = ktsb_base + ktsb_sz;
1059 		PRM_DEBUG(ktsb_base);
1060 		PRM_DEBUG(tsbbase);
1061 	}
1062 
1063 	if (ktsb4m_szcode > TSB_64K_SZCODE) {
1064 		ASSERT(ktsb_phys && enable_bigktsb);
1065 		ktsb4m_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb4m_sz);
1066 		vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb4m_base, ktsb4m_sz,
1067 		    ktsb4m_sz);
1068 		if (vaddr != ktsb4m_base)
1069 			cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc"
1070 			    " 4M bigktsb");
1071 		ktsb4m_base = vaddr;
1072 		tsbbase = ktsb4m_base + ktsb4m_sz;
1073 		PRM_DEBUG(ktsb4m_base);
1074 		PRM_DEBUG(tsbbase);
1075 	}
1076 	return (tsbbase);
1077 }
1078 
1079 /*
1080  * Moves code assembled outside of the trap table into the trap
1081  * table taking care to relocate relative branches to code outside
1082  * of the trap handler.
1083  */
1084 static void
1085 sfmmu_reloc_trap_handler(void *tablep, void *start, size_t count)
1086 {
1087 	size_t i;
1088 	uint32_t *src;
1089 	uint32_t *dst;
1090 	uint32_t inst;
1091 	int op, op2;
1092 	int32_t offset;
1093 	int disp;
1094 
1095 	src = start;
1096 	dst = tablep;
1097 	offset = src - dst;
1098 	for (src = start, i = 0; i < count; i++, src++, dst++) {
1099 		inst = *dst = *src;
1100 		op = (inst >> 30) & 0x2;
1101 		if (op == 1) {
1102 			/* call */
1103 			disp = ((int32_t)inst << 2) >> 2; /* sign-extend */
1104 			if (disp + i >= 0 && disp + i < count)
1105 				continue;
1106 			disp += offset;
1107 			inst = 0x40000000u | (disp & 0x3fffffffu);
1108 			*dst = inst;
1109 		} else if (op == 0) {
1110 			/* branch or sethi */
1111 			op2 = (inst >> 22) & 0x7;
1112 
1113 			switch (op2) {
1114 			case 0x3: /* BPr */
1115 				disp = (((inst >> 20) & 0x3) << 14) |
1116 				    (inst & 0x3fff);
1117 				disp = (disp << 16) >> 16; /* sign-extend */
1118 				if (disp + i >= 0 && disp + i < count)
1119 					continue;
1120 				disp += offset;
1121 				if (((disp << 16) >> 16) != disp)
1122 					cmn_err(CE_PANIC, "bad reloc");
1123 				inst &= ~0x303fff;
1124 				inst |= (disp & 0x3fff);
1125 				inst |= (disp & 0xc000) << 6;
1126 				break;
1127 
1128 			case 0x2: /* Bicc */
1129 				disp = ((int32_t)inst << 10) >> 10;
1130 				if (disp + i >= 0 && disp + i < count)
1131 					continue;
1132 				disp += offset;
1133 				if (((disp << 10) >> 10) != disp)
1134 					cmn_err(CE_PANIC, "bad reloc");
1135 				inst &= ~0x3fffff;
1136 				inst |= (disp & 0x3fffff);
1137 				break;
1138 
1139 			case 0x1: /* Bpcc */
1140 				disp = ((int32_t)inst << 13) >> 13;
1141 				if (disp + i >= 0 && disp + i < count)
1142 					continue;
1143 				disp += offset;
1144 				if (((disp << 13) >> 13) != disp)
1145 					cmn_err(CE_PANIC, "bad reloc");
1146 				inst &= ~0x7ffff;
1147 				inst |= (disp & 0x7ffffu);
1148 				break;
1149 			}
1150 			*dst = inst;
1151 		}
1152 	}
1153 	flush_instr_mem(tablep, count * sizeof (uint32_t));
1154 }
1155 
1156 /*
1157  * Routine to allocate a large page to use in the TSB caches.
1158  */
1159 /*ARGSUSED*/
1160 static page_t *
1161 sfmmu_tsb_page_create(void *addr, size_t size, int vmflag, void *arg)
1162 {
1163 	int pgflags;
1164 
1165 	pgflags = PG_EXCL;
1166 	if ((vmflag & VM_NOSLEEP) == 0)
1167 		pgflags |= PG_WAIT;
1168 	if (vmflag & VM_PANIC)
1169 		pgflags |= PG_PANIC;
1170 	if (vmflag & VM_PUSHPAGE)
1171 		pgflags |= PG_PUSHPAGE;
1172 
1173 	return (page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size,
1174 	    pgflags, &kvseg, addr, arg));
1175 }
1176 
1177 /*
1178  * Allocate a large page to back the virtual address range
1179  * [addr, addr + size).  If addr is NULL, allocate the virtual address
1180  * space as well.
1181  */
1182 static void *
1183 sfmmu_tsb_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
1184     uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
1185     void *pcarg)
1186 {
1187 	page_t *ppl;
1188 	page_t *rootpp;
1189 	caddr_t addr = inaddr;
1190 	pgcnt_t npages = btopr(size);
1191 	page_t **ppa;
1192 	int i = 0;
1193 
1194 	/*
1195 	 * Assuming that only TSBs will call this with size > PAGESIZE
1196 	 * There is no reason why this couldn't be expanded to 8k pages as
1197 	 * well, or other page sizes in the future .... but for now, we
1198 	 * only support fixed sized page requests.
1199 	 */
1200 	if ((inaddr == NULL) && ((addr = vmem_xalloc(vmp, size, size, 0, 0,
1201 	    NULL, NULL, vmflag)) == NULL))
1202 		return (NULL);
1203 
1204 	/* If we ever don't want TSB slab-sized pages, this will panic */
1205 	ASSERT(((uintptr_t)addr & (tsb_slab_size - 1)) == 0);
1206 
1207 	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
1208 		if (inaddr == NULL)
1209 			vmem_xfree(vmp, addr, size);
1210 		return (NULL);
1211 	}
1212 
1213 	ppl = page_create_func(addr, size, vmflag, pcarg);
1214 	if (ppl == NULL) {
1215 		if (inaddr == NULL)
1216 			vmem_xfree(vmp, addr, size);
1217 		page_unresv(npages);
1218 		return (NULL);
1219 	}
1220 
1221 	rootpp = ppl;
1222 	ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
1223 	while (ppl != NULL) {
1224 		page_t *pp = ppl;
1225 		ppa[i++] = pp;
1226 		page_sub(&ppl, pp);
1227 		ASSERT(page_iolock_assert(pp));
1228 		page_io_unlock(pp);
1229 	}
1230 
1231 	/*
1232 	 * Load the locked entry.  It's OK to preload the entry into
1233 	 * the TSB since we now support large mappings in the kernel TSB.
1234 	 */
1235 	hat_memload_array(kas.a_hat, (caddr_t)rootpp->p_offset, size,
1236 	    ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr, HAT_LOAD_LOCK);
1237 
1238 	for (--i; i >= 0; --i) {
1239 		(void) page_pp_lock(ppa[i], 0, 1);
1240 		page_unlock(ppa[i]);
1241 	}
1242 
1243 	kmem_free(ppa, npages * sizeof (page_t *));
1244 	return (addr);
1245 }
1246 
1247 /* Called to import new spans into the TSB vmem arenas */
1248 void *
1249 sfmmu_tsb_segkmem_alloc(vmem_t *vmp, size_t size, int vmflag)
1250 {
1251 	lgrp_id_t lgrpid = LGRP_NONE;
1252 
1253 	if (tsb_lgrp_affinity) {
1254 		/*
1255 		 * Search for the vmp->lgrpid mapping by brute force;
1256 		 * some day vmp will have an lgrp, until then we have
1257 		 * to do this the hard way.
1258 		 */
1259 		for (lgrpid = 0; lgrpid < NLGRPS_MAX &&
1260 		    vmp != kmem_tsb_default_arena[lgrpid]; lgrpid++);
1261 		if (lgrpid == NLGRPS_MAX)
1262 			lgrpid = LGRP_NONE;
1263 	}
1264 
1265 	return (sfmmu_tsb_xalloc(vmp, NULL, size, vmflag, 0,
1266 	    sfmmu_tsb_page_create, lgrpid != LGRP_NONE? &lgrpid : NULL));
1267 }
1268 
1269 /* Called to free spans from the TSB vmem arenas */
1270 void
1271 sfmmu_tsb_segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
1272 {
1273 	page_t *pp;
1274 	caddr_t addr = inaddr;
1275 	caddr_t eaddr;
1276 	pgcnt_t npages = btopr(size);
1277 	pgcnt_t pgs_left = npages;
1278 	page_t *rootpp = NULL;
1279 
1280 	ASSERT(((uintptr_t)addr & (tsb_slab_size - 1)) == 0);
1281 
1282 	hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1283 
1284 	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
1285 		pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
1286 		if (pp == NULL)
1287 			panic("sfmmu_tsb_segkmem_free: page not found");
1288 
1289 		ASSERT(PAGE_EXCL(pp));
1290 		page_pp_unlock(pp, 0, 1);
1291 
1292 		if (rootpp == NULL)
1293 			rootpp = pp;
1294 		if (--pgs_left == 0) {
1295 			/*
1296 			 * similar logic to segspt_free_pages, but we know we
1297 			 * have one large page.
1298 			 */
1299 			page_destroy_pages(rootpp);
1300 		}
1301 	}
1302 	page_unresv(npages);
1303 
1304 	if (vmp != NULL)
1305 		vmem_xfree(vmp, inaddr, size);
1306 }
1307