xref: /titanic_52/usr/src/uts/sun4/vm/sfmmu.c (revision c2aa8c918a0c67f7fd93724a31efac84968fc12c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <vm/hat.h>
30 #include <vm/hat_sfmmu.h>
31 #include <vm/page.h>
32 #include <sys/pte.h>
33 #include <sys/systm.h>
34 #include <sys/mman.h>
35 #include <sys/sysmacros.h>
36 #include <sys/machparam.h>
37 #include <sys/vtrace.h>
38 #include <sys/kmem.h>
39 #include <sys/mmu.h>
40 #include <sys/cmn_err.h>
41 #include <sys/cpu.h>
42 #include <sys/cpuvar.h>
43 #include <sys/debug.h>
44 #include <sys/lgrp.h>
45 #include <sys/archsystm.h>
46 #include <sys/machsystm.h>
47 #include <sys/vmsystm.h>
48 #include <sys/bitmap.h>
49 #include <vm/as.h>
50 #include <vm/seg.h>
51 #include <vm/seg_kmem.h>
52 #include <vm/seg_kp.h>
53 #include <vm/seg_kpm.h>
54 #include <vm/rm.h>
55 #include <vm/vm_dep.h>
56 #include <sys/t_lock.h>
57 #include <sys/vm_machparam.h>
58 #include <sys/promif.h>
59 #include <sys/prom_isa.h>
60 #include <sys/prom_plat.h>
61 #include <sys/prom_debug.h>
62 #include <sys/privregs.h>
63 #include <sys/bootconf.h>
64 #include <sys/memlist.h>
65 #include <sys/memlist_plat.h>
66 #include <sys/cpu_module.h>
67 #include <sys/reboot.h>
68 #include <sys/kdi.h>
69 
/*
 * Static routines
 */
static void	sfmmu_map_prom_mappings(struct translation *, size_t);
static struct translation *read_prom_mappings(size_t *);
static void	sfmmu_reloc_trap_handler(void *, void *, size_t);

/*
 * External routines
 */
extern void sfmmu_remap_kernel(void);
extern void sfmmu_patch_utsb(void);

/*
 * Global Data:
 */
extern caddr_t	textva, datava;
extern tte_t	ktext_tte, kdata_tte;	/* ttes for kernel text and data */
extern int	enable_bigktsb;

uint64_t memsegspa = (uintptr_t)MSEG_NULLPTR_PA; /* memsegs physical linkage */
uint64_t memseg_phash[N_MEM_SLOTS];	/* use physical memseg addresses */

/*
 * Set to 1 at the end of hat_kern_setup(), once the prom's mappings have
 * been loaded into the kernel hat and the kernel TSBs are initialized.
 */
int	sfmmu_kern_mapped = 0;

/*
 * DMMU primary context register for the kernel context.  Machine specific
 * code inserts the correct page size codes when necessary.
 */
uint64_t kcontextreg = KCONTEXT;

#ifdef DEBUG
/*
 * Set by ndata_extra_base() when the last free nucleus chunk does not end
 * at the address passed in by the caller.
 */
static int ndata_middle_hole_detected = 0;
#endif

/* Extern Global Data */

extern int page_relocate_ready;

/*
 * Controls the logic which enables the use of the
 * QUAD_LDD_PHYS ASI for TSB accesses.
 */
extern int	ktsb_phys;
114 
115 /*
116  * Global Routines called from within:
117  *	usr/src/uts/sun4u
118  *	usr/src/uts/sfmmu
119  *	usr/src/uts/sun
120  */
121 
122 pfn_t
123 va_to_pfn(void *vaddr)
124 {
125 	u_longlong_t physaddr;
126 	int mode, valid;
127 
128 	if (tba_taken_over)
129 		return (hat_getpfnum(kas.a_hat, (caddr_t)vaddr));
130 
131 #if !defined(C_OBP)
132 	if ((caddr_t)vaddr >= kmem64_base && (caddr_t)vaddr < kmem64_end) {
133 		if (kmem64_pabase == (uint64_t)-1)
134 			prom_panic("va_to_pfn: kmem64_pabase not init");
135 		physaddr = kmem64_pabase + ((caddr_t)vaddr - kmem64_base);
136 		return ((pfn_t)physaddr >> MMU_PAGESHIFT);
137 	}
138 #endif	/* !C_OBP */
139 
140 	if ((prom_translate_virt(vaddr, &valid, &physaddr, &mode) != -1) &&
141 	    (valid == -1)) {
142 		return ((pfn_t)(physaddr >> MMU_PAGESHIFT));
143 	}
144 	return (PFN_INVALID);
145 }
146 
147 uint64_t
148 va_to_pa(void *vaddr)
149 {
150 	pfn_t pfn;
151 
152 	if ((pfn = va_to_pfn(vaddr)) == PFN_INVALID)
153 		return ((uint64_t)-1);
154 	return (((uint64_t)pfn << MMU_PAGESHIFT) |
155 		((uint64_t)vaddr & MMU_PAGEOFFSET));
156 }
157 
/*
 * Take over the mmu from the prom: patch the TSB handling code for the
 * selected kernel TSB configuration, mirror the prom's translations into
 * the kernel hat, and (re)initialize the kernel TSBs.  On return
 * sfmmu_kern_mapped is set to 1.
 */
void
hat_kern_setup(void)
{
	struct translation *trans_root;
	size_t ntrans_root;
	extern void startup_fixup_physavail(void);

	/*
	 * These are the steps we take to take over the mmu from the prom.
	 *
	 * (1)	Read the prom's mappings through the translation property.
	 * (2)	Remap the kernel text and kernel data with 2 locked 4MB ttes.
	 *	Create the hmeblks for these 2 ttes at this time.
	 * (3)	Create hat structures for all other prom mappings.  Since the
	 *	kernel text and data hme_blks have already been created we
	 *	skip the equivalent prom's mappings.
	 * (4)	Initialize the tsb and its corresponding hardware regs.
	 * (5)	Take over the trap table (currently in startup).
	 * (6)	Up to this point it is possible the prom required some of its
	 *	locked tte's.  Now that we own the trap table we remove them.
	 */

	/* Record the physical addresses of both kernel TSBs. */
	ktsb_pbase = va_to_pa(ktsb_base);
	ktsb4m_pbase = va_to_pa(ktsb4m_base);
	PRM_DEBUG(ktsb_pbase);
	PRM_DEBUG(ktsb4m_pbase);

	sfmmu_patch_ktsb();
	sfmmu_patch_utsb();
	sfmmu_patch_mmu_asi(ktsb_phys);

	sfmmu_init_tsbs();

	/* The kpm TSB-miss patch is only applied for large-page kpm. */
	if (kpm_enable) {
		sfmmu_kpm_patch_tlbm();
		if (kpm_smallpages == 0) {
			sfmmu_kpm_patch_tsbm();
		}
	}

	if (!shctx_on || disable_shctx) {
		sfmmu_patch_shctx();
	}

	/*
	 * The 8K-indexed kernel TSB space is used to hold
	 * translations below...
	 */
	trans_root = read_prom_mappings(&ntrans_root);
	sfmmu_remap_kernel();
	startup_fixup_physavail();
	mmu_init_kernel_pgsz(kas.a_hat);
	sfmmu_map_prom_mappings(trans_root, ntrans_root);

	/*
	 * We invalidate 8K kernel TSB because we used it in
	 * sfmmu_map_prom_mappings()
	 */
	sfmmu_inv_tsb(ktsb_base, ktsb_sz);
	sfmmu_inv_tsb(ktsb4m_base, ktsb4m_sz);

	sfmmu_init_ktsbinfo();


	sfmmu_kern_mapped = 1;

	/*
	 * hments have been created for mapped pages, and thus we're ready
	 * for kmdb to start using its own trap table.  It walks the hments
	 * to resolve TLB misses, and can't be used until they're ready.
	 */
	if (boothowto & RB_DEBUG)
		kdi_dvec_vmready();
}
232 
/*
 * Macro used below to convert the prom's 32-bit high and low fields into
 * a value appropriate for the 64-bit kernel.
 */

#define	COMBINE(hi, lo) (((uint64_t)(uint32_t)(hi) << 32) | (uint32_t)(lo))

/*
 * Track large pages used.
 * Provides observability for this feature on non-debug kernels.
 * Indexed by tte size code; filled in for the kmem64 mapping below.
 */
ulong_t map_prom_lpcount[MMU_PAGE_SIZES];

/*
 * This function traverses the prom mapping list and creates equivalent
 * mappings in the sfmmu mapping hash.
 *
 * trans_root	array of prom translation entries
 * ntrans_root	number of entries in trans_root
 *
 * Each prom translation at or above KERNELBASE is walked one
 * MMU_PAGESIZE page at a time and loaded into the kernel hat as a locked
 * 8K mapping, unless the kernel hat already maps that address.
 * Inconsistencies between the prom's view and the kernel's view are
 * fatal (prom_panic).  Finally, if a kmem64 area exists, it is mapped
 * using the kmem64 page size code.
 */
static void
sfmmu_map_prom_mappings(struct translation *trans_root, size_t ntrans_root)
{
	struct translation *promt;
	tte_t	tte, oldtte, *ttep;
	pfn_t	pfn, oldpfn, basepfn;
	caddr_t vaddr;
	size_t	size, offset;
	unsigned long i;
	uint_t	attr;
	page_t *pp;
	extern struct memlist *virt_avail;
	char buf[256];

	ttep = &tte;
	for (i = 0, promt = trans_root; i < ntrans_root; i++, promt++) {
		ASSERT(promt->tte_hi != 0);
		ASSERT32(promt->virt_hi == 0 && promt->size_hi == 0);

		vaddr = (caddr_t)COMBINE(promt->virt_hi, promt->virt_lo);

		/*
		 * hack until we get rid of map-for-unix
		 */
		if (vaddr < (caddr_t)KERNELBASE)
			continue;

		/*
		 * Work on a private copy of the prom's tte; the hat
		 * attributes for the new mappings are derived from it.
		 */
		ttep->tte_inthi = promt->tte_hi;
		ttep->tte_intlo = promt->tte_lo;
		attr = PROC_DATA | HAT_NOSYNC;
#if defined(TTE_IS_GLOBAL)
		if (TTE_IS_GLOBAL(ttep)) {
			/*
			 * The prom better not use global translations
			 * because a user process might use the same
			 * virtual addresses
			 */
			prom_panic("sfmmu_map_prom_mappings: global"
			    " translation");
			/*
			 * NOTE(review): presumably never reached, assuming
			 * prom_panic() does not return -- confirm.
			 */
			TTE_SET_LOFLAGS(ttep, TTE_GLB_INT, 0);
		}
#endif
		if (TTE_IS_LOCKED(ttep)) {
			/* clear the lock bits */
			TTE_CLR_LOCKED(ttep);
		}
		attr |= (TTE_IS_VCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEVTTE;
		attr |= (TTE_IS_PCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEPTTE;
		attr |= (TTE_IS_SIDEFFECT(ttep)) ? SFMMU_SIDEFFECT : 0;
		attr |= (TTE_IS_IE(ttep)) ? HAT_STRUCTURE_LE : 0;

		/*
		 * Walk the translation page by page: "size" is the number
		 * of bytes still to process, "offset" the distance already
		 * covered from the start of the translation.
		 */
		size = COMBINE(promt->size_hi, promt->size_lo);
		offset = 0;
		basepfn = TTE_TO_PFN((caddr_t)COMBINE(promt->virt_hi,
		    promt->virt_lo), ttep);
		while (size) {
			vaddr = (caddr_t)(COMBINE(promt->virt_hi,
			    promt->virt_lo) + offset);

			/*
			 * make sure address is not in virt-avail list
			 */
			if (address_in_memlist(virt_avail, (uint64_t)vaddr,
			    size)) {
				prom_panic("sfmmu_map_prom_mappings:"
				    " inconsistent translation/avail lists");
			}

			/*
			 * Memory pages must be physically cacheable and
			 * non-memory (i/o) pages must have side-effect set.
			 */
			pfn = basepfn + mmu_btop(offset);
			if (pf_is_memory(pfn)) {
				if (attr & SFMMU_UNCACHEPTTE) {
					prom_panic("sfmmu_map_prom_mappings:"
					    " uncached prom memory page");
				}
			} else {
				if (!(attr & SFMMU_SIDEFFECT)) {
					prom_panic("sfmmu_map_prom_mappings:"
					    " prom i/o page without"
					    " side-effect");
				}
			}

			/*
			 * skip kmem64 area
			 */
			if (vaddr >= kmem64_base &&
			    vaddr < kmem64_aligned_end) {
#if !defined(C_OBP)
				prom_panic("sfmmu_map_prom_mappings:"
				    " unexpected kmem64 prom mapping");
#else	/* C_OBP */
				size_t mapsz;

				/*
				 * The prom's mapping must agree with the
				 * physical base recorded for kmem64.
				 */
				if (ptob(pfn) !=
				    kmem64_pabase + (vaddr - kmem64_base)) {
					prom_panic("sfmmu_map_prom_mappings:"
					    " unexpected kmem64 prom mapping");
				}

				/*
				 * Jump past the rest of the kmem64 area, or
				 * finish this translation if it ends inside.
				 */
				mapsz = kmem64_aligned_end - vaddr;
				if (mapsz >= size) {
					break;
				}
				size -= mapsz;
				offset += mapsz;
				continue;
#endif	/* !C_OBP */
			}

			oldpfn = sfmmu_vatopfn(vaddr, KHATID, &oldtte);
			ASSERT(oldpfn != PFN_SUSPENDED);
			ASSERT(page_relocate_ready == 0);

			if (oldpfn != PFN_INVALID) {
				/*
				 * mapping already exists.
				 * Verify they are equal
				 */
				if (pfn != oldpfn) {
					(void) snprintf(buf, sizeof (buf),
					"sfmmu_map_prom_mappings: mapping"
					" conflict (va = 0x%p, pfn = 0x%p,"
					" oldpfn = 0x%p)", (void *)vaddr,
					    (void *)pfn, (void *)oldpfn);
					prom_panic(buf);
				}
				size -= MMU_PAGESIZE;
				offset += MMU_PAGESIZE;
				continue;
			}

			/* A page the prom has mapped must not be free. */
			pp = page_numtopp_nolock(pfn);
			if ((pp != NULL) && PP_ISFREE((page_t *)pp)) {
				(void) snprintf(buf, sizeof (buf),
				"sfmmu_map_prom_mappings: prom-mapped"
				" page (va = 0x%p, pfn = 0x%p) on free list",
				    (void *)vaddr, (void *)pfn);
				prom_panic(buf);
			}

			/* Load a locked 8K mapping without touching the TSB. */
			sfmmu_memtte(ttep, pfn, attr, TTE8K);
			sfmmu_tteload(kas.a_hat, ttep, vaddr, pp,
			    HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
			size -= MMU_PAGESIZE;
			offset += MMU_PAGESIZE;
		}
	}

	/*
	 * We claimed kmem64 from prom, so now we need to load tte.
	 */
	if (kmem64_base != NULL) {
		pgcnt_t pages;
		size_t psize;
		int pszc;

		/*
		 * Start from the kmem64 size code; on sun4u any size
		 * larger than 8K is replaced by segkmem_lpszc.
		 */
		pszc = kmem64_szc;
#ifdef sun4u
		if (pszc > TTE8K) {
			pszc = segkmem_lpszc;
		}
#endif	/* sun4u */
		psize = TTEBYTES(pszc);
		pages = btop(psize);
		basepfn = kmem64_pabase >> MMU_PAGESHIFT;
		vaddr = kmem64_base;
		/* One locked tte of size pszc per psize bytes of kmem64. */
		while (vaddr < kmem64_end) {
			sfmmu_memtte(ttep, basepfn,
			    PROC_DATA | HAT_NOSYNC, pszc);
			sfmmu_tteload(kas.a_hat, ttep, vaddr, NULL,
			    HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
			vaddr += psize;
			basepfn += pages;
		}
		/* Record how many pages of this size were used. */
		map_prom_lpcount[pszc] =
		    ((caddr_t)P2ROUNDUP((uintptr_t)kmem64_end, psize) -
		    kmem64_base) >> TTE_PAGE_SHIFT(pszc);
	}
}

#undef COMBINE	/* local to previous routine */
431 
432 /*
433  * This routine reads in the "translations" property in to a buffer and
434  * returns a pointer to this buffer and the number of translations.
435  */
436 static struct translation *
437 read_prom_mappings(size_t *ntransrootp)
438 {
439 	char *prop = "translations";
440 	size_t translen;
441 	pnode_t node;
442 	struct translation *transroot;
443 
444 	/*
445 	 * the "translations" property is associated with the mmu node
446 	 */
447 	node = (pnode_t)prom_getphandle(prom_mmu_ihandle());
448 
449 	/*
450 	 * We use the TSB space to read in the prom mappings.  This space
451 	 * is currently not being used because we haven't taken over the
452 	 * trap table yet.  It should be big enough to hold the mappings.
453 	 */
454 	if ((translen = prom_getproplen(node, prop)) == -1)
455 		cmn_err(CE_PANIC, "no translations property");
456 	*ntransrootp = translen / sizeof (*transroot);
457 	translen = roundup(translen, MMU_PAGESIZE);
458 	PRM_DEBUG(translen);
459 	if (translen > TSB_BYTES(ktsb_szcode))
460 		cmn_err(CE_PANIC, "not enough space for translations");
461 
462 	transroot = (struct translation *)ktsb_base;
463 	ASSERT(transroot);
464 	if (prom_getprop(node, prop, (caddr_t)transroot) == -1) {
465 		cmn_err(CE_PANIC, "translations getprop failed");
466 	}
467 	return (transroot);
468 }
469 
470 /*
471  * Init routine of the nucleus data memory allocator.
472  *
473  * The nucleus data memory allocator is organized in ecache_alignsize'd
474  * memory chunks. Memory allocated by ndata_alloc() will never be freed.
475  *
476  * The ndata argument is used as header of the ndata freelist.
477  * Other freelist nodes are placed in the nucleus memory itself
478  * at the beginning of a free memory chunk. Therefore a freelist
479  * node (struct memlist) must fit into the smallest allocatable
480  * memory chunk (ecache_alignsize bytes).
481  *
482  * The memory interval [base, end] passed to ndata_alloc_init() must be
483  * bzero'd to allow the allocator to return bzero'd memory easily.
484  */
485 void
486 ndata_alloc_init(struct memlist *ndata, uintptr_t base, uintptr_t end)
487 {
488 	ASSERT(sizeof (struct memlist) <= ecache_alignsize);
489 
490 	base = roundup(base, ecache_alignsize);
491 	end = end - end % ecache_alignsize;
492 
493 	ASSERT(base < end);
494 
495 	ndata->address = base;
496 	ndata->size = end - base;
497 	ndata->next = NULL;
498 	ndata->prev = NULL;
499 }
500 
501 /*
502  * Deliver the size of the largest free memory chunk.
503  */
504 size_t
505 ndata_maxsize(struct memlist *ndata)
506 {
507 	size_t chunksize = ndata->size;
508 
509 	while ((ndata = ndata->next) != NULL) {
510 		if (chunksize < ndata->size)
511 			chunksize = ndata->size;
512 	}
513 
514 	return (chunksize);
515 }
516 
517 /*
518  * This is a special function to figure out if the memory chunk needed
519  * for the page structs can fit in the nucleus or not. If it fits the
520  * function calculates and returns the possible remaining ndata size
521  * in the last element if the size needed for page structs would be
522  * allocated from the nucleus.
523  */
524 size_t
525 ndata_spare(struct memlist *ndata, size_t wanted, size_t alignment)
526 {
527 	struct memlist *frlist;
528 	uintptr_t base;
529 	uintptr_t end;
530 
531 	for (frlist = ndata; frlist != NULL; frlist = frlist->next) {
532 		base = roundup(frlist->address, alignment);
533 		end = roundup(base + wanted, ecache_alignsize);
534 
535 		if (end <= frlist->address + frlist->size) {
536 			if (frlist->next == NULL)
537 				return (frlist->address + frlist->size - end);
538 
539 			while (frlist->next != NULL)
540 				frlist = frlist->next;
541 
542 			return (frlist->size);
543 		}
544 	}
545 
546 	return (0);
547 }
548 
549 /*
550  * Allocate the last properly aligned memory chunk.
551  * This function is called when no more large nucleus memory chunks
552  * will be allocated.  The remaining free nucleus memory at the end
553  * of the nucleus can be added to the phys_avail list.
554  */
555 void *
556 ndata_extra_base(struct memlist *ndata, size_t alignment, caddr_t endaddr)
557 {
558 	uintptr_t base;
559 	size_t wasteage = 0;
560 #ifdef	DEBUG
561 	static int called = 0;
562 
563 	if (called++ > 0)
564 		cmn_err(CE_PANIC, "ndata_extra_base() called more than once");
565 #endif /* DEBUG */
566 
567 	/*
568 	 * The alignment needs to be a multiple of ecache_alignsize.
569 	 */
570 	ASSERT((alignment % ecache_alignsize) ==  0);
571 
572 	while (ndata->next != NULL) {
573 		wasteage += ndata->size;
574 		ndata = ndata->next;
575 	}
576 
577 	base = roundup(ndata->address, alignment);
578 
579 	if (base >= ndata->address + ndata->size)
580 		return (NULL);
581 
582 	if ((caddr_t)(ndata->address + ndata->size) != endaddr) {
583 #ifdef DEBUG
584 		ndata_middle_hole_detected = 1;	/* see if we hit this again */
585 #endif
586 		return (NULL);
587 	}
588 
589 	if (base == ndata->address) {
590 		if (ndata->prev != NULL)
591 			ndata->prev->next = NULL;
592 		else
593 			ndata->size = 0;
594 
595 		bzero((void *)base, sizeof (struct memlist));
596 
597 	} else {
598 		ndata->size = base - ndata->address;
599 		wasteage += ndata->size;
600 	}
601 	PRM_DEBUG(wasteage);
602 
603 	return ((void *)base);
604 }
605 
606 /*
607  * Select the best matching buffer, avoid memory fragmentation.
608  */
609 static struct memlist *
610 ndata_select_chunk(struct memlist *ndata, size_t wanted, size_t alignment)
611 {
612 	struct memlist *fnd_below = NULL;
613 	struct memlist *fnd_above = NULL;
614 	struct memlist *fnd_unused = NULL;
615 	struct memlist *frlist;
616 	uintptr_t base;
617 	uintptr_t end;
618 	size_t below;
619 	size_t above;
620 	size_t unused;
621 	size_t best_below = ULONG_MAX;
622 	size_t best_above = ULONG_MAX;
623 	size_t best_unused = ULONG_MAX;
624 
625 	ASSERT(ndata != NULL);
626 
627 	/*
628 	 * Look for the best matching buffer, avoid memory fragmentation.
629 	 * The following strategy is used, try to find
630 	 *   1. an exact fitting buffer
631 	 *   2. avoid wasting any space below the buffer, take first
632 	 *	fitting buffer
633 	 *   3. avoid wasting any space above the buffer, take first
634 	 *	fitting buffer
635 	 *   4. avoid wasting space, take first fitting buffer
636 	 *   5. take the last buffer in chain
637 	 */
638 	for (frlist = ndata; frlist != NULL; frlist = frlist->next) {
639 		base = roundup(frlist->address, alignment);
640 		end = roundup(base + wanted, ecache_alignsize);
641 
642 		if (end > frlist->address + frlist->size)
643 			continue;
644 
645 		below = (base - frlist->address) / ecache_alignsize;
646 		above = (frlist->address + frlist->size - end) /
647 		    ecache_alignsize;
648 		unused = below + above;
649 
650 		if (unused == 0)
651 			return (frlist);
652 
653 		if (frlist->next == NULL)
654 			break;
655 
656 		if (below < best_below) {
657 			best_below = below;
658 			fnd_below = frlist;
659 		}
660 
661 		if (above < best_above) {
662 			best_above = above;
663 			fnd_above = frlist;
664 		}
665 
666 		if (unused < best_unused) {
667 			best_unused = unused;
668 			fnd_unused = frlist;
669 		}
670 	}
671 
672 	if (best_below == 0)
673 		return (fnd_below);
674 	if (best_above == 0)
675 		return (fnd_above);
676 	if (best_unused < ULONG_MAX)
677 		return (fnd_unused);
678 
679 	return (frlist);
680 }
681 
/*
 * Nucleus data memory allocator.
 * The granularity of the allocator is ecache_alignsize.
 * See also comment for ndata_alloc_init().
 *
 * ndata	head of the nucleus free list
 * wanted	number of bytes requested
 * alignment	required alignment of the returned address
 *
 * Returns the allocated address, or NULL if ndata_select_chunk() finds
 * no chunk that can hold the request.  Free list nodes other than the
 * head are stored at the start of the chunk they describe, so a node
 * that is consumed by the allocation is zeroed before returning.
 */
void *
ndata_alloc(struct memlist *ndata, size_t wanted, size_t alignment)
{
	struct memlist *found;
	struct memlist *fnd_above;
	uintptr_t base;
	uintptr_t end;
	size_t below;
	size_t above;

	/*
	 * Look for the best matching buffer, avoid memory fragmentation.
	 */
	if ((found = ndata_select_chunk(ndata, wanted, alignment)) == NULL)
		return (NULL);

	/*
	 * Allocate the nucleus data buffer.
	 */
	base = roundup(found->address, alignment);
	end = roundup(base + wanted, ecache_alignsize);
	ASSERT(end <= found->address + found->size);

	/* Bytes of the chunk left free below and above [base, end). */
	below = base - found->address;
	above = found->address + found->size - end;
	ASSERT(above == 0 || (above % ecache_alignsize) == 0);

	if (below >= ecache_alignsize) {
		/*
		 * There is free memory below the allocated memory chunk.
		 * The existing node keeps describing that lower part.
		 */
		found->size = below - below % ecache_alignsize;

		if (above) {
			/*
			 * Create a new node for the upper remainder and
			 * link it in directly after the existing node.
			 */
			fnd_above = (struct memlist *)end;
			fnd_above->address = end;
			fnd_above->size = above;

			if ((fnd_above->next = found->next) != NULL)
				found->next->prev = fnd_above;
			fnd_above->prev = found;
			found->next = fnd_above;
		}

		return ((void *)base);
	}

	if (found->prev == NULL) {
		/*
		 * The first chunk (ndata) is selected.
		 */
		ASSERT(found == ndata);
		if (above) {
			/* Head now describes the upper remainder. */
			found->address = end;
			found->size = above;
		} else if (found->next != NULL) {
			/*
			 * Chunk fully used: move the next node's contents
			 * into the head and zero the memory where that
			 * node was stored.
			 */
			found->address = found->next->address;
			found->size = found->next->size;
			if ((found->next = found->next->next) != NULL)
				found->next->prev = found;

			bzero((void *)found->address, sizeof (struct memlist));
		} else {
			/* Nothing is left; the head becomes empty. */
			found->address = end;
			found->size = 0;
		}

		return ((void *)base);
	}

	/*
	 * Not the first chunk.
	 */
	if (above) {
		/* Replace the node by a new one for the upper remainder. */
		fnd_above = (struct memlist *)end;
		fnd_above->address = end;
		fnd_above->size = above;

		if ((fnd_above->next = found->next) != NULL)
			fnd_above->next->prev = fnd_above;
		fnd_above->prev = found->prev;
		found->prev->next = fnd_above;

	} else {
		/* Chunk fully used: unlink its node. */
		if ((found->prev->next = found->next) != NULL)
			found->next->prev = found->prev;
	}

	/* Zero the old node, which sits inside the memory handed out. */
	bzero((void *)found->address, sizeof (struct memlist));

	return ((void *)base);
}
779 
780 /*
781  * Size the kernel TSBs based upon the amount of physical
782  * memory in the system.
783  */
784 static void
785 calc_tsb_sizes(pgcnt_t npages)
786 {
787 	PRM_DEBUG(npages);
788 
789 	if (npages <= TSB_FREEMEM_MIN) {
790 		ktsb_szcode = TSB_128K_SZCODE;
791 		enable_bigktsb = 0;
792 	} else if (npages <= TSB_FREEMEM_LARGE / 2) {
793 		ktsb_szcode = TSB_256K_SZCODE;
794 		enable_bigktsb = 0;
795 	} else if (npages <= TSB_FREEMEM_LARGE) {
796 		ktsb_szcode = TSB_512K_SZCODE;
797 		enable_bigktsb = 0;
798 	} else if (npages <= TSB_FREEMEM_LARGE * 2 ||
799 	    enable_bigktsb == 0) {
800 		ktsb_szcode = TSB_1M_SZCODE;
801 		enable_bigktsb = 0;
802 	} else {
803 		ktsb_szcode = highbit(npages - 1);
804 		ktsb_szcode -= TSB_START_SIZE;
805 		ktsb_szcode = MAX(ktsb_szcode, MIN_BIGKTSB_SZCODE);
806 		ktsb_szcode = MIN(ktsb_szcode, MAX_BIGKTSB_SZCODE);
807 	}
808 
809 	/*
810 	 * We choose the TSB to hold kernel 4M mappings to have twice
811 	 * the reach as the primary kernel TSB since this TSB will
812 	 * potentially (currently) be shared by both mappings to all of
813 	 * physical memory plus user TSBs. If this TSB has to be in nucleus
814 	 * (only for Spitfire and Cheetah) limit its size to 64K.
815 	 */
816 	ktsb4m_szcode = highbit((2 * npages) / TTEPAGES(TTE4M) - 1);
817 	ktsb4m_szcode -= TSB_START_SIZE;
818 	ktsb4m_szcode = MAX(ktsb4m_szcode, TSB_MIN_SZCODE);
819 	ktsb4m_szcode = MIN(ktsb4m_szcode, TSB_SOFTSZ_MASK);
820 	if ((enable_bigktsb == 0 || ktsb_phys == 0) && ktsb4m_szcode >
821 	    TSB_64K_SZCODE) {
822 		ktsb4m_szcode = TSB_64K_SZCODE;
823 		max_bootlp_tteszc = TTE8K;
824 	}
825 
826 	ktsb_sz = TSB_BYTES(ktsb_szcode);	/* kernel 8K tsb size */
827 	ktsb4m_sz = TSB_BYTES(ktsb4m_szcode);	/* kernel 4M tsb size */
828 }
829 
830 /*
831  * Allocate kernel TSBs from nucleus data memory.
832  * The function return 0 on success and -1 on failure.
833  */
834 int
835 ndata_alloc_tsbs(struct memlist *ndata, pgcnt_t npages)
836 {
837 	/*
838 	 * Set ktsb_phys to 1 if the processor supports ASI_QUAD_LDD_PHYS.
839 	 */
840 	sfmmu_setup_4lp();
841 
842 	/*
843 	 * Size the kernel TSBs based upon the amount of physical
844 	 * memory in the system.
845 	 */
846 	calc_tsb_sizes(npages);
847 
848 	/*
849 	 * Allocate the 8K kernel TSB if it belongs inside the nucleus.
850 	 */
851 	if (enable_bigktsb == 0) {
852 		if ((ktsb_base = ndata_alloc(ndata, ktsb_sz, ktsb_sz)) == NULL)
853 			return (-1);
854 		ASSERT(!((uintptr_t)ktsb_base & (ktsb_sz - 1)));
855 
856 		PRM_DEBUG(ktsb_base);
857 		PRM_DEBUG(ktsb_sz);
858 		PRM_DEBUG(ktsb_szcode);
859 	}
860 
861 	/*
862 	 * Next, allocate 4M kernel TSB from the nucleus since it's small.
863 	 */
864 	if (ktsb4m_szcode <= TSB_64K_SZCODE) {
865 
866 		ktsb4m_base = ndata_alloc(ndata, ktsb4m_sz, ktsb4m_sz);
867 		if (ktsb4m_base == NULL)
868 			return (-1);
869 		ASSERT(!((uintptr_t)ktsb4m_base & (ktsb4m_sz - 1)));
870 
871 		PRM_DEBUG(ktsb4m_base);
872 		PRM_DEBUG(ktsb4m_sz);
873 		PRM_DEBUG(ktsb4m_szcode);
874 	}
875 
876 	return (0);
877 }
878 
/*
 * Allocate hat structs from the nucleus data memory.
 *
 * ndata	head of the nucleus free list
 * npages	number of physical pages, used to size the hash tables
 *		and the page mapping list mutex array
 * kpm_npages	number of kpm pages, used to size the kpm mutex arrays
 *
 * Sizes the user and kernel hme hash tables, the page mapping list mutex
 * array, the sfmmu callback table and (if kpm is enabled) the kpm mutex
 * array, and tries to allocate each of them with ndata_alloc().  A
 * pointer is left NULL when its table does not fit or is not taken from
 * the nucleus.  Always returns 0.
 */
int
ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages, pgcnt_t kpm_npages)
{
	size_t	mml_alloc_sz;
	size_t	cb_alloc_sz;
	int	max_nucuhme_buckets = MAX_NUCUHME_BUCKETS;
	int	max_nuckhme_buckets = MAX_NUCKHME_BUCKETS;
	ulong_t hme_buckets;

	/*
	 * With a big kernel TSB the limits on how many hash buckets may
	 * be placed in the nucleus are doubled.
	 */
	if (enable_bigktsb) {
		ASSERT((max_nucuhme_buckets + max_nuckhme_buckets) *
		    sizeof (struct hmehash_bucket) <=
			TSB_BYTES(TSB_1M_SZCODE));

		max_nucuhme_buckets *= 2;
		max_nuckhme_buckets *= 2;
	}

	/*
	 * The number of buckets in the hme hash tables
	 * is a power of 2 such that the average hash chain length is
	 * HMENT_HASHAVELEN.  The number of buckets for the user hash is
	 * a function of physical memory and a predefined overmapping factor.
	 * The number of buckets for the kernel hash is a function of
	 * physical memory only.
	 */
	hme_buckets = (npages * HMEHASH_FACTOR) /
		(HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT));

	uhmehash_num = (int)MIN(hme_buckets, MAX_UHME_BUCKETS);

	if (uhmehash_num > USER_BUCKETS_THRESHOLD) {
		/*
		 * if uhmehash_num is not power of 2 round it down to the
		 *  next power of 2.
		 */
		uint_t align = 1 << (highbit(uhmehash_num - 1) - 1);
		uhmehash_num = P2ALIGN(uhmehash_num, align);
	} else
		/* At or below the threshold, round up to a power of 2. */
		uhmehash_num = 1 << highbit(uhmehash_num - 1);

	/* Kernel hash: rounded up to a power of 2, with a lower bound. */
	hme_buckets = npages / (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT);
	khmehash_num = (int)MIN(hme_buckets, MAX_KHME_BUCKETS);
	khmehash_num = 1 << highbit(khmehash_num - 1);
	khmehash_num = MAX(khmehash_num, MIN_KHME_BUCKETS);

	if ((khmehash_num > max_nuckhme_buckets) ||
		(uhmehash_num > max_nucuhme_buckets)) {
		/* Too big for the nucleus; both tables stay unallocated. */
		khme_hash = NULL;
		uhme_hash = NULL;
	} else {
		size_t hmehash_sz = (uhmehash_num + khmehash_num) *
		    sizeof (struct hmehash_bucket);

		/*
		 * Both tables share one allocation: the kernel hash comes
		 * first, the user hash follows directly behind it.
		 */
		if ((khme_hash = ndata_alloc(ndata, hmehash_sz,
		    ecache_alignsize)) != NULL)
			uhme_hash = &khme_hash[khmehash_num];
		else
			uhme_hash = NULL;

		PRM_DEBUG(hmehash_sz);
	}

	PRM_DEBUG(khme_hash);
	PRM_DEBUG(khmehash_num);
	PRM_DEBUG(uhme_hash);
	PRM_DEBUG(uhmehash_num);

	/*
	 * For the page mapping list mutex array we allocate one mutex
	 * for every 128 pages (1 MB) with a minimum of 64 entries and
	 * a maximum of 8K entries. For the initial computation npages
	 * is rounded up (ie. 1 << highbit(npages * 1.5 / 128))
	 *
	 * mml_shift is roughly log2(mml_table_sz) + 3 for MLIST_HASH
	 *
	 * It is not required that this be allocated from the nucleus,
	 * but it is desirable.  So we first allocate from the nucleus
	 * everything that must be there.  Having done so, if mml_table
	 * will fit within what remains of the nucleus then it will be
	 * allocated here.  If not, set mml_table to NULL, which will cause
	 * startup_memlist() to BOP_ALLOC() space for it after our return...
	 */
	mml_table_sz = 1 << highbit((npages * 3) / 256);
	if (mml_table_sz < 64)
		mml_table_sz = 64;
	else if (mml_table_sz > 8192)
		mml_table_sz = 8192;
	mml_shift = highbit(mml_table_sz) + 3;

	PRM_DEBUG(mml_table_sz);
	PRM_DEBUG(mml_shift);

	mml_alloc_sz = mml_table_sz * sizeof (kmutex_t);

	/* ndata_alloc() yields NULL if the table does not fit. */
	mml_table = ndata_alloc(ndata, mml_alloc_sz, ecache_alignsize);

	PRM_DEBUG(mml_table);

	/* Callback table: one entry per callback id up to sfmmu_max_cb_id. */
	cb_alloc_sz = sfmmu_max_cb_id * sizeof (struct sfmmu_callback);
	PRM_DEBUG(cb_alloc_sz);
	sfmmu_cb_table = ndata_alloc(ndata, cb_alloc_sz, ecache_alignsize);
	PRM_DEBUG(sfmmu_cb_table);

	/*
	 * For the kpm_page mutex array we allocate one mutex every 16
	 * kpm pages (64MB). In smallpage mode we allocate one mutex
	 * every 8K pages. The minimum is set to 64 entries and the
	 * maximum to 8K entries.
	 *
	 * It is not required that this be allocated from the nucleus,
	 * but it is desirable.  So we first allocate from the nucleus
	 * everything that must be there.  Having done so, if kpmp_table
	 * or kpmp_stable will fit within what remains of the nucleus
	 * then it will be allocated here.  If not, startup_memlist()
	 * will use BOP_ALLOC() space for it after our return...
	 */
	if (kpm_enable) {
		size_t	kpmp_alloc_sz;

		if (kpm_smallpages == 0) {
			/* Large page kpm: only kpmp_table is used. */
			kpmp_shift = highbit(sizeof (kpm_page_t)) - 1;
			kpmp_table_sz = 1 << highbit(kpm_npages / 16);
			kpmp_table_sz = (kpmp_table_sz < 64) ? 64 :
			    ((kpmp_table_sz > 8192) ? 8192 : kpmp_table_sz);
			kpmp_alloc_sz = kpmp_table_sz * sizeof (kpm_hlk_t);

			kpmp_table = ndata_alloc(ndata, kpmp_alloc_sz,
			    ecache_alignsize);

			PRM_DEBUG(kpmp_table);
			PRM_DEBUG(kpmp_table_sz);

			kpmp_stable_sz = 0;
			kpmp_stable = NULL;
		} else {
			/* Small page kpm: only kpmp_stable is used. */
			ASSERT(kpm_pgsz == PAGESIZE);
			kpmp_shift = highbit(sizeof (kpm_shlk_t)) + 1;
			kpmp_stable_sz = 1 << highbit(kpm_npages / 8192);
			kpmp_stable_sz = (kpmp_stable_sz < 64) ? 64 :
			    ((kpmp_stable_sz > 8192) ? 8192 : kpmp_stable_sz);
			kpmp_alloc_sz = kpmp_stable_sz * sizeof (kpm_shlk_t);

			kpmp_stable = ndata_alloc(ndata, kpmp_alloc_sz,
			    ecache_alignsize);

			PRM_DEBUG(kpmp_stable);
			PRM_DEBUG(kpmp_stable_sz);

			kpmp_table_sz = 0;
			kpmp_table = NULL;
		}
		PRM_DEBUG(kpmp_shift);
	}

	return (0);
}
1039 
1040 /*
1041  * Allocate virtual addresses at base with given alignment.
1042  * Note that there is no physical memory behind the address yet.
1043  */
1044 caddr_t
1045 alloc_hme_buckets(caddr_t base, int alignsize)
1046 {
1047 	size_t hmehash_sz = (uhmehash_num + khmehash_num) *
1048 	    sizeof (struct hmehash_bucket);
1049 
1050 	ASSERT(khme_hash == NULL);
1051 	ASSERT(uhme_hash == NULL);
1052 
1053 	base = (caddr_t)roundup((uintptr_t)base, alignsize);
1054 	hmehash_sz = roundup(hmehash_sz, alignsize);
1055 
1056 	khme_hash = (struct hmehash_bucket *)base;
1057 	uhme_hash = (struct hmehash_bucket *)((caddr_t)khme_hash +
1058 	    khmehash_num * sizeof (struct hmehash_bucket));
1059 	base += hmehash_sz;
1060 	return (base);
1061 }
1062 
1063 /*
1064  * This function bop allocs kernel TSBs.
1065  */
1066 caddr_t
1067 sfmmu_ktsb_alloc(caddr_t tsbbase)
1068 {
1069 	caddr_t vaddr;
1070 
1071 	if (enable_bigktsb) {
1072 		ktsb_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb_sz);
1073 		vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb_base, ktsb_sz,
1074 		    ktsb_sz);
1075 		if (vaddr != ktsb_base)
1076 			cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc"
1077 			    " 8K bigktsb");
1078 		ktsb_base = vaddr;
1079 		tsbbase = ktsb_base + ktsb_sz;
1080 		PRM_DEBUG(ktsb_base);
1081 		PRM_DEBUG(tsbbase);
1082 	}
1083 
1084 	if (ktsb4m_szcode > TSB_64K_SZCODE) {
1085 		ASSERT(ktsb_phys && enable_bigktsb);
1086 		ktsb4m_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb4m_sz);
1087 		vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb4m_base, ktsb4m_sz,
1088 		    ktsb4m_sz);
1089 		if (vaddr != ktsb4m_base)
1090 			cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc"
1091 			    " 4M bigktsb");
1092 		ktsb4m_base = vaddr;
1093 		tsbbase = ktsb4m_base + ktsb4m_sz;
1094 		PRM_DEBUG(ktsb4m_base);
1095 		PRM_DEBUG(tsbbase);
1096 	}
1097 	return (tsbbase);
1098 }
1099 
1100 /*
1101  * Moves code assembled outside of the trap table into the trap
1102  * table taking care to relocate relative branches to code outside
1103  * of the trap handler.
1104  */
1105 static void
1106 sfmmu_reloc_trap_handler(void *tablep, void *start, size_t count)
1107 {
1108 	size_t i;
1109 	uint32_t *src;
1110 	uint32_t *dst;
1111 	uint32_t inst;
1112 	int op, op2;
1113 	int32_t offset;
1114 	int disp;
1115 
1116 	src = start;
1117 	dst = tablep;
1118 	offset = src - dst;
1119 	for (src = start, i = 0; i < count; i++, src++, dst++) {
1120 		inst = *dst = *src;
1121 		op = (inst >> 30) & 0x2;
1122 		if (op == 1) {
1123 			/* call */
1124 			disp = ((int32_t)inst << 2) >> 2; /* sign-extend */
1125 			if (disp + i >= 0 && disp + i < count)
1126 				continue;
1127 			disp += offset;
1128 			inst = 0x40000000u | (disp & 0x3fffffffu);
1129 			*dst = inst;
1130 		} else if (op == 0) {
1131 			/* branch or sethi */
1132 			op2 = (inst >> 22) & 0x7;
1133 
1134 			switch (op2) {
1135 			case 0x3: /* BPr */
1136 				disp = (((inst >> 20) & 0x3) << 14) |
1137 				    (inst & 0x3fff);
1138 				disp = (disp << 16) >> 16; /* sign-extend */
1139 				if (disp + i >= 0 && disp + i < count)
1140 					continue;
1141 				disp += offset;
1142 				if (((disp << 16) >> 16) != disp)
1143 					cmn_err(CE_PANIC, "bad reloc");
1144 				inst &= ~0x303fff;
1145 				inst |= (disp & 0x3fff);
1146 				inst |= (disp & 0xc000) << 6;
1147 				break;
1148 
1149 			case 0x2: /* Bicc */
1150 				disp = ((int32_t)inst << 10) >> 10;
1151 				if (disp + i >= 0 && disp + i < count)
1152 					continue;
1153 				disp += offset;
1154 				if (((disp << 10) >> 10) != disp)
1155 					cmn_err(CE_PANIC, "bad reloc");
1156 				inst &= ~0x3fffff;
1157 				inst |= (disp & 0x3fffff);
1158 				break;
1159 
1160 			case 0x1: /* Bpcc */
1161 				disp = ((int32_t)inst << 13) >> 13;
1162 				if (disp + i >= 0 && disp + i < count)
1163 					continue;
1164 				disp += offset;
1165 				if (((disp << 13) >> 13) != disp)
1166 					cmn_err(CE_PANIC, "bad reloc");
1167 				inst &= ~0x7ffff;
1168 				inst |= (disp & 0x7ffffu);
1169 				break;
1170 			}
1171 			*dst = inst;
1172 		}
1173 	}
1174 	flush_instr_mem(tablep, count * sizeof (uint32_t));
1175 }
1176 
1177 /*
1178  * Routine to allocate a large page to use in the TSB caches.
1179  */
1180 /*ARGSUSED*/
1181 static page_t *
1182 sfmmu_tsb_page_create(void *addr, size_t size, int vmflag, void *arg)
1183 {
1184 	int pgflags;
1185 
1186 	pgflags = PG_EXCL;
1187 	if ((vmflag & VM_NOSLEEP) == 0)
1188 		pgflags |= PG_WAIT;
1189 	if (vmflag & VM_PANIC)
1190 		pgflags |= PG_PANIC;
1191 	if (vmflag & VM_PUSHPAGE)
1192 		pgflags |= PG_PUSHPAGE;
1193 
1194 	return (page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size,
1195 	    pgflags, &kvseg, addr, arg));
1196 }
1197 
1198 /*
1199  * Allocate a large page to back the virtual address range
1200  * [addr, addr + size).  If addr is NULL, allocate the virtual address
1201  * space as well.
1202  */
1203 static void *
1204 sfmmu_tsb_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
1205     uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
1206     void *pcarg)
1207 {
1208 	page_t *ppl;
1209 	page_t *rootpp;
1210 	caddr_t addr = inaddr;
1211 	pgcnt_t npages = btopr(size);
1212 	page_t **ppa;
1213 	int i = 0;
1214 
1215 	/*
1216 	 * Assuming that only TSBs will call this with size > PAGESIZE
1217 	 * There is no reason why this couldn't be expanded to 8k pages as
1218 	 * well, or other page sizes in the future .... but for now, we
1219 	 * only support fixed sized page requests.
1220 	 */
1221 	if ((inaddr == NULL) && ((addr = vmem_xalloc(vmp, size, size, 0, 0,
1222 	    NULL, NULL, vmflag)) == NULL))
1223 		return (NULL);
1224 
1225 	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
1226 		if (inaddr == NULL)
1227 			vmem_xfree(vmp, addr, size);
1228 		return (NULL);
1229 	}
1230 
1231 	ppl = page_create_func(addr, size, vmflag, pcarg);
1232 	if (ppl == NULL) {
1233 		if (inaddr == NULL)
1234 			vmem_xfree(vmp, addr, size);
1235 		page_unresv(npages);
1236 		return (NULL);
1237 	}
1238 
1239 	rootpp = ppl;
1240 	ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
1241 	while (ppl != NULL) {
1242 		page_t *pp = ppl;
1243 		ppa[i++] = pp;
1244 		page_sub(&ppl, pp);
1245 		ASSERT(page_iolock_assert(pp));
1246 		page_io_unlock(pp);
1247 	}
1248 
1249 	/*
1250 	 * Load the locked entry.  It's OK to preload the entry into
1251 	 * the TSB since we now support large mappings in the kernel TSB.
1252 	 */
1253 	hat_memload_array(kas.a_hat, (caddr_t)rootpp->p_offset, size,
1254 	    ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr, HAT_LOAD_LOCK);
1255 
1256 	for (--i; i >= 0; --i) {
1257 		(void) page_pp_lock(ppa[i], 0, 1);
1258 		page_unlock(ppa[i]);
1259 	}
1260 
1261 	kmem_free(ppa, npages * sizeof (page_t *));
1262 	return (addr);
1263 }
1264 
1265 /* Called to import new spans into the TSB vmem arenas */
1266 void *
1267 sfmmu_tsb_segkmem_alloc(vmem_t *vmp, size_t size, int vmflag)
1268 {
1269 	lgrp_id_t lgrpid = LGRP_NONE;
1270 
1271 	if (tsb_lgrp_affinity) {
1272 		/*
1273 		 * Search for the vmp->lgrpid mapping by brute force;
1274 		 * some day vmp will have an lgrp, until then we have
1275 		 * to do this the hard way.
1276 		 */
1277 		for (lgrpid = 0; lgrpid < NLGRPS_MAX &&
1278 		    vmp != kmem_tsb_default_arena[lgrpid]; lgrpid++);
1279 		if (lgrpid == NLGRPS_MAX)
1280 			lgrpid = LGRP_NONE;
1281 	}
1282 
1283 	return (sfmmu_tsb_xalloc(vmp, NULL, size, vmflag, 0,
1284 	    sfmmu_tsb_page_create, lgrpid != LGRP_NONE? &lgrpid : NULL));
1285 }
1286 
1287 /* Called to free spans from the TSB vmem arenas */
1288 void
1289 sfmmu_tsb_segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
1290 {
1291 	page_t *pp;
1292 	caddr_t addr = inaddr;
1293 	caddr_t eaddr;
1294 	pgcnt_t npages = btopr(size);
1295 	pgcnt_t pgs_left = npages;
1296 	page_t *rootpp = NULL;
1297 
1298 	hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1299 
1300 	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
1301 		pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
1302 		if (pp == NULL)
1303 			panic("sfmmu_tsb_segkmem_free: page not found");
1304 
1305 		ASSERT(PAGE_EXCL(pp));
1306 		page_pp_unlock(pp, 0, 1);
1307 
1308 		if (rootpp == NULL)
1309 			rootpp = pp;
1310 		if (--pgs_left == 0) {
1311 			/*
1312 			 * similar logic to segspt_free_pages, but we know we
1313 			 * have one large page.
1314 			 */
1315 			page_destroy_pages(rootpp);
1316 		}
1317 	}
1318 	page_unresv(npages);
1319 
1320 	if (vmp != NULL)
1321 		vmem_xfree(vmp, inaddr, size);
1322 }
1323