xref: /titanic_44/usr/src/uts/sun4u/vm/mach_kpm.c (revision a38ddfee9c8c6b6c5a2947ff52fd2338362a4444)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Kernel Physical Mapping (segkpm) hat interface routines for sun4u.
28  */
29 
30 #include <sys/types.h>
31 #include <vm/hat.h>
32 #include <vm/hat_sfmmu.h>
33 #include <vm/page.h>
34 #include <sys/sysmacros.h>
35 #include <sys/cmn_err.h>
36 #include <sys/machsystm.h>
37 #include <vm/seg_kpm.h>
38 #include <sys/cpu_module.h>
39 #include <vm/mach_kpm.h>
40 
41 /* kpm prototypes */
42 static caddr_t	sfmmu_kpm_mapin(page_t *);
43 static void	sfmmu_kpm_mapout(page_t *, caddr_t);
44 static int	sfmmu_kpme_lookup(struct kpme *, page_t *);
45 static void	sfmmu_kpme_add(struct kpme *, page_t *);
46 static void	sfmmu_kpme_sub(struct kpme *, page_t *);
47 static caddr_t	sfmmu_kpm_getvaddr(page_t *, int *);
48 static int	sfmmu_kpm_fault(caddr_t, struct memseg *, page_t *);
49 static int	sfmmu_kpm_fault_small(caddr_t, struct memseg *, page_t *);
50 static void	sfmmu_kpm_vac_conflict(page_t *, caddr_t);
51 void	sfmmu_kpm_pageunload(page_t *);
52 void	sfmmu_kpm_vac_unload(page_t *, caddr_t);
53 static void	sfmmu_kpm_demap_large(caddr_t);
54 static void	sfmmu_kpm_demap_small(caddr_t);
55 static void	sfmmu_kpm_demap_tlbs(caddr_t);
56 void	sfmmu_kpm_hme_unload(page_t *);
57 kpm_hlk_t *sfmmu_kpm_kpmp_enter(page_t *, pgcnt_t);
58 void	sfmmu_kpm_kpmp_exit(kpm_hlk_t *kpmp);
59 void	sfmmu_kpm_page_cache(page_t *, int, int);
60 
61 /*
62  * Kernel Physical Mapping (kpm) facility
63  */
64 
/*
 * Machine dependent kpm initialization hook.
 * Nothing is done here; the body is empty.
 */
void
mach_kpm_init()
{
}
68 
69 /* -- hat_kpm interface section -- */
70 
71 /*
72  * Mapin a locked page and return the vaddr.
73  * When a kpme is provided by the caller it is added to
74  * the page p_kpmelist. The page to be mapped in must
75  * be at least read locked (p_selock).
76  */
77 caddr_t
78 hat_kpm_mapin(struct page *pp, struct kpme *kpme)
79 {
80 	kmutex_t	*pml;
81 	caddr_t		vaddr;
82 
83 	if (kpm_enable == 0) {
84 		cmn_err(CE_WARN, "hat_kpm_mapin: kpm_enable not set");
85 		return ((caddr_t)NULL);
86 	}
87 
88 	if (pp == NULL || PAGE_LOCKED(pp) == 0) {
89 		cmn_err(CE_WARN, "hat_kpm_mapin: pp zero or not locked");
90 		return ((caddr_t)NULL);
91 	}
92 
93 	pml = sfmmu_mlist_enter(pp);
94 	ASSERT(pp->p_kpmref >= 0);
95 
96 	vaddr = (pp->p_kpmref == 0) ?
97 	    sfmmu_kpm_mapin(pp) : hat_kpm_page2va(pp, 1);
98 
99 	if (kpme != NULL) {
100 		/*
101 		 * Tolerate multiple mapins for the same kpme to avoid
102 		 * the need for an extra serialization.
103 		 */
104 		if ((sfmmu_kpme_lookup(kpme, pp)) == 0)
105 			sfmmu_kpme_add(kpme, pp);
106 
107 		ASSERT(pp->p_kpmref > 0);
108 
109 	} else {
110 		pp->p_kpmref++;
111 	}
112 
113 	sfmmu_mlist_exit(pml);
114 	return (vaddr);
115 }
116 
117 /*
118  * Mapout a locked page.
119  * When a kpme is provided by the caller it is removed from
120  * the page p_kpmelist. The page to be mapped out must be at
121  * least read locked (p_selock).
122  * Note: The seg_kpm layer provides a mapout interface for the
123  * case that a kpme is used and the underlying page is unlocked.
124  * This can be used instead of calling this function directly.
125  */
126 void
127 hat_kpm_mapout(struct page *pp, struct kpme *kpme, caddr_t vaddr)
128 {
129 	kmutex_t	*pml;
130 
131 	if (kpm_enable == 0) {
132 		cmn_err(CE_WARN, "hat_kpm_mapout: kpm_enable not set");
133 		return;
134 	}
135 
136 	if (IS_KPM_ADDR(vaddr) == 0) {
137 		cmn_err(CE_WARN, "hat_kpm_mapout: no kpm address");
138 		return;
139 	}
140 
141 	if (pp == NULL || PAGE_LOCKED(pp) == 0) {
142 		cmn_err(CE_WARN, "hat_kpm_mapout: page zero or not locked");
143 		return;
144 	}
145 
146 	if (kpme != NULL) {
147 		ASSERT(pp == kpme->kpe_page);
148 		pp = kpme->kpe_page;
149 		pml = sfmmu_mlist_enter(pp);
150 
151 		if (sfmmu_kpme_lookup(kpme, pp) == 0)
152 			panic("hat_kpm_mapout: kpme not found pp=%p",
153 			    (void *)pp);
154 
155 		ASSERT(pp->p_kpmref > 0);
156 		sfmmu_kpme_sub(kpme, pp);
157 
158 	} else {
159 		pml = sfmmu_mlist_enter(pp);
160 		pp->p_kpmref--;
161 	}
162 
163 	ASSERT(pp->p_kpmref >= 0);
164 	if (pp->p_kpmref == 0)
165 		sfmmu_kpm_mapout(pp, vaddr);
166 
167 	sfmmu_mlist_exit(pml);
168 }
169 
170 /*
171  * Return the kpm virtual address for the page at pp.
172  * If checkswap is non zero and the page is backed by a
173  * swap vnode the physical address is used rather than
174  * p_offset to determine the kpm region.
175  * Note: The function has to be used w/ extreme care. The
176  * stability of the page identity is in the responsibility
177  * of the caller.
178  */
179 /*ARGSUSED*/
180 caddr_t
181 hat_kpm_page2va(struct page *pp, int checkswap)
182 {
183 	int		vcolor, vcolor_pa;
184 	uintptr_t	paddr, vaddr;
185 
186 	ASSERT(kpm_enable);
187 
188 	paddr = ptob(pp->p_pagenum);
189 	vcolor_pa = addr_to_vcolor(paddr);
190 
191 	if (checkswap && pp->p_vnode && IS_SWAPFSVP(pp->p_vnode))
192 		vcolor = (PP_ISNC(pp)) ? vcolor_pa : PP_GET_VCOLOR(pp);
193 	else
194 		vcolor = addr_to_vcolor(pp->p_offset);
195 
196 	vaddr = (uintptr_t)kpm_vbase + paddr;
197 
198 	if (vcolor_pa != vcolor) {
199 		vaddr += ((uintptr_t)(vcolor - vcolor_pa) << MMU_PAGESHIFT);
200 		vaddr += (vcolor_pa > vcolor) ?
201 		    ((uintptr_t)vcolor_pa << kpm_size_shift) :
202 		    ((uintptr_t)(vcolor - vcolor_pa) << kpm_size_shift);
203 	}
204 
205 	return ((caddr_t)vaddr);
206 }
207 
208 /*
209  * Return the page for the kpm virtual address vaddr.
210  * Caller is responsible for the kpm mapping and lock
211  * state of the page.
212  */
213 page_t *
214 hat_kpm_vaddr2page(caddr_t vaddr)
215 {
216 	uintptr_t	paddr;
217 	pfn_t		pfn;
218 
219 	ASSERT(IS_KPM_ADDR(vaddr));
220 
221 	SFMMU_KPM_VTOP(vaddr, paddr);
222 	pfn = (pfn_t)btop(paddr);
223 
224 	return (page_numtopp_nolock(pfn));
225 }
226 
/*
 * page to kpm_page
 *
 * Looks up the memseg that holds pp (no locks taken) and stores in kp
 * the address of the kpm_page entry covering pp's page frame.
 * Both macro parameters are parenthesized so that arbitrary
 * expressions (e.g. &pages[i]) can be passed safely.
 * The macro declares the locals mseg, inx and pfn in its own block;
 * arguments must not refer to caller variables of the same names.
 */
#define	PP2KPMPG(pp, kp) {						\
	struct memseg	*mseg;						\
	pgcnt_t		inx;						\
	pfn_t		pfn;						\
									\
	pfn = (pp)->p_pagenum;						\
	mseg = page_numtomemseg_nolock(pfn);				\
	ASSERT(mseg);							\
	inx = ptokpmp(kpmptop(ptokpmp(pfn)) - mseg->kpm_pbase);		\
	ASSERT(inx < mseg->kpm_nkpmpgs);				\
	(kp) = &mseg->kpm_pages[inx];					\
}
240 
/*
 * page to kpm_spage
 *
 * Looks up the memseg that holds pp (no locks taken) and stores in
 * ksp the address of the kpm_spage entry for pp's page frame.
 * Both macro parameters are parenthesized so that arbitrary
 * expressions (e.g. &pages[i]) can be passed safely.
 * The macro declares the locals mseg, inx and pfn in its own block;
 * arguments must not refer to caller variables of the same names.
 */
#define	PP2KPMSPG(pp, ksp) {						\
	struct memseg	*mseg;						\
	pgcnt_t		inx;						\
	pfn_t		pfn;						\
									\
	pfn = (pp)->p_pagenum;						\
	mseg = page_numtomemseg_nolock(pfn);				\
	ASSERT(mseg);							\
	inx = pfn - mseg->kpm_pbase;					\
	(ksp) = &mseg->kpm_spages[inx];					\
}
253 
254 /*
255  * hat_kpm_fault is called from segkpm_fault when a kpm tsbmiss occurred
256  * which could not be resolved by the trap level tsbmiss handler for the
257  * following reasons:
258  * . The vaddr is in VAC alias range (always PAGESIZE mapping size).
259  * . The kpm (s)page range of vaddr is in a VAC alias prevention state.
260  * . tsbmiss handling at trap level is not desired (DEBUG kernel only,
261  *   kpm_tsbmtl == 0).
262  */
263 int
264 hat_kpm_fault(struct hat *hat, caddr_t vaddr)
265 {
266 	int		error;
267 	uintptr_t	paddr;
268 	pfn_t		pfn;
269 	struct memseg	*mseg;
270 	page_t	*pp;
271 
272 	if (kpm_enable == 0) {
273 		cmn_err(CE_WARN, "hat_kpm_fault: kpm_enable not set");
274 		return (ENOTSUP);
275 	}
276 
277 	ASSERT(hat == ksfmmup);
278 	ASSERT(IS_KPM_ADDR(vaddr));
279 
280 	SFMMU_KPM_VTOP(vaddr, paddr);
281 	pfn = (pfn_t)btop(paddr);
282 	mseg = page_numtomemseg_nolock(pfn);
283 	if (mseg == NULL)
284 		return (EFAULT);
285 
286 	pp = &mseg->pages[(pgcnt_t)(pfn - mseg->pages_base)];
287 	ASSERT((pfn_t)pp->p_pagenum == pfn);
288 
289 	if (!PAGE_LOCKED(pp))
290 		return (EFAULT);
291 
292 	if (kpm_smallpages == 0)
293 		error = sfmmu_kpm_fault(vaddr, mseg, pp);
294 	else
295 		error = sfmmu_kpm_fault_small(vaddr, mseg, pp);
296 
297 	return (error);
298 }
299 
300 /*
301  * memseg_hash[] was cleared, need to clear memseg_phash[] too.
302  */
303 void
304 hat_kpm_mseghash_clear(int nentries)
305 {
306 	pgcnt_t i;
307 
308 	if (kpm_enable == 0)
309 		return;
310 
311 	for (i = 0; i < nentries; i++)
312 		memseg_phash[i] = MSEG_NULLPTR_PA;
313 }
314 
315 /*
316  * Update memseg_phash[inx] when memseg_hash[inx] was changed.
317  */
318 void
319 hat_kpm_mseghash_update(pgcnt_t inx, struct memseg *msp)
320 {
321 	if (kpm_enable == 0)
322 		return;
323 
324 	memseg_phash[inx] = (msp) ? va_to_pa(msp) : MSEG_NULLPTR_PA;
325 }
326 
327 /*
328  * Update kpm memseg members from basic memseg info.
329  */
330 void
331 hat_kpm_addmem_mseg_update(struct memseg *msp, pgcnt_t nkpmpgs,
332 	offset_t kpm_pages_off)
333 {
334 	if (kpm_enable == 0)
335 		return;
336 
337 	msp->kpm_pages = (kpm_page_t *)((caddr_t)msp->pages + kpm_pages_off);
338 	msp->kpm_nkpmpgs = nkpmpgs;
339 	msp->kpm_pbase = kpmptop(ptokpmp(msp->pages_base));
340 	msp->pagespa = va_to_pa(msp->pages);
341 	msp->epagespa = va_to_pa(msp->epages);
342 	msp->kpm_pagespa = va_to_pa(msp->kpm_pages);
343 }
344 
345 /*
346  * Setup nextpa when a memseg is inserted.
347  * Assumes that the memsegslock is already held.
348  */
349 void
350 hat_kpm_addmem_mseg_insert(struct memseg *msp)
351 {
352 	if (kpm_enable == 0)
353 		return;
354 
355 	ASSERT(memsegs_lock_held());
356 	msp->nextpa = (memsegs) ? va_to_pa(memsegs) : MSEG_NULLPTR_PA;
357 }
358 
359 /*
360  * Setup memsegspa when a memseg is (head) inserted.
361  * Called before memsegs is updated to complete a
362  * memseg insert operation.
363  * Assumes that the memsegslock is already held.
364  */
365 void
366 hat_kpm_addmem_memsegs_update(struct memseg *msp)
367 {
368 	if (kpm_enable == 0)
369 		return;
370 
371 	ASSERT(memsegs_lock_held());
372 	ASSERT(memsegs);
373 	memsegspa = va_to_pa(msp);
374 }
375 
376 /*
377  * Return end of metadata for an already setup memseg.
378  *
379  * Note: kpm_pages and kpm_spages are aliases and the underlying
380  * member of struct memseg is a union, therefore they always have
381  * the same address within a memseg. They must be differentiated
382  * when pointer arithmetic is used with them.
383  */
384 caddr_t
385 hat_kpm_mseg_reuse(struct memseg *msp)
386 {
387 	caddr_t end;
388 
389 	if (kpm_smallpages == 0)
390 		end = (caddr_t)(msp->kpm_pages + msp->kpm_nkpmpgs);
391 	else
392 		end = (caddr_t)(msp->kpm_spages + msp->kpm_nkpmpgs);
393 
394 	return (end);
395 }
396 
397 /*
398  * Update memsegspa (when first memseg in list
399  * is deleted) or nextpa  when a memseg deleted.
400  * Assumes that the memsegslock is already held.
401  */
402 void
403 hat_kpm_delmem_mseg_update(struct memseg *msp, struct memseg **mspp)
404 {
405 	struct memseg *lmsp;
406 
407 	if (kpm_enable == 0)
408 		return;
409 
410 	ASSERT(memsegs_lock_held());
411 
412 	if (mspp == &memsegs) {
413 		memsegspa = (msp->next) ?
414 		    va_to_pa(msp->next) : MSEG_NULLPTR_PA;
415 	} else {
416 		lmsp = (struct memseg *)
417 		    ((uint64_t)mspp - offsetof(struct memseg, next));
418 		lmsp->nextpa = (msp->next) ?
419 		    va_to_pa(msp->next) : MSEG_NULLPTR_PA;
420 	}
421 }
422 
423 /*
424  * Update kpm members for all memseg's involved in a split operation
425  * and do the atomic update of the physical memseg chain.
426  *
427  * Note: kpm_pages and kpm_spages are aliases and the underlying member
428  * of struct memseg is a union, therefore they always have the same
429  * address within a memseg. With that the direct assignments and
430  * va_to_pa conversions below don't have to be distinguished wrt. to
431  * kpm_smallpages. They must be differentiated when pointer arithmetic
432  * is used with them.
433  *
434  * Assumes that the memsegslock is already held.
435  */
436 void
437 hat_kpm_split_mseg_update(struct memseg *msp, struct memseg **mspp,
438 	struct memseg *lo, struct memseg *mid, struct memseg *hi)
439 {
440 	pgcnt_t start, end, kbase, kstart, num;
441 	struct memseg *lmsp;
442 
443 	if (kpm_enable == 0)
444 		return;
445 
446 	ASSERT(memsegs_lock_held());
447 	ASSERT(msp && mid && msp->kpm_pages);
448 
449 	kbase = ptokpmp(msp->kpm_pbase);
450 
451 	if (lo) {
452 		num = lo->pages_end - lo->pages_base;
453 		start = kpmptop(ptokpmp(lo->pages_base));
454 		/* align end to kpm page size granularity */
455 		end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs;
456 		lo->kpm_pbase = start;
457 		lo->kpm_nkpmpgs = ptokpmp(end - start);
458 		lo->kpm_pages = msp->kpm_pages;
459 		lo->kpm_pagespa = va_to_pa(lo->kpm_pages);
460 		lo->pagespa = va_to_pa(lo->pages);
461 		lo->epagespa = va_to_pa(lo->epages);
462 		lo->nextpa = va_to_pa(lo->next);
463 	}
464 
465 	/* mid */
466 	num = mid->pages_end - mid->pages_base;
467 	kstart = ptokpmp(mid->pages_base);
468 	start = kpmptop(kstart);
469 	/* align end to kpm page size granularity */
470 	end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs;
471 	mid->kpm_pbase = start;
472 	mid->kpm_nkpmpgs = ptokpmp(end - start);
473 	if (kpm_smallpages == 0) {
474 		mid->kpm_pages = msp->kpm_pages + (kstart - kbase);
475 	} else {
476 		mid->kpm_spages = msp->kpm_spages + (kstart - kbase);
477 	}
478 	mid->kpm_pagespa = va_to_pa(mid->kpm_pages);
479 	mid->pagespa = va_to_pa(mid->pages);
480 	mid->epagespa = va_to_pa(mid->epages);
481 	mid->nextpa = (mid->next) ?  va_to_pa(mid->next) : MSEG_NULLPTR_PA;
482 
483 	if (hi) {
484 		num = hi->pages_end - hi->pages_base;
485 		kstart = ptokpmp(hi->pages_base);
486 		start = kpmptop(kstart);
487 		/* align end to kpm page size granularity */
488 		end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs;
489 		hi->kpm_pbase = start;
490 		hi->kpm_nkpmpgs = ptokpmp(end - start);
491 		if (kpm_smallpages == 0) {
492 			hi->kpm_pages = msp->kpm_pages + (kstart - kbase);
493 		} else {
494 			hi->kpm_spages = msp->kpm_spages + (kstart - kbase);
495 		}
496 		hi->kpm_pagespa = va_to_pa(hi->kpm_pages);
497 		hi->pagespa = va_to_pa(hi->pages);
498 		hi->epagespa = va_to_pa(hi->epages);
499 		hi->nextpa = (hi->next) ? va_to_pa(hi->next) : MSEG_NULLPTR_PA;
500 	}
501 
502 	/*
503 	 * Atomic update of the physical memseg chain
504 	 */
505 	if (mspp == &memsegs) {
506 		memsegspa = (lo) ? va_to_pa(lo) : va_to_pa(mid);
507 	} else {
508 		lmsp = (struct memseg *)
509 		    ((uint64_t)mspp - offsetof(struct memseg, next));
510 		lmsp->nextpa = (lo) ? va_to_pa(lo) : va_to_pa(mid);
511 	}
512 }
513 
514 /*
515  * Walk the memsegs chain, applying func to each memseg span and vcolor.
516  */
517 void
518 hat_kpm_walk(void (*func)(void *, void *, size_t), void *arg)
519 {
520 	pfn_t	pbase, pend;
521 	int	vcolor;
522 	void	*base;
523 	size_t	size;
524 	struct memseg *msp;
525 	extern uint_t vac_colors;
526 
527 	for (msp = memsegs; msp; msp = msp->next) {
528 		pbase = msp->pages_base;
529 		pend = msp->pages_end;
530 		for (vcolor = 0; vcolor < vac_colors; vcolor++) {
531 			base = ptob(pbase) + kpm_vbase + kpm_size * vcolor;
532 			size = ptob(pend - pbase);
533 			func(arg, base, size);
534 		}
535 	}
536 }
537 
538 
539 /* -- sfmmu_kpm internal section -- */
540 
541 /*
542  * Return the page frame number if a valid segkpm mapping exists
543  * for vaddr, otherwise return PFN_INVALID. No locks are grabbed.
544  * Should only be used by other sfmmu routines.
545  */
546 pfn_t
547 sfmmu_kpm_vatopfn(caddr_t vaddr)
548 {
549 	uintptr_t	paddr;
550 	pfn_t		pfn;
551 	page_t	*pp;
552 
553 	ASSERT(kpm_enable && IS_KPM_ADDR(vaddr));
554 
555 	SFMMU_KPM_VTOP(vaddr, paddr);
556 	pfn = (pfn_t)btop(paddr);
557 	pp = page_numtopp_nolock(pfn);
558 	if (pp && pp->p_kpmref)
559 		return (pfn);
560 	else
561 		return ((pfn_t)PFN_INVALID);
562 }
563 
564 /*
565  * Lookup a kpme in the p_kpmelist.
566  */
567 static int
568 sfmmu_kpme_lookup(struct kpme *kpme, page_t *pp)
569 {
570 	struct kpme	*p;
571 
572 	for (p = pp->p_kpmelist; p; p = p->kpe_next) {
573 		if (p == kpme)
574 			return (1);
575 	}
576 	return (0);
577 }
578 
579 /*
580  * Insert a kpme into the p_kpmelist and increment
581  * the per page kpm reference count.
582  */
583 static void
584 sfmmu_kpme_add(struct kpme *kpme, page_t *pp)
585 {
586 	ASSERT(pp->p_kpmref >= 0);
587 
588 	/* head insert */
589 	kpme->kpe_prev = NULL;
590 	kpme->kpe_next = pp->p_kpmelist;
591 
592 	if (pp->p_kpmelist)
593 		pp->p_kpmelist->kpe_prev = kpme;
594 
595 	pp->p_kpmelist = kpme;
596 	kpme->kpe_page = pp;
597 	pp->p_kpmref++;
598 }
599 
600 /*
601  * Remove a kpme from the p_kpmelist and decrement
602  * the per page kpm reference count.
603  */
604 static void
605 sfmmu_kpme_sub(struct kpme *kpme, page_t *pp)
606 {
607 	ASSERT(pp->p_kpmref > 0);
608 
609 	if (kpme->kpe_prev) {
610 		ASSERT(pp->p_kpmelist != kpme);
611 		ASSERT(kpme->kpe_prev->kpe_page == pp);
612 		kpme->kpe_prev->kpe_next = kpme->kpe_next;
613 	} else {
614 		ASSERT(pp->p_kpmelist == kpme);
615 		pp->p_kpmelist = kpme->kpe_next;
616 	}
617 
618 	if (kpme->kpe_next) {
619 		ASSERT(kpme->kpe_next->kpe_page == pp);
620 		kpme->kpe_next->kpe_prev = kpme->kpe_prev;
621 	}
622 
623 	kpme->kpe_next = kpme->kpe_prev = NULL;
624 	kpme->kpe_page = NULL;
625 	pp->p_kpmref--;
626 }
627 
/*
 * Mapin a single page. It is called every time a page changes its state
 * from kpm-unmapped to kpm-mapped. It may not be called when only a new
 * kpm instance does a mapin and wants to share the mapping.
 * Assumes that the mlist mutex is already grabbed.
 *
 * Returns the kpm virtual address selected by sfmmu_kpm_getvaddr().
 *
 * Two schemes exist, selected by kpm_smallpages:
 * . kpm_smallpages == 0: the state is kept per kpm_page and is
 *   protected by the hashed khl_mutex. Depending on the cache state of
 *   the page and on the kpm_page counters the page is mapped large
 *   (TTE4M, counted in kp_refcnt), small within the regular range
 *   (TTE8K, counted in kp_refcnts, page marked P_KPMS) or small within
 *   a VAC alias range (TTE8K, counted in kp_refcnta). kp_refcntc
 *   counts uncached (conflict) pages in the regular range; the value
 *   -1 is the "go" indication for the trap level tsbmiss handler.
 * . kpm_smallpages != 0: the page is always mapped TTE8K and the
 *   state is kept in the kp_mapped_flag of the page's kpm_spage.
 */
static caddr_t
sfmmu_kpm_mapin(page_t *pp)
{
	kpm_page_t	*kp;
	kpm_hlk_t	*kpmp;
	caddr_t		vaddr;
	int		kpm_vac_range;
	pfn_t		pfn;
	tte_t		tte;
	kmutex_t	*pmtx;
	int		uncached;
	kpm_spage_t	*ksp;
	kpm_shlk_t	*kpmsp;
	int		oldval;

	ASSERT(sfmmu_mlist_held(pp));
	ASSERT(pp->p_kpmref == 0);

	/*
	 * kpm_vac_range comes back non zero when the virtual color
	 * wanted for the page differs from the color of its physical
	 * address, i.e. when vaddr was moved into an alias range.
	 */
	vaddr = sfmmu_kpm_getvaddr(pp, &kpm_vac_range);

	ASSERT(IS_KPM_ADDR(vaddr));
	/* uncached selects the VUNCACHED tte flavor further down */
	uncached = PP_ISNC(pp);
	pfn = pp->p_pagenum;

	if (kpm_smallpages)
		goto smallpages_mapin;

	/* find the kpm_page covering pp and take its hashed mutex */
	PP2KPMPG(pp, kp);

	kpmp = KPMP_HASH(kp);
	mutex_enter(&kpmp->khl_mutex);

	ASSERT(PP_ISKPMC(pp) == 0);
	ASSERT(PP_ISKPMS(pp) == 0);

	if (uncached) {
		/* ASSERT(pp->p_share); XXX use hat_page_getshare */
		if (kpm_vac_range == 0) {
			if (kp->kp_refcnts == 0) {
				/*
				 * Must remove large page mapping if it exists.
				 * Pages in uncached state can only be mapped
				 * small (PAGESIZE) within the regular kpm
				 * range.
				 */
				if (kp->kp_refcntc == -1) {
					/* remove go indication */
					sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
					    &kpmp->khl_lock, KPMTSBM_STOP);
				}
				if (kp->kp_refcnt > 0 && kp->kp_refcntc == 0)
					sfmmu_kpm_demap_large(vaddr);
			}
			ASSERT(kp->kp_refcntc >= 0);
			/* one more conflict page within this kpm_page */
			kp->kp_refcntc++;
		}
		/* mark the page itself as kpm conflict page */
		pmtx = sfmmu_page_enter(pp);
		PP_SETKPMC(pp);
		sfmmu_page_exit(pmtx);
	}

	if ((kp->kp_refcntc > 0 || kp->kp_refcnts > 0) && kpm_vac_range == 0) {
		/*
		 * Have to do a small (PAGESIZE) mapin within this kpm_page
		 * range since it is marked to be in VAC conflict mode or
		 * when there are still other small mappings around.
		 */

		/* tte assembly */
		if (uncached == 0)
			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
		else
			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);

		/* tsb dropin */
		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);

		/* remember on the page that it is mapped small */
		pmtx = sfmmu_page_enter(pp);
		PP_SETKPMS(pp);
		sfmmu_page_exit(pmtx);

		kp->kp_refcnts++;
		ASSERT(kp->kp_refcnts > 0);
		goto exit;
	}

	if (kpm_vac_range == 0) {
		/*
		 * Fast path / regular case, no VAC conflict handling
		 * in progress within this kpm_page range.
		 */
		if (kp->kp_refcnt == 0) {
			/* first page of this kpm_page: set up large mapping */

			/* tte assembly */
			KPM_TTE_VCACHED(tte.ll, pfn, TTE4M);

			/* tsb dropin */
			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT4M);

			/* Set go flag for TL tsbmiss handler */
			if (kp->kp_refcntc == 0)
				sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
				    &kpmp->khl_lock, KPMTSBM_START);

			ASSERT(kp->kp_refcntc == -1);
		}
		kp->kp_refcnt++;
		ASSERT(kp->kp_refcnt);

	} else {
		/*
		 * The page is not setup according to the common VAC
		 * prevention rules for the regular and kpm mapping layer
		 * E.g. the page layer was not able to deliver a right
		 * vcolor'ed page for a given vaddr corresponding to
		 * the wanted p_offset. It has to be mapped in small in
		 * within the corresponding kpm vac range in order to
		 * prevent VAC alias conflicts.
		 */

		/* tte assembly */
		if (uncached == 0) {
			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
		} else {
			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
		}

		/* tsb dropin */
		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);

		/* count the alias range mapping */
		kp->kp_refcnta++;
		if (kp->kp_refcntc == -1) {
			ASSERT(kp->kp_refcnt > 0);

			/* remove go indication */
			sfmmu_kpm_tsbmtl(&kp->kp_refcntc, &kpmp->khl_lock,
			    KPMTSBM_STOP);
		}
		ASSERT(kp->kp_refcntc >= 0);
	}
exit:
	mutex_exit(&kpmp->khl_mutex);
	return (vaddr);

smallpages_mapin:
	/* small page mode: no kpm_page state, khl_mutex is not taken */
	if (uncached == 0) {
		/* tte assembly */
		KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
	} else {
		/*
		 * Just in case this same page was mapped cacheable prior to
		 * this and the old tte remains in tlb.
		 */
		sfmmu_kpm_demap_small(vaddr);

		/* ASSERT(pp->p_share); XXX use hat_page_getshare */
		pmtx = sfmmu_page_enter(pp);
		PP_SETKPMC(pp);
		sfmmu_page_exit(pmtx);
		/* tte assembly */
		KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
	}

	/* tsb dropin */
	sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);

	PP2KPMSPG(pp, ksp);
	kpmsp = KPMP_SHASH(ksp);

	/*
	 * Publish the new mapped state in the kpm_spage; the previous
	 * flag value is handed back and must have been zero (unmapped).
	 */
	oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag, &kpmsp->kshl_lock,
	    (uncached) ? (KPM_MAPPED_GO | KPM_MAPPEDSC) :
	    (KPM_MAPPED_GO | KPM_MAPPEDS));

	if (oldval != 0)
		panic("sfmmu_kpm_mapin: stale smallpages mapping");

	return (vaddr);
}
812 
/*
 * Mapout a single page. It is called every time a page changes its state
 * from kpm-mapped to kpm-unmapped. It may not be called when only a kpm
 * instance calls mapout and there are still other instances mapping the
 * page. Assumes that the mlist mutex is already grabbed.
 *
 * vaddr is the kpm virtual address the page was mapped at; whether it
 * lies in a VAC alias range decides which kpm_page counter is affected.
 * Inconsistent counters or page/kpm_spage states cause a panic.
 *
 * Note: In normal mode (no VAC conflict prevention pending) TLB's are
 * not flushed. This is the core segkpm behavior to avoid xcalls. It is
 * no problem because a translation from a segkpm virtual address to a
 * physical address is always the same. The only downside is a slightly
 * increased window of vulnerability for misbehaving _kernel_ modules.
 */
static void
sfmmu_kpm_mapout(page_t *pp, caddr_t vaddr)
{
	kpm_page_t	*kp;
	kpm_hlk_t	*kpmp;
	int		alias_range;
	kmutex_t	*pmtx;
	kpm_spage_t	*ksp;
	kpm_shlk_t	*kpmsp;
	int		oldval;

	ASSERT(sfmmu_mlist_held(pp));
	ASSERT(pp->p_kpmref == 0);

	alias_range = IS_KPM_ALIAS_RANGE(vaddr);

	if (kpm_smallpages)
		goto smallpages_mapout;

	/* find the kpm_page covering pp and take its hashed mutex */
	PP2KPMPG(pp, kp);
	kpmp = KPMP_HASH(kp);
	mutex_enter(&kpmp->khl_mutex);

	if (alias_range) {
		/* mapping lives in a VAC alias range, counted in kp_refcnta */
		ASSERT(PP_ISKPMS(pp) == 0);
		if (kp->kp_refcnta <= 0) {
			panic("sfmmu_kpm_mapout: bad refcnta kp=%p",
			    (void *)kp);
		}

		if (PP_ISTNC(pp))  {
			if (PP_ISKPMC(pp) == 0) {
				/*
				 * Uncached kpm mappings must always have
				 * forced "small page" mode.
				 */
				panic("sfmmu_kpm_mapout: uncached page not "
				    "kpm marked");
			}
			sfmmu_kpm_demap_small(vaddr);

			pmtx = sfmmu_page_enter(pp);
			PP_CLRKPMC(pp);
			sfmmu_page_exit(pmtx);

			/*
			 * Check if we can resume cached mode. This might
			 * be the case if the kpm mapping was the only
			 * mapping in conflict with other non rule
			 * compliant mappings. The page is no more marked
			 * as kpm mapped, so the conv_tnc path will not
			 * change kpm state.
			 */
			conv_tnc(pp, TTE8K);

		} else if (PP_ISKPMC(pp) == 0) {
			/* remove TSB entry only */
			sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT);

		} else {
			/* already demapped */
			pmtx = sfmmu_page_enter(pp);
			PP_CLRKPMC(pp);
			sfmmu_page_exit(pmtx);
		}
		kp->kp_refcnta--;
		goto exit;
	}

	if (kp->kp_refcntc <= 0 && kp->kp_refcnts == 0) {
		/*
		 * Fast path / regular case.
		 */
		ASSERT(kp->kp_refcntc >= -1);
		ASSERT(!(pp->p_nrm & (P_KPMC | P_KPMS | P_TNC | P_PNC)));

		if (kp->kp_refcnt <= 0)
			panic("sfmmu_kpm_mapout: bad refcnt kp=%p", (void *)kp);

		/* last page of this kpm_page: retire the large mapping */
		if (--kp->kp_refcnt == 0) {
			/* remove go indication */
			if (kp->kp_refcntc == -1) {
				sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
				    &kpmp->khl_lock, KPMTSBM_STOP);
			}
			ASSERT(kp->kp_refcntc == 0);

			/* remove TSB entry */
			sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT4M);
#ifdef	DEBUG
			if (kpm_tlb_flush)
				sfmmu_kpm_demap_tlbs(vaddr);
#endif
		}

	} else {
		/*
		 * The VAC alias path.
		 * We come here if the kpm vaddr is not in any alias_range
		 * and we are unmapping a page within the regular kpm_page
		 * range. The kpm_page either holds conflict pages and/or
		 * is in "small page" mode. If the page is not marked
		 * P_KPMS it couldn't have a valid PAGESIZE sized TSB
		 * entry. Dcache flushing is done lazy and follows the
		 * rules of the regular virtual page coloring scheme.
		 *
		 * Per page states and required actions:
		 *   P_KPMC: remove a kpm mapping that is conflicting.
		 *   P_KPMS: remove a small kpm mapping within a kpm_page.
		 *   P_TNC:  check if we can re-cache the page.
		 *   P_PNC:  we cannot re-cache, sorry.
		 * Per kpm_page:
		 *   kp_refcntc > 0: page is part of a kpm_page with conflicts.
		 *   kp_refcnts > 0: rm a small mapped page within a kpm_page.
		 */

		if (PP_ISKPMS(pp)) {
			if (kp->kp_refcnts < 1) {
				panic("sfmmu_kpm_mapout: bad refcnts kp=%p",
				    (void *)kp);
			}
			sfmmu_kpm_demap_small(vaddr);

			/*
			 * Check if we can resume cached mode. This might
			 * be the case if the kpm mapping was the only
			 * mapping in conflict with other non rule
			 * compliant mappings. The page is no more marked
			 * as kpm mapped, so the conv_tnc path will not
			 * change kpm state.
			 */
			if (PP_ISTNC(pp))  {
				if (!PP_ISKPMC(pp)) {
					/*
					 * Uncached kpm mappings must always
					 * have forced "small page" mode.
					 */
					panic("sfmmu_kpm_mapout: uncached "
					    "page not kpm marked");
				}
				conv_tnc(pp, TTE8K);
			}
			/*
			 * Move the reference from the small to the regular
			 * counter; kp_refcnt is decremented again by the
			 * common check at the end of this branch.
			 */
			kp->kp_refcnts--;
			kp->kp_refcnt++;
			pmtx = sfmmu_page_enter(pp);
			PP_CLRKPMS(pp);
			sfmmu_page_exit(pmtx);
		}

		if (PP_ISKPMC(pp)) {
			if (kp->kp_refcntc < 1) {
				panic("sfmmu_kpm_mapout: bad refcntc kp=%p",
				    (void *)kp);
			}
			pmtx = sfmmu_page_enter(pp);
			PP_CLRKPMC(pp);
			sfmmu_page_exit(pmtx);
			kp->kp_refcntc--;
		}

		/* drop the regular reference; it must have been positive */
		if (kp->kp_refcnt-- < 1)
			panic("sfmmu_kpm_mapout: bad refcnt kp=%p", (void *)kp);
	}
exit:
	mutex_exit(&kpmp->khl_mutex);
	return;

smallpages_mapout:
	/* small page mode: state lives in the kpm_spage's kp_mapped_flag */
	PP2KPMSPG(pp, ksp);
	kpmsp = KPMP_SHASH(ksp);

	if (PP_ISKPMC(pp) == 0) {
		/* reset the flag to zero, the previous value is returned */
		oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
		    &kpmsp->kshl_lock, 0);

		if (oldval != KPM_MAPPEDS) {
			/*
			 * When we're called after sfmmu_kpm_hme_unload,
			 * KPM_MAPPEDSC is valid too.
			 */
			if (oldval != KPM_MAPPEDSC)
				panic("sfmmu_kpm_mapout: incorrect mapping");
		}

		/* remove TSB entry */
		sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT);
#ifdef	DEBUG
		if (kpm_tlb_flush)
			sfmmu_kpm_demap_tlbs(vaddr);
#endif

	} else if (PP_ISTNC(pp)) {
		oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
		    &kpmsp->kshl_lock, 0);

		if (oldval != KPM_MAPPEDSC || PP_ISKPMC(pp) == 0)
			panic("sfmmu_kpm_mapout: inconsistent TNC mapping");

		sfmmu_kpm_demap_small(vaddr);

		pmtx = sfmmu_page_enter(pp);
		PP_CLRKPMC(pp);
		sfmmu_page_exit(pmtx);

		/*
		 * Check if we can resume cached mode. This might be
		 * the case if the kpm mapping was the only mapping
		 * in conflict with other non rule compliant mappings.
		 * The page is no more marked as kpm mapped, so the
		 * conv_tnc path will not change the kpm state.
		 */
		conv_tnc(pp, TTE8K);

	} else {
		/* P_KPMC set but not P_TNC: only the marks are cleared */
		oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
		    &kpmsp->kshl_lock, 0);

		if (oldval != KPM_MAPPEDSC)
			panic("sfmmu_kpm_mapout: inconsistent mapping");

		pmtx = sfmmu_page_enter(pp);
		PP_CLRKPMC(pp);
		sfmmu_page_exit(pmtx);
	}
}
1050 
/* Absolute value. Note: the argument is evaluated twice. */
#define	abs(x)  ((x) >= 0 ? (x) : -(x))
1052 
1053 /*
1054  * Determine appropriate kpm mapping address and handle any kpm/hme
1055  * conflicts. Page mapping list and its vcolor parts must be protected.
1056  */
1057 static caddr_t
1058 sfmmu_kpm_getvaddr(page_t *pp, int *kpm_vac_rangep)
1059 {
1060 	int		vcolor, vcolor_pa;
1061 	caddr_t		vaddr;
1062 	uintptr_t	paddr;
1063 
1064 
1065 	ASSERT(sfmmu_mlist_held(pp));
1066 
1067 	paddr = ptob(pp->p_pagenum);
1068 	vcolor_pa = addr_to_vcolor(paddr);
1069 
1070 	if (pp->p_vnode && IS_SWAPFSVP(pp->p_vnode)) {
1071 		vcolor = (PP_NEWPAGE(pp) || PP_ISNC(pp)) ?
1072 		    vcolor_pa : PP_GET_VCOLOR(pp);
1073 	} else {
1074 		vcolor = addr_to_vcolor(pp->p_offset);
1075 	}
1076 
1077 	vaddr = kpm_vbase + paddr;
1078 	*kpm_vac_rangep = 0;
1079 
1080 	if (vcolor_pa != vcolor) {
1081 		*kpm_vac_rangep = abs(vcolor - vcolor_pa);
1082 		vaddr += ((uintptr_t)(vcolor - vcolor_pa) << MMU_PAGESHIFT);
1083 		vaddr += (vcolor_pa > vcolor) ?
1084 		    ((uintptr_t)vcolor_pa << kpm_size_shift) :
1085 		    ((uintptr_t)(vcolor - vcolor_pa) << kpm_size_shift);
1086 
1087 		ASSERT(!PP_ISMAPPED_LARGE(pp));
1088 	}
1089 
1090 	if (PP_ISNC(pp))
1091 		return (vaddr);
1092 
1093 	if (PP_NEWPAGE(pp)) {
1094 		PP_SET_VCOLOR(pp, vcolor);
1095 		return (vaddr);
1096 	}
1097 
1098 	if (PP_GET_VCOLOR(pp) == vcolor)
1099 		return (vaddr);
1100 
1101 	ASSERT(!PP_ISMAPPED_KPM(pp));
1102 	sfmmu_kpm_vac_conflict(pp, vaddr);
1103 
1104 	return (vaddr);
1105 }
1106 
/*
 * VAC conflict state bit values.
 * The following defines are used to make the handling of the
 * various input states more concise. For that the kpm states
 * per kpm_page and per page are combined in a summary state.
 * Each single state has a corresponding bit value in the
 * summary state. These defines only apply for kpm large page
 * mappings. Within comments the abbreviations "kc, c, ks, s"
 * are used as short form of the actual state, e.g. "kc" for
 * "kp_refcntc > 0", etc.
 */
#define	KPM_KC	0x00000008	/* kpm_page: kp_refcntc > 0 */
#define	KPM_C	0x00000004	/* page: P_KPMC set */
#define	KPM_KS	0x00000002	/* kpm_page: kp_refcnts > 0 */
#define	KPM_S	0x00000001	/* page: P_KPMS set */

/*
 * Summary states used in sfmmu_kpm_fault (KPM_TSBM_*).
 * See also the more detailed comments within the sfmmu_kpm_fault switch.
 * Abbreviations used:
 * CONFL: VAC conflict(s) within a kpm_page.
 * MAPS:  Mapped small: Page mapped in using a regular page size kpm mapping.
 * RASM:  Re-assembling of a large page mapping possible.
 * RPLS:  Replace: TSB miss due to TSB replacement only.
 * BRKO:  Breakup Other: A large kpm mapping has to be broken because another
 *        page within the kpm_page is already involved in a VAC conflict.
 * BRKT:  Breakup This: A large kpm mapping has to be broken, this page
 *        is involved in a VAC conflict.
 */
#define	KPM_TSBM_CONFL_GONE	(0)
#define	KPM_TSBM_MAPS_RASM	(KPM_KS)
#define	KPM_TSBM_RPLS_RASM	(KPM_KS | KPM_S)
#define	KPM_TSBM_MAPS_BRKO	(KPM_KC)
#define	KPM_TSBM_MAPS		(KPM_KC | KPM_KS)
#define	KPM_TSBM_RPLS		(KPM_KC | KPM_KS | KPM_S)
#define	KPM_TSBM_MAPS_BRKT	(KPM_KC | KPM_C)
#define	KPM_TSBM_MAPS_CONFL	(KPM_KC | KPM_C | KPM_KS)
#define	KPM_TSBM_RPLS_CONFL	(KPM_KC | KPM_C | KPM_KS | KPM_S)
1145 
1146 /*
1147  * kpm fault handler for mappings with large page size.
1148  */
1149 int
1150 sfmmu_kpm_fault(caddr_t vaddr, struct memseg *mseg, page_t *pp)
1151 {
1152 	int		error;
1153 	pgcnt_t		inx;
1154 	kpm_page_t	*kp;
1155 	tte_t		tte;
1156 	pfn_t		pfn = pp->p_pagenum;
1157 	kpm_hlk_t	*kpmp;
1158 	kmutex_t	*pml;
1159 	int		alias_range;
1160 	int		uncached = 0;
1161 	kmutex_t	*pmtx;
1162 	int		badstate;
1163 	uint_t		tsbmcase;
1164 
1165 	alias_range = IS_KPM_ALIAS_RANGE(vaddr);
1166 
1167 	inx = ptokpmp(kpmptop(ptokpmp(pfn)) - mseg->kpm_pbase);
1168 	if (inx >= mseg->kpm_nkpmpgs) {
1169 		cmn_err(CE_PANIC, "sfmmu_kpm_fault: kpm overflow in memseg "
1170 		    "0x%p  pp 0x%p", (void *)mseg, (void *)pp);
1171 	}
1172 
1173 	kp = &mseg->kpm_pages[inx];
1174 	kpmp = KPMP_HASH(kp);
1175 
1176 	pml = sfmmu_mlist_enter(pp);
1177 
1178 	if (!PP_ISMAPPED_KPM(pp)) {
1179 		sfmmu_mlist_exit(pml);
1180 		return (EFAULT);
1181 	}
1182 
1183 	mutex_enter(&kpmp->khl_mutex);
1184 
1185 	if (alias_range) {
1186 		ASSERT(!PP_ISMAPPED_LARGE(pp));
1187 		if (kp->kp_refcnta > 0) {
1188 			if (PP_ISKPMC(pp)) {
1189 				pmtx = sfmmu_page_enter(pp);
1190 				PP_CLRKPMC(pp);
1191 				sfmmu_page_exit(pmtx);
1192 			}
1193 			/*
1194 			 * Check for vcolor conflicts. Return here
1195 			 * w/ either no conflict (fast path), removed hme
1196 			 * mapping chains (unload conflict) or uncached
1197 			 * (uncache conflict). VACaches are cleaned and
1198 			 * p_vcolor and PP_TNC are set accordingly for the
1199 			 * conflict cases.  Drop kpmp for uncache conflict
1200 			 * cases since it will be grabbed within
1201 			 * sfmmu_kpm_page_cache in case of an uncache
1202 			 * conflict.
1203 			 */
1204 			mutex_exit(&kpmp->khl_mutex);
1205 			sfmmu_kpm_vac_conflict(pp, vaddr);
1206 			mutex_enter(&kpmp->khl_mutex);
1207 
1208 			if (PP_ISNC(pp)) {
1209 				uncached = 1;
1210 				pmtx = sfmmu_page_enter(pp);
1211 				PP_SETKPMC(pp);
1212 				sfmmu_page_exit(pmtx);
1213 			}
1214 			goto smallexit;
1215 
1216 		} else {
1217 			/*
1218 			 * We got a tsbmiss on a not active kpm_page range.
1219 			 * Let segkpm_fault decide how to panic.
1220 			 */
1221 			error = EFAULT;
1222 		}
1223 		goto exit;
1224 	}
1225 
1226 	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
1227 	if (kp->kp_refcntc == -1) {
1228 		/*
1229 		 * We should come here only if trap level tsb miss
1230 		 * handler is disabled.
1231 		 */
1232 		badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 ||
1233 		    PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp));
1234 
1235 		if (badstate == 0)
1236 			goto largeexit;
1237 	}
1238 
1239 	if (badstate || kp->kp_refcntc < 0)
1240 		goto badstate_exit;
1241 
1242 	/*
1243 	 * Combine the per kpm_page and per page kpm VAC states to
1244 	 * a summary state in order to make the kpm fault handling
1245 	 * more concise.
1246 	 */
1247 	tsbmcase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) |
1248 	    ((kp->kp_refcnts > 0) ? KPM_KS : 0) |
1249 	    (PP_ISKPMC(pp) ? KPM_C : 0) |
1250 	    (PP_ISKPMS(pp) ? KPM_S : 0));
1251 
1252 	switch (tsbmcase) {
1253 	case KPM_TSBM_CONFL_GONE:		/* - - - - */
1254 		/*
1255 		 * That's fine, we either have no more vac conflict in
1256 		 * this kpm page or someone raced in and has solved the
1257 		 * vac conflict for us -- call sfmmu_kpm_vac_conflict
1258 		 * to take care for correcting the vcolor and flushing
1259 		 * the dcache if required.
1260 		 */
1261 		mutex_exit(&kpmp->khl_mutex);
1262 		sfmmu_kpm_vac_conflict(pp, vaddr);
1263 		mutex_enter(&kpmp->khl_mutex);
1264 
1265 		if (PP_ISNC(pp) || kp->kp_refcnt <= 0 ||
1266 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1267 			panic("sfmmu_kpm_fault: inconsistent CONFL_GONE "
1268 			    "state, pp=%p", (void *)pp);
1269 		}
1270 		goto largeexit;
1271 
1272 	case KPM_TSBM_MAPS_RASM:		/* - - ks - */
1273 		/*
1274 		 * All conflicts in this kpm page are gone but there are
1275 		 * already small mappings around, so we also map this
1276 		 * page small. This could be the trigger case for a
1277 		 * small mapping reaper, if this is really needed.
1278 		 * For now fall thru to the KPM_TSBM_MAPS handling.
1279 		 */
1280 
1281 	case KPM_TSBM_MAPS:			/* kc - ks - */
1282 		/*
1283 		 * Large page mapping is already broken, this page is not
1284 		 * conflicting, so map it small. Call sfmmu_kpm_vac_conflict
1285 		 * to take care for correcting the vcolor and flushing
1286 		 * the dcache if required.
1287 		 */
1288 		mutex_exit(&kpmp->khl_mutex);
1289 		sfmmu_kpm_vac_conflict(pp, vaddr);
1290 		mutex_enter(&kpmp->khl_mutex);
1291 
1292 		if (PP_ISNC(pp) || kp->kp_refcnt <= 0 ||
1293 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1294 			panic("sfmmu_kpm_fault:  inconsistent MAPS state, "
1295 			    "pp=%p", (void *)pp);
1296 		}
1297 		kp->kp_refcnt--;
1298 		kp->kp_refcnts++;
1299 		pmtx = sfmmu_page_enter(pp);
1300 		PP_SETKPMS(pp);
1301 		sfmmu_page_exit(pmtx);
1302 		goto smallexit;
1303 
1304 	case KPM_TSBM_RPLS_RASM:		/* - - ks s */
1305 		/*
1306 		 * All conflicts in this kpm page are gone but this page
1307 		 * is mapped small. This could be the trigger case for a
1308 		 * small mapping reaper, if this is really needed.
1309 		 * For now we drop it in small again. Fall thru to the
1310 		 * KPM_TSBM_RPLS handling.
1311 		 */
1312 
1313 	case KPM_TSBM_RPLS:			/* kc - ks s */
1314 		/*
1315 		 * Large page mapping is already broken, this page is not
1316 		 * conflicting but already mapped small, so drop it in
1317 		 * small again.
1318 		 */
1319 		if (PP_ISNC(pp) ||
1320 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1321 			panic("sfmmu_kpm_fault:  inconsistent RPLS state, "
1322 			    "pp=%p", (void *)pp);
1323 		}
1324 		goto smallexit;
1325 
1326 	case KPM_TSBM_MAPS_BRKO:		/* kc - - - */
1327 		/*
1328 		 * The kpm page where we live in is marked conflicting
1329 		 * but this page is not conflicting. So we have to map it
1330 		 * in small. Call sfmmu_kpm_vac_conflict to take care for
1331 		 * correcting the vcolor and flushing the dcache if required.
1332 		 */
1333 		mutex_exit(&kpmp->khl_mutex);
1334 		sfmmu_kpm_vac_conflict(pp, vaddr);
1335 		mutex_enter(&kpmp->khl_mutex);
1336 
1337 		if (PP_ISNC(pp) || kp->kp_refcnt <= 0 ||
1338 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1339 			panic("sfmmu_kpm_fault:  inconsistent MAPS_BRKO state, "
1340 			    "pp=%p", (void *)pp);
1341 		}
1342 		kp->kp_refcnt--;
1343 		kp->kp_refcnts++;
1344 		pmtx = sfmmu_page_enter(pp);
1345 		PP_SETKPMS(pp);
1346 		sfmmu_page_exit(pmtx);
1347 		goto smallexit;
1348 
1349 	case KPM_TSBM_MAPS_BRKT:		/* kc c - - */
1350 	case KPM_TSBM_MAPS_CONFL:		/* kc c ks - */
1351 		if (!PP_ISMAPPED(pp)) {
1352 			/*
1353 			 * We got a tsbmiss on kpm large page range that is
1354 			 * marked to contain vac conflicting pages introduced
1355 			 * by hme mappings. The hme mappings are all gone and
1356 			 * must have bypassed the kpm alias prevention logic.
1357 			 */
1358 			panic("sfmmu_kpm_fault: stale VAC conflict, pp=%p",
1359 			    (void *)pp);
1360 		}
1361 
1362 		/*
1363 		 * Check for vcolor conflicts. Return here w/ either no
1364 		 * conflict (fast path), removed hme mapping chains
1365 		 * (unload conflict) or uncached (uncache conflict).
1366 		 * Dcache is cleaned and p_vcolor and P_TNC are set
1367 		 * accordingly. Drop kpmp for uncache conflict cases
1368 		 * since it will be grabbed within sfmmu_kpm_page_cache
1369 		 * in case of an uncache conflict.
1370 		 */
1371 		mutex_exit(&kpmp->khl_mutex);
1372 		sfmmu_kpm_vac_conflict(pp, vaddr);
1373 		mutex_enter(&kpmp->khl_mutex);
1374 
1375 		if (kp->kp_refcnt <= 0)
1376 			panic("sfmmu_kpm_fault: bad refcnt kp=%p", (void *)kp);
1377 
1378 		if (PP_ISNC(pp)) {
1379 			uncached = 1;
1380 		} else {
1381 			/*
1382 			 * When an unload conflict is solved and there are
1383 			 * no other small mappings around, we can resume
1384 			 * largepage mode. Otherwise we have to map or drop
1385 			 * in small. This could be a trigger for a small
1386 			 * mapping reaper when this was the last conflict
1387 			 * within the kpm page and when there are only
1388 			 * other small mappings around.
1389 			 */
1390 			ASSERT(addr_to_vcolor(vaddr) == PP_GET_VCOLOR(pp));
1391 			ASSERT(kp->kp_refcntc > 0);
1392 			kp->kp_refcntc--;
1393 			pmtx = sfmmu_page_enter(pp);
1394 			PP_CLRKPMC(pp);
1395 			sfmmu_page_exit(pmtx);
1396 			ASSERT(PP_ISKPMS(pp) == 0);
1397 			if (kp->kp_refcntc == 0 && kp->kp_refcnts == 0)
1398 				goto largeexit;
1399 		}
1400 
1401 		kp->kp_refcnt--;
1402 		kp->kp_refcnts++;
1403 		pmtx = sfmmu_page_enter(pp);
1404 		PP_SETKPMS(pp);
1405 		sfmmu_page_exit(pmtx);
1406 		goto smallexit;
1407 
1408 	case KPM_TSBM_RPLS_CONFL:		/* kc c ks s */
1409 		if (!PP_ISMAPPED(pp)) {
1410 			/*
1411 			 * We got a tsbmiss on kpm large page range that is
1412 			 * marked to contain vac conflicting pages introduced
1413 			 * by hme mappings. They are all gone and must have
1414 			 * somehow bypassed the kpm alias prevention logic.
1415 			 */
1416 			panic("sfmmu_kpm_fault: stale VAC conflict, pp=%p",
1417 			    (void *)pp);
1418 		}
1419 
1420 		/*
1421 		 * This state is only possible for an uncached mapping.
1422 		 */
1423 		if (!PP_ISNC(pp)) {
1424 			panic("sfmmu_kpm_fault: page not uncached, pp=%p",
1425 			    (void *)pp);
1426 		}
1427 		uncached = 1;
1428 		goto smallexit;
1429 
1430 	default:
1431 badstate_exit:
1432 		panic("sfmmu_kpm_fault: inconsistent VAC state, vaddr=%p kp=%p "
1433 		    "pp=%p", (void *)vaddr, (void *)kp, (void *)pp);
1434 	}
1435 
1436 smallexit:
1437 	/* tte assembly */
1438 	if (uncached == 0)
1439 		KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
1440 	else
1441 		KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
1442 
1443 	/* tsb dropin */
1444 	sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1445 
1446 	error = 0;
1447 	goto exit;
1448 
1449 largeexit:
1450 	if (kp->kp_refcnt > 0) {
1451 
1452 		/* tte assembly */
1453 		KPM_TTE_VCACHED(tte.ll, pfn, TTE4M);
1454 
1455 		/* tsb dropin */
1456 		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT4M);
1457 
1458 		if (kp->kp_refcntc == 0) {
1459 			/* Set "go" flag for TL tsbmiss handler */
1460 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc, &kpmp->khl_lock,
1461 			    KPMTSBM_START);
1462 		}
1463 		ASSERT(kp->kp_refcntc == -1);
1464 		error = 0;
1465 
1466 	} else
1467 		error = EFAULT;
1468 exit:
1469 	mutex_exit(&kpmp->khl_mutex);
1470 	sfmmu_mlist_exit(pml);
1471 	return (error);
1472 }
1473 
1474 /*
1475  * kpm fault handler for mappings with small page size.
1476  */
1477 int
1478 sfmmu_kpm_fault_small(caddr_t vaddr, struct memseg *mseg, page_t *pp)
1479 {
1480 	int		error = 0;
1481 	pgcnt_t		inx;
1482 	kpm_spage_t	*ksp;
1483 	kpm_shlk_t	*kpmsp;
1484 	kmutex_t	*pml;
1485 	pfn_t		pfn = pp->p_pagenum;
1486 	tte_t		tte;
1487 	kmutex_t	*pmtx;
1488 	int		oldval;
1489 
1490 	inx = pfn - mseg->kpm_pbase;
1491 	ksp = &mseg->kpm_spages[inx];
1492 	kpmsp = KPMP_SHASH(ksp);
1493 
1494 	pml = sfmmu_mlist_enter(pp);
1495 
1496 	if (!PP_ISMAPPED_KPM(pp)) {
1497 		sfmmu_mlist_exit(pml);
1498 		return (EFAULT);
1499 	}
1500 
1501 	/*
1502 	 * kp_mapped lookup protected by mlist mutex
1503 	 */
1504 	if (ksp->kp_mapped == KPM_MAPPEDS) {
1505 		/*
1506 		 * Fast path tsbmiss
1507 		 */
1508 		ASSERT(!PP_ISKPMC(pp));
1509 		ASSERT(!PP_ISNC(pp));
1510 
1511 		/* tte assembly */
1512 		KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
1513 
1514 		/* tsb dropin */
1515 		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1516 
1517 	} else if (ksp->kp_mapped == KPM_MAPPEDSC) {
1518 		/*
1519 		 * Got here due to existing or gone kpm/hme VAC conflict.
1520 		 * Recheck for vcolor conflicts. Return here w/ either
1521 		 * no conflict, removed hme mapping chain (unload
1522 		 * conflict) or uncached (uncache conflict). VACaches
1523 		 * are cleaned and p_vcolor and PP_TNC are set accordingly
1524 		 * for the conflict cases.
1525 		 */
1526 		sfmmu_kpm_vac_conflict(pp, vaddr);
1527 
1528 		if (PP_ISNC(pp)) {
1529 			/* ASSERT(pp->p_share); XXX use hat_page_getshare */
1530 
1531 			/* tte assembly */
1532 			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
1533 
1534 			/* tsb dropin */
1535 			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1536 
1537 			oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
1538 			    &kpmsp->kshl_lock, (KPM_MAPPED_GO | KPM_MAPPEDSC));
1539 
1540 			if (oldval != KPM_MAPPEDSC)
1541 				panic("sfmmu_kpm_fault_small: "
1542 				    "stale smallpages mapping");
1543 		} else {
1544 			if (PP_ISKPMC(pp)) {
1545 				pmtx = sfmmu_page_enter(pp);
1546 				PP_CLRKPMC(pp);
1547 				sfmmu_page_exit(pmtx);
1548 			}
1549 
1550 			/* tte assembly */
1551 			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
1552 
1553 			/* tsb dropin */
1554 			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1555 
1556 			oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
1557 			    &kpmsp->kshl_lock, (KPM_MAPPED_GO | KPM_MAPPEDS));
1558 
1559 			if (oldval != KPM_MAPPEDSC)
1560 				panic("sfmmu_kpm_fault_small: "
1561 				    "stale smallpages mapping");
1562 		}
1563 
1564 	} else {
1565 		/*
1566 		 * We got a tsbmiss on a not active kpm_page range.
1567 		 * Let decide segkpm_fault how to panic.
1568 		 */
1569 		error = EFAULT;
1570 	}
1571 
1572 	sfmmu_mlist_exit(pml);
1573 	return (error);
1574 }
1575 
1576 /*
1577  * Check/handle potential hme/kpm mapping conflicts
1578  */
1579 static void
1580 sfmmu_kpm_vac_conflict(page_t *pp, caddr_t vaddr)
1581 {
1582 	int		vcolor;
1583 	struct sf_hment	*sfhmep;
1584 	struct hat	*tmphat;
1585 	struct sf_hment	*tmphme = NULL;
1586 	struct hme_blk	*hmeblkp;
1587 	tte_t		tte;
1588 
1589 	ASSERT(sfmmu_mlist_held(pp));
1590 
1591 	if (PP_ISNC(pp))
1592 		return;
1593 
1594 	vcolor = addr_to_vcolor(vaddr);
1595 	if (PP_GET_VCOLOR(pp) == vcolor)
1596 		return;
1597 
1598 	/*
1599 	 * There could be no vcolor conflict between a large cached
1600 	 * hme page and a non alias range kpm page (neither large nor
1601 	 * small mapped). So if a hme conflict already exists between
1602 	 * a constituent page of a large hme mapping and a shared small
1603 	 * conflicting hme mapping, both mappings must be already
1604 	 * uncached at this point.
1605 	 */
1606 	ASSERT(!PP_ISMAPPED_LARGE(pp));
1607 
1608 	if (!PP_ISMAPPED(pp)) {
1609 		/*
1610 		 * Previous hme user of page had a different color
1611 		 * but since there are no current users
1612 		 * we just flush the cache and change the color.
1613 		 */
1614 		SFMMU_STAT(sf_pgcolor_conflict);
1615 		sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp));
1616 		PP_SET_VCOLOR(pp, vcolor);
1617 		return;
1618 	}
1619 
1620 	/*
1621 	 * If we get here we have a vac conflict with a current hme
1622 	 * mapping. This must have been established by forcing a wrong
1623 	 * colored mapping, e.g. by using mmap(2) with MAP_FIXED.
1624 	 */
1625 
1626 	/*
1627 	 * Check if any mapping is in same as or if it is locked
1628 	 * since in that case we need to uncache.
1629 	 */
1630 	for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
1631 		tmphme = sfhmep->hme_next;
1632 		if (IS_PAHME(sfhmep))
1633 			continue;
1634 		hmeblkp = sfmmu_hmetohblk(sfhmep);
1635 		if (hmeblkp->hblk_xhat_bit)
1636 			continue;
1637 		tmphat = hblktosfmmu(hmeblkp);
1638 		sfmmu_copytte(&sfhmep->hme_tte, &tte);
1639 		ASSERT(TTE_IS_VALID(&tte));
1640 		if ((tmphat == ksfmmup) || hmeblkp->hblk_lckcnt) {
1641 			/*
1642 			 * We have an uncache conflict
1643 			 */
1644 			SFMMU_STAT(sf_uncache_conflict);
1645 			sfmmu_page_cache_array(pp, HAT_TMPNC, CACHE_FLUSH, 1);
1646 			return;
1647 		}
1648 	}
1649 
1650 	/*
1651 	 * We have an unload conflict
1652 	 */
1653 	SFMMU_STAT(sf_unload_conflict);
1654 
1655 	for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
1656 		tmphme = sfhmep->hme_next;
1657 		if (IS_PAHME(sfhmep))
1658 			continue;
1659 		hmeblkp = sfmmu_hmetohblk(sfhmep);
1660 		if (hmeblkp->hblk_xhat_bit)
1661 			continue;
1662 		(void) sfmmu_pageunload(pp, sfhmep, TTE8K);
1663 	}
1664 
1665 	/*
1666 	 * Unloads only does tlb flushes so we need to flush the
1667 	 * dcache vcolor here.
1668 	 */
1669 	sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp));
1670 	PP_SET_VCOLOR(pp, vcolor);
1671 }
1672 
1673 /*
1674  * Remove all kpm mappings using kpme's for pp and check that
1675  * all kpm mappings (w/ and w/o kpme's) are gone.
1676  */
1677 void
1678 sfmmu_kpm_pageunload(page_t *pp)
1679 {
1680 	caddr_t		vaddr;
1681 	struct kpme	*kpme, *nkpme;
1682 
1683 	ASSERT(pp != NULL);
1684 	ASSERT(pp->p_kpmref);
1685 	ASSERT(sfmmu_mlist_held(pp));
1686 
1687 	vaddr = hat_kpm_page2va(pp, 1);
1688 
1689 	for (kpme = pp->p_kpmelist; kpme; kpme = nkpme) {
1690 		ASSERT(kpme->kpe_page == pp);
1691 
1692 		if (pp->p_kpmref == 0)
1693 			panic("sfmmu_kpm_pageunload: stale p_kpmref pp=%p "
1694 			    "kpme=%p", (void *)pp, (void *)kpme);
1695 
1696 		nkpme = kpme->kpe_next;
1697 
1698 		/* Add instance callback here here if needed later */
1699 		sfmmu_kpme_sub(kpme, pp);
1700 	}
1701 
1702 	/*
1703 	 * Also correct after mixed kpme/nonkpme mappings. If nonkpme
1704 	 * segkpm clients have unlocked the page and forgot to mapout
1705 	 * we panic here.
1706 	 */
1707 	if (pp->p_kpmref != 0)
1708 		panic("sfmmu_kpm_pageunload: bad refcnt pp=%p", (void *)pp);
1709 
1710 	sfmmu_kpm_mapout(pp, vaddr);
1711 }
1712 
/*
 * Remove a large kpm mapping from kernel TSB and all TLB's.
 *
 * vaddr: kpm virtual address of the mapping to remove.
 *
 * The TSB entry is unloaded using the 4M page shift first, then the
 * address is demapped from the TLBs via sfmmu_kpm_demap_tlbs().
 */
static void
sfmmu_kpm_demap_large(caddr_t vaddr)
{
	sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT4M);
	sfmmu_kpm_demap_tlbs(vaddr);
}
1722 
/*
 * Remove a small kpm mapping from kernel TSB and all TLB's.
 *
 * vaddr: kpm virtual address of the mapping to remove.
 *
 * The TSB entry is unloaded using the regular page shift first, then
 * the address is demapped from the TLBs via sfmmu_kpm_demap_tlbs().
 */
static void
sfmmu_kpm_demap_small(caddr_t vaddr)
{
	sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT);
	sfmmu_kpm_demap_tlbs(vaddr);
}
1732 
/*
 * Demap a kpm mapping in all TLB's.
 *
 * vaddr: kpm virtual address to flush.
 *
 * With preemption disabled, the target set is built from the CPUs
 * recorded in ksfmmup->sfmmu_cpusran that are also members of
 * cpu_ready_set, without the current CPU. These CPUs are sent a
 * vtag_flushpage_tl1 cross call, while the current CPU flushes the
 * page itself through vtag_flushpage().
 */
static void
sfmmu_kpm_demap_tlbs(caddr_t vaddr)
{
	cpuset_t cpuset;

	kpreempt_disable();
	cpuset = ksfmmup->sfmmu_cpusran;
	CPUSET_AND(cpuset, cpu_ready_set);
	/* The local CPU is handled by the direct flush below. */
	CPUSET_DEL(cpuset, CPU->cpu_id);
	SFMMU_XCALL_STATS(ksfmmup);

	xt_some(cpuset, vtag_flushpage_tl1, (uint64_t)vaddr,
	    (uint64_t)ksfmmup);
	vtag_flushpage(vaddr, (uint64_t)ksfmmup);

	kpreempt_enable();
}
1753 
/*
 * Summary states used in sfmmu_kpm_vac_unload (KPM_VUL_*).
 * See also the more detailed comments within the sfmmu_kpm_vac_unload
 * switch. Each value is a combination of the KPM_KC, KPM_C, KPM_KS and
 * KPM_S state bits.
 * Abbreviations used:
 * BIG:   Large page kpm mapping in use.
 * CONFL: VAC conflict(s) within a kpm_page.
 * INCR:  Count of conflicts within a kpm_page is going to be incremented.
 * DECR:  Count of conflicts within a kpm_page is going to be decremented.
 * UNMAP_SMALL: A small (regular page size) mapping is going to be unmapped.
 * TNC:   Temporary non cached: a kpm mapped page is mapped in TNC state.
 */
#define	KPM_VUL_BIG		(0)
#define	KPM_VUL_CONFL_INCR1	(KPM_KS)
#define	KPM_VUL_UNMAP_SMALL1	(KPM_KS | KPM_S)
#define	KPM_VUL_CONFL_INCR2	(KPM_KC)
#define	KPM_VUL_CONFL_INCR3	(KPM_KC | KPM_KS)
#define	KPM_VUL_UNMAP_SMALL2	(KPM_KC | KPM_KS | KPM_S)
#define	KPM_VUL_CONFL_DECR1	(KPM_KC | KPM_C)
#define	KPM_VUL_CONFL_DECR2	(KPM_KC | KPM_C | KPM_KS)
#define	KPM_VUL_TNC		(KPM_KC | KPM_C | KPM_KS | KPM_S)
1774 
1775 /*
1776  * Handle VAC unload conflicts introduced by hme mappings or vice
1777  * versa when a hme conflict mapping is replaced by a non conflict
1778  * one. Perform actions and state transitions according to the
1779  * various page and kpm_page entry states. VACache flushes are in
1780  * the responsibiliy of the caller. We still hold the mlist lock.
1781  */
1782 void
1783 sfmmu_kpm_vac_unload(page_t *pp, caddr_t vaddr)
1784 {
1785 	kpm_page_t	*kp;
1786 	kpm_hlk_t	*kpmp;
1787 	caddr_t		kpmvaddr = hat_kpm_page2va(pp, 1);
1788 	int		newcolor;
1789 	kmutex_t	*pmtx;
1790 	uint_t		vacunlcase;
1791 	int		badstate = 0;
1792 	kpm_spage_t	*ksp;
1793 	kpm_shlk_t	*kpmsp;
1794 
1795 	ASSERT(PAGE_LOCKED(pp));
1796 	ASSERT(sfmmu_mlist_held(pp));
1797 	ASSERT(!PP_ISNC(pp));
1798 
1799 	newcolor = addr_to_vcolor(kpmvaddr) != addr_to_vcolor(vaddr);
1800 	if (kpm_smallpages)
1801 		goto smallpages_vac_unload;
1802 
1803 	PP2KPMPG(pp, kp);
1804 	kpmp = KPMP_HASH(kp);
1805 	mutex_enter(&kpmp->khl_mutex);
1806 
1807 	if (IS_KPM_ALIAS_RANGE(kpmvaddr)) {
1808 		if (kp->kp_refcnta < 1) {
1809 			panic("sfmmu_kpm_vac_unload: bad refcnta kpm_page=%p\n",
1810 			    (void *)kp);
1811 		}
1812 
1813 		if (PP_ISKPMC(pp) == 0) {
1814 			if (newcolor == 0)
1815 				goto exit;
1816 			sfmmu_kpm_demap_small(kpmvaddr);
1817 			pmtx = sfmmu_page_enter(pp);
1818 			PP_SETKPMC(pp);
1819 			sfmmu_page_exit(pmtx);
1820 
1821 		} else if (newcolor == 0) {
1822 			pmtx = sfmmu_page_enter(pp);
1823 			PP_CLRKPMC(pp);
1824 			sfmmu_page_exit(pmtx);
1825 
1826 		} else {
1827 			badstate++;
1828 		}
1829 
1830 		goto exit;
1831 	}
1832 
1833 	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
1834 	if (kp->kp_refcntc == -1) {
1835 		/*
1836 		 * We should come here only if trap level tsb miss
1837 		 * handler is disabled.
1838 		 */
1839 		badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 ||
1840 		    PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp));
1841 	} else {
1842 		badstate |= (kp->kp_refcntc < 0);
1843 	}
1844 
1845 	if (badstate)
1846 		goto exit;
1847 
1848 	if (PP_ISKPMC(pp) == 0 && newcolor == 0) {
1849 		ASSERT(PP_ISKPMS(pp) == 0);
1850 		goto exit;
1851 	}
1852 
1853 	/*
1854 	 * Combine the per kpm_page and per page kpm VAC states
1855 	 * to a summary state in order to make the vac unload
1856 	 * handling more concise.
1857 	 */
1858 	vacunlcase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) |
1859 	    ((kp->kp_refcnts > 0) ? KPM_KS : 0) |
1860 	    (PP_ISKPMC(pp) ? KPM_C : 0) |
1861 	    (PP_ISKPMS(pp) ? KPM_S : 0));
1862 
1863 	switch (vacunlcase) {
1864 	case KPM_VUL_BIG:				/* - - - - */
1865 		/*
1866 		 * Have to breakup the large page mapping to be
1867 		 * able to handle the conflicting hme vaddr.
1868 		 */
1869 		if (kp->kp_refcntc == -1) {
1870 			/* remove go indication */
1871 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
1872 			    &kpmp->khl_lock, KPMTSBM_STOP);
1873 		}
1874 		sfmmu_kpm_demap_large(kpmvaddr);
1875 
1876 		ASSERT(kp->kp_refcntc == 0);
1877 		kp->kp_refcntc++;
1878 		pmtx = sfmmu_page_enter(pp);
1879 		PP_SETKPMC(pp);
1880 		sfmmu_page_exit(pmtx);
1881 		break;
1882 
1883 	case KPM_VUL_UNMAP_SMALL1:			/* -  - ks s */
1884 	case KPM_VUL_UNMAP_SMALL2:			/* kc - ks s */
1885 		/*
1886 		 * New conflict w/ an active kpm page, actually mapped
1887 		 * in by small TSB/TLB entries. Remove the mapping and
1888 		 * update states.
1889 		 */
1890 		ASSERT(newcolor);
1891 		sfmmu_kpm_demap_small(kpmvaddr);
1892 		kp->kp_refcnts--;
1893 		kp->kp_refcnt++;
1894 		kp->kp_refcntc++;
1895 		pmtx = sfmmu_page_enter(pp);
1896 		PP_CLRKPMS(pp);
1897 		PP_SETKPMC(pp);
1898 		sfmmu_page_exit(pmtx);
1899 		break;
1900 
1901 	case KPM_VUL_CONFL_INCR1:			/* -  - ks - */
1902 	case KPM_VUL_CONFL_INCR2:			/* kc - -  - */
1903 	case KPM_VUL_CONFL_INCR3:			/* kc - ks - */
1904 		/*
1905 		 * New conflict on a active kpm mapped page not yet in
1906 		 * TSB/TLB. Mark page and increment the kpm_page conflict
1907 		 * count.
1908 		 */
1909 		ASSERT(newcolor);
1910 		kp->kp_refcntc++;
1911 		pmtx = sfmmu_page_enter(pp);
1912 		PP_SETKPMC(pp);
1913 		sfmmu_page_exit(pmtx);
1914 		break;
1915 
1916 	case KPM_VUL_CONFL_DECR1:			/* kc c -  - */
1917 	case KPM_VUL_CONFL_DECR2:			/* kc c ks - */
1918 		/*
1919 		 * A conflicting hme mapping is removed for an active
1920 		 * kpm page not yet in TSB/TLB. Unmark page and decrement
1921 		 * the kpm_page conflict count.
1922 		 */
1923 		ASSERT(newcolor == 0);
1924 		kp->kp_refcntc--;
1925 		pmtx = sfmmu_page_enter(pp);
1926 		PP_CLRKPMC(pp);
1927 		sfmmu_page_exit(pmtx);
1928 		break;
1929 
1930 	case KPM_VUL_TNC:				/* kc c ks s */
1931 		cmn_err(CE_NOTE, "sfmmu_kpm_vac_unload: "
1932 		    "page not in NC state");
1933 		/* FALLTHRU */
1934 
1935 	default:
1936 		badstate++;
1937 	}
1938 exit:
1939 	if (badstate) {
1940 		panic("sfmmu_kpm_vac_unload: inconsistent VAC state, "
1941 		    "kpmvaddr=%p kp=%p pp=%p",
1942 		    (void *)kpmvaddr, (void *)kp, (void *)pp);
1943 	}
1944 	mutex_exit(&kpmp->khl_mutex);
1945 
1946 	return;
1947 
1948 smallpages_vac_unload:
1949 	if (newcolor == 0)
1950 		return;
1951 
1952 	PP2KPMSPG(pp, ksp);
1953 	kpmsp = KPMP_SHASH(ksp);
1954 
1955 	if (PP_ISKPMC(pp) == 0) {
1956 		if (ksp->kp_mapped == KPM_MAPPEDS) {
1957 			/*
1958 			 * Stop TL tsbmiss handling
1959 			 */
1960 			(void) sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
1961 			    &kpmsp->kshl_lock, KPM_MAPPEDSC);
1962 
1963 			sfmmu_kpm_demap_small(kpmvaddr);
1964 
1965 		} else if (ksp->kp_mapped != KPM_MAPPEDSC) {
1966 			panic("sfmmu_kpm_vac_unload: inconsistent mapping");
1967 		}
1968 
1969 		pmtx = sfmmu_page_enter(pp);
1970 		PP_SETKPMC(pp);
1971 		sfmmu_page_exit(pmtx);
1972 
1973 	} else {
1974 		if (ksp->kp_mapped != KPM_MAPPEDSC)
1975 			panic("sfmmu_kpm_vac_unload: inconsistent mapping");
1976 	}
1977 }
1978 
1979 /*
1980  * Page is marked to be in VAC conflict to an existing kpm mapping
1981  * or is kpm mapped using only the regular pagesize. Called from
1982  * sfmmu_hblk_unload when a mlist is completely removed.
1983  */
1984 void
1985 sfmmu_kpm_hme_unload(page_t *pp)
1986 {
1987 	/* tte assembly */
1988 	kpm_page_t	*kp;
1989 	kpm_hlk_t	*kpmp;
1990 	caddr_t		vaddr;
1991 	kmutex_t	*pmtx;
1992 	uint_t		flags;
1993 	kpm_spage_t	*ksp;
1994 
1995 	ASSERT(sfmmu_mlist_held(pp));
1996 	ASSERT(PP_ISMAPPED_KPM(pp));
1997 
1998 	flags = pp->p_nrm & (P_KPMC | P_KPMS);
1999 	if (kpm_smallpages)
2000 		goto smallpages_hme_unload;
2001 
2002 	if (flags == (P_KPMC | P_KPMS)) {
2003 		panic("sfmmu_kpm_hme_unload: page should be uncached");
2004 
2005 	} else if (flags == P_KPMS) {
2006 		/*
2007 		 * Page mapped small but not involved in VAC conflict
2008 		 */
2009 		return;
2010 	}
2011 
2012 	vaddr = hat_kpm_page2va(pp, 1);
2013 
2014 	PP2KPMPG(pp, kp);
2015 	kpmp = KPMP_HASH(kp);
2016 	mutex_enter(&kpmp->khl_mutex);
2017 
2018 	if (IS_KPM_ALIAS_RANGE(vaddr)) {
2019 		if (kp->kp_refcnta < 1) {
2020 			panic("sfmmu_kpm_hme_unload: bad refcnta kpm_page=%p\n",
2021 			    (void *)kp);
2022 		}
2023 	} else {
2024 		if (kp->kp_refcntc < 1) {
2025 			panic("sfmmu_kpm_hme_unload: bad refcntc kpm_page=%p\n",
2026 			    (void *)kp);
2027 		}
2028 		kp->kp_refcntc--;
2029 	}
2030 
2031 	pmtx = sfmmu_page_enter(pp);
2032 	PP_CLRKPMC(pp);
2033 	sfmmu_page_exit(pmtx);
2034 
2035 	mutex_exit(&kpmp->khl_mutex);
2036 	return;
2037 
2038 smallpages_hme_unload:
2039 	if (flags != P_KPMC)
2040 		panic("sfmmu_kpm_hme_unload: page should be uncached");
2041 
2042 	vaddr = hat_kpm_page2va(pp, 1);
2043 	PP2KPMSPG(pp, ksp);
2044 
2045 	if (ksp->kp_mapped != KPM_MAPPEDSC)
2046 		panic("sfmmu_kpm_hme_unload: inconsistent mapping");
2047 
2048 	/*
2049 	 * Keep KPM_MAPPEDSC until the next kpm tsbmiss where it
2050 	 * prevents TL tsbmiss handling and force a hat_kpm_fault.
2051 	 * There we can start over again.
2052 	 */
2053 
2054 	pmtx = sfmmu_page_enter(pp);
2055 	PP_CLRKPMC(pp);
2056 	sfmmu_page_exit(pmtx);
2057 }
2058 
2059 /*
2060  * Special hooks for sfmmu_page_cache_array() when changing the
2061  * cacheability of a page. It is used to obey the hat_kpm lock
2062  * ordering (mlist -> kpmp -> spl, and back).
2063  */
2064 kpm_hlk_t *
2065 sfmmu_kpm_kpmp_enter(page_t *pp, pgcnt_t npages)
2066 {
2067 	kpm_page_t	*kp;
2068 	kpm_hlk_t	*kpmp;
2069 
2070 	ASSERT(sfmmu_mlist_held(pp));
2071 
2072 	if (kpm_smallpages || PP_ISMAPPED_KPM(pp) == 0)
2073 		return (NULL);
2074 
2075 	ASSERT(npages <= kpmpnpgs);
2076 
2077 	PP2KPMPG(pp, kp);
2078 	kpmp = KPMP_HASH(kp);
2079 	mutex_enter(&kpmp->khl_mutex);
2080 
2081 	return (kpmp);
2082 }
2083 
2084 void
2085 sfmmu_kpm_kpmp_exit(kpm_hlk_t *kpmp)
2086 {
2087 	if (kpm_smallpages || kpmp == NULL)
2088 		return;
2089 
2090 	mutex_exit(&kpmp->khl_mutex);
2091 }
2092 
/*
 * Summary states used in sfmmu_kpm_page_cache (KPM_*).
 * See also the more detailed comments within the sfmmu_kpm_page_cache
 * switch. Each value is a combination of the KPM_KC, KPM_C, KPM_KS and
 * KPM_S state bits. Note that the UNC_NOP and CACHE_NOMAP* defines
 * share values: KPM_UNC_NOP1 equals KPM_CACHE_NOMAP and KPM_UNC_NOP2
 * equals KPM_CACHE_NOMAPO.
 * Abbreviations used:
 * UNC:     Input state for an uncache request.
 *   BIG:     Large page kpm mapping in use.
 *   SMALL:   Page has a small kpm mapping within a kpm_page range.
 *   NODEMAP: No demap needed.
 *   NOP:     No operation needed on this input state.
 * CACHE:   Input state for a re-cache request.
 *   MAPS:    Page is in TNC and kpm VAC conflict state and kpm mapped small.
 *   NOMAP:   Page is in TNC and kpm VAC conflict state, but not small kpm
 *            mapped.
 *   NOMAPO:  Page is in TNC and kpm VAC conflict state, but not small kpm
 *            mapped. There are also other small kpm mappings within this
 *            kpm_page.
 */
#define	KPM_UNC_BIG		(0)
#define	KPM_UNC_NODEMAP1	(KPM_KS)
#define	KPM_UNC_SMALL1		(KPM_KS | KPM_S)
#define	KPM_UNC_NODEMAP2	(KPM_KC)
#define	KPM_UNC_NODEMAP3	(KPM_KC | KPM_KS)
#define	KPM_UNC_SMALL2		(KPM_KC | KPM_KS | KPM_S)
#define	KPM_UNC_NOP1		(KPM_KC | KPM_C)
#define	KPM_UNC_NOP2		(KPM_KC | KPM_C | KPM_KS)
#define	KPM_CACHE_NOMAP		(KPM_KC | KPM_C)
#define	KPM_CACHE_NOMAPO	(KPM_KC | KPM_C | KPM_KS)
#define	KPM_CACHE_MAPS		(KPM_KC | KPM_C | KPM_KS | KPM_S)
2121 
2122 /*
2123  * This function is called when the virtual cacheability of a page
 * is changed and the page has an active kpm mapping. The mlist mutex,
2125  * the spl hash lock and the kpmp mutex (if needed) are already grabbed.
2126  */
2127 /*ARGSUSED2*/
2128 void
2129 sfmmu_kpm_page_cache(page_t *pp, int flags, int cache_flush_tag)
2130 {
2131 	kpm_page_t	*kp;
2132 	kpm_hlk_t	*kpmp;
2133 	caddr_t		kpmvaddr;
2134 	int		badstate = 0;
2135 	uint_t		pgcacase;
2136 	kpm_spage_t	*ksp;
2137 	kpm_shlk_t	*kpmsp;
2138 	int		oldval;
2139 
2140 	ASSERT(PP_ISMAPPED_KPM(pp));
2141 	ASSERT(sfmmu_mlist_held(pp));
2142 	ASSERT(sfmmu_page_spl_held(pp));
2143 
2144 	if (flags != HAT_TMPNC && flags != HAT_CACHE)
2145 		panic("sfmmu_kpm_page_cache: bad flags");
2146 
2147 	kpmvaddr = hat_kpm_page2va(pp, 1);
2148 
2149 	if (flags == HAT_TMPNC && cache_flush_tag == CACHE_FLUSH) {
2150 		pfn_t pfn = pp->p_pagenum;
2151 		int vcolor = addr_to_vcolor(kpmvaddr);
2152 		cpuset_t cpuset = cpu_ready_set;
2153 
2154 		/* Flush vcolor in DCache */
2155 		CPUSET_DEL(cpuset, CPU->cpu_id);
2156 		SFMMU_XCALL_STATS(ksfmmup);
2157 		xt_some(cpuset, vac_flushpage_tl1, pfn, vcolor);
2158 		vac_flushpage(pfn, vcolor);
2159 	}
2160 
2161 	if (kpm_smallpages)
2162 		goto smallpages_page_cache;
2163 
2164 	PP2KPMPG(pp, kp);
2165 	kpmp = KPMP_HASH(kp);
2166 	ASSERT(MUTEX_HELD(&kpmp->khl_mutex));
2167 
2168 	if (IS_KPM_ALIAS_RANGE(kpmvaddr)) {
2169 		if (kp->kp_refcnta < 1) {
2170 			panic("sfmmu_kpm_page_cache: bad refcnta "
2171 			    "kpm_page=%p\n", (void *)kp);
2172 		}
2173 		sfmmu_kpm_demap_small(kpmvaddr);
2174 		if (flags == HAT_TMPNC) {
2175 			PP_SETKPMC(pp);
2176 			ASSERT(!PP_ISKPMS(pp));
2177 		} else {
2178 			ASSERT(PP_ISKPMC(pp));
2179 			PP_CLRKPMC(pp);
2180 		}
2181 		goto exit;
2182 	}
2183 
2184 	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
2185 	if (kp->kp_refcntc == -1) {
2186 		/*
2187 		 * We should come here only if trap level tsb miss
2188 		 * handler is disabled.
2189 		 */
2190 		badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 ||
2191 		    PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp));
2192 	} else {
2193 		badstate |= (kp->kp_refcntc < 0);
2194 	}
2195 
2196 	if (badstate)
2197 		goto exit;
2198 
2199 	/*
2200 	 * Combine the per kpm_page and per page kpm VAC states to
2201 	 * a summary state in order to make the VAC cache/uncache
2202 	 * handling more concise.
2203 	 */
2204 	pgcacase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) |
2205 	    ((kp->kp_refcnts > 0) ? KPM_KS : 0) |
2206 	    (PP_ISKPMC(pp) ? KPM_C : 0) |
2207 	    (PP_ISKPMS(pp) ? KPM_S : 0));
2208 
2209 	if (flags == HAT_CACHE) {
2210 		switch (pgcacase) {
2211 		case KPM_CACHE_MAPS:			/* kc c ks s */
2212 			sfmmu_kpm_demap_small(kpmvaddr);
2213 			if (kp->kp_refcnts < 1) {
2214 				panic("sfmmu_kpm_page_cache: bad refcnts "
2215 				"kpm_page=%p\n", (void *)kp);
2216 			}
2217 			kp->kp_refcnts--;
2218 			kp->kp_refcnt++;
2219 			PP_CLRKPMS(pp);
2220 			/* FALLTHRU */
2221 
2222 		case KPM_CACHE_NOMAP:			/* kc c -  - */
2223 		case KPM_CACHE_NOMAPO:			/* kc c ks - */
2224 			kp->kp_refcntc--;
2225 			PP_CLRKPMC(pp);
2226 			break;
2227 
2228 		default:
2229 			badstate++;
2230 		}
2231 		goto exit;
2232 	}
2233 
2234 	switch (pgcacase) {
2235 	case KPM_UNC_BIG:				/* - - - - */
2236 		if (kp->kp_refcnt < 1) {
2237 			panic("sfmmu_kpm_page_cache: bad refcnt "
2238 			    "kpm_page=%p\n", (void *)kp);
2239 		}
2240 
2241 		/*
2242 		 * Have to breakup the large page mapping in preparation
2243 		 * to the upcoming TNC mode handled by small mappings.
2244 		 * The demap can already be done due to another conflict
2245 		 * within the kpm_page.
2246 		 */
2247 		if (kp->kp_refcntc == -1) {
2248 			/* remove go indication */
2249 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
2250 			    &kpmp->khl_lock, KPMTSBM_STOP);
2251 		}
2252 		ASSERT(kp->kp_refcntc == 0);
2253 		sfmmu_kpm_demap_large(kpmvaddr);
2254 		kp->kp_refcntc++;
2255 		PP_SETKPMC(pp);
2256 		break;
2257 
2258 	case KPM_UNC_SMALL1:				/* -  - ks s */
2259 	case KPM_UNC_SMALL2:				/* kc - ks s */
2260 		/*
2261 		 * Have to demap an already small kpm mapping in preparation
2262 		 * to the upcoming TNC mode. The demap can already be done
2263 		 * due to another conflict within the kpm_page.
2264 		 */
2265 		sfmmu_kpm_demap_small(kpmvaddr);
2266 		kp->kp_refcntc++;
2267 		kp->kp_refcnts--;
2268 		kp->kp_refcnt++;
2269 		PP_CLRKPMS(pp);
2270 		PP_SETKPMC(pp);
2271 		break;
2272 
2273 	case KPM_UNC_NODEMAP1:				/* -  - ks - */
2274 		/* fallthru */
2275 
2276 	case KPM_UNC_NODEMAP2:				/* kc - -  - */
2277 	case KPM_UNC_NODEMAP3:				/* kc - ks - */
2278 		kp->kp_refcntc++;
2279 		PP_SETKPMC(pp);
2280 		break;
2281 
2282 	case KPM_UNC_NOP1:				/* kc c -  - */
2283 	case KPM_UNC_NOP2:				/* kc c ks - */
2284 		break;
2285 
2286 	default:
2287 		badstate++;
2288 	}
2289 exit:
2290 	if (badstate) {
2291 		panic("sfmmu_kpm_page_cache: inconsistent VAC state "
2292 		    "kpmvaddr=%p kp=%p pp=%p", (void *)kpmvaddr,
2293 		    (void *)kp, (void *)pp);
2294 	}
2295 	return;
2296 
2297 smallpages_page_cache:
2298 	PP2KPMSPG(pp, ksp);
2299 	kpmsp = KPMP_SHASH(ksp);
2300 
2301 	/*
2302 	 * marked as nogo for we will fault in and resolve it
2303 	 * through sfmmu_kpm_fault_small
2304 	 */
2305 	oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag, &kpmsp->kshl_lock,
2306 	    KPM_MAPPEDSC);
2307 
2308 	if (!(oldval == KPM_MAPPEDS || oldval == KPM_MAPPEDSC))
2309 		panic("smallpages_page_cache: inconsistent mapping");
2310 
2311 	sfmmu_kpm_demap_small(kpmvaddr);
2312 
2313 	if (flags == HAT_TMPNC) {
2314 		PP_SETKPMC(pp);
2315 		ASSERT(!PP_ISKPMS(pp));
2316 
2317 	} else {
2318 		ASSERT(PP_ISKPMC(pp));
2319 		PP_CLRKPMC(pp);
2320 	}
2321 
2322 	/*
2323 	 * Keep KPM_MAPPEDSC until the next kpm tsbmiss where it
2324 	 * prevents TL tsbmiss handling and force a hat_kpm_fault.
2325 	 * There we can start over again.
2326 	 */
2327 }
2328