xref: /titanic_51/usr/src/uts/sun4u/vm/mach_kpm.c (revision ac4d633f367252125bb35e97c5725d2aa68c1291)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Kernel Physical Mapping (segkpm) hat interface routines for sun4u.
30  */
31 
32 #include <sys/types.h>
33 #include <vm/hat.h>
34 #include <vm/hat_sfmmu.h>
35 #include <vm/page.h>
36 #include <sys/sysmacros.h>
37 #include <sys/cmn_err.h>
38 #include <sys/machsystm.h>
39 #include <vm/seg_kpm.h>
40 #include <sys/cpu_module.h>
41 #include <vm/mach_kpm.h>
42 
43 /* kpm prototypes */
44 static caddr_t	sfmmu_kpm_mapin(page_t *);
45 static void	sfmmu_kpm_mapout(page_t *, caddr_t);
46 static int	sfmmu_kpme_lookup(struct kpme *, page_t *);
47 static void	sfmmu_kpme_add(struct kpme *, page_t *);
48 static void	sfmmu_kpme_sub(struct kpme *, page_t *);
49 static caddr_t	sfmmu_kpm_getvaddr(page_t *, int *);
50 static int	sfmmu_kpm_fault(caddr_t, struct memseg *, page_t *);
51 static int	sfmmu_kpm_fault_small(caddr_t, struct memseg *, page_t *);
52 static void	sfmmu_kpm_vac_conflict(page_t *, caddr_t);
53 void	sfmmu_kpm_pageunload(page_t *);
54 void	sfmmu_kpm_vac_unload(page_t *, caddr_t);
55 static void	sfmmu_kpm_demap_large(caddr_t);
56 static void	sfmmu_kpm_demap_small(caddr_t);
57 static void	sfmmu_kpm_demap_tlbs(caddr_t);
58 void	sfmmu_kpm_hme_unload(page_t *);
59 kpm_hlk_t *sfmmu_kpm_kpmp_enter(page_t *, pgcnt_t);
60 void	sfmmu_kpm_kpmp_exit(kpm_hlk_t *kpmp);
61 void	sfmmu_kpm_page_cache(page_t *, int, int);
62 
63 /*
64  * Kernel Physical Mapping (kpm) facility
65  */
66 
/*
 * kpm initialization entry point.
 * The implementation in this file has nothing to do, so the body is
 * intentionally empty. Takes no arguments and returns nothing.
 */
void
mach_kpm_init(void)
{
}
70 
71 /* -- hat_kpm interface section -- */
72 
73 /*
74  * Mapin a locked page and return the vaddr.
75  * When a kpme is provided by the caller it is added to
76  * the page p_kpmelist. The page to be mapped in must
77  * be at least read locked (p_selock).
78  */
79 caddr_t
80 hat_kpm_mapin(struct page *pp, struct kpme *kpme)
81 {
82 	kmutex_t	*pml;
83 	caddr_t		vaddr;
84 
85 	if (kpm_enable == 0) {
86 		cmn_err(CE_WARN, "hat_kpm_mapin: kpm_enable not set");
87 		return ((caddr_t)NULL);
88 	}
89 
90 	if (pp == NULL || PAGE_LOCKED(pp) == 0) {
91 		cmn_err(CE_WARN, "hat_kpm_mapin: pp zero or not locked");
92 		return ((caddr_t)NULL);
93 	}
94 
95 	pml = sfmmu_mlist_enter(pp);
96 	ASSERT(pp->p_kpmref >= 0);
97 
98 	vaddr = (pp->p_kpmref == 0) ?
99 		sfmmu_kpm_mapin(pp) : hat_kpm_page2va(pp, 1);
100 
101 	if (kpme != NULL) {
102 		/*
103 		 * Tolerate multiple mapins for the same kpme to avoid
104 		 * the need for an extra serialization.
105 		 */
106 		if ((sfmmu_kpme_lookup(kpme, pp)) == 0)
107 			sfmmu_kpme_add(kpme, pp);
108 
109 		ASSERT(pp->p_kpmref > 0);
110 
111 	} else {
112 		pp->p_kpmref++;
113 	}
114 
115 	sfmmu_mlist_exit(pml);
116 	return (vaddr);
117 }
118 
119 /*
120  * Mapout a locked page.
121  * When a kpme is provided by the caller it is removed from
122  * the page p_kpmelist. The page to be mapped out must be at
123  * least read locked (p_selock).
124  * Note: The seg_kpm layer provides a mapout interface for the
125  * case that a kpme is used and the underlying page is unlocked.
126  * This can be used instead of calling this function directly.
127  */
128 void
129 hat_kpm_mapout(struct page *pp, struct kpme *kpme, caddr_t vaddr)
130 {
131 	kmutex_t	*pml;
132 
133 	if (kpm_enable == 0) {
134 		cmn_err(CE_WARN, "hat_kpm_mapout: kpm_enable not set");
135 		return;
136 	}
137 
138 	if (IS_KPM_ADDR(vaddr) == 0) {
139 		cmn_err(CE_WARN, "hat_kpm_mapout: no kpm address");
140 		return;
141 	}
142 
143 	if (pp == NULL || PAGE_LOCKED(pp) == 0) {
144 		cmn_err(CE_WARN, "hat_kpm_mapout: page zero or not locked");
145 		return;
146 	}
147 
148 	if (kpme != NULL) {
149 		ASSERT(pp == kpme->kpe_page);
150 		pp = kpme->kpe_page;
151 		pml = sfmmu_mlist_enter(pp);
152 
153 		if (sfmmu_kpme_lookup(kpme, pp) == 0)
154 			panic("hat_kpm_mapout: kpme not found pp=%p",
155 				(void *)pp);
156 
157 		ASSERT(pp->p_kpmref > 0);
158 		sfmmu_kpme_sub(kpme, pp);
159 
160 	} else {
161 		pml = sfmmu_mlist_enter(pp);
162 		pp->p_kpmref--;
163 	}
164 
165 	ASSERT(pp->p_kpmref >= 0);
166 	if (pp->p_kpmref == 0)
167 		sfmmu_kpm_mapout(pp, vaddr);
168 
169 	sfmmu_mlist_exit(pml);
170 }
171 
172 /*
173  * Return the kpm virtual address for the page at pp.
174  * If checkswap is non zero and the page is backed by a
175  * swap vnode the physical address is used rather than
176  * p_offset to determine the kpm region.
177  * Note: The function has to be used w/ extreme care. The
178  * stability of the page identity is in the responsibility
179  * of the caller.
180  */
181 /*ARGSUSED*/
182 caddr_t
183 hat_kpm_page2va(struct page *pp, int checkswap)
184 {
185 	int		vcolor, vcolor_pa;
186 	uintptr_t	paddr, vaddr;
187 
188 	ASSERT(kpm_enable);
189 
190 	paddr = ptob(pp->p_pagenum);
191 	vcolor_pa = addr_to_vcolor(paddr);
192 
193 	if (checkswap && pp->p_vnode && IS_SWAPFSVP(pp->p_vnode))
194 		vcolor = (PP_ISNC(pp)) ? vcolor_pa : PP_GET_VCOLOR(pp);
195 	else
196 		vcolor = addr_to_vcolor(pp->p_offset);
197 
198 	vaddr = (uintptr_t)kpm_vbase + paddr;
199 
200 	if (vcolor_pa != vcolor) {
201 		vaddr += ((uintptr_t)(vcolor - vcolor_pa) << MMU_PAGESHIFT);
202 		vaddr += (vcolor_pa > vcolor) ?
203 			((uintptr_t)vcolor_pa << kpm_size_shift) :
204 			((uintptr_t)(vcolor - vcolor_pa) << kpm_size_shift);
205 	}
206 
207 	return ((caddr_t)vaddr);
208 }
209 
210 /*
211  * Return the page for the kpm virtual address vaddr.
212  * Caller is responsible for the kpm mapping and lock
213  * state of the page.
214  */
215 page_t *
216 hat_kpm_vaddr2page(caddr_t vaddr)
217 {
218 	uintptr_t	paddr;
219 	pfn_t		pfn;
220 
221 	ASSERT(IS_KPM_ADDR(vaddr));
222 
223 	SFMMU_KPM_VTOP(vaddr, paddr);
224 	pfn = (pfn_t)btop(paddr);
225 
226 	return (page_numtopp_nolock(pfn));
227 }
228 
/*
 * page to kpm_page
 *
 * Looks up the memseg that contains page pp (no lock taken) and
 * stores the address of the kpm_page covering pp into kp.
 *
 * pp may be any expression of type page_t *, kp any modifiable
 * lvalue of type kpm_page_t *. Both arguments are parenthesized
 * and the temporaries carry a pp2k_ prefix so that they cannot
 * capture identifiers used in the arguments. The macro expands to
 * a compound statement, as before.
 */
#define	PP2KPMPG(pp, kp) {						\
	struct memseg	*pp2k_mseg;					\
	pgcnt_t		pp2k_inx;					\
	pfn_t		pp2k_pfn;					\
									\
	pp2k_pfn = (pp)->p_pagenum;					\
	pp2k_mseg = page_numtomemseg_nolock(pp2k_pfn);			\
	ASSERT(pp2k_mseg);						\
	pp2k_inx = ptokpmp(kpmptop(ptokpmp(pp2k_pfn)) -			\
	    pp2k_mseg->kpm_pbase);					\
	ASSERT(pp2k_inx < pp2k_mseg->kpm_nkpmpgs);			\
	(kp) = &pp2k_mseg->kpm_pages[pp2k_inx];				\
}
242 
/*
 * page to kpm_spage
 *
 * Looks up the memseg that contains page pp (no lock taken) and
 * stores the address of the kpm_spage that belongs to pp into ksp.
 * The index is the page frame number relative to kpm_pbase.
 *
 * pp may be any expression of type page_t *, ksp any modifiable
 * lvalue of type kpm_spage_t *. Both arguments are parenthesized
 * and the temporaries carry a pp2k_ prefix so that they cannot
 * capture identifiers used in the arguments. The macro expands to
 * a compound statement, as before.
 */
#define	PP2KPMSPG(pp, ksp) {						\
	struct memseg	*pp2k_mseg;					\
	pgcnt_t		pp2k_inx;					\
	pfn_t		pp2k_pfn;					\
									\
	pp2k_pfn = (pp)->p_pagenum;					\
	pp2k_mseg = page_numtomemseg_nolock(pp2k_pfn);			\
	ASSERT(pp2k_mseg);						\
	pp2k_inx = pp2k_pfn - pp2k_mseg->kpm_pbase;			\
	(ksp) = &pp2k_mseg->kpm_spages[pp2k_inx];			\
}
255 
256 /*
257  * hat_kpm_fault is called from segkpm_fault when a kpm tsbmiss occurred
258  * which could not be resolved by the trap level tsbmiss handler for the
259  * following reasons:
260  * . The vaddr is in VAC alias range (always PAGESIZE mapping size).
261  * . The kpm (s)page range of vaddr is in a VAC alias prevention state.
262  * . tsbmiss handling at trap level is not desired (DEBUG kernel only,
263  *   kpm_tsbmtl == 0).
264  */
265 int
266 hat_kpm_fault(struct hat *hat, caddr_t vaddr)
267 {
268 	int		error;
269 	uintptr_t	paddr;
270 	pfn_t		pfn;
271 	struct memseg	*mseg;
272 	page_t	*pp;
273 
274 	if (kpm_enable == 0) {
275 		cmn_err(CE_WARN, "hat_kpm_fault: kpm_enable not set");
276 		return (ENOTSUP);
277 	}
278 
279 	ASSERT(hat == ksfmmup);
280 	ASSERT(IS_KPM_ADDR(vaddr));
281 
282 	SFMMU_KPM_VTOP(vaddr, paddr);
283 	pfn = (pfn_t)btop(paddr);
284 	mseg = page_numtomemseg_nolock(pfn);
285 	if (mseg == NULL)
286 		return (EFAULT);
287 
288 	pp = &mseg->pages[(pgcnt_t)(pfn - mseg->pages_base)];
289 	ASSERT((pfn_t)pp->p_pagenum == pfn);
290 
291 	if (!PAGE_LOCKED(pp))
292 		return (EFAULT);
293 
294 	if (kpm_smallpages == 0)
295 		error = sfmmu_kpm_fault(vaddr, mseg, pp);
296 	else
297 		error = sfmmu_kpm_fault_small(vaddr, mseg, pp);
298 
299 	return (error);
300 }
301 
302 extern  krwlock_t memsegslock;
303 
304 /*
305  * memseg_hash[] was cleared, need to clear memseg_phash[] too.
306  */
307 void
308 hat_kpm_mseghash_clear(int nentries)
309 {
310 	pgcnt_t i;
311 
312 	if (kpm_enable == 0)
313 		return;
314 
315 	for (i = 0; i < nentries; i++)
316 		memseg_phash[i] = MSEG_NULLPTR_PA;
317 }
318 
319 /*
320  * Update memseg_phash[inx] when memseg_hash[inx] was changed.
321  */
322 void
323 hat_kpm_mseghash_update(pgcnt_t inx, struct memseg *msp)
324 {
325 	if (kpm_enable == 0)
326 		return;
327 
328 	memseg_phash[inx] = (msp) ? va_to_pa(msp) : MSEG_NULLPTR_PA;
329 }
330 
331 /*
332  * Update kpm memseg members from basic memseg info.
333  */
334 void
335 hat_kpm_addmem_mseg_update(struct memseg *msp, pgcnt_t nkpmpgs,
336 	offset_t kpm_pages_off)
337 {
338 	if (kpm_enable == 0)
339 		return;
340 
341 	msp->kpm_pages = (kpm_page_t *)((caddr_t)msp->pages + kpm_pages_off);
342 	msp->kpm_nkpmpgs = nkpmpgs;
343 	msp->kpm_pbase = kpmptop(ptokpmp(msp->pages_base));
344 	msp->pagespa = va_to_pa(msp->pages);
345 	msp->epagespa = va_to_pa(msp->epages);
346 	msp->kpm_pagespa = va_to_pa(msp->kpm_pages);
347 }
348 
349 /*
350  * Setup nextpa when a memseg is inserted.
351  * Assumes that the memsegslock is already held.
352  */
353 void
354 hat_kpm_addmem_mseg_insert(struct memseg *msp)
355 {
356 	if (kpm_enable == 0)
357 		return;
358 
359 	ASSERT(RW_LOCK_HELD(&memsegslock));
360 	msp->nextpa = (memsegs) ? va_to_pa(memsegs) : MSEG_NULLPTR_PA;
361 }
362 
363 /*
364  * Setup memsegspa when a memseg is (head) inserted.
365  * Called before memsegs is updated to complete a
366  * memseg insert operation.
367  * Assumes that the memsegslock is already held.
368  */
369 void
370 hat_kpm_addmem_memsegs_update(struct memseg *msp)
371 {
372 	if (kpm_enable == 0)
373 		return;
374 
375 	ASSERT(RW_LOCK_HELD(&memsegslock));
376 	ASSERT(memsegs);
377 	memsegspa = va_to_pa(msp);
378 }
379 
380 /*
381  * Return end of metadata for an already setup memseg.
382  *
383  * Note: kpm_pages and kpm_spages are aliases and the underlying
384  * member of struct memseg is a union, therefore they always have
385  * the same address within a memseg. They must be differentiated
386  * when pointer arithmetic is used with them.
387  */
388 caddr_t
389 hat_kpm_mseg_reuse(struct memseg *msp)
390 {
391 	caddr_t end;
392 
393 	if (kpm_smallpages == 0)
394 		end = (caddr_t)(msp->kpm_pages + msp->kpm_nkpmpgs);
395 	else
396 		end = (caddr_t)(msp->kpm_spages + msp->kpm_nkpmpgs);
397 
398 	return (end);
399 }
400 
401 /*
402  * Update memsegspa (when first memseg in list
403  * is deleted) or nextpa  when a memseg deleted.
404  * Assumes that the memsegslock is already held.
405  */
406 void
407 hat_kpm_delmem_mseg_update(struct memseg *msp, struct memseg **mspp)
408 {
409 	struct memseg *lmsp;
410 
411 	if (kpm_enable == 0)
412 		return;
413 
414 	ASSERT(RW_LOCK_HELD(&memsegslock));
415 
416 	if (mspp == &memsegs) {
417 		memsegspa = (msp->next) ?
418 				va_to_pa(msp->next) : MSEG_NULLPTR_PA;
419 	} else {
420 		lmsp = (struct memseg *)
421 			((uint64_t)mspp - offsetof(struct memseg, next));
422 		lmsp->nextpa = (msp->next) ?
423 				va_to_pa(msp->next) : MSEG_NULLPTR_PA;
424 	}
425 }
426 
427 /*
428  * Update kpm members for all memseg's involved in a split operation
429  * and do the atomic update of the physical memseg chain.
430  *
431  * Note: kpm_pages and kpm_spages are aliases and the underlying member
432  * of struct memseg is a union, therefore they always have the same
433  * address within a memseg. With that the direct assignments and
434  * va_to_pa conversions below don't have to be distinguished wrt. to
435  * kpm_smallpages. They must be differentiated when pointer arithmetic
436  * is used with them.
437  *
438  * Assumes that the memsegslock is already held.
439  */
440 void
441 hat_kpm_split_mseg_update(struct memseg *msp, struct memseg **mspp,
442 	struct memseg *lo, struct memseg *mid, struct memseg *hi)
443 {
444 	pgcnt_t start, end, kbase, kstart, num;
445 	struct memseg *lmsp;
446 
447 	if (kpm_enable == 0)
448 		return;
449 
450 	ASSERT(RW_LOCK_HELD(&memsegslock));
451 	ASSERT(msp && mid && msp->kpm_pages);
452 
453 	kbase = ptokpmp(msp->kpm_pbase);
454 
455 	if (lo) {
456 		num = lo->pages_end - lo->pages_base;
457 		start = kpmptop(ptokpmp(lo->pages_base));
458 		/* align end to kpm page size granularity */
459 		end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs;
460 		lo->kpm_pbase = start;
461 		lo->kpm_nkpmpgs = ptokpmp(end - start);
462 		lo->kpm_pages = msp->kpm_pages;
463 		lo->kpm_pagespa = va_to_pa(lo->kpm_pages);
464 		lo->pagespa = va_to_pa(lo->pages);
465 		lo->epagespa = va_to_pa(lo->epages);
466 		lo->nextpa = va_to_pa(lo->next);
467 	}
468 
469 	/* mid */
470 	num = mid->pages_end - mid->pages_base;
471 	kstart = ptokpmp(mid->pages_base);
472 	start = kpmptop(kstart);
473 	/* align end to kpm page size granularity */
474 	end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs;
475 	mid->kpm_pbase = start;
476 	mid->kpm_nkpmpgs = ptokpmp(end - start);
477 	if (kpm_smallpages == 0) {
478 		mid->kpm_pages = msp->kpm_pages + (kstart - kbase);
479 	} else {
480 		mid->kpm_spages = msp->kpm_spages + (kstart - kbase);
481 	}
482 	mid->kpm_pagespa = va_to_pa(mid->kpm_pages);
483 	mid->pagespa = va_to_pa(mid->pages);
484 	mid->epagespa = va_to_pa(mid->epages);
485 	mid->nextpa = (mid->next) ?  va_to_pa(mid->next) : MSEG_NULLPTR_PA;
486 
487 	if (hi) {
488 		num = hi->pages_end - hi->pages_base;
489 		kstart = ptokpmp(hi->pages_base);
490 		start = kpmptop(kstart);
491 		/* align end to kpm page size granularity */
492 		end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs;
493 		hi->kpm_pbase = start;
494 		hi->kpm_nkpmpgs = ptokpmp(end - start);
495 		if (kpm_smallpages == 0) {
496 			hi->kpm_pages = msp->kpm_pages + (kstart - kbase);
497 		} else {
498 			hi->kpm_spages = msp->kpm_spages + (kstart - kbase);
499 		}
500 		hi->kpm_pagespa = va_to_pa(hi->kpm_pages);
501 		hi->pagespa = va_to_pa(hi->pages);
502 		hi->epagespa = va_to_pa(hi->epages);
503 		hi->nextpa = (hi->next) ? va_to_pa(hi->next) : MSEG_NULLPTR_PA;
504 	}
505 
506 	/*
507 	 * Atomic update of the physical memseg chain
508 	 */
509 	if (mspp == &memsegs) {
510 		memsegspa = (lo) ? va_to_pa(lo) : va_to_pa(mid);
511 	} else {
512 		lmsp = (struct memseg *)
513 			((uint64_t)mspp - offsetof(struct memseg, next));
514 		lmsp->nextpa = (lo) ? va_to_pa(lo) : va_to_pa(mid);
515 	}
516 }
517 
518 /*
519  * Walk the memsegs chain, applying func to each memseg span and vcolor.
520  */
521 void
522 hat_kpm_walk(void (*func)(void *, void *, size_t), void *arg)
523 {
524 	pfn_t	pbase, pend;
525 	int	vcolor;
526 	void	*base;
527 	size_t	size;
528 	struct memseg *msp;
529 	extern uint_t vac_colors;
530 
531 	for (msp = memsegs; msp; msp = msp->next) {
532 		pbase = msp->pages_base;
533 		pend = msp->pages_end;
534 		for (vcolor = 0; vcolor < vac_colors; vcolor++) {
535 			base = ptob(pbase) + kpm_vbase + kpm_size * vcolor;
536 			size = ptob(pend - pbase);
537 			func(arg, base, size);
538 		}
539 	}
540 }
541 
542 
543 /* -- sfmmu_kpm internal section -- */
544 
545 /*
546  * Return the page frame number if a valid segkpm mapping exists
547  * for vaddr, otherwise return PFN_INVALID. No locks are grabbed.
548  * Should only be used by other sfmmu routines.
549  */
550 pfn_t
551 sfmmu_kpm_vatopfn(caddr_t vaddr)
552 {
553 	uintptr_t	paddr;
554 	pfn_t		pfn;
555 	page_t	*pp;
556 
557 	ASSERT(kpm_enable && IS_KPM_ADDR(vaddr));
558 
559 	SFMMU_KPM_VTOP(vaddr, paddr);
560 	pfn = (pfn_t)btop(paddr);
561 	pp = page_numtopp_nolock(pfn);
562 	if (pp && pp->p_kpmref)
563 		return (pfn);
564 	else
565 		return ((pfn_t)PFN_INVALID);
566 }
567 
568 /*
569  * Lookup a kpme in the p_kpmelist.
570  */
571 static int
572 sfmmu_kpme_lookup(struct kpme *kpme, page_t *pp)
573 {
574 	struct kpme	*p;
575 
576 	for (p = pp->p_kpmelist; p; p = p->kpe_next) {
577 		if (p == kpme)
578 			return (1);
579 	}
580 	return (0);
581 }
582 
583 /*
584  * Insert a kpme into the p_kpmelist and increment
585  * the per page kpm reference count.
586  */
587 static void
588 sfmmu_kpme_add(struct kpme *kpme, page_t *pp)
589 {
590 	ASSERT(pp->p_kpmref >= 0);
591 
592 	/* head insert */
593 	kpme->kpe_prev = NULL;
594 	kpme->kpe_next = pp->p_kpmelist;
595 
596 	if (pp->p_kpmelist)
597 		pp->p_kpmelist->kpe_prev = kpme;
598 
599 	pp->p_kpmelist = kpme;
600 	kpme->kpe_page = pp;
601 	pp->p_kpmref++;
602 }
603 
604 /*
605  * Remove a kpme from the p_kpmelist and decrement
606  * the per page kpm reference count.
607  */
608 static void
609 sfmmu_kpme_sub(struct kpme *kpme, page_t *pp)
610 {
611 	ASSERT(pp->p_kpmref > 0);
612 
613 	if (kpme->kpe_prev) {
614 		ASSERT(pp->p_kpmelist != kpme);
615 		ASSERT(kpme->kpe_prev->kpe_page == pp);
616 		kpme->kpe_prev->kpe_next = kpme->kpe_next;
617 	} else {
618 		ASSERT(pp->p_kpmelist == kpme);
619 		pp->p_kpmelist = kpme->kpe_next;
620 	}
621 
622 	if (kpme->kpe_next) {
623 		ASSERT(kpme->kpe_next->kpe_page == pp);
624 		kpme->kpe_next->kpe_prev = kpme->kpe_prev;
625 	}
626 
627 	kpme->kpe_next = kpme->kpe_prev = NULL;
628 	kpme->kpe_page = NULL;
629 	pp->p_kpmref--;
630 }
631 
632 /*
633  * Mapin a single page, it is called every time a page changes it's state
634  * from kpm-unmapped to kpm-mapped. It may not be called, when only a new
635  * kpm instance does a mapin and wants to share the mapping.
636  * Assumes that the mlist mutex is already grabbed.
637  */
638 static caddr_t
639 sfmmu_kpm_mapin(page_t *pp)
640 {
641 	kpm_page_t	*kp;
642 	kpm_hlk_t	*kpmp;
643 	caddr_t		vaddr;
644 	int		kpm_vac_range;
645 	pfn_t		pfn;
646 	tte_t		tte;
647 	kmutex_t	*pmtx;
648 	int		uncached;
649 	kpm_spage_t	*ksp;
650 	kpm_shlk_t	*kpmsp;
651 	int		oldval;
652 
653 	ASSERT(sfmmu_mlist_held(pp));
654 	ASSERT(pp->p_kpmref == 0);
655 
656 	vaddr = sfmmu_kpm_getvaddr(pp, &kpm_vac_range);
657 
658 	ASSERT(IS_KPM_ADDR(vaddr));
659 	uncached = PP_ISNC(pp);
660 	pfn = pp->p_pagenum;
661 
662 	if (kpm_smallpages)
663 		goto smallpages_mapin;
664 
665 	PP2KPMPG(pp, kp);
666 
667 	kpmp = KPMP_HASH(kp);
668 	mutex_enter(&kpmp->khl_mutex);
669 
670 	ASSERT(PP_ISKPMC(pp) == 0);
671 	ASSERT(PP_ISKPMS(pp) == 0);
672 
673 	if (uncached) {
674 		/* ASSERT(pp->p_share); XXX use hat_page_getshare */
675 		if (kpm_vac_range == 0) {
676 			if (kp->kp_refcnts == 0) {
677 				/*
678 				 * Must remove large page mapping if it exists.
679 				 * Pages in uncached state can only be mapped
680 				 * small (PAGESIZE) within the regular kpm
681 				 * range.
682 				 */
683 				if (kp->kp_refcntc == -1) {
684 					/* remove go indication */
685 					sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
686 						&kpmp->khl_lock, KPMTSBM_STOP);
687 				}
688 				if (kp->kp_refcnt > 0 && kp->kp_refcntc == 0)
689 					sfmmu_kpm_demap_large(vaddr);
690 			}
691 			ASSERT(kp->kp_refcntc >= 0);
692 			kp->kp_refcntc++;
693 		}
694 		pmtx = sfmmu_page_enter(pp);
695 		PP_SETKPMC(pp);
696 		sfmmu_page_exit(pmtx);
697 	}
698 
699 	if ((kp->kp_refcntc > 0 || kp->kp_refcnts > 0) && kpm_vac_range == 0) {
700 		/*
701 		 * Have to do a small (PAGESIZE) mapin within this kpm_page
702 		 * range since it is marked to be in VAC conflict mode or
703 		 * when there are still other small mappings around.
704 		 */
705 
706 		/* tte assembly */
707 		if (uncached == 0)
708 			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
709 		else
710 			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
711 
712 		/* tsb dropin */
713 		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
714 
715 		pmtx = sfmmu_page_enter(pp);
716 		PP_SETKPMS(pp);
717 		sfmmu_page_exit(pmtx);
718 
719 		kp->kp_refcnts++;
720 		ASSERT(kp->kp_refcnts > 0);
721 		goto exit;
722 	}
723 
724 	if (kpm_vac_range == 0) {
725 		/*
726 		 * Fast path / regular case, no VAC conflict handling
727 		 * in progress within this kpm_page range.
728 		 */
729 		if (kp->kp_refcnt == 0) {
730 
731 			/* tte assembly */
732 			KPM_TTE_VCACHED(tte.ll, pfn, TTE4M);
733 
734 			/* tsb dropin */
735 			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT4M);
736 
737 			/* Set go flag for TL tsbmiss handler */
738 			if (kp->kp_refcntc == 0)
739 				sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
740 						&kpmp->khl_lock, KPMTSBM_START);
741 
742 			ASSERT(kp->kp_refcntc == -1);
743 		}
744 		kp->kp_refcnt++;
745 		ASSERT(kp->kp_refcnt);
746 
747 	} else {
748 		/*
749 		 * The page is not setup according to the common VAC
750 		 * prevention rules for the regular and kpm mapping layer
751 		 * E.g. the page layer was not able to deliver a right
752 		 * vcolor'ed page for a given vaddr corresponding to
753 		 * the wanted p_offset. It has to be mapped in small in
754 		 * within the corresponding kpm vac range in order to
755 		 * prevent VAC alias conflicts.
756 		 */
757 
758 		/* tte assembly */
759 		if (uncached == 0) {
760 			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
761 		} else {
762 			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
763 		}
764 
765 		/* tsb dropin */
766 		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
767 
768 		kp->kp_refcnta++;
769 		if (kp->kp_refcntc == -1) {
770 			ASSERT(kp->kp_refcnt > 0);
771 
772 			/* remove go indication */
773 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc, &kpmp->khl_lock,
774 					KPMTSBM_STOP);
775 		}
776 		ASSERT(kp->kp_refcntc >= 0);
777 	}
778 exit:
779 	mutex_exit(&kpmp->khl_mutex);
780 	return (vaddr);
781 
782 smallpages_mapin:
783 	if (uncached == 0) {
784 		/* tte assembly */
785 		KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
786 	} else {
787 		/* ASSERT(pp->p_share); XXX use hat_page_getshare */
788 		pmtx = sfmmu_page_enter(pp);
789 		PP_SETKPMC(pp);
790 		sfmmu_page_exit(pmtx);
791 		/* tte assembly */
792 		KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
793 	}
794 
795 	/* tsb dropin */
796 	sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
797 
798 	PP2KPMSPG(pp, ksp);
799 	kpmsp = KPMP_SHASH(ksp);
800 
801 	oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped, &kpmsp->kshl_lock,
802 				(uncached) ? KPM_MAPPEDSC : KPM_MAPPEDS);
803 
804 	if (oldval != 0)
805 		panic("sfmmu_kpm_mapin: stale smallpages mapping");
806 
807 	return (vaddr);
808 }
809 
810 /*
811  * Mapout a single page, it is called every time a page changes it's state
812  * from kpm-mapped to kpm-unmapped. It may not be called, when only a kpm
813  * instance calls mapout and there are still other instances mapping the
814  * page. Assumes that the mlist mutex is already grabbed.
815  *
816  * Note: In normal mode (no VAC conflict prevention pending) TLB's are
817  * not flushed. This is the core segkpm behavior to avoid xcalls. It is
818  * no problem because a translation from a segkpm virtual address to a
819  * physical address is always the same. The only downside is a slighty
820  * increased window of vulnerability for misbehaving _kernel_ modules.
821  */
822 static void
823 sfmmu_kpm_mapout(page_t *pp, caddr_t vaddr)
824 {
825 	kpm_page_t	*kp;
826 	kpm_hlk_t	*kpmp;
827 	int		alias_range;
828 	kmutex_t	*pmtx;
829 	kpm_spage_t	*ksp;
830 	kpm_shlk_t	*kpmsp;
831 	int		oldval;
832 
833 	ASSERT(sfmmu_mlist_held(pp));
834 	ASSERT(pp->p_kpmref == 0);
835 
836 	alias_range = IS_KPM_ALIAS_RANGE(vaddr);
837 
838 	if (kpm_smallpages)
839 		goto smallpages_mapout;
840 
841 	PP2KPMPG(pp, kp);
842 	kpmp = KPMP_HASH(kp);
843 	mutex_enter(&kpmp->khl_mutex);
844 
845 	if (alias_range) {
846 		ASSERT(PP_ISKPMS(pp) == 0);
847 		if (kp->kp_refcnta <= 0) {
848 			panic("sfmmu_kpm_mapout: bad refcnta kp=%p",
849 				(void *)kp);
850 		}
851 
852 		if (PP_ISTNC(pp))  {
853 			if (PP_ISKPMC(pp) == 0) {
854 				/*
855 				 * Uncached kpm mappings must always have
856 				 * forced "small page" mode.
857 				 */
858 				panic("sfmmu_kpm_mapout: uncached page not "
859 					"kpm marked");
860 			}
861 			sfmmu_kpm_demap_small(vaddr);
862 
863 			pmtx = sfmmu_page_enter(pp);
864 			PP_CLRKPMC(pp);
865 			sfmmu_page_exit(pmtx);
866 
867 			/*
868 			 * Check if we can resume cached mode. This might
869 			 * be the case if the kpm mapping was the only
870 			 * mapping in conflict with other non rule
871 			 * compliant mappings. The page is no more marked
872 			 * as kpm mapped, so the conv_tnc path will not
873 			 * change kpm state.
874 			 */
875 			conv_tnc(pp, TTE8K);
876 
877 		} else if (PP_ISKPMC(pp) == 0) {
878 			/* remove TSB entry only */
879 			sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT);
880 
881 		} else {
882 			/* already demapped */
883 			pmtx = sfmmu_page_enter(pp);
884 			PP_CLRKPMC(pp);
885 			sfmmu_page_exit(pmtx);
886 		}
887 		kp->kp_refcnta--;
888 		goto exit;
889 	}
890 
891 	if (kp->kp_refcntc <= 0 && kp->kp_refcnts == 0) {
892 		/*
893 		 * Fast path / regular case.
894 		 */
895 		ASSERT(kp->kp_refcntc >= -1);
896 		ASSERT(!(pp->p_nrm & (P_KPMC | P_KPMS | P_TNC | P_PNC)));
897 
898 		if (kp->kp_refcnt <= 0)
899 			panic("sfmmu_kpm_mapout: bad refcnt kp=%p", (void *)kp);
900 
901 		if (--kp->kp_refcnt == 0) {
902 			/* remove go indication */
903 			if (kp->kp_refcntc == -1) {
904 				sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
905 					&kpmp->khl_lock, KPMTSBM_STOP);
906 			}
907 			ASSERT(kp->kp_refcntc == 0);
908 
909 			/* remove TSB entry */
910 			sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT4M);
911 #ifdef	DEBUG
912 			if (kpm_tlb_flush)
913 				sfmmu_kpm_demap_tlbs(vaddr);
914 #endif
915 		}
916 
917 	} else {
918 		/*
919 		 * The VAC alias path.
920 		 * We come here if the kpm vaddr is not in any alias_range
921 		 * and we are unmapping a page within the regular kpm_page
922 		 * range. The kpm_page either holds conflict pages and/or
923 		 * is in "small page" mode. If the page is not marked
924 		 * P_KPMS it couldn't have a valid PAGESIZE sized TSB
925 		 * entry. Dcache flushing is done lazy and follows the
926 		 * rules of the regular virtual page coloring scheme.
927 		 *
928 		 * Per page states and required actions:
929 		 *   P_KPMC: remove a kpm mapping that is conflicting.
930 		 *   P_KPMS: remove a small kpm mapping within a kpm_page.
931 		 *   P_TNC:  check if we can re-cache the page.
932 		 *   P_PNC:  we cannot re-cache, sorry.
933 		 * Per kpm_page:
934 		 *   kp_refcntc > 0: page is part of a kpm_page with conflicts.
935 		 *   kp_refcnts > 0: rm a small mapped page within a kpm_page.
936 		 */
937 
938 		if (PP_ISKPMS(pp)) {
939 			if (kp->kp_refcnts < 1) {
940 				panic("sfmmu_kpm_mapout: bad refcnts kp=%p",
941 					(void *)kp);
942 			}
943 			sfmmu_kpm_demap_small(vaddr);
944 
945 			/*
946 			 * Check if we can resume cached mode. This might
947 			 * be the case if the kpm mapping was the only
948 			 * mapping in conflict with other non rule
949 			 * compliant mappings. The page is no more marked
950 			 * as kpm mapped, so the conv_tnc path will not
951 			 * change kpm state.
952 			 */
953 			if (PP_ISTNC(pp))  {
954 				if (!PP_ISKPMC(pp)) {
955 					/*
956 					 * Uncached kpm mappings must always
957 					 * have forced "small page" mode.
958 					 */
959 					panic("sfmmu_kpm_mapout: uncached "
960 						"page not kpm marked");
961 				}
962 				conv_tnc(pp, TTE8K);
963 			}
964 			kp->kp_refcnts--;
965 			kp->kp_refcnt++;
966 			pmtx = sfmmu_page_enter(pp);
967 			PP_CLRKPMS(pp);
968 			sfmmu_page_exit(pmtx);
969 		}
970 
971 		if (PP_ISKPMC(pp)) {
972 			if (kp->kp_refcntc < 1) {
973 				panic("sfmmu_kpm_mapout: bad refcntc kp=%p",
974 					(void *)kp);
975 			}
976 			pmtx = sfmmu_page_enter(pp);
977 			PP_CLRKPMC(pp);
978 			sfmmu_page_exit(pmtx);
979 			kp->kp_refcntc--;
980 		}
981 
982 		if (kp->kp_refcnt-- < 1)
983 			panic("sfmmu_kpm_mapout: bad refcnt kp=%p", (void *)kp);
984 	}
985 exit:
986 	mutex_exit(&kpmp->khl_mutex);
987 	return;
988 
989 smallpages_mapout:
990 	PP2KPMSPG(pp, ksp);
991 	kpmsp = KPMP_SHASH(ksp);
992 
993 	if (PP_ISKPMC(pp) == 0) {
994 		oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped,
995 					&kpmsp->kshl_lock, 0);
996 
997 		if (oldval != KPM_MAPPEDS) {
998 			/*
999 			 * When we're called after sfmmu_kpm_hme_unload,
1000 			 * KPM_MAPPEDSC is valid too.
1001 			 */
1002 			if (oldval != KPM_MAPPEDSC)
1003 				panic("sfmmu_kpm_mapout: incorrect mapping");
1004 		}
1005 
1006 		/* remove TSB entry */
1007 		sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT);
1008 #ifdef	DEBUG
1009 		if (kpm_tlb_flush)
1010 			sfmmu_kpm_demap_tlbs(vaddr);
1011 #endif
1012 
1013 	} else if (PP_ISTNC(pp)) {
1014 		oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped,
1015 					&kpmsp->kshl_lock, 0);
1016 
1017 		if (oldval != KPM_MAPPEDSC || PP_ISKPMC(pp) == 0)
1018 			panic("sfmmu_kpm_mapout: inconsistent TNC mapping");
1019 
1020 		sfmmu_kpm_demap_small(vaddr);
1021 
1022 		pmtx = sfmmu_page_enter(pp);
1023 		PP_CLRKPMC(pp);
1024 		sfmmu_page_exit(pmtx);
1025 
1026 		/*
1027 		 * Check if we can resume cached mode. This might be
1028 		 * the case if the kpm mapping was the only mapping
1029 		 * in conflict with other non rule compliant mappings.
1030 		 * The page is no more marked as kpm mapped, so the
1031 		 * conv_tnc path will not change the kpm state.
1032 		 */
1033 		conv_tnc(pp, TTE8K);
1034 
1035 	} else {
1036 		oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped,
1037 					&kpmsp->kshl_lock, 0);
1038 
1039 		if (oldval != KPM_MAPPEDSC)
1040 			panic("sfmmu_kpm_mapout: inconsistent mapping");
1041 
1042 		pmtx = sfmmu_page_enter(pp);
1043 		PP_CLRKPMC(pp);
1044 		sfmmu_page_exit(pmtx);
1045 	}
1046 }
1047 
/* Absolute value of x. Note: the argument is evaluated twice. */
#define	abs(x)	(((x) < 0) ? (-(x)) : (x))
1049 
1050 /*
1051  * Determine appropriate kpm mapping address and handle any kpm/hme
1052  * conflicts. Page mapping list and its vcolor parts must be protected.
1053  */
1054 static caddr_t
1055 sfmmu_kpm_getvaddr(page_t *pp, int *kpm_vac_rangep)
1056 {
1057 	int		vcolor, vcolor_pa;
1058 	caddr_t		vaddr;
1059 	uintptr_t	paddr;
1060 
1061 
1062 	ASSERT(sfmmu_mlist_held(pp));
1063 
1064 	paddr = ptob(pp->p_pagenum);
1065 	vcolor_pa = addr_to_vcolor(paddr);
1066 
1067 	if (pp->p_vnode && IS_SWAPFSVP(pp->p_vnode)) {
1068 		vcolor = (PP_NEWPAGE(pp) || PP_ISNC(pp)) ?
1069 		    vcolor_pa : PP_GET_VCOLOR(pp);
1070 	} else {
1071 		vcolor = addr_to_vcolor(pp->p_offset);
1072 	}
1073 
1074 	vaddr = kpm_vbase + paddr;
1075 	*kpm_vac_rangep = 0;
1076 
1077 	if (vcolor_pa != vcolor) {
1078 		*kpm_vac_rangep = abs(vcolor - vcolor_pa);
1079 		vaddr += ((uintptr_t)(vcolor - vcolor_pa) << MMU_PAGESHIFT);
1080 		vaddr += (vcolor_pa > vcolor) ?
1081 			((uintptr_t)vcolor_pa << kpm_size_shift) :
1082 			((uintptr_t)(vcolor - vcolor_pa) << kpm_size_shift);
1083 
1084 		ASSERT(!PP_ISMAPPED_LARGE(pp));
1085 	}
1086 
1087 	if (PP_ISNC(pp))
1088 		return (vaddr);
1089 
1090 	if (PP_NEWPAGE(pp)) {
1091 		PP_SET_VCOLOR(pp, vcolor);
1092 		return (vaddr);
1093 	}
1094 
1095 	if (PP_GET_VCOLOR(pp) == vcolor)
1096 		return (vaddr);
1097 
1098 	ASSERT(!PP_ISMAPPED_KPM(pp));
1099 	sfmmu_kpm_vac_conflict(pp, vaddr);
1100 
1101 	return (vaddr);
1102 }
1103 
/*
 * VAC conflict state bit values.
 * To keep the handling of the various input states concise, the
 * kpm states per kpm_page and per page are combined into one
 * summary state. Every single state is represented by one bit of
 * the summary state. These defines only apply for kpm large page
 * mappings. Within comments the abbreviations "kc, c, ks, s" are
 * used as short form of the actual state, e.g. "kc" for
 * "kp_refcntc > 0", etc.
 */
#define	KPM_KC	0x08	/* kpm_page: kp_refcntc > 0 */
#define	KPM_C	0x04	/* page: P_KPMC set */
#define	KPM_KS	0x02	/* kpm_page: kp_refcnts > 0 */
#define	KPM_S	0x01	/* page: P_KPMS set */

/*
 * Summary states used in sfmmu_kpm_fault (KPM_TSBM_*).
 * See also the more detailed comments within the sfmmu_kpm_fault
 * switch.
 * Abbreviations used:
 * CONFL: VAC conflict(s) within a kpm_page.
 * MAPS:  Mapped small: Page mapped in using a regular page size kpm
 *        mapping.
 * RASM:  Re-assembling of a large page mapping possible.
 * RPLS:  Replace: TSB miss due to TSB replacement only.
 * BRKO:  Breakup Other: A large kpm mapping has to be broken because
 *        another page within the kpm_page is already involved in a
 *        VAC conflict.
 * BRKT:  Breakup This: A large kpm mapping has to be broken, this
 *        page is involved in a VAC conflict.
 */
#define	KPM_TSBM_CONFL_GONE	(0)
#define	KPM_TSBM_MAPS_RASM	(KPM_KS)
#define	KPM_TSBM_RPLS_RASM	(KPM_KS | KPM_S)
#define	KPM_TSBM_MAPS_BRKO	(KPM_KC)
#define	KPM_TSBM_MAPS		(KPM_KC | KPM_KS)
#define	KPM_TSBM_RPLS		(KPM_KC | KPM_KS | KPM_S)
#define	KPM_TSBM_MAPS_BRKT	(KPM_KC | KPM_C)
#define	KPM_TSBM_MAPS_CONFL	(KPM_KC | KPM_C | KPM_KS)
#define	KPM_TSBM_RPLS_CONFL	(KPM_KC | KPM_C | KPM_KS | KPM_S)
1142 
1143 /*
1144  * kpm fault handler for mappings with large page size.
 *
 * Handles a kpm TSB miss for the kpm virtual address vaddr that maps
 * page pp, where mseg is the memseg whose kpm_pages array holds the
 * kpm_page entry for pp. Depending on the combined kpm_page/page VAC
 * state, either a small (TTE8K, cached or uncached) or a large (TTE4M)
 * entry is dropped into the TSB via sfmmu_kpm_load_tsb().
 *
 * Locking: the page's mapping list lock is taken first, then the
 * khl_mutex of the kpm_page's hash bucket. khl_mutex is dropped around
 * every call to sfmmu_kpm_vac_conflict() and re-taken afterwards. Both
 * locks are released before returning.
 *
 * Returns 0 if a TSB entry was loaded. Returns EFAULT if pp is not kpm
 * mapped, if vaddr is in the alias range but kp_refcnta is not > 0, or
 * if the large page exit is reached with kp_refcnt not > 0. Any
 * inconsistent state results in a panic.
1145  */
1146 int
1147 sfmmu_kpm_fault(caddr_t vaddr, struct memseg *mseg, page_t *pp)
1148 {
1149 	int		error;
1150 	pgcnt_t		inx;
1151 	kpm_page_t	*kp;
1152 	tte_t		tte;
1153 	pfn_t		pfn = pp->p_pagenum;
1154 	kpm_hlk_t	*kpmp;
1155 	kmutex_t	*pml;
1156 	int		alias_range;
1157 	int		uncached = 0;
1158 	kmutex_t	*pmtx;
1159 	int		badstate;
1160 	uint_t		tsbmcase;
1161 
1162 	alias_range = IS_KPM_ALIAS_RANGE(vaddr);
1163 
	/*
	 * Locate the kpm_page entry for pfn within this memseg's
	 * kpm_pages array; an index beyond kpm_nkpmpgs is fatal.
	 */
1164 	inx = ptokpmp(kpmptop(ptokpmp(pfn)) - mseg->kpm_pbase);
1165 	if (inx >= mseg->kpm_nkpmpgs) {
1166 		cmn_err(CE_PANIC, "sfmmu_kpm_fault: kpm overflow in memseg "
1167 			"0x%p  pp 0x%p", (void *)mseg, (void *)pp);
1168 	}
1169 
1170 	kp = &mseg->kpm_pages[inx];
1171 	kpmp = KPMP_HASH(kp);
1172 
	/* Lock order used here: mlist lock first, then kpmp->khl_mutex. */
1173 	pml = sfmmu_mlist_enter(pp);
1174 
1175 	if (!PP_ISMAPPED_KPM(pp)) {
1176 		sfmmu_mlist_exit(pml);
1177 		return (EFAULT);
1178 	}
1179 
1180 	mutex_enter(&kpmp->khl_mutex);
1181 
1182 	if (alias_range) {
1183 		ASSERT(!PP_ISMAPPED_LARGE(pp));
1184 		if (kp->kp_refcnta > 0) {
1185 			if (PP_ISKPMC(pp)) {
1186 				pmtx = sfmmu_page_enter(pp);
1187 				PP_CLRKPMC(pp);
1188 				sfmmu_page_exit(pmtx);
1189 			}
1190 			/*
1191 			 * Check for vcolor conflicts. Return here
1192 			 * w/ either no conflict (fast path), removed hme
1193 			 * mapping chains (unload conflict) or uncached
1194 			 * (uncache conflict). VACaches are cleaned and
1195 			 * p_vcolor and PP_TNC are set accordingly for the
1196 			 * conflict cases.  Drop kpmp for uncache conflict
1197 			 * cases since it will be grabbed within
1198 			 * sfmmu_kpm_page_cache in case of an uncache
1199 			 * conflict.
1200 			 */
1201 			mutex_exit(&kpmp->khl_mutex);
1202 			sfmmu_kpm_vac_conflict(pp, vaddr);
1203 			mutex_enter(&kpmp->khl_mutex);
1204 
1205 			if (PP_ISNC(pp)) {
1206 				uncached = 1;
1207 				pmtx = sfmmu_page_enter(pp);
1208 				PP_SETKPMC(pp);
1209 				sfmmu_page_exit(pmtx);
1210 			}
1211 			goto smallexit;
1212 
1213 		} else {
1214 			/*
1215 			 * We got a tsbmiss on a not active kpm_page range.
1216 			 * Let segkpm_fault decide how to panic.
1217 			 */
1218 			error = EFAULT;
1219 		}
1220 		goto exit;
1221 	}
1222 
	/*
	 * Non alias range: validate the kpm_page counters first. Negative
	 * kp_refcnt/kp_refcnts, or a negative kp_refcntc other than the
	 * special value -1 handled below, lead to the badstate panic.
	 */
1223 	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
1224 	if (kp->kp_refcntc == -1) {
1225 		/*
1226 		 * We should come here only if trap level tsb miss
1227 		 * handler is disabled.
1228 		 */
1229 		badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 ||
1230 			PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp));
1231 
1232 		if (badstate == 0)
1233 			goto largeexit;
1234 	}
1235 
1236 	if (badstate || kp->kp_refcntc < 0)
1237 		goto badstate_exit;
1238 
1239 	/*
1240 	 * Combine the per kpm_page and per page kpm VAC states to
1241 	 * a summary state in order to make the kpm fault handling
1242 	 * more concise.
1243 	 */
1244 	tsbmcase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) |
1245 			((kp->kp_refcnts > 0) ? KPM_KS : 0) |
1246 			(PP_ISKPMC(pp) ? KPM_C : 0) |
1247 			(PP_ISKPMS(pp) ? KPM_S : 0));
1248 
1249 	switch (tsbmcase) {
1250 	case KPM_TSBM_CONFL_GONE:		/* - - - - */
1251 		/*
1252 		 * That's fine, we either have no more vac conflict in
1253 		 * this kpm page or someone raced in and has solved the
1254 		 * vac conflict for us -- call sfmmu_kpm_vac_conflict
1255 		 * to take care of correcting the vcolor and flushing
1256 		 * the dcache if required.
1257 		 */
1258 		mutex_exit(&kpmp->khl_mutex);
1259 		sfmmu_kpm_vac_conflict(pp, vaddr);
1260 		mutex_enter(&kpmp->khl_mutex);
1261 
1262 		if (PP_ISNC(pp) || kp->kp_refcnt <= 0 ||
1263 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1264 			panic("sfmmu_kpm_fault: inconsistent CONFL_GONE "
1265 				"state, pp=%p", (void *)pp);
1266 		}
1267 		goto largeexit;
1268 
1269 	case KPM_TSBM_MAPS_RASM:		/* - - ks - */
1270 		/*
1271 		 * All conflicts in this kpm page are gone but there are
1272 		 * already small mappings around, so we also map this
1273 		 * page small. This could be the trigger case for a
1274 		 * small mapping reaper, if this is really needed.
1275 		 * For now fall thru to the KPM_TSBM_MAPS handling.
1276 		 */
1277 
1278 	case KPM_TSBM_MAPS:			/* kc - ks - */
1279 		/*
1280 		 * Large page mapping is already broken, this page is not
1281 		 * conflicting, so map it small. Call sfmmu_kpm_vac_conflict
1282 		 * to take care of correcting the vcolor and flushing
1283 		 * the dcache if required.
1284 		 */
1285 		mutex_exit(&kpmp->khl_mutex);
1286 		sfmmu_kpm_vac_conflict(pp, vaddr);
1287 		mutex_enter(&kpmp->khl_mutex);
1288 
1289 		if (PP_ISNC(pp) || kp->kp_refcnt <= 0 ||
1290 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1291 			panic("sfmmu_kpm_fault:  inconsistent MAPS state, "
1292 				"pp=%p", (void *)pp);
1293 		}
		/* Move one reference from the large to the small count. */
1294 		kp->kp_refcnt--;
1295 		kp->kp_refcnts++;
1296 		pmtx = sfmmu_page_enter(pp);
1297 		PP_SETKPMS(pp);
1298 		sfmmu_page_exit(pmtx);
1299 		goto smallexit;
1300 
1301 	case KPM_TSBM_RPLS_RASM:		/* - - ks s */
1302 		/*
1303 		 * All conflicts in this kpm page are gone but this page
1304 		 * is mapped small. This could be the trigger case for a
1305 		 * small mapping reaper, if this is really needed.
1306 		 * For now we drop it in small again. Fall thru to the
1307 		 * KPM_TSBM_RPLS handling.
1308 		 */
1309 
1310 	case KPM_TSBM_RPLS:			/* kc - ks s */
1311 		/*
1312 		 * Large page mapping is already broken, this page is not
1313 		 * conflicting but already mapped small, so drop it in
1314 		 * small again.
1315 		 */
1316 		if (PP_ISNC(pp) ||
1317 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1318 			panic("sfmmu_kpm_fault:  inconsistent RPLS state, "
1319 				"pp=%p", (void *)pp);
1320 		}
1321 		goto smallexit;
1322 
1323 	case KPM_TSBM_MAPS_BRKO:		/* kc - - - */
1324 		/*
1325 		 * The kpm page where we live in is marked conflicting
1326 		 * but this page is not conflicting. So we have to map it
1327 		 * in small. Call sfmmu_kpm_vac_conflict to take care of
1328 		 * correcting the vcolor and flushing the dcache if required.
1329 		 */
1330 		mutex_exit(&kpmp->khl_mutex);
1331 		sfmmu_kpm_vac_conflict(pp, vaddr);
1332 		mutex_enter(&kpmp->khl_mutex);
1333 
1334 		if (PP_ISNC(pp) || kp->kp_refcnt <= 0 ||
1335 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1336 			panic("sfmmu_kpm_fault:  inconsistent MAPS_BRKO state, "
1337 				"pp=%p", (void *)pp);
1338 		}
1339 		kp->kp_refcnt--;
1340 		kp->kp_refcnts++;
1341 		pmtx = sfmmu_page_enter(pp);
1342 		PP_SETKPMS(pp);
1343 		sfmmu_page_exit(pmtx);
1344 		goto smallexit;
1345 
1346 	case KPM_TSBM_MAPS_BRKT:		/* kc c - - */
1347 	case KPM_TSBM_MAPS_CONFL:		/* kc c ks - */
1348 		if (!PP_ISMAPPED(pp)) {
1349 			/*
1350 			 * We got a tsbmiss on kpm large page range that is
1351 			 * marked to contain vac conflicting pages introduced
1352 			 * by hme mappings. The hme mappings are all gone and
1353 			 * must have bypassed the kpm alias prevention logic.
1354 			 */
1355 			panic("sfmmu_kpm_fault: stale VAC conflict, pp=%p",
1356 				(void *)pp);
1357 		}
1358 
1359 		/*
1360 		 * Check for vcolor conflicts. Return here w/ either no
1361 		 * conflict (fast path), removed hme mapping chains
1362 		 * (unload conflict) or uncached (uncache conflict).
1363 		 * Dcache is cleaned and p_vcolor and P_TNC are set
1364 		 * accordingly. Drop kpmp for uncache conflict cases
1365 		 * since it will be grabbed within sfmmu_kpm_page_cache
1366 		 * in case of an uncache conflict.
1367 		 */
1368 		mutex_exit(&kpmp->khl_mutex);
1369 		sfmmu_kpm_vac_conflict(pp, vaddr);
1370 		mutex_enter(&kpmp->khl_mutex);
1371 
1372 		if (kp->kp_refcnt <= 0)
1373 			panic("sfmmu_kpm_fault: bad refcnt kp=%p", (void *)kp);
1374 
1375 		if (PP_ISNC(pp)) {
1376 			uncached = 1;
1377 		} else {
1378 			/*
1379 			 * When an unload conflict is solved and there are
1380 			 * no other small mappings around, we can resume
1381 			 * largepage mode. Otherwise we have to map or drop
1382 			 * in small. This could be a trigger for a small
1383 			 * mapping reaper when this was the last conflict
1384 			 * within the kpm page and when there are only
1385 			 * other small mappings around.
1386 			 */
1387 			ASSERT(addr_to_vcolor(vaddr) == PP_GET_VCOLOR(pp));
1388 			ASSERT(kp->kp_refcntc > 0);
1389 			kp->kp_refcntc--;
1390 			pmtx = sfmmu_page_enter(pp);
1391 			PP_CLRKPMC(pp);
1392 			sfmmu_page_exit(pmtx);
1393 			ASSERT(PP_ISKPMS(pp) == 0);
1394 			if (kp->kp_refcntc == 0 && kp->kp_refcnts == 0)
1395 				goto largeexit;
1396 		}
1397 
		/* Map this page small: move one reference to kp_refcnts. */
1398 		kp->kp_refcnt--;
1399 		kp->kp_refcnts++;
1400 		pmtx = sfmmu_page_enter(pp);
1401 		PP_SETKPMS(pp);
1402 		sfmmu_page_exit(pmtx);
1403 		goto smallexit;
1404 
1405 	case KPM_TSBM_RPLS_CONFL:		/* kc c ks s */
1406 		if (!PP_ISMAPPED(pp)) {
1407 			/*
1408 			 * We got a tsbmiss on kpm large page range that is
1409 			 * marked to contain vac conflicting pages introduced
1410 			 * by hme mappings. They are all gone and must have
1411 			 * somehow bypassed the kpm alias prevention logic.
1412 			 */
1413 			panic("sfmmu_kpm_fault: stale VAC conflict, pp=%p",
1414 				(void *)pp);
1415 		}
1416 
1417 		/*
1418 		 * This state is only possible for an uncached mapping.
1419 		 */
1420 		if (!PP_ISNC(pp)) {
1421 			panic("sfmmu_kpm_fault: page not uncached, pp=%p",
1422 				(void *)pp);
1423 		}
1424 		uncached = 1;
1425 		goto smallexit;
1426 
1427 	default:
		/* Also the target of the explicit badstate check above. */
1428 badstate_exit:
1429 		panic("sfmmu_kpm_fault: inconsistent VAC state, vaddr=%p kp=%p "
1430 			"pp=%p", (void *)vaddr, (void *)kp, (void *)pp);
1431 	}
1432 
	/*
	 * Small mapping exit: load a TTE8K entry for vaddr, cached or
	 * uncached depending on the `uncached' flag set above.
	 */
1433 smallexit:
1434 	/* tte assembly */
1435 	if (uncached == 0)
1436 		KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
1437 	else
1438 		KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
1439 
1440 	/* tsb dropin */
1441 	sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1442 
1443 	error = 0;
1444 	goto exit;
1445 
	/*
	 * Large mapping exit: load a cached TTE4M entry if the kpm_page
	 * is still referenced, otherwise report EFAULT.
	 */
1446 largeexit:
1447 	if (kp->kp_refcnt > 0) {
1448 
1449 		/* tte assembly */
1450 		KPM_TTE_VCACHED(tte.ll, pfn, TTE4M);
1451 
1452 		/* tsb dropin */
1453 		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT4M);
1454 
1455 		if (kp->kp_refcntc == 0) {
1456 			/* Set "go" flag for TL tsbmiss handler */
1457 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc, &kpmp->khl_lock,
1458 					KPMTSBM_START);
1459 		}
1460 		ASSERT(kp->kp_refcntc == -1);
1461 		error = 0;
1462 
1463 	} else
1464 		error = EFAULT;
	/* Common exit: release the locks in reverse order of acquisition. */
1465 exit:
1466 	mutex_exit(&kpmp->khl_mutex);
1467 	sfmmu_mlist_exit(pml);
1468 	return (error);
1469 }
1470 
1471 /*
1472  * kpm fault handler for mappings with small page size.
1473  */
1474 int
1475 sfmmu_kpm_fault_small(caddr_t vaddr, struct memseg *mseg, page_t *pp)
1476 {
1477 	int		error = 0;
1478 	pgcnt_t		inx;
1479 	kpm_spage_t	*ksp;
1480 	kpm_shlk_t	*kpmsp;
1481 	kmutex_t	*pml;
1482 	pfn_t		pfn = pp->p_pagenum;
1483 	tte_t		tte;
1484 	kmutex_t	*pmtx;
1485 	int		oldval;
1486 
1487 	inx = pfn - mseg->kpm_pbase;
1488 	ksp = &mseg->kpm_spages[inx];
1489 	kpmsp = KPMP_SHASH(ksp);
1490 
1491 	pml = sfmmu_mlist_enter(pp);
1492 
1493 	if (!PP_ISMAPPED_KPM(pp)) {
1494 		sfmmu_mlist_exit(pml);
1495 		return (EFAULT);
1496 	}
1497 
1498 	/*
1499 	 * kp_mapped lookup protected by mlist mutex
1500 	 */
1501 	if (ksp->kp_mapped == KPM_MAPPEDS) {
1502 		/*
1503 		 * Fast path tsbmiss
1504 		 */
1505 		ASSERT(!PP_ISKPMC(pp));
1506 		ASSERT(!PP_ISNC(pp));
1507 
1508 		/* tte assembly */
1509 		KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
1510 
1511 		/* tsb dropin */
1512 		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1513 
1514 	} else if (ksp->kp_mapped == KPM_MAPPEDSC) {
1515 		/*
1516 		 * Got here due to existing or gone kpm/hme VAC conflict.
1517 		 * Recheck for vcolor conflicts. Return here w/ either
1518 		 * no conflict, removed hme mapping chain (unload
1519 		 * conflict) or uncached (uncache conflict). VACaches
1520 		 * are cleaned and p_vcolor and PP_TNC are set accordingly
1521 		 * for the conflict cases.
1522 		 */
1523 		sfmmu_kpm_vac_conflict(pp, vaddr);
1524 
1525 		if (PP_ISNC(pp)) {
1526 			/* ASSERT(pp->p_share); XXX use hat_page_getshare */
1527 
1528 			/* tte assembly */
1529 			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
1530 
1531 			/* tsb dropin */
1532 			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1533 
1534 		} else {
1535 			if (PP_ISKPMC(pp)) {
1536 				pmtx = sfmmu_page_enter(pp);
1537 				PP_CLRKPMC(pp);
1538 				sfmmu_page_exit(pmtx);
1539 			}
1540 
1541 			/* tte assembly */
1542 			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
1543 
1544 			/* tsb dropin */
1545 			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1546 
1547 			oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped,
1548 					&kpmsp->kshl_lock, KPM_MAPPEDS);
1549 
1550 			if (oldval != KPM_MAPPEDSC)
1551 				panic("sfmmu_kpm_fault_small: "
1552 					"stale smallpages mapping");
1553 		}
1554 
1555 	} else {
1556 		/*
1557 		 * We got a tsbmiss on a not active kpm_page range.
1558 		 * Let decide segkpm_fault how to panic.
1559 		 */
1560 		error = EFAULT;
1561 	}
1562 
1563 	sfmmu_mlist_exit(pml);
1564 	return (error);
1565 }
1566 
1567 /*
1568  * Check/handle potential hme/kpm mapping conflicts
1569  */
1570 static void
1571 sfmmu_kpm_vac_conflict(page_t *pp, caddr_t vaddr)
1572 {
1573 	int		vcolor;
1574 	struct sf_hment	*sfhmep;
1575 	struct hat	*tmphat;
1576 	struct sf_hment	*tmphme = NULL;
1577 	struct hme_blk	*hmeblkp;
1578 	tte_t		tte;
1579 
1580 	ASSERT(sfmmu_mlist_held(pp));
1581 
1582 	if (PP_ISNC(pp))
1583 		return;
1584 
1585 	vcolor = addr_to_vcolor(vaddr);
1586 	if (PP_GET_VCOLOR(pp) == vcolor)
1587 		return;
1588 
1589 	/*
1590 	 * There could be no vcolor conflict between a large cached
1591 	 * hme page and a non alias range kpm page (neither large nor
1592 	 * small mapped). So if a hme conflict already exists between
1593 	 * a constituent page of a large hme mapping and a shared small
1594 	 * conflicting hme mapping, both mappings must be already
1595 	 * uncached at this point.
1596 	 */
1597 	ASSERT(!PP_ISMAPPED_LARGE(pp));
1598 
1599 	if (!PP_ISMAPPED(pp)) {
1600 		/*
1601 		 * Previous hme user of page had a different color
1602 		 * but since there are no current users
1603 		 * we just flush the cache and change the color.
1604 		 */
1605 		SFMMU_STAT(sf_pgcolor_conflict);
1606 		sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp));
1607 		PP_SET_VCOLOR(pp, vcolor);
1608 		return;
1609 	}
1610 
1611 	/*
1612 	 * If we get here we have a vac conflict with a current hme
1613 	 * mapping. This must have been established by forcing a wrong
1614 	 * colored mapping, e.g. by using mmap(2) with MAP_FIXED.
1615 	 */
1616 
1617 	/*
1618 	 * Check if any mapping is in same as or if it is locked
1619 	 * since in that case we need to uncache.
1620 	 */
1621 	for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
1622 		tmphme = sfhmep->hme_next;
1623 		hmeblkp = sfmmu_hmetohblk(sfhmep);
1624 		if (hmeblkp->hblk_xhat_bit)
1625 			continue;
1626 		tmphat = hblktosfmmu(hmeblkp);
1627 		sfmmu_copytte(&sfhmep->hme_tte, &tte);
1628 		ASSERT(TTE_IS_VALID(&tte));
1629 		if ((tmphat == ksfmmup) || hmeblkp->hblk_lckcnt) {
1630 			/*
1631 			 * We have an uncache conflict
1632 			 */
1633 			SFMMU_STAT(sf_uncache_conflict);
1634 			sfmmu_page_cache_array(pp, HAT_TMPNC, CACHE_FLUSH, 1);
1635 			return;
1636 		}
1637 	}
1638 
1639 	/*
1640 	 * We have an unload conflict
1641 	 */
1642 	SFMMU_STAT(sf_unload_conflict);
1643 
1644 	for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
1645 		tmphme = sfhmep->hme_next;
1646 		hmeblkp = sfmmu_hmetohblk(sfhmep);
1647 		if (hmeblkp->hblk_xhat_bit)
1648 			continue;
1649 		(void) sfmmu_pageunload(pp, sfhmep, TTE8K);
1650 	}
1651 
1652 	/*
1653 	 * Unloads only does tlb flushes so we need to flush the
1654 	 * dcache vcolor here.
1655 	 */
1656 	sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp));
1657 	PP_SET_VCOLOR(pp, vcolor);
1658 }
1659 
1660 /*
1661  * Remove all kpm mappings using kpme's for pp and check that
1662  * all kpm mappings (w/ and w/o kpme's) are gone.
1663  */
1664 void
1665 sfmmu_kpm_pageunload(page_t *pp)
1666 {
1667 	caddr_t		vaddr;
1668 	struct kpme	*kpme, *nkpme;
1669 
1670 	ASSERT(pp != NULL);
1671 	ASSERT(pp->p_kpmref);
1672 	ASSERT(sfmmu_mlist_held(pp));
1673 
1674 	vaddr = hat_kpm_page2va(pp, 1);
1675 
1676 	for (kpme = pp->p_kpmelist; kpme; kpme = nkpme) {
1677 		ASSERT(kpme->kpe_page == pp);
1678 
1679 		if (pp->p_kpmref == 0)
1680 			panic("sfmmu_kpm_pageunload: stale p_kpmref pp=%p "
1681 				"kpme=%p", (void *)pp, (void *)kpme);
1682 
1683 		nkpme = kpme->kpe_next;
1684 
1685 		/* Add instance callback here here if needed later */
1686 		sfmmu_kpme_sub(kpme, pp);
1687 	}
1688 
1689 	/*
1690 	 * Also correct after mixed kpme/nonkpme mappings. If nonkpme
1691 	 * segkpm clients have unlocked the page and forgot to mapout
1692 	 * we panic here.
1693 	 */
1694 	if (pp->p_kpmref != 0)
1695 		panic("sfmmu_kpm_pageunload: bad refcnt pp=%p", (void *)pp);
1696 
1697 	sfmmu_kpm_mapout(pp, vaddr);
1698 }
1699 
1700 /*
1701  * Remove a large kpm mapping from kernel TSB and all TLB's.
1702  */
1703 static void
1704 sfmmu_kpm_demap_large(caddr_t vaddr)
1705 {
1706 	sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT4M);
1707 	sfmmu_kpm_demap_tlbs(vaddr);
1708 }
1709 
1710 /*
1711  * Remove a small kpm mapping from kernel TSB and all TLB's.
1712  */
1713 static void
1714 sfmmu_kpm_demap_small(caddr_t vaddr)
1715 {
1716 	sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT);
1717 	sfmmu_kpm_demap_tlbs(vaddr);
1718 }
1719 
1720 /*
1721  * Demap a kpm mapping in all TLB's.
1722  */
1723 static void
1724 sfmmu_kpm_demap_tlbs(caddr_t vaddr)
1725 {
1726 	cpuset_t cpuset;
1727 
1728 	kpreempt_disable();
1729 	cpuset = ksfmmup->sfmmu_cpusran;
1730 	CPUSET_AND(cpuset, cpu_ready_set);
1731 	CPUSET_DEL(cpuset, CPU->cpu_id);
1732 	SFMMU_XCALL_STATS(ksfmmup);
1733 
1734 	xt_some(cpuset, vtag_flushpage_tl1, (uint64_t)vaddr,
1735 	    (uint64_t)ksfmmup);
1736 	vtag_flushpage(vaddr, (uint64_t)ksfmmup);
1737 
1738 	kpreempt_enable();
1739 }
1740 
1741 /*
1742  * Summary states used in sfmmu_kpm_vac_unload (KPM_VUL_*).
1743  * See also the more detailed comments within the sfmmu_kpm_vac_unload
 * switch.
1744  * Abbreviations used:
1745  * BIG:   Large page kpm mapping in use.
1746  * CONFL: VAC conflict(s) within a kpm_page.
1747  * INCR:  Count of conflicts within a kpm_page is going to be incremented.
1748  * DECR:  Count of conflicts within a kpm_page is going to be decremented.
1749  * UNMAP_SMALL: A small (regular page size) mapping is going to be unmapped.
1750  * TNC:   Temporary non cached: a kpm mapped page is mapped in TNC state.
 *
 * The trailing comment on each define shows which of the four state
 * bits are set, in the order "kc c ks s" ("-" means the bit is clear).
1751  */
1752 #define	KPM_VUL_BIG		(0)	/* -  - -  - */
1753 #define	KPM_VUL_CONFL_INCR1	(KPM_KS)	/* -  - ks - */
1754 #define	KPM_VUL_UNMAP_SMALL1	(KPM_KS | KPM_S)	/* -  - ks s */
1755 #define	KPM_VUL_CONFL_INCR2	(KPM_KC)	/* kc - -  - */
1756 #define	KPM_VUL_CONFL_INCR3	(KPM_KC | KPM_KS)	/* kc - ks - */
1757 #define	KPM_VUL_UNMAP_SMALL2	(KPM_KC | KPM_KS | KPM_S)	/* kc - ks s */
1758 #define	KPM_VUL_CONFL_DECR1	(KPM_KC | KPM_C)	/* kc c -  - */
1759 #define	KPM_VUL_CONFL_DECR2	(KPM_KC | KPM_C | KPM_KS)	/* kc c ks - */
1760 #define	KPM_VUL_TNC		(KPM_KC | KPM_C | KPM_KS | KPM_S) /* kc c ks s */
1761 
1762 /*
1763  * Handle VAC unload conflicts introduced by hme mappings or vice
1764  * versa when a hme conflict mapping is replaced by a non conflict
1765  * one. Perform actions and state transitions according to the
1766  * various page and kpm_page entry states. VACache flushes are the
1767  * responsibility of the caller. We still hold the mlist lock.
 *
 * pp is the kpm mapped page and vaddr the (hme) virtual address whose
 * color is compared against the color of the page's kpm address. The
 * page must be locked, the mlist lock held and the page must not be
 * non-cacheable (all asserted below).
 *
 * In the large page case the kpm_page's khl_mutex is taken for the
 * duration of the state change; in the kpm_smallpages case only the
 * per page kp_mapped state is examined and updated. Any inconsistent
 * state results in a panic.
1768  */
1769 void
1770 sfmmu_kpm_vac_unload(page_t *pp, caddr_t vaddr)
1771 {
1772 	kpm_page_t	*kp;
1773 	kpm_hlk_t	*kpmp;
1774 	caddr_t		kpmvaddr = hat_kpm_page2va(pp, 1);
1775 	int		newcolor;
1776 	kmutex_t	*pmtx;
1777 	uint_t		vacunlcase;
1778 	int		badstate = 0;
1779 	kpm_spage_t	*ksp;
1780 	kpm_shlk_t	*kpmsp;
1781 
1782 	ASSERT(PAGE_LOCKED(pp));
1783 	ASSERT(sfmmu_mlist_held(pp));
1784 	ASSERT(!PP_ISNC(pp));
1785 
	/* newcolor != 0: vaddr's vcolor differs from the kpm address's. */
1786 	newcolor = addr_to_vcolor(kpmvaddr) != addr_to_vcolor(vaddr);
1787 	if (kpm_smallpages)
1788 		goto smallpages_vac_unload;
1789 
1790 	PP2KPMPG(pp, kp);
1791 	kpmp = KPMP_HASH(kp);
1792 	mutex_enter(&kpmp->khl_mutex);
1793 
1794 	if (IS_KPM_ALIAS_RANGE(kpmvaddr)) {
1795 		if (kp->kp_refcnta < 1) {
1796 			panic("sfmmu_kpm_vac_unload: bad refcnta kpm_page=%p\n",
1797 				(void *)kp);
1798 		}
1799 
		/*
		 * Alias range: only the page's P_KPMC mark is maintained.
		 * Not marked + new color: demap and mark. Marked + same
		 * color: unmark. Marked + new color is inconsistent.
		 */
1800 		if (PP_ISKPMC(pp) == 0) {
1801 			if (newcolor == 0)
1802 				goto exit;
1803 			sfmmu_kpm_demap_small(kpmvaddr);
1804 			pmtx = sfmmu_page_enter(pp);
1805 			PP_SETKPMC(pp);
1806 			sfmmu_page_exit(pmtx);
1807 
1808 		} else if (newcolor == 0) {
1809 			pmtx = sfmmu_page_enter(pp);
1810 			PP_CLRKPMC(pp);
1811 			sfmmu_page_exit(pmtx);
1812 
1813 		} else {
1814 			badstate++;
1815 		}
1816 
1817 		goto exit;
1818 	}
1819 
	/* Validate the kpm_page counters before acting on them. */
1820 	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
1821 	if (kp->kp_refcntc == -1) {
1822 		/*
1823 		 * We should come here only if trap level tsb miss
1824 		 * handler is disabled.
1825 		 */
1826 		badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 ||
1827 			PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp));
1828 	} else {
1829 		badstate |= (kp->kp_refcntc < 0);
1830 	}
1831 
1832 	if (badstate)
1833 		goto exit;
1834 
	/* No conflict recorded and none introduced: nothing to do. */
1835 	if (PP_ISKPMC(pp) == 0 && newcolor == 0) {
1836 		ASSERT(PP_ISKPMS(pp) == 0);
1837 		goto exit;
1838 	}
1839 
1840 	/*
1841 	 * Combine the per kpm_page and per page kpm VAC states
1842 	 * to a summary state in order to make the vac unload
1843 	 * handling more concise.
1844 	 */
1845 	vacunlcase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) |
1846 			((kp->kp_refcnts > 0) ? KPM_KS : 0) |
1847 			(PP_ISKPMC(pp) ? KPM_C : 0) |
1848 			(PP_ISKPMS(pp) ? KPM_S : 0));
1849 
1850 	switch (vacunlcase) {
1851 	case KPM_VUL_BIG:				/* - - - - */
1852 		/*
1853 		 * Have to breakup the large page mapping to be
1854 		 * able to handle the conflicting hme vaddr.
1855 		 */
1856 		if (kp->kp_refcntc == -1) {
1857 			/* remove go indication */
1858 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
1859 					&kpmp->khl_lock, KPMTSBM_STOP);
1860 		}
1861 		sfmmu_kpm_demap_large(kpmvaddr);
1862 
1863 		ASSERT(kp->kp_refcntc == 0);
1864 		kp->kp_refcntc++;
1865 		pmtx = sfmmu_page_enter(pp);
1866 		PP_SETKPMC(pp);
1867 		sfmmu_page_exit(pmtx);
1868 		break;
1869 
1870 	case KPM_VUL_UNMAP_SMALL1:			/* -  - ks s */
1871 	case KPM_VUL_UNMAP_SMALL2:			/* kc - ks s */
1872 		/*
1873 		 * New conflict w/ an active kpm page, actually mapped
1874 		 * in by small TSB/TLB entries. Remove the mapping and
1875 		 * update states.
1876 		 */
1877 		ASSERT(newcolor);
1878 		sfmmu_kpm_demap_small(kpmvaddr);
1879 		kp->kp_refcnts--;
1880 		kp->kp_refcnt++;
1881 		kp->kp_refcntc++;
1882 		pmtx = sfmmu_page_enter(pp);
1883 		PP_CLRKPMS(pp);
1884 		PP_SETKPMC(pp);
1885 		sfmmu_page_exit(pmtx);
1886 		break;
1887 
1888 	case KPM_VUL_CONFL_INCR1:			/* -  - ks - */
1889 	case KPM_VUL_CONFL_INCR2:			/* kc - -  - */
1890 	case KPM_VUL_CONFL_INCR3:			/* kc - ks - */
1891 		/*
1892 		 * New conflict on an active kpm mapped page not yet in
1893 		 * TSB/TLB. Mark page and increment the kpm_page conflict
1894 		 * count.
1895 		 */
1896 		ASSERT(newcolor);
1897 		kp->kp_refcntc++;
1898 		pmtx = sfmmu_page_enter(pp);
1899 		PP_SETKPMC(pp);
1900 		sfmmu_page_exit(pmtx);
1901 		break;
1902 
1903 	case KPM_VUL_CONFL_DECR1:			/* kc c -  - */
1904 	case KPM_VUL_CONFL_DECR2:			/* kc c ks - */
1905 		/*
1906 		 * A conflicting hme mapping is removed for an active
1907 		 * kpm page not yet in TSB/TLB. Unmark page and decrement
1908 		 * the kpm_page conflict count.
1909 		 */
1910 		ASSERT(newcolor == 0);
1911 		kp->kp_refcntc--;
1912 		pmtx = sfmmu_page_enter(pp);
1913 		PP_CLRKPMC(pp);
1914 		sfmmu_page_exit(pmtx);
1915 		break;
1916 
1917 	case KPM_VUL_TNC:				/* kc c ks s */
		/*
		 * Page is marked both conflicting and mapped small, yet
		 * it is not non-cacheable (see the ASSERT at function
		 * entry): log it and treat it as a bad state.
		 */
1918 		cmn_err(CE_NOTE, "sfmmu_kpm_vac_unload: "
1919 			"page not in NC state");
1920 		/* FALLTHRU */
1921 
1922 	default:
1923 		badstate++;
1924 	}
	/* Large page exit: panic on a bad state, else drop khl_mutex. */
1925 exit:
1926 	if (badstate) {
1927 		panic("sfmmu_kpm_vac_unload: inconsistent VAC state, "
1928 			"kpmvaddr=%p kp=%p pp=%p",
1929 			(void *)kpmvaddr, (void *)kp, (void *)pp);
1930 	}
1931 	mutex_exit(&kpmp->khl_mutex);
1932 
1933 	return;
1934 
	/*
	 * kpm_smallpages case: khl_mutex is not taken on this path; the
	 * state is kept per page in the kpm_spage's kp_mapped field.
	 */
1935 smallpages_vac_unload:
1936 	if (newcolor == 0)
1937 		return;
1938 
1939 	PP2KPMSPG(pp, ksp);
1940 	kpmsp = KPMP_SHASH(ksp);
1941 
1942 	if (PP_ISKPMC(pp) == 0) {
1943 		if (ksp->kp_mapped == KPM_MAPPEDS) {
1944 			/*
1945 			 * Stop TL tsbmiss handling
1946 			 */
1947 			(void) sfmmu_kpm_stsbmtl(&ksp->kp_mapped,
1948 					&kpmsp->kshl_lock, KPM_MAPPEDSC);
1949 
1950 			sfmmu_kpm_demap_small(kpmvaddr);
1951 
1952 		} else if (ksp->kp_mapped != KPM_MAPPEDSC) {
1953 			panic("sfmmu_kpm_vac_unload: inconsistent mapping");
1954 		}
1955 
1956 		pmtx = sfmmu_page_enter(pp);
1957 		PP_SETKPMC(pp);
1958 		sfmmu_page_exit(pmtx);
1959 
1960 	} else {
		/* Already marked conflicting: state must be MAPPEDSC. */
1961 		if (ksp->kp_mapped != KPM_MAPPEDSC)
1962 			panic("sfmmu_kpm_vac_unload: inconsistent mapping");
1963 	}
1964 }
1965 
1966 /*
1967  * Page is marked to be in VAC conflict to an existing kpm mapping
1968  * or is kpm mapped using only the regular pagesize. Called from
1969  * sfmmu_hblk_unload when a mlist is completely removed.
1970  */
1971 void
1972 sfmmu_kpm_hme_unload(page_t *pp)
1973 {
1974 	/* tte assembly */
1975 	kpm_page_t	*kp;
1976 	kpm_hlk_t	*kpmp;
1977 	caddr_t		vaddr;
1978 	kmutex_t	*pmtx;
1979 	uint_t		flags;
1980 	kpm_spage_t	*ksp;
1981 
1982 	ASSERT(sfmmu_mlist_held(pp));
1983 	ASSERT(PP_ISMAPPED_KPM(pp));
1984 
1985 	flags = pp->p_nrm & (P_KPMC | P_KPMS);
1986 	if (kpm_smallpages)
1987 		goto smallpages_hme_unload;
1988 
1989 	if (flags == (P_KPMC | P_KPMS)) {
1990 		panic("sfmmu_kpm_hme_unload: page should be uncached");
1991 
1992 	} else if (flags == P_KPMS) {
1993 		/*
1994 		 * Page mapped small but not involved in VAC conflict
1995 		 */
1996 		return;
1997 	}
1998 
1999 	vaddr = hat_kpm_page2va(pp, 1);
2000 
2001 	PP2KPMPG(pp, kp);
2002 	kpmp = KPMP_HASH(kp);
2003 	mutex_enter(&kpmp->khl_mutex);
2004 
2005 	if (IS_KPM_ALIAS_RANGE(vaddr)) {
2006 		if (kp->kp_refcnta < 1) {
2007 			panic("sfmmu_kpm_hme_unload: bad refcnta kpm_page=%p\n",
2008 				(void *)kp);
2009 		}
2010 	} else {
2011 		if (kp->kp_refcntc < 1) {
2012 			panic("sfmmu_kpm_hme_unload: bad refcntc kpm_page=%p\n",
2013 				(void *)kp);
2014 		}
2015 		kp->kp_refcntc--;
2016 	}
2017 
2018 	pmtx = sfmmu_page_enter(pp);
2019 	PP_CLRKPMC(pp);
2020 	sfmmu_page_exit(pmtx);
2021 
2022 	mutex_exit(&kpmp->khl_mutex);
2023 	return;
2024 
2025 smallpages_hme_unload:
2026 	if (flags != P_KPMC)
2027 		panic("sfmmu_kpm_hme_unload: page should be uncached");
2028 
2029 	vaddr = hat_kpm_page2va(pp, 1);
2030 	PP2KPMSPG(pp, ksp);
2031 
2032 	if (ksp->kp_mapped != KPM_MAPPEDSC)
2033 		panic("sfmmu_kpm_hme_unload: inconsistent mapping");
2034 
2035 	/*
2036 	 * Keep KPM_MAPPEDSC until the next kpm tsbmiss where it
2037 	 * prevents TL tsbmiss handling and force a hat_kpm_fault.
2038 	 * There we can start over again.
2039 	 */
2040 
2041 	pmtx = sfmmu_page_enter(pp);
2042 	PP_CLRKPMC(pp);
2043 	sfmmu_page_exit(pmtx);
2044 }
2045 
2046 /*
2047  * Special hooks for sfmmu_page_cache_array() when changing the
2048  * cacheability of a page. It is used to obey the hat_kpm lock
2049  * ordering (mlist -> kpmp -> spl, and back).
2050  */
2051 kpm_hlk_t *
2052 sfmmu_kpm_kpmp_enter(page_t *pp, pgcnt_t npages)
2053 {
2054 	kpm_page_t	*kp;
2055 	kpm_hlk_t	*kpmp;
2056 
2057 	ASSERT(sfmmu_mlist_held(pp));
2058 
2059 	if (kpm_smallpages || PP_ISMAPPED_KPM(pp) == 0)
2060 		return (NULL);
2061 
2062 	ASSERT(npages <= kpmpnpgs);
2063 
2064 	PP2KPMPG(pp, kp);
2065 	kpmp = KPMP_HASH(kp);
2066 	mutex_enter(&kpmp->khl_mutex);
2067 
2068 	return (kpmp);
2069 }
2070 
2071 void
2072 sfmmu_kpm_kpmp_exit(kpm_hlk_t *kpmp)
2073 {
2074 	if (kpm_smallpages || kpmp == NULL)
2075 		return;
2076 
2077 	mutex_exit(&kpmp->khl_mutex);
2078 }
2079 
2080 /*
2081  * Summary states used in sfmmu_kpm_page_cache (KPM_*).
2082  * See also the more detailed comments within the sfmmu_kpm_page_cache
 * switch.
2083  * Abbreviations used:
2084  * UNC:     Input state for an uncache request.
2085  *   BIG:     Large page kpm mapping in use.
2086  *   SMALL:   Page has a small kpm mapping within a kpm_page range.
2087  *   NODEMAP: No demap needed.
2088  *   NOP:     No operation needed on this input state.
2089  * CACHE:   Input state for a re-cache request.
2090  *   MAPS:    Page is in TNC and kpm VAC conflict state and kpm mapped small.
2091  *   NOMAP:   Page is in TNC and kpm VAC conflict state, but not small kpm
2092  *            mapped.
2093  *   NOMAPO:  Page is in TNC and kpm VAC conflict state, but not small kpm
2094  *            mapped. There are also other small kpm mappings within this
2095  *            kpm_page.
 *
 * The trailing comment on each define shows which of the four state
 * bits are set, in the order "kc c ks s" ("-" means the bit is clear).
 * Note that KPM_CACHE_NOMAP/KPM_CACHE_NOMAPO have the same values as
 * KPM_UNC_NOP1/KPM_UNC_NOP2.
2096  */
2097 #define	KPM_UNC_BIG		(0)	/* -  - -  - */
2098 #define	KPM_UNC_NODEMAP1	(KPM_KS)	/* -  - ks - */
2099 #define	KPM_UNC_SMALL1		(KPM_KS | KPM_S)	/* -  - ks s */
2100 #define	KPM_UNC_NODEMAP2	(KPM_KC)	/* kc - -  - */
2101 #define	KPM_UNC_NODEMAP3	(KPM_KC | KPM_KS)	/* kc - ks - */
2102 #define	KPM_UNC_SMALL2		(KPM_KC | KPM_KS | KPM_S)	/* kc - ks s */
2103 #define	KPM_UNC_NOP1		(KPM_KC | KPM_C)	/* kc c -  - */
2104 #define	KPM_UNC_NOP2		(KPM_KC | KPM_C | KPM_KS)	/* kc c ks - */
2105 #define	KPM_CACHE_NOMAP		(KPM_KC | KPM_C)	/* kc c -  - */
2106 #define	KPM_CACHE_NOMAPO	(KPM_KC | KPM_C | KPM_KS)	/* kc c ks - */
2107 #define	KPM_CACHE_MAPS		(KPM_KC | KPM_C | KPM_KS | KPM_S) /* kc c ks s */
2108 
2109 /*
2110  * This function is called when the virtual cacheability of a page
2111  * is changed and the page has an active kpm mapping. The mlist mutex,
2112  * the spl hash lock and the kpmp mutex (if needed) are already grabbed.
2113  */
2114 /*ARGSUSED2*/
2115 void
2116 sfmmu_kpm_page_cache(page_t *pp, int flags, int cache_flush_tag)
2117 {
2118 	kpm_page_t	*kp;
2119 	kpm_hlk_t	*kpmp;
2120 	caddr_t		kpmvaddr;
2121 	int		badstate = 0;
2122 	uint_t		pgcacase;
2123 	kpm_spage_t	*ksp;
2124 	kpm_shlk_t	*kpmsp;
2125 	int		oldval;
2126 
2127 	ASSERT(PP_ISMAPPED_KPM(pp));
2128 	ASSERT(sfmmu_mlist_held(pp));
2129 	ASSERT(sfmmu_page_spl_held(pp));
2130 
2131 	if (flags != HAT_TMPNC && flags != HAT_CACHE)
2132 		panic("sfmmu_kpm_page_cache: bad flags");
2133 
2134 	kpmvaddr = hat_kpm_page2va(pp, 1);
2135 
2136 	if (flags == HAT_TMPNC && cache_flush_tag == CACHE_FLUSH) {
2137 		pfn_t pfn = pp->p_pagenum;
2138 		int vcolor = addr_to_vcolor(kpmvaddr);
2139 		cpuset_t cpuset = cpu_ready_set;
2140 
2141 		/* Flush vcolor in DCache */
2142 		CPUSET_DEL(cpuset, CPU->cpu_id);
2143 		SFMMU_XCALL_STATS(ksfmmup);
2144 		xt_some(cpuset, vac_flushpage_tl1, pfn, vcolor);
2145 		vac_flushpage(pfn, vcolor);
2146 	}
2147 
2148 	if (kpm_smallpages)
2149 		goto smallpages_page_cache;
2150 
2151 	PP2KPMPG(pp, kp);
2152 	kpmp = KPMP_HASH(kp);
2153 	ASSERT(MUTEX_HELD(&kpmp->khl_mutex));
2154 
2155 	if (IS_KPM_ALIAS_RANGE(kpmvaddr)) {
2156 		if (kp->kp_refcnta < 1) {
2157 			panic("sfmmu_kpm_page_cache: bad refcnta "
2158 				"kpm_page=%p\n", (void *)kp);
2159 		}
2160 		sfmmu_kpm_demap_small(kpmvaddr);
2161 		if (flags == HAT_TMPNC) {
2162 			PP_SETKPMC(pp);
2163 			ASSERT(!PP_ISKPMS(pp));
2164 		} else {
2165 			ASSERT(PP_ISKPMC(pp));
2166 			PP_CLRKPMC(pp);
2167 		}
2168 		goto exit;
2169 	}
2170 
2171 	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
2172 	if (kp->kp_refcntc == -1) {
2173 		/*
2174 		 * We should come here only if trap level tsb miss
2175 		 * handler is disabled.
2176 		 */
2177 		badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 ||
2178 			PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp));
2179 	} else {
2180 		badstate |= (kp->kp_refcntc < 0);
2181 	}
2182 
2183 	if (badstate)
2184 		goto exit;
2185 
2186 	/*
2187 	 * Combine the per kpm_page and per page kpm VAC states to
2188 	 * a summary state in order to make the VAC cache/uncache
2189 	 * handling more concise.
2190 	 */
2191 	pgcacase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) |
2192 			((kp->kp_refcnts > 0) ? KPM_KS : 0) |
2193 			(PP_ISKPMC(pp) ? KPM_C : 0) |
2194 			(PP_ISKPMS(pp) ? KPM_S : 0));
2195 
2196 	if (flags == HAT_CACHE) {
2197 		switch (pgcacase) {
2198 		case KPM_CACHE_MAPS:			/* kc c ks s */
2199 			sfmmu_kpm_demap_small(kpmvaddr);
2200 			if (kp->kp_refcnts < 1) {
2201 				panic("sfmmu_kpm_page_cache: bad refcnts "
2202 				"kpm_page=%p\n", (void *)kp);
2203 			}
2204 			kp->kp_refcnts--;
2205 			kp->kp_refcnt++;
2206 			PP_CLRKPMS(pp);
2207 			/* FALLTHRU */
2208 
2209 		case KPM_CACHE_NOMAP:			/* kc c -  - */
2210 		case KPM_CACHE_NOMAPO:			/* kc c ks - */
2211 			kp->kp_refcntc--;
2212 			PP_CLRKPMC(pp);
2213 			break;
2214 
2215 		default:
2216 			badstate++;
2217 		}
2218 		goto exit;
2219 	}
2220 
2221 	switch (pgcacase) {
2222 	case KPM_UNC_BIG:				/* - - - - */
2223 		if (kp->kp_refcnt < 1) {
2224 			panic("sfmmu_kpm_page_cache: bad refcnt "
2225 				"kpm_page=%p\n", (void *)kp);
2226 		}
2227 
2228 		/*
2229 		 * Have to breakup the large page mapping in preparation
2230 		 * to the upcoming TNC mode handled by small mappings.
2231 		 * The demap can already be done due to another conflict
2232 		 * within the kpm_page.
2233 		 */
2234 		if (kp->kp_refcntc == -1) {
2235 			/* remove go indication */
2236 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
2237 				&kpmp->khl_lock, KPMTSBM_STOP);
2238 		}
2239 		ASSERT(kp->kp_refcntc == 0);
2240 		sfmmu_kpm_demap_large(kpmvaddr);
2241 		kp->kp_refcntc++;
2242 		PP_SETKPMC(pp);
2243 		break;
2244 
2245 	case KPM_UNC_SMALL1:				/* -  - ks s */
2246 	case KPM_UNC_SMALL2:				/* kc - ks s */
2247 		/*
2248 		 * Have to demap an already small kpm mapping in preparation
2249 		 * to the upcoming TNC mode. The demap can already be done
2250 		 * due to another conflict within the kpm_page.
2251 		 */
2252 		sfmmu_kpm_demap_small(kpmvaddr);
2253 		kp->kp_refcntc++;
2254 		kp->kp_refcnts--;
2255 		kp->kp_refcnt++;
2256 		PP_CLRKPMS(pp);
2257 		PP_SETKPMC(pp);
2258 		break;
2259 
2260 	case KPM_UNC_NODEMAP1:				/* -  - ks - */
2261 		/* fallthru */
2262 
2263 	case KPM_UNC_NODEMAP2:				/* kc - -  - */
2264 	case KPM_UNC_NODEMAP3:				/* kc - ks - */
2265 		kp->kp_refcntc++;
2266 		PP_SETKPMC(pp);
2267 		break;
2268 
2269 	case KPM_UNC_NOP1:				/* kc c -  - */
2270 	case KPM_UNC_NOP2:				/* kc c ks - */
2271 		break;
2272 
2273 	default:
2274 		badstate++;
2275 	}
2276 exit:
2277 	if (badstate) {
2278 		panic("sfmmu_kpm_page_cache: inconsistent VAC state "
2279 			"kpmvaddr=%p kp=%p pp=%p", (void *)kpmvaddr,
2280 			(void *)kp, (void *)pp);
2281 	}
2282 	return;
2283 
2284 smallpages_page_cache:
2285 	PP2KPMSPG(pp, ksp);
2286 	kpmsp = KPMP_SHASH(ksp);
2287 
2288 	oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped,
2289 				&kpmsp->kshl_lock, KPM_MAPPEDSC);
2290 
2291 	if (!(oldval == KPM_MAPPEDS || oldval == KPM_MAPPEDSC))
2292 		panic("smallpages_page_cache: inconsistent mapping");
2293 
2294 	sfmmu_kpm_demap_small(kpmvaddr);
2295 
2296 	if (flags == HAT_TMPNC) {
2297 		PP_SETKPMC(pp);
2298 		ASSERT(!PP_ISKPMS(pp));
2299 
2300 	} else {
2301 		ASSERT(PP_ISKPMC(pp));
2302 		PP_CLRKPMC(pp);
2303 	}
2304 
2305 	/*
2306 	 * Keep KPM_MAPPEDSC until the next kpm tsbmiss where it
2307 	 * prevents TL tsbmiss handling and force a hat_kpm_fault.
2308 	 * There we can start over again.
2309 	 */
2310 }
2311