xref: /titanic_44/usr/src/uts/sun4u/vm/mach_kpm.c (revision d20abfaa275f62e387fe9d814375fb3829fdb91f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Kernel Physical Mapping (segkpm) hat interface routines for sun4u.
28  */
29 
30 #include <sys/types.h>
31 #include <vm/hat.h>
32 #include <vm/hat_sfmmu.h>
33 #include <vm/page.h>
34 #include <sys/sysmacros.h>
35 #include <sys/cmn_err.h>
36 #include <sys/machsystm.h>
37 #include <vm/seg_kpm.h>
38 #include <sys/cpu_module.h>
39 #include <vm/mach_kpm.h>
40 
41 /* kpm prototypes */
42 static caddr_t	sfmmu_kpm_mapin(page_t *);
43 static void	sfmmu_kpm_mapout(page_t *, caddr_t);
44 static int	sfmmu_kpme_lookup(struct kpme *, page_t *);
45 static void	sfmmu_kpme_add(struct kpme *, page_t *);
46 static void	sfmmu_kpme_sub(struct kpme *, page_t *);
47 static caddr_t	sfmmu_kpm_getvaddr(page_t *, int *);
48 static int	sfmmu_kpm_fault(caddr_t, struct memseg *, page_t *);
49 static int	sfmmu_kpm_fault_small(caddr_t, struct memseg *, page_t *);
50 static void	sfmmu_kpm_vac_conflict(page_t *, caddr_t);
51 void	sfmmu_kpm_pageunload(page_t *);
52 void	sfmmu_kpm_vac_unload(page_t *, caddr_t);
53 static void	sfmmu_kpm_demap_large(caddr_t);
54 static void	sfmmu_kpm_demap_small(caddr_t);
55 static void	sfmmu_kpm_demap_tlbs(caddr_t);
56 void	sfmmu_kpm_hme_unload(page_t *);
57 kpm_hlk_t *sfmmu_kpm_kpmp_enter(page_t *, pgcnt_t);
58 void	sfmmu_kpm_kpmp_exit(kpm_hlk_t *kpmp);
59 void	sfmmu_kpm_page_cache(page_t *, int, int);
60 
61 extern uint_t vac_colors;
62 
63 /*
64  * Kernel Physical Mapping (kpm) facility
65  */
66 
67 void
68 mach_kpm_init()
69 {}
70 
71 /* -- hat_kpm interface section -- */
72 
73 /*
74  * Mapin a locked page and return the vaddr.
75  * When a kpme is provided by the caller it is added to
76  * the page p_kpmelist. The page to be mapped in must
77  * be at least read locked (p_selock).
78  */
79 caddr_t
80 hat_kpm_mapin(struct page *pp, struct kpme *kpme)
81 {
82 	kmutex_t	*pml;
83 	caddr_t		vaddr;
84 
85 	if (kpm_enable == 0) {
86 		cmn_err(CE_WARN, "hat_kpm_mapin: kpm_enable not set");
87 		return ((caddr_t)NULL);
88 	}
89 
90 	if (pp == NULL || PAGE_LOCKED(pp) == 0) {
91 		cmn_err(CE_WARN, "hat_kpm_mapin: pp zero or not locked");
92 		return ((caddr_t)NULL);
93 	}
94 
95 	pml = sfmmu_mlist_enter(pp);
96 	ASSERT(pp->p_kpmref >= 0);
97 
98 	vaddr = (pp->p_kpmref == 0) ?
99 	    sfmmu_kpm_mapin(pp) : hat_kpm_page2va(pp, 1);
100 
101 	if (kpme != NULL) {
102 		/*
103 		 * Tolerate multiple mapins for the same kpme to avoid
104 		 * the need for an extra serialization.
105 		 */
106 		if ((sfmmu_kpme_lookup(kpme, pp)) == 0)
107 			sfmmu_kpme_add(kpme, pp);
108 
109 		ASSERT(pp->p_kpmref > 0);
110 
111 	} else {
112 		pp->p_kpmref++;
113 	}
114 
115 	sfmmu_mlist_exit(pml);
116 	return (vaddr);
117 }
118 
119 /*
120  * Mapout a locked page.
121  * When a kpme is provided by the caller it is removed from
122  * the page p_kpmelist. The page to be mapped out must be at
123  * least read locked (p_selock).
124  * Note: The seg_kpm layer provides a mapout interface for the
125  * case that a kpme is used and the underlying page is unlocked.
126  * This can be used instead of calling this function directly.
127  */
128 void
129 hat_kpm_mapout(struct page *pp, struct kpme *kpme, caddr_t vaddr)
130 {
131 	kmutex_t	*pml;
132 
133 	if (kpm_enable == 0) {
134 		cmn_err(CE_WARN, "hat_kpm_mapout: kpm_enable not set");
135 		return;
136 	}
137 
138 	if (IS_KPM_ADDR(vaddr) == 0) {
139 		cmn_err(CE_WARN, "hat_kpm_mapout: no kpm address");
140 		return;
141 	}
142 
143 	if (pp == NULL || PAGE_LOCKED(pp) == 0) {
144 		cmn_err(CE_WARN, "hat_kpm_mapout: page zero or not locked");
145 		return;
146 	}
147 
148 	if (kpme != NULL) {
149 		ASSERT(pp == kpme->kpe_page);
150 		pp = kpme->kpe_page;
151 		pml = sfmmu_mlist_enter(pp);
152 
153 		if (sfmmu_kpme_lookup(kpme, pp) == 0)
154 			panic("hat_kpm_mapout: kpme not found pp=%p",
155 			    (void *)pp);
156 
157 		ASSERT(pp->p_kpmref > 0);
158 		sfmmu_kpme_sub(kpme, pp);
159 
160 	} else {
161 		pml = sfmmu_mlist_enter(pp);
162 		pp->p_kpmref--;
163 	}
164 
165 	ASSERT(pp->p_kpmref >= 0);
166 	if (pp->p_kpmref == 0)
167 		sfmmu_kpm_mapout(pp, vaddr);
168 
169 	sfmmu_mlist_exit(pml);
170 }
171 
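/*
 * Illustrative usage sketch (not part of the original file): the typical
 * caller pattern for the hat_kpm_mapin/hat_kpm_mapout pair above, assuming
 * the caller already holds at least a shared lock (p_selock) on pp.
 * example_consume_mapping() is a hypothetical helper; the block is kept
 * under #if 0 so it is not compiled.
 */
#if 0
static void
example_kpm_access(page_t *pp)
{
	struct kpme	kpme;
	caddr_t		vaddr;

	/* kpme is linked onto pp->p_kpmelist by hat_kpm_mapin */
	vaddr = hat_kpm_mapin(pp, &kpme);
	if (vaddr != NULL) {
		example_consume_mapping(vaddr, MMU_PAGESIZE);
		hat_kpm_mapout(pp, &kpme, vaddr);
	}
}
#endif
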
172 /*
173  * hat_kpm_mapin_pfn is used to obtain a kpm mapping for physical
174  * memory addresses that are not described by a page_t.  It can
175  * only be supported if vac_colors=1, because there is no page_t
176  * and corresponding kpm_page_t to track VAC conflicts.  Currently,
177  * this may not be used on pfn's backed by page_t's, because the
178  * kpm state may not be consistent in hat_kpm_fault if the page is
179  * mapped using both this routine and hat_kpm_mapin.  KPM should be
180  * cleaned up on sun4u/vac_colors=1 to be as minimal as on sun4v.
181  * The caller must only pass pfn's for valid physical addresses; violation
182  * of this rule will cause a panic.
183  */
184 caddr_t
185 hat_kpm_mapin_pfn(pfn_t pfn)
186 {
187 	caddr_t paddr, vaddr;
188 	tte_t tte;
189 	uint_t szc = kpm_smallpages ? TTE8K : TTE4M;
190 	uint_t shift = kpm_smallpages ? MMU_PAGESHIFT : MMU_PAGESHIFT4M;
191 
192 	if (kpm_enable == 0 || vac_colors > 1 ||
193 	    page_numtomemseg_nolock(pfn) != NULL)
194 		return ((caddr_t)NULL);
195 
196 	paddr = (caddr_t)ptob(pfn);
197 	vaddr = (uintptr_t)kpm_vbase + paddr;
198 
199 	KPM_TTE_VCACHED(tte.ll, pfn, szc);
200 	sfmmu_kpm_load_tsb(vaddr, &tte, shift);
201 
202 	return (vaddr);
203 }
204 
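/*
 * Illustrative sketch (not part of the original file): reading one word of
 * physical memory that is not described by a page_t (e.g. a firmware-owned
 * frame) via hat_kpm_mapin_pfn.  The pfn passed in is assumed to be a valid
 * physical address and, on sun4u, this only works with vac_colors == 1;
 * kept under #if 0 so it is not compiled.
 */
#if 0
static int
example_kpm_peek_pfn(pfn_t pfn, uint64_t *valp)
{
	caddr_t vaddr;

	vaddr = hat_kpm_mapin_pfn(pfn);
	if (vaddr == NULL)
		return (-1);

	*valp = *(uint64_t *)vaddr;
	hat_kpm_mapout_pfn(pfn);	/* currently a no-op, see below */
	return (0);
}
#endif
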
205 /*ARGSUSED*/
206 void
207 hat_kpm_mapout_pfn(pfn_t pfn)
208 {
209 	/* empty */
210 }
211 
212 /*
213  * Return the kpm virtual address for the page at pp.
214  * If checkswap is non-zero and the page is backed by a
215  * swap vnode, the physical address is used rather than
216  * p_offset to determine the kpm region.
217  * Note: This function must be used with extreme care; the
218  * stability of the page identity is the responsibility
219  * of the caller.
220  */
221 /*ARGSUSED*/
222 caddr_t
223 hat_kpm_page2va(struct page *pp, int checkswap)
224 {
225 	int		vcolor, vcolor_pa;
226 	uintptr_t	paddr, vaddr;
227 
228 	ASSERT(kpm_enable);
229 
230 	paddr = ptob(pp->p_pagenum);
231 	vcolor_pa = addr_to_vcolor(paddr);
232 
233 	if (checkswap && pp->p_vnode && IS_SWAPFSVP(pp->p_vnode))
234 		vcolor = (PP_ISNC(pp)) ? vcolor_pa : PP_GET_VCOLOR(pp);
235 	else
236 		vcolor = addr_to_vcolor(pp->p_offset);
237 
238 	vaddr = (uintptr_t)kpm_vbase + paddr;
239 
240 	if (vcolor_pa != vcolor) {
241 		vaddr += ((uintptr_t)(vcolor - vcolor_pa) << MMU_PAGESHIFT);
242 		vaddr += (vcolor_pa > vcolor) ?
243 		    ((uintptr_t)vcolor_pa << kpm_size_shift) :
244 		    ((uintptr_t)(vcolor - vcolor_pa) << kpm_size_shift);
245 	}
246 
247 	return ((caddr_t)vaddr);
248 }
249 
250 /*
251  * Return the page for the kpm virtual address vaddr.
252  * Caller is responsible for the kpm mapping and lock
253  * state of the page.
254  */
255 page_t *
256 hat_kpm_vaddr2page(caddr_t vaddr)
257 {
258 	uintptr_t	paddr;
259 	pfn_t		pfn;
260 
261 	ASSERT(IS_KPM_ADDR(vaddr));
262 
263 	SFMMU_KPM_VTOP(vaddr, paddr);
264 	pfn = (pfn_t)btop(paddr);
265 
266 	return (page_numtopp_nolock(pfn));
267 }
268 
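/*
 * Illustrative sketch (not part of the original file): the page2va/
 * vaddr2page round trip.  Assumes pp is kpm mapped and that its identity
 * stays stable for the duration of the check; kept under #if 0.
 */
#if 0
static void
example_kpm_roundtrip(page_t *pp)
{
	caddr_t vaddr = hat_kpm_page2va(pp, 1);

	ASSERT(IS_KPM_ADDR(vaddr));
	ASSERT(hat_kpm_vaddr2page(vaddr) == pp);
}
#endif
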
269 /* page to kpm_page */
270 #define	PP2KPMPG(pp, kp) {						\
271 	struct memseg	*mseg;						\
272 	pgcnt_t		inx;						\
273 	pfn_t		pfn;						\
274 									\
275 	pfn = pp->p_pagenum;						\
276 	mseg = page_numtomemseg_nolock(pfn);				\
277 	ASSERT(mseg);							\
278 	inx = ptokpmp(kpmptop(ptokpmp(pfn)) - mseg->kpm_pbase);		\
279 	ASSERT(inx < mseg->kpm_nkpmpgs);				\
280 	kp = &mseg->kpm_pages[inx];					\
281 }
282 
283 /* page to kpm_spage */
284 #define	PP2KPMSPG(pp, ksp) {						\
285 	struct memseg	*mseg;						\
286 	pgcnt_t		inx;						\
287 	pfn_t		pfn;						\
288 									\
289 	pfn = pp->p_pagenum;						\
290 	mseg = page_numtomemseg_nolock(pfn);				\
291 	ASSERT(mseg);							\
292 	inx = pfn - mseg->kpm_pbase;					\
293 	ksp = &mseg->kpm_spages[inx];					\
294 }
295 
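/*
 * Illustrative usage of the lookup macros above (not part of the original
 * file): PP2KPMPG yields the kpm_page_t covering pp in large page mode,
 * PP2KPMSPG the kpm_spage_t used when kpm_smallpages is set; see
 * sfmmu_kpm_mapin/mapout below for the real callers.  Kept under #if 0.
 */
#if 0
static void
example_kpm_lookup(page_t *pp)
{
	kpm_page_t	*kp;
	kpm_spage_t	*ksp;

	PP2KPMPG(pp, kp);	/* kp = &mseg->kpm_pages[kpm index of pp] */
	PP2KPMSPG(pp, ksp);	/* ksp = &mseg->kpm_spages[pfn - kpm_pbase] */
}
#endif
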
296 /*
297  * hat_kpm_fault is called from segkpm_fault when a kpm tsbmiss occurred
298  * which could not be resolved by the trap level tsbmiss handler for the
299  * following reasons:
300  * . The vaddr is in VAC alias range (always PAGESIZE mapping size).
301  * . The kpm (s)page range of vaddr is in a VAC alias prevention state.
302  * . tsbmiss handling at trap level is not desired (DEBUG kernel only,
303  *   kpm_tsbmtl == 0).
304  */
305 int
306 hat_kpm_fault(struct hat *hat, caddr_t vaddr)
307 {
308 	int		error;
309 	uintptr_t	paddr;
310 	pfn_t		pfn;
311 	struct memseg	*mseg;
312 	page_t	*pp;
313 
314 	if (kpm_enable == 0) {
315 		cmn_err(CE_WARN, "hat_kpm_fault: kpm_enable not set");
316 		return (ENOTSUP);
317 	}
318 
319 	ASSERT(hat == ksfmmup);
320 	ASSERT(IS_KPM_ADDR(vaddr));
321 
322 	SFMMU_KPM_VTOP(vaddr, paddr);
323 	pfn = (pfn_t)btop(paddr);
324 	if ((mseg = page_numtomemseg_nolock(pfn)) != NULL) {
325 		pp = &mseg->pages[(pgcnt_t)(pfn - mseg->pages_base)];
326 		ASSERT((pfn_t)pp->p_pagenum == pfn);
327 	}
328 
329 	/*
330 	 * hat_kpm_mapin_pfn may add a kpm translation for memory that falls
331 	 * outside of memsegs.  Check for this case and provide the translation
332 	 * here.
333 	 */
334 	if (vac_colors == 1 && mseg == NULL) {
335 		tte_t tte;
336 		uint_t szc = kpm_smallpages ? TTE8K : TTE4M;
337 		uint_t shift = kpm_smallpages ? MMU_PAGESHIFT : MMU_PAGESHIFT4M;
338 
339 		ASSERT(address_in_memlist(phys_install, paddr, 1));
340 		KPM_TTE_VCACHED(tte.ll, pfn, szc);
341 		sfmmu_kpm_load_tsb(vaddr, &tte, shift);
342 		error = 0;
343 	} else if (mseg == NULL || !PAGE_LOCKED(pp))
344 		error = EFAULT;
345 	else if (kpm_smallpages == 0)
346 		error = sfmmu_kpm_fault(vaddr, mseg, pp);
347 	else
348 		error = sfmmu_kpm_fault_small(vaddr, mseg, pp);
349 
350 	return (error);
351 }
352 
353 /*
354  * memseg_hash[] was cleared, need to clear memseg_phash[] too.
355  */
356 void
357 hat_kpm_mseghash_clear(int nentries)
358 {
359 	pgcnt_t i;
360 
361 	if (kpm_enable == 0)
362 		return;
363 
364 	for (i = 0; i < nentries; i++)
365 		memseg_phash[i] = MSEG_NULLPTR_PA;
366 }
367 
368 /*
369  * Update memseg_phash[inx] when memseg_hash[inx] was changed.
370  */
371 void
372 hat_kpm_mseghash_update(pgcnt_t inx, struct memseg *msp)
373 {
374 	if (kpm_enable == 0)
375 		return;
376 
377 	memseg_phash[inx] = (msp) ? va_to_pa(msp) : MSEG_NULLPTR_PA;
378 }
379 
380 /*
381  * Update kpm memseg members from basic memseg info.
382  */
383 void
384 hat_kpm_addmem_mseg_update(struct memseg *msp, pgcnt_t nkpmpgs,
385 	offset_t kpm_pages_off)
386 {
387 	if (kpm_enable == 0)
388 		return;
389 
390 	msp->kpm_pages = (kpm_page_t *)((caddr_t)msp->pages + kpm_pages_off);
391 	msp->kpm_nkpmpgs = nkpmpgs;
392 	msp->kpm_pbase = kpmptop(ptokpmp(msp->pages_base));
393 	msp->pagespa = va_to_pa(msp->pages);
394 	msp->epagespa = va_to_pa(msp->epages);
395 	msp->kpm_pagespa = va_to_pa(msp->kpm_pages);
396 }
397 
398 /*
399  * Setup nextpa when a memseg is inserted.
400  * Assumes that the memsegslock is already held.
401  */
402 void
403 hat_kpm_addmem_mseg_insert(struct memseg *msp)
404 {
405 	if (kpm_enable == 0)
406 		return;
407 
408 	ASSERT(memsegs_lock_held());
409 	msp->nextpa = (memsegs) ? va_to_pa(memsegs) : MSEG_NULLPTR_PA;
410 }
411 
412 /*
413  * Setup memsegspa when a memseg is (head) inserted.
414  * Called before memsegs is updated to complete a
415  * memseg insert operation.
416  * Assumes that the memsegslock is already held.
417  */
418 void
419 hat_kpm_addmem_memsegs_update(struct memseg *msp)
420 {
421 	if (kpm_enable == 0)
422 		return;
423 
424 	ASSERT(memsegs_lock_held());
425 	ASSERT(memsegs);
426 	memsegspa = va_to_pa(msp);
427 }
428 
429 /*
430  * Return end of metadata for an already setup memseg.
431  *
432  * Note: kpm_pages and kpm_spages are aliases and the underlying
433  * member of struct memseg is a union, therefore they always have
434  * the same address within a memseg. They must be differentiated
435  * when pointer arithmetic is used with them.
436  */
437 caddr_t
438 hat_kpm_mseg_reuse(struct memseg *msp)
439 {
440 	caddr_t end;
441 
442 	if (kpm_smallpages == 0)
443 		end = (caddr_t)(msp->kpm_pages + msp->kpm_nkpmpgs);
444 	else
445 		end = (caddr_t)(msp->kpm_spages + msp->kpm_nkpmpgs);
446 
447 	return (end);
448 }
449 
450 /*
451  * Update memsegspa (when first memseg in list
452  * is deleted) or nextpa when a memseg is deleted.
453  * Assumes that the memsegslock is already held.
454  */
455 void
456 hat_kpm_delmem_mseg_update(struct memseg *msp, struct memseg **mspp)
457 {
458 	struct memseg *lmsp;
459 
460 	if (kpm_enable == 0)
461 		return;
462 
463 	ASSERT(memsegs_lock_held());
464 
465 	if (mspp == &memsegs) {
466 		memsegspa = (msp->next) ?
467 		    va_to_pa(msp->next) : MSEG_NULLPTR_PA;
468 	} else {
469 		lmsp = (struct memseg *)
470 		    ((uint64_t)mspp - offsetof(struct memseg, next));
471 		lmsp->nextpa = (msp->next) ?
472 		    va_to_pa(msp->next) : MSEG_NULLPTR_PA;
473 	}
474 }
475 
476 /*
477  * Update kpm members for all memseg's involved in a split operation
478  * and do the atomic update of the physical memseg chain.
479  *
480  * Note: kpm_pages and kpm_spages are aliases and the underlying member
481  * of struct memseg is a union, therefore they always have the same
482  * address within a memseg. With that the direct assignments and
483  * va_to_pa conversions below don't have to be distinguished with respect
484  * to kpm_smallpages. They must be differentiated when pointer arithmetic
485  * is used with them.
486  *
487  * Assumes that the memsegslock is already held.
488  */
489 void
490 hat_kpm_split_mseg_update(struct memseg *msp, struct memseg **mspp,
491 	struct memseg *lo, struct memseg *mid, struct memseg *hi)
492 {
493 	pgcnt_t start, end, kbase, kstart, num;
494 	struct memseg *lmsp;
495 
496 	if (kpm_enable == 0)
497 		return;
498 
499 	ASSERT(memsegs_lock_held());
500 	ASSERT(msp && mid && msp->kpm_pages);
501 
502 	kbase = ptokpmp(msp->kpm_pbase);
503 
504 	if (lo) {
505 		num = lo->pages_end - lo->pages_base;
506 		start = kpmptop(ptokpmp(lo->pages_base));
507 		/* align end to kpm page size granularity */
508 		end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs;
509 		lo->kpm_pbase = start;
510 		lo->kpm_nkpmpgs = ptokpmp(end - start);
511 		lo->kpm_pages = msp->kpm_pages;
512 		lo->kpm_pagespa = va_to_pa(lo->kpm_pages);
513 		lo->pagespa = va_to_pa(lo->pages);
514 		lo->epagespa = va_to_pa(lo->epages);
515 		lo->nextpa = va_to_pa(lo->next);
516 	}
517 
518 	/* mid */
519 	num = mid->pages_end - mid->pages_base;
520 	kstart = ptokpmp(mid->pages_base);
521 	start = kpmptop(kstart);
522 	/* align end to kpm page size granularity */
523 	end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs;
524 	mid->kpm_pbase = start;
525 	mid->kpm_nkpmpgs = ptokpmp(end - start);
526 	if (kpm_smallpages == 0) {
527 		mid->kpm_pages = msp->kpm_pages + (kstart - kbase);
528 	} else {
529 		mid->kpm_spages = msp->kpm_spages + (kstart - kbase);
530 	}
531 	mid->kpm_pagespa = va_to_pa(mid->kpm_pages);
532 	mid->pagespa = va_to_pa(mid->pages);
533 	mid->epagespa = va_to_pa(mid->epages);
534 	mid->nextpa = (mid->next) ?  va_to_pa(mid->next) : MSEG_NULLPTR_PA;
535 
536 	if (hi) {
537 		num = hi->pages_end - hi->pages_base;
538 		kstart = ptokpmp(hi->pages_base);
539 		start = kpmptop(kstart);
540 		/* align end to kpm page size granularity */
541 		end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs;
542 		hi->kpm_pbase = start;
543 		hi->kpm_nkpmpgs = ptokpmp(end - start);
544 		if (kpm_smallpages == 0) {
545 			hi->kpm_pages = msp->kpm_pages + (kstart - kbase);
546 		} else {
547 			hi->kpm_spages = msp->kpm_spages + (kstart - kbase);
548 		}
549 		hi->kpm_pagespa = va_to_pa(hi->kpm_pages);
550 		hi->pagespa = va_to_pa(hi->pages);
551 		hi->epagespa = va_to_pa(hi->epages);
552 		hi->nextpa = (hi->next) ? va_to_pa(hi->next) : MSEG_NULLPTR_PA;
553 	}
554 
555 	/*
556 	 * Atomic update of the physical memseg chain
557 	 */
558 	if (mspp == &memsegs) {
559 		memsegspa = (lo) ? va_to_pa(lo) : va_to_pa(mid);
560 	} else {
561 		lmsp = (struct memseg *)
562 		    ((uint64_t)mspp - offsetof(struct memseg, next));
563 		lmsp->nextpa = (lo) ? va_to_pa(lo) : va_to_pa(mid);
564 	}
565 }
566 
567 /*
568  * Walk the memsegs chain, applying func to each memseg span and vcolor.
569  */
570 void
571 hat_kpm_walk(void (*func)(void *, void *, size_t), void *arg)
572 {
573 	pfn_t	pbase, pend;
574 	int	vcolor;
575 	void	*base;
576 	size_t	size;
577 	struct memseg *msp;
578 
579 	for (msp = memsegs; msp; msp = msp->next) {
580 		pbase = msp->pages_base;
581 		pend = msp->pages_end;
582 		for (vcolor = 0; vcolor < vac_colors; vcolor++) {
583 			base = ptob(pbase) + kpm_vbase + kpm_size * vcolor;
584 			size = ptob(pend - pbase);
585 			func(arg, base, size);
586 		}
587 	}
588 }
589 
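/*
 * Illustrative sketch (not part of the original file): a hat_kpm_walk
 * callback that sums the virtual span covered by the kpm segments for all
 * vac colors, e.g. hat_kpm_walk(example_kpm_span_cb, &total).  Kept under
 * #if 0.
 */
#if 0
/*ARGSUSED*/
static void
example_kpm_span_cb(void *arg, void *base, size_t size)
{
	size_t	*total = arg;

	*total += size;
}
#endif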
590 
591 /* -- sfmmu_kpm internal section -- */
592 
593 /*
594  * Return the page frame number if a valid segkpm mapping exists
595  * for vaddr, otherwise return PFN_INVALID. No locks are grabbed.
596  * Should only be used by other sfmmu routines.
597  */
598 pfn_t
599 sfmmu_kpm_vatopfn(caddr_t vaddr)
600 {
601 	uintptr_t	paddr;
602 	pfn_t		pfn;
603 	page_t	*pp;
604 
605 	ASSERT(kpm_enable && IS_KPM_ADDR(vaddr));
606 
607 	SFMMU_KPM_VTOP(vaddr, paddr);
608 	pfn = (pfn_t)btop(paddr);
609 	pp = page_numtopp_nolock(pfn);
610 	if (pp && pp->p_kpmref)
611 		return (pfn);
612 	else
613 		return ((pfn_t)PFN_INVALID);
614 }
615 
616 /*
617  * Lookup a kpme in the p_kpmelist.
618  */
619 static int
620 sfmmu_kpme_lookup(struct kpme *kpme, page_t *pp)
621 {
622 	struct kpme	*p;
623 
624 	for (p = pp->p_kpmelist; p; p = p->kpe_next) {
625 		if (p == kpme)
626 			return (1);
627 	}
628 	return (0);
629 }
630 
631 /*
632  * Insert a kpme into the p_kpmelist and increment
633  * the per page kpm reference count.
634  */
635 static void
636 sfmmu_kpme_add(struct kpme *kpme, page_t *pp)
637 {
638 	ASSERT(pp->p_kpmref >= 0);
639 
640 	/* head insert */
641 	kpme->kpe_prev = NULL;
642 	kpme->kpe_next = pp->p_kpmelist;
643 
644 	if (pp->p_kpmelist)
645 		pp->p_kpmelist->kpe_prev = kpme;
646 
647 	pp->p_kpmelist = kpme;
648 	kpme->kpe_page = pp;
649 	pp->p_kpmref++;
650 }
651 
652 /*
653  * Remove a kpme from the p_kpmelist and decrement
654  * the per page kpm reference count.
655  */
656 static void
657 sfmmu_kpme_sub(struct kpme *kpme, page_t *pp)
658 {
659 	ASSERT(pp->p_kpmref > 0);
660 
661 	if (kpme->kpe_prev) {
662 		ASSERT(pp->p_kpmelist != kpme);
663 		ASSERT(kpme->kpe_prev->kpe_page == pp);
664 		kpme->kpe_prev->kpe_next = kpme->kpe_next;
665 	} else {
666 		ASSERT(pp->p_kpmelist == kpme);
667 		pp->p_kpmelist = kpme->kpe_next;
668 	}
669 
670 	if (kpme->kpe_next) {
671 		ASSERT(kpme->kpe_next->kpe_page == pp);
672 		kpme->kpe_next->kpe_prev = kpme->kpe_prev;
673 	}
674 
675 	kpme->kpe_next = kpme->kpe_prev = NULL;
676 	kpme->kpe_page = NULL;
677 	pp->p_kpmref--;
678 }
679 
680 /*
681  * Mapin a single page; it is called every time a page changes its state
682  * from kpm-unmapped to kpm-mapped. It may not be called when only a new
683  * kpm instance does a mapin and wants to share the mapping.
684  * Assumes that the mlist mutex is already grabbed.
685  */
686 static caddr_t
687 sfmmu_kpm_mapin(page_t *pp)
688 {
689 	kpm_page_t	*kp;
690 	kpm_hlk_t	*kpmp;
691 	caddr_t		vaddr;
692 	int		kpm_vac_range;
693 	pfn_t		pfn;
694 	tte_t		tte;
695 	kmutex_t	*pmtx;
696 	int		uncached;
697 	kpm_spage_t	*ksp;
698 	kpm_shlk_t	*kpmsp;
699 	int		oldval;
700 
701 	ASSERT(sfmmu_mlist_held(pp));
702 	ASSERT(pp->p_kpmref == 0);
703 
704 	vaddr = sfmmu_kpm_getvaddr(pp, &kpm_vac_range);
705 
706 	ASSERT(IS_KPM_ADDR(vaddr));
707 	uncached = PP_ISNC(pp);
708 	pfn = pp->p_pagenum;
709 
710 	if (kpm_smallpages)
711 		goto smallpages_mapin;
712 
713 	PP2KPMPG(pp, kp);
714 
715 	kpmp = KPMP_HASH(kp);
716 	mutex_enter(&kpmp->khl_mutex);
717 
718 	ASSERT(PP_ISKPMC(pp) == 0);
719 	ASSERT(PP_ISKPMS(pp) == 0);
720 
721 	if (uncached) {
722 		/* ASSERT(pp->p_share); XXX use hat_page_getshare */
723 		if (kpm_vac_range == 0) {
724 			if (kp->kp_refcnts == 0) {
725 				/*
726 				 * Must remove large page mapping if it exists.
727 				 * Pages in uncached state can only be mapped
728 				 * small (PAGESIZE) within the regular kpm
729 				 * range.
730 				 */
731 				if (kp->kp_refcntc == -1) {
732 					/* remove go indication */
733 					sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
734 					    &kpmp->khl_lock, KPMTSBM_STOP);
735 				}
736 				if (kp->kp_refcnt > 0 && kp->kp_refcntc == 0)
737 					sfmmu_kpm_demap_large(vaddr);
738 			}
739 			ASSERT(kp->kp_refcntc >= 0);
740 			kp->kp_refcntc++;
741 		}
742 		pmtx = sfmmu_page_enter(pp);
743 		PP_SETKPMC(pp);
744 		sfmmu_page_exit(pmtx);
745 	}
746 
747 	if ((kp->kp_refcntc > 0 || kp->kp_refcnts > 0) && kpm_vac_range == 0) {
748 		/*
749 		 * Have to do a small (PAGESIZE) mapin within this kpm_page
750 		 * range since it is marked to be in VAC conflict mode or
751 		 * when there are still other small mappings around.
752 		 */
753 
754 		/* tte assembly */
755 		if (uncached == 0)
756 			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
757 		else
758 			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
759 
760 		/* tsb dropin */
761 		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
762 
763 		pmtx = sfmmu_page_enter(pp);
764 		PP_SETKPMS(pp);
765 		sfmmu_page_exit(pmtx);
766 
767 		kp->kp_refcnts++;
768 		ASSERT(kp->kp_refcnts > 0);
769 		goto exit;
770 	}
771 
772 	if (kpm_vac_range == 0) {
773 		/*
774 		 * Fast path / regular case, no VAC conflict handling
775 		 * in progress within this kpm_page range.
776 		 */
777 		if (kp->kp_refcnt == 0) {
778 
779 			/* tte assembly */
780 			KPM_TTE_VCACHED(tte.ll, pfn, TTE4M);
781 
782 			/* tsb dropin */
783 			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT4M);
784 
785 			/* Set go flag for TL tsbmiss handler */
786 			if (kp->kp_refcntc == 0)
787 				sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
788 				    &kpmp->khl_lock, KPMTSBM_START);
789 
790 			ASSERT(kp->kp_refcntc == -1);
791 		}
792 		kp->kp_refcnt++;
793 		ASSERT(kp->kp_refcnt);
794 
795 	} else {
796 		/*
797 		 * The page is not set up according to the common VAC
798 		 * prevention rules for the regular and kpm mapping layers.
799 		 * E.g. the page layer was not able to deliver a correctly
800 		 * vcolor'ed page for a given vaddr corresponding to
801 		 * the wanted p_offset. It has to be mapped in small
802 		 * within the corresponding kpm vac range in order to
803 		 * prevent VAC alias conflicts.
804 		 */
805 
806 		/* tte assembly */
807 		if (uncached == 0) {
808 			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
809 		} else {
810 			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
811 		}
812 
813 		/* tsb dropin */
814 		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
815 
816 		kp->kp_refcnta++;
817 		if (kp->kp_refcntc == -1) {
818 			ASSERT(kp->kp_refcnt > 0);
819 
820 			/* remove go indication */
821 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc, &kpmp->khl_lock,
822 			    KPMTSBM_STOP);
823 		}
824 		ASSERT(kp->kp_refcntc >= 0);
825 	}
826 exit:
827 	mutex_exit(&kpmp->khl_mutex);
828 	return (vaddr);
829 
830 smallpages_mapin:
831 	if (uncached == 0) {
832 		/* tte assembly */
833 		KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
834 	} else {
835 		/*
836 		 * Just in case this same page was mapped cacheable prior to
837 		 * this and the old tte remains in tlb.
838 		 */
839 		sfmmu_kpm_demap_small(vaddr);
840 
841 		/* ASSERT(pp->p_share); XXX use hat_page_getshare */
842 		pmtx = sfmmu_page_enter(pp);
843 		PP_SETKPMC(pp);
844 		sfmmu_page_exit(pmtx);
845 		/* tte assembly */
846 		KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
847 	}
848 
849 	/* tsb dropin */
850 	sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
851 
852 	PP2KPMSPG(pp, ksp);
853 	kpmsp = KPMP_SHASH(ksp);
854 
855 	oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag, &kpmsp->kshl_lock,
856 	    (uncached) ? (KPM_MAPPED_GO | KPM_MAPPEDSC) :
857 	    (KPM_MAPPED_GO | KPM_MAPPEDS));
858 
859 	if (oldval != 0)
860 		panic("sfmmu_kpm_mapin: stale smallpages mapping");
861 
862 	return (vaddr);
863 }
864 
865 /*
866  * Mapout a single page; it is called every time a page changes its state
867  * from kpm-mapped to kpm-unmapped. It may not be called when only a kpm
868  * instance calls mapout and there are still other instances mapping the
869  * page. Assumes that the mlist mutex is already grabbed.
870  *
871  * Note: In normal mode (no VAC conflict prevention pending) TLB's are
872  * not flushed. This is the core segkpm behavior to avoid xcalls. It is
873  * no problem because a translation from a segkpm virtual address to a
874  * physical address is always the same. The only downside is a slightly
875  * increased window of vulnerability for misbehaving _kernel_ modules.
876  */
877 static void
878 sfmmu_kpm_mapout(page_t *pp, caddr_t vaddr)
879 {
880 	kpm_page_t	*kp;
881 	kpm_hlk_t	*kpmp;
882 	int		alias_range;
883 	kmutex_t	*pmtx;
884 	kpm_spage_t	*ksp;
885 	kpm_shlk_t	*kpmsp;
886 	int		oldval;
887 
888 	ASSERT(sfmmu_mlist_held(pp));
889 	ASSERT(pp->p_kpmref == 0);
890 
891 	alias_range = IS_KPM_ALIAS_RANGE(vaddr);
892 
893 	if (kpm_smallpages)
894 		goto smallpages_mapout;
895 
896 	PP2KPMPG(pp, kp);
897 	kpmp = KPMP_HASH(kp);
898 	mutex_enter(&kpmp->khl_mutex);
899 
900 	if (alias_range) {
901 		ASSERT(PP_ISKPMS(pp) == 0);
902 		if (kp->kp_refcnta <= 0) {
903 			panic("sfmmu_kpm_mapout: bad refcnta kp=%p",
904 			    (void *)kp);
905 		}
906 
907 		if (PP_ISTNC(pp))  {
908 			if (PP_ISKPMC(pp) == 0) {
909 				/*
910 				 * Uncached kpm mappings must always have
911 				 * forced "small page" mode.
912 				 */
913 				panic("sfmmu_kpm_mapout: uncached page not "
914 				    "kpm marked");
915 			}
916 			sfmmu_kpm_demap_small(vaddr);
917 
918 			pmtx = sfmmu_page_enter(pp);
919 			PP_CLRKPMC(pp);
920 			sfmmu_page_exit(pmtx);
921 
922 			/*
923 			 * Check if we can resume cached mode. This might
924 			 * be the case if the kpm mapping was the only
925 			 * mapping in conflict with other non rule
926 			 * compliant mappings. The page is no longer marked
927 			 * as kpm mapped, so the conv_tnc path will not
928 			 * change kpm state.
929 			 */
930 			conv_tnc(pp, TTE8K);
931 
932 		} else if (PP_ISKPMC(pp) == 0) {
933 			/* remove TSB entry only */
934 			sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT);
935 
936 		} else {
937 			/* already demapped */
938 			pmtx = sfmmu_page_enter(pp);
939 			PP_CLRKPMC(pp);
940 			sfmmu_page_exit(pmtx);
941 		}
942 		kp->kp_refcnta--;
943 		goto exit;
944 	}
945 
946 	if (kp->kp_refcntc <= 0 && kp->kp_refcnts == 0) {
947 		/*
948 		 * Fast path / regular case.
949 		 */
950 		ASSERT(kp->kp_refcntc >= -1);
951 		ASSERT(!(pp->p_nrm & (P_KPMC | P_KPMS | P_TNC | P_PNC)));
952 
953 		if (kp->kp_refcnt <= 0)
954 			panic("sfmmu_kpm_mapout: bad refcnt kp=%p", (void *)kp);
955 
956 		if (--kp->kp_refcnt == 0) {
957 			/* remove go indication */
958 			if (kp->kp_refcntc == -1) {
959 				sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
960 				    &kpmp->khl_lock, KPMTSBM_STOP);
961 			}
962 			ASSERT(kp->kp_refcntc == 0);
963 
964 			/* remove TSB entry */
965 			sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT4M);
966 #ifdef	DEBUG
967 			if (kpm_tlb_flush)
968 				sfmmu_kpm_demap_tlbs(vaddr);
969 #endif
970 		}
971 
972 	} else {
973 		/*
974 		 * The VAC alias path.
975 		 * We come here if the kpm vaddr is not in any alias_range
976 		 * and we are unmapping a page within the regular kpm_page
977 		 * range. The kpm_page either holds conflict pages and/or
978 		 * is in "small page" mode. If the page is not marked
979 		 * P_KPMS it couldn't have a valid PAGESIZE sized TSB
980 		 * entry. Dcache flushing is done lazy and follows the
981 		 * rules of the regular virtual page coloring scheme.
982 		 *
983 		 * Per page states and required actions:
984 		 *   P_KPMC: remove a kpm mapping that is conflicting.
985 		 *   P_KPMS: remove a small kpm mapping within a kpm_page.
986 		 *   P_TNC:  check if we can re-cache the page.
987 		 *   P_PNC:  we cannot re-cache, sorry.
988 		 * Per kpm_page:
989 		 *   kp_refcntc > 0: page is part of a kpm_page with conflicts.
990 		 *   kp_refcnts > 0: rm a small mapped page within a kpm_page.
991 		 */
992 
993 		if (PP_ISKPMS(pp)) {
994 			if (kp->kp_refcnts < 1) {
995 				panic("sfmmu_kpm_mapout: bad refcnts kp=%p",
996 				    (void *)kp);
997 			}
998 			sfmmu_kpm_demap_small(vaddr);
999 
1000 			/*
1001 			 * Check if we can resume cached mode. This might
1002 			 * be the case if the kpm mapping was the only
1003 			 * mapping in conflict with other non rule
1004 			 * compliant mappings. The page is no longer marked
1005 			 * as kpm mapped, so the conv_tnc path will not
1006 			 * change kpm state.
1007 			 */
1008 			if (PP_ISTNC(pp))  {
1009 				if (!PP_ISKPMC(pp)) {
1010 					/*
1011 					 * Uncached kpm mappings must always
1012 					 * have forced "small page" mode.
1013 					 */
1014 					panic("sfmmu_kpm_mapout: uncached "
1015 					    "page not kpm marked");
1016 				}
1017 				conv_tnc(pp, TTE8K);
1018 			}
1019 			kp->kp_refcnts--;
1020 			kp->kp_refcnt++;
1021 			pmtx = sfmmu_page_enter(pp);
1022 			PP_CLRKPMS(pp);
1023 			sfmmu_page_exit(pmtx);
1024 		}
1025 
1026 		if (PP_ISKPMC(pp)) {
1027 			if (kp->kp_refcntc < 1) {
1028 				panic("sfmmu_kpm_mapout: bad refcntc kp=%p",
1029 				    (void *)kp);
1030 			}
1031 			pmtx = sfmmu_page_enter(pp);
1032 			PP_CLRKPMC(pp);
1033 			sfmmu_page_exit(pmtx);
1034 			kp->kp_refcntc--;
1035 		}
1036 
1037 		if (kp->kp_refcnt-- < 1)
1038 			panic("sfmmu_kpm_mapout: bad refcnt kp=%p", (void *)kp);
1039 	}
1040 exit:
1041 	mutex_exit(&kpmp->khl_mutex);
1042 	return;
1043 
1044 smallpages_mapout:
1045 	PP2KPMSPG(pp, ksp);
1046 	kpmsp = KPMP_SHASH(ksp);
1047 
1048 	if (PP_ISKPMC(pp) == 0) {
1049 		oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
1050 		    &kpmsp->kshl_lock, 0);
1051 
1052 		if (oldval != KPM_MAPPEDS) {
1053 			/*
1054 			 * When we're called after sfmmu_kpm_hme_unload,
1055 			 * KPM_MAPPEDSC is valid too.
1056 			 */
1057 			if (oldval != KPM_MAPPEDSC)
1058 				panic("sfmmu_kpm_mapout: incorrect mapping");
1059 		}
1060 
1061 		/* remove TSB entry */
1062 		sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT);
1063 #ifdef	DEBUG
1064 		if (kpm_tlb_flush)
1065 			sfmmu_kpm_demap_tlbs(vaddr);
1066 #endif
1067 
1068 	} else if (PP_ISTNC(pp)) {
1069 		oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
1070 		    &kpmsp->kshl_lock, 0);
1071 
1072 		if (oldval != KPM_MAPPEDSC || PP_ISKPMC(pp) == 0)
1073 			panic("sfmmu_kpm_mapout: inconsistent TNC mapping");
1074 
1075 		sfmmu_kpm_demap_small(vaddr);
1076 
1077 		pmtx = sfmmu_page_enter(pp);
1078 		PP_CLRKPMC(pp);
1079 		sfmmu_page_exit(pmtx);
1080 
1081 		/*
1082 		 * Check if we can resume cached mode. This might be
1083 		 * the case if the kpm mapping was the only mapping
1084 		 * in conflict with other non rule compliant mappings.
1085 		 * The page is no longer marked as kpm mapped, so the
1086 		 * conv_tnc path will not change the kpm state.
1087 		 */
1088 		conv_tnc(pp, TTE8K);
1089 
1090 	} else {
1091 		oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
1092 		    &kpmsp->kshl_lock, 0);
1093 
1094 		if (oldval != KPM_MAPPEDSC)
1095 			panic("sfmmu_kpm_mapout: inconsistent mapping");
1096 
1097 		pmtx = sfmmu_page_enter(pp);
1098 		PP_CLRKPMC(pp);
1099 		sfmmu_page_exit(pmtx);
1100 	}
1101 }
1102 
1103 #define	abs(x)  ((x) < 0 ? -(x) : (x))
1104 
1105 /*
1106  * Determine appropriate kpm mapping address and handle any kpm/hme
1107  * conflicts. Page mapping list and its vcolor parts must be protected.
1108  */
1109 static caddr_t
1110 sfmmu_kpm_getvaddr(page_t *pp, int *kpm_vac_rangep)
1111 {
1112 	int		vcolor, vcolor_pa;
1113 	caddr_t		vaddr;
1114 	uintptr_t	paddr;
1115 
1116 
1117 	ASSERT(sfmmu_mlist_held(pp));
1118 
1119 	paddr = ptob(pp->p_pagenum);
1120 	vcolor_pa = addr_to_vcolor(paddr);
1121 
1122 	if (pp->p_vnode && IS_SWAPFSVP(pp->p_vnode)) {
1123 		vcolor = (PP_NEWPAGE(pp) || PP_ISNC(pp)) ?
1124 		    vcolor_pa : PP_GET_VCOLOR(pp);
1125 	} else {
1126 		vcolor = addr_to_vcolor(pp->p_offset);
1127 	}
1128 
1129 	vaddr = kpm_vbase + paddr;
1130 	*kpm_vac_rangep = 0;
1131 
1132 	if (vcolor_pa != vcolor) {
1133 		*kpm_vac_rangep = abs(vcolor - vcolor_pa);
1134 		vaddr += ((uintptr_t)(vcolor - vcolor_pa) << MMU_PAGESHIFT);
1135 		vaddr += (vcolor_pa > vcolor) ?
1136 		    ((uintptr_t)vcolor_pa << kpm_size_shift) :
1137 		    ((uintptr_t)(vcolor - vcolor_pa) << kpm_size_shift);
1138 
1139 		ASSERT(!PP_ISMAPPED_LARGE(pp));
1140 	}
1141 
1142 	if (PP_ISNC(pp))
1143 		return (vaddr);
1144 
1145 	if (PP_NEWPAGE(pp)) {
1146 		PP_SET_VCOLOR(pp, vcolor);
1147 		return (vaddr);
1148 	}
1149 
1150 	if (PP_GET_VCOLOR(pp) == vcolor)
1151 		return (vaddr);
1152 
1153 	ASSERT(!PP_ISMAPPED_KPM(pp));
1154 	sfmmu_kpm_vac_conflict(pp, vaddr);
1155 
1156 	return (vaddr);
1157 }
1158 
1159 /*
1160  * VAC conflict state bit values.
1161  * The following defines are used to make the handling of the
1162  * various input states more concise. For that the kpm states
1163  * per kpm_page and per page are combined in a summary state.
1164  * Each single state has a corresponding bit value in the
1165  * summary state. These defines only apply for kpm large page
1166  * mappings. Within comments the abbreviations "kc, c, ks, s"
1167  * are used as short form of the actual state, e.g. "kc" for
1168  * "kp_refcntc > 0", etc.
1169  */
1170 #define	KPM_KC	0x00000008	/* kpm_page: kp_refcntc > 0 */
1171 #define	KPM_C	0x00000004	/* page: P_KPMC set */
1172 #define	KPM_KS	0x00000002	/* kpm_page: kp_refcnts > 0 */
1173 #define	KPM_S	0x00000001	/* page: P_KPMS set */
1174 
1175 /*
1176  * Summary states used in sfmmu_kpm_fault (KPM_TSBM_*).
1177  * See also the more detailed comments within the sfmmu_kpm_fault switch.
1178  * Abbreviations used:
1179  * CONFL: VAC conflict(s) within a kpm_page.
1180  * MAPS:  Mapped small: Page mapped in using a regular page size kpm mapping.
1181  * RASM:  Re-assembling of a large page mapping possible.
1182  * RPLS:  Replace: TSB miss due to TSB replacement only.
1183  * BRKO:  Breakup Other: A large kpm mapping has to be broken because another
1184  *        page within the kpm_page is already involved in a VAC conflict.
1185  * BRKT:  Breakup This: A large kpm mapping has to be broken, this page
1186  *        is involved in a VAC conflict.
1187  */
1188 #define	KPM_TSBM_CONFL_GONE	(0)
1189 #define	KPM_TSBM_MAPS_RASM	(KPM_KS)
1190 #define	KPM_TSBM_RPLS_RASM	(KPM_KS | KPM_S)
1191 #define	KPM_TSBM_MAPS_BRKO	(KPM_KC)
1192 #define	KPM_TSBM_MAPS		(KPM_KC | KPM_KS)
1193 #define	KPM_TSBM_RPLS		(KPM_KC | KPM_KS | KPM_S)
1194 #define	KPM_TSBM_MAPS_BRKT	(KPM_KC | KPM_C)
1195 #define	KPM_TSBM_MAPS_CONFL	(KPM_KC | KPM_C | KPM_KS)
1196 #define	KPM_TSBM_RPLS_CONFL	(KPM_KC | KPM_C | KPM_KS | KPM_S)
1197 
1198 /*
1199  * kpm fault handler for mappings with large page size.
1200  */
1201 int
1202 sfmmu_kpm_fault(caddr_t vaddr, struct memseg *mseg, page_t *pp)
1203 {
1204 	int		error;
1205 	pgcnt_t		inx;
1206 	kpm_page_t	*kp;
1207 	tte_t		tte;
1208 	pfn_t		pfn = pp->p_pagenum;
1209 	kpm_hlk_t	*kpmp;
1210 	kmutex_t	*pml;
1211 	int		alias_range;
1212 	int		uncached = 0;
1213 	kmutex_t	*pmtx;
1214 	int		badstate;
1215 	uint_t		tsbmcase;
1216 
1217 	alias_range = IS_KPM_ALIAS_RANGE(vaddr);
1218 
1219 	inx = ptokpmp(kpmptop(ptokpmp(pfn)) - mseg->kpm_pbase);
1220 	if (inx >= mseg->kpm_nkpmpgs) {
1221 		cmn_err(CE_PANIC, "sfmmu_kpm_fault: kpm overflow in memseg "
1222 		    "0x%p  pp 0x%p", (void *)mseg, (void *)pp);
1223 	}
1224 
1225 	kp = &mseg->kpm_pages[inx];
1226 	kpmp = KPMP_HASH(kp);
1227 
1228 	pml = sfmmu_mlist_enter(pp);
1229 
1230 	if (!PP_ISMAPPED_KPM(pp)) {
1231 		sfmmu_mlist_exit(pml);
1232 		return (EFAULT);
1233 	}
1234 
1235 	mutex_enter(&kpmp->khl_mutex);
1236 
1237 	if (alias_range) {
1238 		ASSERT(!PP_ISMAPPED_LARGE(pp));
1239 		if (kp->kp_refcnta > 0) {
1240 			if (PP_ISKPMC(pp)) {
1241 				pmtx = sfmmu_page_enter(pp);
1242 				PP_CLRKPMC(pp);
1243 				sfmmu_page_exit(pmtx);
1244 			}
1245 			/*
1246 			 * Check for vcolor conflicts. Return here
1247 			 * w/ either no conflict (fast path), removed hme
1248 			 * mapping chains (unload conflict) or uncached
1249 			 * (uncache conflict). VACaches are cleaned and
1250 			 * p_vcolor and PP_TNC are set accordingly for the
1251 			 * conflict cases.  Drop kpmp for uncache conflict
1252 			 * cases since it will be grabbed within
1253 			 * sfmmu_kpm_page_cache in case of an uncache
1254 			 * conflict.
1255 			 */
1256 			mutex_exit(&kpmp->khl_mutex);
1257 			sfmmu_kpm_vac_conflict(pp, vaddr);
1258 			mutex_enter(&kpmp->khl_mutex);
1259 
1260 			if (PP_ISNC(pp)) {
1261 				uncached = 1;
1262 				pmtx = sfmmu_page_enter(pp);
1263 				PP_SETKPMC(pp);
1264 				sfmmu_page_exit(pmtx);
1265 			}
1266 			goto smallexit;
1267 
1268 		} else {
1269 			/*
1270 			 * We got a tsbmiss on an inactive kpm_page range.
1271 			 * Let segkpm_fault decide how to panic.
1272 			 */
1273 			error = EFAULT;
1274 		}
1275 		goto exit;
1276 	}
1277 
1278 	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
1279 	if (kp->kp_refcntc == -1) {
1280 		/*
1281 		 * We should come here only if trap level tsb miss
1282 		 * handler is disabled.
1283 		 */
1284 		badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 ||
1285 		    PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp));
1286 
1287 		if (badstate == 0)
1288 			goto largeexit;
1289 	}
1290 
1291 	if (badstate || kp->kp_refcntc < 0)
1292 		goto badstate_exit;
1293 
1294 	/*
1295 	 * Combine the per kpm_page and per page kpm VAC states to
1296 	 * a summary state in order to make the kpm fault handling
1297 	 * more concise.
1298 	 */
1299 	tsbmcase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) |
1300 	    ((kp->kp_refcnts > 0) ? KPM_KS : 0) |
1301 	    (PP_ISKPMC(pp) ? KPM_C : 0) |
1302 	    (PP_ISKPMS(pp) ? KPM_S : 0));
1303 
1304 	switch (tsbmcase) {
1305 	case KPM_TSBM_CONFL_GONE:		/* - - - - */
1306 		/*
1307 		 * That's fine, we either have no more vac conflict in
1308 		 * this kpm page or someone raced in and has solved the
1309 		 * vac conflict for us -- call sfmmu_kpm_vac_conflict
1310 		 * to take care for correcting the vcolor and flushing
1311 		 * the dcache if required.
1312 		 */
1313 		mutex_exit(&kpmp->khl_mutex);
1314 		sfmmu_kpm_vac_conflict(pp, vaddr);
1315 		mutex_enter(&kpmp->khl_mutex);
1316 
1317 		if (PP_ISNC(pp) || kp->kp_refcnt <= 0 ||
1318 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1319 			panic("sfmmu_kpm_fault: inconsistent CONFL_GONE "
1320 			    "state, pp=%p", (void *)pp);
1321 		}
1322 		goto largeexit;
1323 
1324 	case KPM_TSBM_MAPS_RASM:		/* - - ks - */
1325 		/*
1326 		 * All conflicts in this kpm page are gone but there are
1327 		 * already small mappings around, so we also map this
1328 		 * page small. This could be the trigger case for a
1329 		 * small mapping reaper, if this is really needed.
1330 		 * For now fall thru to the KPM_TSBM_MAPS handling.
1331 		 */
1332 
1333 	case KPM_TSBM_MAPS:			/* kc - ks - */
1334 		/*
1335 		 * Large page mapping is already broken, this page is not
1336 		 * conflicting, so map it small. Call sfmmu_kpm_vac_conflict
1337 		 * to take care for correcting the vcolor and flushing
1338 		 * the dcache if required.
1339 		 */
1340 		mutex_exit(&kpmp->khl_mutex);
1341 		sfmmu_kpm_vac_conflict(pp, vaddr);
1342 		mutex_enter(&kpmp->khl_mutex);
1343 
1344 		if (PP_ISNC(pp) || kp->kp_refcnt <= 0 ||
1345 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1346 			panic("sfmmu_kpm_fault:  inconsistent MAPS state, "
1347 			    "pp=%p", (void *)pp);
1348 		}
1349 		kp->kp_refcnt--;
1350 		kp->kp_refcnts++;
1351 		pmtx = sfmmu_page_enter(pp);
1352 		PP_SETKPMS(pp);
1353 		sfmmu_page_exit(pmtx);
1354 		goto smallexit;
1355 
1356 	case KPM_TSBM_RPLS_RASM:		/* - - ks s */
1357 		/*
1358 		 * All conflicts in this kpm page are gone but this page
1359 		 * is mapped small. This could be the trigger case for a
1360 		 * small mapping reaper, if this is really needed.
1361 		 * For now we drop it in small again. Fall thru to the
1362 		 * KPM_TSBM_RPLS handling.
1363 		 */
1364 
1365 	case KPM_TSBM_RPLS:			/* kc - ks s */
1366 		/*
1367 		 * Large page mapping is already broken, this page is not
1368 		 * conflicting but already mapped small, so drop it in
1369 		 * small again.
1370 		 */
1371 		if (PP_ISNC(pp) ||
1372 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1373 			panic("sfmmu_kpm_fault:  inconsistent RPLS state, "
1374 			    "pp=%p", (void *)pp);
1375 		}
1376 		goto smallexit;
1377 
1378 	case KPM_TSBM_MAPS_BRKO:		/* kc - - - */
1379 		/*
1380 		 * The kpm page where we live in is marked conflicting
1381 		 * but this page is not conflicting. So we have to map it
1382 		 * in small. Call sfmmu_kpm_vac_conflict to take care for
1383 		 * correcting the vcolor and flushing the dcache if required.
1384 		 */
1385 		mutex_exit(&kpmp->khl_mutex);
1386 		sfmmu_kpm_vac_conflict(pp, vaddr);
1387 		mutex_enter(&kpmp->khl_mutex);
1388 
1389 		if (PP_ISNC(pp) || kp->kp_refcnt <= 0 ||
1390 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1391 			panic("sfmmu_kpm_fault:  inconsistent MAPS_BRKO state, "
1392 			    "pp=%p", (void *)pp);
1393 		}
1394 		kp->kp_refcnt--;
1395 		kp->kp_refcnts++;
1396 		pmtx = sfmmu_page_enter(pp);
1397 		PP_SETKPMS(pp);
1398 		sfmmu_page_exit(pmtx);
1399 		goto smallexit;
1400 
1401 	case KPM_TSBM_MAPS_BRKT:		/* kc c - - */
1402 	case KPM_TSBM_MAPS_CONFL:		/* kc c ks - */
1403 		if (!PP_ISMAPPED(pp)) {
1404 			/*
1405 			 * We got a tsbmiss on a kpm large page range that is
1406 			 * marked to contain vac conflicting pages introduced
1407 			 * by hme mappings. The hme mappings are all gone and
1408 			 * must have bypassed the kpm alias prevention logic.
1409 			 */
1410 			panic("sfmmu_kpm_fault: stale VAC conflict, pp=%p",
1411 			    (void *)pp);
1412 		}
1413 
1414 		/*
1415 		 * Check for vcolor conflicts. Return here w/ either no
1416 		 * conflict (fast path), removed hme mapping chains
1417 		 * (unload conflict) or uncached (uncache conflict).
1418 		 * Dcache is cleaned and p_vcolor and P_TNC are set
1419 		 * accordingly. Drop kpmp for uncache conflict cases
1420 		 * since it will be grabbed within sfmmu_kpm_page_cache
1421 		 * in case of an uncache conflict.
1422 		 */
1423 		mutex_exit(&kpmp->khl_mutex);
1424 		sfmmu_kpm_vac_conflict(pp, vaddr);
1425 		mutex_enter(&kpmp->khl_mutex);
1426 
1427 		if (kp->kp_refcnt <= 0)
1428 			panic("sfmmu_kpm_fault: bad refcnt kp=%p", (void *)kp);
1429 
1430 		if (PP_ISNC(pp)) {
1431 			uncached = 1;
1432 		} else {
1433 			/*
1434 			 * When an unload conflict is solved and there are
1435 			 * no other small mappings around, we can resume
1436 			 * largepage mode. Otherwise we have to map or drop
1437 			 * in small. This could be a trigger for a small
1438 			 * mapping reaper when this was the last conflict
1439 			 * within the kpm page and when there are only
1440 			 * other small mappings around.
1441 			 */
1442 			ASSERT(addr_to_vcolor(vaddr) == PP_GET_VCOLOR(pp));
1443 			ASSERT(kp->kp_refcntc > 0);
1444 			kp->kp_refcntc--;
1445 			pmtx = sfmmu_page_enter(pp);
1446 			PP_CLRKPMC(pp);
1447 			sfmmu_page_exit(pmtx);
1448 			ASSERT(PP_ISKPMS(pp) == 0);
1449 			if (kp->kp_refcntc == 0 && kp->kp_refcnts == 0)
1450 				goto largeexit;
1451 		}
1452 
1453 		kp->kp_refcnt--;
1454 		kp->kp_refcnts++;
1455 		pmtx = sfmmu_page_enter(pp);
1456 		PP_SETKPMS(pp);
1457 		sfmmu_page_exit(pmtx);
1458 		goto smallexit;
1459 
1460 	case KPM_TSBM_RPLS_CONFL:		/* kc c ks s */
1461 		if (!PP_ISMAPPED(pp)) {
1462 			/*
1463 			 * We got a tsbmiss on a kpm large page range that is
1464 			 * marked to contain vac conflicting pages introduced
1465 			 * by hme mappings. They are all gone and must have
1466 			 * somehow bypassed the kpm alias prevention logic.
1467 			 */
1468 			panic("sfmmu_kpm_fault: stale VAC conflict, pp=%p",
1469 			    (void *)pp);
1470 		}
1471 
1472 		/*
1473 		 * This state is only possible for an uncached mapping.
1474 		 */
1475 		if (!PP_ISNC(pp)) {
1476 			panic("sfmmu_kpm_fault: page not uncached, pp=%p",
1477 			    (void *)pp);
1478 		}
1479 		uncached = 1;
1480 		goto smallexit;
1481 
1482 	default:
1483 badstate_exit:
1484 		panic("sfmmu_kpm_fault: inconsistent VAC state, vaddr=%p kp=%p "
1485 		    "pp=%p", (void *)vaddr, (void *)kp, (void *)pp);
1486 	}
1487 
1488 smallexit:
1489 	/* tte assembly */
1490 	if (uncached == 0)
1491 		KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
1492 	else
1493 		KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
1494 
1495 	/* tsb dropin */
1496 	sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1497 
1498 	error = 0;
1499 	goto exit;
1500 
1501 largeexit:
1502 	if (kp->kp_refcnt > 0) {
1503 
1504 		/* tte assembly */
1505 		KPM_TTE_VCACHED(tte.ll, pfn, TTE4M);
1506 
1507 		/* tsb dropin */
1508 		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT4M);
1509 
1510 		if (kp->kp_refcntc == 0) {
1511 			/* Set "go" flag for TL tsbmiss handler */
1512 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc, &kpmp->khl_lock,
1513 			    KPMTSBM_START);
1514 		}
1515 		ASSERT(kp->kp_refcntc == -1);
1516 		error = 0;
1517 
1518 	} else
1519 		error = EFAULT;
1520 exit:
1521 	mutex_exit(&kpmp->khl_mutex);
1522 	sfmmu_mlist_exit(pml);
1523 	return (error);
1524 }
1525 
1526 /*
1527  * kpm fault handler for mappings with small page size.
1528  */
1529 int
1530 sfmmu_kpm_fault_small(caddr_t vaddr, struct memseg *mseg, page_t *pp)
1531 {
1532 	int		error = 0;
1533 	pgcnt_t		inx;
1534 	kpm_spage_t	*ksp;
1535 	kpm_shlk_t	*kpmsp;
1536 	kmutex_t	*pml;
1537 	pfn_t		pfn = pp->p_pagenum;
1538 	tte_t		tte;
1539 	kmutex_t	*pmtx;
1540 	int		oldval;
1541 
1542 	inx = pfn - mseg->kpm_pbase;
1543 	ksp = &mseg->kpm_spages[inx];
1544 	kpmsp = KPMP_SHASH(ksp);
1545 
1546 	pml = sfmmu_mlist_enter(pp);
1547 
1548 	if (!PP_ISMAPPED_KPM(pp)) {
1549 		sfmmu_mlist_exit(pml);
1550 		return (EFAULT);
1551 	}
1552 
1553 	/*
1554 	 * kp_mapped lookup protected by mlist mutex
1555 	 */
1556 	if (ksp->kp_mapped == KPM_MAPPEDS) {
1557 		/*
1558 		 * Fast path tsbmiss
1559 		 */
1560 		ASSERT(!PP_ISKPMC(pp));
1561 		ASSERT(!PP_ISNC(pp));
1562 
1563 		/* tte assembly */
1564 		KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
1565 
1566 		/* tsb dropin */
1567 		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1568 
1569 	} else if (ksp->kp_mapped == KPM_MAPPEDSC) {
1570 		/*
1571 		 * Got here due to existing or gone kpm/hme VAC conflict.
1572 		 * Recheck for vcolor conflicts. Return here w/ either
1573 		 * no conflict, removed hme mapping chain (unload
1574 		 * conflict) or uncached (uncache conflict). VACaches
1575 		 * are cleaned and p_vcolor and PP_TNC are set accordingly
1576 		 * for the conflict cases.
1577 		 */
1578 		sfmmu_kpm_vac_conflict(pp, vaddr);
1579 
1580 		if (PP_ISNC(pp)) {
1581 			/* ASSERT(pp->p_share); XXX use hat_page_getshare */
1582 
1583 			/* tte assembly */
1584 			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
1585 
1586 			/* tsb dropin */
1587 			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1588 
1589 			oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
1590 			    &kpmsp->kshl_lock, (KPM_MAPPED_GO | KPM_MAPPEDSC));
1591 
1592 			if (oldval != KPM_MAPPEDSC)
1593 				panic("sfmmu_kpm_fault_small: "
1594 				    "stale smallpages mapping");
1595 		} else {
1596 			if (PP_ISKPMC(pp)) {
1597 				pmtx = sfmmu_page_enter(pp);
1598 				PP_CLRKPMC(pp);
1599 				sfmmu_page_exit(pmtx);
1600 			}
1601 
1602 			/* tte assembly */
1603 			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
1604 
1605 			/* tsb dropin */
1606 			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1607 
1608 			oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
1609 			    &kpmsp->kshl_lock, (KPM_MAPPED_GO | KPM_MAPPEDS));
1610 
1611 			if (oldval != KPM_MAPPEDSC)
1612 				panic("sfmmu_kpm_fault_small: "
1613 				    "stale smallpages mapping");
1614 		}
1615 
1616 	} else {
1617 		/*
1618 		 * We got a tsbmiss on an inactive kpm_page range.
1619 		 * Let segkpm_fault decide how to panic.
1620 		 */
1621 		error = EFAULT;
1622 	}
1623 
1624 	sfmmu_mlist_exit(pml);
1625 	return (error);
1626 }
1627 
1628 /*
1629  * Check/handle potential hme/kpm mapping conflicts
1630  */
1631 static void
1632 sfmmu_kpm_vac_conflict(page_t *pp, caddr_t vaddr)
1633 {
1634 	int		vcolor;
1635 	struct sf_hment	*sfhmep;
1636 	struct hat	*tmphat;
1637 	struct sf_hment	*tmphme = NULL;
1638 	struct hme_blk	*hmeblkp;
1639 	tte_t		tte;
1640 
1641 	ASSERT(sfmmu_mlist_held(pp));
1642 
1643 	if (PP_ISNC(pp))
1644 		return;
1645 
1646 	vcolor = addr_to_vcolor(vaddr);
1647 	if (PP_GET_VCOLOR(pp) == vcolor)
1648 		return;
1649 
1650 	/*
1651 	 * There could be no vcolor conflict between a large cached
1652 	 * hme page and a non alias range kpm page (neither large nor
1653 	 * small mapped). So if a hme conflict already exists between
1654 	 * a constituent page of a large hme mapping and a shared small
1655 	 * conflicting hme mapping, both mappings must be already
1656 	 * uncached at this point.
1657 	 */
1658 	ASSERT(!PP_ISMAPPED_LARGE(pp));
1659 
1660 	if (!PP_ISMAPPED(pp)) {
1661 		/*
1662 		 * Previous hme user of page had a different color
1663 		 * but since there are no current users
1664 		 * we just flush the cache and change the color.
1665 		 */
1666 		SFMMU_STAT(sf_pgcolor_conflict);
1667 		sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp));
1668 		PP_SET_VCOLOR(pp, vcolor);
1669 		return;
1670 	}
1671 
1672 	/*
1673 	 * If we get here we have a vac conflict with a current hme
1674 	 * mapping. This must have been established by forcing a wrong
1675 	 * colored mapping, e.g. by using mmap(2) with MAP_FIXED.
1676 	 */
1677 
1678 	/*
1679 	 * Check if any mapping is in the same (kernel) address space or if
1680 	 * it is locked, since in that case we need to uncache.
1681 	 */
1682 	for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
1683 		tmphme = sfhmep->hme_next;
1684 		if (IS_PAHME(sfhmep))
1685 			continue;
1686 		hmeblkp = sfmmu_hmetohblk(sfhmep);
1687 		if (hmeblkp->hblk_xhat_bit)
1688 			continue;
1689 		tmphat = hblktosfmmu(hmeblkp);
1690 		sfmmu_copytte(&sfhmep->hme_tte, &tte);
1691 		ASSERT(TTE_IS_VALID(&tte));
1692 		if ((tmphat == ksfmmup) || hmeblkp->hblk_lckcnt) {
1693 			/*
1694 			 * We have an uncache conflict
1695 			 */
1696 			SFMMU_STAT(sf_uncache_conflict);
1697 			sfmmu_page_cache_array(pp, HAT_TMPNC, CACHE_FLUSH, 1);
1698 			return;
1699 		}
1700 	}
1701 
1702 	/*
1703 	 * We have an unload conflict
1704 	 */
1705 	SFMMU_STAT(sf_unload_conflict);
1706 
1707 	for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
1708 		tmphme = sfhmep->hme_next;
1709 		if (IS_PAHME(sfhmep))
1710 			continue;
1711 		hmeblkp = sfmmu_hmetohblk(sfhmep);
1712 		if (hmeblkp->hblk_xhat_bit)
1713 			continue;
1714 		(void) sfmmu_pageunload(pp, sfhmep, TTE8K);
1715 	}
1716 
1717 	/*
1718 	 * Unloads only do TLB flushes, so we need to flush the
1719 	 * dcache vcolor here.
1720 	 */
1721 	sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp));
1722 	PP_SET_VCOLOR(pp, vcolor);
1723 }
1724 
1725 /*
1726  * Remove all kpm mappings using kpme's for pp and check that
1727  * all kpm mappings (w/ and w/o kpme's) are gone.
1728  */
1729 void
1730 sfmmu_kpm_pageunload(page_t *pp)
1731 {
1732 	caddr_t		vaddr;
1733 	struct kpme	*kpme, *nkpme;
1734 
1735 	ASSERT(pp != NULL);
1736 	ASSERT(pp->p_kpmref);
1737 	ASSERT(sfmmu_mlist_held(pp));
1738 
1739 	vaddr = hat_kpm_page2va(pp, 1);
1740 
1741 	for (kpme = pp->p_kpmelist; kpme; kpme = nkpme) {
1742 		ASSERT(kpme->kpe_page == pp);
1743 
1744 		if (pp->p_kpmref == 0)
1745 			panic("sfmmu_kpm_pageunload: stale p_kpmref pp=%p "
1746 			    "kpme=%p", (void *)pp, (void *)kpme);
1747 
1748 		nkpme = kpme->kpe_next;
1749 
1750 		/* Add instance callback here if needed later */
1751 		sfmmu_kpme_sub(kpme, pp);
1752 	}
1753 
1754 	/*
1755 	 * Also correct after mixed kpme/nonkpme mappings. If nonkpme
1756 	 * segkpm clients have unlocked the page and forgot to mapout,
1757 	 * we panic here.
1758 	 */
1759 	if (pp->p_kpmref != 0)
1760 		panic("sfmmu_kpm_pageunload: bad refcnt pp=%p", (void *)pp);
1761 
1762 	sfmmu_kpm_mapout(pp, vaddr);
1763 }
1764 
1765 /*
1766  * Remove a large kpm mapping from kernel TSB and all TLB's.
1767  */
1768 static void
1769 sfmmu_kpm_demap_large(caddr_t vaddr)
1770 {
1771 	sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT4M);
1772 	sfmmu_kpm_demap_tlbs(vaddr);
1773 }
1774 
1775 /*
1776  * Remove a small kpm mapping from kernel TSB and all TLB's.
1777  */
1778 static void
1779 sfmmu_kpm_demap_small(caddr_t vaddr)
1780 {
1781 	sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT);
1782 	sfmmu_kpm_demap_tlbs(vaddr);
1783 }
1784 
1785 /*
1786  * Demap a kpm mapping in all TLB's.
1787  */
1788 static void
1789 sfmmu_kpm_demap_tlbs(caddr_t vaddr)
1790 {
1791 	cpuset_t cpuset;
1792 
1793 	kpreempt_disable();
1794 	cpuset = ksfmmup->sfmmu_cpusran;
1795 	CPUSET_AND(cpuset, cpu_ready_set);
1796 	CPUSET_DEL(cpuset, CPU->cpu_id);
1797 	SFMMU_XCALL_STATS(ksfmmup);
1798 
1799 	xt_some(cpuset, vtag_flushpage_tl1, (uint64_t)vaddr,
1800 	    (uint64_t)ksfmmup);
1801 	vtag_flushpage(vaddr, (uint64_t)ksfmmup);
1802 
1803 	kpreempt_enable();
1804 }
1805 
1806 /*
1807  * Summary states used in sfmmu_kpm_vac_unload (KPM_VUL__*).
1808  * See also the more detailed comments within the sfmmu_kpm_vac_unload switch.
1809  * Abbreviations used:
1810  * BIG:   Large page kpm mapping in use.
1811  * CONFL: VAC conflict(s) within a kpm_page.
1812  * INCR:  Count of conflicts within a kpm_page is going to be incremented.
1813  * DECR:  Count of conflicts within a kpm_page is going to be decremented.
1814  * UNMAP_SMALL: A small (regular page size) mapping is going to be unmapped.
1815  * TNC:   Temporary non cached: a kpm mapped page is mapped in TNC state.
1816  */
1817 #define	KPM_VUL_BIG		(0)
1818 #define	KPM_VUL_CONFL_INCR1	(KPM_KS)
1819 #define	KPM_VUL_UNMAP_SMALL1	(KPM_KS | KPM_S)
1820 #define	KPM_VUL_CONFL_INCR2	(KPM_KC)
1821 #define	KPM_VUL_CONFL_INCR3	(KPM_KC | KPM_KS)
1822 #define	KPM_VUL_UNMAP_SMALL2	(KPM_KC | KPM_KS | KPM_S)
1823 #define	KPM_VUL_CONFL_DECR1	(KPM_KC | KPM_C)
1824 #define	KPM_VUL_CONFL_DECR2	(KPM_KC | KPM_C | KPM_KS)
1825 #define	KPM_VUL_TNC		(KPM_KC | KPM_C | KPM_KS | KPM_S)
1826 
1827 /*
1828  * Handle VAC unload conflicts introduced by hme mappings or vice
1829  * versa when a hme conflict mapping is replaced by a non conflict
1830  * one. Perform actions and state transitions according to the
1831  * various page and kpm_page entry states. VAC flushes are the
1832  * responsibility of the caller. We still hold the mlist lock.
1833  */
1834 void
1835 sfmmu_kpm_vac_unload(page_t *pp, caddr_t vaddr)
1836 {
1837 	kpm_page_t	*kp;
1838 	kpm_hlk_t	*kpmp;
1839 	caddr_t		kpmvaddr = hat_kpm_page2va(pp, 1);
1840 	int		newcolor;
1841 	kmutex_t	*pmtx;
1842 	uint_t		vacunlcase;
1843 	int		badstate = 0;
1844 	kpm_spage_t	*ksp;
1845 	kpm_shlk_t	*kpmsp;
1846 
1847 	ASSERT(PAGE_LOCKED(pp));
1848 	ASSERT(sfmmu_mlist_held(pp));
1849 	ASSERT(!PP_ISNC(pp));
1850 
1851 	newcolor = addr_to_vcolor(kpmvaddr) != addr_to_vcolor(vaddr);
1852 	if (kpm_smallpages)
1853 		goto smallpages_vac_unload;
1854 
1855 	PP2KPMPG(pp, kp);
1856 	kpmp = KPMP_HASH(kp);
1857 	mutex_enter(&kpmp->khl_mutex);
1858 
1859 	if (IS_KPM_ALIAS_RANGE(kpmvaddr)) {
1860 		if (kp->kp_refcnta < 1) {
1861 			panic("sfmmu_kpm_vac_unload: bad refcnta kpm_page=%p\n",
1862 			    (void *)kp);
1863 		}
1864 
1865 		if (PP_ISKPMC(pp) == 0) {
1866 			if (newcolor == 0)
1867 				goto exit;
1868 			sfmmu_kpm_demap_small(kpmvaddr);
1869 			pmtx = sfmmu_page_enter(pp);
1870 			PP_SETKPMC(pp);
1871 			sfmmu_page_exit(pmtx);
1872 
1873 		} else if (newcolor == 0) {
1874 			pmtx = sfmmu_page_enter(pp);
1875 			PP_CLRKPMC(pp);
1876 			sfmmu_page_exit(pmtx);
1877 
1878 		} else {
1879 			badstate++;
1880 		}
1881 
1882 		goto exit;
1883 	}
1884 
1885 	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
1886 	if (kp->kp_refcntc == -1) {
1887 		/*
1888 		 * We should come here only if trap level tsb miss
1889 		 * handler is disabled.
1890 		 */
1891 		badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 ||
1892 		    PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp));
1893 	} else {
1894 		badstate |= (kp->kp_refcntc < 0);
1895 	}
1896 
1897 	if (badstate)
1898 		goto exit;
1899 
1900 	if (PP_ISKPMC(pp) == 0 && newcolor == 0) {
1901 		ASSERT(PP_ISKPMS(pp) == 0);
1902 		goto exit;
1903 	}
1904 
1905 	/*
1906 	 * Combine the per kpm_page and per page kpm VAC states
1907 	 * to a summary state in order to make the vac unload
1908 	 * handling more concise.
1909 	 */
1910 	vacunlcase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) |
1911 	    ((kp->kp_refcnts > 0) ? KPM_KS : 0) |
1912 	    (PP_ISKPMC(pp) ? KPM_C : 0) |
1913 	    (PP_ISKPMS(pp) ? KPM_S : 0));
1914 
1915 	switch (vacunlcase) {
1916 	case KPM_VUL_BIG:				/* - - - - */
1917 		/*
1918 		 * Have to break up the large page mapping to be
1919 		 * able to handle the conflicting hme vaddr.
1920 		 */
1921 		if (kp->kp_refcntc == -1) {
1922 			/* remove go indication */
1923 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
1924 			    &kpmp->khl_lock, KPMTSBM_STOP);
1925 		}
1926 		sfmmu_kpm_demap_large(kpmvaddr);
1927 
1928 		ASSERT(kp->kp_refcntc == 0);
1929 		kp->kp_refcntc++;
1930 		pmtx = sfmmu_page_enter(pp);
1931 		PP_SETKPMC(pp);
1932 		sfmmu_page_exit(pmtx);
1933 		break;
1934 
1935 	case KPM_VUL_UNMAP_SMALL1:			/* -  - ks s */
1936 	case KPM_VUL_UNMAP_SMALL2:			/* kc - ks s */
1937 		/*
1938 		 * New conflict w/ an active kpm page, actually mapped
1939 		 * in by small TSB/TLB entries. Remove the mapping and
1940 		 * update states.
1941 		 */
1942 		ASSERT(newcolor);
1943 		sfmmu_kpm_demap_small(kpmvaddr);
1944 		kp->kp_refcnts--;
1945 		kp->kp_refcnt++;
1946 		kp->kp_refcntc++;
1947 		pmtx = sfmmu_page_enter(pp);
1948 		PP_CLRKPMS(pp);
1949 		PP_SETKPMC(pp);
1950 		sfmmu_page_exit(pmtx);
1951 		break;
1952 
1953 	case KPM_VUL_CONFL_INCR1:			/* -  - ks - */
1954 	case KPM_VUL_CONFL_INCR2:			/* kc - -  - */
1955 	case KPM_VUL_CONFL_INCR3:			/* kc - ks - */
1956 		/*
1957 		 * New conflict on an active kpm mapped page not yet in
1958 		 * TSB/TLB. Mark page and increment the kpm_page conflict
1959 		 * count.
1960 		 */
1961 		ASSERT(newcolor);
1962 		kp->kp_refcntc++;
1963 		pmtx = sfmmu_page_enter(pp);
1964 		PP_SETKPMC(pp);
1965 		sfmmu_page_exit(pmtx);
1966 		break;
1967 
1968 	case KPM_VUL_CONFL_DECR1:			/* kc c -  - */
1969 	case KPM_VUL_CONFL_DECR2:			/* kc c ks - */
1970 		/*
1971 		 * A conflicting hme mapping is removed for an active
1972 		 * kpm page not yet in TSB/TLB. Unmark page and decrement
1973 		 * the kpm_page conflict count.
1974 		 */
1975 		ASSERT(newcolor == 0);
1976 		kp->kp_refcntc--;
1977 		pmtx = sfmmu_page_enter(pp);
1978 		PP_CLRKPMC(pp);
1979 		sfmmu_page_exit(pmtx);
1980 		break;
1981 
1982 	case KPM_VUL_TNC:				/* kc c ks s */
1983 		cmn_err(CE_NOTE, "sfmmu_kpm_vac_unload: "
1984 		    "page not in NC state");
1985 		/* FALLTHRU */
1986 
1987 	default:
1988 		badstate++;
1989 	}
1990 exit:
1991 	if (badstate) {
1992 		panic("sfmmu_kpm_vac_unload: inconsistent VAC state, "
1993 		    "kpmvaddr=%p kp=%p pp=%p",
1994 		    (void *)kpmvaddr, (void *)kp, (void *)pp);
1995 	}
1996 	mutex_exit(&kpmp->khl_mutex);
1997 
1998 	return;
1999 
2000 smallpages_vac_unload:
2001 	if (newcolor == 0)
2002 		return;
2003 
2004 	PP2KPMSPG(pp, ksp);
2005 	kpmsp = KPMP_SHASH(ksp);
2006 
2007 	if (PP_ISKPMC(pp) == 0) {
2008 		if (ksp->kp_mapped == KPM_MAPPEDS) {
2009 			/*
2010 			 * Stop TL tsbmiss handling
2011 			 */
2012 			(void) sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
2013 			    &kpmsp->kshl_lock, KPM_MAPPEDSC);
2014 
2015 			sfmmu_kpm_demap_small(kpmvaddr);
2016 
2017 		} else if (ksp->kp_mapped != KPM_MAPPEDSC) {
2018 			panic("sfmmu_kpm_vac_unload: inconsistent mapping");
2019 		}
2020 
2021 		pmtx = sfmmu_page_enter(pp);
2022 		PP_SETKPMC(pp);
2023 		sfmmu_page_exit(pmtx);
2024 
2025 	} else {
2026 		if (ksp->kp_mapped != KPM_MAPPEDSC)
2027 			panic("sfmmu_kpm_vac_unload: inconsistent mapping");
2028 	}
2029 }
2030 
2031 /*
2032  * Page is marked to be in VAC conflict to an existing kpm mapping
2033  * or is kpm mapped using only the regular pagesize. Called from
2034  * sfmmu_hblk_unload when a mlist is completely removed.
2035  */
2036 void
2037 sfmmu_kpm_hme_unload(page_t *pp)
2038 {
2039 	/* tte assembly */
2040 	kpm_page_t	*kp;
2041 	kpm_hlk_t	*kpmp;
2042 	caddr_t		vaddr;
2043 	kmutex_t	*pmtx;
2044 	uint_t		flags;
2045 	kpm_spage_t	*ksp;
2046 
2047 	ASSERT(sfmmu_mlist_held(pp));
2048 	ASSERT(PP_ISMAPPED_KPM(pp));
2049 
2050 	flags = pp->p_nrm & (P_KPMC | P_KPMS);
2051 	if (kpm_smallpages)
2052 		goto smallpages_hme_unload;
2053 
2054 	if (flags == (P_KPMC | P_KPMS)) {
2055 		panic("sfmmu_kpm_hme_unload: page should be uncached");
2056 
2057 	} else if (flags == P_KPMS) {
2058 		/*
2059 		 * Page mapped small but not involved in VAC conflict
2060 		 */
2061 		return;
2062 	}
2063 
2064 	vaddr = hat_kpm_page2va(pp, 1);
2065 
2066 	PP2KPMPG(pp, kp);
2067 	kpmp = KPMP_HASH(kp);
2068 	mutex_enter(&kpmp->khl_mutex);
2069 
2070 	if (IS_KPM_ALIAS_RANGE(vaddr)) {
2071 		if (kp->kp_refcnta < 1) {
2072 			panic("sfmmu_kpm_hme_unload: bad refcnta kpm_page=%p\n",
2073 			    (void *)kp);
2074 		}
2075 	} else {
2076 		if (kp->kp_refcntc < 1) {
2077 			panic("sfmmu_kpm_hme_unload: bad refcntc kpm_page=%p\n",
2078 			    (void *)kp);
2079 		}
2080 		kp->kp_refcntc--;
2081 	}
2082 
2083 	pmtx = sfmmu_page_enter(pp);
2084 	PP_CLRKPMC(pp);
2085 	sfmmu_page_exit(pmtx);
2086 
2087 	mutex_exit(&kpmp->khl_mutex);
2088 	return;
2089 
2090 smallpages_hme_unload:
2091 	if (flags != P_KPMC)
2092 		panic("sfmmu_kpm_hme_unload: page should be uncached");
2093 
2094 	vaddr = hat_kpm_page2va(pp, 1);
2095 	PP2KPMSPG(pp, ksp);
2096 
2097 	if (ksp->kp_mapped != KPM_MAPPEDSC)
2098 		panic("sfmmu_kpm_hme_unload: inconsistent mapping");
2099 
2100 	/*
2101 	 * Keep KPM_MAPPEDSC until the next kpm tsbmiss where it
2102 	 * prevents TL tsbmiss handling and forces a hat_kpm_fault.
2103 	 * There we can start over again.
2104 	 */
2105 
2106 	pmtx = sfmmu_page_enter(pp);
2107 	PP_CLRKPMC(pp);
2108 	sfmmu_page_exit(pmtx);
2109 }
2110 
2111 /*
2112  * Special hooks for sfmmu_page_cache_array() when changing the
2113  * cacheability of a page. They are used to obey the hat_kpm lock
2114  * ordering (mlist -> kpmp -> spl, and back).
2115  */
2116 kpm_hlk_t *
2117 sfmmu_kpm_kpmp_enter(page_t *pp, pgcnt_t npages)
2118 {
2119 	kpm_page_t	*kp;
2120 	kpm_hlk_t	*kpmp;
2121 
2122 	ASSERT(sfmmu_mlist_held(pp));
2123 
2124 	if (kpm_smallpages || PP_ISMAPPED_KPM(pp) == 0)
2125 		return (NULL);
2126 
2127 	ASSERT(npages <= kpmpnpgs);
2128 
2129 	PP2KPMPG(pp, kp);
2130 	kpmp = KPMP_HASH(kp);
2131 	mutex_enter(&kpmp->khl_mutex);
2132 
2133 	return (kpmp);
2134 }
2135 
2136 void
2137 sfmmu_kpm_kpmp_exit(kpm_hlk_t *kpmp)
2138 {
2139 	if (kpm_smallpages || kpmp == NULL)
2140 		return;
2141 
2142 	mutex_exit(&kpmp->khl_mutex);
2143 }
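
/*
 * Illustrative caller sketch (assumed shape, per the lock ordering
 * described above; the real sequence lives in sfmmu_page_cache_array()):
 *
 *	pml = sfmmu_mlist_enter(pp);
 *	kpmp = sfmmu_kpm_kpmp_enter(pp, npages);
 *	... take the spl hash lock, call sfmmu_kpm_page_cache(),
 *	    drop the spl hash lock ...
 *	sfmmu_kpm_kpmp_exit(kpmp);
 *	sfmmu_mlist_exit(pml);
 */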
2144 
2145 /*
2146  * Summary states used in sfmmu_kpm_page_cache (KPM_*).
2147  * See also the more detailed comments within the sfmmu_kpm_page_cache switch.
2148  * Abbreviations used:
2149  * UNC:     Input state for an uncache request.
2150  *   BIG:     Large page kpm mapping in use.
2151  *   SMALL:   Page has a small kpm mapping within a kpm_page range.
2152  *   NODEMAP: No demap needed.
2153  *   NOP:     No operation needed on this input state.
2154  * CACHE:   Input state for a re-cache request.
2155  *   MAPS:    Page is in TNC and kpm VAC conflict state and kpm mapped small.
2156  *   NOMAP:   Page is in TNC and kpm VAC conflict state, but not small kpm
2157  *            mapped.
2158  *   NOMAPO:  Page is in TNC and kpm VAC conflict state, but not small kpm
2159  *            mapped. There are also other small kpm mappings within this
2160  *            kpm_page.
2161  */
2162 #define	KPM_UNC_BIG		(0)
2163 #define	KPM_UNC_NODEMAP1	(KPM_KS)
2164 #define	KPM_UNC_SMALL1		(KPM_KS | KPM_S)
2165 #define	KPM_UNC_NODEMAP2	(KPM_KC)
2166 #define	KPM_UNC_NODEMAP3	(KPM_KC | KPM_KS)
2167 #define	KPM_UNC_SMALL2		(KPM_KC | KPM_KS | KPM_S)
2168 #define	KPM_UNC_NOP1		(KPM_KC | KPM_C)
2169 #define	KPM_UNC_NOP2		(KPM_KC | KPM_C | KPM_KS)
2170 #define	KPM_CACHE_NOMAP		(KPM_KC | KPM_C)
2171 #define	KPM_CACHE_NOMAPO	(KPM_KC | KPM_C | KPM_KS)
2172 #define	KPM_CACHE_MAPS		(KPM_KC | KPM_C | KPM_KS | KPM_S)
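
/*
 * Example: sfmmu_kpm_page_cache() composes pgcacase from kp_refcntc,
 * kp_refcnts, PP_ISKPMC() and PP_ISKPMS() in the same way vacunlcase
 * is composed above. An uncache request for a page that is kpm mapped
 * small (PP_ISKPMS set, kp_refcnts > 0) within a kpm_page that already
 * carries a conflict count (kp_refcntc > 0), with PP_ISKPMC still clear,
 * therefore sees (KPM_KC | KPM_KS | KPM_S) == KPM_UNC_SMALL2.
 */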
2173 
2174 /*
2175  * This function is called when the virtual cacheability of a page
2176  * is changed and the page has an active kpm mapping. The mlist mutex,
2177  * the spl hash lock and the kpmp mutex (if needed) are already grabbed.
2178  */
2179 /*ARGSUSED2*/
2180 void
2181 sfmmu_kpm_page_cache(page_t *pp, int flags, int cache_flush_tag)
2182 {
2183 	kpm_page_t	*kp;
2184 	kpm_hlk_t	*kpmp;
2185 	caddr_t		kpmvaddr;
2186 	int		badstate = 0;
2187 	uint_t		pgcacase;
2188 	kpm_spage_t	*ksp;
2189 	kpm_shlk_t	*kpmsp;
2190 	int		oldval;
2191 
2192 	ASSERT(PP_ISMAPPED_KPM(pp));
2193 	ASSERT(sfmmu_mlist_held(pp));
2194 	ASSERT(sfmmu_page_spl_held(pp));
2195 
2196 	if (flags != HAT_TMPNC && flags != HAT_CACHE)
2197 		panic("sfmmu_kpm_page_cache: bad flags");
2198 
2199 	kpmvaddr = hat_kpm_page2va(pp, 1);
2200 
2201 	if (flags == HAT_TMPNC && cache_flush_tag == CACHE_FLUSH) {
2202 		pfn_t pfn = pp->p_pagenum;
2203 		int vcolor = addr_to_vcolor(kpmvaddr);
2204 		cpuset_t cpuset = cpu_ready_set;
2205 
2206 		/* Flush vcolor in DCache */
2207 		CPUSET_DEL(cpuset, CPU->cpu_id);
2208 		SFMMU_XCALL_STATS(ksfmmup);
2209 		xt_some(cpuset, vac_flushpage_tl1, pfn, vcolor);
2210 		vac_flushpage(pfn, vcolor);
2211 	}
2212 
2213 	if (kpm_smallpages)
2214 		goto smallpages_page_cache;
2215 
2216 	PP2KPMPG(pp, kp);
2217 	kpmp = KPMP_HASH(kp);
2218 	ASSERT(MUTEX_HELD(&kpmp->khl_mutex));
2219 
2220 	if (IS_KPM_ALIAS_RANGE(kpmvaddr)) {
2221 		if (kp->kp_refcnta < 1) {
2222 			panic("sfmmu_kpm_page_cache: bad refcnta "
2223 			    "kpm_page=%p\n", (void *)kp);
2224 		}
2225 		sfmmu_kpm_demap_small(kpmvaddr);
2226 		if (flags == HAT_TMPNC) {
2227 			PP_SETKPMC(pp);
2228 			ASSERT(!PP_ISKPMS(pp));
2229 		} else {
2230 			ASSERT(PP_ISKPMC(pp));
2231 			PP_CLRKPMC(pp);
2232 		}
2233 		goto exit;
2234 	}
2235 
2236 	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
2237 	if (kp->kp_refcntc == -1) {
2238 		/*
2239 		 * We should come here only if trap level tsb miss
2240 		 * handler is disabled.
2241 		 */
2242 		badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 ||
2243 		    PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp));
2244 	} else {
2245 		badstate |= (kp->kp_refcntc < 0);
2246 	}
2247 
2248 	if (badstate)
2249 		goto exit;
2250 
2251 	/*
2252 	 * Combine the per kpm_page and per page kpm VAC states to
2253 	 * a summary state in order to make the VAC cache/uncache
2254 	 * handling more concise.
2255 	 */
2256 	pgcacase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) |
2257 	    ((kp->kp_refcnts > 0) ? KPM_KS : 0) |
2258 	    (PP_ISKPMC(pp) ? KPM_C : 0) |
2259 	    (PP_ISKPMS(pp) ? KPM_S : 0));
2260 
2261 	if (flags == HAT_CACHE) {
2262 		switch (pgcacase) {
2263 		case KPM_CACHE_MAPS:			/* kc c ks s */
2264 			sfmmu_kpm_demap_small(kpmvaddr);
2265 			if (kp->kp_refcnts < 1) {
2266 				panic("sfmmu_kpm_page_cache: bad refcnts "
2267 				"kpm_page=%p\n", (void *)kp);
2268 			}
2269 			kp->kp_refcnts--;
2270 			kp->kp_refcnt++;
2271 			PP_CLRKPMS(pp);
2272 			/* FALLTHRU */
2273 
2274 		case KPM_CACHE_NOMAP:			/* kc c -  - */
2275 		case KPM_CACHE_NOMAPO:			/* kc c ks - */
2276 			kp->kp_refcntc--;
2277 			PP_CLRKPMC(pp);
2278 			break;
2279 
2280 		default:
2281 			badstate++;
2282 		}
2283 		goto exit;
2284 	}
2285 
2286 	switch (pgcacase) {
2287 	case KPM_UNC_BIG:				/* - - - - */
2288 		if (kp->kp_refcnt < 1) {
2289 			panic("sfmmu_kpm_page_cache: bad refcnt "
2290 			    "kpm_page=%p\n", (void *)kp);
2291 		}
2292 
2293 		/*
2294 		 * Have to break up the large page mapping in preparation
2295 		 * for the upcoming TNC mode handled by small mappings.
2296 		 * The demap may already have been done due to another
2297 		 * conflict within the kpm_page.
2298 		 */
2299 		if (kp->kp_refcntc == -1) {
2300 			/* remove go indication */
2301 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
2302 			    &kpmp->khl_lock, KPMTSBM_STOP);
2303 		}
2304 		ASSERT(kp->kp_refcntc == 0);
2305 		sfmmu_kpm_demap_large(kpmvaddr);
2306 		kp->kp_refcntc++;
2307 		PP_SETKPMC(pp);
2308 		break;
2309 
2310 	case KPM_UNC_SMALL1:				/* -  - ks s */
2311 	case KPM_UNC_SMALL2:				/* kc - ks s */
2312 		/*
2313 		 * Have to demap an already small kpm mapping in preparation
2314 		 * for the upcoming TNC mode. The demap may already have been
2315 		 * done due to another conflict within the kpm_page.
2316 		 */
2317 		sfmmu_kpm_demap_small(kpmvaddr);
2318 		kp->kp_refcntc++;
2319 		kp->kp_refcnts--;
2320 		kp->kp_refcnt++;
2321 		PP_CLRKPMS(pp);
2322 		PP_SETKPMC(pp);
2323 		break;
2324 
2325 	case KPM_UNC_NODEMAP1:				/* -  - ks - */
2326 		/* fallthru */
2327 
2328 	case KPM_UNC_NODEMAP2:				/* kc - -  - */
2329 	case KPM_UNC_NODEMAP3:				/* kc - ks - */
2330 		kp->kp_refcntc++;
2331 		PP_SETKPMC(pp);
2332 		break;
2333 
2334 	case KPM_UNC_NOP1:				/* kc c -  - */
2335 	case KPM_UNC_NOP2:				/* kc c ks - */
2336 		break;
2337 
2338 	default:
2339 		badstate++;
2340 	}
2341 exit:
2342 	if (badstate) {
2343 		panic("sfmmu_kpm_page_cache: inconsistent VAC state "
2344 		    "kpmvaddr=%p kp=%p pp=%p", (void *)kpmvaddr,
2345 		    (void *)kp, (void *)pp);
2346 	}
2347 	return;
2348 
2349 smallpages_page_cache:
2350 	PP2KPMSPG(pp, ksp);
2351 	kpmsp = KPMP_SHASH(ksp);
2352 
2353 	/*
2354 	 * Mark it as nogo; we will fault in and resolve it
2355 	 * through sfmmu_kpm_fault_small.
2356 	 */
2357 	oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag, &kpmsp->kshl_lock,
2358 	    KPM_MAPPEDSC);
2359 
2360 	if (!(oldval == KPM_MAPPEDS || oldval == KPM_MAPPEDSC))
2361 		panic("smallpages_page_cache: inconsistent mapping");
2362 
2363 	sfmmu_kpm_demap_small(kpmvaddr);
2364 
2365 	if (flags == HAT_TMPNC) {
2366 		PP_SETKPMC(pp);
2367 		ASSERT(!PP_ISKPMS(pp));
2368 
2369 	} else {
2370 		ASSERT(PP_ISKPMC(pp));
2371 		PP_CLRKPMC(pp);
2372 	}
2373 
2374 	/*
2375 	 * Keep KPM_MAPPEDSC until the next kpm tsbmiss where it
2376 	 * prevents TL tsbmiss handling and forces a hat_kpm_fault.
2377 	 * There we can start over again.
2378 	 */
2379 }
2380