/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * SFMMU primitives.  These primitives should only be used by sfmmu
 * routines.
 */

#if defined(lint)
#include <sys/types.h>
#else	/* lint */
#include "assym.h"
#endif	/* lint */

#include <sys/asm_linkage.h>
#include <sys/machtrap.h>
#include <sys/machasi.h>
#include <sys/sun4asi.h>
#include <sys/pte.h>
#include <sys/mmu.h>
#include <vm/hat_sfmmu.h>
#include <vm/seg_spt.h>
#include <sys/machparam.h>
#include <sys/privregs.h>
#include <sys/scb.h>
#include <sys/intreg.h>
#include <sys/machthread.h>
#include <sys/intr.h>
#include <sys/clock.h>
#include <sys/trapstat.h>

#ifdef TRAPTRACE
#include <sys/traptrace.h>

/*
 * Tracing macro. Adds two instructions if TRAPTRACE is defined.
 */
#define	TT_TRACE(label)		\
	ba	label		;\
	rd	%pc, %g7
#else

#define	TT_TRACE(label)

#endif /* TRAPTRACE */

#ifndef	lint

#if (TTE_SUSPEND_SHIFT > 0)
#define	TTE_SUSPEND_INT_SHIFT(reg)				\
	sllx	reg, TTE_SUSPEND_SHIFT, reg
#else
#define	TTE_SUSPEND_INT_SHIFT(reg)
#endif

#endif /* lint */

#ifndef	lint

/*
 * Assumes TSBE_TAG is 0
 * Assumes TSBE_INTHI is 0
 * Assumes TSBREG.split is 0	
 */

#if TSBE_TAG != 0
#error "TSB_UPDATE and TSB_INVALIDATE assume TSBE_TAG = 0"
#endif

#if TSBTAG_INTHI != 0
#error "TSB_UPDATE and TSB_INVALIDATE assume TSBTAG_INTHI = 0"
#endif

/*
 * The following code assumes the tsb is not split.
 *
 * With TSBs no longer shared between processes, it's no longer
 * necessary to hash the context bits into the tsb index to get
 * tsb coloring; the new implementation treats the TSB as a
 * direct-mapped, virtually-addressed cache.
 *
 * In:
 *    vpshift = virtual page shift; e.g. 13 for 8K TTEs (constant or ro)
 *    tsbbase = base address of TSB (clobbered)
 *    tagacc = tag access register (clobbered)
 *    szc = size code of TSB (ro)
 *    tmp = scratch reg
 * Out:
 *    tsbbase = pointer to entry in TSB
 */
#define	GET_TSBE_POINTER(vpshift, tsbbase, tagacc, szc, tmp)		\
	mov	TSB_ENTRIES(0), tmp	/* nentries in TSB size 0 */	;\
	srlx	tagacc, vpshift, tagacc 				;\
	sllx	tmp, szc, tmp		/* tmp = nentries in TSB */	;\
	sub	tmp, 1, tmp		/* mask = nentries - 1 */	;\
	and	tagacc, tmp, tmp	/* tsbent = virtpage & mask */	;\
	sllx	tmp, TSB_ENTRY_SHIFT, tmp	/* entry num --> ptr */	;\
	add	tsbbase, tmp, tsbbase	/* add entry offset to TSB base */
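
/*
 * For reference, a C sketch (not compiled) of the index computation
 * performed above:
 *
 *	nentries = TSB_ENTRIES(0) << szc;
 *	index = (tagacc >> vpshift) & (nentries - 1);
 *	tsbbase += index << TSB_ENTRY_SHIFT;
 */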

/*
 * When the kpm TSB is used it is assumed that it is direct mapped
 * using (vaddr>>vpshift)%tsbsz as the index.
 *
 * Note that, for now, the kpm TSB and kernel TSB are the same for
 * each mapping size.  However that need not always be the case.  If
 * the trap handlers are updated to search a different TSB for kpm
 * addresses than for kernel addresses then kpm_tsbbase and kpm_tsbsz
 * (and/or kpmsm_tsbbase/kpmsm_tsbsz) may be entirely independent.
 *
 * In:
 *    vpshift = virtual page shift; e.g. 13 for 8K TTEs (constant or ro)
 *    vaddr = virtual address (clobbered)
 *    tsbp, szc, tmp = scratch
 * Out:
 *    tsbp = pointer to entry in TSB
 */
#define	GET_KPM_TSBE_POINTER(vpshift, tsbp, vaddr, szc, tmp)		\
	cmp	vpshift, MMU_PAGESHIFT					;\
	bne,pn	%icc, 1f		/* branch if large case */	;\
	  sethi	%hi(kpmsm_tsbsz), szc					;\
	sethi	%hi(kpmsm_tsbbase), tsbp				;\
	ld	[szc + %lo(kpmsm_tsbsz)], szc				;\
	ldx	[tsbp + %lo(kpmsm_tsbbase)], tsbp			;\
	ba,pt	%icc, 2f						;\
	  nop								;\
1:	sethi	%hi(kpm_tsbsz), szc					;\
	sethi	%hi(kpm_tsbbase), tsbp					;\
	ld	[szc + %lo(kpm_tsbsz)], szc				;\
	ldx	[tsbp + %lo(kpm_tsbbase)], tsbp				;\
2:	GET_TSBE_POINTER(vpshift, tsbp, vaddr, szc, tmp)
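
/*
 * Equivalent C sketch (not compiled) of the TSB selection done by
 * GET_KPM_TSBE_POINTER above:
 *
 *	if (vpshift == MMU_PAGESHIFT) {
 *		tsbp = kpmsm_tsbbase;	! small-page kpm TSB
 *		szc = kpmsm_tsbsz;
 *	} else {
 *		tsbp = kpm_tsbbase;	! large-page kpm TSB
 *		szc = kpm_tsbsz;
 *	}
 *	GET_TSBE_POINTER(vpshift, tsbp, vaddr, szc, tmp);
 */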

/*
 * Lock the TSBE at virtual address tsbep.
 *
 * tsbep = TSBE va (ro)
 * tmp1, tmp2 = scratch registers (clobbered)
 * label = label to use for branches (text)
 * %asi = ASI to use for TSB access
 *
 * NOTE that we flush the TSB using fast VIS instructions that
 * set all 1's in the TSB tag, so TSBTAG_LOCKED|TSBTAG_INVALID must
 * not be treated as a locked entry or we'll get stuck spinning on
 * an entry that isn't locked but really invalid.
 */

#if defined(UTSB_PHYS)

#define	TSB_LOCK_ENTRY(tsbep, tmp1, tmp2, label)			\
	lda	[tsbep]ASI_MEM, tmp1					;\
label:									;\
	sethi	%hi(TSBTAG_LOCKED), tmp2				;\
	cmp	tmp1, tmp2 						;\
	be,a,pn	%icc, label/**/b	/* if locked spin */		;\
	  lda	[tsbep]ASI_MEM, tmp1					;\
	casa	[tsbep]ASI_MEM, tmp1, tmp2				;\
	cmp	tmp1, tmp2 						;\
	bne,a,pn %icc, label/**/b	/* didn't lock so try again */	;\
	  lda	[tsbep]ASI_MEM, tmp1					;\
	/* tsbe lock acquired */					;\
	membar #StoreStore

#else /* UTSB_PHYS */

#define	TSB_LOCK_ENTRY(tsbep, tmp1, tmp2, label)			\
	lda	[tsbep]%asi, tmp1					;\
label:									;\
	sethi	%hi(TSBTAG_LOCKED), tmp2				;\
	cmp	tmp1, tmp2 						;\
	be,a,pn	%icc, label/**/b	/* if locked spin */		;\
	  lda	[tsbep]%asi, tmp1					;\
	casa	[tsbep]%asi, tmp1, tmp2					;\
	cmp	tmp1, tmp2 						;\
	bne,a,pn %icc, label/**/b	/* didn't lock so try again */	;\
	  lda	[tsbep]%asi, tmp1					;\
	/* tsbe lock acquired */					;\
	membar #StoreStore

#endif /* UTSB_PHYS */
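
/*
 * A rough C rendering (not compiled) of the locking protocol above,
 * with atomic_cas_32() standing in for casa.  sethi materializes only
 * the upper 22 bits, so TSBTAG_LOCKED is assumed to have zero low bits:
 *
 *	uint32_t old;
 *	do {
 *		while ((old = *(uint32_t *)tsbep) == TSBTAG_LOCKED)
 *			continue;		! spin while locked
 *	} while (atomic_cas_32((uint32_t *)tsbep, old,
 *	    TSBTAG_LOCKED) != old);
 *	membar #StoreStore;			! lock acquired
 */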

/*
 * Atomically write TSBE at virtual address tsbep.
 *
 * tsbep = TSBE va (ro)
 * tte = TSBE TTE (ro)
 * tagtarget = TSBE tag (ro)
 * %asi = ASI to use for TSB access
 */

#if defined(UTSB_PHYS)

#define	TSB_INSERT_UNLOCK_ENTRY(tsbep, tte, tagtarget, tmp1)		\
	add	tsbep, TSBE_TTE, tmp1					;\
	stxa	tte, [tmp1]ASI_MEM		/* write tte data */	;\
	membar #StoreStore						;\
	add	tsbep, TSBE_TAG, tmp1					;\
	stxa	tagtarget, [tmp1]ASI_MEM	/* write tte tag & unlock */

#else /* UTSB_PHYS */

#define	TSB_INSERT_UNLOCK_ENTRY(tsbep, tte, tagtarget,tmp1)		\
	stxa	tte, [tsbep + TSBE_TTE]%asi	/* write tte data */	;\
	membar #StoreStore						;\
	stxa	tagtarget, [tsbep + TSBE_TAG]%asi /* write tte tag & unlock */

#endif /* UTSB_PHYS */
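
/*
 * The store ordering above is what makes the insert safe: the tte
 * data must be globally visible before the tag store, since writing
 * the tag both publishes the entry and releases the lock.  In C-like
 * form (not compiled; field names approximate):
 *
 *	tsbep->tte_data = tte;		! data first
 *	membar #StoreStore;		! order data before tag
 *	tsbep->tte_tag = tagtarget;	! publish tag, dropping the lock
 */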

/*
 * Load an entry into the TSB at TL > 0.
 *
 * tsbep = pointer to the TSBE to load as va (ro)
 * tte = value of the TTE retrieved and loaded (wo)
 * tagtarget = tag target register.  To get TSBE tag to load,
 *   we need to mask off the context and leave only the va (clobbered)
 * ttepa = pointer to the TTE to retrieve/load as pa (ro)
 * tmp1, tmp2 = scratch registers
 * label = label to use for branches (text)
 * %asi = ASI to use for TSB access
 */

#if defined(UTSB_PHYS)

#define	TSB_UPDATE_TL(tsbep, tte, tagtarget, ttepa, tmp1, tmp2, label) \
	TSB_LOCK_ENTRY(tsbep, tmp1, tmp2, label)			;\
	/*								;\
	 * There is no need to recheck the tte after updating the TSB:	;\
	 * TSB_INVALIDATE spins until the entry is unlocked, and we	;\
	 * always invalidate the hash table before we unload the TSB.	;\
	 */								;\
	sllx	tagtarget, TTARGET_VA_SHIFT, tagtarget			;\
	ldxa	[ttepa]ASI_MEM, tte					;\
	srlx	tagtarget, TTARGET_VA_SHIFT, tagtarget			;\
	sethi	%hi(TSBTAG_INVALID), tmp2				;\
	add	tsbep, TSBE_TAG, tmp1					;\
	brgez,a,pn tte, label/**/f					;\
	 sta	tmp2, [tmp1]ASI_MEM			/* unlock */	;\
	TSB_INSERT_UNLOCK_ENTRY(tsbep, tte, tagtarget, tmp1)		;\
label:

#else /* UTSB_PHYS */

#define	TSB_UPDATE_TL(tsbep, tte, tagtarget, ttepa, tmp1, tmp2, label) \
	TSB_LOCK_ENTRY(tsbep, tmp1, tmp2, label)			;\
	/*								;\
	 * There is no need to recheck the tte after updating the TSB:	;\
	 * TSB_INVALIDATE spins until the entry is unlocked, and we	;\
	 * always invalidate the hash table before we unload the TSB.	;\
	 */								;\
	sllx	tagtarget, TTARGET_VA_SHIFT, tagtarget			;\
	ldxa	[ttepa]ASI_MEM, tte					;\
	srlx	tagtarget, TTARGET_VA_SHIFT, tagtarget			;\
	sethi	%hi(TSBTAG_INVALID), tmp2				;\
	brgez,a,pn tte, label/**/f					;\
	 sta	tmp2, [tsbep + TSBE_TAG]%asi		/* unlock */	;\
	TSB_INSERT_UNLOCK_ENTRY(tsbep, tte, tagtarget, tmp1)		;\
label:

#endif /* UTSB_PHYS */

/*
 * Load a 32M/256M Panther TSB entry into the TSB at TL > 0,
 *   for ITLB synthesis.
 *
 * tsbep = pointer to the TSBE to load as va (ro)
 * tte = 4M pfn offset (in), value of the TTE retrieved and loaded (out)
 *   with exec_perm turned off and exec_synth turned on
 * tagtarget = tag target register.  To get TSBE tag to load,
 *   we need to mask off the context and leave only the va (clobbered)
 * ttepa = pointer to the TTE to retrieve/load as pa (ro)
 * tmp1, tmp2 = scratch registers
 * label = label to use for branch (text)
 * %asi = ASI to use for TSB access
 */

#define	TSB_UPDATE_TL_PN(tsbep, tte, tagtarget, ttepa, tmp1, tmp2, label) \
	TSB_LOCK_ENTRY(tsbep, tmp1, tmp2, label)			;\
	/*								;\
	 * There is no need to recheck the tte after updating the TSB:	;\
	 * TSB_INVALIDATE spins until the entry is unlocked, and we	;\
	 * always invalidate the hash table before we unload the TSB.	;\
	 * OR the 4M pfn offset into the TTE, clear the exec_perm bit	;\
	 * and set the exec_synth bit.					;\
	 */								;\
	sllx	tagtarget, TTARGET_VA_SHIFT, tagtarget			;\
	mov	tte, tmp1						;\
	ldxa	[ttepa]ASI_MEM, tte					;\
	srlx	tagtarget, TTARGET_VA_SHIFT, tagtarget			;\
	sethi	%hi(TSBTAG_INVALID), tmp2				;\
	brgez,a,pn tte, label/**/f					;\
	 sta	tmp2, [tsbep + TSBE_TAG]%asi		/* unlock */	;\
	or	tte, tmp1, tte						;\
	andn	tte, TTE_EXECPRM_INT, tte				;\
	or	tte, TTE_E_SYNTH_INT, tte				;\
	TSB_INSERT_UNLOCK_ENTRY(tsbep, tte, tagtarget, tmp1)		;\
label:

/*
 * Build a 4M pfn offset for a Panther 32M/256M page, for ITLB synthesis.
 *
 * tte = value of the TTE, used to get tte_size bits (ro)
 * tagaccess = tag access register, used to get 4M pfn bits (ro)
 * pfn = 4M pfn bits shifted to offset for tte (out)
 * tmp1 = scratch register
 * label = label to use for branch (text)
 */

#define	GET_4M_PFN_OFF(tte, tagaccess, pfn, tmp, label)			\
	/*								;\
	 * Get 4M bits from tagaccess for 32M, 256M pagesizes.		;\
	 * Return them, shifted, in pfn.				;\
	 */								;\
	srlx	tagaccess, MMU_PAGESHIFT4M, tagaccess			;\
	srlx	tte, TTE_SZ_SHFT, tmp		/* isolate the */	;\
	andcc	tmp, TTE_SZ_BITS, %g0		/* tte_size bits */	;\
	bz,a,pt %icc, label/**/f		/* if 0, is */		;\
	  and	tagaccess, 0x7, tagaccess	/* 32M page size */	;\
	and	tagaccess, 0x3f, tagaccess /* else 256M page size */	;\
label:									;\
	sllx	tagaccess, MMU_PAGESHIFT4M, pfn
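
/*
 * C sketch (not compiled) of the computation above: a 32M page spans
 * eight 4M pages (3 index bits) and a 256M page spans sixty-four
 * (6 index bits):
 *
 *	idx = tagaccess >> MMU_PAGESHIFT4M;
 *	if (((tte >> TTE_SZ_SHFT) & TTE_SZ_BITS) == 0)
 *		idx &= 0x7;		! 32M page
 *	else
 *		idx &= 0x3f;		! 256M page
 *	pfn = idx << MMU_PAGESHIFT4M;
 */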

/*
 * Add 4M TTE size code to a tte for a Panther 32M/256M page,
 * for ITLB synthesis.
 *
 * tte = value of the TTE, used to get tte_size bits (rw)
 * tmp1 = scratch register
 */

#define	SET_TTE4M_PN(tte, tmp)						\
	/*								;\
	 * Set 4M pagesize tte bits. 					;\
	 */								;\
	set	TTE4M, tmp						;\
	sllx	tmp, TTE_SZ_SHFT, tmp					;\
	or	tte, tmp, tte

/*
 * Load an entry into the TSB at TL=0.
 *
 * tsbep = pointer to the TSBE to load as va (ro)
 * tteva = pointer to the TTE to load as va (ro)
 * tagtarget = TSBE tag to load (which contains no context), synthesized 
 * to match va of MMU tag target register only (ro)
 * tmp1, tmp2 = scratch registers (clobbered)
 * label = label to use for branches (text)
 * %asi = ASI to use for TSB access
 */

#if defined(UTSB_PHYS)

#define	TSB_UPDATE(tsbep, tteva, tagtarget, tmp1, tmp2, label)		\
	/* can't rd tteva after locking tsb because it can tlb miss */	;\
	ldx	[tteva], tteva			/* load tte */		;\
	TSB_LOCK_ENTRY(tsbep, tmp1, tmp2, label)			;\
	sethi	%hi(TSBTAG_INVALID), tmp2				;\
	add	tsbep, TSBE_TAG, tmp1					;\
	brgez,a,pn tteva, label/**/f					;\
	 sta	tmp2, [tmp1]ASI_MEM			/* unlock */	;\
	TSB_INSERT_UNLOCK_ENTRY(tsbep, tteva, tagtarget, tmp1)		;\
label:

#else /* UTSB_PHYS */

#define	TSB_UPDATE(tsbep, tteva, tagtarget, tmp1, tmp2, label)		\
	/* can't rd tteva after locking tsb because it can tlb miss */	;\
	ldx	[tteva], tteva			/* load tte */		;\
	TSB_LOCK_ENTRY(tsbep, tmp1, tmp2, label)			;\
	sethi	%hi(TSBTAG_INVALID), tmp2				;\
	brgez,a,pn tteva, label/**/f					;\
	 sta	tmp2, [tsbep + TSBE_TAG]%asi		/* unlock */	;\
	TSB_INSERT_UNLOCK_ENTRY(tsbep, tteva, tagtarget, tmp1)		;\
label:

#endif /* UTSB_PHYS */

/*
 * Invalidate a TSB entry in the TSB.
 *
 * NOTE: TSBE_TAG is assumed to be zero.  There is a compile-time check
 *	 earlier to ensure this is true.  Thus when we are directly
 *	 referencing tsbep below, we are referencing the tte_tag field
 *	 of the TSBE.  If this offset ever changes, the code below
 *	 will need to be modified.
 *
 * tsbep = pointer to TSBE as va (ro)
 * tag = invalidation is done if this matches the TSBE tag (ro)
 * tmp1 - tmp3 = scratch registers (clobbered)
 * label = label name to use for branches (text)
 * %asi = ASI to use for TSB access
 */

#if defined(UTSB_PHYS)

#define	TSB_INVALIDATE(tsbep, tag, tmp1, tmp2, tmp3, label)		\
	lda	[tsbep]ASI_MEM, tmp1	/* tmp1 = tsbe tag */		;\
	sethi	%hi(TSBTAG_LOCKED), tmp2				;\
label/**/1:								;\
	cmp	tmp1, tmp2		/* see if tsbe is locked, if */	;\
	be,a,pn	%icc, label/**/1	/* so, loop until unlocked */	;\
	  lda	[tsbep]ASI_MEM, tmp1	/* reloading value each time */	;\
	ldxa	[tsbep]ASI_MEM, tmp3	/* tmp3 = tsbe tag */		;\
	cmp	tag, tmp3		/* compare tags */		;\
	bne,pt	%xcc, label/**/2	/* if different, do nothing */	;\
	  sethi	%hi(TSBTAG_INVALID), tmp3				;\
	casa	[tsbep]ASI_MEM, tmp1, tmp3 /* try to set tag invalid */	;\
	cmp	tmp1, tmp3		/* if not successful */		;\
	bne,a,pn %icc, label/**/1	/* start over from the top */	;\
	  lda	[tsbep]ASI_MEM, tmp1	/* reloading tsbe tag */	;\
label/**/2:	

#else /* UTSB_PHYS */

#define	TSB_INVALIDATE(tsbep, tag, tmp1, tmp2, tmp3, label)		\
	lda	[tsbep]%asi, tmp1	/* tmp1 = tsbe tag */		;\
	sethi	%hi(TSBTAG_LOCKED), tmp2				;\
label/**/1:								;\
	cmp	tmp1, tmp2		/* see if tsbe is locked, if */	;\
	be,a,pn	%icc, label/**/1	/* so, loop until unlocked */	;\
	  lda	[tsbep]%asi, tmp1	/* reloading value each time */	;\
	ldxa	[tsbep]%asi, tmp3	/* tmp3 = tsbe tag */		;\
	cmp	tag, tmp3		/* compare tags */		;\
	bne,pt	%xcc, label/**/2	/* if different, do nothing */	;\
	  sethi	%hi(TSBTAG_INVALID), tmp3				;\
	casa	[tsbep]%asi, tmp1, tmp3	/* try to set tag invalid */	;\
	cmp	tmp1, tmp3		/* if not successful */		;\
	bne,a,pn %icc, label/**/1	/* start over from the top */	;\
	  lda	[tsbep]%asi, tmp1	/* reloading tsbe tag */	;\
label/**/2:	

#endif /* UTSB_PHYS */
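
/*
 * C rendering (not compiled) of the invalidation protocol above;
 * atomic_cas_32() stands in for casa, and tag32p() is a hypothetical
 * pointer to the 32-bit high word of the tag:
 *
 *	for (;;) {
 *		while ((old = *tag32p(tsbep)) == TSBTAG_LOCKED)
 *			continue;		! spin while locked
 *		if (tsbep->tte_tag != tag)
 *			break;			! tags differ, do nothing
 *		if (atomic_cas_32(tag32p(tsbep), old,
 *		    TSBTAG_INVALID) == old)
 *			break;			! tag set invalid
 *	}
 */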

#if TSB_SOFTSZ_MASK < TSB_SZ_MASK
#error	- TSB_SOFTSZ_MASK too small
#endif


/*
 * An implementation of setx which will be hot patched at run time.
 * Since it is being hot patched, there is no value passed in.
 * Thus, essentially we are implementing
 *	setx value, tmp, dest
 * where value is RUNTIME_PATCH (aka 0) in this case.
 */
#define	RUNTIME_PATCH_SETX(dest, tmp)					\
	sethi	%hh(RUNTIME_PATCH), tmp					;\
	sethi	%lm(RUNTIME_PATCH), dest				;\
	or	tmp, %hm(RUNTIME_PATCH), tmp				;\
	or	dest, %lo(RUNTIME_PATCH), dest				;\
	sllx	tmp, 32, tmp						;\
	nop				/* for perf reasons */		;\
	or	tmp, dest, dest		/* contents of patched value */
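
/*
 * As a worked breakdown, the hot-patched fields reassemble the 64-bit
 * value exactly as the %hh/%hm/%lm/%lo operators split it:
 *
 *	value = (hh22 << 42) | (hm10 << 32) | (lm22 << 10) | lo10
 *
 * tmp accumulates bits 63:32 (sethi %hh, or %hm, sllx 32) and dest
 * accumulates bits 31:0 (sethi %lm, or %lo).
 */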


#endif /* lint */


#if defined (lint)

/*
 * sfmmu related subroutines
 */
uint_t
sfmmu_disable_intrs()
{ return(0); }

/* ARGSUSED */
void
sfmmu_enable_intrs(uint_t pstate_save)
{}

/* ARGSUSED */
int
sfmmu_alloc_ctx(sfmmu_t *sfmmup, int allocflag, struct cpu *cp, int shflag)
{ return(0); }
	
/*
 * Use cas; if the tte has changed underneath us then reread and try again.
 * In the case of a retry, it will update sttep with the new original.
 */
/* ARGSUSED */
int
sfmmu_modifytte(tte_t *sttep, tte_t *stmodttep, tte_t *dttep)
{ return(0); }

/*
 * Use cas: return 1 if the cas succeeded, -1 if the tte changed
 * underneath us (the cas failed), and 0 if no write was needed.
 */
/* ARGSUSED */
int
sfmmu_modifytte_try(tte_t *sttep, tte_t *stmodttep, tte_t *dttep)
{ return(0); }

/* ARGSUSED */
void
sfmmu_copytte(tte_t *sttep, tte_t *dttep)
{}

/*ARGSUSED*/
struct tsbe *
sfmmu_get_tsbe(uint64_t tsbeptr, caddr_t vaddr, int vpshift, int tsb_szc)
{ return(0); }

/*ARGSUSED*/
uint64_t
sfmmu_make_tsbtag(caddr_t va)
{ return(0); }

#else	/* lint */

	.seg	".data"
	.global	sfmmu_panic1
sfmmu_panic1:
	.asciz	"sfmmu_asm: interrupts already disabled"

	.global	sfmmu_panic3
sfmmu_panic3:
	.asciz	"sfmmu_asm: sfmmu_vatopfn called for user"

	.global	sfmmu_panic4
sfmmu_panic4:
	.asciz	"sfmmu_asm: 4M tsb pointer mis-match"

	.global	sfmmu_panic5
sfmmu_panic5:
	.asciz	"sfmmu_asm: no unlocked TTEs in TLB 0"

	.global	sfmmu_panic6
sfmmu_panic6:
	.asciz	"sfmmu_asm: interrupts not disabled"

	.global	sfmmu_panic7
sfmmu_panic7:
	.asciz	"sfmmu_asm: kernel as"

	.global	sfmmu_panic8
sfmmu_panic8:
	.asciz	"sfmmu_asm: gnum is zero"

	.global	sfmmu_panic9
sfmmu_panic9:
	.asciz	"sfmmu_asm: cnum is greater than MAX_SFMMU_CTX_VAL"
	
	.global	sfmmu_panic10
sfmmu_panic10:
	.asciz	"sfmmu_asm: valid SCD with no 3rd scd TSB"
	
        ENTRY(sfmmu_disable_intrs)
        rdpr    %pstate, %o0
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o0, sfmmu_di_l0, %g1)
#endif /* DEBUG */
        retl
          wrpr   %o0, PSTATE_IE, %pstate
        SET_SIZE(sfmmu_disable_intrs)
	
	ENTRY(sfmmu_enable_intrs)
        retl
          wrpr    %g0, %o0, %pstate
        SET_SIZE(sfmmu_enable_intrs)

/*
 * This routine is called both by resume() and sfmmu_get_ctx() to
 * allocate a new context for the process on an MMU.
 * If allocflag == 1, then alloc ctx when HAT mmu cnum == INVALID.
 * If allocflag == 0, then do not alloc ctx if HAT mmu cnum == INVALID, which
 * is the case when sfmmu_alloc_ctx is called from resume().
 *
 * The caller must disable interrupts before entering this routine.
 * To reduce ctx switch overhead, the code contains both 'fast path' and
 * 'slow path' code. The fast path code covers the common case where only
 * a quick check is needed and the real ctx allocation is not required.
 * It can be done without holding the per-process (PP) lock.
 * The 'slow path' code must be protected by the PP lock and performs ctx
 * allocation.
 * Hardware context register and HAT mmu cnum are updated accordingly.
 *
 * %o0 - sfmmup
 * %o1 - allocflag
 * %o2 - CPU
 * %o3 - sfmmu private/shared flag
 *
 * ret - 0: no ctx is allocated
 *       1: a ctx is allocated
 */
        ENTRY_NP(sfmmu_alloc_ctx)

#ifdef DEBUG
	sethi   %hi(ksfmmup), %g1
	ldx     [%g1 + %lo(ksfmmup)], %g1
	cmp     %g1, %o0
	bne,pt   %xcc, 0f
	  nop

	sethi   %hi(panicstr), %g1		! if kernel as, panic
        ldx     [%g1 + %lo(panicstr)], %g1
        tst     %g1
        bnz,pn  %icc, 7f
          nop

	sethi	%hi(sfmmu_panic7), %o0
	call	panic
	  or	%o0, %lo(sfmmu_panic7), %o0

7:
	retl
	  mov	%g0, %o0			! %o0 = ret = 0

0:
	PANIC_IF_INTR_ENABLED_PSTR(sfmmu_ei_l1, %g1)
#endif /* DEBUG */
	
	mov	%o3, %g1			! save sfmmu pri/sh flag in %g1
		
	! load global mmu_ctxp info
	ldx	[%o2 + CPU_MMU_CTXP], %o3		! %o3 = mmu_ctx_t ptr
        lduw	[%o2 + CPU_MMU_IDX], %g2		! %g2 = mmu index

	! load global mmu_ctxp gnum
	ldx	[%o3 + MMU_CTX_GNUM], %o4		! %o4 = mmu_ctxp->gnum

#ifdef DEBUG
	cmp	%o4, %g0		! mmu_ctxp->gnum should never be 0
	bne,pt	%xcc, 3f
	  nop
	
	sethi   %hi(panicstr), %g1	! test if panicstr is already set
        ldx     [%g1 + %lo(panicstr)], %g1
        tst     %g1
        bnz,pn  %icc, 1f
          nop
	
	sethi	%hi(sfmmu_panic8), %o0
	call	panic
	  or	%o0, %lo(sfmmu_panic8), %o0
1:	
	retl
	  mov	%g0, %o0			! %o0 = ret = 0
3:	
#endif

	! load HAT sfmmu_ctxs[mmuid] gnum, cnum

	sllx	%g2, SFMMU_MMU_CTX_SHIFT, %g2
	add	%o0, %g2, %g2		! %g2 = &sfmmu_ctxs[mmuid] - SFMMU_CTXS

	/*
	 * %g5 = sfmmu gnum returned
	 * %g6 = sfmmu cnum returned
	 * %g2 = &sfmmu_ctxs[mmuid] - SFMMU_CTXS
	 * %g4 = scratch
	 *
	 * Fast path code, do a quick check.
	 */
	SFMMU_MMUID_GNUM_CNUM(%g2, %g5, %g6, %g4)
	
	cmp	%g6, INVALID_CONTEXT		! hat cnum == INVALID ??
	bne,pt	%icc, 1f			! valid hat cnum, check gnum
	  nop

	! cnum == INVALID, check allocflag
	mov	%g0, %g4	! %g4 = ret = 0
	brz,pt  %o1, 8f		! allocflag == 0, skip ctx allocation, bail
	  mov	%g6, %o1

	! (invalid HAT cnum) && (allocflag == 1)
	ba,pt	%icc, 2f
	  nop
1:
	! valid HAT cnum, check gnum
	cmp	%g5, %o4
	mov	1, %g4				!%g4 = ret = 1
	be,a,pt	%icc, 8f			! gnum unchanged, go to done
	  mov	%g6, %o1

2:
	/* 
	 * Grab per process (PP) sfmmu_ctx_lock spinlock,
	 * followed by the 'slow path' code.
	 */
	ldstub	[%o0 + SFMMU_CTX_LOCK], %g3	! %g3 = per process (PP) lock
3:
	brz	%g3, 5f
	  nop
4:
	brnz,a,pt       %g3, 4b				! spin if lock is 1
	  ldub	[%o0 + SFMMU_CTX_LOCK], %g3
	ba	%xcc, 3b				! retry the lock
	  ldstub	[%o0 + SFMMU_CTX_LOCK], %g3    ! %g3 = PP lock

5:
	membar  #LoadLoad
	/*
	 * %g5 = sfmmu gnum returned
	 * %g6 = sfmmu cnum returned
	 * %g2 = &sfmmu_ctxs[mmuid] - SFMMU_CTXS
	 * %g4 = scratch
	 */
	SFMMU_MMUID_GNUM_CNUM(%g2, %g5, %g6, %g4)

	cmp	%g6, INVALID_CONTEXT		! hat cnum == INVALID ??
	bne,pt	%icc, 1f			! valid hat cnum, check gnum
	  nop

	! cnum == INVALID, check allocflag
	mov	%g0, %g4	! %g4 = ret = 0
	brz,pt	%o1, 2f		! allocflag == 0, called from resume, set hw
	  mov	%g6, %o1

	! (invalid HAT cnum) && (allocflag == 1)
	ba,pt	%icc, 6f
	  nop
1:
	! valid HAT cnum, check gnum
	cmp	%g5, %o4
	mov	1, %g4				! %g4 = ret  = 1
	be,a,pt	%icc, 2f			! gnum unchanged, go to done
	  mov	%g6, %o1

	ba,pt	%icc, 6f
	  nop
2:
	membar  #LoadStore|#StoreStore
	ba,pt %icc, 8f
	  clrb  [%o0 + SFMMU_CTX_LOCK]
6:
	/*
	 * We get here if we do not have a valid context, or if the HAT
	 * gnum does not match the global gnum.  We hold the sfmmu_ctx_lock
	 * spinlock.  Allocate a new context.
	 *
	 * %o3 = mmu_ctxp
	 */
	add	%o3, MMU_CTX_CNUM, %g3
	ld	[%o3 + MMU_CTX_NCTXS], %g4

	/*
	 * %g2 = &sfmmu_ctxs[mmuid] - SFMMU_CTXS
	 * %g3 = mmu cnum address
	 * %g4 = mmu nctxs
	 *
	 * %o0 = sfmmup
	 * %o1 = mmu current cnum value (used as new cnum)
	 * %o4 = mmu gnum
	 *
	 * %o5 = scratch
	 */
	ld	[%g3], %o1
0:
	cmp	%o1, %g4
	bl,a,pt %icc, 1f
	  add	%o1, 1, %o5		! %o5 = mmu_ctxp->cnum + 1

	/*
	 * cnum has reached max; bail so that wrap around can be performed later.
	 */
	set	INVALID_CONTEXT, %o1
	/*
	 * When the routine is called by shared ctx, we want to set
	 * both private and shared ctx regs to INVALID. In order to
	 * do so, we set the sfmmu priv/shared flag to 'private' regardless,
	 * so that the private ctx reg will be set to invalid.
	 * Note that values written to private context register are
	 * automatically written to shared context register as well.
	 */
	mov	%g0, %g1		! %g1 = sfmmu private/shared flag
	mov	%g0, %g4		! %g4 = ret = 0
	
	membar  #LoadStore|#StoreStore
	ba,pt	%icc, 8f
	  clrb	[%o0 + SFMMU_CTX_LOCK]
1:
	! %g3 = addr of mmu_ctxp->cnum
	! %o5 = mmu_ctxp->cnum + 1
	cas	[%g3], %o1, %o5
	cmp	%o1, %o5
	bne,a,pn %xcc, 0b	! cas failed
	  ld	[%g3], %o1

#ifdef DEBUG
        set	MAX_SFMMU_CTX_VAL, %o5
	cmp	%o1, %o5
	ble,pt %icc, 2f
	  nop
	
	sethi	%hi(sfmmu_panic9), %o0
	call	panic
	  or	%o0, %lo(sfmmu_panic9), %o0
2:	
#endif
	! update hat gnum and cnum
	sllx	%o4, SFMMU_MMU_GNUM_RSHIFT, %o4
	or	%o4, %o1, %o4
	stx	%o4, [%g2 + SFMMU_CTXS]

	membar  #LoadStore|#StoreStore
	clrb	[%o0 + SFMMU_CTX_LOCK]
	
	mov	1, %g4			! %g4 = ret = 1
8:
	/*
	 * program the secondary context register
	 *
	 * %o1 = cnum
	 * %g1 = sfmmu private/shared flag (0:private,  1:shared)
	 */

#ifdef	sun4u
	ldub	[%o0 + SFMMU_CEXT], %o2
	sll	%o2, CTXREG_EXT_SHIFT, %o2
	or	%o1, %o2, %o1
#endif
	SET_SECCTX(%o1, %g1, %o4, %o5)
	
	retl
	  mov	%g4, %o0			! %o0 = ret

	SET_SIZE(sfmmu_alloc_ctx)

	ENTRY_NP(sfmmu_modifytte)
	ldx	[%o2], %g3			/* current */
	ldx	[%o0], %g1			/* original */
2:
	ldx	[%o1], %g2			/* modified */
	cmp	%g2, %g3			/* is modified = current? */
	be,a,pt	%xcc,1f				/* yes, don't write */
	stx	%g3, [%o0]			/* update new original */
	casx	[%o2], %g1, %g2
	cmp	%g1, %g2
	be,pt	%xcc, 1f			/* cas succeeded - return */
	  nop
	ldx	[%o2], %g3			/* new current */
	stx	%g3, [%o0]			/* save as new original */
	ba,pt	%xcc, 2b
	  mov	%g3, %g1
1:	retl
	membar	#StoreLoad
	SET_SIZE(sfmmu_modifytte)
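
/*
 * C equivalent (not compiled) of the retry loop above, with
 * atomic_cas_64() standing in for casx:
 *
 *	cur = *dttep;
 *	orig = *sttep;
 *	for (;;) {
 *		mod = *stmodttep;
 *		if (mod == cur) {
 *			*sttep = cur;		! nothing to write
 *			break;
 *		}
 *		if (atomic_cas_64(dttep, orig, mod) == orig)
 *			break;			! cas succeeded
 *		orig = cur = *dttep;		! reread and retry,
 *		*sttep = cur;			! saving new original
 *	}
 */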

	ENTRY_NP(sfmmu_modifytte_try)
	ldx	[%o1], %g2			/* modified */
	ldx	[%o2], %g3			/* current */
	ldx	[%o0], %g1			/* original */
	cmp	%g3, %g2			/* is modified = current? */
	be,a,pn %xcc,1f				/* yes, don't write */
	mov	0, %o1				/* as if cas failed. */
		
	casx	[%o2], %g1, %g2
	membar	#StoreLoad
	cmp	%g1, %g2
	movne	%xcc, -1, %o1			/* cas failed. */
	move	%xcc, 1, %o1			/* cas succeeded. */
1:
	stx	%g2, [%o0]			/* report "current" value */
	retl
	mov	%o1, %o0
	SET_SIZE(sfmmu_modifytte_try)

	ENTRY_NP(sfmmu_copytte)
	ldx	[%o0], %g1
	retl
	stx	%g1, [%o1]
	SET_SIZE(sfmmu_copytte)


	/*
	 * Calculate a TSB entry pointer for the given TSB, va, pagesize.
	 * %o0 = TSB base address (in), pointer to TSB entry (out)
	 * %o1 = vaddr (in)
	 * %o2 = vpshift (in)
	 * %o3 = tsb size code (in)
	 * %o4 = scratch register
	 */
	ENTRY_NP(sfmmu_get_tsbe)
	GET_TSBE_POINTER(%o2, %o0, %o1, %o3, %o4)
	retl
	nop
	SET_SIZE(sfmmu_get_tsbe)

	/*
	 * Return a TSB tag for the given va.
	 * %o0 = va (in/clobbered)
	 * %o0 = va shifted to be in tsb tag format (with no context) (out)
	 */
	ENTRY_NP(sfmmu_make_tsbtag)
	retl
	srln	%o0, TTARGET_VA_SHIFT, %o0
	SET_SIZE(sfmmu_make_tsbtag)

#endif /* lint */

/*
 * Other sfmmu primitives
 */


#if defined (lint)
void
sfmmu_patch_ktsb(void)
{
}

void
sfmmu_kpm_patch_tlbm(void)
{
}

void
sfmmu_kpm_patch_tsbm(void)
{
}

void
sfmmu_patch_shctx(void)
{
}

/* ARGSUSED */
void
sfmmu_load_tsbe(struct tsbe *tsbep, uint64_t vaddr, tte_t *ttep, int phys)
{
}

/* ARGSUSED */
void
sfmmu_unload_tsbe(struct tsbe *tsbep, uint64_t vaddr, int phys)
{
}

/* ARGSUSED */
void
sfmmu_kpm_load_tsb(caddr_t addr, tte_t *ttep, int vpshift)
{
}

/* ARGSUSED */
void
sfmmu_kpm_unload_tsb(caddr_t addr, int vpshift)
{
}

#else /* lint */

#define	I_SIZE		4
		
	ENTRY_NP(sfmmu_fix_ktlb_traptable)
	/*
	 * %o0 = start of patch area
	 * %o1 = size code of TSB to patch
	 * %o3 = scratch
	 */
	/* fix sll */
	ld	[%o0], %o3			/* get sll */
	sub	%o3, %o1, %o3			/* decrease shift by tsb szc */
	st	%o3, [%o0]			/* write sll */
	flush	%o0
	/* fix srl */
	add	%o0, I_SIZE, %o0		/* goto next instr. */
	ld	[%o0], %o3			/* get srl */
	sub	%o3, %o1, %o3			/* decrease shift by tsb szc */
	st	%o3, [%o0]			/* write srl */
	retl
	flush	%o0
	SET_SIZE(sfmmu_fix_ktlb_traptable)

	ENTRY_NP(sfmmu_fixup_ktsbbase)
	/*
	 * %o0 = start of patch area
	 * %o5 = kernel virtual or physical tsb base address
	 * %o2, %o3 are used as scratch registers.
	 */
	/* fixup sethi instruction */
	ld	[%o0], %o3
	srl	%o5, 10, %o2			! offset is bits 32:10
	or	%o3, %o2, %o3			! set imm22
	st	%o3, [%o0]
	/* fixup offset of lduw/ldx */
	add	%o0, I_SIZE, %o0		! next instr
	ld	[%o0], %o3
	and	%o5, 0x3ff, %o2			! set imm13 to bits 9:0
	or	%o3, %o2, %o3
	st	%o3, [%o0]
	retl
	flush	%o0
	SET_SIZE(sfmmu_fixup_ktsbbase)

	ENTRY_NP(sfmmu_fixup_setx)
	/*
	 * %o0 = start of patch area
	 * %o4 = 64 bit value to patch
	 * %o2, %o3 are used as scratch registers.
	 *
	 * Note: Assuming that all parts of the instructions which need to be
	 *	 patched correspond to RUNTIME_PATCH (aka 0)
	 *
	 * Note the implementation of setx which is being patched is as follows:
	 *
	 * sethi   %hh(RUNTIME_PATCH), tmp
	 * sethi   %lm(RUNTIME_PATCH), dest
	 * or      tmp, %hm(RUNTIME_PATCH), tmp
	 * or      dest, %lo(RUNTIME_PATCH), dest
	 * sllx    tmp, 32, tmp
	 * nop
	 * or      tmp, dest, dest
	 *
	 * which differs from the implementation in the 
	 * "SPARC Architecture Manual"
	 */
	/* fixup sethi instruction */
	ld	[%o0], %o3
	srlx	%o4, 42, %o2			! bits [63:42]
	or	%o3, %o2, %o3			! set imm22
	st	%o3, [%o0]
	/* fixup sethi instruction */
	add	%o0, I_SIZE, %o0		! next instr
	ld	[%o0], %o3
	sllx	%o4, 32, %o2			! clear upper bits
	srlx	%o2, 42, %o2			! bits [31:10]
	or	%o3, %o2, %o3			! set imm22
	st	%o3, [%o0]
	/* fixup or instruction */
	add	%o0, I_SIZE, %o0		! next instr
	ld	[%o0], %o3
	srlx	%o4, 32, %o2			! bits [63:32]
	and	%o2, 0x3ff, %o2			! bits [41:32]
	or	%o3, %o2, %o3			! set imm
	st	%o3, [%o0]
	/* fixup or instruction */
	add	%o0, I_SIZE, %o0		! next instr
	ld	[%o0], %o3
	and	%o4, 0x3ff, %o2			! bits [9:0]
	or	%o3, %o2, %o3			! set imm
	st	%o3, [%o0]
	retl
	flush	%o0
	SET_SIZE(sfmmu_fixup_setx)
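
/*
 * As a worked breakdown, the four patched immediates place these bit
 * ranges of the 64-bit value (imm22 for sethi, low 10 bits for or):
 *
 *	sethi	val<63:42>, tmp
 *	sethi	val<31:10>, dest
 *	or	tmp, val<41:32>, tmp
 *	or	dest, val<9:0>, dest
 *
 * so that (tmp << 32) | dest reassembles the original value.
 */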

	ENTRY_NP(sfmmu_fixup_or)
	/*
	 * %o0 = start of patch area
	 * %o4 = 32 bit value to patch
	 * %o2, %o3 are used as scratch registers.
	 * Note: Assuming that all parts of the instructions which need to be
	 *	 patched correspond to RUNTIME_PATCH (aka 0)
	 */
	ld	[%o0], %o3
	and	%o4, 0x3ff, %o2			! bits [9:0]
	or	%o3, %o2, %o3			! set imm
	st	%o3, [%o0]
	retl
	flush	%o0
	SET_SIZE(sfmmu_fixup_or)

	ENTRY_NP(sfmmu_fixup_shiftx)
	/*
	 * %o0 = start of patch area
	 * %o4 = signed int immediate value to add to sllx/srlx imm field
	 * %o2, %o3 are used as scratch registers.
	 *
	 * sllx/srlx store the 6 bit immediate value in the lowest order bits
	 * so we do a simple add.  The caller must be careful to prevent
	 * overflow, which could easily occur if the initial value is nonzero!
	 */
	ld	[%o0], %o3			! %o3 = instruction to patch
	and	%o3, 0x3f, %o2			! %o2 = existing imm value
	add	%o2, %o4, %o2			! %o2 = new imm value
	andn	%o3, 0x3f, %o3			! clear old imm value
	and	%o2, 0x3f, %o2			! truncate new imm value
	or	%o3, %o2, %o3			! set new imm value
	st	%o3, [%o0]			! store updated instruction
	retl
	flush	%o0
	SET_SIZE(sfmmu_fixup_shiftx)

	ENTRY_NP(sfmmu_fixup_mmu_asi)
	/*
	 * Patch imm_asi of all ldda instructions in the MMU
	 * trap handlers.  We search MMU_PATCH_INSTR instructions
	 * starting from the itlb miss handler (trap 0x64).
	 * %o0 = address of tt[0,1]_itlbmiss
	 * %o1 = imm_asi to setup, shifted by appropriate offset.
	 * %o3 = number of instructions to search
	 * %o4 = reserved by caller: called from leaf routine
	 */
1:	ldsw	[%o0], %o2			! load instruction to %o2
	brgez,pt %o2, 2f
	  srl	%o2, 30, %o5
	btst	1, %o5				! test bit 30; skip if not set
	bz,pt	%icc, 2f
	  sllx	%o2, 39, %o5			! bit 24 -> bit 63
	srlx	%o5, 58, %o5			! isolate op3 part of opcode
	xor	%o5, 0x13, %o5			! 01 0011 binary == ldda
	brnz,pt	%o5, 2f				! skip if not a match
	  or	%o2, %o1, %o2			! or in imm_asi
	st	%o2, [%o0]			! write patched instruction
2:	dec	%o3
	brnz,a,pt %o3, 1b			! loop until we're done
	  add	%o0, I_SIZE, %o0
	retl
	flush	%o0
	SET_SIZE(sfmmu_fixup_mmu_asi)

	/*
	 * Patch immediate ASI used to access the TSB in the
	 * trap table.
	 * inputs: %o0 = value of ktsb_phys
	 */
	ENTRY_NP(sfmmu_patch_mmu_asi)
	mov	%o7, %o4			! save return pc in %o4
	movrnz	%o0, ASI_QUAD_LDD_PHYS, %o3
	movrz	%o0, ASI_NQUAD_LD, %o3
	sll	%o3, 5, %o1			! imm_asi offset
	mov	6, %o3				! number of instructions
	sethi	%hi(dktsb), %o0			! to search
	call	sfmmu_fixup_mmu_asi		! patch kdtlb miss
	  or	%o0, %lo(dktsb), %o0
	mov	6, %o3				! number of instructions
	sethi	%hi(dktsb4m), %o0		! to search
	call	sfmmu_fixup_mmu_asi		! patch kdtlb4m miss
	  or	%o0, %lo(dktsb4m), %o0
	mov	6, %o3				! number of instructions
	sethi	%hi(iktsb), %o0			! to search
	call	sfmmu_fixup_mmu_asi		! patch kitlb miss
	  or	%o0, %lo(iktsb), %o0
	mov	6, %o3				! number of instructions
	sethi	%hi(iktsb4m), %o0		! to search
	call	sfmmu_fixup_mmu_asi		! patch kitlb4m miss
	  or	%o0, %lo(iktsb4m), %o0
	mov	%o4, %o7			! restore return pc -- leaf
	retl
	nop
	SET_SIZE(sfmmu_patch_mmu_asi)

	ENTRY_NP(sfmmu_patch_ktsb)
	/*
	 * We need to fix iktsb, dktsb, et al.
	 */
	save	%sp, -SA(MINFRAME), %sp
	set	ktsb_phys, %o1
	ld	[%o1], %o4
	set	ktsb_base, %o5
	set	ktsb4m_base, %l1
	brz,pt	%o4, 1f
	  nop
	set	ktsb_pbase, %o5
	set	ktsb4m_pbase, %l1
1:
	sethi	%hi(ktsb_szcode), %o1
	ld	[%o1 + %lo(ktsb_szcode)], %o1	/* %o1 = ktsb size code */

	sethi	%hi(iktsb), %o0
	call	sfmmu_fix_ktlb_traptable
	  or	%o0, %lo(iktsb), %o0

	sethi	%hi(dktsb), %o0
	call	sfmmu_fix_ktlb_traptable
	  or	%o0, %lo(dktsb), %o0

	sethi	%hi(ktsb4m_szcode), %o1
	ld	[%o1 + %lo(ktsb4m_szcode)], %o1	/* %o1 = ktsb4m size code */

	sethi	%hi(iktsb4m), %o0
	call	sfmmu_fix_ktlb_traptable
	  or	%o0, %lo(iktsb4m), %o0
	
	sethi	%hi(dktsb4m), %o0
	call	sfmmu_fix_ktlb_traptable
	  or	%o0, %lo(dktsb4m), %o0

#ifndef sun4v
	mov	ASI_N, %o2
	movrnz	%o4, ASI_MEM, %o2	! setup kernel 32bit ASI to patch
	mov	%o2, %o4		! sfmmu_fixup_or needs this in %o4
	sethi	%hi(tsb_kernel_patch_asi), %o0
	call	sfmmu_fixup_or
	  or	%o0, %lo(tsb_kernel_patch_asi), %o0
#endif

	ldx 	[%o5], %o4		! load ktsb base addr (VA or PA)

	sethi	%hi(dktsbbase), %o0
	call	sfmmu_fixup_setx	! patch value of ktsb base addr
	  or	%o0, %lo(dktsbbase), %o0

	sethi	%hi(iktsbbase), %o0
	call	sfmmu_fixup_setx	! patch value of ktsb base addr
	  or	%o0, %lo(iktsbbase), %o0

	sethi	%hi(sfmmu_kprot_patch_ktsb_base), %o0
	call	sfmmu_fixup_setx	! patch value of ktsb base addr
	  or	%o0, %lo(sfmmu_kprot_patch_ktsb_base), %o0

#ifdef sun4v
	sethi	%hi(sfmmu_dslow_patch_ktsb_base), %o0
	call	sfmmu_fixup_setx	! patch value of ktsb base addr
	  or	%o0, %lo(sfmmu_dslow_patch_ktsb_base), %o0
#endif /* sun4v */

	ldx 	[%l1], %o4		! load ktsb4m base addr (VA or PA)

	sethi	%hi(dktsb4mbase), %o0
	call	sfmmu_fixup_setx	! patch value of ktsb4m base addr
	  or	%o0, %lo(dktsb4mbase), %o0

	sethi	%hi(iktsb4mbase), %o0
	call	sfmmu_fixup_setx	! patch value of ktsb4m base addr
	  or	%o0, %lo(iktsb4mbase), %o0
	
	sethi	%hi(sfmmu_kprot_patch_ktsb4m_base), %o0
	call	sfmmu_fixup_setx	! patch value of ktsb4m base addr
	  or	%o0, %lo(sfmmu_kprot_patch_ktsb4m_base), %o0

#ifdef sun4v
	sethi	%hi(sfmmu_dslow_patch_ktsb4m_base), %o0
	call	sfmmu_fixup_setx	! patch value of ktsb4m base addr
	  or	%o0, %lo(sfmmu_dslow_patch_ktsb4m_base), %o0
#endif /* sun4v */

	set	ktsb_szcode, %o4
	ld	[%o4], %o4
	sethi	%hi(sfmmu_kprot_patch_ktsb_szcode), %o0
	call	sfmmu_fixup_or		! patch value of ktsb_szcode
	  or	%o0, %lo(sfmmu_kprot_patch_ktsb_szcode), %o0

#ifdef sun4v
	sethi	%hi(sfmmu_dslow_patch_ktsb_szcode), %o0
	call	sfmmu_fixup_or		! patch value of ktsb_szcode
	  or	%o0, %lo(sfmmu_dslow_patch_ktsb_szcode), %o0
#endif /* sun4v */

	set	ktsb4m_szcode, %o4
	ld	[%o4], %o4
	sethi	%hi(sfmmu_kprot_patch_ktsb4m_szcode), %o0
	call	sfmmu_fixup_or		! patch value of ktsb4m_szcode
	  or	%o0, %lo(sfmmu_kprot_patch_ktsb4m_szcode), %o0

#ifdef sun4v
	sethi	%hi(sfmmu_dslow_patch_ktsb4m_szcode), %o0
	call	sfmmu_fixup_or		! patch value of ktsb4m_szcode
	  or	%o0, %lo(sfmmu_dslow_patch_ktsb4m_szcode), %o0
#endif /* sun4v */

	ret
	restore
	SET_SIZE(sfmmu_patch_ktsb)

	ENTRY_NP(sfmmu_kpm_patch_tlbm)
	/*
	 * Fixup trap handlers in common segkpm case.  This is reserved
	 * for future use should kpm TSB be changed to be other than the
	 * kernel TSB.
	 */
	retl
	nop
	SET_SIZE(sfmmu_kpm_patch_tlbm)

	ENTRY_NP(sfmmu_kpm_patch_tsbm)
	/*
	 * nop the branch to sfmmu_kpm_dtsb_miss_small 
	 * in the case where we are using large pages for
	 * seg_kpm (and hence must probe the second TSB for
	 * seg_kpm VAs)
	 */
	set	dktsb4m_kpmcheck_small, %o0
	MAKE_NOP_INSTR(%o1)
	st	%o1, [%o0]
	flush	%o0
	retl
	nop
	SET_SIZE(sfmmu_kpm_patch_tsbm)

	ENTRY_NP(sfmmu_patch_utsb)
#ifdef UTSB_PHYS
	retl
	nop
#else /* UTSB_PHYS */
	/*
	 * We need to hot patch utsb_vabase and utsb4m_vabase
	 */
	save	%sp, -SA(MINFRAME), %sp

	/* patch value of utsb_vabase */
	set	utsb_vabase, %o1
	ldx	[%o1], %o4
	sethi	%hi(sfmmu_uprot_get_1st_tsbe_ptr), %o0
	call	sfmmu_fixup_setx
	  or	%o0, %lo(sfmmu_uprot_get_1st_tsbe_ptr), %o0
	sethi	%hi(sfmmu_uitlb_get_1st_tsbe_ptr), %o0
	call	sfmmu_fixup_setx
	  or	%o0, %lo(sfmmu_uitlb_get_1st_tsbe_ptr), %o0
	sethi	%hi(sfmmu_udtlb_get_1st_tsbe_ptr), %o0
	call	sfmmu_fixup_setx
	  or	%o0, %lo(sfmmu_udtlb_get_1st_tsbe_ptr), %o0

	/* patch value of utsb4m_vabase */
	set	utsb4m_vabase, %o1
	ldx	[%o1], %o4
	sethi	%hi(sfmmu_uprot_get_2nd_tsb_base), %o0
	call	sfmmu_fixup_setx
	  or	%o0, %lo(sfmmu_uprot_get_2nd_tsb_base), %o0
	sethi	%hi(sfmmu_uitlb_get_2nd_tsb_base), %o0
	call	sfmmu_fixup_setx
	  or	%o0, %lo(sfmmu_uitlb_get_2nd_tsb_base), %o0
	sethi	%hi(sfmmu_udtlb_get_2nd_tsb_base), %o0
	call	sfmmu_fixup_setx
	  or	%o0, %lo(sfmmu_udtlb_get_2nd_tsb_base), %o0

	/*
	 * Patch TSB base register masks and shifts if needed.
	 * By default the TSB base register contents are set up for 4M slab.
	 * If we're using a smaller slab size and reserved VA range we need
	 * to patch up those values here.
	 */
	set	tsb_slab_shift, %o1
	set	MMU_PAGESHIFT4M, %o4
	lduw	[%o1], %o3
	subcc	%o4, %o3, %o4
	bz,pt	%icc, 1f
	  /* delay slot safe */

	/* patch reserved VA range size if needed. */
	sethi	%hi(sfmmu_tsb_1st_resv_offset), %o0
	call	sfmmu_fixup_shiftx
	  or	%o0, %lo(sfmmu_tsb_1st_resv_offset), %o0
	call	sfmmu_fixup_shiftx
	  add	%o0, I_SIZE, %o0
	sethi	%hi(sfmmu_tsb_2nd_resv_offset), %o0
	call	sfmmu_fixup_shiftx
	  or	%o0, %lo(sfmmu_tsb_2nd_resv_offset), %o0
	call	sfmmu_fixup_shiftx
	  add	%o0, I_SIZE, %o0
1:
	/* patch TSBREG_VAMASK used to set up TSB base register */
	set	tsb_slab_mask, %o1
	ldx	[%o1], %o4
	sethi	%hi(sfmmu_tsb_1st_tsbreg_vamask), %o0
	call	sfmmu_fixup_or
	  or	%o0, %lo(sfmmu_tsb_1st_tsbreg_vamask), %o0
	sethi	%hi(sfmmu_tsb_2nd_tsbreg_vamask), %o0
	call	sfmmu_fixup_or
	  or	%o0, %lo(sfmmu_tsb_2nd_tsbreg_vamask), %o0

	ret
	restore
#endif /* UTSB_PHYS */
	SET_SIZE(sfmmu_patch_utsb)

	ENTRY_NP(sfmmu_patch_shctx)
#ifdef sun4u
	retl
	  nop
#else /* sun4u */
	set	sfmmu_shctx_cpu_mondo_patch, %o0
	MAKE_JMP_INSTR(5, %o1, %o2)	! jmp       %g5
	st	%o1, [%o0]
	flush	%o0
	MAKE_NOP_INSTR(%o1)
	add	%o0, I_SIZE, %o0	! next instr
	st	%o1, [%o0]
	flush	%o0

	set	sfmmu_shctx_user_rtt_patch, %o0
	st      %o1, [%o0]		! nop 1st instruction
	flush	%o0
	add     %o0, I_SIZE, %o0
	st      %o1, [%o0]		! nop 2nd instruction
	flush	%o0
	add     %o0, I_SIZE, %o0
	st      %o1, [%o0]		! nop 3rd instruction
	flush	%o0
	add     %o0, I_SIZE, %o0
	st      %o1, [%o0]		! nop 4th instruction
	flush	%o0
	add     %o0, I_SIZE, %o0
	st      %o1, [%o0]		! nop 5th instruction
	retl
	  flush	%o0
#endif /* sun4u */
	SET_SIZE(sfmmu_patch_shctx)

	/*
	 * Routine that loads an entry into a tsb using virtual addresses.
	 * Locking is required since all cpus can use the same TSB.
	 * Note that it is no longer required to have a valid context
	 * when calling this function.
	 */
	ENTRY_NP(sfmmu_load_tsbe)
	/*
	 * %o0 = pointer to tsbe to load
	 * %o1 = tsb tag
	 * %o2 = virtual pointer to TTE
	 * %o3 = 1 if physical address in %o0 else 0
	 */
	rdpr	%pstate, %o5
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o5, sfmmu_di_l2, %g1)
#endif /* DEBUG */

	wrpr	%o5, PSTATE_IE, %pstate		/* disable interrupts */

	SETUP_TSB_ASI(%o3, %g3)
	TSB_UPDATE(%o0, %o2, %o1, %g1, %g2, 1)

	wrpr	%g0, %o5, %pstate		/* enable interrupts */
	
	retl
	membar	#StoreStore|#StoreLoad
	SET_SIZE(sfmmu_load_tsbe)

	/*
	 * Flush TSB of a given entry if the tag matches.
	 */ 
	ENTRY(sfmmu_unload_tsbe)
	/*
	 * %o0 = pointer to tsbe to be flushed
	 * %o1 = tag to match
	 * %o2 = 1 if physical address in %o0 else 0
	 */
	SETUP_TSB_ASI(%o2, %g1)
	TSB_INVALIDATE(%o0, %o1, %g1, %o2, %o3, unload_tsbe)
	retl
	membar	#StoreStore|#StoreLoad
	SET_SIZE(sfmmu_unload_tsbe)

	/*
	 * Routine that loads a TTE into the kpm TSB from C code.
	 * Locking is required since kpm TSB is shared among all CPUs.
	 */
	ENTRY_NP(sfmmu_kpm_load_tsb)
	/*
	 * %o0 = vaddr
	 * %o1 = ttep
	 * %o2 = virtpg to TSB index shift (e.g. TTE pagesize shift)
	 */
	rdpr	%pstate, %o5			! %o5 = saved pstate
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o5, sfmmu_di_l3, %g1)
#endif /* DEBUG */
	wrpr	%o5, PSTATE_IE, %pstate		! disable interrupts

#ifndef sun4v
	sethi	%hi(ktsb_phys), %o4
	mov	ASI_N, %o3
	ld	[%o4 + %lo(ktsb_phys)], %o4
	movrnz	%o4, ASI_MEM, %o3
	mov	%o3, %asi
#endif
	mov	%o0, %g1			! %g1 = vaddr

	/* GET_KPM_TSBE_POINTER(vpshift, tsbp, vaddr (clobbers), tmp1, tmp2) */
	GET_KPM_TSBE_POINTER(%o2, %g2, %g1, %o3, %o4)
	/* %g2 = tsbep, %g1 clobbered */

	srlx	%o0, TTARGET_VA_SHIFT, %g1;	! %g1 = tag target
	/* TSB_UPDATE(tsbep, tteva, tagtarget, tmp1, tmp2, label) */
	TSB_UPDATE(%g2, %o1, %g1, %o3, %o4, 1)

	wrpr	%g0, %o5, %pstate		! enable interrupts
	retl
	  membar #StoreStore|#StoreLoad
	SET_SIZE(sfmmu_kpm_load_tsb)

	/*
	 * Routine that shoots down a TTE in the kpm TSB or in the
	 * kernel TSB depending on virtpg. Locking is required since
	 * kpm/kernel TSB is shared among all CPUs.
	 */
	ENTRY_NP(sfmmu_kpm_unload_tsb)
	/*
	 * %o0 = vaddr
	 * %o1 = virtpg to TSB index shift (e.g. TTE page shift)
	 */
#ifndef sun4v
	sethi	%hi(ktsb_phys), %o4
	mov	ASI_N, %o3
	ld	[%o4 + %lo(ktsb_phys)], %o4
	movrnz	%o4, ASI_MEM, %o3
	mov	%o3, %asi
#endif
	mov	%o0, %g1			! %g1 = vaddr

	/* GET_KPM_TSBE_POINTER(vpshift, tsbp, vaddr (clobbers), tmp1, tmp2) */
	GET_KPM_TSBE_POINTER(%o1, %g2, %g1, %o3, %o4)
	/* %g2 = tsbep, %g1 clobbered */

	srlx	%o0, TTARGET_VA_SHIFT, %g1;	! %g1 = tag target
	/* TSB_INVALIDATE(tsbep, tag, tmp1, tmp2, tmp3, label) */
	TSB_INVALIDATE(%g2, %g1, %o3, %o4, %o1, kpm_tsbinval)

	retl
	  membar	#StoreStore|#StoreLoad
	SET_SIZE(sfmmu_kpm_unload_tsb)

#endif /* lint */


#if defined (lint)

/*ARGSUSED*/
pfn_t
sfmmu_ttetopfn(tte_t *tte, caddr_t vaddr)
{ return(0); }

#else /* lint */

	ENTRY_NP(sfmmu_ttetopfn)
	ldx	[%o0], %g1			/* read tte */
	TTETOPFN(%g1, %o1, sfmmu_ttetopfn_l1, %g2, %g3, %g4)
	/*
	 * g1 = pfn
	 */
	retl
	mov	%g1, %o0
	SET_SIZE(sfmmu_ttetopfn)

#endif /* !lint */


#if defined (lint)
/*
 * sfmmu_hblk_hash_add is the assembly primitive for adding hmeblks to
 * the hash list.
 */
/* ARGSUSED */
void
sfmmu_hblk_hash_add(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
	uint64_t hblkpa)
{
}

/*
 * sfmmu_hblk_hash_rm is the assembly primitive to remove hmeblks from the
 * hash list.
 */
/* ARGSUSED */
void
sfmmu_hblk_hash_rm(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
	uint64_t hblkpa, struct hme_blk *prev_hblkp)
{
}
#else /* lint */

/*
 * Functions to grab/release hme bucket list lock.  I only use a byte
 * instead of the whole int because eventually we might want to
 * put some counters on the other bytes (of course, these routines would
 * have to change).  The code that grabs this lock should execute
 * with interrupts disabled and hold the lock for the least amount of time
 * possible.
 */

/*
 * Even though hmeh_listlock is updated using pa there's no need to flush
 * dcache since hmeh_listlock will be restored to the original value (0)
 * before interrupts are reenabled.
 */

/*
 * For sparcv9 hme hash buckets may not be in the nucleus.  hme hash update
 * routines still use virtual addresses to update the bucket fields. But they
 * must not cause a TLB miss after grabbing the low level bucket lock. To
 * achieve this we must make sure the bucket structure is completely within an
 * 8K page.
 */

#if (HMEBUCK_SIZE & (HMEBUCK_SIZE - 1))
#error - the size of hmehash_bucket structure is not a power of 2
#endif

#ifdef HMELOCK_BACKOFF_ENABLE

#define HMELOCK_BACKOFF(reg, val)                               \
	set     val, reg                                        ;\
	brnz    reg, .                                          ;\
	  dec   reg

#define CAS_HME(tmp1, tmp2, exitlabel, asi)                     \
	mov     0xff, tmp2                                      ;\
	casa    [tmp1]asi, %g0, tmp2                            ;\
	brz,a,pt tmp2, exitlabel                                ;\
	membar  #LoadLoad

#define HMELOCK_ENTER(hmebp, tmp1, tmp2, label, asi)            \
	mov     0xff, tmp2                                      ;\
	add     hmebp, HMEBUCK_LOCK, tmp1                       ;\
	casa    [tmp1]asi, %g0, tmp2                            ;\
	brz,a,pt tmp2, label/**/2                               ;\
	membar  #LoadLoad                                       ;\
	HMELOCK_BACKOFF(tmp2,0x80)                              ;\
	CAS_HME(tmp1, tmp2, label/**/2, asi)                    ;\
	HMELOCK_BACKOFF(tmp2,0x100)                             ;\
	CAS_HME(tmp1, tmp2, label/**/2, asi)                    ;\
	HMELOCK_BACKOFF(tmp2,0x200)                             ;\
	CAS_HME(tmp1, tmp2, label/**/2, asi)                    ;\
label/**/1:                                                     ;\
	HMELOCK_BACKOFF(tmp2,0x400)                             ;\
	CAS_HME(tmp1, tmp2, label/**/2, asi)                    ;\
	HMELOCK_BACKOFF(tmp2,0x800)                             ;\
	CAS_HME(tmp1, tmp2, label/**/2, asi)                    ;\
	HMELOCK_BACKOFF(tmp2,0x1000)                            ;\
	CAS_HME(tmp1, tmp2, label/**/2, asi)                    ;\
	HMELOCK_BACKOFF(tmp2,0x2000)                            ;\
	mov     0xff, tmp2                                      ;\
	casa    [tmp1]asi, %g0, tmp2                            ;\
	brnz,pn tmp2, label/**/1     /* reset backoff */        ;\
	membar  #LoadLoad                                       ;\
label/**/2:

#else /* HMELOCK_BACKOFF_ENABLE */

#define HMELOCK_ENTER(hmebp, tmp1, tmp2, label1, asi)           \
	mov     0xff, tmp2                                      ;\
	add     hmebp, HMEBUCK_LOCK, tmp1                       ;\
label1:                                                         ;\
	casa    [tmp1]asi, %g0, tmp2                            ;\
	brnz,pn tmp2, label1                                    ;\
	mov     0xff, tmp2                                      ;\
	membar  #LoadLoad

#endif /* HMELOCK_BACKOFF_ENABLE */

#define HMELOCK_EXIT(hmebp, tmp1, asi)                          \
	membar  #LoadStore|#StoreStore                          ;\
	add     hmebp, HMEBUCK_LOCK, tmp1                       ;\
	sta     %g0, [tmp1]asi
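
/*
 * C sketch (not compiled) of the bucket lock protocol above, with
 * atomic_cas_8() standing in for casa on the lock byte:
 *
 *	while (atomic_cas_8(&hmebp->hmeh_listlock, 0, 0xff) != 0)
 *		backoff();	! only with HMELOCK_BACKOFF_ENABLE
 *	membar #LoadLoad;	! acquire
 *	... critical section ...
 *	membar #LoadStore|#StoreStore;	! release
 *	hmebp->hmeh_listlock = 0;
 */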

	.seg	".data"
hblk_add_panic1:
	.ascii	"sfmmu_hblk_hash_add: interrupts disabled"
	.byte	0
hblk_add_panic2:
	.ascii	"sfmmu_hblk_hash_add: va hmeblkp is NULL but pa is not"
	.byte	0
	.align	4
	.seg	".text"

	ENTRY_NP(sfmmu_hblk_hash_add)
	/*
	 * %o0 = hmebp
	 * %o1 = hmeblkp
	 * %o2 = hblkpa
	 */
	rdpr	%pstate, %o5
#ifdef DEBUG
	andcc	%o5, PSTATE_IE, %g0		/* if interrupts already */
	bnz,pt %icc, 3f				/* disabled, panic	 */
	  nop
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(hblk_add_panic1), %o0
	call	panic
	 or	%o0, %lo(hblk_add_panic1), %o0
	ret
	restore

3:
#endif /* DEBUG */
	wrpr	%o5, PSTATE_IE, %pstate		/* disable interrupts */
	mov	%o2, %g1

	/*
	 * g1 = hblkpa
	 */
	ldn	[%o0 + HMEBUCK_HBLK], %o4	/* next hmeblk */
	ldx	[%o0 + HMEBUCK_NEXTPA], %g2	/* g2 = next hblkpa */
#ifdef	DEBUG
	cmp	%o4, %g0
	bne,pt %xcc, 1f
	 nop
	brz,pt %g2, 1f
	 nop
	wrpr	%g0, %o5, %pstate		/* enable interrupts */
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(hblk_add_panic2), %o0
	call	panic
	  or	%o0, %lo(hblk_add_panic2), %o0
	ret
	restore
1:
#endif /* DEBUG */
	/*
	 * We update the hmeblk's entries before grabbing the lock because
	 * the stores could take a tlb miss and require the hash lock.  The
	 * buckets are part of the nucleus so we are cool with those stores.
	 *
	 * If the buckets are not part of the nucleus our game is to
	 * not touch any other page via va until we drop the lock.
	 * This guarantees we won't get a tlb miss before the lock release
	 * since interrupts are disabled.
	 */
	stn	%o4, [%o1 + HMEBLK_NEXT]	/* update hmeblk's next */
	stx	%g2, [%o1 + HMEBLK_NEXTPA]	/* update hmeblk's next pa */
	HMELOCK_ENTER(%o0, %o2, %o3, hashadd1, ASI_N)
	stn	%o1, [%o0 + HMEBUCK_HBLK]	/* update bucket hblk next */
	stx	%g1, [%o0 + HMEBUCK_NEXTPA]	/* add hmeblk to list */
	HMELOCK_EXIT(%o0, %g2, ASI_N)
	retl
	  wrpr	%g0, %o5, %pstate		/* enable interrupts */
	SET_SIZE(sfmmu_hblk_hash_add)
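
/*
 * In C-like form (not compiled; field names approximate those in
 * hat_sfmmu.h), the insertion order above is:
 *
 *	hmeblkp->hblk_next = hmebp->hmeblkp;	! link the new hmeblk
 *	hmeblkp->hblk_nextpa = hmebp->hmeh_nextpa; ! before locking
 *	HMELOCK_ENTER(hmebp);
 *	hmebp->hmeblkp = hmeblkp;		! then switch the
 *	hmebp->hmeh_nextpa = hblkpa;		! bucket head under lock
 *	HMELOCK_EXIT(hmebp);
 */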

	ENTRY_NP(sfmmu_hblk_hash_rm)
	/*
	 * This function removes an hmeblk from the hash chain. 
	 * It is written to guarantee we don't take a tlb miss
	 * by using physical addresses to update the list.
	 * 
	 * %o0 = hmebp
	 * %o1 = hmeblkp
	 * %o2 = hmeblkp previous pa
	 * %o3 = hmeblkp previous
	 */

	mov	%o3, %o4			/* o4 = hmeblkp previous */

	rdpr	%pstate, %o5
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o5, sfmmu_di_l4, %g1)
#endif /* DEBUG */
	/*
	 * disable interrupts, clear Address Mask to access 64 bit physaddr
	 */
	andn    %o5, PSTATE_IE, %g1
	wrpr    %g1, 0, %pstate

#ifndef sun4v
	sethi   %hi(dcache_line_mask), %g4
	ld      [%g4 + %lo(dcache_line_mask)], %g4
#endif /* sun4v */

	/*
	 * If the buckets are not part of the nucleus our game is to
	 * not touch any other page via va until we drop the lock.
	 * This guarantees we won't get a tlb miss before the lock release
	 * since interrupts are disabled.
	 */
	HMELOCK_ENTER(%o0, %g1, %g3, hashrm1, ASI_N)
	ldn	[%o0 + HMEBUCK_HBLK], %g2	/* first hmeblk in list */
	cmp	%g2, %o1
	bne,pt	%ncc,1f
	 mov	ASI_MEM, %asi
	/*
	 * hmeblk is first on list
	 */
	ldx	[%o0 + HMEBUCK_NEXTPA], %g2	/* g2 = hmeblk pa */
	ldna	[%g2 + HMEBLK_NEXT] %asi, %o3	/* read next hmeblk va */
	ldxa	[%g2 + HMEBLK_NEXTPA] %asi, %g1	/* read next hmeblk pa */
	stn	%o3, [%o0 + HMEBUCK_HBLK]	/* write va */
	ba,pt	%xcc, 2f
	  stx	%g1, [%o0 + HMEBUCK_NEXTPA]	/* write pa */
1:
	/* hmeblk is not first on list */

	mov	%o2, %g3
#ifndef sun4v
	GET_CPU_IMPL(%g2)
	cmp 	%g2, CHEETAH_IMPL
	bge,a,pt %icc, hblk_hash_rm_1
	  and	%o4, %g4, %g2
	cmp	%g2, SPITFIRE_IMPL
	blt	%icc, hblk_hash_rm_2		/* no flushing needed for OPL */
	  and	%o4, %g4, %g2
	stxa	%g0, [%g2]ASI_DC_TAG		/* flush prev pa from dcache */
	add	%o4, HMEBLK_NEXT, %o4
	and	%o4, %g4, %g2
	ba	hblk_hash_rm_2
	stxa	%g0, [%g2]ASI_DC_TAG		/* flush prev va from dcache */
hblk_hash_rm_1:

	stxa	%g0, [%g3]ASI_DC_INVAL		/* flush prev pa from dcache */
	membar	#Sync
	add     %g3, HMEBLK_NEXT, %g2
	stxa	%g0, [%g2]ASI_DC_INVAL		/* flush prev va from dcache */
hblk_hash_rm_2:
	membar	#Sync
#endif /* sun4v */
	ldxa	[%g3 + HMEBLK_NEXTPA] %asi, %g2	/* g2 = hmeblk pa */ 
	ldna	[%g2 + HMEBLK_NEXT] %asi, %o3	/* read next hmeblk va */
	ldxa	[%g2 + HMEBLK_NEXTPA] %asi, %g1	/* read next hmeblk pa */
	stna	%o3, [%g3 + HMEBLK_NEXT] %asi	/* write va */
	stxa	%g1, [%g3 + HMEBLK_NEXTPA] %asi	/* write pa */
2:
	HMELOCK_EXIT(%o0, %g2, ASI_N)
	retl
	  wrpr	%g0, %o5, %pstate		/* enable interrupts */
	SET_SIZE(sfmmu_hblk_hash_rm)

#endif /* lint */

/*
 * These macros are used to update global sfmmu hme hash statistics
 * in perf critical paths.  They are only enabled in debug kernels or
 * if SFMMU_STAT_GATHER is defined.
 */
#if defined(DEBUG) || defined(SFMMU_STAT_GATHER)
#define	HAT_HSEARCH_DBSTAT(hatid, tsbarea, tmp1, tmp2)			\
	ldn	[tsbarea + TSBMISS_KHATID], tmp1			;\
	mov	HATSTAT_KHASH_SEARCH, tmp2				;\
	cmp	tmp1, hatid						;\
	movne	%ncc, HATSTAT_UHASH_SEARCH, tmp2			;\
	set	sfmmu_global_stat, tmp1					;\
	add	tmp1, tmp2, tmp1					;\
	ld	[tmp1], tmp2						;\
	inc	tmp2							;\
	st	tmp2, [tmp1]

#define	HAT_HLINK_DBSTAT(hatid, tsbarea, tmp1, tmp2)			\
	ldn	[tsbarea + TSBMISS_KHATID], tmp1			;\
	mov	HATSTAT_KHASH_LINKS, tmp2				;\
	cmp	tmp1, hatid						;\
	movne	%ncc, HATSTAT_UHASH_LINKS, tmp2				;\
	set	sfmmu_global_stat, tmp1					;\
	add	tmp1, tmp2, tmp1					;\
	ld	[tmp1], tmp2						;\
	inc	tmp2							;\
	st	tmp2, [tmp1]


#else /* DEBUG || SFMMU_STAT_GATHER */

#define	HAT_HSEARCH_DBSTAT(hatid, tsbarea, tmp1, tmp2)

#define	HAT_HLINK_DBSTAT(hatid, tsbarea, tmp1, tmp2)

#endif  /* DEBUG || SFMMU_STAT_GATHER */

/*
 * This macro is used to update global sfmmu kstats in non-perf-critical
 * areas so it is enabled all the time.
 */
#define	HAT_GLOBAL_STAT(statname, tmp1, tmp2)				\
	sethi	%hi(sfmmu_global_stat), tmp1				;\
	add	tmp1, statname, tmp1					;\
	ld	[tmp1 + %lo(sfmmu_global_stat)], tmp2			;\
	inc	tmp2							;\
	st	tmp2, [tmp1 + %lo(sfmmu_global_stat)]

/*
 * This macro is used to update per cpu stats in non-perf-critical
 * areas so it is enabled all the time.
 */
#define	HAT_PERCPU_STAT32(tsbarea, stat, tmp1)				\
	ld	[tsbarea + stat], tmp1					;\
	inc	tmp1							;\
	st	tmp1, [tsbarea + stat]

/*
 * This macro is used to update per cpu stats in non-perf-critical
 * areas so it is enabled all the time.
 */
#define	HAT_PERCPU_STAT16(tsbarea, stat, tmp1)				\
	lduh	[tsbarea + stat], tmp1					;\
	inc	tmp1							;\
	stuh	tmp1, [tsbarea + stat]

#if defined(KPM_TLBMISS_STATS_GATHER)
	/*
	 * Count kpm dtlb misses separately to allow a different
	 * evaluation of hme and kpm tlbmisses. kpm tsb hits can
	 * be calculated by (kpm_dtlb_misses - kpm_tsb_misses).
	 */
#define	KPM_TLBMISS_STAT_INCR(tagacc, val, tsbma, tmp1, label)		\
	brgez	tagacc, label	/* KPM VA? */				;\
	nop								;\
	CPU_INDEX(tmp1, tsbma)						;\
	sethi	%hi(kpmtsbm_area), tsbma				;\
	sllx	tmp1, KPMTSBM_SHIFT, tmp1				;\
	or	tsbma, %lo(kpmtsbm_area), tsbma				;\
	add	tsbma, tmp1, tsbma		/* kpmtsbm area */	;\
	/* VA range check */						;\
	ldx	[tsbma + KPMTSBM_VBASE], val				;\
	cmp	tagacc, val						;\
	blu,pn	%xcc, label						;\
	  ldx	[tsbma + KPMTSBM_VEND], tmp1				;\
	cmp	tagacc, tmp1						;\
	bgeu,pn	%xcc, label						;\
	  lduw	[tsbma + KPMTSBM_DTLBMISS], val				;\
	inc	val							;\
	st	val, [tsbma + KPMTSBM_DTLBMISS]				;\
label:
#else
#define	KPM_TLBMISS_STAT_INCR(tagacc, val, tsbma, tmp1, label)
#endif	/* KPM_TLBMISS_STATS_GATHER */

#if defined (lint)
/*
 * The following routines are jumped to from the mmu trap handlers to do
 * the setup for calling systrap.  They are separate routines instead of
 * being part of the handlers because the handlers would exceed 32
 * instructions, and since this is part of the slow path the jump
 * cost is irrelevant.
 */
void
sfmmu_pagefault(void)
{
}

void
sfmmu_mmu_trap(void)
{
}

void
sfmmu_window_trap(void)
{
}

void
sfmmu_kpm_exception(void)
{
}

#else /* lint */

#ifdef	PTL1_PANIC_DEBUG
	.seg	".data"
	.global	test_ptl1_panic
test_ptl1_panic:
	.word	0
	.align	8

	.seg	".text"
	.align	4
#endif	/* PTL1_PANIC_DEBUG */


	ENTRY_NP(sfmmu_pagefault)
	SET_GL_REG(1)
	USE_ALTERNATE_GLOBALS(%g5)
	GET_MMU_BOTH_TAGACC(%g5 /*dtag*/, %g2 /*itag*/, %g6, %g4)
	rdpr	%tt, %g6
	cmp	%g6, FAST_IMMU_MISS_TT
	be,a,pn	%icc, 1f
	  mov	T_INSTR_MMU_MISS, %g3
	cmp	%g6, T_INSTR_MMU_MISS
	be,a,pn	%icc, 1f
	  mov	T_INSTR_MMU_MISS, %g3
	mov	%g5, %g2
	mov	T_DATA_PROT, %g3		/* arg2 = traptype */
	cmp	%g6, FAST_DMMU_MISS_TT
	move	%icc, T_DATA_MMU_MISS, %g3	/* arg2 = traptype */
	cmp	%g6, T_DATA_MMU_MISS
	move	%icc, T_DATA_MMU_MISS, %g3	/* arg2 = traptype */

#ifdef  PTL1_PANIC_DEBUG
	/* check if we want to test the tl1 panic */
	sethi	%hi(test_ptl1_panic), %g4
	ld	[%g4 + %lo(test_ptl1_panic)], %g1
	st	%g0, [%g4 + %lo(test_ptl1_panic)]
	cmp	%g1, %g0
	bne,a,pn %icc, ptl1_panic
	  or	%g0, PTL1_BAD_DEBUG, %g1
#endif	/* PTL1_PANIC_DEBUG */
1:
	HAT_GLOBAL_STAT(HATSTAT_PAGEFAULT, %g6, %g4)
	/*
	 * g2 = tag access reg
	 * g3.l = type
	 * g3.h = 0
	 */
	sethi	%hi(trap), %g1
	or	%g1, %lo(trap), %g1
2:
	ba,pt	%xcc, sys_trap
	  mov	-1, %g4	
	SET_SIZE(sfmmu_pagefault)

	ENTRY_NP(sfmmu_mmu_trap)
	SET_GL_REG(1)
	USE_ALTERNATE_GLOBALS(%g5)
	GET_MMU_BOTH_TAGACC(%g5 /*dtag*/, %g2 /*itag*/, %g4, %g6)
	rdpr	%tt, %g6
	cmp	%g6, FAST_IMMU_MISS_TT
	be,a,pn	%icc, 1f
	  mov	T_INSTR_MMU_MISS, %g3
	cmp	%g6, T_INSTR_MMU_MISS
	be,a,pn	%icc, 1f
	  mov	T_INSTR_MMU_MISS, %g3
	mov	%g5, %g2
	mov	T_DATA_PROT, %g3		/* arg2 = traptype */
	cmp	%g6, FAST_DMMU_MISS_TT
	move	%icc, T_DATA_MMU_MISS, %g3	/* arg2 = traptype */
	cmp	%g6, T_DATA_MMU_MISS
	move	%icc, T_DATA_MMU_MISS, %g3	/* arg2 = traptype */
1:
	/*
	 * g2 = tag access reg
	 * g3 = type
	 */
	sethi	%hi(sfmmu_tsbmiss_exception), %g1
	or	%g1, %lo(sfmmu_tsbmiss_exception), %g1
	ba,pt	%xcc, sys_trap
	  mov	-1, %g4	
	/*NOTREACHED*/
	SET_SIZE(sfmmu_mmu_trap)

	ENTRY_NP(sfmmu_suspend_tl)
	SET_GL_REG(1)
	USE_ALTERNATE_GLOBALS(%g5)
	GET_MMU_BOTH_TAGACC(%g5 /*dtag*/, %g2 /*itag*/, %g4, %g3)
	rdpr	%tt, %g6
	cmp	%g6, FAST_IMMU_MISS_TT
	be,a,pn	%icc, 1f
	  mov	T_INSTR_MMU_MISS, %g3
	mov	%g5, %g2
	cmp	%g6, FAST_DMMU_MISS_TT
	move	%icc, T_DATA_MMU_MISS, %g3
	movne	%icc, T_DATA_PROT, %g3
1:
	sethi	%hi(sfmmu_tsbmiss_suspended), %g1
	or	%g1, %lo(sfmmu_tsbmiss_suspended), %g1
	/* g1 = TL0 handler, g2 = tagacc, g3 = trap type */
	ba,pt	%xcc, sys_trap
	  mov	PIL_15, %g4
	/*NOTREACHED*/
	SET_SIZE(sfmmu_suspend_tl)

	/*
	 * No %g registers in use at this point.
	 */
	ENTRY_NP(sfmmu_window_trap)
	rdpr	%tpc, %g1
#ifdef sun4v
#ifdef DEBUG
	/* We assume previous %gl was 1 */
	rdpr	%tstate, %g4
	srlx	%g4, TSTATE_GL_SHIFT, %g4
	and	%g4, TSTATE_GL_MASK, %g4
	cmp	%g4, 1
	bne,a,pn %icc, ptl1_panic
	  mov	PTL1_BAD_WTRAP, %g1
#endif /* DEBUG */
	/* user miss at tl>1. better be the window handler or user_rtt */
	/* in user_rtt? */
	set	rtt_fill_start, %g4
	cmp	%g1, %g4
	blu,pn %xcc, 6f
	 .empty
	set	rtt_fill_end, %g4
	cmp	%g1, %g4
	bgeu,pn %xcc, 6f
	 nop
	set	fault_rtt_fn1, %g1
	wrpr	%g0, %g1, %tnpc	
	ba,a	7f
6:
	! must save this trap level before descending trap stack
	! no need to save %tnpc, either overwritten or discarded
	! already got it: rdpr	%tpc, %g1
	rdpr	%tstate, %g6
	rdpr	%tt, %g7
	! trap level saved, go get underlying trap type
	rdpr	%tl, %g5
	sub	%g5, 1, %g3
	wrpr	%g3, %tl
	rdpr	%tt, %g2
	wrpr	%g5, %tl
	! restore saved trap level
	wrpr	%g1, %tpc
	wrpr	%g6, %tstate
	wrpr	%g7, %tt
#else /* sun4v */
	/* user miss at tl>1. better be the window handler */
	rdpr	%tl, %g5
	sub	%g5, 1, %g3
	wrpr	%g3, %tl
	rdpr	%tt, %g2
	wrpr	%g5, %tl
#endif /* sun4v */
	and	%g2, WTRAP_TTMASK, %g4
	cmp	%g4, WTRAP_TYPE	
	bne,pn	%xcc, 1f
	 nop
	/* tpc should be in the trap table */
	set	trap_table, %g4
	cmp	%g1, %g4
	blt,pn %xcc, 1f
	 .empty
	set	etrap_table, %g4
	cmp	%g1, %g4
	bge,pn %xcc, 1f
	 .empty
	andn	%g1, WTRAP_ALIGN, %g1	/* 128 byte aligned */
	add	%g1, WTRAP_FAULTOFF, %g1
	wrpr	%g0, %g1, %tnpc	
7:
	/*
	 * Some wbuf handlers will call sys_trap to resolve the fault.
	 * We pass the trap type so they can figure out the correct
	 * parameters.
	 * g5 = trap type, g6 = tag access reg
	 */

	/*
	 * only use g5, g6, g7 registers after we have switched to alternate
	 * globals.
	 */
	SET_GL_REG(1)
	USE_ALTERNATE_GLOBALS(%g5)
	GET_MMU_D_TAGACC(%g6 /*dtag*/, %g5 /*scratch*/)
	rdpr	%tt, %g7
	cmp	%g7, FAST_IMMU_MISS_TT
	be,a,pn	%icc, ptl1_panic
	  mov	PTL1_BAD_WTRAP, %g1
	cmp	%g7, T_INSTR_MMU_MISS
	be,a,pn	%icc, ptl1_panic
	  mov	PTL1_BAD_WTRAP, %g1
	mov	T_DATA_PROT, %g5
	cmp	%g7, FAST_DMMU_MISS_TT
	move	%icc, T_DATA_MMU_MISS, %g5
	cmp	%g7, T_DATA_MMU_MISS
	move	%icc, T_DATA_MMU_MISS, %g5
	! XXXQ AGS re-check out this one
	done
1:
	CPU_PADDR(%g1, %g4)
	add	%g1, CPU_TL1_HDLR, %g1
	lda	[%g1]ASI_MEM, %g4
	brnz,a,pt %g4, sfmmu_mmu_trap
	  sta	%g0, [%g1]ASI_MEM
	ba,pt	%icc, ptl1_panic
	  mov	PTL1_BAD_TRAP, %g1
	SET_SIZE(sfmmu_window_trap)

	ENTRY_NP(sfmmu_kpm_exception)
	/*
	 * We have accessed an unmapped segkpm address or a legal segkpm
	 * address which is involved in VAC alias conflict prevention.
	 * Before we go to trap(), check to see if CPU_DTRACE_NOFAULT is
	 * set. If it is, we will instead note that a fault has occurred
	 * by setting CPU_DTRACE_BADADDR and issue a "done" (instead of
	 * a "retry"). This will step over the faulting instruction.
	 * Note that this means that a legal segkpm address involved in
	 * VAC alias conflict prevention (a rare case to begin with)
	 * cannot be used in DTrace.
	 */
	CPU_INDEX(%g1, %g2)
	set	cpu_core, %g2
	sllx	%g1, CPU_CORE_SHIFT, %g1
	add	%g1, %g2, %g1
	lduh	[%g1 + CPUC_DTRACE_FLAGS], %g2
	andcc	%g2, CPU_DTRACE_NOFAULT, %g0
	bz	0f
	or	%g2, CPU_DTRACE_BADADDR, %g2
	stuh	%g2, [%g1 + CPUC_DTRACE_FLAGS]
	GET_MMU_D_ADDR(%g3, /*scratch*/ %g4)
	stx	%g3, [%g1 + CPUC_DTRACE_ILLVAL]
	done
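
	/*
	 * For reference, the no-fault protocol above is roughly the
	 * following C sketch.  The cpu_core flag and field names are the
	 * real ones; the surrounding shape (the flags pointer, fault_addr)
	 * is illustrative only:
	 *
	 *	flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	 *	if (*flags & CPU_DTRACE_NOFAULT) {
	 *		*flags |= CPU_DTRACE_BADADDR;
	 *		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = fault_addr;
	 *		return;		(the "done" skips the instruction)
	 *	}
	 *	trap(...);		(the sys_trap path below)
	 */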
0:
	TSTAT_CHECK_TL1(1f, %g1, %g2)
1:
	SET_GL_REG(1)
	USE_ALTERNATE_GLOBALS(%g5)
	GET_MMU_D_TAGACC(%g2 /* tagacc */, %g4 /*scratch*/)
	mov	T_DATA_MMU_MISS, %g3	/* arg2 = traptype */
	/*
	 * g2=tagacc g3.l=type g3.h=0
	 */
	sethi	%hi(trap), %g1
	or	%g1, %lo(trap), %g1
	ba,pt	%xcc, sys_trap
	mov	-1, %g4
	SET_SIZE(sfmmu_kpm_exception)

#endif /* lint */

#if defined (lint)

void
sfmmu_tsb_miss(void)
{
}

void
sfmmu_kpm_dtsb_miss(void)
{
}

void
sfmmu_kpm_dtsb_miss_small(void)
{
}

#else /* lint */

#if (IMAP_SEG != 0)
#error - ism_map->ism_seg offset is not zero
#endif

/*
 * Checks whether the vaddr passed in via tagacc lies within an ISM
 * segment for this process.  If it does, the ism mapping is copied
 * into the output parameters and control branches to label "ismhit";
 * if this is not an ISM process or not an ISM tlb miss, it falls
 * through.
 *
 * Also, hat_unshare() will set the context for this process to
 * INVALID_CONTEXT so that any other threads of this process will not
 * try to walk the ism maps while they are being changed.
 *
 * NOTE: We will never have any holes in our ISM maps. sfmmu_share/unshare
 *       will make sure of that. This means we can terminate our search on
 *       the first zero mapping we find.
 *
 * Parameters:
 * tagacc	= (pseudo-)tag access register (vaddr + ctx) (in)
 * tsbmiss	= address of tsb miss area (in)
 * ismseg	= contents of ism_seg for this ism map (out)
 * ismhat	= physical address of imap_ismhat for this ism map (out)
 * tmp1		= scratch reg (CLOBBERED)
 * tmp2		= scratch reg (CLOBBERED)
 * tmp3		= scratch reg (CLOBBERED)
 * label:	temporary label prefix
 * ismhit:	label to jump to on an ism dtlb miss
 */
#define ISM_CHECK(tagacc, tsbmiss, ismseg, ismhat, tmp1, tmp2, tmp3,	\
	label, ismhit)							\
	ldx	[tsbmiss + TSBMISS_ISMBLKPA], tmp1 /* tmp1 = &ismblk */	;\
	brlz,pt  tmp1, label/**/3		/* exit if -1 */	;\
	  add	tmp1, IBLK_MAPS, ismhat	/* ismhat = &ismblk.map[0] */	;\
label/**/1:								;\
	ldxa	[ismhat]ASI_MEM, ismseg	/* ismblk.map[0].ism_seg */	;\
	mov	tmp1, tmp3	/* update current ismblkpa head */	;\
label/**/2:								;\
	brz,pt  ismseg, label/**/3		/* no mapping */	;\
	  add	ismhat, IMAP_VB_SHIFT, tmp1 /* tmp1 = vb_shift addr */	;\
	lduba	[tmp1]ASI_MEM, tmp1 		/* tmp1 = vb shift*/	;\
	srlx	ismseg, tmp1, tmp2		/* tmp2 = vbase */	;\
	srlx	tagacc, tmp1, tmp1		/* tmp1 =  va seg*/	;\
	sub	tmp1, tmp2, tmp2		/* tmp2 = va - vbase */	;\
	add	ismhat, IMAP_SZ_MASK, tmp1 /* tmp1 = sz_mask addr */	;\
	lda	[tmp1]ASI_MEM, tmp1		/* tmp1 = sz_mask */	;\
	and	ismseg, tmp1, tmp1		/* tmp1 = size */	;\
	cmp	tmp2, tmp1		 	/* check va <= offset*/	;\
	blu,a,pt  %xcc, ismhit			/* ism hit */		;\
	  add	ismhat, IMAP_ISMHAT, ismhat 	/* ismhat = &ism_sfmmu*/ ;\
									;\
	add	ismhat, ISM_MAP_SZ, ismhat /* ismhat += sizeof(map) */ 	;\
	add	tmp3, (IBLK_MAPS + ISM_MAP_SLOTS * ISM_MAP_SZ), tmp1	;\
	cmp	ismhat, tmp1						;\
	bl,pt	%xcc, label/**/2		/* keep looking  */	;\
	  ldxa	[ismhat]ASI_MEM, ismseg	/* ismseg = map[ismhat] */	;\
									;\
	add	tmp3, IBLK_NEXTPA, tmp1					;\
	ldxa	[tmp1]ASI_MEM, tmp1		/* check blk->nextpa */	;\
	brgez,pt tmp1, label/**/1		/* continue if not -1*/	;\
	  add	tmp1, IBLK_MAPS, ismhat	/* ismhat = &ismblk.map[0]*/	;\
label/**/3:	

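/*
 * For reference, a C sketch of the walk ISM_CHECK performs.  The loop
 * structure mirrors the assembly above; the struct and field names are
 * illustrative only, and every load actually goes through ASI_MEM since
 * the chain is linked by physical addresses (-1 terminates it):
 *
 *	for (blk = ismblkpa; blk != -1; blk = blk->iblk_nextpa) {
 *		for (i = 0; i < ISM_MAP_SLOTS; i++) {
 *			map = &blk->iblk_maps[i];
 *			if (map->ism_seg == 0)
 *				return;		(no holes: first zero ends it)
 *			shift = map->imap_vb_shift;
 *			if ((vaddr >> shift) - (map->ism_seg >> shift) <
 *			    (map->ism_seg & map->imap_sz_mask))
 *				goto ismhit;	(unsigned compare, as in blu)
 *		}
 *	}
 */
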
/*
 * Returns the hme hash bucket (hmebp) given the vaddr and the hatid.
 * It also returns the virtual pg for vaddr (ie. vaddr >> hmeshift)
 * Parameters:
 * tagacc = reg containing virtual address
 * hatid = reg containing sfmmu pointer
 * hmeshift = constant/register to shift vaddr to obtain vapg
 * hmebp = register where bucket pointer will be stored
 * vapg = register where virtual page will be stored
 * tmp1, tmp2 = tmp registers
 */


#define	HMEHASH_FUNC_ASM(tagacc, hatid, tsbarea, hmeshift, hmebp,	\
	vapg, label, tmp1, tmp2)					\
	sllx	tagacc, TAGACC_CTX_LSHIFT, tmp1				;\
	brnz,a,pt tmp1, label/**/1					;\
	  ld    [tsbarea + TSBMISS_UHASHSZ], hmebp			;\
	ld	[tsbarea + TSBMISS_KHASHSZ], hmebp			;\
	ba,pt	%xcc, label/**/2					;\
	  ldx	[tsbarea + TSBMISS_KHASHSTART], tmp1			;\
label/**/1:								;\
	ldx	[tsbarea + TSBMISS_UHASHSTART], tmp1			;\
label/**/2:								;\
	srlx	tagacc, hmeshift, vapg					;\
	xor	vapg, hatid, tmp2	/* hatid ^ (vaddr >> shift) */	;\
	and	tmp2, hmebp, hmebp	/* index into hme_hash */	;\
	mulx	hmebp, HMEBUCK_SIZE, hmebp				;\
	add	hmebp, tmp1, hmebp

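/*
 * In C terms the bucket computation above is roughly the following
 * sketch.  The kernel hash is used when the ctx bits of tagacc are
 * zero, and the *HASHSZ values behave as masks (i.e. nbuckets - 1);
 * the field names are illustrative:
 *
 *	if (CTX(tagacc) == 0) {
 *		mask = tsbmiss->khashsz;  start = tsbmiss->khashstart;
 *	} else {
 *		mask = tsbmiss->uhashsz;  start = tsbmiss->uhashstart;
 *	}
 *	vapg = tagacc >> hmeshift;
 *	hmebp = start + ((vapg ^ (uintptr_t)hatid) & mask) * HMEBUCK_SIZE;
 */
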
/*
 * hashtag includes bspage + hashno (64 bits).
 */

#define	MAKE_HASHTAG(vapg, hatid, hmeshift, hashno, hblktag)		\
	sllx	vapg, hmeshift, vapg					;\
	mov	hashno, hblktag						;\
	sllx	hblktag, HTAG_REHASH_SHIFT, hblktag			;\
	or	vapg, hblktag, hblktag

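/*
 * MAKE_HASHTAG in C terms (a sketch): the tag is the base-shifted page
 * address with the rehash number placed in the upper bits:
 *
 *	hblktag = (vapg << hmeshift) |
 *	    ((uint64_t)hashno << HTAG_REHASH_SHIFT);
 */
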
/*
 * Function to traverse hmeblk hash link list and find corresponding match.
 * The search is done using physical pointers. It returns the physical address
 * and virtual address pointers to the hmeblk that matches with the tag
 * provided.
 * Parameters:
 * hmebp	= register that points to hme hash bucket, also used as
 *		  tmp reg (clobbered)
 * hmeblktag	= register with hmeblk tag match
 * hatid	= register with hatid
 * hmeblkpa	= register where physical ptr will be stored
 * hmeblkva	= register where virtual ptr will be stored
 * tmp1		= tmp reg
 * label: temporary label
 */

#define	HMEHASH_SEARCH(hmebp, hmeblktag, hatid, hmeblkpa, hmeblkva,	\
	tsbarea, tmp1, label)					 	\
	add     hmebp, HMEBUCK_NEXTPA, hmeblkpa				;\
	ldxa    [hmeblkpa]ASI_MEM, hmeblkpa				;\
	add     hmebp, HMEBUCK_HBLK, hmeblkva				;\
	ldxa    [hmeblkva]ASI_MEM, hmeblkva				;\
	HAT_HSEARCH_DBSTAT(hatid, tsbarea, hmebp, tmp1)			;\
label/**/1:								;\
	brz,pn	hmeblkva, label/**/2					;\
	HAT_HLINK_DBSTAT(hatid, tsbarea, hmebp, tmp1)			;\
	add	hmeblkpa, HMEBLK_TAG, hmebp				;\
	ldxa	[hmebp]ASI_MEM, tmp1	 /* read 1st part of tag */	;\
	add	hmebp, CLONGSIZE, hmebp					;\
	ldxa	[hmebp]ASI_MEM, hmebp 	/* read 2nd part of tag */	;\
	xor	tmp1, hmeblktag, tmp1					;\
	xor	hmebp, hatid, hmebp					;\
	or	hmebp, tmp1, hmebp					;\
	brz,pn	hmebp, label/**/2	/* branch on hit */		;\
	  add	hmeblkpa, HMEBLK_NEXT, hmebp				;\
	ldna	[hmebp]ASI_MEM, hmeblkva	/* hmeblk ptr va */	;\
	add	hmeblkpa, HMEBLK_NEXTPA, hmebp				;\
	ba,pt	%xcc, label/**/1					;\
	  ldxa	[hmebp]ASI_MEM, hmeblkpa	/* hmeblk ptr pa */	;\
label/**/2:

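/*
 * A C sketch of the walk above (accessor names are illustrative; the
 * real loads go through ASI_MEM).  The tag is two machine words, and
 * the xor/xor/or sequence lets a single branch test that both words
 * matched:
 *
 *	for (pa = bucket->nextpa, va = bucket->hblk; va != NULL;
 *	    va = next(pa), pa = nextpa(pa)) {
 *		if (((tag_word0(pa) ^ hmeblktag) |
 *		    (tag_word1(pa) ^ hatid)) == 0)
 *			break;			(hit)
 *	}
 */
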
/*
 * Function to traverse the hash link list of shared hmeblks and find
 * the entry matching the tag provided.  Like HMEHASH_SEARCH, the walk
 * uses physical pointers, but a candidate only counts as a hit if its
 * region id is valid and marked live in this process's shared-region
 * map.  It returns the physical and virtual address pointers to the
 * matching hmeblk.
 * Parameters:
 * hmeblktag	= register with hmeblk tag match (rid field is 0)
 * hatid	= register with hatid (pointer to SRD)
 * hmeblkpa	= register where physical ptr will be stored
 * hmeblkva	= register where virtual ptr will be stored
 * tmp1		= tmp reg
 * tmp2		= tmp reg
 * label: temporary label
 */

#define	HMEHASH_SEARCH_SHME(hmeblktag, hatid, hmeblkpa, hmeblkva,	\
	tsbarea, tmp1, tmp2, label)			 		\
label/**/1:								;\
	brz,pn	hmeblkva, label/**/4					;\
	HAT_HLINK_DBSTAT(hatid, tsbarea, tmp1, tmp2)			;\
	add	hmeblkpa, HMEBLK_TAG, tmp2				;\
	ldxa	[tmp2]ASI_MEM, tmp1	 /* read 1st part of tag */	;\
	add	tmp2, CLONGSIZE, tmp2					;\
	ldxa	[tmp2]ASI_MEM, tmp2 	/* read 2nd part of tag */	;\
	xor	tmp1, hmeblktag, tmp1					;\
	xor	tmp2, hatid, tmp2					;\
	brz,pn	tmp2, label/**/3	/* branch on hit */		;\
	  add	hmeblkpa, HMEBLK_NEXT, tmp2				;\
label/**/2:								;\
	ldna	[tmp2]ASI_MEM, hmeblkva	/* hmeblk ptr va */		;\
	add	hmeblkpa, HMEBLK_NEXTPA, tmp2				;\
	ba,pt	%xcc, label/**/1					;\
	  ldxa	[tmp2]ASI_MEM, hmeblkpa	/* hmeblk ptr pa */		;\
label/**/3:								;\
	cmp	tmp1, SFMMU_MAX_HME_REGIONS				;\
	bgeu,pt	%xcc, label/**/2					;\
	  add	hmeblkpa, HMEBLK_NEXT, tmp2				;\
	and	tmp1, BT_ULMASK, tmp2					;\
	srlx	tmp1, BT_ULSHIFT, tmp1					;\
	sllx	tmp1, CLONGSHIFT, tmp1					;\
	add	tsbarea, tmp1, tmp1					;\
	ldx	[tmp1 + TSBMISS_SHMERMAP], tmp1				;\
	srlx	tmp1, tmp2, tmp1					;\
	btst	0x1, tmp1						;\
	bz,pn	%xcc, label/**/2					;\
	  add	hmeblkpa, HMEBLK_NEXT, tmp2				;\
label/**/4:

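/*
 * The label 3 tail above is the extra filter for shared hmeblks: since
 * hmeblktag carries a zero rid, the bits surviving the tag xor are
 * exactly the candidate's region id, which must be valid and live in
 * this process's shared-region bitmap.  In C terms (a sketch; BT_TEST
 * is the usual bitmap idiom):
 *
 *	rid = tag_word0 ^ hmeblktag;
 *	if (rid >= SFMMU_MAX_HME_REGIONS ||
 *	    !BT_TEST(tsbmiss->shmermap, rid))
 *		continue;		(keep walking the chain)
 *	(otherwise fall to label 4 with the match in hand)
 */
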
#if ((1 << SFHME_SHIFT) != SFHME_SIZE)
#error HMEBLK_TO_HMENT assumes sf_hment is power of 2 in size
#endif

/*
 * HMEBLK_TO_HMENT is a macro that given an hmeblk and a vaddr returns
 * the offset for the corresponding hment.
 * Parameters:
 * In:
 *	vaddr = register with virtual address
 *	hmeblkpa = physical pointer to hme_blk
 * Out:	
 *	hmentoff = register where hment offset will be stored
 *	hmemisc = hblk_misc
 * Scratch:
 *	tmp1
 */
#define	HMEBLK_TO_HMENT(vaddr, hmeblkpa, hmentoff, hmemisc, tmp1, label1)\
	add	hmeblkpa, HMEBLK_MISC, hmentoff				;\
	lda	[hmentoff]ASI_MEM, hmemisc 				;\
	andcc	hmemisc, HBLK_SZMASK, %g0				;\
	bnz,a,pn  %icc, label1		/* if sz != TTE8K branch */	;\
	  or	%g0, HMEBLK_HME1, hmentoff				;\
	srl	vaddr, MMU_PAGESHIFT, tmp1				;\
	and	tmp1, NHMENTS - 1, tmp1		/* tmp1 = index */	;\
	sllx	tmp1, SFHME_SHIFT, tmp1					;\
	add	tmp1, HMEBLK_HME1, hmentoff				;\
label1:

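/*
 * HMEBLK_TO_HMENT in C terms (a sketch): a large-page hmeblk carries a
 * single hment, while an 8K hmeblk carries NHMENTS of them, indexed by
 * the 8K page number within the block:
 *
 *	if (hblk_misc & HBLK_SZMASK)		(size != TTE8K)
 *		hmentoff = HMEBLK_HME1;
 *	else
 *		hmentoff = HMEBLK_HME1 +
 *		    (((vaddr >> MMU_PAGESHIFT) & (NHMENTS - 1)) <<
 *		    SFHME_SHIFT);
 */
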
/*
 * GET_TTE is a macro that returns a TTE given a tag and hatid.
 *
 * tagacc	= (pseudo-)tag access register (in)
 * hatid	= sfmmu pointer for TSB miss (in)
 * tte		= tte for TLB miss if found, otherwise clobbered (out)
 * hmeblkpa	= PA of hment if found, otherwise clobbered (out)
 * hmeblkva	= VA of hment if found, otherwise clobbered (out)
 * tsbarea	= pointer to the tsbmiss area for this cpu. (in)
 * hmemisc	= hblk_misc if TTE is found (out), otherwise clobbered
 * hmeshift	= constant/register to shift VA to obtain the virtual pfn
 *		  for this page size.
 * hashno	= constant/register hash number
 * label	= temporary label for branching within macro.
 * foundlabel	= label to jump to when tte is found.
 * suspendlabel = label to jump to when tte is suspended.
 * exitlabel	= label to jump to when tte is not found.
 *
 */
#define GET_TTE(tagacc, hatid, tte, hmeblkpa, hmeblkva, tsbarea, hmemisc, \
		hmeshift, hashno, label, foundlabel, suspendlabel, exitlabel) \
									;\
	stn	tagacc, [tsbarea + (TSBMISS_SCRATCH + TSB_TAGACC)]	;\
	stn	hatid, [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)]	;\
	HMEHASH_FUNC_ASM(tagacc, hatid, tsbarea, hmeshift, tte,		\
		hmeblkpa, label/**/5, hmemisc, hmeblkva)		;\
									;\
	/*								;\
	 * tagacc = tagacc						;\
	 * hatid = hatid						;\
	 * tsbarea = tsbarea						;\
	 * tte   = hmebp (hme bucket pointer)				;\
	 * hmeblkpa  = vapg  (virtual page)				;\
	 * hmemisc, hmeblkva = scratch					;\
	 */								;\
	MAKE_HASHTAG(hmeblkpa, hatid, hmeshift, hashno, hmemisc)	;\
	or	hmemisc, SFMMU_INVALID_SHMERID, hmemisc			;\
									;\
	/*								;\
	 * tagacc = tagacc						;\
	 * hatid = hatid						;\
	 * tte   = hmebp						;\
	 * hmeblkpa  = CLOBBERED					;\
	 * hmemisc  = htag_bspage+hashno+invalid_rid			;\
	 * hmeblkva  = scratch						;\
	 */								;\
	stn	tte, [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)]	;\
	HMELOCK_ENTER(tte, hmeblkpa, hmeblkva, label/**/3, ASI_MEM)	;\
	HMEHASH_SEARCH(tte, hmemisc, hatid, hmeblkpa, hmeblkva, 	\
		tsbarea, tagacc, label/**/1)				;\
	/*								;\
	 * tagacc = CLOBBERED						;\
	 * tte = CLOBBERED						;\
	 * hmeblkpa = hmeblkpa						;\
	 * hmeblkva = hmeblkva						;\
	 */								;\
	brnz,pt	hmeblkva, label/**/4	/* branch if hmeblk found */	;\
	  ldn	[tsbarea + (TSBMISS_SCRATCH + TSB_TAGACC)], tagacc	;\
	ldn	[tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hmeblkva	;\
	HMELOCK_EXIT(hmeblkva, hmeblkva, ASI_MEM)  /* drop lock */	;\
	ba,pt	%xcc, exitlabel		/* exit if hblk not found */	;\
	  nop								;\
label/**/4:								;\
	/*								;\
	 * We have found the hmeblk containing the hment.		;\
	 * Now we calculate the corresponding tte.			;\
	 *								;\
	 * tagacc = tagacc						;\
	 * hatid = hatid						;\
	 * tte   = clobbered						;\
	 * hmeblkpa  = hmeblkpa						;\
	 * hmemisc  = hblktag						;\
	 * hmeblkva  = hmeblkva 					;\
	 */								;\
	HMEBLK_TO_HMENT(tagacc, hmeblkpa, hatid, hmemisc, tte,		\
		label/**/2)						;\
									;\
	/*								;\
	 * tagacc = tagacc						;\
	 * hatid = hmentoff						;\
	 * tte   = clobbered						;\
	 * hmeblkpa  = hmeblkpa						;\
	 * hmemisc  = hblk_misc						;\
	 * hmeblkva  = hmeblkva 					;\
	 */								;\
									;\
	add	hatid, SFHME_TTE, hatid					;\
	add	hmeblkpa, hatid, hmeblkpa				;\
	ldxa	[hmeblkpa]ASI_MEM, tte	/* MMU_READTTE through pa */	;\
	add	hmeblkva, hatid, hmeblkva				;\
	ldn	[tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hatid 	;\
	HMELOCK_EXIT(hatid, hatid, ASI_MEM)	/* drop lock */		;\
	set	TTE_SUSPEND, hatid					;\
	TTE_SUSPEND_INT_SHIFT(hatid)					;\
	btst	tte, hatid						;\
	bz,pt	%xcc, foundlabel					;\
	ldn	[tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)], hatid	;\
									;\
	/*								;\
	 * Mapping is suspended, so goto suspend label.			;\
	 */								;\
	ba,pt	%xcc, suspendlabel					;\
	  nop

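/*
 * Taken together, GET_TTE is roughly the following C outline (a sketch
 * only: the macro also spills tagacc/hatid/hmebp into the per-CPU
 * tsbmiss scratch area and holds the hash bucket lock across the
 * search):
 *
 *	hmebp = HMEHASH_FUNC_ASM(tagacc, hatid, hmeshift);
 *	tag = MAKE_HASHTAG(vapg, hatid, hmeshift, hashno) |
 *	    SFMMU_INVALID_SHMERID;
 *	hmeblk = HMEHASH_SEARCH(hmebp, tag, hatid);
 *	if (hmeblk == NULL)
 *		goto exitlabel;		(lock dropped first)
 *	tte = hment(hmeblk, HMEBLK_TO_HMENT(vaddr))->hme_tte;
 *	if (tte & TTE_SUSPEND)
 *		goto suspendlabel;
 *	goto foundlabel;
 */
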
/*
 * GET_SHME_TTE is similar to GET_TTE() except it searches
 * shared hmeblks via HMEHASH_SEARCH_SHME() macro.
 * If valid tte is found, hmemisc = shctx flag, i.e., shme is
 * either 0 (not part of scd) or 1 (part of scd).
 */
#define GET_SHME_TTE(tagacc, hatid, tte, hmeblkpa, hmeblkva, tsbarea,	\
		hmemisc, hmeshift, hashno, label, foundlabel,		\
		suspendlabel, exitlabel)				\
									;\
	stn	tagacc, [tsbarea + (TSBMISS_SCRATCH + TSB_TAGACC)]	;\
	stn	hatid, [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)]	;\
	HMEHASH_FUNC_ASM(tagacc, hatid, tsbarea, hmeshift, tte,		\
		hmeblkpa, label/**/5, hmemisc, hmeblkva)		;\
									;\
	/*								;\
	 * tagacc = tagacc						;\
	 * hatid = hatid						;\
	 * tsbarea = tsbarea						;\
	 * tte   = hmebp (hme bucket pointer)				;\
	 * hmeblkpa  = vapg  (virtual page)				;\
	 * hmemisc, hmeblkva = scratch					;\
	 */								;\
	MAKE_HASHTAG(hmeblkpa, hatid, hmeshift, hashno, hmemisc)	;\
									;\
	/*								;\
	 * tagacc = tagacc						;\
	 * hatid = hatid						;\
	 * tsbarea = tsbarea						;\
	 * tte   = hmebp						;\
	 * hmemisc  = htag_bspage + hashno + 0 (for rid)		;\
	 * hmeblkpa  = CLOBBERED					;\
	 * hmeblkva  = scratch						;\
	 */								;\
	stn	tte, [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)]	;\
	HMELOCK_ENTER(tte, hmeblkpa, hmeblkva, label/**/3, ASI_MEM)	;\
									;\
	add     tte, HMEBUCK_NEXTPA, hmeblkpa				;\
	ldxa    [hmeblkpa]ASI_MEM, hmeblkpa				;\
	add     tte, HMEBUCK_HBLK, hmeblkva				;\
	ldxa    [hmeblkva]ASI_MEM, hmeblkva				;\
	HAT_HSEARCH_DBSTAT(hatid, tsbarea, tagacc, tte)			;\
									;\
label/**/8:								;\
	HMEHASH_SEARCH_SHME(hmemisc, hatid, hmeblkpa, hmeblkva, 	\
		tsbarea, tagacc, tte, label/**/1)			;\
	/*								;\
	 * tagacc = CLOBBERED						;\
	 * tte = CLOBBERED						;\
	 * hmeblkpa = hmeblkpa						;\
	 * hmeblkva = hmeblkva						;\
	 */								;\
	brnz,pt	hmeblkva, label/**/4	/* branch if hmeblk found */	;\
	  ldn	[tsbarea + (TSBMISS_SCRATCH + TSB_TAGACC)], tagacc	;\
	ldn	[tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hmeblkva	;\
	HMELOCK_EXIT(hmeblkva, hmeblkva, ASI_MEM)  /* drop lock */	;\
	ba,pt	%xcc, exitlabel		/* exit if hblk not found */	;\
	  nop								;\
label/**/4:								;\
	/*								;\
	 * We have found the hmeblk containing the hment.		;\
	 * Now we calculate the corresponding tte.			;\
	 *								;\
	 * tagacc = tagacc						;\
	 * hatid = hatid						;\
	 * tte   = clobbered						;\
	 * hmeblkpa  = hmeblkpa						;\
	 * hmemisc  = hblktag						;\
	 * hmeblkva  = hmeblkva 					;\
	 * tsbarea = tsbmiss area					;\
	 */								;\
	HMEBLK_TO_HMENT(tagacc, hmeblkpa, hatid, hmemisc, tte,		\
		label/**/2)						;\
									;\
	/*								;\
	 * tagacc = tagacc						;\
	 * hatid = hmentoff						;\
	 * tte = clobbered						;\
	 * hmeblkpa  = hmeblkpa						;\
	 * hmemisc  = hblk_misc						;\
	 * hmeblkva  = hmeblkva						;\
	 * tsbarea = tsbmiss area					;\
	 */								;\
									;\
	add	hatid, SFHME_TTE, hatid					;\
	add	hmeblkpa, hatid, hmeblkpa				;\
	ldxa	[hmeblkpa]ASI_MEM, tte	/* MMU_READTTE through pa */	;\
	brlz,pt tte, label/**/6						;\
	  add	hmeblkva, hatid, hmeblkva				;\
	btst	HBLK_SZMASK, hmemisc					;\
	bnz,a,pt %icc, label/**/7					;\
	  ldn	[tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hatid 	;\
									;\
	/*								;\
 	 * We found an invalid 8K tte in shme.				;\
	 * it may not belong to shme's region since			;\
	 * region size/alignment granularity is 8K but different	;\
	 * regions don't share hmeblks. Continue the search.		;\
	 */								;\
	sub	hmeblkpa, hatid, hmeblkpa				;\
	ldn	[tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)], hatid	;\
	srlx	tagacc, hmeshift, tte					;\
	add	hmeblkpa, HMEBLK_NEXT, hmeblkva				;\
	ldxa	[hmeblkva]ASI_MEM, hmeblkva				;\
	add	hmeblkpa, HMEBLK_NEXTPA, hmeblkpa			;\
	ldxa	[hmeblkpa]ASI_MEM, hmeblkpa				;\
	MAKE_HASHTAG(tte, hatid, hmeshift, hashno, hmemisc)		;\
	ba,a,pt	%xcc, label/**/8					;\
label/**/6:								;\
	GET_SCDSHMERMAP(tsbarea, hmeblkpa, hatid, hmemisc)		;\
	ldn	[tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hatid 	;\
label/**/7:								;\
	HMELOCK_EXIT(hatid, hatid, ASI_MEM)	/* drop lock */		;\
	set	TTE_SUSPEND, hatid					;\
	TTE_SUSPEND_INT_SHIFT(hatid)					;\
	btst	tte, hatid						;\
	bz,pt	%xcc, foundlabel					;\
	ldn	[tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)], hatid	;\
									;\
	/*								;\
	 * Mapping is suspended, so goto suspend label.			;\
	 */								;\
	ba,pt	%xcc, suspendlabel					;\
	  nop

	/*
	 * KERNEL PROTECTION HANDLER
	 *
	 * g1 = tsb8k pointer register (clobbered)
	 * g2 = tag access register (ro)
	 * g3 - g7 = scratch registers
	 *
	 * Note: This function is patched at runtime for performance reasons.
	 * 	 Any changes here require a matching change in sfmmu_patch_ktsb.
	 */
	ENTRY_NP(sfmmu_kprot_trap)
	mov	%g2, %g7		! TSB pointer macro clobbers tagacc
sfmmu_kprot_patch_ktsb_base:
	RUNTIME_PATCH_SETX(%g1, %g6)
	/* %g1 = contents of ktsb_base or ktsb_pbase */
sfmmu_kprot_patch_ktsb_szcode:
	or	%g0, RUNTIME_PATCH, %g3	! ktsb_szcode (hot patched)

	GET_TSBE_POINTER(MMU_PAGESHIFT, %g1, %g7, %g3, %g5)
	! %g1 = First TSB entry pointer, as TSB miss handler expects

	mov	%g2, %g7		! TSB pointer macro clobbers tagacc
sfmmu_kprot_patch_ktsb4m_base:
	RUNTIME_PATCH_SETX(%g3, %g6)
	/* %g3 = contents of ktsb4m_base or ktsb4m_pbase */
sfmmu_kprot_patch_ktsb4m_szcode:
	or	%g0, RUNTIME_PATCH, %g6	! ktsb4m_szcode (hot patched)

	GET_TSBE_POINTER(MMU_PAGESHIFT4M, %g3, %g7, %g6, %g5)
	! %g3 = 4M tsb entry pointer, as TSB miss handler expects

        CPU_TSBMISS_AREA(%g6, %g7)
        HAT_PERCPU_STAT16(%g6, TSBMISS_KPROTS, %g7)
	ba,pt	%xcc, sfmmu_tsb_miss_tt
	  nop

	/*
	 * USER PROTECTION HANDLER
	 *
	 * g1 = tsb8k pointer register (ro)
	 * g2 = tag access register (ro)
	 * g3 = faulting context (clobbered, currently not used)
	 * g4 - g7 = scratch registers
	 */
	ALTENTRY(sfmmu_uprot_trap)
#ifdef sun4v
	GET_1ST_TSBE_PTR(%g2, %g1, %g4, %g5) 
	/* %g1 = first TSB entry ptr now, %g2 preserved */

	GET_UTSBREG(SCRATCHPAD_UTSBREG2, %g3)	/* get 2nd utsbreg */
	brlz,pt %g3, 9f				/* check for 2nd TSB */
	  nop

	GET_2ND_TSBE_PTR(%g2, %g3, %g4, %g5) 
	/* %g3 = second TSB entry ptr now, %g2 preserved */

#else /* sun4v */
#ifdef UTSB_PHYS
	/* g1 = first TSB entry ptr */
	GET_2ND_TSBREG(%g3)
	brlz,pt %g3, 9f			/* check for 2nd TSB */
	  nop

	GET_2ND_TSBE_PTR(%g2, %g3, %g4, %g5) 
	/* %g3 = second TSB entry ptr now, %g2 preserved */
#else /* UTSB_PHYS */
	brgez,pt %g1, 9f		/* check for 2nd TSB */
	  mov	-1, %g3			/* set second tsbe ptr to -1 */

	mov	%g2, %g7
	GET_2ND_TSBE_PTR(%g7, %g1, %g3, %g4, %g5, sfmmu_uprot) 
	/* %g3 = second TSB entry ptr now, %g7 clobbered */
	mov	%g1, %g7
	GET_1ST_TSBE_PTR(%g7, %g1, %g5, sfmmu_uprot)
#endif /* UTSB_PHYS */
#endif /* sun4v */ 
9:
	CPU_TSBMISS_AREA(%g6, %g7)
	HAT_PERCPU_STAT16(%g6, TSBMISS_UPROTS, %g7)
	ba,pt	%xcc, sfmmu_tsb_miss_tt		/* branch TSB miss handler */
	  nop

	/*
	 * Kernel 8K page iTLB miss.  We also get here if we took a
	 * fast instruction access mmu miss trap while running in
	 * invalid context.
	 *
	 * %g1 = 8K TSB pointer register (not used, clobbered)
	 * %g2 = tag access register (used)
	 * %g3 = faulting context id (used)
	 * %g7 = TSB tag to match (used)
	 */
	.align	64
	ALTENTRY(sfmmu_kitlb_miss)
	brnz,pn %g3, tsb_tl0_noctxt
	  nop

	/* kernel miss */
	/* get kernel tsb pointer */
	/* we patch the next set of instructions at run time */
	/* NOTE: any changes here require a matching change in sfmmu_patch_ktsb */
iktsbbase:
	RUNTIME_PATCH_SETX(%g4, %g5)
	/* %g4 = contents of ktsb_base or ktsb_pbase */

iktsb:	sllx	%g2, 64-(TAGACC_SHIFT + TSB_START_SIZE + RUNTIME_PATCH), %g1
	srlx	%g1, 64-(TSB_START_SIZE + TSB_ENTRY_SHIFT + RUNTIME_PATCH), %g1
	or	%g4, %g1, %g1			! form tsb ptr
	ldda	[%g1]RUNTIME_PATCH, %g4		! %g4 = tag, %g5 = data
	cmp	%g4, %g7
	bne,pn	%xcc, iktsb4mbase		! check 4m ktsb
	  srlx    %g2, MMU_PAGESHIFT4M, %g3	! use 4m virt-page as TSB index

	andcc %g5, TTE_EXECPRM_INT, %g0		! check exec bit
	bz,pn	%icc, exec_fault
	  nop
	TT_TRACE(trace_tsbhit)			! 2 instr traptrace
	ITLB_STUFF(%g5, %g1, %g2, %g3, %g4)
	retry

iktsb4mbase:
        RUNTIME_PATCH_SETX(%g4, %g6)
        /* %g4 = contents of ktsb4m_base or ktsb4m_pbase */
iktsb4m:
	sllx    %g3, 64-(TSB_START_SIZE + RUNTIME_PATCH), %g3
        srlx    %g3, 64-(TSB_START_SIZE + TSB_ENTRY_SHIFT + RUNTIME_PATCH), %g3
	add	%g4, %g3, %g3			! %g3 = 4m tsbe ptr
	ldda	[%g3]RUNTIME_PATCH, %g4		! %g4 = tag, %g5 = data
	cmp	%g4, %g7
	bne,pn	%xcc, sfmmu_tsb_miss_tt		! branch on miss
	  andcc %g5, TTE_EXECPRM_INT, %g0		! check exec bit
	bz,pn	%icc, exec_fault
	  nop
	TT_TRACE(trace_tsbhit)			! 2 instr traptrace
	ITLB_STUFF(%g5, %g1, %g2, %g3, %g4)
	retry

	/*
	 * Kernel dTLB miss.  We also get here if we took a fast data
	 * access mmu miss trap while running in invalid context.
	 *
	 * Note: for now we store kpm TTEs in the kernel TSB as usual.
	 *	We select the TSB miss handler to branch to depending on
	 *	the virtual address of the access.  In the future it may
	 *	be desirable to separate kpm TTEs into their own TSB,
	 *	in which case all that needs to be done is to set
	 *	kpm_tsbbase/kpm_tsbsz to point to the new TSB and branch
	 *	early in the miss if we detect a kpm VA to a new handler.
	 *
	 * %g1 = 8K TSB pointer register (not used, clobbered)
	 * %g2 = tag access register (used)
	 * %g3 = faulting context id (used)
	 */
	.align	64
	ALTENTRY(sfmmu_kdtlb_miss)
	brnz,pn	%g3, tsb_tl0_noctxt		/* invalid context? */
	  nop

	/* Gather some stats for kpm misses in the TLB. */
	/* KPM_TLBMISS_STAT_INCR(tagacc, val, tsbma, tmp1, label) */
	KPM_TLBMISS_STAT_INCR(%g2, %g4, %g5, %g6, kpmtlbm_stat_out)

	/*
	 * Get first TSB offset and look for 8K/64K/512K mapping
	 * using the 8K virtual page as the index.
	 *
	 * We patch the next set of instructions at run time;
	 * any changes here require sfmmu_patch_ktsb changes too.
	 */
dktsbbase:
	RUNTIME_PATCH_SETX(%g7, %g6)
	/* %g7 = contents of ktsb_base or ktsb_pbase */

dktsb:	sllx	%g2, 64-(TAGACC_SHIFT + TSB_START_SIZE + RUNTIME_PATCH), %g1
	srlx	%g1, 64-(TSB_START_SIZE + TSB_ENTRY_SHIFT + RUNTIME_PATCH), %g1

	/*
	 * At this point %g1 is our index into the TSB.
	 * We just masked off enough bits of the VA depending
	 * on our TSB size code.
	 */
	ldda	[%g7 + %g1]RUNTIME_PATCH, %g4	! %g4 = tag, %g5 = data
	srlx	%g2, TAG_VALO_SHIFT, %g6	! make tag to compare
	cmp	%g6, %g4			! compare tag
	bne,pn	%xcc, dktsb4m_kpmcheck_small
	  add	%g7, %g1, %g1			/* form tsb ptr */
	TT_TRACE(trace_tsbhit)
	DTLB_STUFF(%g5, %g1, %g2, %g3, %g4)
	/* trapstat expects tte in %g5 */
	retry

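	/*
	 * For reference: each patched sllx/srlx pair (iktsb above, dktsb
	 * here) is the two-instruction form of the TSB offset computation,
	 * with the boot-time size code folded into the shift counts.  In C
	 * terms (a sketch, where szc is the RUNTIME_PATCH'd size code):
	 *
	 *	idx = (tagacc >> TAGACC_SHIFT) &
	 *	    ((1 << (TSB_START_SIZE + szc)) - 1);
	 *	tsbe_off = idx << TSB_ENTRY_SHIFT;
	 */
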
	/*
	 * If kpm is using large pages, the following instruction needs
	 * to be patched to a nop at boot time (by sfmmu_kpm_patch_tsbm)
	 * so that we will probe the 4M TSB regardless of the VA.  In
	 * the case kpm is using small pages, we know no large kernel
	 * mappings are located above 0x80000000.00000000 so we skip the
	 * probe as an optimization.
	 */
dktsb4m_kpmcheck_small:
	brlz,pn %g2, sfmmu_kpm_dtsb_miss_small
	  /* delay slot safe, below */

	/*
	 * Get second TSB offset and look for 4M mapping
	 * using 4M virtual page as the TSB index.
	 *
	 * Here:
	 * %g1 = 8K TSB pointer.  Don't squash it.
	 * %g2 = tag access register (we still need it)
	 */
	srlx	%g2, MMU_PAGESHIFT4M, %g3

	/*
	 * We patch the next set of instructions at run time;
	 * any changes here require sfmmu_patch_ktsb changes too.
	 */
dktsb4mbase:
	RUNTIME_PATCH_SETX(%g7, %g6)
	/* %g7 = contents of ktsb4m_base or ktsb4m_pbase */
dktsb4m:
	sllx	%g3, 64-(TSB_START_SIZE + RUNTIME_PATCH), %g3
	srlx	%g3, 64-(TSB_START_SIZE + TSB_ENTRY_SHIFT + RUNTIME_PATCH), %g3

	/*
	 * At this point %g3 is our index into the TSB.
	 * We just masked off enough bits of the VA depending
	 * on our TSB size code.
	 */
	ldda	[%g7 + %g3]RUNTIME_PATCH, %g4	! %g4 = tag, %g5 = data
	srlx	%g2, TAG_VALO_SHIFT, %g6	! make tag to compare
	cmp	%g6, %g4			! compare tag

dktsb4m_tsbmiss:
	bne,pn	%xcc, dktsb4m_kpmcheck
	  add	%g7, %g3, %g3			! %g3 = kernel second TSB ptr
	TT_TRACE(trace_tsbhit)
	/* we don't check TTE size here since we assume 4M TSB is separate */
	DTLB_STUFF(%g5, %g1, %g2, %g3, %g4)
	/* trapstat expects tte in %g5 */
	retry

	/*
	 * So, we failed to find a valid TTE to match the faulting
	 * address in either TSB.  There are a few cases that could land
	 * us here:
	 *
	 * 1) This is a kernel VA below 0x80000000.00000000.  We branch
	 *    to sfmmu_tsb_miss_tt to handle the miss.
	 * 2) We missed on a kpm VA, and we didn't find the mapping in the
	 *    4M TSB.  Let segkpm handle it.
	 *
	 * Note that we shouldn't land here in the case of a kpm VA when
	 * kpm_smallpages is active -- we handled that case earlier at
	 * dktsb4m_kpmcheck_small.
	 *
	 * At this point:
	 *  g1 = 8K-indexed primary TSB pointer
	 *  g2 = tag access register
	 *  g3 = 4M-indexed secondary TSB pointer
	 */
dktsb4m_kpmcheck:
	cmp	%g2, %g0
	bl,pn	%xcc, sfmmu_kpm_dtsb_miss
	  nop
	ba,a,pt	%icc, sfmmu_tsb_miss_tt
	  nop

#ifdef sun4v
	/*
	 * User instruction miss w/ single TSB.
	 * The first probe covers 8K, 64K, and 512K page sizes,
	 * because 64K and 512K mappings are replicated off the
	 * 8K pointer.
	 *
	 * g1 = tsb8k pointer register
	 * g2 = tag access register
	 * g3 - g6 = scratch registers
	 * g7 = TSB tag to match
	 */
	.align	64
	ALTENTRY(sfmmu_uitlb_fastpath)

	PROBE_1ST_ITSB(%g1, %g7, uitlb_fast_8k_probefail)
	/* g4 - g5 = clobbered by PROBE_1ST_ITSB */
	ba,pn	%xcc, sfmmu_tsb_miss_tt
	  mov	-1, %g3

	/*
	 * User data miss w/ single TSB.
	 * The first probe covers 8K, 64K, and 512K page sizes,
	 * because 64K and 512K mappings are replicated off the
	 * 8K pointer.
	 *
	 * g1 = tsb8k pointer register
	 * g2 = tag access register
	 * g3 - g6 = scratch registers
	 * g7 = TSB tag to match
	 */
	.align 64
	ALTENTRY(sfmmu_udtlb_fastpath)

	PROBE_1ST_DTSB(%g1, %g7, udtlb_fast_8k_probefail)
	/* g4 - g5 = clobbered by PROBE_1ST_DTSB */
	ba,pn	%xcc, sfmmu_tsb_miss_tt
	  mov	-1, %g3

	/*
	 * User instruction miss w/ multiple TSBs (sun4v).
	 * The first probe covers 8K, 64K, and 512K page sizes,
	 * because 64K and 512K mappings are replicated off the
	 * 8K pointer.  Second probe covers 4M page size only.
	 *
	 * Just like sfmmu_udtlb_slowpath, except:
	 *   o Uses ASI_ITLB_IN
	 *   o checks for execute permission
	 *   o No ISM prediction.
	 *
	 * g1 = tsb8k pointer register
	 * g2 = tag access register
	 * g3 - g6 = scratch registers
	 * g7 = TSB tag to match
	 */
	.align	64
	ALTENTRY(sfmmu_uitlb_slowpath)

	GET_1ST_TSBE_PTR(%g2, %g1, %g4, %g5)
	PROBE_1ST_ITSB(%g1, %g7, uitlb_8k_probefail)
	/* g4 - g5 = clobbered here */

	GET_2ND_TSBE_PTR(%g2, %g3, %g4, %g5)
	/* g1 = first TSB pointer, g3 = second TSB pointer */
	srlx	%g2, TAG_VALO_SHIFT, %g7
	PROBE_2ND_ITSB(%g3, %g7)
	/* NOT REACHED */

#else /* sun4v */

	/*
	 * User instruction miss w/ multiple TSBs (sun4u).
	 * The first probe covers 8K, 64K, and 512K page sizes,
	 * because 64K and 512K mappings are replicated off the
	 * 8K pointer.  Second probe covers 4M page size only.
	 *
	 * Just like sfmmu_udtlb_slowpath, except:
	 *   o Uses ASI_ITLB_IN
	 *   o checks for execute permission
	 *   o No ISM prediction.
	 *
	 * g1 = tsb8k pointer register
	 * g2 = tag access register
	 * g3 = 2nd tsbreg if defined UTSB_PHYS, else scratch
	 * g4 - g6 = scratch registers
	 * g7 = TSB tag to match
	 */
	.align	64
	ALTENTRY(sfmmu_uitlb_slowpath)

#ifdef UTSB_PHYS
	/*
	 * g1 = 1st TSB entry pointer
	 * g3 = 2nd TSB base register
	 * Need 2nd TSB entry pointer for 2nd probe.
	 */
	PROBE_1ST_ITSB(%g1, %g7, uitlb_8k_probefail)

	GET_2ND_TSBE_PTR(%g2, %g3, %g4, %g5)
#else /* UTSB_PHYS */
	mov	%g1, %g3	/* save tsb8k reg in %g3 */
	GET_1ST_TSBE_PTR(%g3, %g1, %g5, sfmmu_uitlb)
	PROBE_1ST_ITSB(%g1, %g7, uitlb_8k_probefail)

	mov	%g2, %g6	/* GET_2ND_TSBE_PTR clobbers tagacc */
	mov	%g3, %g7	/* copy tsb8k reg in %g7 */
	GET_2ND_TSBE_PTR(%g6, %g7, %g3, %g4, %g5, sfmmu_uitlb)
#endif /* UTSB_PHYS */
	/* g1 = first TSB pointer, g3 = second TSB pointer */
	srlx	%g2, TAG_VALO_SHIFT, %g7
	PROBE_2ND_ITSB(%g3, %g7, isynth)
	/* NOT REACHED */
#endif /* sun4v */

	/*
	 * User data miss w/ multiple TSBs.
	 * The first probe covers 8K, 64K, and 512K page sizes,
	 * because 64K and 512K mappings are replicated off the
	 * 8K pointer.  Second probe covers 4M page size only.
	 *
	 * We consider probing for 4M pages first if the VA falls
	 * in a range that's likely to be ISM.
	 *
	 * g1 = tsb8k pointer register
	 * g2 = tag access register
	 * g3 = 2nd tsbreg if defined UTSB_PHYS, else scratch
	 * g4 - g6 = scratch registers
	 * g7 = TSB tag to match
	 */
	.align 64
	ALTENTRY(sfmmu_udtlb_slowpath)

	/*
	 * Check for ISM.  If it exists, look for 4M mappings in the second TSB
	 * first, then probe for other mappings in the first TSB if that fails.
	 */
	srax	%g2, PREDISM_BASESHIFT, %g6	/* g6 > 0 : ISM predicted */
	brgz,pn %g6, udtlb_miss_probesecond	/* check for ISM */
	  mov	%g1, %g3
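
	/*
	 * Note on the srax above: %g6 ends up strictly positive exactly
	 * when bit 63 of the VA is clear and the VA is at or above
	 * (1ULL << PREDISM_BASESHIFT), the range predicted to be ISM.
	 * A C sketch:
	 *
	 *	if (((int64_t)tagacc >> PREDISM_BASESHIFT) > 0)
	 *		probe the 4M (second) TSB first;
	 */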

udtlb_miss_probefirst:
	/*
	 * g1 = 8K TSB pointer register
	 * g2 = tag access register
	 * g3 = (potentially) second TSB entry ptr
	 * g6 = ism pred.
	 * g7 = vpg_4m
	 */
#ifdef sun4v
	GET_1ST_TSBE_PTR(%g2, %g1, %g4, %g5)
	PROBE_1ST_DTSB(%g1, %g7, udtlb_first_probefail)

	/*
	 * Here:
	 *   g1 = first TSB pointer
	 *   g2 = tag access reg
	 *   g3 = second TSB ptr IFF ISM pred. (else don't care)
	 */
	brgz,pn	%g6, sfmmu_tsb_miss_tt
	  nop
#else /* sun4v */
#ifndef UTSB_PHYS
	mov	%g1, %g4
	GET_1ST_TSBE_PTR(%g4, %g1, %g5, sfmmu_udtlb)
#endif /* UTSB_PHYS */
	PROBE_1ST_DTSB(%g1, %g7, udtlb_first_probefail)

	/*
	 * Here:
	 *   g1 = first TSB pointer
	 *   g2 = tag access reg
	 *   g3 = second TSB ptr IFF ISM pred. (else don't care)
	 */
	brgz,pn	%g6, sfmmu_tsb_miss_tt
	  nop
#ifndef UTSB_PHYS
	ldxa	[%g0]ASI_DMMU_TSB_8K, %g3
#endif /* UTSB_PHYS */
	/* fall through in 8K->4M probe order */
#endif /* sun4v */

udtlb_miss_probesecond:
	/*
	 * Look in the second TSB for the TTE
	 * g1 = First TSB entry ptr if !ISM pred, TSB8K ptr reg if ISM pred.
	 * g2 = tag access reg
	 * g3 = 8K TSB pointer register
	 * g6 = ism pred.
	 * g7 = vpg_4m
	 */
#ifdef sun4v
	/* GET_2ND_TSBE_PTR(tagacc, tsbe_ptr, tmp1, tmp2) */
	GET_2ND_TSBE_PTR(%g2, %g3, %g4, %g5)
	/* %g2 is okay, no need to reload, %g3 = second tsbe ptr */
#else /* sun4v */
#ifdef UTSB_PHYS
	GET_2ND_TSBREG(%g3)
	GET_2ND_TSBE_PTR(%g2, %g3, %g4, %g5)
	/* tagacc (%g2) is okay, no need to reload, %g3 = second tsbe ptr */
#else /* UTSB_PHYS */
	mov	%g3, %g7
	GET_2ND_TSBE_PTR(%g2, %g7, %g3, %g4, %g5, sfmmu_udtlb)
	/* %g2 clobbered, %g3 =second tsbe ptr */
	mov	MMU_TAG_ACCESS, %g2
	ldxa	[%g2]ASI_DMMU, %g2
#endif /* UTSB_PHYS */
#endif /* sun4v */

	srlx	%g2, TAG_VALO_SHIFT, %g7
	PROBE_2ND_DTSB(%g3, %g7, udtlb_4m_probefail)
	/* g4 - g5 = clobbered here; %g7 still vpg_4m at this point */
	brgz,pn	%g6, udtlb_miss_probefirst
	  nop

	/* fall through to sfmmu_tsb_miss_tt */

	ALTENTRY(sfmmu_tsb_miss_tt)
	TT_TRACE(trace_tsbmiss)
	/*
	 * We get here if there is a TSB miss OR a write protect trap.
	 *
	 * g1 = First TSB entry pointer
	 * g2 = tag access register
	 * g3 = 4M TSB entry pointer; -1 if no 2nd TSB
	 * g4 - g7 = scratch registers
	 */

	ALTENTRY(sfmmu_tsb_miss)

	/*
	 * If trapstat is running, we need to shift the %tpc and %tnpc to
	 * point to trapstat's TSB miss return code (note that trapstat
	 * itself will patch the correct offset to add).
	 */
	rdpr	%tl, %g7
	cmp	%g7, 1
	ble,pt	%xcc, 0f
	  sethi	%hi(KERNELBASE), %g6
	rdpr	%tpc, %g7
	or	%g6, %lo(KERNELBASE), %g6
	cmp	%g7, %g6
	bgeu,pt	%xcc, 0f
	/* delay slot safe */

	ALTENTRY(tsbmiss_trapstat_patch_point)
	add	%g7, RUNTIME_PATCH, %g7	/* must match TSTAT_TSBMISS_INSTR */
	wrpr	%g7, %tpc
	add	%g7, 4, %g7
	wrpr	%g7, %tnpc
0:
	CPU_TSBMISS_AREA(%g6, %g7)

	stn	%g1, [%g6 + TSBMISS_TSBPTR]	/* save first tsb pointer */
	stn	%g3, [%g6 + TSBMISS_TSBPTR4M]	/* save second tsb pointer */

	sllx	%g2, TAGACC_CTX_LSHIFT, %g3
	brz,a,pn %g3, 1f			/* skip ahead if kernel */
	  ldn	[%g6 + TSBMISS_KHATID], %g7
	srlx	%g3, TAGACC_CTX_LSHIFT, %g3	/* g3 = ctxnum */
	ldn	[%g6 + TSBMISS_UHATID], %g7     /* g7 = hatid */

	HAT_PERCPU_STAT32(%g6, TSBMISS_UTSBMISS, %g5)

	cmp	%g3, INVALID_CONTEXT
	be,pn	%icc, tsb_tl0_noctxt		/* no ctx miss exception */
	  stn	%g7, [%g6 + (TSBMISS_SCRATCH + TSBMISS_HATID)]

#ifdef sun4v
        ldub    [%g6 + TSBMISS_URTTEFLAGS], %g7	/* clear ctx1 flag set from */
        andn    %g7, HAT_CHKCTX1_FLAG, %g7	/* the previous tsb miss    */
        stub    %g7, [%g6 + TSBMISS_URTTEFLAGS]
#endif
	
	ISM_CHECK(%g2, %g6, %g3, %g4, %g5, %g7, %g1, tsb_l1, tsb_ism)
	/*
	 * The miss wasn't in an ISM segment.
	 *
	 * %g1, %g3, %g4, %g5, %g7 all clobbered
	 * %g2 = (pseudo) tag access
	 */

	ba,pt	%icc, 2f
	  ldn	[%g6 + (TSBMISS_SCRATCH + TSBMISS_HATID)], %g7

1:
	HAT_PERCPU_STAT32(%g6, TSBMISS_KTSBMISS, %g5)
	/*
	 * 8K and 64K hash.
	 */
2:

	GET_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
		MMU_PAGESHIFT64K, TTE64K, tsb_l8K, tsb_checktte,
		sfmmu_suspend_tl, tsb_512K)
	/* NOT REACHED */

tsb_512K:
	sllx	%g2, TAGACC_CTX_LSHIFT, %g5
	brz,pn	%g5, 3f
	  ldub	[%g6 + TSBMISS_UTTEFLAGS], %g4
	and	%g4, HAT_512K_FLAG, %g5

	/*
	 * Note that there is a small window here where we may have
	 * a 512k page in the hash list but have not set the HAT_512K_FLAG
	 * flag yet, so we will skip searching the 512k hash list.
	 * In this case we will end up in pagefault which will find
	 * the mapping and return.  So, in this instance we will end up
	 * spending a bit more time resolving this TSB miss, but it can
	 * only happen once per process and even then, the chances of that
	 * are very small, so it's not worth the extra overhead it would
	 * take to close this window.
	 */
	brz,pn	%g5, tsb_4M
	  nop
3:	
	/*
	 * 512K hash
	 */
	
	GET_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
		MMU_PAGESHIFT512K, TTE512K, tsb_l512K, tsb_checktte,
		sfmmu_suspend_tl, tsb_4M)
	/* NOT REACHED */

tsb_4M:
	sllx	%g2, TAGACC_CTX_LSHIFT, %g5
	brz,pn	%g5, 4f
	  ldub	[%g6 + TSBMISS_UTTEFLAGS], %g4
	and	%g4, HAT_4M_FLAG, %g5
	brz,pn	%g5, tsb_32M
	  nop
4:	
	/*
	 * 4M hash
	 */

	GET_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
		MMU_PAGESHIFT4M, TTE4M, tsb_l4M, tsb_checktte,
		sfmmu_suspend_tl, tsb_32M)
	/* NOT REACHED */

tsb_32M:
	sllx	%g2, TAGACC_CTX_LSHIFT, %g5
#ifdef sun4v
        brz,pn	%g5, 6f
#else
	brz,pn	%g5, tsb_pagefault
#endif        
	  ldub	[%g6 + TSBMISS_UTTEFLAGS], %g4
	and	%g4, HAT_32M_FLAG, %g5
	brz,pn	%g5, tsb_256M
	  nop
5:	
	/*
	 * 32M hash
	 */

	GET_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
		MMU_PAGESHIFT32M, TTE32M, tsb_l32M, tsb_checktte,
		sfmmu_suspend_tl, tsb_256M)
	/* NOT REACHED */

#ifdef sun4u
#define tsb_shme        tsb_pagefault
#endif
tsb_256M:
	ldub	[%g6 + TSBMISS_UTTEFLAGS], %g4
	and	%g4, HAT_256M_FLAG, %g5
	brz,pn	%g5, tsb_shme
	  nop
6:	
	/*
	 * 256M hash
	 */
		
	GET_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
	    MMU_PAGESHIFT256M, TTE256M, tsb_l256M, tsb_checktte,
	    sfmmu_suspend_tl, tsb_shme)
	/* NOT REACHED */

tsb_checktte:
	/*
	 * g1 = hblk_misc
	 * g2 = tagacc
	 * g3 = tte
	 * g4 = tte pa
	 * g5 = tte va
	 * g6 = tsbmiss area
	 * g7 = hatid
	 */
	brlz,a,pt %g3, tsb_validtte
	  rdpr	%tt, %g7

#ifdef sun4u
#undef tsb_shme
	ba	tsb_pagefault
	  nop	
#else

tsb_shme:
	/*
	 * g2 = tagacc
	 * g6 = tsbmiss area
	 */
	sllx	%g2, TAGACC_CTX_LSHIFT, %g5
	brz,pn	%g5, tsb_pagefault
	  nop
	ldx	[%g6 + TSBMISS_SHARED_UHATID], %g7	/* g7 = srdp */
	brz,pn	%g7, tsb_pagefault
	  nop

	GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
		MMU_PAGESHIFT64K, TTE64K, tsb_shme_l8K, tsb_shme_checktte,
		sfmmu_suspend_tl, tsb_shme_512K)
	/* NOT REACHED */

tsb_shme_512K:
	ldub	[%g6 + TSBMISS_URTTEFLAGS], %g4
	and	%g4, HAT_512K_FLAG, %g5
	brz,pn	%g5, tsb_shme_4M
	  nop

	/*
	 * 512K hash
	 */
	
	GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
		MMU_PAGESHIFT512K, TTE512K, tsb_shme_l512K, tsb_shme_checktte,
		sfmmu_suspend_tl, tsb_shme_4M)
	/* NOT REACHED */

tsb_shme_4M:
	ldub	[%g6 + TSBMISS_URTTEFLAGS], %g4
	and	%g4, HAT_4M_FLAG, %g5
	brz,pn	%g5, tsb_shme_32M
	  nop
4:	
	/*
	 * 4M hash
	 */
	GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
		MMU_PAGESHIFT4M, TTE4M, tsb_shme_l4M, tsb_shme_checktte,
		sfmmu_suspend_tl, tsb_shme_32M)
	/* NOT REACHED */

tsb_shme_32M:
	ldub	[%g6 + TSBMISS_URTTEFLAGS], %g4
	and	%g4, HAT_32M_FLAG, %g5
	brz,pn	%g5, tsb_shme_256M
	  nop

	/*
	 * 32M hash
	 */

	GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
		MMU_PAGESHIFT32M, TTE32M, tsb_shme_l32M, tsb_shme_checktte,
		sfmmu_suspend_tl, tsb_shme_256M)
	/* NOT REACHED */

tsb_shme_256M:
	ldub	[%g6 + TSBMISS_URTTEFLAGS], %g4
	and	%g4, HAT_256M_FLAG, %g5
	brz,pn	%g5, tsb_pagefault
	  nop

	/*
	 * 256M hash
	 */
		
	GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
	    MMU_PAGESHIFT256M, TTE256M, tsb_shme_l256M, tsb_shme_checktte,
	    sfmmu_suspend_tl, tsb_pagefault)
	/* NOT REACHED */

tsb_shme_checktte:

	brgez,pn %g3, tsb_pagefault
	  rdpr	%tt, %g7
	/*
	 * g1 = ctx1 flag
	 * g3 = tte
	 * g4 = tte pa
	 * g5 = tte va
	 * g6 = tsbmiss area
	 * g7 = tt
	 */

	brz,pt  %g1, tsb_validtte
	  nop
	ldub    [%g6 + TSBMISS_URTTEFLAGS], %g1
	or	%g1, HAT_CHKCTX1_FLAG, %g1
	stub    %g1, [%g6 + TSBMISS_URTTEFLAGS]

	SAVE_CTX1(%g7, %g2, %g1, tsb_shmel)
#endif /* sun4u */

tsb_validtte:
	/*
	 * g3 = tte
	 * g4 = tte pa
	 * g5 = tte va
	 * g6 = tsbmiss area
	 * g7 = tt
	 */

	/*
	 * Set ref/mod bits if this is a prot trap.  Usually, it isn't.
	 */
	cmp	%g7, FAST_PROT_TT
	bne,pt	%icc, 4f
	  nop

	TTE_SET_REFMOD_ML(%g3, %g4, %g5, %g6, %g7, tsb_lset_refmod,
	    tsb_protfault) 

	rdpr	%tt, %g5
	GET_MMU_D_TTARGET(%g2, %g7)		/* %g2 = ttarget */
#ifdef sun4v
	MMU_FAULT_STATUS_AREA(%g7)
	ldx	[%g7 + MMFSA_D_ADDR], %g5	/* save fault addr for later */
#endif	
	ba,pt	%xcc, tsb_update_tl1
	  nop	

4:
	/* 
	 * If ITLB miss check exec bit.
	 * If not set treat as invalid TTE.
	 */
	cmp     %g7, T_INSTR_MMU_MISS
	be,pn	%icc, 5f
	  andcc   %g3, TTE_EXECPRM_INT, %g0	/* check execute bit is set */
	cmp     %g7, FAST_IMMU_MISS_TT
	bne,pt %icc, 3f
	  andcc   %g3, TTE_EXECPRM_INT, %g0	/* check execute bit is set */
5:
	bz,pn %icc, tsb_protfault
	  nop

3:
	/*
	 * Set reference bit if not already set
	 */
	TTE_SET_REF_ML(%g3, %g4, %g5, %g6, %g7, tsb_lset_ref) 

	/*
	 * Now, load into TSB/TLB.  At this point:
	 * g3 = tte
	 * g4 = patte
	 * g6 = tsbmiss area
	 */
	rdpr	%tt, %g5
#ifdef sun4v
	MMU_FAULT_STATUS_AREA(%g2)
	cmp	%g5, T_INSTR_MMU_MISS
	be,a,pt	%icc, 9f
	  nop
	cmp	%g5, FAST_IMMU_MISS_TT
	be,a,pt	%icc, 9f
	  nop
	add	%g2, MMFSA_D_, %g2
9:
	ldx	[%g2 + MMFSA_CTX_], %g7
	sllx	%g7, TTARGET_CTX_SHIFT, %g7
	ldx	[%g2 + MMFSA_ADDR_], %g2
	mov	%g2, %g5		! save the fault addr for later use
	srlx	%g2, TTARGET_VA_SHIFT, %g2
	or	%g2, %g7, %g2
#else
	cmp	%g5, FAST_IMMU_MISS_TT
	be,a,pt	%icc, tsb_update_tl1
	  ldxa	[%g0]ASI_IMMU, %g2
	ldxa	[%g0]ASI_DMMU, %g2
#endif
tsb_update_tl1:
	srlx	%g2, TTARGET_CTX_SHIFT, %g7
	brz,pn	%g7, tsb_kernel
#ifdef sun4v
	  and	%g3, TTE_SZ_BITS, %g7	! assumes TTE_SZ_SHFT is 0
#else
	  srlx	%g3, TTE_SZ_SHFT, %g7
#endif

tsb_user:
#ifdef sun4v
	cmp	%g7, TTE4M
	bge,pn	%icc, tsb_user4m
	  nop
#else /* sun4v */
	cmp	%g7, TTESZ_VALID | TTE4M
	be,pn	%icc, tsb_user4m
	  srlx	%g3, TTE_SZ2_SHFT, %g7
	andcc	%g7, TTE_SZ2_BITS, %g7		! check 32/256MB
#ifdef ITLB_32M_256M_SUPPORT
	bnz,pn	%icc, tsb_user4m
	  nop
#else /* ITLB_32M_256M_SUPPORT */
	bnz,a,pn %icc, tsb_user_pn_synth
	 cmp	%g5, FAST_IMMU_MISS_TT
#endif /* ITLB_32M_256M_SUPPORT */
#endif /* sun4v */

tsb_user8k:
#ifdef sun4v
	ldub	[%g6 + TSBMISS_URTTEFLAGS], %g7
	and	%g7, HAT_CHKCTX1_FLAG, %g1
	brz,a,pn %g1, 1f
	  ldn	[%g6 + TSBMISS_TSBPTR], %g1	! g1 = first TSB ptr
	GET_UTSBREG_SHCTX(%g6, TSBMISS_TSBSCDPTR, %g1)
	brlz,a,pn %g1, ptl1_panic			! if no shared tsb
	  mov PTL1_NO_SCDTSB8K, %g1			! panic
	GET_3RD_TSBE_PTR(%g5, %g1, %g6, %g7)
1:
#else
	ldn	[%g6 + TSBMISS_TSBPTR], %g1	! g1 = first TSB ptr

#ifndef UTSB_PHYS
	mov	ASI_N, %g7	! user TSBs accessed by VA
	mov	%g7, %asi
#endif /* UTSB_PHYS */

#endif /* sun4v */

	TSB_UPDATE_TL(%g1, %g3, %g2, %g4, %g7, %g6, 5)

#ifdef sun4v
	rdpr    %tt, %g5
	cmp	%g5, T_INSTR_MMU_MISS
	be,a,pn	%xcc, 9f
	  mov	%g3, %g5
#endif /* sun4v */
	cmp	%g5, FAST_IMMU_MISS_TT
	be,pn	%xcc, 9f
	  mov	%g3, %g5

	DTLB_STUFF(%g5, %g1, %g2, %g3, %g4)
	! trapstat wants TTE in %g5
	retry
9:
	ITLB_STUFF(%g5, %g1, %g2, %g3, %g4)
	! trapstat wants TTE in %g5
	retry

tsb_user4m:
#ifdef sun4v
	ldub	[%g6 + TSBMISS_URTTEFLAGS], %g7
	and	%g7, HAT_CHKCTX1_FLAG, %g1
	brz,a,pn %g1, 4f
	  ldn	[%g6 + TSBMISS_TSBPTR4M], %g1		! g1 = TSB ptr
	GET_UTSBREG_SHCTX(%g6, TSBMISS_TSBSCDPTR4M, %g1)
	brlz,a,pn %g1, 5f				! if no shared 2nd tsb
	  nop
	GET_4TH_TSBE_PTR(%g5, %g1, %g6, %g7)
#else
	ldn	[%g6 + TSBMISS_TSBPTR4M], %g1		! g1 = TSB ptr
#endif
4:
	brlz,pn %g1, 5f	/* Check to see if we have 2nd TSB programmed */
	  nop

#ifndef UTSB_PHYS
	mov	ASI_N, %g7	! user TSBs accessed by VA
	mov	%g7, %asi
#endif /* UTSB_PHYS */

	TSB_UPDATE_TL(%g1, %g3, %g2, %g4, %g7, %g6, 6)

5:
#ifdef sun4v
	rdpr	%tt, %g5
	cmp	%g5, T_INSTR_MMU_MISS
	be,a,pn	%xcc, 9f
	  mov	%g3, %g5
#endif /* sun4v */
	cmp	%g5, FAST_IMMU_MISS_TT
	be,pn	%xcc, 9f
	  mov	%g3, %g5

	DTLB_STUFF(%g5, %g1, %g2, %g3, %g4)
	! trapstat wants TTE in %g5
	retry
9:
	ITLB_STUFF(%g5, %g1, %g2, %g3, %g4)
	! trapstat wants TTE in %g5
	retry

#if !defined(sun4v) && !defined(ITLB_32M_256M_SUPPORT)
	/*
	 * Panther ITLB synthesis.
	 * The Panther 32M and 256M ITLB code simulates these two large page
	 * sizes with 4M pages, to provide support for programs, for example
	 * Java, that may copy instructions into a 32M or 256M data page and
	 * then execute them. The code below generates the 4M pfn bits and
	 * saves them in the modified 32M/256M ttes in the TSB. If the tte is
	 * stored in the DTLB to map a 32M/256M page, the 4M pfn offset bits
	 * are ignored by the hardware.
	 * 
	 * Now, load into TSB/TLB.  At this point:
	 * g2 = tagtarget
	 * g3 = tte
	 * g4 = patte
	 * g5 = tt
	 * g6 = tsbmiss area
	 */
tsb_user_pn_synth:
	be,pt	%xcc, tsb_user_itlb_synth	/* ITLB miss */
	  andcc %g3, TTE_EXECPRM_INT, %g0	/* is execprm bit set */
	bz,pn %icc, 4b				/* if not, been here before */
	  ldn	[%g6 + TSBMISS_TSBPTR4M], %g1	/* g1 = tsbp */
	brlz,a,pn %g1, 5f			/* no 2nd tsb */
	  mov	%g3, %g5

	mov	MMU_TAG_ACCESS, %g7
	ldxa	[%g7]ASI_DMMU, %g6		/* get tag access va */
	GET_4M_PFN_OFF(%g3, %g6, %g5, %g7, 1)	/* make 4M pfn offset */

	mov	ASI_N, %g7	/* user TSBs always accessed by VA */
	mov	%g7, %asi
	TSB_UPDATE_TL_PN(%g1, %g5, %g2, %g4, %g7, %g3, 4) /* update TSB */
5:
	DTLB_STUFF(%g5, %g1, %g2, %g3, %g4)
	retry

tsb_user_itlb_synth:
	ldn	[%g6 + TSBMISS_TSBPTR4M], %g1		/* g1 = tsbp */

	mov	MMU_TAG_ACCESS, %g7
	ldxa	[%g7]ASI_IMMU, %g6		/* get tag access va */
	GET_4M_PFN_OFF(%g3, %g6, %g5, %g7, 2)	/* make 4M pfn offset */
	brlz,a,pn %g1, 7f	/* Check to see if we have 2nd TSB programmed */
	  or	%g5, %g3, %g5			/* add 4M bits to TTE */

	mov	ASI_N, %g7	/* user TSBs always accessed by VA */
	mov	%g7, %asi
	TSB_UPDATE_TL_PN(%g1, %g5, %g2, %g4, %g7, %g3, 6) /* update TSB */
7:
	SET_TTE4M_PN(%g5, %g7)			/* add TTE4M pagesize to TTE */
	ITLB_STUFF(%g5, %g1, %g2, %g3, %g4)
	retry
#endif /* !sun4v && !ITLB_32M_256M_SUPPORT */

tsb_kernel:
#ifdef sun4v
	rdpr	%tt, %g5
	cmp	%g7, TTE4M
	bge,pn	%icc, 5f
#else
	cmp	%g7, TTESZ_VALID | TTE4M	! no 32M or 256M support
	be,pn	%icc, 5f
#endif
	  nop
	ldn	[%g6 + TSBMISS_TSBPTR], %g1	! g1 = 8k tsbptr
	ba,pt	%xcc, 6f
	  nop
5:
	ldn	[%g6 + TSBMISS_TSBPTR4M], %g1	! g1 = 4m tsbptr
	brlz,pn	%g1, 3f		/* skip programming if 4m TSB ptr is -1 */
	  nop
6:
#ifndef sun4v
tsb_kernel_patch_asi:
	or	%g0, RUNTIME_PATCH, %g6
	mov	%g6, %asi	! XXX avoid writing to %asi !!
#endif
	TSB_UPDATE_TL(%g1, %g3, %g2, %g4, %g7, %g6, 7)
3:
#ifdef sun4v
	cmp	%g5, T_INSTR_MMU_MISS
	be,a,pn	%icc, 1f
	  mov	%g3, %g5			! trapstat wants TTE in %g5
#endif /* sun4v */
	cmp	%g5, FAST_IMMU_MISS_TT
	be,pn	%icc, 1f
	  mov	%g3, %g5			! trapstat wants TTE in %g5
	DTLB_STUFF(%g5, %g1, %g2, %g3, %g4)
	! trapstat wants TTE in %g5
	retry
1:
	ITLB_STUFF(%g5, %g1, %g2, %g3, %g4)
	! trapstat wants TTE in %g5
	retry

tsb_ism:
	/*
	 * This is an ISM [i|d]tlb miss.  We optimize for largest
	 * page size down to smallest.
	 *
	 * g2 = vaddr + ctx(or ctxtype (sun4v)) aka (pseudo-)tag access
	 *	register
	 * g3 = ismmap->ism_seg
	 * g4 = physical address of ismmap->ism_sfmmu
	 * g6 = tsbmiss area
	 */
	ldna	[%g4]ASI_MEM, %g7		/* g7 = ism hatid */
	brz,a,pn %g7, ptl1_panic		/* if zero, panic */
	  mov	PTL1_BAD_ISM, %g1
						/* g5 = pa of imap_vb_shift */
	sub	%g4, (IMAP_ISMHAT - IMAP_VB_SHIFT), %g5
	lduba	[%g5]ASI_MEM, %g4		/* g4 = imap_vb_shift */
	srlx	%g3, %g4, %g3			/* clr size field */
	set	TAGACC_CTX_MASK, %g1		/* mask off ctx number */
	sllx    %g3, %g4, %g3                   /* g3 = ism vbase */
	and     %g2, %g1, %g4                   /* g4 = ctx number */
	andn    %g2, %g1, %g1                   /* g1 = tlb miss vaddr */
	sub     %g1, %g3, %g2                   /* g2 = offset in ISM seg */
	or      %g2, %g4, %g2                   /* g2 = (pseudo-)tagacc */
	sub     %g5, (IMAP_VB_SHIFT - IMAP_HATFLAGS), %g5
	lduha   [%g5]ASI_MEM, %g4               /* g4 = imap_hatflags */
#ifdef sun4v
	and     %g4, HAT_CTX1_FLAG, %g5         /* g5 = HAT_CTX1_FLAG bit */
	brz,pt %g5, tsb_chk4M_ism
	  nop
	ldub    [%g6 + TSBMISS_URTTEFLAGS], %g5
	or      %g5, HAT_CHKCTX1_FLAG, %g5
	stub    %g5, [%g6 + TSBMISS_URTTEFLAGS]
	rdpr    %tt, %g5
	SAVE_CTX1(%g5, %g3, %g1, tsb_shctxl)
#endif
	/*
	 * ISM pages are always locked down.
	 * If we can't find the tte then pagefault
	 * and let the spt segment driver resolve it.
	 *
	 * g2 = tagacc w/ISM vaddr (offset in ISM seg)
	 * g4 = imap_hatflags
	 * g6 = tsb miss area
	 * g7 = ISM hatid
	 */

tsb_chk4M_ism:
	and	%g4, HAT_4M_FLAG, %g5		/* g4 = imap_hatflags */
	brnz,pt	%g5, tsb_ism_4M			/* branch if 4M pages */
	  nop

tsb_ism_32M:
	and	%g4, HAT_32M_FLAG, %g5		/* check default 32M next */
	brz,pn	%g5, tsb_ism_256M
	  nop

	/*
	 * 32M hash.
	 */
		
	GET_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1, MMU_PAGESHIFT32M,
	    TTE32M, tsb_ism_l32M, tsb_ism_32M_found, sfmmu_suspend_tl,
	    tsb_ism_4M)
	/* NOT REACHED */
	
tsb_ism_32M_found:
	brlz,a,pt %g3, tsb_validtte
	  rdpr	%tt, %g7
	ba,pt	%xcc, tsb_ism_4M
	  nop

tsb_ism_256M:
	and	%g4, HAT_256M_FLAG, %g5		/* 256M is last resort */
	brz,a,pn %g5, ptl1_panic
	  mov	PTL1_BAD_ISM, %g1

	/*
	 * 256M hash.
	 */
	GET_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1, MMU_PAGESHIFT256M,
	    TTE256M, tsb_ism_l256M, tsb_ism_256M_found, sfmmu_suspend_tl,
	    tsb_ism_4M)

tsb_ism_256M_found:
	brlz,a,pt %g3, tsb_validtte
	  rdpr	%tt, %g7

tsb_ism_4M:
	/*
	 * 4M hash.
	 */
	GET_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1, MMU_PAGESHIFT4M,
	    TTE4M, tsb_ism_l4M, tsb_ism_4M_found, sfmmu_suspend_tl,
	    tsb_ism_8K)
	/* NOT REACHED */

tsb_ism_4M_found:
	brlz,a,pt %g3, tsb_validtte
	  rdpr	%tt, %g7

tsb_ism_8K:
	/*
	 * 8K and 64K hash.
	 */
	
	GET_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1, MMU_PAGESHIFT64K,
	    TTE64K, tsb_ism_l8K, tsb_ism_8K_found, sfmmu_suspend_tl,
	    tsb_pagefault)
	/* NOT REACHED */

tsb_ism_8K_found:
	brlz,a,pt %g3, tsb_validtte
	  rdpr	%tt, %g7

tsb_pagefault:
	rdpr	%tt, %g7
	cmp	%g7, FAST_PROT_TT
	be,a,pn	%icc, tsb_protfault
	  wrpr	%g0, FAST_DMMU_MISS_TT, %tt

tsb_protfault:
	/*
	 * we get here if we couldn't find a valid tte in the hash.
	 *
	 * If user and we are at tl>1 we go to window handling code.
	 *
	 * If kernel and the fault is on the same page as our stack
	 * pointer, then we know the stack is bad and the trap handler
	 * will fail, so we call ptl1_panic with PTL1_BAD_STACK.
	 *
	 * If this is a kernel trap and tl>1, panic.
	 *
	 * Otherwise we call pagefault.
	 */
	cmp	%g7, FAST_IMMU_MISS_TT
#ifdef sun4v
	MMU_FAULT_STATUS_AREA(%g4)
	ldx	[%g4 + MMFSA_I_CTX], %g5
	ldx	[%g4 + MMFSA_D_CTX], %g4
	move	%icc, %g5, %g4
	cmp	%g7, T_INSTR_MMU_MISS
	move	%icc, %g5, %g4
#else
	mov	MMU_TAG_ACCESS, %g4
	ldxa	[%g4]ASI_DMMU, %g2
	ldxa	[%g4]ASI_IMMU, %g5
	move	%icc, %g5, %g2
	cmp	%g7, T_INSTR_MMU_MISS
	move	%icc, %g5, %g2
	sllx	%g2, TAGACC_CTX_LSHIFT, %g4
#endif
	brnz,pn	%g4, 3f				/* skip if not kernel */
	  rdpr	%tl, %g5
	
	add	%sp, STACK_BIAS, %g3
	srlx	%g3, MMU_PAGESHIFT, %g3
	srlx	%g2, MMU_PAGESHIFT, %g4
	cmp	%g3, %g4
	be,a,pn	%icc, ptl1_panic		/* panic if bad %sp */
	  mov	PTL1_BAD_STACK, %g1
	
	cmp	%g5, 1
	ble,pt	%icc, 2f
	  nop
	TSTAT_CHECK_TL1(2f, %g1, %g2)
	rdpr	%tt, %g2
	cmp	%g2, FAST_PROT_TT
	mov	PTL1_BAD_KPROT_FAULT, %g1
	movne	%icc, PTL1_BAD_KMISS, %g1
	ba,pt	%icc, ptl1_panic
	  nop

2:
	/*
	 * We are taking a pagefault in the kernel on a kernel address.  If
	 * CPU_DTRACE_NOFAULT is set in the cpuc_dtrace_flags, we don't actually
	 * want to call sfmmu_pagefault -- we will instead note that a fault
	 * has occurred by setting CPU_DTRACE_BADADDR and issue a "done"
	 * (instead of a "retry").  This will step over the faulting
	 * instruction.
	 */
	CPU_INDEX(%g1, %g2)
	set	cpu_core, %g2
	sllx	%g1, CPU_CORE_SHIFT, %g1
	add	%g1, %g2, %g1
	lduh	[%g1 + CPUC_DTRACE_FLAGS], %g2
	andcc	%g2, CPU_DTRACE_NOFAULT, %g0
	bz	sfmmu_pagefault
	or	%g2, CPU_DTRACE_BADADDR, %g2
	stuh	%g2, [%g1 + CPUC_DTRACE_FLAGS]
	GET_MMU_D_ADDR(%g3, %g4)
	stx	%g3, [%g1 + CPUC_DTRACE_ILLVAL]
	done

3:
	cmp	%g5, 1
	ble,pt	%icc, 4f
	  nop
	TSTAT_CHECK_TL1(4f, %g1, %g2)
	ba,pt	%icc, sfmmu_window_trap
	  nop

4:
	/*
	 * We are taking a pagefault on a non-kernel address.  If we are in
	 * the kernel (e.g., due to a copyin()), we will check cpuc_dtrace_flags
	 * and (if CPU_DTRACE_NOFAULT is set) will proceed as outlined above.
	 */
	CPU_INDEX(%g1, %g2)
	set	cpu_core, %g2
	sllx	%g1, CPU_CORE_SHIFT, %g1
	add	%g1, %g2, %g1
	lduh	[%g1 + CPUC_DTRACE_FLAGS], %g2
	andcc	%g2, CPU_DTRACE_NOFAULT, %g0
	bz	sfmmu_mmu_trap
	or	%g2, CPU_DTRACE_BADADDR, %g2
	stuh	%g2, [%g1 + CPUC_DTRACE_FLAGS]
	GET_MMU_D_ADDR(%g3, %g4)
	stx	%g3, [%g1 + CPUC_DTRACE_ILLVAL]

	/*
	 * Be sure that we're actually taking this miss from the kernel --
	 * otherwise we have managed to return to user-level with
	 * CPU_DTRACE_NOFAULT set in cpuc_dtrace_flags.
	 */
	rdpr	%tstate, %g2
	btst	TSTATE_PRIV, %g2
	bz,a	ptl1_panic
	  mov	PTL1_BAD_DTRACE_FLAGS, %g1
	done

	ALTENTRY(tsb_tl0_noctxt)
	/*
	 * If we have no context, check to see if CPU_DTRACE_NOFAULT is set;
	 * if it is, indicate that we have faulted and issue a done.
	 */
	CPU_INDEX(%g5, %g6)
	set	cpu_core, %g6
	sllx	%g5, CPU_CORE_SHIFT, %g5
	add	%g5, %g6, %g5
	lduh	[%g5 + CPUC_DTRACE_FLAGS], %g6
	andcc	%g6, CPU_DTRACE_NOFAULT, %g0
	bz	1f
	or	%g6, CPU_DTRACE_BADADDR, %g6
	stuh	%g6, [%g5 + CPUC_DTRACE_FLAGS]
	GET_MMU_D_ADDR(%g3, %g4)
	stx	%g3, [%g5 + CPUC_DTRACE_ILLVAL]

	/*
	 * Be sure that we're actually taking this miss from the kernel --
	 * otherwise we have managed to return to user-level with
	 * CPU_DTRACE_NOFAULT set in cpuc_dtrace_flags.
	 */
	rdpr	%tstate, %g5
	btst	TSTATE_PRIV, %g5
	bz,a	ptl1_panic
	  mov	PTL1_BAD_DTRACE_FLAGS, %g1
	TSTAT_CHECK_TL1(2f, %g1, %g2)
2:
	done

1:
	rdpr	%tt, %g5
	cmp	%g5, FAST_IMMU_MISS_TT
#ifdef sun4v
	MMU_FAULT_STATUS_AREA(%g2)
	be,a,pt	%icc, 2f
	  ldx	[%g2 + MMFSA_I_CTX], %g3
	cmp	%g5, T_INSTR_MMU_MISS
	be,a,pt	%icc, 2f
	  ldx	[%g2 + MMFSA_I_CTX], %g3
	ldx	[%g2 + MMFSA_D_CTX], %g3
2:
#else
	mov	MMU_TAG_ACCESS, %g2
	be,a,pt	%icc, 2f
	  ldxa	[%g2]ASI_IMMU, %g3
	ldxa	[%g2]ASI_DMMU, %g3
2:	sllx	%g3, TAGACC_CTX_LSHIFT, %g3
#endif
	brz,a,pn %g3, ptl1_panic		! panic if called for kernel
	  mov	PTL1_BAD_CTX_STEAL, %g1		! since kernel ctx was stolen
	rdpr	%tl, %g5
	cmp	%g5, 1
	ble,pt	%icc, sfmmu_mmu_trap
	  nop
	TSTAT_CHECK_TL1(sfmmu_mmu_trap, %g1, %g2)
	ba,pt	%icc, sfmmu_window_trap
	  nop
	SET_SIZE(sfmmu_tsb_miss)
#endif /* lint */

#if defined (lint)
/*
 * This routine will look for a user or kernel vaddr in the hash
 * structure.  It returns a valid pfn or PFN_INVALID.  It doesn't
 * grab any locks.  It should only be used by other sfmmu routines.
 */
/* ARGSUSED */
pfn_t
sfmmu_vatopfn(caddr_t vaddr, sfmmu_t *sfmmup, tte_t *ttep)
{
	return(0);
}

/* ARGSUSED */
pfn_t
sfmmu_kvaszc2pfn(caddr_t vaddr, int hashno)
{
	return(0);
}

#else /* lint */

	ENTRY_NP(sfmmu_vatopfn)
	/*
	 * Disable interrupts to protect the TSBMISS area.
	 */
	rdpr	%pstate, %o3
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o3, sfmmu_di_l5, %g1)
#endif
	andn    %o3, PSTATE_IE, %o5
	wrpr    %o5, 0, %pstate

	/*
	 * o0 = vaddr
	 * o1 = sfmmup
	 * o2 = ttep	
	 */
	CPU_TSBMISS_AREA(%g1, %o5)
	ldn	[%g1 + TSBMISS_KHATID], %o4
	cmp	%o4, %o1
	bne,pn	%ncc, vatopfn_nokernel
	  mov	TTE64K, %g5			/* g5 = rehash # */
	mov %g1,%o5				/* o5 = tsbmiss_area */
	/*
	 * o0 = vaddr
	 * o1 & o4 = hatid
	 * o2 = ttep
	 * o5 = tsbmiss area
	 */
	mov	HBLK_RANGE_SHIFT, %g6
1:

	/*
	 * o0 = vaddr
	 * o1 = sfmmup
	 * o2 = ttep
	 * o3 = old %pstate
	 * o4 = hatid
	 * o5 = tsbmiss
	 * g5 = rehash #
	 * g6 = hmeshift
	 *
	 * The first arg to GET_TTE is actually the tag access register,
	 * not just the vaddr.  Since this call is for the kernel we need
	 * to clear any lower vaddr bits that would be interpreted as
	 * ctx bits.
	 */
	set     TAGACC_CTX_MASK, %g1
	andn    %o0, %g1, %o0
	GET_TTE(%o0, %o4, %g1, %g2, %g3, %o5, %g4, %g6, %g5,
		vatopfn_l1, kvtop_hblk_found, tsb_suspend, kvtop_nohblk)

kvtop_hblk_found:
	/*
	 * o0 = vaddr
	 * o1 = sfmmup
	 * o2 = ttep
	 * g1 = tte
	 * g2 = tte pa
	 * g3 = tte va
	 * o5 = tsbmiss area
	 * o1 = hat id
	 */
	brgez,a,pn %g1, 6f			/* if tte invalid goto tl0 */
	  mov	-1, %o0				/* output = -1 (PFN_INVALID) */
	stx %g1,[%o2]				/* put tte into *ttep */		
	TTETOPFN(%g1, %o0, vatopfn_l2, %g2, %g3, %g4)
	/*
	 * o0 = vaddr
	 * o1 = sfmmup
	 * o2 = ttep
	 * g1 = pfn
	 */
	ba,pt	%xcc, 6f
	  mov	%g1, %o0

kvtop_nohblk:
	/*
	 * we get here if we couldn't find a valid hblk in the hash.
	 * We rehash if necessary.
	 */
	ldn	[%o5 + (TSBMISS_SCRATCH + TSB_TAGACC)], %o0
#ifdef sun4v
	cmp	%g5, MAX_HASHCNT
#else
	cmp	%g5, DEFAULT_MAX_HASHCNT	/* no 32/256M kernel pages */
#endif
	be,a,pn	%icc, 6f
	  mov	-1, %o0				/* output = -1 (PFN_INVALID) */
	mov	%o1, %o4			/* restore hatid */
#ifdef sun4v
	add	%g5, 2, %g5
	cmp	%g5, 3
	move	%icc, MMU_PAGESHIFT4M, %g6
	ba,pt	%icc, 1b
	movne	%icc, MMU_PAGESHIFT256M, %g6
#else
	inc	%g5
	cmp	%g5, 2
	move	%icc, MMU_PAGESHIFT512K, %g6
	ba,pt	%icc, 1b
	movne	%icc, MMU_PAGESHIFT4M, %g6
#endif
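	/*
	 * Rehash progression sketch (hashno in %g5, hmeshift in %g6),
	 * matching the code above:
	 *
	 *	sun4u: 1 (8K/64K range) -> 2 (512K) -> 3 (4M),
	 *	    giving up at DEFAULT_MAX_HASHCNT
	 *	sun4v: 1 (8K/64K range) -> 3 (4M) -> 5 (256M),
	 *	    giving up at MAX_HASHCNT
	 */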
6:
	retl
	  wrpr	%g0, %o3, %pstate		/* re-enable interrupts */

tsb_suspend:
	/*
	 * o0 = vaddr
	 * o1 = sfmmup
	 * o2 = ttep
	 * o5 = tsbmiss area (o2 holds ttep, so o5 is used for tsbmiss)
	 * g1 = tte
	 * g2 = tte pa
	 * g3 = tte va
	 */
	stx %g1,[%o2]				/* put tte into *ttep */
	brgez,a,pn %g1, 8f			/* if tte invalid goto 8: */
	  sub	%g0, 1, %o0			/* output = PFN_INVALID */
	sub	%g0, 2, %o0			/* output = PFN_SUSPENDED */
8:
	retl
	  wrpr	%g0, %o3, %pstate		/* re-enable interrupts */

vatopfn_nokernel:
	/*
	 * This routine does NOT support user addresses; a separate C
	 * routine handles those.  The only reason the C routine does not
	 * handle kernel addresses as well is that we do va_to_pa while
	 * holding the hashlock.
	 */
	wrpr	%g0, %o3, %pstate		/* re-enable interrupts */
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(sfmmu_panic3), %o0
	call	panic
	 or	%o0, %lo(sfmmu_panic3), %o0

	SET_SIZE(sfmmu_vatopfn)

	/*
	 * %o0 = vaddr
	 * %o1 = hashno (aka szc)
	 *
	 * This routine is similar to sfmmu_vatopfn() but will only look for
	 * a kernel vaddr in the hash structure for the specified rehash value.
	 * It's just an optimization for the case when the pagesize for a given
	 * va range is already known (e.g. large page heap) and we don't want
	 * to start the search with rehash value 1 as sfmmu_vatopfn() does.
	 *
	 * Returns a valid pfn, or PFN_INVALID if the tte for the specified
	 * rehash # is not found, is invalid, or is suspended.
	 */
	ENTRY_NP(sfmmu_kvaszc2pfn)
	/*
	 * disable interrupts
	 */
	rdpr	%pstate, %o3
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o3, sfmmu_di_l6, %g1)
#endif
	/*
	 * disable interrupts to protect the TSBMISS area
	 */
	andn    %o3, PSTATE_IE, %o5
	wrpr    %o5, 0, %pstate

	CPU_TSBMISS_AREA(%g1, %o5)
	ldn	[%g1 + TSBMISS_KHATID], %o4
	sll	%o1, 1, %g6
	add	%g6, %o1, %g6
	add	%g6, MMU_PAGESHIFT, %g6
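	/*
	 * %g6 = hmeshift = 3 * hashno + MMU_PAGESHIFT; this is the page
	 * shift for the given rehash level (16/19/22/25/28 for
	 * 64K/512K/4M/32M/256M).
	 */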
	/*
	 * %o0 = vaddr
	 * %o1 = hashno
	 * %o3 = old %pstate
	 * %o4 = ksfmmup
	 * %g1 = tsbmiss area
	 * %g6 = hmeshift
	 */

	/*
	 * The first arg to GET_TTE is actually a tag access register
	 * value, not just a vaddr.  Since this call is for the kernel we
	 * need to clear any lower vaddr bits that would be interpreted
	 * as ctx bits.
	 */
	srlx	%o0, MMU_PAGESHIFT, %o0
	sllx	%o0, MMU_PAGESHIFT, %o0
	GET_TTE(%o0, %o4, %g3, %g4, %g5, %g1, %o5, %g6, %o1,
		kvaszc2pfn_l1, kvaszc2pfn_hblk_found, kvaszc2pfn_nohblk,
		kvaszc2pfn_nohblk)

kvaszc2pfn_hblk_found:
	/*
	 * %g3 = tte
	 * %o0 = vaddr
	 */
	brgez,a,pn %g3, 1f			/* check if tte is invalid */
	  mov	-1, %o0				/* output = -1 (PFN_INVALID) */
	TTETOPFN(%g3, %o0, kvaszc2pfn_l2, %g2, %g4, %g5)
	/*
	 * g3 = pfn
	 */
	ba,pt	%xcc, 1f
	  mov	%g3, %o0

kvaszc2pfn_nohblk:
	mov	-1, %o0

1:
	retl
	  wrpr	%g0, %o3, %pstate		/* re-enable interrupts */

	SET_SIZE(sfmmu_kvaszc2pfn)

#endif /* lint */



#if !defined(lint)

/*
 * kpm lock used between trap level tsbmiss handler and kpm C level.
 */
#define KPMLOCK_ENTER(kpmlckp, tmp1, label1, asi)			\
	mov     0xff, tmp1						;\
label1:									;\
	casa    [kpmlckp]asi, %g0, tmp1					;\
	brnz,pn tmp1, label1						;\
	mov     0xff, tmp1						;\
	membar  #LoadLoad

#define KPMLOCK_EXIT(kpmlckp, asi)					\
	membar  #LoadStore|#StoreStore					;\
	sta     %g0, [kpmlckp]asi
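
/*
 * Rough C equivalent of the lock brackets above (a sketch only; the
 * hardware primitive is a word-sized casa/sta pair through the given asi):
 *
 *	void
 *	kpmlock_enter(volatile uint32_t *lckp)
 *	{
 *		while (atomic_cas_32(lckp, 0, 0xff) != 0)
 *			continue;		// spin until the lock is free
 *		// membar #LoadLoad (acquire-side ordering)
 *	}
 *
 *	void
 *	kpmlock_exit(volatile uint32_t *lckp)
 *	{
 *		// membar #LoadStore|#StoreStore (release-side ordering)
 *		*lckp = 0;
 *	}
 */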

/*
 * Lookup a memseg for a given pfn and if found, return the physical
 * address of the corresponding struct memseg in mseg, otherwise
 * return MSEG_NULLPTR_PA. The kpmtsbm pointer must be provided in
 * tsbmp; %asi is assumed to be ASI_MEM.
 * This lookup is done by strictly traversing only the physical memseg
 * linkage. The more generic approach, to check the virtual linkage
 * before using the physical one (used e.g. with hmehash buckets), cannot
 * be used here. Memory DR operations can run in parallel to this
 * lookup without any locks, and updates of the physical and virtual
 * linkage cannot be done atomically with respect to each other. Because
 * physical address zero can be a valid physical address,
 * MSEG_NULLPTR_PA acts as the "physical NULL" pointer.
 */
#define	PAGE_NUM2MEMSEG_NOLOCK_PA(pfn, mseg, tsbmp, tmp1, tmp2, tmp3, label) \
	sethi	%hi(mhash_per_slot), tmp3 /* no tsbmp use due to DR */	;\
	ldx	[tmp3 + %lo(mhash_per_slot)], mseg			;\
	udivx	pfn, mseg, mseg						;\
	ldx	[tsbmp + KPMTSBM_MSEGPHASHPA], tmp1			;\
	and	mseg, SFMMU_N_MEM_SLOTS - 1, mseg			;\
	sllx	mseg, SFMMU_MEM_HASH_ENTRY_SHIFT, mseg			;\
	add	tmp1, mseg, tmp1					;\
	ldxa	[tmp1]%asi, mseg					;\
	cmp	mseg, MSEG_NULLPTR_PA					;\
	be,pn	%xcc, label/**/1		/* if not found */	;\
	  nop								;\
	ldxa	[mseg + MEMSEG_PAGES_BASE]%asi, tmp1			;\
	cmp	pfn, tmp1			/* pfn - pages_base */	;\
	blu,pn	%xcc, label/**/1					;\
	  ldxa	[mseg + MEMSEG_PAGES_END]%asi, tmp2			;\
	cmp	pfn, tmp2			/* pfn - pages_end */	;\
	bgeu,pn	%xcc, label/**/1					;\
	  sub	pfn, tmp1, tmp1			/* pfn - pages_base */	;\
	mulx	tmp1, PAGE_SIZE, tmp1					;\
	ldxa	[mseg + MEMSEG_PAGESPA]%asi, tmp2	/* pages */	;\
	add	tmp2, tmp1, tmp1			/* pp */	;\
	lduwa	[tmp1 + PAGE_PAGENUM]%asi, tmp2				;\
	cmp	tmp2, pfn						;\
	be,pt	%xcc, label/**/_ok			/* found */	;\
label/**/1:								;\
	/* brute force lookup */					;\
	sethi	%hi(memsegspa), tmp3 /* no tsbmp use due to DR */	;\
	ldx	[tmp3 + %lo(memsegspa)], mseg				;\
label/**/2:								;\
	cmp	mseg, MSEG_NULLPTR_PA					;\
	be,pn	%xcc, label/**/_ok		/* if not found */	;\
	  nop								;\
	ldxa	[mseg + MEMSEG_PAGES_BASE]%asi, tmp1			;\
	cmp	pfn, tmp1			/* pfn - pages_base */	;\
	blu,a,pt %xcc, label/**/2					;\
	  ldxa	[mseg + MEMSEG_NEXTPA]%asi, mseg			;\
	ldxa	[mseg + MEMSEG_PAGES_END]%asi, tmp2			;\
	cmp	pfn, tmp2			/* pfn - pages_end */	;\
	bgeu,a,pt %xcc, label/**/2					;\
	  ldxa	[mseg + MEMSEG_NEXTPA]%asi, mseg			;\
label/**/_ok:
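
/*
 * Rough C sketch of the lookup above (names abbreviated; all loads are
 * physical through ASI_MEM):
 *
 *	mseg = mseg_phash[(pfn / mhash_per_slot) & (SFMMU_N_MEM_SLOTS - 1)];
 *	if (mseg == MSEG_NULLPTR_PA ||
 *	    pfn < mseg->pages_base || pfn >= mseg->pages_end ||
 *	    mseg->pages[pfn - mseg->pages_base].p_pagenum != pfn) {
 *		// hash miss or stale entry: brute force walk of the
 *		// physical memseg list
 *		for (mseg = memsegspa; mseg != MSEG_NULLPTR_PA;
 *		    mseg = mseg->nextpa) {
 *			if (pfn >= mseg->pages_base &&
 *			    pfn < mseg->pages_end)
 *				break;
 *		}
 *	}
 */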

	/*
	 * kpm tsb miss handler for large pages
	 * g1 = 8K kpm TSB entry pointer
	 * g2 = tag access register
	 * g3 = 4M kpm TSB entry pointer
	 */
	ALTENTRY(sfmmu_kpm_dtsb_miss)
	TT_TRACE(trace_tsbmiss)

	CPU_INDEX(%g7, %g6)
	sethi	%hi(kpmtsbm_area), %g6
	sllx	%g7, KPMTSBM_SHIFT, %g7
	or	%g6, %lo(kpmtsbm_area), %g6
	add	%g6, %g7, %g6			/* g6 = kpmtsbm ptr */

	/* check enable flag */
	ldub	[%g6 + KPMTSBM_FLAGS], %g4
	and	%g4, KPMTSBM_ENABLE_FLAG, %g5
	brz,pn	%g5, sfmmu_tsb_miss		/* if kpm not enabled */
	  nop

	/* VA range check */
	ldx	[%g6 + KPMTSBM_VBASE], %g7
	cmp	%g2, %g7
	blu,pn	%xcc, sfmmu_tsb_miss
	  ldx	[%g6 + KPMTSBM_VEND], %g5
	cmp	%g2, %g5	
	bgeu,pn	%xcc, sfmmu_tsb_miss
	  stx	%g3, [%g6 + KPMTSBM_TSBPTR]

	/*
	 * check TL tsbmiss handling flag
	 * bump tsbmiss counter
	 */
	lduw	[%g6 + KPMTSBM_TSBMISS], %g5
#ifdef	DEBUG
	and	%g4, KPMTSBM_TLTSBM_FLAG, %g3
	inc	%g5
	brz,pn	%g3, sfmmu_kpm_exception
	  st	%g5, [%g6 + KPMTSBM_TSBMISS] 
#else
	inc	%g5
	st	%g5, [%g6 + KPMTSBM_TSBMISS] 
#endif
	/*
	 * At this point:
	 *  g1 = 8K kpm TSB pointer (not used)
	 *  g2 = tag access register
	 *  g3 = clobbered
	 *  g6 = per-CPU kpm tsbmiss area
	 *  g7 = kpm_vbase
	 */

	/* vaddr2pfn */
	ldub	[%g6 + KPMTSBM_SZSHIFT], %g3
	sub	%g2, %g7, %g4			/* paddr = vaddr-kpm_vbase */
	srax    %g4, %g3, %g2			/* which alias range (r) */
	brnz,pn	%g2, sfmmu_kpm_exception	/* if (r != 0) goto C handler */
	  srlx	%g4, MMU_PAGESHIFT, %g2		/* %g2 = pfn */

	/*
	 * Setup %asi
	 * mseg_pa = page_numtomemseg_nolock(pfn)
	 * if (mseg_pa == NULL) sfmmu_kpm_exception
	 * g2=pfn
	 */
	mov	ASI_MEM, %asi
	PAGE_NUM2MEMSEG_NOLOCK_PA(%g2, %g3, %g6, %g4, %g5, %g7, kpmtsbmp2m)
	cmp	%g3, MSEG_NULLPTR_PA		
	be,pn	%xcc, sfmmu_kpm_exception	/* if mseg not found */
	  nop

	/*
	 * inx = ptokpmp((kpmptop((ptopkpmp(pfn))) - mseg_pa->kpm_pbase));
	 * g2=pfn g3=mseg_pa
	 */
	ldub	[%g6 + KPMTSBM_KPMP2PSHFT], %g5
	ldxa	[%g3 + MEMSEG_KPM_PBASE]%asi, %g7
	srlx	%g2, %g5, %g4
	sllx	%g4, %g5, %g4
	sub	%g4, %g7, %g4	
	srlx	%g4, %g5, %g4
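	/*
	 * In C terms the above is roughly (kpmp2pshft is the shift
	 * between regular and kpm large pages):
	 *	inx = (P2ALIGN(pfn, 1 << kpmp2pshft) - kpm_pbase)
	 *	    >> kpmp2pshft;
	 */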

	/*
	 * Validate inx value
	 * g2=pfn g3=mseg_pa g4=inx
	 */
#ifdef	DEBUG
	ldxa	[%g3 + MEMSEG_KPM_NKPMPGS]%asi, %g5
	cmp	%g4, %g5			/* inx - nkpmpgs */
	bgeu,pn	%xcc, sfmmu_kpm_exception	/* if out of range */
	  ld	[%g6 + KPMTSBM_KPMPTABLESZ], %g7
#else
	ld	[%g6 + KPMTSBM_KPMPTABLESZ], %g7
#endif
	/*
	 * kp = &mseg_pa->kpm_pages[inx]
	 */
	sllx	%g4, KPMPAGE_SHIFT, %g4		/* kpm_pages offset */
	ldxa	[%g3 + MEMSEG_KPM_PAGES]%asi, %g5 /* kpm_pages */
	add	%g5, %g4, %g5			/* kp */

	/*
	 * KPMP_HASH(kp)
	 * g2=pfn g3=mseg_pa g4=offset g5=kp g7=kpmp_table_sz
	 */
	ldub	[%g6 + KPMTSBM_KPMPSHIFT], %g1	/* kpmp_shift */
	sub	%g7, 1, %g7			/* mask */
	srlx	%g5, %g1, %g1			/* x = kp >> kpmp_shift */
	add	%g5, %g1, %g5			/* y = kp + x */
	and 	%g5, %g7, %g5			/* hashinx = y & mask */

	/*
	 * Calculate physical kpm_page pointer
	 * g2=pfn g3=mseg_pa g4=offset g5=hashinx
	 */
	ldxa	[%g3 + MEMSEG_KPM_PAGESPA]%asi, %g1 /* kpm_pagespa */
	add	%g1, %g4, %g1			/* kp_pa */

	/*
	 * Calculate physical hash lock address
	 * g1=kp_pa g2=pfn g5=hashinx
	 */
	ldx	[%g6 + KPMTSBM_KPMPTABLEPA], %g4 /* kpmp_tablepa */
	sllx	%g5, KPMHLK_SHIFT, %g5
	add	%g4, %g5, %g3
	add	%g3, KPMHLK_LOCK, %g3		/* hlck_pa */

	/*
	 * Assemble tte
	 * g1=kp_pa g2=pfn g3=hlck_pa
	 */
#ifdef sun4v
	sethi	%hi(TTE_VALID_INT), %g5		/* upper part */
	sllx	%g5, 32, %g5		
	mov	(TTE_CP_INT|TTE_CV_INT|TTE_PRIV_INT|TTE_HWWR_INT), %g4
	or	%g4, TTE4M, %g4
	or	%g5, %g4, %g5
#else
	sethi	%hi(TTE_VALID_INT), %g4	
	mov	TTE4M, %g5
	sllx	%g5, TTE_SZ_SHFT_INT, %g5
	or	%g5, %g4, %g5			/* upper part */
	sllx	%g5, 32, %g5		
	mov	(TTE_CP_INT|TTE_CV_INT|TTE_PRIV_INT|TTE_HWWR_INT), %g4
	or	%g5, %g4, %g5
#endif
	sllx	%g2, MMU_PAGESHIFT, %g4
	or	%g5, %g4, %g5			/* tte */
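	/*
	 * The assembled TTE is roughly, in C terms (a sketch; the 4M
	 * size field is encoded per platform as done above):
	 *
	 *	tte = ((uint64_t)TTE_VALID_INT << 32) | <4M size bits> |
	 *	    TTE_CP_INT | TTE_CV_INT | TTE_PRIV_INT | TTE_HWWR_INT |
	 *	    ((uint64_t)pfn << MMU_PAGESHIFT);
	 *
	 * i.e. a valid, privileged, cacheable, hw-writable 4M mapping.
	 */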
	ldx	[%g6 + KPMTSBM_TSBPTR], %g4
	GET_MMU_D_TTARGET(%g2, %g7)		/* %g2 = ttarget */

	/*
	 * tsb dropin
	 * g1=kp_pa g2=ttarget g3=hlck_pa g4=kpmtsbp4m g5=tte g6=kpmtsbm_area
	 */

	/* KPMLOCK_ENTER(kpmlckp, tmp1, label1, asi) */
	KPMLOCK_ENTER(%g3, %g7, kpmtsbmhdlr1, ASI_MEM)

	/* use C-handler if there's no go for dropin */
	ldsha	[%g1 + KPMPAGE_REFCNTC]%asi, %g7 /* kp_refcntc */
	cmp	%g7, -1
	bne,pn	%xcc, 5f
	  nop

#ifdef	DEBUG
	/* double check refcnt */
	ldsha	[%g1 + KPMPAGE_REFCNT]%asi, %g7
	brz,pn	%g7, 5f			/* let C-handler deal with this */ 
	  nop
#endif

#ifndef sun4v
	ldub	[%g6 + KPMTSBM_FLAGS], %g7
	mov	ASI_N, %g1
	andcc	%g7, KPMTSBM_TSBPHYS_FLAG, %g0
	movnz	%icc, ASI_MEM, %g1
	mov	%g1, %asi
#endif

	/* TSB_LOCK_ENTRY(tsbp, tmp1, tmp2, label) (needs %asi set) */
	TSB_LOCK_ENTRY(%g4, %g1, %g7, 6)

	/* TSB_INSERT_UNLOCK_ENTRY(tsbp, tte, tagtarget, tmp) */
	TSB_INSERT_UNLOCK_ENTRY(%g4, %g5, %g2, %g7)

	DTLB_STUFF(%g5, %g1, %g2, %g4, %g6)

	/* KPMLOCK_EXIT(kpmlckp, asi) */
	KPMLOCK_EXIT(%g3, ASI_MEM)

	/*
	 * If trapstat is running, we need to shift the %tpc and %tnpc to
	 * point to trapstat's TSB miss return code (note that trapstat
	 * itself will patch the correct offset to add).
	 * Note: TTE is expected in %g5 (allows per pagesize reporting).
	 */
	rdpr	%tl, %g7
	cmp	%g7, 1
	ble	%icc, 0f
	sethi	%hi(KERNELBASE), %g6
	rdpr	%tpc, %g7
	or	%g6, %lo(KERNELBASE), %g6
	cmp	%g7, %g6
	bgeu	%xcc, 0f
	ALTENTRY(tsbmiss_trapstat_patch_point_kpm)
	add	%g7, RUNTIME_PATCH, %g7	/* must match TSTAT_TSBMISS_INSTR */
	wrpr	%g7, %tpc
	add	%g7, 4, %g7
	wrpr	%g7, %tnpc
0:
	retry
5:
	/* g3=hlck_pa */ 
	KPMLOCK_EXIT(%g3, ASI_MEM)
	ba,pt	%icc, sfmmu_kpm_exception
	  nop
	SET_SIZE(sfmmu_kpm_dtsb_miss)

	/*
	 * kpm tsbmiss handler for small pages
	 * g1 = 8K kpm TSB pointer
	 * g2 = tag access register
	 * g3 = 4M kpm TSB pointer
	 */
	ALTENTRY(sfmmu_kpm_dtsb_miss_small)
	TT_TRACE(trace_tsbmiss)
	CPU_INDEX(%g7, %g6)
	sethi	%hi(kpmtsbm_area), %g6
	sllx	%g7, KPMTSBM_SHIFT, %g7
	or	%g6, %lo(kpmtsbm_area), %g6
	add	%g6, %g7, %g6			/* g6 = kpmtsbm ptr */

	/* check enable flag */
	ldub	[%g6 + KPMTSBM_FLAGS], %g4
	and	%g4, KPMTSBM_ENABLE_FLAG, %g5
	brz,pn	%g5, sfmmu_tsb_miss		/* if kpm not enabled */
	  nop

	/*
	 * VA range check
	 * On fail: goto sfmmu_tsb_miss
	 */
	ldx	[%g6 + KPMTSBM_VBASE], %g7
	cmp	%g2, %g7
	blu,pn	%xcc, sfmmu_tsb_miss
	  ldx	[%g6 + KPMTSBM_VEND], %g5
	cmp	%g2, %g5	
	bgeu,pn	%xcc, sfmmu_tsb_miss
	  stx	%g1, [%g6 + KPMTSBM_TSBPTR]	/* save 8K kpm TSB pointer */

	/*
	 * check TL tsbmiss handling flag
	 * bump tsbmiss counter 
	 */
	lduw	[%g6 + KPMTSBM_TSBMISS], %g5
#ifdef	DEBUG
	and	%g4, KPMTSBM_TLTSBM_FLAG, %g1
	inc	%g5
	brz,pn	%g1, sfmmu_kpm_exception
	  st	%g5, [%g6 + KPMTSBM_TSBMISS] 
#else
	inc	%g5
	st	%g5, [%g6 + KPMTSBM_TSBMISS] 
#endif
	/*
	 * At this point:
	 *  g1 = clobbered
	 *  g2 = tag access register
	 *  g3 = 4M kpm TSB pointer (not used)
	 *  g6 = per-CPU kpm tsbmiss area
	 *  g7 = kpm_vbase
	 */

	/* vaddr2pfn */
	ldub	[%g6 + KPMTSBM_SZSHIFT], %g3
	sub	%g2, %g7, %g4			/* paddr = vaddr-kpm_vbase */
	srax    %g4, %g3, %g2			/* which alias range (r) */
	brnz,pn	%g2, sfmmu_kpm_exception	/* if (r != 0) goto C handler */
	  srlx	%g4, MMU_PAGESHIFT, %g2		/* %g2 = pfn */

	/*
	 * Setup %asi
	 * mseg_pa = page_numtomemseg_nolock_pa(pfn)
	 * if (mseg not found) sfmmu_kpm_exception
	 * g2=pfn
	 */
	mov	ASI_MEM, %asi
	PAGE_NUM2MEMSEG_NOLOCK_PA(%g2, %g3, %g6, %g4, %g5, %g7, kpmtsbmsp2m)
	cmp	%g3, MSEG_NULLPTR_PA		
	be,pn	%xcc, sfmmu_kpm_exception	/* if mseg not found */
	  nop

	/*
	 * inx = pfn - mseg_pa->kpm_pbase
	 * g2=pfn g3=mseg_pa
	 */
	ldxa	[%g3 + MEMSEG_KPM_PBASE]%asi, %g7
	sub	%g2, %g7, %g4	

#ifdef	DEBUG
	/*
	 * Validate inx value
	 * g2=pfn g3=mseg_pa g4=inx
	 */
	ldxa	[%g3 + MEMSEG_KPM_NKPMPGS]%asi, %g5
	cmp	%g4, %g5			/* inx - nkpmpgs */
	bgeu,pn	%xcc, sfmmu_kpm_exception	/* if out of range */
	  ld	[%g6 + KPMTSBM_KPMPTABLESZ], %g7 
#else
	ld	[%g6 + KPMTSBM_KPMPTABLESZ], %g7 
#endif
	/* ksp = &mseg_pa->kpm_spages[inx] */
	ldxa	[%g3 + MEMSEG_KPM_SPAGES]%asi, %g5	
	add	%g5, %g4, %g5			/* ksp */

	/*
	 * KPMP_SHASH(ksp)
	 * g2=pfn g3=mseg_pa g4=inx g5=ksp g7=kpmp_stable_sz
	 */
	ldub	[%g6 + KPMTSBM_KPMPSHIFT], %g1	/* kpmp_shift */
	sub	%g7, 1, %g7			/* mask */
	sllx	%g5, %g1, %g1			/* x = ksp << kpmp_shift */
	add	%g5, %g1, %g5			/* y = ksp + x */
	and 	%g5, %g7, %g5			/* hashinx = y & mask */

	/*
	 * Calculate physical kpm_spage pointer
	 * g2=pfn g3=mseg_pa g4=offset g5=hashinx
	 */
	ldxa	[%g3 + MEMSEG_KPM_PAGESPA]%asi, %g1 /* kpm_spagespa */
	add	%g1, %g4, %g1			/* ksp_pa */

	/*
	 * Calculate physical hash lock address.
	 * Note: Changes in kpm_shlk_t must be reflected here.
	 * g1=ksp_pa g2=pfn g5=hashinx
	 */
	ldx	[%g6 + KPMTSBM_KPMPTABLEPA], %g4 /* kpmp_stablepa */
	sllx	%g5, KPMSHLK_SHIFT, %g5
	add	%g4, %g5, %g3			/* hlck_pa */

	/*
	 * Assemble tte
	 * g1=ksp_pa g2=pfn g3=hlck_pa
	 */
	sethi	%hi(TTE_VALID_INT), %g5		/* upper part */
	sllx	%g5, 32, %g5		
	mov	(TTE_CP_INT|TTE_CV_INT|TTE_PRIV_INT|TTE_HWWR_INT), %g4
	or	%g5, %g4, %g5
	sllx	%g2, MMU_PAGESHIFT, %g4
	or	%g5, %g4, %g5			/* tte */
	ldx	[%g6 + KPMTSBM_TSBPTR], %g4
	GET_MMU_D_TTARGET(%g2, %g7)		/* %g2 = ttarget */

	/*
	 * tsb dropin
	 * g1=ksp_pa g2=ttarget g3=hlck_pa g4=ktsbp g5=tte
	 */

	/* KPMLOCK_ENTER(kpmlckp, tmp1, label1, asi) */
	KPMLOCK_ENTER(%g3, %g7, kpmtsbsmlock, ASI_MEM)

	/* use C-handler if there's no go for dropin */
	ldsba	[%g1 + KPMSPAGE_MAPPED]%asi, %g7 /* kp_mapped */
	cmp	%g7, -1
	bne,pn	%xcc, 5f
	  nop	

#ifndef sun4v
	ldub	[%g6 + KPMTSBM_FLAGS], %g7
	mov	ASI_N, %g1
	andcc	%g7, KPMTSBM_TSBPHYS_FLAG, %g0
	movnz	%icc, ASI_MEM, %g1
	mov	%g1, %asi
#endif

	/* TSB_LOCK_ENTRY(tsbp, tmp1, tmp2, label) (needs %asi set) */
	TSB_LOCK_ENTRY(%g4, %g1, %g7, 6)

	/* TSB_INSERT_UNLOCK_ENTRY(tsbp, tte, tagtarget, tmp) */
	TSB_INSERT_UNLOCK_ENTRY(%g4, %g5, %g2, %g7)

	DTLB_STUFF(%g5, %g1, %g2, %g4, %g6)

	/* KPMLOCK_EXIT(kpmlckp, asi) */
	KPMLOCK_EXIT(%g3, ASI_MEM)

	/*
	 * If trapstat is running, we need to shift the %tpc and %tnpc to
	 * point to trapstat's TSB miss return code (note that trapstat
	 * itself will patch the correct offset to add).
	 * Note: TTE is expected in %g5 (allows per pagesize reporting).
	 */
	rdpr	%tl, %g7
	cmp	%g7, 1
	ble	%icc, 0f
	sethi	%hi(KERNELBASE), %g6
	rdpr	%tpc, %g7
	or	%g6, %lo(KERNELBASE), %g6
	cmp	%g7, %g6
	bgeu	%xcc, 0f
	ALTENTRY(tsbmiss_trapstat_patch_point_kpm_small)
	add	%g7, RUNTIME_PATCH, %g7	/* must match TSTAT_TSBMISS_INSTR */
	wrpr	%g7, %tpc
	add	%g7, 4, %g7
	wrpr	%g7, %tnpc
0:
	retry
5:
	/* g3=hlck_pa */ 
	KPMLOCK_EXIT(%g3, ASI_MEM)
	ba,pt	%icc, sfmmu_kpm_exception
	  nop
	SET_SIZE(sfmmu_kpm_dtsb_miss_small)

#if (1 << KPMTSBM_SHIFT) != KPMTSBM_SIZE
#error - KPMTSBM_SHIFT does not correspond to size of kpmtsbm struct
#endif

#endif /* lint */

#ifdef	lint
/*
 * Enable/disable tsbmiss handling at trap level for a kpm (large) page.
 * Called from C-level; sets/clears the "go" indication for the trap
 * level handler.  khl_lock is a low level spin lock to protect the
 * kp_tsbmtl field.  It is assumed that &kp->kp_refcntc is checked for
 * zero or -1 at C-level, and that khl_mutex is held when called from
 * C-level.
 */
/* ARGSUSED */
void
sfmmu_kpm_tsbmtl(short *kp_refcntc, uint_t *khl_lock, int cmd)
{
}

/*
 * kpm_smallpages: stores val to the byte at address "mapped", within
 * low level lock brackets; the old value is returned.
 * Called from C-level.
 */
/* ARGSUSED */
int
sfmmu_kpm_stsbmtl(char *mapped, uint_t *kshl_lock, int val)
{
	return (0);
}

#else /* lint */

	.seg	".data"
sfmmu_kpm_tsbmtl_panic:
	.ascii	"sfmmu_kpm_tsbmtl: interrupts disabled"
	.byte	0
sfmmu_kpm_stsbmtl_panic:
	.ascii	"sfmmu_kpm_stsbmtl: interrupts disabled"
	.byte	0
	.align	4
	.seg	".text"
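
/*
 * Rough C equivalents of the two routines below (sketches only;
 * kpmlock_enter/kpmlock_exit are the lock-bracket sketch given with
 * KPMLOCK_ENTER/KPMLOCK_EXIT above):
 *
 *	void
 *	sfmmu_kpm_tsbmtl(short *kp_refcntc, uint_t *khl_lock, int cmd)
 *	{
 *		// interrupts are disabled across the brackets
 *		kpmlock_enter(khl_lock);
 *		*kp_refcntc = (cmd != 0) ? -1 : 0;
 *		kpmlock_exit(khl_lock);
 *	}
 *
 *	int
 *	sfmmu_kpm_stsbmtl(char *mapped, uint_t *kshl_lock, int val)
 *	{
 *		int old;
 *
 *		kpmlock_enter(kshl_lock);
 *		old = *mapped;
 *		*mapped = val;
 *		kpmlock_exit(kshl_lock);
 *		return (old);
 *	}
 */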

	ENTRY_NP(sfmmu_kpm_tsbmtl)
	rdpr	%pstate, %o3
	/*
	 * %o0 = &kp_refcntc
	 * %o1 = &khl_lock
	 * %o2 = 0/1 (off/on)
	 * %o3 = pstate save
	 */
#ifdef DEBUG
	andcc	%o3, PSTATE_IE, %g0		/* if interrupts already */
	bnz,pt %icc, 1f				/* disabled, panic	 */
	  nop
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(sfmmu_kpm_tsbmtl_panic), %o0
	call	panic
	 or	%o0, %lo(sfmmu_kpm_tsbmtl_panic), %o0
	ret
	restore
1:
#endif /* DEBUG */
	wrpr	%o3, PSTATE_IE, %pstate		/* disable interrupts */
	
	KPMLOCK_ENTER(%o1, %o4, kpmtsbmtl1, ASI_N)
	mov	-1, %o5
	brz,a	%o2, 2f
	  mov	0, %o5
2:
	sth	%o5, [%o0]	
	KPMLOCK_EXIT(%o1, ASI_N)

	retl
	  wrpr	%g0, %o3, %pstate		/* enable interrupts */
	SET_SIZE(sfmmu_kpm_tsbmtl)

	ENTRY_NP(sfmmu_kpm_stsbmtl)
	rdpr	%pstate, %o3
	/*
	 * %o0 = &mapped
	 * %o1 = &kshl_lock
	 * %o2 = val
	 * %o3 = pstate save
	 */
#ifdef DEBUG
	andcc	%o3, PSTATE_IE, %g0		/* if interrupts already */
	bnz,pt %icc, 1f				/* disabled, panic	 */
	  nop
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(sfmmu_kpm_stsbmtl_panic), %o0
	call	panic
	  or	%o0, %lo(sfmmu_kpm_stsbmtl_panic), %o0
	ret
	restore
1:
#endif /* DEBUG */
	wrpr	%o3, PSTATE_IE, %pstate		/* disable interrupts */
	
	KPMLOCK_ENTER(%o1, %o4, kpmstsbmtl1, ASI_N)
	ldsb	[%o0], %o5
	stb	%o2, [%o0]	
	KPMLOCK_EXIT(%o1, ASI_N)

	mov	%o5, %o0			/* return old val */
	retl
	  wrpr	%g0, %o3, %pstate		/* enable interrupts */
	SET_SIZE(sfmmu_kpm_stsbmtl)

#endif /* lint */

#ifndef lint
#ifdef sun4v
	/*
	 * User/kernel data miss w/ multiple TSBs
	 * The first probe covers 8K, 64K, and 512K page sizes,
	 * because 64K and 512K mappings are replicated off the 8K
	 * pointer.  The second probe covers the 4M page size only.
	 *
	 * MMU fault area contains miss address and context.
	 */
	ALTENTRY(sfmmu_slow_dmmu_miss)
	GET_MMU_D_PTAGACC_CTXTYPE(%g2, %g3)	! %g2 = ptagacc, %g3 = ctx type

slow_miss_common:
	/*
	 *  %g2 = tagacc register (needed for sfmmu_tsb_miss_tt)
	 *  %g3 = ctx (cannot be INVALID_CONTEXT)
	 */
	brnz,pt	%g3, 8f			! check for user context
	  nop

	/*
	 * Kernel miss
	 * Get 8K and 4M TSB pointers in %g1 and %g3 and
	 * branch to sfmmu_tsb_miss_tt to handle it.
	 */
	mov	%g2, %g7		! TSB pointer macro clobbers tagacc
sfmmu_dslow_patch_ktsb_base:
	RUNTIME_PATCH_SETX(%g1, %g6)	! %g1 = contents of ktsb_pbase
sfmmu_dslow_patch_ktsb_szcode:
	or	%g0, RUNTIME_PATCH, %g3	! ktsb_szcode (hot patched)

	GET_TSBE_POINTER(MMU_PAGESHIFT, %g1, %g7, %g3, %g5)
	! %g1 = First TSB entry pointer, as TSB miss handler expects

	mov	%g2, %g7		! TSB pointer macro clobbers tagacc
sfmmu_dslow_patch_ktsb4m_base:
	RUNTIME_PATCH_SETX(%g3, %g6)	! %g3 = contents of ktsb4m_pbase
sfmmu_dslow_patch_ktsb4m_szcode:
	or	%g0, RUNTIME_PATCH, %g6	! ktsb4m_szcode (hot patched)

	GET_TSBE_POINTER(MMU_PAGESHIFT4M, %g3, %g7, %g6, %g5)
	! %g3 = 4M tsb entry pointer, as TSB miss handler expects
	ba,a,pt	%xcc, sfmmu_tsb_miss_tt
	.empty

8:
	/*
	 * User miss
	 * Get first TSB pointer in %g1
	 * Get second TSB pointer (or NULL if no second TSB) in %g3
	 * Branch to sfmmu_tsb_miss_tt to handle it
	 */
	GET_1ST_TSBE_PTR(%g2, %g1, %g4, %g5) 
	/* %g1 = first TSB entry ptr now, %g2 preserved */

	GET_UTSBREG(SCRATCHPAD_UTSBREG2, %g3)	/* get 2nd utsbreg */
	brlz,pt %g3, sfmmu_tsb_miss_tt		/* done if no 2nd TSB */
	  nop

	GET_2ND_TSBE_PTR(%g2, %g3, %g4, %g5) 
	/* %g3 = second TSB entry ptr now, %g2 preserved */
9:
	ba,a,pt	%xcc, sfmmu_tsb_miss_tt
	.empty
	SET_SIZE(sfmmu_slow_dmmu_miss)


	/*
	 * User/kernel instruction miss w/ multiple TSBs
	 * The first probe covers 8K, 64K, and 512K page sizes,
	 * because 64K and 512K mappings are replicated off the 8K
	 * pointer.  The second probe covers the 4M page size only.
	 *
	 * MMU fault area contains miss address and context.
	 */
	ALTENTRY(sfmmu_slow_immu_miss)
	GET_MMU_I_PTAGACC_CTXTYPE(%g2, %g3)
	ba,a,pt	%xcc, slow_miss_common
	SET_SIZE(sfmmu_slow_immu_miss)

#endif /* sun4v */
#endif	/* lint */

#ifndef lint

/*
 * Per-CPU tsbmiss areas to avoid cache misses in TSB miss handlers.
 */
	.seg	".data"
	.align	64
	.global tsbmiss_area
tsbmiss_area:
	.skip	(TSBMISS_SIZE * NCPU)

	.align	64
	.global kpmtsbm_area
kpmtsbm_area:
	.skip	(KPMTSBM_SIZE * NCPU)
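
/*
 * A given CPU's slots are found by simple scaling (a sketch; this is
 * what CPU_TSBMISS_AREA and the CPU_INDEX-based lookup in
 * sfmmu_kpm_dtsb_miss compute):
 *
 *	tsbmissp = (struct tsbmiss *)
 *	    ((uintptr_t)tsbmiss_area + cpuid * TSBMISS_SIZE);
 *	kpmtsbmp = (struct kpmtsbm *)
 *	    ((uintptr_t)kpmtsbm_area + ((uintptr_t)cpuid << KPMTSBM_SHIFT));
 */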
#endif	/* lint */