xref: /illumos-gate/usr/src/uts/i86pc/vm/htable.c (revision aac11643c466386309a97e46ac9f9a4cad538e5c)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5a85a6733Sjosephb  * Common Development and Distribution License (the "License").
6a85a6733Sjosephb  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
21ae115bc7Smrj 
227c478bd9Sstevel@tonic-gate /*
23ae115bc7Smrj  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate #include <sys/types.h>
307c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
317c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
327c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
337c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
347c478bd9Sstevel@tonic-gate #include <sys/machparam.h>
357c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>
367c478bd9Sstevel@tonic-gate #include <sys/mman.h>
377c478bd9Sstevel@tonic-gate #include <sys/systm.h>
387c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
397c478bd9Sstevel@tonic-gate #include <sys/thread.h>
407c478bd9Sstevel@tonic-gate #include <sys/proc.h>
417c478bd9Sstevel@tonic-gate #include <sys/cpu.h>
427c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
437c478bd9Sstevel@tonic-gate #include <sys/disp.h>
447c478bd9Sstevel@tonic-gate #include <sys/vmem.h>
457c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h>
467c478bd9Sstevel@tonic-gate #include <sys/promif.h>
477c478bd9Sstevel@tonic-gate #include <sys/var.h>
487c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h>
49ae115bc7Smrj #include <sys/archsystm.h>
507c478bd9Sstevel@tonic-gate #include <sys/bootconf.h>
517c478bd9Sstevel@tonic-gate #include <sys/dumphdr.h>
527c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
537c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h>
547c478bd9Sstevel@tonic-gate #include <vm/hat.h>
557c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h>
567c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
577c478bd9Sstevel@tonic-gate 
58ae115bc7Smrj #include <sys/bootinfo.h>
59ae115bc7Smrj #include <vm/kboot_mmu.h>
60ae115bc7Smrj 
61ae115bc7Smrj static void x86pte_zero(htable_t *dest, uint_t entry, uint_t count);
62ae115bc7Smrj 
637c478bd9Sstevel@tonic-gate kmem_cache_t *htable_cache;
647c478bd9Sstevel@tonic-gate 
657c478bd9Sstevel@tonic-gate /*
667c478bd9Sstevel@tonic-gate  * The variable htable_reserve_amount, rather than HTABLE_RESERVE_AMOUNT,
677c478bd9Sstevel@tonic-gate  * is used in order to facilitate testing of the htable_steal() code.
687c478bd9Sstevel@tonic-gate  * By resetting htable_reserve_amount to a lower value, we can force
697c478bd9Sstevel@tonic-gate  * stealing to occur.  The reserve amount is a guess to get us through boot.
707c478bd9Sstevel@tonic-gate  */
717c478bd9Sstevel@tonic-gate #define	HTABLE_RESERVE_AMOUNT	(200)
727c478bd9Sstevel@tonic-gate uint_t htable_reserve_amount = HTABLE_RESERVE_AMOUNT;
737c478bd9Sstevel@tonic-gate kmutex_t htable_reserve_mutex;
747c478bd9Sstevel@tonic-gate uint_t htable_reserve_cnt;
757c478bd9Sstevel@tonic-gate htable_t *htable_reserve_pool;
767c478bd9Sstevel@tonic-gate 
777c478bd9Sstevel@tonic-gate /*
78a85a6733Sjosephb  * Used to hand test htable_steal().
797c478bd9Sstevel@tonic-gate  */
80a85a6733Sjosephb #ifdef DEBUG
81a85a6733Sjosephb ulong_t force_steal = 0;
82a85a6733Sjosephb ulong_t ptable_cnt = 0;
83a85a6733Sjosephb #endif
84a85a6733Sjosephb 
85a85a6733Sjosephb /*
86a85a6733Sjosephb  * This variable is so that we can tune this via /etc/system
87a85a6733Sjosephb  * Any value works, but a power of two <= mmu.ptes_per_table is best.
88a85a6733Sjosephb  */
89a85a6733Sjosephb uint_t htable_steal_passes = 8;
907c478bd9Sstevel@tonic-gate 
917c478bd9Sstevel@tonic-gate /*
927c478bd9Sstevel@tonic-gate  * mutex stuff for access to htable hash
937c478bd9Sstevel@tonic-gate  */
947c478bd9Sstevel@tonic-gate #define	NUM_HTABLE_MUTEX 128
957c478bd9Sstevel@tonic-gate kmutex_t htable_mutex[NUM_HTABLE_MUTEX];
967c478bd9Sstevel@tonic-gate #define	HTABLE_MUTEX_HASH(h) ((h) & (NUM_HTABLE_MUTEX - 1))
977c478bd9Sstevel@tonic-gate 
987c478bd9Sstevel@tonic-gate #define	HTABLE_ENTER(h)	mutex_enter(&htable_mutex[HTABLE_MUTEX_HASH(h)]);
997c478bd9Sstevel@tonic-gate #define	HTABLE_EXIT(h)	mutex_exit(&htable_mutex[HTABLE_MUTEX_HASH(h)]);
1007c478bd9Sstevel@tonic-gate 
1017c478bd9Sstevel@tonic-gate /*
1027c478bd9Sstevel@tonic-gate  * forward declarations
1037c478bd9Sstevel@tonic-gate  */
1047c478bd9Sstevel@tonic-gate static void link_ptp(htable_t *higher, htable_t *new, uintptr_t vaddr);
1057c478bd9Sstevel@tonic-gate static void unlink_ptp(htable_t *higher, htable_t *old, uintptr_t vaddr);
1067c478bd9Sstevel@tonic-gate static void htable_free(htable_t *ht);
107ae115bc7Smrj static x86pte_t *x86pte_access_pagetable(htable_t *ht, uint_t index);
1087c478bd9Sstevel@tonic-gate static void x86pte_release_pagetable(htable_t *ht);
1097c478bd9Sstevel@tonic-gate static x86pte_t x86pte_cas(htable_t *ht, uint_t entry, x86pte_t old,
1107c478bd9Sstevel@tonic-gate 	x86pte_t new);
1117c478bd9Sstevel@tonic-gate 
1127c478bd9Sstevel@tonic-gate /*
1137c478bd9Sstevel@tonic-gate  * A counter to track if we are stealing or reaping htables. When non-zero
1147c478bd9Sstevel@tonic-gate  * htable_free() will directly free htables (either to the reserve or kmem)
1157c478bd9Sstevel@tonic-gate  * instead of putting them in a hat's htable cache.
1167c478bd9Sstevel@tonic-gate  */
1177c478bd9Sstevel@tonic-gate uint32_t htable_dont_cache = 0;
1187c478bd9Sstevel@tonic-gate 
1197c478bd9Sstevel@tonic-gate /*
1207c478bd9Sstevel@tonic-gate  * Track the number of active pagetables, so we can know how many to reap
1217c478bd9Sstevel@tonic-gate  */
1227c478bd9Sstevel@tonic-gate static uint32_t active_ptables = 0;
1237c478bd9Sstevel@tonic-gate 
1247c478bd9Sstevel@tonic-gate /*
1257c478bd9Sstevel@tonic-gate  * Allocate a memory page for a hardware page table.
1267c478bd9Sstevel@tonic-gate  *
127ae115bc7Smrj  * A wrapper around page_get_physical(), with some extra checks.
1287c478bd9Sstevel@tonic-gate  */
129ae115bc7Smrj static pfn_t
130ae115bc7Smrj ptable_alloc(uintptr_t seed)
1317c478bd9Sstevel@tonic-gate {
1327c478bd9Sstevel@tonic-gate 	pfn_t pfn;
1337c478bd9Sstevel@tonic-gate 	page_t *pp;
1347c478bd9Sstevel@tonic-gate 
135ae115bc7Smrj 	pfn = PFN_INVALID;
1367c478bd9Sstevel@tonic-gate 	atomic_add_32(&active_ptables, 1);
1377c478bd9Sstevel@tonic-gate 
1387c478bd9Sstevel@tonic-gate 	/*
139ae115bc7Smrj 	 * The first check is to see if there is memory in the system. If we
140ae115bc7Smrj 	 * drop to throttlefree, then fail the ptable_alloc() and let the
141ae115bc7Smrj 	 * stealing code kick in. Note that we have to do this test here,
142ae115bc7Smrj 	 * since the test in page_create_throttle() would let the NOSLEEP
143ae115bc7Smrj 	 * allocation go through and deplete the page reserves.
144a85a6733Sjosephb 	 *
145a85a6733Sjosephb 	 * The !NOMEMWAIT() lets pageout, fsflush, etc. skip this check.
1467c478bd9Sstevel@tonic-gate 	 */
147a85a6733Sjosephb 	if (!NOMEMWAIT() && freemem <= throttlefree + 1)
148ae115bc7Smrj 		return (PFN_INVALID);
1497c478bd9Sstevel@tonic-gate 
150a85a6733Sjosephb #ifdef DEBUG
151a85a6733Sjosephb 	/*
152a85a6733Sjosephb 	 * This code makes htable_steal() easier to test. By setting
153a85a6733Sjosephb 	 * force_steal we force pagetable allocations to fall
154a85a6733Sjosephb 	 * into the stealing code. Roughly 1 in ever "force_steal"
155a85a6733Sjosephb 	 * page table allocations will fail.
156a85a6733Sjosephb 	 */
157ae115bc7Smrj 	if (proc_pageout != NULL && force_steal > 1 &&
158a85a6733Sjosephb 	    ++ptable_cnt > force_steal) {
159a85a6733Sjosephb 		ptable_cnt = 0;
160ae115bc7Smrj 		return (PFN_INVALID);
161a85a6733Sjosephb 	}
162a85a6733Sjosephb #endif /* DEBUG */
163a85a6733Sjosephb 
164ae115bc7Smrj 	pp = page_get_physical(seed);
1657c478bd9Sstevel@tonic-gate 	if (pp == NULL)
166ae115bc7Smrj 		return (PFN_INVALID);
1677c478bd9Sstevel@tonic-gate 	pfn = pp->p_pagenum;
1687c478bd9Sstevel@tonic-gate 	page_downgrade(pp);
1697c478bd9Sstevel@tonic-gate 	ASSERT(PAGE_SHARED(pp));
1707c478bd9Sstevel@tonic-gate 
1717c478bd9Sstevel@tonic-gate 	if (pfn == PFN_INVALID)
1727c478bd9Sstevel@tonic-gate 		panic("ptable_alloc(): Invalid PFN!!");
173a85a6733Sjosephb 	HATSTAT_INC(hs_ptable_allocs);
174ae115bc7Smrj 	return (pfn);
1757c478bd9Sstevel@tonic-gate }
1767c478bd9Sstevel@tonic-gate 
1777c478bd9Sstevel@tonic-gate /*
1787c478bd9Sstevel@tonic-gate  * Free an htable's associated page table page.  See the comments
1797c478bd9Sstevel@tonic-gate  * for ptable_alloc().
1807c478bd9Sstevel@tonic-gate  */
1817c478bd9Sstevel@tonic-gate static void
182ae115bc7Smrj ptable_free(pfn_t pfn)
1837c478bd9Sstevel@tonic-gate {
184ae115bc7Smrj 	page_t *pp = page_numtopp_nolock(pfn);
1857c478bd9Sstevel@tonic-gate 
1867c478bd9Sstevel@tonic-gate 	/*
1877c478bd9Sstevel@tonic-gate 	 * need to destroy the page used for the pagetable
1887c478bd9Sstevel@tonic-gate 	 */
1897c478bd9Sstevel@tonic-gate 	ASSERT(pfn != PFN_INVALID);
1907c478bd9Sstevel@tonic-gate 	HATSTAT_INC(hs_ptable_frees);
1917c478bd9Sstevel@tonic-gate 	atomic_add_32(&active_ptables, -1);
1927c478bd9Sstevel@tonic-gate 	if (pp == NULL)
1937c478bd9Sstevel@tonic-gate 		panic("ptable_free(): no page for pfn!");
1947c478bd9Sstevel@tonic-gate 	ASSERT(PAGE_SHARED(pp));
1957c478bd9Sstevel@tonic-gate 	ASSERT(pfn == pp->p_pagenum);
1967c478bd9Sstevel@tonic-gate 
1977c478bd9Sstevel@tonic-gate 	/*
1987c478bd9Sstevel@tonic-gate 	 * Get an exclusive lock, might have to wait for a kmem reader.
1997c478bd9Sstevel@tonic-gate 	 */
2007c478bd9Sstevel@tonic-gate 	if (!page_tryupgrade(pp)) {
2017c478bd9Sstevel@tonic-gate 		page_unlock(pp);
2027c478bd9Sstevel@tonic-gate 		/*
2037c478bd9Sstevel@tonic-gate 		 * RFE: we could change this to not loop forever
2047c478bd9Sstevel@tonic-gate 		 * George Cameron had some idea on how to do that.
2057c478bd9Sstevel@tonic-gate 		 * For now looping works - it's just like sfmmu.
2067c478bd9Sstevel@tonic-gate 		 */
2077c478bd9Sstevel@tonic-gate 		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
2087c478bd9Sstevel@tonic-gate 			continue;
2097c478bd9Sstevel@tonic-gate 	}
2107c478bd9Sstevel@tonic-gate 	page_free(pp, 1);
2117c478bd9Sstevel@tonic-gate 	page_unresv(1);
2127c478bd9Sstevel@tonic-gate }
2137c478bd9Sstevel@tonic-gate 
2147c478bd9Sstevel@tonic-gate /*
2157c478bd9Sstevel@tonic-gate  * Put one htable on the reserve list.
2167c478bd9Sstevel@tonic-gate  */
2177c478bd9Sstevel@tonic-gate static void
2187c478bd9Sstevel@tonic-gate htable_put_reserve(htable_t *ht)
2197c478bd9Sstevel@tonic-gate {
2207c478bd9Sstevel@tonic-gate 	ht->ht_hat = NULL;		/* no longer tied to a hat */
2217c478bd9Sstevel@tonic-gate 	ASSERT(ht->ht_pfn == PFN_INVALID);
2227c478bd9Sstevel@tonic-gate 	HATSTAT_INC(hs_htable_rputs);
2237c478bd9Sstevel@tonic-gate 	mutex_enter(&htable_reserve_mutex);
2247c478bd9Sstevel@tonic-gate 	ht->ht_next = htable_reserve_pool;
2257c478bd9Sstevel@tonic-gate 	htable_reserve_pool = ht;
2267c478bd9Sstevel@tonic-gate 	++htable_reserve_cnt;
2277c478bd9Sstevel@tonic-gate 	mutex_exit(&htable_reserve_mutex);
2287c478bd9Sstevel@tonic-gate }
2297c478bd9Sstevel@tonic-gate 
2307c478bd9Sstevel@tonic-gate /*
2317c478bd9Sstevel@tonic-gate  * Take one htable from the reserve.
2327c478bd9Sstevel@tonic-gate  */
2337c478bd9Sstevel@tonic-gate static htable_t *
2347c478bd9Sstevel@tonic-gate htable_get_reserve(void)
2357c478bd9Sstevel@tonic-gate {
2367c478bd9Sstevel@tonic-gate 	htable_t *ht = NULL;
2377c478bd9Sstevel@tonic-gate 
2387c478bd9Sstevel@tonic-gate 	mutex_enter(&htable_reserve_mutex);
2397c478bd9Sstevel@tonic-gate 	if (htable_reserve_cnt != 0) {
2407c478bd9Sstevel@tonic-gate 		ht = htable_reserve_pool;
2417c478bd9Sstevel@tonic-gate 		ASSERT(ht != NULL);
2427c478bd9Sstevel@tonic-gate 		ASSERT(ht->ht_pfn == PFN_INVALID);
2437c478bd9Sstevel@tonic-gate 		htable_reserve_pool = ht->ht_next;
2447c478bd9Sstevel@tonic-gate 		--htable_reserve_cnt;
2457c478bd9Sstevel@tonic-gate 		HATSTAT_INC(hs_htable_rgets);
2467c478bd9Sstevel@tonic-gate 	}
2477c478bd9Sstevel@tonic-gate 	mutex_exit(&htable_reserve_mutex);
2487c478bd9Sstevel@tonic-gate 	return (ht);
2497c478bd9Sstevel@tonic-gate }
2507c478bd9Sstevel@tonic-gate 
2517c478bd9Sstevel@tonic-gate /*
252ae115bc7Smrj  * Allocate initial htables and put them on the reserve list
2537c478bd9Sstevel@tonic-gate  */
2547c478bd9Sstevel@tonic-gate void
2557c478bd9Sstevel@tonic-gate htable_initial_reserve(uint_t count)
2567c478bd9Sstevel@tonic-gate {
2577c478bd9Sstevel@tonic-gate 	htable_t *ht;
2587c478bd9Sstevel@tonic-gate 
2597c478bd9Sstevel@tonic-gate 	count += HTABLE_RESERVE_AMOUNT;
2607c478bd9Sstevel@tonic-gate 	while (count > 0) {
2617c478bd9Sstevel@tonic-gate 		ht = kmem_cache_alloc(htable_cache, KM_NOSLEEP);
2627c478bd9Sstevel@tonic-gate 		ASSERT(ht != NULL);
2637c478bd9Sstevel@tonic-gate 
2647c478bd9Sstevel@tonic-gate 		ASSERT(use_boot_reserve);
265ae115bc7Smrj 		ht->ht_pfn = PFN_INVALID;
266ae115bc7Smrj 		htable_put_reserve(ht);
2677c478bd9Sstevel@tonic-gate 		--count;
2687c478bd9Sstevel@tonic-gate 	}
2697c478bd9Sstevel@tonic-gate }
2707c478bd9Sstevel@tonic-gate 
2717c478bd9Sstevel@tonic-gate /*
2727c478bd9Sstevel@tonic-gate  * Readjust the reserves after a thread finishes using them.
2737c478bd9Sstevel@tonic-gate  */
2747c478bd9Sstevel@tonic-gate void
2757c478bd9Sstevel@tonic-gate htable_adjust_reserve()
2767c478bd9Sstevel@tonic-gate {
2777c478bd9Sstevel@tonic-gate 	htable_t *ht;
2787c478bd9Sstevel@tonic-gate 
2797c478bd9Sstevel@tonic-gate 	/*
2807c478bd9Sstevel@tonic-gate 	 * Free any excess htables in the reserve list
2817c478bd9Sstevel@tonic-gate 	 */
282*aac11643Sjosephb 	while (htable_reserve_cnt > htable_reserve_amount &&
283*aac11643Sjosephb 	    !USE_HAT_RESERVES()) {
2847c478bd9Sstevel@tonic-gate 		ht = htable_get_reserve();
2857c478bd9Sstevel@tonic-gate 		if (ht == NULL)
2867c478bd9Sstevel@tonic-gate 			return;
2877c478bd9Sstevel@tonic-gate 		ASSERT(ht->ht_pfn == PFN_INVALID);
2887c478bd9Sstevel@tonic-gate 		kmem_cache_free(htable_cache, ht);
2897c478bd9Sstevel@tonic-gate 	}
2907c478bd9Sstevel@tonic-gate }
2917c478bd9Sstevel@tonic-gate 
2927c478bd9Sstevel@tonic-gate 
2937c478bd9Sstevel@tonic-gate /*
2947c478bd9Sstevel@tonic-gate  * This routine steals htables from user processes for htable_alloc() or
2957c478bd9Sstevel@tonic-gate  * for htable_reap().
2967c478bd9Sstevel@tonic-gate  */
2977c478bd9Sstevel@tonic-gate static htable_t *
2987c478bd9Sstevel@tonic-gate htable_steal(uint_t cnt)
2997c478bd9Sstevel@tonic-gate {
3007c478bd9Sstevel@tonic-gate 	hat_t		*hat = kas.a_hat;	/* list starts with khat */
3017c478bd9Sstevel@tonic-gate 	htable_t	*list = NULL;
3027c478bd9Sstevel@tonic-gate 	htable_t	*ht;
3037c478bd9Sstevel@tonic-gate 	htable_t	*higher;
3047c478bd9Sstevel@tonic-gate 	uint_t		h;
305a85a6733Sjosephb 	uint_t		h_start;
306a85a6733Sjosephb 	static uint_t	h_seed = 0;
3077c478bd9Sstevel@tonic-gate 	uint_t		e;
3087c478bd9Sstevel@tonic-gate 	uintptr_t	va;
3097c478bd9Sstevel@tonic-gate 	x86pte_t	pte;
3107c478bd9Sstevel@tonic-gate 	uint_t		stolen = 0;
3117c478bd9Sstevel@tonic-gate 	uint_t		pass;
312a85a6733Sjosephb 	uint_t		threshold;
3137c478bd9Sstevel@tonic-gate 
3147c478bd9Sstevel@tonic-gate 	/*
3157c478bd9Sstevel@tonic-gate 	 * Limit htable_steal_passes to something reasonable
3167c478bd9Sstevel@tonic-gate 	 */
3177c478bd9Sstevel@tonic-gate 	if (htable_steal_passes == 0)
3187c478bd9Sstevel@tonic-gate 		htable_steal_passes = 1;
3197c478bd9Sstevel@tonic-gate 	if (htable_steal_passes > mmu.ptes_per_table)
3207c478bd9Sstevel@tonic-gate 		htable_steal_passes = mmu.ptes_per_table;
3217c478bd9Sstevel@tonic-gate 
3227c478bd9Sstevel@tonic-gate 	/*
323a85a6733Sjosephb 	 * Loop through all user hats. The 1st pass takes cached htables that
3247c478bd9Sstevel@tonic-gate 	 * aren't in use. The later passes steal by removing mappings, too.
3257c478bd9Sstevel@tonic-gate 	 */
3267c478bd9Sstevel@tonic-gate 	atomic_add_32(&htable_dont_cache, 1);
327a85a6733Sjosephb 	for (pass = 0; pass <= htable_steal_passes && stolen < cnt; ++pass) {
328a85a6733Sjosephb 		threshold = pass * mmu.ptes_per_table / htable_steal_passes;
329a85a6733Sjosephb 		hat = kas.a_hat;
3307c478bd9Sstevel@tonic-gate 		for (;;) {
3317c478bd9Sstevel@tonic-gate 
3327c478bd9Sstevel@tonic-gate 			/*
333a85a6733Sjosephb 			 * Clear the victim flag and move to next hat
3347c478bd9Sstevel@tonic-gate 			 */
3357c478bd9Sstevel@tonic-gate 			mutex_enter(&hat_list_lock);
336a85a6733Sjosephb 			if (hat != kas.a_hat) {
3377c478bd9Sstevel@tonic-gate 				hat->hat_flags &= ~HAT_VICTIM;
3387c478bd9Sstevel@tonic-gate 				cv_broadcast(&hat_list_cv);
339a85a6733Sjosephb 			}
340a85a6733Sjosephb 			hat = hat->hat_next;
341a85a6733Sjosephb 
342a85a6733Sjosephb 			/*
343a85a6733Sjosephb 			 * Skip any hat that is already being stolen from.
344a85a6733Sjosephb 			 *
345a85a6733Sjosephb 			 * We skip SHARED hats, as these are dummy
346a85a6733Sjosephb 			 * hats that host ISM shared page tables.
347a85a6733Sjosephb 			 *
348a85a6733Sjosephb 			 * We also skip if HAT_FREEING because hat_pte_unmap()
349a85a6733Sjosephb 			 * won't zero out the PTE's. That would lead to hitting
350a85a6733Sjosephb 			 * stale PTEs either here or under hat_unload() when we
351a85a6733Sjosephb 			 * steal and unload the same page table in competing
352a85a6733Sjosephb 			 * threads.
353a85a6733Sjosephb 			 */
354a85a6733Sjosephb 			while (hat != NULL &&
355a85a6733Sjosephb 			    (hat->hat_flags &
356a85a6733Sjosephb 			    (HAT_VICTIM | HAT_SHARED | HAT_FREEING)) != 0)
357a85a6733Sjosephb 				hat = hat->hat_next;
358a85a6733Sjosephb 
359a85a6733Sjosephb 			if (hat == NULL) {
3607c478bd9Sstevel@tonic-gate 				mutex_exit(&hat_list_lock);
3617c478bd9Sstevel@tonic-gate 				break;
3627c478bd9Sstevel@tonic-gate 			}
363a85a6733Sjosephb 
364a85a6733Sjosephb 			/*
365a85a6733Sjosephb 			 * Are we finished?
366a85a6733Sjosephb 			 */
367a85a6733Sjosephb 			if (stolen == cnt) {
368a85a6733Sjosephb 				/*
369a85a6733Sjosephb 				 * Try to spread the pain of stealing,
370a85a6733Sjosephb 				 * move victim HAT to the end of the HAT list.
371a85a6733Sjosephb 				 */
372a85a6733Sjosephb 				if (pass >= 1 && cnt == 1 &&
373a85a6733Sjosephb 				    kas.a_hat->hat_prev != hat) {
374a85a6733Sjosephb 
375a85a6733Sjosephb 					/* unlink victim hat */
376a85a6733Sjosephb 					if (hat->hat_prev)
377a85a6733Sjosephb 						hat->hat_prev->hat_next =
378a85a6733Sjosephb 						    hat->hat_next;
379a85a6733Sjosephb 					else
380a85a6733Sjosephb 						kas.a_hat->hat_next =
381a85a6733Sjosephb 						    hat->hat_next;
382a85a6733Sjosephb 					if (hat->hat_next)
383a85a6733Sjosephb 						hat->hat_next->hat_prev =
384a85a6733Sjosephb 						    hat->hat_prev;
385a85a6733Sjosephb 					else
386a85a6733Sjosephb 						kas.a_hat->hat_prev =
387a85a6733Sjosephb 						    hat->hat_prev;
388a85a6733Sjosephb 
389a85a6733Sjosephb 
390a85a6733Sjosephb 					/* relink at end of hat list */
391a85a6733Sjosephb 					hat->hat_next = NULL;
392a85a6733Sjosephb 					hat->hat_prev = kas.a_hat->hat_prev;
393a85a6733Sjosephb 					if (hat->hat_prev)
394a85a6733Sjosephb 						hat->hat_prev->hat_next = hat;
395a85a6733Sjosephb 					else
396a85a6733Sjosephb 						kas.a_hat->hat_next = hat;
397a85a6733Sjosephb 					kas.a_hat->hat_prev = hat;
398a85a6733Sjosephb 
399a85a6733Sjosephb 				}
400a85a6733Sjosephb 
401a85a6733Sjosephb 				mutex_exit(&hat_list_lock);
402a85a6733Sjosephb 				break;
403a85a6733Sjosephb 			}
404a85a6733Sjosephb 
405a85a6733Sjosephb 			/*
406a85a6733Sjosephb 			 * Mark the HAT as a stealing victim.
407a85a6733Sjosephb 			 */
4087c478bd9Sstevel@tonic-gate 			hat->hat_flags |= HAT_VICTIM;
4097c478bd9Sstevel@tonic-gate 			mutex_exit(&hat_list_lock);
4107c478bd9Sstevel@tonic-gate 
4117c478bd9Sstevel@tonic-gate 			/*
4127c478bd9Sstevel@tonic-gate 			 * Take any htables from the hat's cached "free" list.
4137c478bd9Sstevel@tonic-gate 			 */
4147c478bd9Sstevel@tonic-gate 			hat_enter(hat);
4157c478bd9Sstevel@tonic-gate 			while ((ht = hat->hat_ht_cached) != NULL &&
4167c478bd9Sstevel@tonic-gate 			    stolen < cnt) {
4177c478bd9Sstevel@tonic-gate 				hat->hat_ht_cached = ht->ht_next;
4187c478bd9Sstevel@tonic-gate 				ht->ht_next = list;
4197c478bd9Sstevel@tonic-gate 				list = ht;
4207c478bd9Sstevel@tonic-gate 				++stolen;
4217c478bd9Sstevel@tonic-gate 			}
4227c478bd9Sstevel@tonic-gate 			hat_exit(hat);
4237c478bd9Sstevel@tonic-gate 
4247c478bd9Sstevel@tonic-gate 			/*
4257c478bd9Sstevel@tonic-gate 			 * Don't steal on first pass.
4267c478bd9Sstevel@tonic-gate 			 */
427a85a6733Sjosephb 			if (pass == 0 || stolen == cnt)
4287c478bd9Sstevel@tonic-gate 				continue;
4297c478bd9Sstevel@tonic-gate 
4307c478bd9Sstevel@tonic-gate 			/*
431a85a6733Sjosephb 			 * Search the active htables for one to steal.
432a85a6733Sjosephb 			 * Start at a different hash bucket every time to
433a85a6733Sjosephb 			 * help spread the pain of stealing.
4347c478bd9Sstevel@tonic-gate 			 */
435a85a6733Sjosephb 			h = h_start = h_seed++ % hat->hat_num_hash;
436a85a6733Sjosephb 			do {
4377c478bd9Sstevel@tonic-gate 				higher = NULL;
4387c478bd9Sstevel@tonic-gate 				HTABLE_ENTER(h);
4397c478bd9Sstevel@tonic-gate 				for (ht = hat->hat_ht_hash[h]; ht;
4407c478bd9Sstevel@tonic-gate 				    ht = ht->ht_next) {
4417c478bd9Sstevel@tonic-gate 
4427c478bd9Sstevel@tonic-gate 					/*
4437c478bd9Sstevel@tonic-gate 					 * Can we rule out reaping?
4447c478bd9Sstevel@tonic-gate 					 */
4457c478bd9Sstevel@tonic-gate 					if (ht->ht_busy != 0 ||
4467c478bd9Sstevel@tonic-gate 					    (ht->ht_flags & HTABLE_SHARED_PFN)||
447a85a6733Sjosephb 					    ht->ht_level > 0 ||
448a85a6733Sjosephb 					    ht->ht_valid_cnt > threshold ||
4497c478bd9Sstevel@tonic-gate 					    ht->ht_lock_cnt != 0)
4507c478bd9Sstevel@tonic-gate 						continue;
4517c478bd9Sstevel@tonic-gate 
4527c478bd9Sstevel@tonic-gate 					/*
4537c478bd9Sstevel@tonic-gate 					 * Increment busy so the htable can't
4547c478bd9Sstevel@tonic-gate 					 * disappear. We drop the htable mutex
4557c478bd9Sstevel@tonic-gate 					 * to avoid deadlocks with
4567c478bd9Sstevel@tonic-gate 					 * hat_pageunload() and the hment mutex
4577c478bd9Sstevel@tonic-gate 					 * while we call hat_pte_unmap()
4587c478bd9Sstevel@tonic-gate 					 */
4597c478bd9Sstevel@tonic-gate 					++ht->ht_busy;
4607c478bd9Sstevel@tonic-gate 					HTABLE_EXIT(h);
4617c478bd9Sstevel@tonic-gate 
4627c478bd9Sstevel@tonic-gate 					/*
4637c478bd9Sstevel@tonic-gate 					 * Try stealing.
4647c478bd9Sstevel@tonic-gate 					 * - unload and invalidate all PTEs
4657c478bd9Sstevel@tonic-gate 					 */
4667c478bd9Sstevel@tonic-gate 					for (e = 0, va = ht->ht_vaddr;
467ae115bc7Smrj 					    e < HTABLE_NUM_PTES(ht) &&
4687c478bd9Sstevel@tonic-gate 					    ht->ht_valid_cnt > 0 &&
4697c478bd9Sstevel@tonic-gate 					    ht->ht_busy == 1 &&
4707c478bd9Sstevel@tonic-gate 					    ht->ht_lock_cnt == 0;
4717c478bd9Sstevel@tonic-gate 					    ++e, va += MMU_PAGESIZE) {
4727c478bd9Sstevel@tonic-gate 						pte = x86pte_get(ht, e);
4737c478bd9Sstevel@tonic-gate 						if (!PTE_ISVALID(pte))
4747c478bd9Sstevel@tonic-gate 							continue;
4757c478bd9Sstevel@tonic-gate 						hat_pte_unmap(ht, e,
4767c478bd9Sstevel@tonic-gate 						    HAT_UNLOAD, pte, NULL);
4777c478bd9Sstevel@tonic-gate 					}
4787c478bd9Sstevel@tonic-gate 
4797c478bd9Sstevel@tonic-gate 					/*
4807c478bd9Sstevel@tonic-gate 					 * Reacquire htable lock. If we didn't
4817c478bd9Sstevel@tonic-gate 					 * remove all mappings in the table,
4827c478bd9Sstevel@tonic-gate 					 * or another thread added a new mapping
4837c478bd9Sstevel@tonic-gate 					 * behind us, give up on this table.
4847c478bd9Sstevel@tonic-gate 					 */
4857c478bd9Sstevel@tonic-gate 					HTABLE_ENTER(h);
4867c478bd9Sstevel@tonic-gate 					if (ht->ht_busy != 1 ||
4877c478bd9Sstevel@tonic-gate 					    ht->ht_valid_cnt != 0 ||
4887c478bd9Sstevel@tonic-gate 					    ht->ht_lock_cnt != 0) {
4897c478bd9Sstevel@tonic-gate 						--ht->ht_busy;
4907c478bd9Sstevel@tonic-gate 						continue;
4917c478bd9Sstevel@tonic-gate 					}
4927c478bd9Sstevel@tonic-gate 
4937c478bd9Sstevel@tonic-gate 					/*
4947c478bd9Sstevel@tonic-gate 					 * Steal it and unlink the page table.
4957c478bd9Sstevel@tonic-gate 					 */
4967c478bd9Sstevel@tonic-gate 					higher = ht->ht_parent;
4977c478bd9Sstevel@tonic-gate 					unlink_ptp(higher, ht, ht->ht_vaddr);
4987c478bd9Sstevel@tonic-gate 
4997c478bd9Sstevel@tonic-gate 					/*
5007c478bd9Sstevel@tonic-gate 					 * remove from the hash list
5017c478bd9Sstevel@tonic-gate 					 */
5027c478bd9Sstevel@tonic-gate 					if (ht->ht_next)
5037c478bd9Sstevel@tonic-gate 						ht->ht_next->ht_prev =
5047c478bd9Sstevel@tonic-gate 						    ht->ht_prev;
5057c478bd9Sstevel@tonic-gate 
5067c478bd9Sstevel@tonic-gate 					if (ht->ht_prev) {
5077c478bd9Sstevel@tonic-gate 						ht->ht_prev->ht_next =
5087c478bd9Sstevel@tonic-gate 						    ht->ht_next;
5097c478bd9Sstevel@tonic-gate 					} else {
5107c478bd9Sstevel@tonic-gate 						ASSERT(hat->hat_ht_hash[h] ==
5117c478bd9Sstevel@tonic-gate 						    ht);
5127c478bd9Sstevel@tonic-gate 						hat->hat_ht_hash[h] =
5137c478bd9Sstevel@tonic-gate 						    ht->ht_next;
5147c478bd9Sstevel@tonic-gate 					}
5157c478bd9Sstevel@tonic-gate 
5167c478bd9Sstevel@tonic-gate 					/*
5177c478bd9Sstevel@tonic-gate 					 * Break to outer loop to release the
518ae115bc7Smrj 					 * higher (ht_parent) pagetable. This
5197c478bd9Sstevel@tonic-gate 					 * spreads out the pain caused by
5207c478bd9Sstevel@tonic-gate 					 * pagefaults.
5217c478bd9Sstevel@tonic-gate 					 */
5227c478bd9Sstevel@tonic-gate 					ht->ht_next = list;
5237c478bd9Sstevel@tonic-gate 					list = ht;
5247c478bd9Sstevel@tonic-gate 					++stolen;
5257c478bd9Sstevel@tonic-gate 					break;
5267c478bd9Sstevel@tonic-gate 				}
5277c478bd9Sstevel@tonic-gate 				HTABLE_EXIT(h);
5287c478bd9Sstevel@tonic-gate 				if (higher != NULL)
5297c478bd9Sstevel@tonic-gate 					htable_release(higher);
530a85a6733Sjosephb 				if (++h == hat->hat_num_hash)
531a85a6733Sjosephb 					h = 0;
532a85a6733Sjosephb 			} while (stolen < cnt && h != h_start);
5337c478bd9Sstevel@tonic-gate 		}
5347c478bd9Sstevel@tonic-gate 	}
5357c478bd9Sstevel@tonic-gate 	atomic_add_32(&htable_dont_cache, -1);
5367c478bd9Sstevel@tonic-gate 	return (list);
5377c478bd9Sstevel@tonic-gate }
5387c478bd9Sstevel@tonic-gate 
5397c478bd9Sstevel@tonic-gate 
5407c478bd9Sstevel@tonic-gate /*
5417c478bd9Sstevel@tonic-gate  * This is invoked from kmem when the system is low on memory.  We try
5427c478bd9Sstevel@tonic-gate  * to free hments, htables, and ptables to improve the memory situation.
5437c478bd9Sstevel@tonic-gate  */
5447c478bd9Sstevel@tonic-gate /*ARGSUSED*/
5457c478bd9Sstevel@tonic-gate static void
5467c478bd9Sstevel@tonic-gate htable_reap(void *handle)
5477c478bd9Sstevel@tonic-gate {
5487c478bd9Sstevel@tonic-gate 	uint_t		reap_cnt;
5497c478bd9Sstevel@tonic-gate 	htable_t	*list;
5507c478bd9Sstevel@tonic-gate 	htable_t	*ht;
5517c478bd9Sstevel@tonic-gate 
5527c478bd9Sstevel@tonic-gate 	HATSTAT_INC(hs_reap_attempts);
5537c478bd9Sstevel@tonic-gate 	if (!can_steal_post_boot)
5547c478bd9Sstevel@tonic-gate 		return;
5557c478bd9Sstevel@tonic-gate 
5567c478bd9Sstevel@tonic-gate 	/*
5577c478bd9Sstevel@tonic-gate 	 * Try to reap 5% of the page tables bounded by a maximum of
5587c478bd9Sstevel@tonic-gate 	 * 5% of physmem and a minimum of 10.
5597c478bd9Sstevel@tonic-gate 	 */
5607c478bd9Sstevel@tonic-gate 	reap_cnt = MIN(MAX(physmem / 20, active_ptables / 20), 10);
5617c478bd9Sstevel@tonic-gate 
5627c478bd9Sstevel@tonic-gate 	/*
5637c478bd9Sstevel@tonic-gate 	 * Let htable_steal() do the work, we just call htable_free()
5647c478bd9Sstevel@tonic-gate 	 */
5657c478bd9Sstevel@tonic-gate 	list = htable_steal(reap_cnt);
5667c478bd9Sstevel@tonic-gate 	while ((ht = list) != NULL) {
5677c478bd9Sstevel@tonic-gate 		list = ht->ht_next;
5687c478bd9Sstevel@tonic-gate 		HATSTAT_INC(hs_reaped);
5697c478bd9Sstevel@tonic-gate 		htable_free(ht);
5707c478bd9Sstevel@tonic-gate 	}
5717c478bd9Sstevel@tonic-gate 
5727c478bd9Sstevel@tonic-gate 	/*
5737c478bd9Sstevel@tonic-gate 	 * Free up excess reserves
5747c478bd9Sstevel@tonic-gate 	 */
5757c478bd9Sstevel@tonic-gate 	htable_adjust_reserve();
5767c478bd9Sstevel@tonic-gate 	hment_adjust_reserve();
5777c478bd9Sstevel@tonic-gate }
5787c478bd9Sstevel@tonic-gate 
5797c478bd9Sstevel@tonic-gate /*
580ae115bc7Smrj  * Allocate an htable, stealing one or using the reserve if necessary
5817c478bd9Sstevel@tonic-gate  */
5827c478bd9Sstevel@tonic-gate static htable_t *
5837c478bd9Sstevel@tonic-gate htable_alloc(
5847c478bd9Sstevel@tonic-gate 	hat_t		*hat,
5857c478bd9Sstevel@tonic-gate 	uintptr_t	vaddr,
5867c478bd9Sstevel@tonic-gate 	level_t		level,
5877c478bd9Sstevel@tonic-gate 	htable_t	*shared)
5887c478bd9Sstevel@tonic-gate {
5897c478bd9Sstevel@tonic-gate 	htable_t	*ht = NULL;
5907c478bd9Sstevel@tonic-gate 	uint_t		is_vlp;
5917c478bd9Sstevel@tonic-gate 	uint_t		is_bare = 0;
5927c478bd9Sstevel@tonic-gate 	uint_t		need_to_zero = 1;
5937c478bd9Sstevel@tonic-gate 	int		kmflags = (can_steal_post_boot ? KM_NOSLEEP : KM_SLEEP);
5947c478bd9Sstevel@tonic-gate 
5957c478bd9Sstevel@tonic-gate 	if (level < 0 || level > TOP_LEVEL(hat))
5967c478bd9Sstevel@tonic-gate 		panic("htable_alloc(): level %d out of range\n", level);
5977c478bd9Sstevel@tonic-gate 
5987c478bd9Sstevel@tonic-gate 	is_vlp = (hat->hat_flags & HAT_VLP) && level == VLP_LEVEL;
5997c478bd9Sstevel@tonic-gate 	if (is_vlp || shared != NULL)
6007c478bd9Sstevel@tonic-gate 		is_bare = 1;
6017c478bd9Sstevel@tonic-gate 
6027c478bd9Sstevel@tonic-gate 	/*
6037c478bd9Sstevel@tonic-gate 	 * First reuse a cached htable from the hat_ht_cached field, this
604ae115bc7Smrj 	 * avoids unnecessary trips through kmem/page allocators.
6057c478bd9Sstevel@tonic-gate 	 */
6067c478bd9Sstevel@tonic-gate 	if (hat->hat_ht_cached != NULL && !is_bare) {
6077c478bd9Sstevel@tonic-gate 		hat_enter(hat);
6087c478bd9Sstevel@tonic-gate 		ht = hat->hat_ht_cached;
6097c478bd9Sstevel@tonic-gate 		if (ht != NULL) {
6107c478bd9Sstevel@tonic-gate 			hat->hat_ht_cached = ht->ht_next;
6117c478bd9Sstevel@tonic-gate 			need_to_zero = 0;
6127c478bd9Sstevel@tonic-gate 			/* XX64 ASSERT() they're all zero somehow */
6137c478bd9Sstevel@tonic-gate 			ASSERT(ht->ht_pfn != PFN_INVALID);
6147c478bd9Sstevel@tonic-gate 		}
6157c478bd9Sstevel@tonic-gate 		hat_exit(hat);
6167c478bd9Sstevel@tonic-gate 	}
6177c478bd9Sstevel@tonic-gate 
6187c478bd9Sstevel@tonic-gate 	if (ht == NULL) {
6197c478bd9Sstevel@tonic-gate 		/*
62097704650Sjosephb 		 * Allocate an htable, possibly refilling the reserves.
6217c478bd9Sstevel@tonic-gate 		 */
62297704650Sjosephb 		if (USE_HAT_RESERVES()) {
6237c478bd9Sstevel@tonic-gate 			ht = htable_get_reserve();
6247c478bd9Sstevel@tonic-gate 		} else {
6257c478bd9Sstevel@tonic-gate 			/*
6267c478bd9Sstevel@tonic-gate 			 * Donate successful htable allocations to the reserve.
6277c478bd9Sstevel@tonic-gate 			 */
6287c478bd9Sstevel@tonic-gate 			for (;;) {
6297c478bd9Sstevel@tonic-gate 				ht = kmem_cache_alloc(htable_cache, kmflags);
6307c478bd9Sstevel@tonic-gate 				if (ht == NULL)
6317c478bd9Sstevel@tonic-gate 					break;
6327c478bd9Sstevel@tonic-gate 				ht->ht_pfn = PFN_INVALID;
63397704650Sjosephb 				if (USE_HAT_RESERVES() ||
6347c478bd9Sstevel@tonic-gate 				    htable_reserve_cnt >= htable_reserve_amount)
6357c478bd9Sstevel@tonic-gate 					break;
6367c478bd9Sstevel@tonic-gate 				htable_put_reserve(ht);
6377c478bd9Sstevel@tonic-gate 			}
6387c478bd9Sstevel@tonic-gate 		}
6397c478bd9Sstevel@tonic-gate 
6407c478bd9Sstevel@tonic-gate 		/*
6417c478bd9Sstevel@tonic-gate 		 * allocate a page for the hardware page table if needed
6427c478bd9Sstevel@tonic-gate 		 */
6437c478bd9Sstevel@tonic-gate 		if (ht != NULL && !is_bare) {
644a85a6733Sjosephb 			ht->ht_hat = hat;
645ae115bc7Smrj 			ht->ht_pfn = ptable_alloc((uintptr_t)ht);
6467c478bd9Sstevel@tonic-gate 			if (ht->ht_pfn == PFN_INVALID) {
64797704650Sjosephb 				if (USE_HAT_RESERVES())
64897704650Sjosephb 					htable_put_reserve(ht);
64997704650Sjosephb 				else
6507c478bd9Sstevel@tonic-gate 					kmem_cache_free(htable_cache, ht);
6517c478bd9Sstevel@tonic-gate 				ht = NULL;
6527c478bd9Sstevel@tonic-gate 			}
6537c478bd9Sstevel@tonic-gate 		}
6547c478bd9Sstevel@tonic-gate 	}
6557c478bd9Sstevel@tonic-gate 
6567c478bd9Sstevel@tonic-gate 	/*
657a85a6733Sjosephb 	 * If allocations failed, kick off a kmem_reap() and resort to
658a85a6733Sjosephb 	 * htable steal(). We may spin here if the system is very low on
659a85a6733Sjosephb 	 * memory. If the kernel itself has consumed all memory and kmem_reap()
660a85a6733Sjosephb 	 * can't free up anything, then we'll really get stuck here.
661a85a6733Sjosephb 	 * That should only happen in a system where the administrator has
662a85a6733Sjosephb 	 * misconfigured VM parameters via /etc/system.
6637c478bd9Sstevel@tonic-gate 	 */
664a85a6733Sjosephb 	while (ht == NULL && can_steal_post_boot) {
665a85a6733Sjosephb 		kmem_reap();
6667c478bd9Sstevel@tonic-gate 		ht = htable_steal(1);
6677c478bd9Sstevel@tonic-gate 		HATSTAT_INC(hs_steals);
6687c478bd9Sstevel@tonic-gate 
6697c478bd9Sstevel@tonic-gate 		/*
670a85a6733Sjosephb 		 * If we stole for a bare htable, release the pagetable page.
6717c478bd9Sstevel@tonic-gate 		 */
672ae115bc7Smrj 		if (ht != NULL) {
673ae115bc7Smrj 			if (is_bare) {
674ae115bc7Smrj 				ptable_free(ht->ht_pfn);
675ae115bc7Smrj 				ht->ht_pfn = PFN_INVALID;
676ae115bc7Smrj 			}
677ae115bc7Smrj 		}
6787c478bd9Sstevel@tonic-gate 	}
6797c478bd9Sstevel@tonic-gate 
6807c478bd9Sstevel@tonic-gate 	/*
681a85a6733Sjosephb 	 * All attempts to allocate or steal failed. This should only happen
682a85a6733Sjosephb 	 * if we run out of memory during boot, due perhaps to a huge
683a85a6733Sjosephb 	 * boot_archive. At this point there's no way to continue.
6847c478bd9Sstevel@tonic-gate 	 */
6857c478bd9Sstevel@tonic-gate 	if (ht == NULL)
6867c478bd9Sstevel@tonic-gate 		panic("htable_alloc(): couldn't steal\n");
6877c478bd9Sstevel@tonic-gate 
6887c478bd9Sstevel@tonic-gate 	/*
6897c478bd9Sstevel@tonic-gate 	 * Shared page tables have all entries locked and entries may not
6907c478bd9Sstevel@tonic-gate 	 * be added or deleted.
6917c478bd9Sstevel@tonic-gate 	 */
6927c478bd9Sstevel@tonic-gate 	ht->ht_flags = 0;
6937c478bd9Sstevel@tonic-gate 	if (shared != NULL) {
6947c478bd9Sstevel@tonic-gate 		ASSERT(level == 0);
6957c478bd9Sstevel@tonic-gate 		ASSERT(shared->ht_valid_cnt > 0);
6967c478bd9Sstevel@tonic-gate 		ht->ht_flags |= HTABLE_SHARED_PFN;
6977c478bd9Sstevel@tonic-gate 		ht->ht_pfn = shared->ht_pfn;
6987c478bd9Sstevel@tonic-gate 		ht->ht_lock_cnt = 0;
6997c478bd9Sstevel@tonic-gate 		ht->ht_valid_cnt = 0;		/* updated in hat_share() */
7007c478bd9Sstevel@tonic-gate 		ht->ht_shares = shared;
7017c478bd9Sstevel@tonic-gate 		need_to_zero = 0;
7027c478bd9Sstevel@tonic-gate 	} else {
7037c478bd9Sstevel@tonic-gate 		ht->ht_shares = NULL;
7047c478bd9Sstevel@tonic-gate 		ht->ht_lock_cnt = 0;
7057c478bd9Sstevel@tonic-gate 		ht->ht_valid_cnt = 0;
7067c478bd9Sstevel@tonic-gate 	}
7077c478bd9Sstevel@tonic-gate 
7087c478bd9Sstevel@tonic-gate 	/*
7097c478bd9Sstevel@tonic-gate 	 * setup flags, etc. for VLP htables
7107c478bd9Sstevel@tonic-gate 	 */
7117c478bd9Sstevel@tonic-gate 	if (is_vlp) {
7127c478bd9Sstevel@tonic-gate 		ht->ht_flags |= HTABLE_VLP;
7137c478bd9Sstevel@tonic-gate 		ASSERT(ht->ht_pfn == PFN_INVALID);
7147c478bd9Sstevel@tonic-gate 		need_to_zero = 0;
7157c478bd9Sstevel@tonic-gate 	}
7167c478bd9Sstevel@tonic-gate 
7177c478bd9Sstevel@tonic-gate 	/*
7187c478bd9Sstevel@tonic-gate 	 * fill in the htable
7197c478bd9Sstevel@tonic-gate 	 */
7207c478bd9Sstevel@tonic-gate 	ht->ht_hat = hat;
7217c478bd9Sstevel@tonic-gate 	ht->ht_parent = NULL;
7227c478bd9Sstevel@tonic-gate 	ht->ht_vaddr = vaddr;
7237c478bd9Sstevel@tonic-gate 	ht->ht_level = level;
7247c478bd9Sstevel@tonic-gate 	ht->ht_busy = 1;
7257c478bd9Sstevel@tonic-gate 	ht->ht_next = NULL;
7267c478bd9Sstevel@tonic-gate 	ht->ht_prev = NULL;
7277c478bd9Sstevel@tonic-gate 
7287c478bd9Sstevel@tonic-gate 	/*
7297c478bd9Sstevel@tonic-gate 	 * Zero out any freshly allocated page table
7307c478bd9Sstevel@tonic-gate 	 */
7317c478bd9Sstevel@tonic-gate 	if (need_to_zero)
7327c478bd9Sstevel@tonic-gate 		x86pte_zero(ht, 0, mmu.ptes_per_table);
733ae115bc7Smrj 
7347c478bd9Sstevel@tonic-gate 	return (ht);
7357c478bd9Sstevel@tonic-gate }
7367c478bd9Sstevel@tonic-gate 
7377c478bd9Sstevel@tonic-gate /*
7387c478bd9Sstevel@tonic-gate  * Free up an htable, either to a hat's cached list, the reserves or
7397c478bd9Sstevel@tonic-gate  * back to kmem.
7407c478bd9Sstevel@tonic-gate  */
7417c478bd9Sstevel@tonic-gate static void
7427c478bd9Sstevel@tonic-gate htable_free(htable_t *ht)
7437c478bd9Sstevel@tonic-gate {
7447c478bd9Sstevel@tonic-gate 	hat_t *hat = ht->ht_hat;
7457c478bd9Sstevel@tonic-gate 
7467c478bd9Sstevel@tonic-gate 	/*
7477c478bd9Sstevel@tonic-gate 	 * If the process isn't exiting, cache the free htable in the hat
7487c478bd9Sstevel@tonic-gate 	 * structure. We always do this for the boot reserve. We don't
7497c478bd9Sstevel@tonic-gate 	 * do this if the hat is exiting or we are stealing/reaping htables.
7507c478bd9Sstevel@tonic-gate 	 */
7517c478bd9Sstevel@tonic-gate 	if (hat != NULL &&
7527c478bd9Sstevel@tonic-gate 	    !(ht->ht_flags & HTABLE_SHARED_PFN) &&
7537c478bd9Sstevel@tonic-gate 	    (use_boot_reserve ||
7547c478bd9Sstevel@tonic-gate 	    (!(hat->hat_flags & HAT_FREEING) && !htable_dont_cache))) {
7557c478bd9Sstevel@tonic-gate 		ASSERT((ht->ht_flags & HTABLE_VLP) == 0);
7567c478bd9Sstevel@tonic-gate 		ASSERT(ht->ht_pfn != PFN_INVALID);
7577c478bd9Sstevel@tonic-gate 		hat_enter(hat);
7587c478bd9Sstevel@tonic-gate 		ht->ht_next = hat->hat_ht_cached;
7597c478bd9Sstevel@tonic-gate 		hat->hat_ht_cached = ht;
7607c478bd9Sstevel@tonic-gate 		hat_exit(hat);
7617c478bd9Sstevel@tonic-gate 		return;
7627c478bd9Sstevel@tonic-gate 	}
7637c478bd9Sstevel@tonic-gate 
7647c478bd9Sstevel@tonic-gate 	/*
7657c478bd9Sstevel@tonic-gate 	 * If we have a hardware page table, free it.
766ae115bc7Smrj 	 * We don't free page tables that are accessed by sharing.
7677c478bd9Sstevel@tonic-gate 	 */
7687c478bd9Sstevel@tonic-gate 	if (ht->ht_flags & HTABLE_SHARED_PFN) {
7697c478bd9Sstevel@tonic-gate 		ASSERT(ht->ht_pfn != PFN_INVALID);
7707c478bd9Sstevel@tonic-gate 	} else if (!(ht->ht_flags & HTABLE_VLP)) {
771ae115bc7Smrj 		ptable_free(ht->ht_pfn);
7727c478bd9Sstevel@tonic-gate 	}
773ae115bc7Smrj 	ht->ht_pfn = PFN_INVALID;
7747c478bd9Sstevel@tonic-gate 
7757c478bd9Sstevel@tonic-gate 	/*
77697704650Sjosephb 	 * Free htables or put into reserves.
7777c478bd9Sstevel@tonic-gate 	 */
778*aac11643Sjosephb 	if (USE_HAT_RESERVES() || htable_reserve_cnt < htable_reserve_amount) {
7797c478bd9Sstevel@tonic-gate 		htable_put_reserve(ht);
780*aac11643Sjosephb 	} else {
7817c478bd9Sstevel@tonic-gate 		kmem_cache_free(htable_cache, ht);
782*aac11643Sjosephb 		htable_adjust_reserve();
783*aac11643Sjosephb 	}
7847c478bd9Sstevel@tonic-gate }
7857c478bd9Sstevel@tonic-gate 
7867c478bd9Sstevel@tonic-gate 
7877c478bd9Sstevel@tonic-gate /*
7887c478bd9Sstevel@tonic-gate  * This is called when a hat is being destroyed or swapped out. We reap all
7897c478bd9Sstevel@tonic-gate  * the remaining htables in the hat cache. If destroying all left over
7907c478bd9Sstevel@tonic-gate  * htables are also destroyed.
7917c478bd9Sstevel@tonic-gate  *
7927c478bd9Sstevel@tonic-gate  * We also don't need to invalidate any of the PTPs nor do any demapping.
7937c478bd9Sstevel@tonic-gate  */
7947c478bd9Sstevel@tonic-gate void
7957c478bd9Sstevel@tonic-gate htable_purge_hat(hat_t *hat)
7967c478bd9Sstevel@tonic-gate {
7977c478bd9Sstevel@tonic-gate 	htable_t *ht;
7987c478bd9Sstevel@tonic-gate 	int h;
7997c478bd9Sstevel@tonic-gate 
8007c478bd9Sstevel@tonic-gate 	/*
8017c478bd9Sstevel@tonic-gate 	 * Purge the htable cache if just reaping.
8027c478bd9Sstevel@tonic-gate 	 */
8037c478bd9Sstevel@tonic-gate 	if (!(hat->hat_flags & HAT_FREEING)) {
8047c478bd9Sstevel@tonic-gate 		atomic_add_32(&htable_dont_cache, 1);
8057c478bd9Sstevel@tonic-gate 		for (;;) {
8067c478bd9Sstevel@tonic-gate 			hat_enter(hat);
8077c478bd9Sstevel@tonic-gate 			ht = hat->hat_ht_cached;
8087c478bd9Sstevel@tonic-gate 			if (ht == NULL) {
8097c478bd9Sstevel@tonic-gate 				hat_exit(hat);
8107c478bd9Sstevel@tonic-gate 				break;
8117c478bd9Sstevel@tonic-gate 			}
8127c478bd9Sstevel@tonic-gate 			hat->hat_ht_cached = ht->ht_next;
8137c478bd9Sstevel@tonic-gate 			hat_exit(hat);
8147c478bd9Sstevel@tonic-gate 			htable_free(ht);
8157c478bd9Sstevel@tonic-gate 		}
8167c478bd9Sstevel@tonic-gate 		atomic_add_32(&htable_dont_cache, -1);
8177c478bd9Sstevel@tonic-gate 		return;
8187c478bd9Sstevel@tonic-gate 	}
8197c478bd9Sstevel@tonic-gate 
8207c478bd9Sstevel@tonic-gate 	/*
8217c478bd9Sstevel@tonic-gate 	 * if freeing, no locking is needed
8227c478bd9Sstevel@tonic-gate 	 */
8237c478bd9Sstevel@tonic-gate 	while ((ht = hat->hat_ht_cached) != NULL) {
8247c478bd9Sstevel@tonic-gate 		hat->hat_ht_cached = ht->ht_next;
8257c478bd9Sstevel@tonic-gate 		htable_free(ht);
8267c478bd9Sstevel@tonic-gate 	}
8277c478bd9Sstevel@tonic-gate 
8287c478bd9Sstevel@tonic-gate 	/*
8297c478bd9Sstevel@tonic-gate 	 * walk thru the htable hash table and free all the htables in it.
8307c478bd9Sstevel@tonic-gate 	 */
8317c478bd9Sstevel@tonic-gate 	for (h = 0; h < hat->hat_num_hash; ++h) {
8327c478bd9Sstevel@tonic-gate 		while ((ht = hat->hat_ht_hash[h]) != NULL) {
8337c478bd9Sstevel@tonic-gate 			if (ht->ht_next)
8347c478bd9Sstevel@tonic-gate 				ht->ht_next->ht_prev = ht->ht_prev;
8357c478bd9Sstevel@tonic-gate 
8367c478bd9Sstevel@tonic-gate 			if (ht->ht_prev) {
8377c478bd9Sstevel@tonic-gate 				ht->ht_prev->ht_next = ht->ht_next;
8387c478bd9Sstevel@tonic-gate 			} else {
8397c478bd9Sstevel@tonic-gate 				ASSERT(hat->hat_ht_hash[h] == ht);
8407c478bd9Sstevel@tonic-gate 				hat->hat_ht_hash[h] = ht->ht_next;
8417c478bd9Sstevel@tonic-gate 			}
8427c478bd9Sstevel@tonic-gate 			htable_free(ht);
8437c478bd9Sstevel@tonic-gate 		}
8447c478bd9Sstevel@tonic-gate 	}
8457c478bd9Sstevel@tonic-gate }
8467c478bd9Sstevel@tonic-gate 
8477c478bd9Sstevel@tonic-gate /*
8487c478bd9Sstevel@tonic-gate  * Unlink an entry for a table at vaddr and level out of the existing table
8497c478bd9Sstevel@tonic-gate  * one level higher. We are always holding the HASH_ENTER() when doing this.
8507c478bd9Sstevel@tonic-gate  */
8517c478bd9Sstevel@tonic-gate static void
8527c478bd9Sstevel@tonic-gate unlink_ptp(htable_t *higher, htable_t *old, uintptr_t vaddr)
8537c478bd9Sstevel@tonic-gate {
8547c478bd9Sstevel@tonic-gate 	uint_t		entry = htable_va2entry(vaddr, higher);
8557c478bd9Sstevel@tonic-gate 	x86pte_t	expect = MAKEPTP(old->ht_pfn, old->ht_level);
8567c478bd9Sstevel@tonic-gate 	x86pte_t	found;
8577c478bd9Sstevel@tonic-gate 
8587c478bd9Sstevel@tonic-gate 	ASSERT(higher->ht_busy > 0);
8597c478bd9Sstevel@tonic-gate 	ASSERT(higher->ht_valid_cnt > 0);
8607c478bd9Sstevel@tonic-gate 	ASSERT(old->ht_valid_cnt == 0);
8617c478bd9Sstevel@tonic-gate 	found = x86pte_cas(higher, entry, expect, 0);
8627c478bd9Sstevel@tonic-gate 	if (found != expect)
8637c478bd9Sstevel@tonic-gate 		panic("Bad PTP found=" FMT_PTE ", expected=" FMT_PTE,
8647c478bd9Sstevel@tonic-gate 		    found, expect);
8657c478bd9Sstevel@tonic-gate 	HTABLE_DEC(higher->ht_valid_cnt);
8667c478bd9Sstevel@tonic-gate }
8677c478bd9Sstevel@tonic-gate 
8687c478bd9Sstevel@tonic-gate /*
8697c478bd9Sstevel@tonic-gate  * Link an entry for a new table at vaddr and level into the existing table
8707c478bd9Sstevel@tonic-gate  * one level higher. We are always holding the HASH_ENTER() when doing this.
8717c478bd9Sstevel@tonic-gate  */
8727c478bd9Sstevel@tonic-gate static void
8737c478bd9Sstevel@tonic-gate link_ptp(htable_t *higher, htable_t *new, uintptr_t vaddr)
8747c478bd9Sstevel@tonic-gate {
8757c478bd9Sstevel@tonic-gate 	uint_t		entry = htable_va2entry(vaddr, higher);
8767c478bd9Sstevel@tonic-gate 	x86pte_t	newptp = MAKEPTP(new->ht_pfn, new->ht_level);
8777c478bd9Sstevel@tonic-gate 	x86pte_t	found;
8787c478bd9Sstevel@tonic-gate 
8797c478bd9Sstevel@tonic-gate 	ASSERT(higher->ht_busy > 0);
8807c478bd9Sstevel@tonic-gate 
8817c478bd9Sstevel@tonic-gate 	ASSERT(new->ht_level != mmu.max_level);
8827c478bd9Sstevel@tonic-gate 
8837c478bd9Sstevel@tonic-gate 	HTABLE_INC(higher->ht_valid_cnt);
8847c478bd9Sstevel@tonic-gate 
8857c478bd9Sstevel@tonic-gate 	found = x86pte_cas(higher, entry, 0, newptp);
886b4b46911Skchow 	if ((found & ~PT_REF) != 0)
8877c478bd9Sstevel@tonic-gate 		panic("HAT: ptp not 0, found=" FMT_PTE, found);
8887c478bd9Sstevel@tonic-gate }
8897c478bd9Sstevel@tonic-gate 
8907c478bd9Sstevel@tonic-gate /*
891ae115bc7Smrj  * Release of hold on an htable. If this is the last use and the pagetable
892ae115bc7Smrj  * is empty we may want to free it, then recursively look at the pagetable
893ae115bc7Smrj  * above it. The recursion is handled by the outer while() loop.
8947c478bd9Sstevel@tonic-gate  */
8957c478bd9Sstevel@tonic-gate void
8967c478bd9Sstevel@tonic-gate htable_release(htable_t *ht)
8977c478bd9Sstevel@tonic-gate {
8987c478bd9Sstevel@tonic-gate 	uint_t		hashval;
8997c478bd9Sstevel@tonic-gate 	htable_t	*shared;
9007c478bd9Sstevel@tonic-gate 	htable_t	*higher;
9017c478bd9Sstevel@tonic-gate 	hat_t		*hat;
9027c478bd9Sstevel@tonic-gate 	uintptr_t	va;
9037c478bd9Sstevel@tonic-gate 	level_t		level;
9047c478bd9Sstevel@tonic-gate 
9057c478bd9Sstevel@tonic-gate 	while (ht != NULL) {
9067c478bd9Sstevel@tonic-gate 		shared = NULL;
9077c478bd9Sstevel@tonic-gate 		for (;;) {
9087c478bd9Sstevel@tonic-gate 			hat = ht->ht_hat;
9097c478bd9Sstevel@tonic-gate 			va = ht->ht_vaddr;
9107c478bd9Sstevel@tonic-gate 			level = ht->ht_level;
9117c478bd9Sstevel@tonic-gate 			hashval = HTABLE_HASH(hat, va, level);
9127c478bd9Sstevel@tonic-gate 
9137c478bd9Sstevel@tonic-gate 			/*
9147c478bd9Sstevel@tonic-gate 			 * The common case is that this isn't the last use of
9157c478bd9Sstevel@tonic-gate 			 * an htable so we don't want to free the htable.
9167c478bd9Sstevel@tonic-gate 			 */
9177c478bd9Sstevel@tonic-gate 			HTABLE_ENTER(hashval);
9187c478bd9Sstevel@tonic-gate 			ASSERT(ht->ht_lock_cnt == 0 || ht->ht_valid_cnt > 0);
9197c478bd9Sstevel@tonic-gate 			ASSERT(ht->ht_valid_cnt >= 0);
9207c478bd9Sstevel@tonic-gate 			ASSERT(ht->ht_busy > 0);
9217c478bd9Sstevel@tonic-gate 			if (ht->ht_valid_cnt > 0)
9227c478bd9Sstevel@tonic-gate 				break;
9237c478bd9Sstevel@tonic-gate 			if (ht->ht_busy > 1)
9247c478bd9Sstevel@tonic-gate 				break;
9257c478bd9Sstevel@tonic-gate 
9267c478bd9Sstevel@tonic-gate 			/*
9277c478bd9Sstevel@tonic-gate 			 * we always release empty shared htables
9287c478bd9Sstevel@tonic-gate 			 */
9297c478bd9Sstevel@tonic-gate 			if (!(ht->ht_flags & HTABLE_SHARED_PFN)) {
9307c478bd9Sstevel@tonic-gate 
9317c478bd9Sstevel@tonic-gate 				/*
9327c478bd9Sstevel@tonic-gate 				 * don't release if in address space tear down
9337c478bd9Sstevel@tonic-gate 				 */
9347c478bd9Sstevel@tonic-gate 				if (hat->hat_flags & HAT_FREEING)
9357c478bd9Sstevel@tonic-gate 					break;
9367c478bd9Sstevel@tonic-gate 
9377c478bd9Sstevel@tonic-gate 				/*
9387c478bd9Sstevel@tonic-gate 				 * At and above max_page_level, free if it's for
9397c478bd9Sstevel@tonic-gate 				 * a boot-time kernel mapping below kernelbase.
9407c478bd9Sstevel@tonic-gate 				 */
9417c478bd9Sstevel@tonic-gate 				if (level >= mmu.max_page_level &&
9427c478bd9Sstevel@tonic-gate 				    (hat != kas.a_hat || va >= kernelbase))
9437c478bd9Sstevel@tonic-gate 					break;
9447c478bd9Sstevel@tonic-gate 			}
9457c478bd9Sstevel@tonic-gate 
9467c478bd9Sstevel@tonic-gate 			/*
947ae115bc7Smrj 			 * Remember if we destroy an htable that shares its PFN
948ae115bc7Smrj 			 * from elsewhere.
9497c478bd9Sstevel@tonic-gate 			 */
9507c478bd9Sstevel@tonic-gate 			if (ht->ht_flags & HTABLE_SHARED_PFN) {
9517c478bd9Sstevel@tonic-gate 				ASSERT(ht->ht_level == 0);
9527c478bd9Sstevel@tonic-gate 				ASSERT(shared == NULL);
9537c478bd9Sstevel@tonic-gate 				shared = ht->ht_shares;
9547c478bd9Sstevel@tonic-gate 				HATSTAT_INC(hs_htable_unshared);
9557c478bd9Sstevel@tonic-gate 			}
9567c478bd9Sstevel@tonic-gate 
9577c478bd9Sstevel@tonic-gate 			/*
9587c478bd9Sstevel@tonic-gate 			 * Handle release of a table and freeing the htable_t.
9597c478bd9Sstevel@tonic-gate 			 * Unlink it from the table higher (ie. ht_parent).
9607c478bd9Sstevel@tonic-gate 			 */
9617c478bd9Sstevel@tonic-gate 			ASSERT(ht->ht_lock_cnt == 0);
9627c478bd9Sstevel@tonic-gate 			higher = ht->ht_parent;
9637c478bd9Sstevel@tonic-gate 			ASSERT(higher != NULL);
9647c478bd9Sstevel@tonic-gate 
9657c478bd9Sstevel@tonic-gate 			/*
9667c478bd9Sstevel@tonic-gate 			 * Unlink the pagetable.
9677c478bd9Sstevel@tonic-gate 			 */
9687c478bd9Sstevel@tonic-gate 			unlink_ptp(higher, ht, va);
9697c478bd9Sstevel@tonic-gate 
9707c478bd9Sstevel@tonic-gate 			/*
9717c478bd9Sstevel@tonic-gate 			 * When any top level VLP page table entry changes, we
9727c478bd9Sstevel@tonic-gate 			 * must issue a reload of cr3 on all processors.
9737c478bd9Sstevel@tonic-gate 			 */
9747c478bd9Sstevel@tonic-gate 			if ((hat->hat_flags & HAT_VLP) &&
9757c478bd9Sstevel@tonic-gate 			    level == VLP_LEVEL - 1)
976ae115bc7Smrj 				hat_tlb_inval(hat, DEMAP_ALL_ADDR);
9777c478bd9Sstevel@tonic-gate 
9787c478bd9Sstevel@tonic-gate 			/*
9797c478bd9Sstevel@tonic-gate 			 * remove this htable from its hash list
9807c478bd9Sstevel@tonic-gate 			 */
9817c478bd9Sstevel@tonic-gate 			if (ht->ht_next)
9827c478bd9Sstevel@tonic-gate 				ht->ht_next->ht_prev = ht->ht_prev;
9837c478bd9Sstevel@tonic-gate 
9847c478bd9Sstevel@tonic-gate 			if (ht->ht_prev) {
9857c478bd9Sstevel@tonic-gate 				ht->ht_prev->ht_next = ht->ht_next;
9867c478bd9Sstevel@tonic-gate 			} else {
9877c478bd9Sstevel@tonic-gate 				ASSERT(hat->hat_ht_hash[hashval] == ht);
9887c478bd9Sstevel@tonic-gate 				hat->hat_ht_hash[hashval] = ht->ht_next;
9897c478bd9Sstevel@tonic-gate 			}
9907c478bd9Sstevel@tonic-gate 			HTABLE_EXIT(hashval);
9917c478bd9Sstevel@tonic-gate 			htable_free(ht);
9927c478bd9Sstevel@tonic-gate 			ht = higher;
9937c478bd9Sstevel@tonic-gate 		}
9947c478bd9Sstevel@tonic-gate 
9957c478bd9Sstevel@tonic-gate 		ASSERT(ht->ht_busy >= 1);
9967c478bd9Sstevel@tonic-gate 		--ht->ht_busy;
9977c478bd9Sstevel@tonic-gate 		HTABLE_EXIT(hashval);
9987c478bd9Sstevel@tonic-gate 
9997c478bd9Sstevel@tonic-gate 		/*
10007c478bd9Sstevel@tonic-gate 		 * If we released a shared htable, do a release on the htable
10017c478bd9Sstevel@tonic-gate 		 * from which it shared
10027c478bd9Sstevel@tonic-gate 		 */
10037c478bd9Sstevel@tonic-gate 		ht = shared;
10047c478bd9Sstevel@tonic-gate 	}
10057c478bd9Sstevel@tonic-gate }
10067c478bd9Sstevel@tonic-gate 
10077c478bd9Sstevel@tonic-gate /*
10087c478bd9Sstevel@tonic-gate  * Find the htable for the pagetable at the given level for the given address.
10097c478bd9Sstevel@tonic-gate  * If found acquires a hold that eventually needs to be htable_release()d
10107c478bd9Sstevel@tonic-gate  */
10117c478bd9Sstevel@tonic-gate htable_t *
10127c478bd9Sstevel@tonic-gate htable_lookup(hat_t *hat, uintptr_t vaddr, level_t level)
10137c478bd9Sstevel@tonic-gate {
10147c478bd9Sstevel@tonic-gate 	uintptr_t	base;
10157c478bd9Sstevel@tonic-gate 	uint_t		hashval;
10167c478bd9Sstevel@tonic-gate 	htable_t	*ht = NULL;
10177c478bd9Sstevel@tonic-gate 
10187c478bd9Sstevel@tonic-gate 	ASSERT(level >= 0);
10197c478bd9Sstevel@tonic-gate 	ASSERT(level <= TOP_LEVEL(hat));
10207c478bd9Sstevel@tonic-gate 
10217c478bd9Sstevel@tonic-gate 	if (level == TOP_LEVEL(hat))
10227c478bd9Sstevel@tonic-gate 		base = 0;
10237c478bd9Sstevel@tonic-gate 	else
10247c478bd9Sstevel@tonic-gate 		base = vaddr & LEVEL_MASK(level + 1);
10257c478bd9Sstevel@tonic-gate 
10267c478bd9Sstevel@tonic-gate 	hashval = HTABLE_HASH(hat, base, level);
10277c478bd9Sstevel@tonic-gate 	HTABLE_ENTER(hashval);
10287c478bd9Sstevel@tonic-gate 	for (ht = hat->hat_ht_hash[hashval]; ht; ht = ht->ht_next) {
10297c478bd9Sstevel@tonic-gate 		if (ht->ht_hat == hat &&
10307c478bd9Sstevel@tonic-gate 		    ht->ht_vaddr == base &&
10317c478bd9Sstevel@tonic-gate 		    ht->ht_level == level)
10327c478bd9Sstevel@tonic-gate 			break;
10337c478bd9Sstevel@tonic-gate 	}
10347c478bd9Sstevel@tonic-gate 	if (ht)
10357c478bd9Sstevel@tonic-gate 		++ht->ht_busy;
10367c478bd9Sstevel@tonic-gate 
10377c478bd9Sstevel@tonic-gate 	HTABLE_EXIT(hashval);
10387c478bd9Sstevel@tonic-gate 	return (ht);
10397c478bd9Sstevel@tonic-gate }
10407c478bd9Sstevel@tonic-gate 
10417c478bd9Sstevel@tonic-gate /*
10427c478bd9Sstevel@tonic-gate  * Acquires a hold on a known htable (from a locked hment entry).
10437c478bd9Sstevel@tonic-gate  */
10447c478bd9Sstevel@tonic-gate void
10457c478bd9Sstevel@tonic-gate htable_acquire(htable_t *ht)
10467c478bd9Sstevel@tonic-gate {
10477c478bd9Sstevel@tonic-gate 	hat_t		*hat = ht->ht_hat;
10487c478bd9Sstevel@tonic-gate 	level_t		level = ht->ht_level;
10497c478bd9Sstevel@tonic-gate 	uintptr_t	base = ht->ht_vaddr;
10507c478bd9Sstevel@tonic-gate 	uint_t		hashval = HTABLE_HASH(hat, base, level);
10517c478bd9Sstevel@tonic-gate 
10527c478bd9Sstevel@tonic-gate 	HTABLE_ENTER(hashval);
10537c478bd9Sstevel@tonic-gate #ifdef DEBUG
10547c478bd9Sstevel@tonic-gate 	/*
10557c478bd9Sstevel@tonic-gate 	 * make sure the htable is there
10567c478bd9Sstevel@tonic-gate 	 */
10577c478bd9Sstevel@tonic-gate 	{
10587c478bd9Sstevel@tonic-gate 		htable_t	*h;
10597c478bd9Sstevel@tonic-gate 
10607c478bd9Sstevel@tonic-gate 		for (h = hat->hat_ht_hash[hashval];
10617c478bd9Sstevel@tonic-gate 		    h && h != ht;
10627c478bd9Sstevel@tonic-gate 		    h = h->ht_next)
10637c478bd9Sstevel@tonic-gate 			;
10647c478bd9Sstevel@tonic-gate 		ASSERT(h == ht);
10657c478bd9Sstevel@tonic-gate 	}
10667c478bd9Sstevel@tonic-gate #endif /* DEBUG */
10677c478bd9Sstevel@tonic-gate 	++ht->ht_busy;
10687c478bd9Sstevel@tonic-gate 	HTABLE_EXIT(hashval);
10697c478bd9Sstevel@tonic-gate }
10707c478bd9Sstevel@tonic-gate 
10717c478bd9Sstevel@tonic-gate /*
10727c478bd9Sstevel@tonic-gate  * Find the htable for the pagetable at the given level for the given address.
10737c478bd9Sstevel@tonic-gate  * If found acquires a hold that eventually needs to be htable_release()d
10747c478bd9Sstevel@tonic-gate  * If not found the table is created.
10757c478bd9Sstevel@tonic-gate  *
10767c478bd9Sstevel@tonic-gate  * Since we can't hold a hash table mutex during allocation, we have to
10777c478bd9Sstevel@tonic-gate  * drop it and redo the search on a create. Then we may have to free the newly
10787c478bd9Sstevel@tonic-gate  * allocated htable if another thread raced in and created it ahead of us.
10797c478bd9Sstevel@tonic-gate  */
10807c478bd9Sstevel@tonic-gate htable_t *
10817c478bd9Sstevel@tonic-gate htable_create(
10827c478bd9Sstevel@tonic-gate 	hat_t		*hat,
10837c478bd9Sstevel@tonic-gate 	uintptr_t	vaddr,
10847c478bd9Sstevel@tonic-gate 	level_t		level,
10857c478bd9Sstevel@tonic-gate 	htable_t	*shared)
10867c478bd9Sstevel@tonic-gate {
10877c478bd9Sstevel@tonic-gate 	uint_t		h;
10887c478bd9Sstevel@tonic-gate 	level_t		l;
10897c478bd9Sstevel@tonic-gate 	uintptr_t	base;
10907c478bd9Sstevel@tonic-gate 	htable_t	*ht;
10917c478bd9Sstevel@tonic-gate 	htable_t	*higher = NULL;
10927c478bd9Sstevel@tonic-gate 	htable_t	*new = NULL;
10937c478bd9Sstevel@tonic-gate 
10947c478bd9Sstevel@tonic-gate 	if (level < 0 || level > TOP_LEVEL(hat))
10957c478bd9Sstevel@tonic-gate 		panic("htable_create(): level %d out of range\n", level);
10967c478bd9Sstevel@tonic-gate 
10977c478bd9Sstevel@tonic-gate 	/*
10987c478bd9Sstevel@tonic-gate 	 * Create the page tables in top down order.
10997c478bd9Sstevel@tonic-gate 	 */
11007c478bd9Sstevel@tonic-gate 	for (l = TOP_LEVEL(hat); l >= level; --l) {
11017c478bd9Sstevel@tonic-gate 		new = NULL;
11027c478bd9Sstevel@tonic-gate 		if (l == TOP_LEVEL(hat))
11037c478bd9Sstevel@tonic-gate 			base = 0;
11047c478bd9Sstevel@tonic-gate 		else
11057c478bd9Sstevel@tonic-gate 			base = vaddr & LEVEL_MASK(l + 1);
11067c478bd9Sstevel@tonic-gate 
11077c478bd9Sstevel@tonic-gate 		h = HTABLE_HASH(hat, base, l);
11087c478bd9Sstevel@tonic-gate try_again:
11097c478bd9Sstevel@tonic-gate 		/*
11107c478bd9Sstevel@tonic-gate 		 * look up the htable at this level
11117c478bd9Sstevel@tonic-gate 		 */
11127c478bd9Sstevel@tonic-gate 		HTABLE_ENTER(h);
11137c478bd9Sstevel@tonic-gate 		if (l == TOP_LEVEL(hat)) {
11147c478bd9Sstevel@tonic-gate 			ht = hat->hat_htable;
11157c478bd9Sstevel@tonic-gate 		} else {
11167c478bd9Sstevel@tonic-gate 			for (ht = hat->hat_ht_hash[h]; ht; ht = ht->ht_next) {
11177c478bd9Sstevel@tonic-gate 				ASSERT(ht->ht_hat == hat);
11187c478bd9Sstevel@tonic-gate 				if (ht->ht_vaddr == base &&
11197c478bd9Sstevel@tonic-gate 				    ht->ht_level == l)
11207c478bd9Sstevel@tonic-gate 					break;
11217c478bd9Sstevel@tonic-gate 			}
11227c478bd9Sstevel@tonic-gate 		}
11237c478bd9Sstevel@tonic-gate 
11247c478bd9Sstevel@tonic-gate 		/*
11257c478bd9Sstevel@tonic-gate 		 * if we found the htable, increment its busy cnt
11267c478bd9Sstevel@tonic-gate 		 * and if we had allocated a new htable, free it.
11277c478bd9Sstevel@tonic-gate 		 */
11287c478bd9Sstevel@tonic-gate 		if (ht != NULL) {
11297c478bd9Sstevel@tonic-gate 			/*
11307c478bd9Sstevel@tonic-gate 			 * If we find a pre-existing shared table, it must
11317c478bd9Sstevel@tonic-gate 			 * share from the same place.
11327c478bd9Sstevel@tonic-gate 			 */
11337c478bd9Sstevel@tonic-gate 			if (l == level && shared && ht->ht_shares &&
11347c478bd9Sstevel@tonic-gate 			    ht->ht_shares != shared) {
11357c478bd9Sstevel@tonic-gate 				panic("htable shared from wrong place "
11367c478bd9Sstevel@tonic-gate 				    "found htable=%p shared=%p", ht, shared);
11377c478bd9Sstevel@tonic-gate 			}
11387c478bd9Sstevel@tonic-gate 			++ht->ht_busy;
11397c478bd9Sstevel@tonic-gate 			HTABLE_EXIT(h);
11407c478bd9Sstevel@tonic-gate 			if (new)
11417c478bd9Sstevel@tonic-gate 				htable_free(new);
11427c478bd9Sstevel@tonic-gate 			if (higher != NULL)
11437c478bd9Sstevel@tonic-gate 				htable_release(higher);
11447c478bd9Sstevel@tonic-gate 			higher = ht;
11457c478bd9Sstevel@tonic-gate 
11467c478bd9Sstevel@tonic-gate 		/*
11477c478bd9Sstevel@tonic-gate 		 * if we didn't find it on the first search
11487c478bd9Sstevel@tonic-gate 		 * allocate a new one and search again
11497c478bd9Sstevel@tonic-gate 		 */
11507c478bd9Sstevel@tonic-gate 		} else if (new == NULL) {
11517c478bd9Sstevel@tonic-gate 			HTABLE_EXIT(h);
11527c478bd9Sstevel@tonic-gate 			new = htable_alloc(hat, base, l,
11537c478bd9Sstevel@tonic-gate 			    l == level ? shared : NULL);
11547c478bd9Sstevel@tonic-gate 			goto try_again;
11557c478bd9Sstevel@tonic-gate 
11567c478bd9Sstevel@tonic-gate 		/*
11577c478bd9Sstevel@tonic-gate 		 * 2nd search and still not there, use "new" table
11587c478bd9Sstevel@tonic-gate 		 * Link new table into higher, when not at top level.
11597c478bd9Sstevel@tonic-gate 		 */
11607c478bd9Sstevel@tonic-gate 		} else {
11617c478bd9Sstevel@tonic-gate 			ht = new;
11627c478bd9Sstevel@tonic-gate 			if (higher != NULL) {
11637c478bd9Sstevel@tonic-gate 				link_ptp(higher, ht, base);
11647c478bd9Sstevel@tonic-gate 				ht->ht_parent = higher;
11657c478bd9Sstevel@tonic-gate 
11667c478bd9Sstevel@tonic-gate 				/*
11677c478bd9Sstevel@tonic-gate 				 * When any top level VLP page table changes,
11687c478bd9Sstevel@tonic-gate 				 * we must reload cr3 on all processors.
11697c478bd9Sstevel@tonic-gate 				 */
11707c478bd9Sstevel@tonic-gate #ifdef __i386
11717c478bd9Sstevel@tonic-gate 				if (mmu.pae_hat &&
11727c478bd9Sstevel@tonic-gate #else /* !__i386 */
11737c478bd9Sstevel@tonic-gate 				if ((hat->hat_flags & HAT_VLP) &&
11747c478bd9Sstevel@tonic-gate #endif /* __i386 */
11757c478bd9Sstevel@tonic-gate 				    l == VLP_LEVEL - 1)
1176ae115bc7Smrj 					hat_tlb_inval(hat, DEMAP_ALL_ADDR);
11777c478bd9Sstevel@tonic-gate 			}
11787c478bd9Sstevel@tonic-gate 			ht->ht_next = hat->hat_ht_hash[h];
11797c478bd9Sstevel@tonic-gate 			ASSERT(ht->ht_prev == NULL);
11807c478bd9Sstevel@tonic-gate 			if (hat->hat_ht_hash[h])
11817c478bd9Sstevel@tonic-gate 				hat->hat_ht_hash[h]->ht_prev = ht;
11827c478bd9Sstevel@tonic-gate 			hat->hat_ht_hash[h] = ht;
11837c478bd9Sstevel@tonic-gate 			HTABLE_EXIT(h);
11847c478bd9Sstevel@tonic-gate 
11857c478bd9Sstevel@tonic-gate 			/*
11867c478bd9Sstevel@tonic-gate 			 * Note we don't do htable_release(higher).
11877c478bd9Sstevel@tonic-gate 			 * That happens recursively when "new" is removed by
11887c478bd9Sstevel@tonic-gate 			 * htable_release() or htable_steal().
11897c478bd9Sstevel@tonic-gate 			 */
11907c478bd9Sstevel@tonic-gate 			higher = ht;
11917c478bd9Sstevel@tonic-gate 
11927c478bd9Sstevel@tonic-gate 			/*
11937c478bd9Sstevel@tonic-gate 			 * If we just created a new shared page table we
11947c478bd9Sstevel@tonic-gate 			 * increment the shared htable's busy count, so that
11957c478bd9Sstevel@tonic-gate 			 * it can't be the victim of a steal even if it's empty.
11967c478bd9Sstevel@tonic-gate 			 */
11977c478bd9Sstevel@tonic-gate 			if (l == level && shared) {
11987c478bd9Sstevel@tonic-gate 				(void) htable_lookup(shared->ht_hat,
11997c478bd9Sstevel@tonic-gate 				    shared->ht_vaddr, shared->ht_level);
12007c478bd9Sstevel@tonic-gate 				HATSTAT_INC(hs_htable_shared);
12017c478bd9Sstevel@tonic-gate 			}
12027c478bd9Sstevel@tonic-gate 		}
12037c478bd9Sstevel@tonic-gate 	}
12047c478bd9Sstevel@tonic-gate 
12057c478bd9Sstevel@tonic-gate 	return (ht);
12067c478bd9Sstevel@tonic-gate }
12077c478bd9Sstevel@tonic-gate 
12087c478bd9Sstevel@tonic-gate /*
1209ae115bc7Smrj  * Inherit initial pagetables from the boot program.
1210ae115bc7Smrj  */
1211ae115bc7Smrj void
1212ae115bc7Smrj htable_attach(
1213ae115bc7Smrj 	hat_t *hat,
1214ae115bc7Smrj 	uintptr_t base,
1215ae115bc7Smrj 	level_t level,
1216ae115bc7Smrj 	htable_t *parent,
1217ae115bc7Smrj 	pfn_t pfn)
1218ae115bc7Smrj {
1219ae115bc7Smrj 	htable_t	*ht;
1220ae115bc7Smrj 	uint_t		h;
1221ae115bc7Smrj 	uint_t		i;
1222ae115bc7Smrj 	x86pte_t	pte;
1223ae115bc7Smrj 	x86pte_t	*ptep;
1224ae115bc7Smrj 	page_t		*pp;
1225ae115bc7Smrj 	extern page_t	*boot_claim_page(pfn_t);
1226ae115bc7Smrj 
1227ae115bc7Smrj 	ht = htable_get_reserve();
1228ae115bc7Smrj 	if (level == mmu.max_level)
1229ae115bc7Smrj 		kas.a_hat->hat_htable = ht;
1230ae115bc7Smrj 	ht->ht_hat = hat;
1231ae115bc7Smrj 	ht->ht_parent = parent;
1232ae115bc7Smrj 	ht->ht_vaddr = base;
1233ae115bc7Smrj 	ht->ht_level = level;
1234ae115bc7Smrj 	ht->ht_busy = 1;
1235ae115bc7Smrj 	ht->ht_next = NULL;
1236ae115bc7Smrj 	ht->ht_prev = NULL;
1237ae115bc7Smrj 	ht->ht_flags = 0;
1238ae115bc7Smrj 	ht->ht_pfn = pfn;
1239ae115bc7Smrj 	ht->ht_lock_cnt = 0;
1240ae115bc7Smrj 	ht->ht_valid_cnt = 0;
1241ae115bc7Smrj 	if (parent != NULL)
1242ae115bc7Smrj 		++parent->ht_busy;
1243ae115bc7Smrj 
1244ae115bc7Smrj 	h = HTABLE_HASH(hat, base, level);
1245ae115bc7Smrj 	HTABLE_ENTER(h);
1246ae115bc7Smrj 	ht->ht_next = hat->hat_ht_hash[h];
1247ae115bc7Smrj 	ASSERT(ht->ht_prev == NULL);
1248ae115bc7Smrj 	if (hat->hat_ht_hash[h])
1249ae115bc7Smrj 		hat->hat_ht_hash[h]->ht_prev = ht;
1250ae115bc7Smrj 	hat->hat_ht_hash[h] = ht;
1251ae115bc7Smrj 	HTABLE_EXIT(h);
1252ae115bc7Smrj 
1253ae115bc7Smrj 	/*
1254ae115bc7Smrj 	 * make sure the page table physical page is not FREE
1255ae115bc7Smrj 	 */
1256ae115bc7Smrj 	if (page_resv(1, KM_NOSLEEP) == 0)
1257ae115bc7Smrj 		panic("page_resv() failed in ptable alloc");
1258ae115bc7Smrj 
1259ae115bc7Smrj 	pp = boot_claim_page(pfn);
1260ae115bc7Smrj 	ASSERT(pp != NULL);
1261ae115bc7Smrj 	page_downgrade(pp);
1262ae115bc7Smrj 	/*
1263ae115bc7Smrj 	 * Record in the page_t that is a pagetable for segkpm setup.
1264ae115bc7Smrj 	 */
1265ae115bc7Smrj 	if (kpm_vbase)
1266ae115bc7Smrj 		pp->p_index = 1;
1267ae115bc7Smrj 
1268ae115bc7Smrj 	/*
1269ae115bc7Smrj 	 * Count valid mappings and recursively attach lower level pagetables.
1270ae115bc7Smrj 	 */
1271ae115bc7Smrj 	ptep = kbm_remap_window(pfn_to_pa(pfn), 0);
1272ae115bc7Smrj 	for (i = 0; i < HTABLE_NUM_PTES(ht); ++i) {
1273ae115bc7Smrj 		if (mmu.pae_hat)
1274ae115bc7Smrj 			pte = ptep[i];
1275ae115bc7Smrj 		else
1276ae115bc7Smrj 			pte = ((x86pte32_t *)ptep)[i];
1277ae115bc7Smrj 		if (!IN_HYPERVISOR_VA(base) && PTE_ISVALID(pte)) {
1278ae115bc7Smrj 			++ht->ht_valid_cnt;
1279ae115bc7Smrj 			if (!PTE_ISPAGE(pte, level)) {
1280ae115bc7Smrj 				htable_attach(hat, base, level - 1,
1281ae115bc7Smrj 				    ht, PTE2PFN(pte, level));
1282ae115bc7Smrj 				ptep = kbm_remap_window(pfn_to_pa(pfn), 0);
1283ae115bc7Smrj 			}
1284ae115bc7Smrj 		}
1285ae115bc7Smrj 		base += LEVEL_SIZE(level);
1286ae115bc7Smrj 		if (base == mmu.hole_start)
1287ae115bc7Smrj 			base = (mmu.hole_end + MMU_PAGEOFFSET) & MMU_PAGEMASK;
1288ae115bc7Smrj 	}
1289ae115bc7Smrj 
1290ae115bc7Smrj 	/*
1291ae115bc7Smrj 	 * As long as all the mappings we had were below kernel base
1292ae115bc7Smrj 	 * we can release the htable.
1293ae115bc7Smrj 	 */
1294ae115bc7Smrj 	if (base < kernelbase)
1295ae115bc7Smrj 		htable_release(ht);
1296ae115bc7Smrj }
1297ae115bc7Smrj 
1298ae115bc7Smrj /*
12997c478bd9Sstevel@tonic-gate  * Walk through a given htable looking for the first valid entry.  This
13007c478bd9Sstevel@tonic-gate  * routine takes both a starting and ending address.  The starting address
13017c478bd9Sstevel@tonic-gate  * is required to be within the htable provided by the caller, but there is
13027c478bd9Sstevel@tonic-gate  * no such restriction on the ending address.
13037c478bd9Sstevel@tonic-gate  *
13047c478bd9Sstevel@tonic-gate  * If the routine finds a valid entry in the htable (at or beyond the
13057c478bd9Sstevel@tonic-gate  * starting address), the PTE (and its address) will be returned.
13067c478bd9Sstevel@tonic-gate  * This PTE may correspond to either a page or a pagetable - it is the
13077c478bd9Sstevel@tonic-gate  * caller's responsibility to determine which.  If no valid entry is
13087c478bd9Sstevel@tonic-gate  * found, 0 (and invalid PTE) and the next unexamined address will be
13097c478bd9Sstevel@tonic-gate  * returned.
13107c478bd9Sstevel@tonic-gate  *
13117c478bd9Sstevel@tonic-gate  * The loop has been carefully coded for optimization.
13127c478bd9Sstevel@tonic-gate  */
13137c478bd9Sstevel@tonic-gate static x86pte_t
13147c478bd9Sstevel@tonic-gate htable_scan(htable_t *ht, uintptr_t *vap, uintptr_t eaddr)
13157c478bd9Sstevel@tonic-gate {
13167c478bd9Sstevel@tonic-gate 	uint_t e;
13177c478bd9Sstevel@tonic-gate 	x86pte_t found_pte = (x86pte_t)0;
1318ae115bc7Smrj 	caddr_t pte_ptr;
1319ae115bc7Smrj 	caddr_t end_pte_ptr;
13207c478bd9Sstevel@tonic-gate 	int l = ht->ht_level;
13217c478bd9Sstevel@tonic-gate 	uintptr_t va = *vap & LEVEL_MASK(l);
13227c478bd9Sstevel@tonic-gate 	size_t pgsize = LEVEL_SIZE(l);
13237c478bd9Sstevel@tonic-gate 
13247c478bd9Sstevel@tonic-gate 	ASSERT(va >= ht->ht_vaddr);
13257c478bd9Sstevel@tonic-gate 	ASSERT(va <= HTABLE_LAST_PAGE(ht));
13267c478bd9Sstevel@tonic-gate 
13277c478bd9Sstevel@tonic-gate 	/*
13287c478bd9Sstevel@tonic-gate 	 * Compute the starting index and ending virtual address
13297c478bd9Sstevel@tonic-gate 	 */
13307c478bd9Sstevel@tonic-gate 	e = htable_va2entry(va, ht);
13317c478bd9Sstevel@tonic-gate 
13327c478bd9Sstevel@tonic-gate 	/*
13337c478bd9Sstevel@tonic-gate 	 * The following page table scan code knows that the valid
13347c478bd9Sstevel@tonic-gate 	 * bit of a PTE is in the lowest byte AND that x86 is little endian!!
13357c478bd9Sstevel@tonic-gate 	 */
1336ae115bc7Smrj 	pte_ptr = (caddr_t)x86pte_access_pagetable(ht, 0);
1337ae115bc7Smrj 	end_pte_ptr = (caddr_t)PT_INDEX_PTR(pte_ptr, HTABLE_NUM_PTES(ht));
1338ae115bc7Smrj 	pte_ptr = (caddr_t)PT_INDEX_PTR((x86pte_t *)pte_ptr, e);
133930f7a194Skchow 	while (!PTE_ISVALID(*pte_ptr)) {
13407c478bd9Sstevel@tonic-gate 		va += pgsize;
13417c478bd9Sstevel@tonic-gate 		if (va >= eaddr)
13427c478bd9Sstevel@tonic-gate 			break;
13437c478bd9Sstevel@tonic-gate 		pte_ptr += mmu.pte_size;
13447c478bd9Sstevel@tonic-gate 		ASSERT(pte_ptr <= end_pte_ptr);
13457c478bd9Sstevel@tonic-gate 		if (pte_ptr == end_pte_ptr)
13467c478bd9Sstevel@tonic-gate 			break;
13477c478bd9Sstevel@tonic-gate 	}
13487c478bd9Sstevel@tonic-gate 
13497c478bd9Sstevel@tonic-gate 	/*
13507c478bd9Sstevel@tonic-gate 	 * if we found a valid PTE, load the entire PTE
13517c478bd9Sstevel@tonic-gate 	 */
1352ae115bc7Smrj 	if (va < eaddr && pte_ptr != end_pte_ptr)
1353ae115bc7Smrj 		found_pte = GET_PTE((x86pte_t *)pte_ptr);
13547c478bd9Sstevel@tonic-gate 	x86pte_release_pagetable(ht);
13557c478bd9Sstevel@tonic-gate 
13567c478bd9Sstevel@tonic-gate #if defined(__amd64)
13577c478bd9Sstevel@tonic-gate 	/*
13587c478bd9Sstevel@tonic-gate 	 * deal with VA hole on amd64
13597c478bd9Sstevel@tonic-gate 	 */
13607c478bd9Sstevel@tonic-gate 	if (l == mmu.max_level && va >= mmu.hole_start && va <= mmu.hole_end)
13617c478bd9Sstevel@tonic-gate 		va = mmu.hole_end + va - mmu.hole_start;
13627c478bd9Sstevel@tonic-gate #endif /* __amd64 */
13637c478bd9Sstevel@tonic-gate 
13647c478bd9Sstevel@tonic-gate 	*vap = va;
13657c478bd9Sstevel@tonic-gate 	return (found_pte);
13667c478bd9Sstevel@tonic-gate }
13677c478bd9Sstevel@tonic-gate 
13687c478bd9Sstevel@tonic-gate /*
13697c478bd9Sstevel@tonic-gate  * Find the address and htable for the first populated translation at or
13707c478bd9Sstevel@tonic-gate  * above the given virtual address.  The caller may also specify an upper
13717c478bd9Sstevel@tonic-gate  * limit to the address range to search.  Uses level information to quickly
13727c478bd9Sstevel@tonic-gate  * skip unpopulated sections of virtual address spaces.
13737c478bd9Sstevel@tonic-gate  *
13747c478bd9Sstevel@tonic-gate  * If not found returns NULL. When found, returns the htable and virt addr
13757c478bd9Sstevel@tonic-gate  * and has a hold on the htable.
13767c478bd9Sstevel@tonic-gate  */
13777c478bd9Sstevel@tonic-gate x86pte_t
13787c478bd9Sstevel@tonic-gate htable_walk(
13797c478bd9Sstevel@tonic-gate 	struct hat *hat,
13807c478bd9Sstevel@tonic-gate 	htable_t **htp,
13817c478bd9Sstevel@tonic-gate 	uintptr_t *vaddr,
13827c478bd9Sstevel@tonic-gate 	uintptr_t eaddr)
13837c478bd9Sstevel@tonic-gate {
13847c478bd9Sstevel@tonic-gate 	uintptr_t va = *vaddr;
13857c478bd9Sstevel@tonic-gate 	htable_t *ht;
13867c478bd9Sstevel@tonic-gate 	htable_t *prev = *htp;
13877c478bd9Sstevel@tonic-gate 	level_t l;
13887c478bd9Sstevel@tonic-gate 	level_t max_mapped_level;
13897c478bd9Sstevel@tonic-gate 	x86pte_t pte;
13907c478bd9Sstevel@tonic-gate 
13917c478bd9Sstevel@tonic-gate 	ASSERT(eaddr > va);
13927c478bd9Sstevel@tonic-gate 
13937c478bd9Sstevel@tonic-gate 	/*
13947c478bd9Sstevel@tonic-gate 	 * If this is a user address, then we know we need not look beyond
13957c478bd9Sstevel@tonic-gate 	 * kernelbase.
13967c478bd9Sstevel@tonic-gate 	 */
13977c478bd9Sstevel@tonic-gate 	ASSERT(hat == kas.a_hat || eaddr <= kernelbase ||
13987c478bd9Sstevel@tonic-gate 	    eaddr == HTABLE_WALK_TO_END);
13997c478bd9Sstevel@tonic-gate 	if (hat != kas.a_hat && eaddr == HTABLE_WALK_TO_END)
14007c478bd9Sstevel@tonic-gate 		eaddr = kernelbase;
14017c478bd9Sstevel@tonic-gate 
14027c478bd9Sstevel@tonic-gate 	/*
14037c478bd9Sstevel@tonic-gate 	 * If we're coming in with a previous page table, search it first
14047c478bd9Sstevel@tonic-gate 	 * without doing an htable_lookup(), this should be frequent.
14057c478bd9Sstevel@tonic-gate 	 */
14067c478bd9Sstevel@tonic-gate 	if (prev) {
14077c478bd9Sstevel@tonic-gate 		ASSERT(prev->ht_busy > 0);
14087c478bd9Sstevel@tonic-gate 		ASSERT(prev->ht_vaddr <= va);
14097c478bd9Sstevel@tonic-gate 		l = prev->ht_level;
14107c478bd9Sstevel@tonic-gate 		if (va <= HTABLE_LAST_PAGE(prev)) {
14117c478bd9Sstevel@tonic-gate 			pte = htable_scan(prev, &va, eaddr);
14127c478bd9Sstevel@tonic-gate 
14137c478bd9Sstevel@tonic-gate 			if (PTE_ISPAGE(pte, l)) {
14147c478bd9Sstevel@tonic-gate 				*vaddr = va;
14157c478bd9Sstevel@tonic-gate 				*htp = prev;
14167c478bd9Sstevel@tonic-gate 				return (pte);
14177c478bd9Sstevel@tonic-gate 			}
14187c478bd9Sstevel@tonic-gate 		}
14197c478bd9Sstevel@tonic-gate 
14207c478bd9Sstevel@tonic-gate 		/*
14217c478bd9Sstevel@tonic-gate 		 * We found nothing in the htable provided by the caller,
14227c478bd9Sstevel@tonic-gate 		 * so fall through and do the full search
14237c478bd9Sstevel@tonic-gate 		 */
14247c478bd9Sstevel@tonic-gate 		htable_release(prev);
14257c478bd9Sstevel@tonic-gate 	}
14267c478bd9Sstevel@tonic-gate 
14277c478bd9Sstevel@tonic-gate 	/*
14287c478bd9Sstevel@tonic-gate 	 * Find the level of the largest pagesize used by this HAT.
14297c478bd9Sstevel@tonic-gate 	 */
14307c478bd9Sstevel@tonic-gate 	max_mapped_level = 0;
14317c478bd9Sstevel@tonic-gate 	for (l = 1; l <= mmu.max_page_level; ++l)
14327c478bd9Sstevel@tonic-gate 		if (hat->hat_pages_mapped[l] != 0)
14337c478bd9Sstevel@tonic-gate 			max_mapped_level = l;
14347c478bd9Sstevel@tonic-gate 
14357c478bd9Sstevel@tonic-gate 	while (va < eaddr && va >= *vaddr) {
14367c478bd9Sstevel@tonic-gate 		ASSERT(!IN_VA_HOLE(va));
14377c478bd9Sstevel@tonic-gate 
14387c478bd9Sstevel@tonic-gate 		/*
14397c478bd9Sstevel@tonic-gate 		 *  Find lowest table with any entry for given address.
14407c478bd9Sstevel@tonic-gate 		 */
14417c478bd9Sstevel@tonic-gate 		for (l = 0; l <= TOP_LEVEL(hat); ++l) {
14427c478bd9Sstevel@tonic-gate 			ht = htable_lookup(hat, va, l);
14437c478bd9Sstevel@tonic-gate 			if (ht != NULL) {
14447c478bd9Sstevel@tonic-gate 				pte = htable_scan(ht, &va, eaddr);
14457c478bd9Sstevel@tonic-gate 				if (PTE_ISPAGE(pte, l)) {
14467c478bd9Sstevel@tonic-gate 					*vaddr = va;
14477c478bd9Sstevel@tonic-gate 					*htp = ht;
14487c478bd9Sstevel@tonic-gate 					return (pte);
14497c478bd9Sstevel@tonic-gate 				}
14507c478bd9Sstevel@tonic-gate 				htable_release(ht);
14517c478bd9Sstevel@tonic-gate 				break;
14527c478bd9Sstevel@tonic-gate 			}
14537c478bd9Sstevel@tonic-gate 
14547c478bd9Sstevel@tonic-gate 			/*
14557c478bd9Sstevel@tonic-gate 			 * The ht is never NULL at the top level since
14567c478bd9Sstevel@tonic-gate 			 * the top level htable is created in hat_alloc().
14577c478bd9Sstevel@tonic-gate 			 */
14587c478bd9Sstevel@tonic-gate 			ASSERT(l < TOP_LEVEL(hat));
14597c478bd9Sstevel@tonic-gate 
14607c478bd9Sstevel@tonic-gate 			/*
14617c478bd9Sstevel@tonic-gate 			 * No htable covers the address. If there is no
14627c478bd9Sstevel@tonic-gate 			 * larger page size that could cover it, we
14637c478bd9Sstevel@tonic-gate 			 * skip to the start of the next page table.
14647c478bd9Sstevel@tonic-gate 			 */
14657c478bd9Sstevel@tonic-gate 			if (l >= max_mapped_level) {
14667c478bd9Sstevel@tonic-gate 				va = NEXT_ENTRY_VA(va, l + 1);
14677c478bd9Sstevel@tonic-gate 				break;
14687c478bd9Sstevel@tonic-gate 			}
14697c478bd9Sstevel@tonic-gate 		}
14707c478bd9Sstevel@tonic-gate 	}
14717c478bd9Sstevel@tonic-gate 
14727c478bd9Sstevel@tonic-gate 	*vaddr = 0;
14737c478bd9Sstevel@tonic-gate 	*htp = NULL;
14747c478bd9Sstevel@tonic-gate 	return (0);
14757c478bd9Sstevel@tonic-gate }
14767c478bd9Sstevel@tonic-gate 
14777c478bd9Sstevel@tonic-gate /*
14787c478bd9Sstevel@tonic-gate  * Find the htable and page table entry index of the given virtual address
14797c478bd9Sstevel@tonic-gate  * with pagesize at or below given level.
14807c478bd9Sstevel@tonic-gate  * If not found returns NULL. When found, returns the htable, sets
14817c478bd9Sstevel@tonic-gate  * entry, and has a hold on the htable.
14827c478bd9Sstevel@tonic-gate  */
14837c478bd9Sstevel@tonic-gate htable_t *
14847c478bd9Sstevel@tonic-gate htable_getpte(
14857c478bd9Sstevel@tonic-gate 	struct hat *hat,
14867c478bd9Sstevel@tonic-gate 	uintptr_t vaddr,
14877c478bd9Sstevel@tonic-gate 	uint_t *entry,
14887c478bd9Sstevel@tonic-gate 	x86pte_t *pte,
14897c478bd9Sstevel@tonic-gate 	level_t level)
14907c478bd9Sstevel@tonic-gate {
14917c478bd9Sstevel@tonic-gate 	htable_t	*ht;
14927c478bd9Sstevel@tonic-gate 	level_t		l;
14937c478bd9Sstevel@tonic-gate 	uint_t		e;
14947c478bd9Sstevel@tonic-gate 
14957c478bd9Sstevel@tonic-gate 	ASSERT(level <= mmu.max_page_level);
14967c478bd9Sstevel@tonic-gate 
14977c478bd9Sstevel@tonic-gate 	for (l = 0; l <= level; ++l) {
14987c478bd9Sstevel@tonic-gate 		ht = htable_lookup(hat, vaddr, l);
14997c478bd9Sstevel@tonic-gate 		if (ht == NULL)
15007c478bd9Sstevel@tonic-gate 			continue;
15017c478bd9Sstevel@tonic-gate 		e = htable_va2entry(vaddr, ht);
15027c478bd9Sstevel@tonic-gate 		if (entry != NULL)
15037c478bd9Sstevel@tonic-gate 			*entry = e;
15047c478bd9Sstevel@tonic-gate 		if (pte != NULL)
15057c478bd9Sstevel@tonic-gate 			*pte = x86pte_get(ht, e);
15067c478bd9Sstevel@tonic-gate 		return (ht);
15077c478bd9Sstevel@tonic-gate 	}
15087c478bd9Sstevel@tonic-gate 	return (NULL);
15097c478bd9Sstevel@tonic-gate }
15107c478bd9Sstevel@tonic-gate 
15117c478bd9Sstevel@tonic-gate /*
15127c478bd9Sstevel@tonic-gate  * Find the htable and page table entry index of the given virtual address.
15137c478bd9Sstevel@tonic-gate  * There must be a valid page mapped at the given address.
15147c478bd9Sstevel@tonic-gate  * If not found returns NULL. When found, returns the htable, sets
15157c478bd9Sstevel@tonic-gate  * entry, and has a hold on the htable.
15167c478bd9Sstevel@tonic-gate  */
15177c478bd9Sstevel@tonic-gate htable_t *
15187c478bd9Sstevel@tonic-gate htable_getpage(struct hat *hat, uintptr_t vaddr, uint_t *entry)
15197c478bd9Sstevel@tonic-gate {
15207c478bd9Sstevel@tonic-gate 	htable_t	*ht;
15217c478bd9Sstevel@tonic-gate 	uint_t		e;
15227c478bd9Sstevel@tonic-gate 	x86pte_t	pte;
15237c478bd9Sstevel@tonic-gate 
15247c478bd9Sstevel@tonic-gate 	ht = htable_getpte(hat, vaddr, &e, &pte, mmu.max_page_level);
15257c478bd9Sstevel@tonic-gate 	if (ht == NULL)
15267c478bd9Sstevel@tonic-gate 		return (NULL);
15277c478bd9Sstevel@tonic-gate 
15287c478bd9Sstevel@tonic-gate 	if (entry)
15297c478bd9Sstevel@tonic-gate 		*entry = e;
15307c478bd9Sstevel@tonic-gate 
15317c478bd9Sstevel@tonic-gate 	if (PTE_ISPAGE(pte, ht->ht_level))
15327c478bd9Sstevel@tonic-gate 		return (ht);
15337c478bd9Sstevel@tonic-gate 	htable_release(ht);
15347c478bd9Sstevel@tonic-gate 	return (NULL);
15357c478bd9Sstevel@tonic-gate }
15367c478bd9Sstevel@tonic-gate 
15377c478bd9Sstevel@tonic-gate 
15387c478bd9Sstevel@tonic-gate void
15397c478bd9Sstevel@tonic-gate htable_init()
15407c478bd9Sstevel@tonic-gate {
15417c478bd9Sstevel@tonic-gate 	/*
15427c478bd9Sstevel@tonic-gate 	 * To save on kernel VA usage, we avoid debug information in 32 bit
15437c478bd9Sstevel@tonic-gate 	 * kernels.
15447c478bd9Sstevel@tonic-gate 	 */
15457c478bd9Sstevel@tonic-gate #if defined(__amd64)
15467c478bd9Sstevel@tonic-gate 	int	kmem_flags = KMC_NOHASH;
15477c478bd9Sstevel@tonic-gate #elif defined(__i386)
15487c478bd9Sstevel@tonic-gate 	int	kmem_flags = KMC_NOHASH | KMC_NODEBUG;
15497c478bd9Sstevel@tonic-gate #endif
15507c478bd9Sstevel@tonic-gate 
15517c478bd9Sstevel@tonic-gate 	/*
15527c478bd9Sstevel@tonic-gate 	 * initialize kmem caches
15537c478bd9Sstevel@tonic-gate 	 */
15547c478bd9Sstevel@tonic-gate 	htable_cache = kmem_cache_create("htable_t",
15557c478bd9Sstevel@tonic-gate 	    sizeof (htable_t), 0, NULL, NULL,
15567c478bd9Sstevel@tonic-gate 	    htable_reap, NULL, hat_memload_arena, kmem_flags);
15577c478bd9Sstevel@tonic-gate }
15587c478bd9Sstevel@tonic-gate 
15597c478bd9Sstevel@tonic-gate /*
15607c478bd9Sstevel@tonic-gate  * get the pte index for the virtual address in the given htable's pagetable
15617c478bd9Sstevel@tonic-gate  */
15627c478bd9Sstevel@tonic-gate uint_t
15637c478bd9Sstevel@tonic-gate htable_va2entry(uintptr_t va, htable_t *ht)
15647c478bd9Sstevel@tonic-gate {
15657c478bd9Sstevel@tonic-gate 	level_t	l = ht->ht_level;
15667c478bd9Sstevel@tonic-gate 
15677c478bd9Sstevel@tonic-gate 	ASSERT(va >= ht->ht_vaddr);
15687c478bd9Sstevel@tonic-gate 	ASSERT(va <= HTABLE_LAST_PAGE(ht));
1569ae115bc7Smrj 	return ((va >> LEVEL_SHIFT(l)) & (HTABLE_NUM_PTES(ht) - 1));
15707c478bd9Sstevel@tonic-gate }
15717c478bd9Sstevel@tonic-gate 
15727c478bd9Sstevel@tonic-gate /*
15737c478bd9Sstevel@tonic-gate  * Given an htable and the index of a pte in it, return the virtual address
15747c478bd9Sstevel@tonic-gate  * of the page.
15757c478bd9Sstevel@tonic-gate  */
15767c478bd9Sstevel@tonic-gate uintptr_t
15777c478bd9Sstevel@tonic-gate htable_e2va(htable_t *ht, uint_t entry)
15787c478bd9Sstevel@tonic-gate {
15797c478bd9Sstevel@tonic-gate 	level_t	l = ht->ht_level;
15807c478bd9Sstevel@tonic-gate 	uintptr_t va;
15817c478bd9Sstevel@tonic-gate 
1582ae115bc7Smrj 	ASSERT(entry < HTABLE_NUM_PTES(ht));
15837c478bd9Sstevel@tonic-gate 	va = ht->ht_vaddr + ((uintptr_t)entry << LEVEL_SHIFT(l));
15847c478bd9Sstevel@tonic-gate 
15857c478bd9Sstevel@tonic-gate 	/*
15867c478bd9Sstevel@tonic-gate 	 * Need to skip over any VA hole in top level table
15877c478bd9Sstevel@tonic-gate 	 */
15887c478bd9Sstevel@tonic-gate #if defined(__amd64)
15897c478bd9Sstevel@tonic-gate 	if (ht->ht_level == mmu.max_level && va >= mmu.hole_start)
15907c478bd9Sstevel@tonic-gate 		va += ((mmu.hole_end - mmu.hole_start) + 1);
15917c478bd9Sstevel@tonic-gate #endif
15927c478bd9Sstevel@tonic-gate 
15937c478bd9Sstevel@tonic-gate 	return (va);
15947c478bd9Sstevel@tonic-gate }
15957c478bd9Sstevel@tonic-gate 
15967c478bd9Sstevel@tonic-gate /*
15977c478bd9Sstevel@tonic-gate  * The code uses compare and swap instructions to read/write PTE's to
15987c478bd9Sstevel@tonic-gate  * avoid atomicity problems, since PTEs can be 8 bytes on 32 bit systems.
15997c478bd9Sstevel@tonic-gate  * will naturally be atomic.
16007c478bd9Sstevel@tonic-gate  *
16017c478bd9Sstevel@tonic-gate  * The combination of using kpreempt_disable()/_enable() and the hci_mutex
16027c478bd9Sstevel@tonic-gate  * are used to ensure that an interrupt won't overwrite a temporary mapping
16037c478bd9Sstevel@tonic-gate  * while it's in use. If an interrupt thread tries to access a PTE, it will
16047c478bd9Sstevel@tonic-gate  * yield briefly back to the pinned thread which holds the cpu's hci_mutex.
16057c478bd9Sstevel@tonic-gate  */
16067c478bd9Sstevel@tonic-gate void
1607ae115bc7Smrj x86pte_cpu_init(cpu_t *cpu)
16087c478bd9Sstevel@tonic-gate {
16097c478bd9Sstevel@tonic-gate 	struct hat_cpu_info *hci;
16107c478bd9Sstevel@tonic-gate 
1611ae115bc7Smrj 	hci = kmem_zalloc(sizeof (*hci), KM_SLEEP);
16127c478bd9Sstevel@tonic-gate 	mutex_init(&hci->hci_mutex, NULL, MUTEX_DEFAULT, NULL);
16137c478bd9Sstevel@tonic-gate 	cpu->cpu_hat_info = hci;
16147c478bd9Sstevel@tonic-gate }
16157c478bd9Sstevel@tonic-gate 
1616ae115bc7Smrj void
1617ae115bc7Smrj x86pte_cpu_fini(cpu_t *cpu)
1618ae115bc7Smrj {
1619ae115bc7Smrj 	struct hat_cpu_info *hci = cpu->cpu_hat_info;
1620ae115bc7Smrj 
1621ae115bc7Smrj 	kmem_free(hci, sizeof (*hci));
1622ae115bc7Smrj 	cpu->cpu_hat_info = NULL;
16237c478bd9Sstevel@tonic-gate }
16247c478bd9Sstevel@tonic-gate 
#ifdef __i386
/*
 * Load a 64 bit PTE on a 32 bit kernel.
 *
 * The entry is read as two 32 bit halves, so the low half is read again
 * after the high half; if it changed in between, the whole read is retried
 * until the low half is seen unchanged.
 */
x86pte_t
get_pte64(x86pte_t *ptr)
{
	volatile uint32_t *half = (uint32_t *)ptr;
	x86pte_t val;

	ASSERT(mmu.pae_hat != 0);
	do {
		val = half[0];
		val |= (uint64_t)half[1] << 32;
	} while ((val & 0xffffffff) != half[0]);

	return (val);
}
#endif /* __i386 */
1644ae115bc7Smrj 
16457c478bd9Sstevel@tonic-gate /*
16467c478bd9Sstevel@tonic-gate  * Disable preemption and establish a mapping to the pagetable with the
16477c478bd9Sstevel@tonic-gate  * given pfn. This is optimized for there case where it's the same
16487c478bd9Sstevel@tonic-gate  * pfn as we last used referenced from this CPU.
16497c478bd9Sstevel@tonic-gate  */
16507c478bd9Sstevel@tonic-gate static x86pte_t *
1651ae115bc7Smrj x86pte_access_pagetable(htable_t *ht, uint_t index)
16527c478bd9Sstevel@tonic-gate {
16537c478bd9Sstevel@tonic-gate 	/*
16547c478bd9Sstevel@tonic-gate 	 * VLP pagetables are contained in the hat_t
16557c478bd9Sstevel@tonic-gate 	 */
16567c478bd9Sstevel@tonic-gate 	if (ht->ht_flags & HTABLE_VLP)
1657ae115bc7Smrj 		return (PT_INDEX_PTR(ht->ht_hat->hat_vlp_ptes, index));
1658ae115bc7Smrj 	return (x86pte_mapin(ht->ht_pfn, index, ht));
1659ae115bc7Smrj }
16607c478bd9Sstevel@tonic-gate 
16617c478bd9Sstevel@tonic-gate /*
1662ae115bc7Smrj  * map the given pfn into the page table window.
16637c478bd9Sstevel@tonic-gate  */
1664ae115bc7Smrj /*ARGSUSED*/
1665ae115bc7Smrj x86pte_t *
1666ae115bc7Smrj x86pte_mapin(pfn_t pfn, uint_t index, htable_t *ht)
1667ae115bc7Smrj {
1668ae115bc7Smrj 	x86pte_t *pteptr;
1669ae115bc7Smrj 	x86pte_t pte;
1670ae115bc7Smrj 	x86pte_t newpte;
1671ae115bc7Smrj 	int x;
1672ae115bc7Smrj 
16737c478bd9Sstevel@tonic-gate 	ASSERT(pfn != PFN_INVALID);
16747c478bd9Sstevel@tonic-gate 
16757c478bd9Sstevel@tonic-gate 	if (!khat_running) {
1676ae115bc7Smrj 		caddr_t va = kbm_remap_window(pfn_to_pa(pfn), 1);
1677ae115bc7Smrj 		return (PT_INDEX_PTR(va, index));
16787c478bd9Sstevel@tonic-gate 	}
16797c478bd9Sstevel@tonic-gate 
16807c478bd9Sstevel@tonic-gate 	/*
1681ae115bc7Smrj 	 * If kpm is available, use it.
1682ae115bc7Smrj 	 */
1683ae115bc7Smrj 	if (kpm_vbase)
1684ae115bc7Smrj 		return (PT_INDEX_PTR(hat_kpm_pfn2va(pfn), index));
1685ae115bc7Smrj 
1686ae115bc7Smrj 	/*
1687ae115bc7Smrj 	 * Disable preemption and grab the CPU's hci_mutex
16887c478bd9Sstevel@tonic-gate 	 */
16897c478bd9Sstevel@tonic-gate 	kpreempt_disable();
1690ae115bc7Smrj 	ASSERT(CPU->cpu_hat_info != NULL);
1691ae115bc7Smrj 	mutex_enter(&CPU->cpu_hat_info->hci_mutex);
1692ae115bc7Smrj 	x = PWIN_TABLE(CPU->cpu_id);
1693ae115bc7Smrj 	pteptr = (x86pte_t *)PWIN_PTE_VA(x);
1694ae115bc7Smrj 	if (mmu.pae_hat)
1695ae115bc7Smrj 		pte = *pteptr;
1696ae115bc7Smrj 	else
1697ae115bc7Smrj 		pte = *(x86pte32_t *)pteptr;
1698ae115bc7Smrj 
1699ae115bc7Smrj 	newpte = MAKEPTE(pfn, 0) | mmu.pt_global | mmu.pt_nx;
1700ae115bc7Smrj 	newpte |= PT_WRITABLE;
1701ae115bc7Smrj 
1702ae115bc7Smrj 	if (!PTE_EQUIV(newpte, pte)) {
1703ae115bc7Smrj 		if (mmu.pae_hat)
1704ae115bc7Smrj 			*pteptr = newpte;
1705ae115bc7Smrj 		else
1706ae115bc7Smrj 			*(x86pte32_t *)pteptr = newpte;
1707ae115bc7Smrj 		mmu_tlbflush_entry((caddr_t)(PWIN_VA(x)));
17087c478bd9Sstevel@tonic-gate 	}
1709ae115bc7Smrj 	return (PT_INDEX_PTR(PWIN_VA(x), index));
17107c478bd9Sstevel@tonic-gate }
17117c478bd9Sstevel@tonic-gate 
17127c478bd9Sstevel@tonic-gate /*
17137c478bd9Sstevel@tonic-gate  * Release access to a page table.
17147c478bd9Sstevel@tonic-gate  */
17157c478bd9Sstevel@tonic-gate static void
17167c478bd9Sstevel@tonic-gate x86pte_release_pagetable(htable_t *ht)
17177c478bd9Sstevel@tonic-gate {
17187c478bd9Sstevel@tonic-gate 	/*
17197c478bd9Sstevel@tonic-gate 	 * nothing to do for VLP htables
17207c478bd9Sstevel@tonic-gate 	 */
17217c478bd9Sstevel@tonic-gate 	if (ht->ht_flags & HTABLE_VLP)
17227c478bd9Sstevel@tonic-gate 		return;
17237c478bd9Sstevel@tonic-gate 
1724ae115bc7Smrj 	x86pte_mapout();
17257c478bd9Sstevel@tonic-gate }
17267c478bd9Sstevel@tonic-gate 
1727ae115bc7Smrj void
1728ae115bc7Smrj x86pte_mapout(void)
1729ae115bc7Smrj {
1730ae115bc7Smrj 	if (mmu.pwin_base == NULL || !khat_running)
1731ae115bc7Smrj 		return;
1732ae115bc7Smrj 
17337c478bd9Sstevel@tonic-gate 	/*
1734ae115bc7Smrj 	 * Drop the CPU's hci_mutex and restore preemption.
17357c478bd9Sstevel@tonic-gate 	 */
1736ae115bc7Smrj 	mutex_exit(&CPU->cpu_hat_info->hci_mutex);
17377c478bd9Sstevel@tonic-gate 	kpreempt_enable();
17387c478bd9Sstevel@tonic-gate }
17397c478bd9Sstevel@tonic-gate 
17407c478bd9Sstevel@tonic-gate /*
17417c478bd9Sstevel@tonic-gate  * Atomic retrieval of a pagetable entry
17427c478bd9Sstevel@tonic-gate  */
17437c478bd9Sstevel@tonic-gate x86pte_t
17447c478bd9Sstevel@tonic-gate x86pte_get(htable_t *ht, uint_t entry)
17457c478bd9Sstevel@tonic-gate {
17467c478bd9Sstevel@tonic-gate 	x86pte_t	pte;
1747aa2ed9e5Sjosephb 	x86pte_t	*ptep;
17487c478bd9Sstevel@tonic-gate 
17497c478bd9Sstevel@tonic-gate 	/*
1750aa2ed9e5Sjosephb 	 * Be careful that loading PAE entries in 32 bit kernel is atomic.
17517c478bd9Sstevel@tonic-gate 	 */
1752ae115bc7Smrj 	ASSERT(entry < mmu.ptes_per_table);
1753ae115bc7Smrj 	ptep = x86pte_access_pagetable(ht, entry);
1754ae115bc7Smrj 	pte = GET_PTE(ptep);
17557c478bd9Sstevel@tonic-gate 	x86pte_release_pagetable(ht);
17567c478bd9Sstevel@tonic-gate 	return (pte);
17577c478bd9Sstevel@tonic-gate }
17587c478bd9Sstevel@tonic-gate 
17597c478bd9Sstevel@tonic-gate /*
17607c478bd9Sstevel@tonic-gate  * Atomic unconditional set of a page table entry, it returns the previous
1761ae115bc7Smrj  * value. For pre-existing mappings if the PFN changes, then we don't care
1762ae115bc7Smrj  * about the old pte's REF / MOD bits. If the PFN remains the same, we leave
1763ae115bc7Smrj  * the MOD/REF bits unchanged.
1764ae115bc7Smrj  *
1765ae115bc7Smrj  * If asked to overwrite a link to a lower page table with a large page
1766ae115bc7Smrj  * mapping, this routine returns the special value of LPAGE_ERROR. This
1767ae115bc7Smrj  * allows the upper HAT layers to retry with a smaller mapping size.
17687c478bd9Sstevel@tonic-gate  */
17697c478bd9Sstevel@tonic-gate x86pte_t
17707c478bd9Sstevel@tonic-gate x86pte_set(htable_t *ht, uint_t entry, x86pte_t new, void *ptr)
17717c478bd9Sstevel@tonic-gate {
17727c478bd9Sstevel@tonic-gate 	x86pte_t	old;
1773ae115bc7Smrj 	x86pte_t	prev;
17747c478bd9Sstevel@tonic-gate 	x86pte_t	*ptep;
1775ae115bc7Smrj 	level_t		l = ht->ht_level;
1776ae115bc7Smrj 	x86pte_t	pfn_mask = (l != 0) ? PT_PADDR_LGPG : PT_PADDR;
1777ae115bc7Smrj 	x86pte_t	n;
1778ae115bc7Smrj 	uintptr_t	addr = htable_e2va(ht, entry);
1779ae115bc7Smrj 	hat_t		*hat = ht->ht_hat;
17807c478bd9Sstevel@tonic-gate 
1781ae115bc7Smrj 	ASSERT(new != 0); /* don't use to invalidate a PTE, see x86pte_update */
17827c478bd9Sstevel@tonic-gate 	ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN));
1783ae115bc7Smrj 	if (ptr == NULL)
1784ae115bc7Smrj 		ptep = x86pte_access_pagetable(ht, entry);
1785ae115bc7Smrj 	else
17867c478bd9Sstevel@tonic-gate 		ptep = ptr;
17877c478bd9Sstevel@tonic-gate 
1788b193e412Skchow 	/*
1789ae115bc7Smrj 	 * Install the new PTE. If remapping the same PFN, then
1790ae115bc7Smrj 	 * copy existing REF/MOD bits to new mapping.
1791b193e412Skchow 	 */
1792ae115bc7Smrj 	do {
1793ae115bc7Smrj 		prev = GET_PTE(ptep);
1794ae115bc7Smrj 		n = new;
1795ae115bc7Smrj 		if (PTE_ISVALID(n) && (prev & pfn_mask) == (new & pfn_mask))
1796b193e412Skchow 			n |= prev & (PT_REF | PT_MOD);
1797ae115bc7Smrj 
1798ae115bc7Smrj 		/*
1799ae115bc7Smrj 		 * Another thread may have installed this mapping already,
1800ae115bc7Smrj 		 * flush the local TLB and be done.
1801ae115bc7Smrj 		 */
1802b193e412Skchow 		if (prev == n) {
18037c478bd9Sstevel@tonic-gate 			old = new;
1804ae115bc7Smrj 			mmu_tlbflush_entry((caddr_t)addr);
1805ae115bc7Smrj 			goto done;
18067c478bd9Sstevel@tonic-gate 		}
1807ae115bc7Smrj 
1808ae115bc7Smrj 		/*
1809ae115bc7Smrj 		 * Detect if we have a collision of installing a large
1810ae115bc7Smrj 		 * page mapping where there already is a lower page table.
1811ae115bc7Smrj 		 */
181297704650Sjosephb 		if (l > 0 && (prev & PT_VALID) && !(prev & PT_PAGESIZE)) {
181397704650Sjosephb 			old = LPAGE_ERROR;
181497704650Sjosephb 			goto done;
181597704650Sjosephb 		}
1816ae115bc7Smrj 
1817ae115bc7Smrj 		old = CAS_PTE(ptep, prev, n);
1818ae115bc7Smrj 	} while (old != prev);
1819ae115bc7Smrj 
1820ae115bc7Smrj 	/*
1821ae115bc7Smrj 	 * Do a TLB demap if needed, ie. the old pte was valid.
1822ae115bc7Smrj 	 *
1823ae115bc7Smrj 	 * Note that a stale TLB writeback to the PTE here either can't happen
1824ae115bc7Smrj 	 * or doesn't matter. The PFN can only change for NOSYNC|NOCONSIST
1825ae115bc7Smrj 	 * mappings, but they were created with REF and MOD already set, so
1826ae115bc7Smrj 	 * no stale writeback will happen.
1827ae115bc7Smrj 	 *
1828ae115bc7Smrj 	 * Segmap is the only place where remaps happen on the same pfn and for
1829ae115bc7Smrj 	 * that we want to preserve the stale REF/MOD bits.
1830ae115bc7Smrj 	 */
1831ae115bc7Smrj 	if (old & PT_REF)
1832ae115bc7Smrj 		hat_tlb_inval(hat, addr);
1833ae115bc7Smrj 
1834ae115bc7Smrj done:
18357c478bd9Sstevel@tonic-gate 	if (ptr == NULL)
18367c478bd9Sstevel@tonic-gate 		x86pte_release_pagetable(ht);
18377c478bd9Sstevel@tonic-gate 	return (old);
18387c478bd9Sstevel@tonic-gate }
18397c478bd9Sstevel@tonic-gate 
18407c478bd9Sstevel@tonic-gate /*
1841ae115bc7Smrj  * Atomic compare and swap of a page table entry. No TLB invalidates are done.
1842ae115bc7Smrj  * This is used for links between pagetables of different levels.
1843ae115bc7Smrj  * Note we always create these links with dirty/access set, so they should
1844ae115bc7Smrj  * never change.
18457c478bd9Sstevel@tonic-gate  */
1846ae115bc7Smrj x86pte_t
18477c478bd9Sstevel@tonic-gate x86pte_cas(htable_t *ht, uint_t entry, x86pte_t old, x86pte_t new)
18487c478bd9Sstevel@tonic-gate {
18497c478bd9Sstevel@tonic-gate 	x86pte_t	pte;
18507c478bd9Sstevel@tonic-gate 	x86pte_t	*ptep;
18517c478bd9Sstevel@tonic-gate 
1852ae115bc7Smrj 	ptep = x86pte_access_pagetable(ht, entry);
1853ae115bc7Smrj 	pte = CAS_PTE(ptep, old, new);
18547c478bd9Sstevel@tonic-gate 	x86pte_release_pagetable(ht);
18557c478bd9Sstevel@tonic-gate 	return (pte);
18567c478bd9Sstevel@tonic-gate }
18577c478bd9Sstevel@tonic-gate 
/*
 * data structure for cross call information
 *
 * One of these is handed to x86pte_inval_func() as its first cross call
 * argument.
 */
typedef struct xcall_inval {
	caddr_t		xi_addr;	/* VA whose TLB entry is flushed */
	x86pte_t	xi_found;	/* CAS_PTE() result on the initiator */
	x86pte_t	xi_oldpte;	/* PTE value expected by the CAS */
	x86pte_t	*xi_pteptr;	/* PTE that is cleared by the CAS */
	processorid_t	xi_initiator;	/* cpu_id that performs the CAS */
} xcall_inval_t;
186897704650Sjosephb 
186997704650Sjosephb /*
187097704650Sjosephb  * Cross call service routine to invalidate TLBs. On the
187197704650Sjosephb  * initiating CPU, this first clears the PTE in memory.
187297704650Sjosephb  */
187397704650Sjosephb /*ARGSUSED*/
187497704650Sjosephb static int
187597704650Sjosephb x86pte_inval_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
18767c478bd9Sstevel@tonic-gate {
187797704650Sjosephb 	xcall_inval_t	*xi = (xcall_inval_t *)a1;
18787c478bd9Sstevel@tonic-gate 
187997704650Sjosephb 	if (CPU->cpu_id == xi->xi_initiator)
188097704650Sjosephb 		xi->xi_found = CAS_PTE(xi->xi_pteptr, xi->xi_oldpte, 0);
18817c478bd9Sstevel@tonic-gate 
188297704650Sjosephb 	mmu_tlbflush_entry(xi->xi_addr);
188397704650Sjosephb 	return (0);
1884ae115bc7Smrj }
1885ae115bc7Smrj 
1886ae115bc7Smrj /*
1887ae115bc7Smrj  * Invalidate a page table entry as long as it currently maps something that
1888ae115bc7Smrj  * matches the value determined by expect.
18897c478bd9Sstevel@tonic-gate  *
1890ae115bc7Smrj  * Also invalidates any TLB entries and returns the previous value of the PTE.
18917c478bd9Sstevel@tonic-gate  */
18927c478bd9Sstevel@tonic-gate x86pte_t
1893ae115bc7Smrj x86pte_inval(
1894ae115bc7Smrj 	htable_t *ht,
1895ae115bc7Smrj 	uint_t entry,
1896ae115bc7Smrj 	x86pte_t expect,
1897ae115bc7Smrj 	x86pte_t *pte_ptr)
18987c478bd9Sstevel@tonic-gate {
189997704650Sjosephb 	hat_t		*hat = ht->ht_hat;
19007c478bd9Sstevel@tonic-gate 	x86pte_t	*ptep;
190197704650Sjosephb 	xcall_inval_t	xi;
190297704650Sjosephb 	cpuset_t	cpus;
19037c478bd9Sstevel@tonic-gate 
19047c478bd9Sstevel@tonic-gate 	ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN));
1905ae115bc7Smrj 	ASSERT(ht->ht_level != VLP_LEVEL);
190697704650Sjosephb 
1907ae115bc7Smrj 	if (pte_ptr != NULL)
19087c478bd9Sstevel@tonic-gate 		ptep = pte_ptr;
1909ae115bc7Smrj 	else
1910ae115bc7Smrj 		ptep = x86pte_access_pagetable(ht, entry);
191197704650Sjosephb 	xi.xi_pteptr = ptep;
191297704650Sjosephb 	xi.xi_addr = (caddr_t)htable_e2va(ht, entry);
19137c478bd9Sstevel@tonic-gate 
19147c478bd9Sstevel@tonic-gate 	/*
191597704650Sjosephb 	 * Setup a cross call to any CPUs using this HAT
191697704650Sjosephb 	 */
191797704650Sjosephb 	kpreempt_disable();
191897704650Sjosephb 	xi.xi_initiator = CPU->cpu_id;
191997704650Sjosephb 	CPUSET_ZERO(cpus);
192097704650Sjosephb 	if (hat == kas.a_hat) {
192197704650Sjosephb 		CPUSET_OR(cpus, khat_cpuset);
192297704650Sjosephb 	} else {
192397704650Sjosephb 		mutex_enter(&hat->hat_switch_mutex);
192497704650Sjosephb 		CPUSET_OR(cpus, hat->hat_cpus);
192597704650Sjosephb 		CPUSET_ADD(cpus, CPU->cpu_id);
192697704650Sjosephb 	}
192797704650Sjosephb 
192897704650Sjosephb 	/*
192997704650Sjosephb 	 * Do the cross call to invalidate the PTE and flush TLBs.
193097704650Sjosephb 	 * Note that the loop is needed to handle changes due to h/w updating
193197704650Sjosephb 	 * of PT_MOD/PT_REF.
19327c478bd9Sstevel@tonic-gate 	 */
1933ae115bc7Smrj 	do {
193497704650Sjosephb 		xi.xi_oldpte = GET_PTE(ptep);
193597704650Sjosephb 		if (expect != 0 &&
193697704650Sjosephb 		    (xi.xi_oldpte & PT_PADDR) != (expect & PT_PADDR))
193797704650Sjosephb 			break;
193897704650Sjosephb 		if (panicstr == NULL)
193997704650Sjosephb 			xc_wait_sync((xc_arg_t)&xi, NULL, NULL, X_CALL_HIPRI,
194097704650Sjosephb 				    cpus, x86pte_inval_func);
194197704650Sjosephb 		else
194297704650Sjosephb 			(void) x86pte_inval_func((xc_arg_t)&xi, NULL, NULL);
194397704650Sjosephb 	} while (xi.xi_found != xi.xi_oldpte);
19447c478bd9Sstevel@tonic-gate 
194597704650Sjosephb 	if (hat != kas.a_hat)
194697704650Sjosephb 		mutex_exit(&hat->hat_switch_mutex);
194797704650Sjosephb 	kpreempt_enable();
194897704650Sjosephb 
19497c478bd9Sstevel@tonic-gate 	if (pte_ptr == NULL)
19507c478bd9Sstevel@tonic-gate 		x86pte_release_pagetable(ht);
195197704650Sjosephb 
195297704650Sjosephb 	return (xi.xi_oldpte);
19537c478bd9Sstevel@tonic-gate }
19547c478bd9Sstevel@tonic-gate 
19557c478bd9Sstevel@tonic-gate /*
1956ae115bc7Smrj  * Change a page table entry af it currently matches the value in expect.
19577c478bd9Sstevel@tonic-gate  */
19587c478bd9Sstevel@tonic-gate x86pte_t
1959ae115bc7Smrj x86pte_update(
1960ae115bc7Smrj 	htable_t *ht,
1961ae115bc7Smrj 	uint_t entry,
1962ae115bc7Smrj 	x86pte_t expect,
1963ae115bc7Smrj 	x86pte_t new)
19647c478bd9Sstevel@tonic-gate {
19657c478bd9Sstevel@tonic-gate 	x86pte_t	*ptep;
1966ae115bc7Smrj 	x86pte_t	found;
19677c478bd9Sstevel@tonic-gate 
1968ae115bc7Smrj 	ASSERT(new != 0);
19697c478bd9Sstevel@tonic-gate 	ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN));
1970ae115bc7Smrj 	ASSERT(ht->ht_level != VLP_LEVEL);
1971ae115bc7Smrj 
1972ae115bc7Smrj 	ptep = x86pte_access_pagetable(ht, entry);
1973ae115bc7Smrj 	found = CAS_PTE(ptep, expect, new);
1974ae115bc7Smrj 	if (found == expect) {
1975ae115bc7Smrj 		hat_tlb_inval(ht->ht_hat, htable_e2va(ht, entry));
19767c478bd9Sstevel@tonic-gate 
19777c478bd9Sstevel@tonic-gate 		/*
1978ae115bc7Smrj 		 * When removing write permission *and* clearing the
1979ae115bc7Smrj 		 * MOD bit, check if a write happened via a stale
1980ae115bc7Smrj 		 * TLB entry before the TLB shootdown finished.
1981ae115bc7Smrj 		 *
1982ae115bc7Smrj 		 * If it did happen, simply re-enable write permission and
1983ae115bc7Smrj 		 * act like the original CAS failed.
19847c478bd9Sstevel@tonic-gate 		 */
1985ae115bc7Smrj 		if ((expect & (PT_WRITABLE | PT_MOD)) == PT_WRITABLE &&
1986ae115bc7Smrj 		    (new & (PT_WRITABLE | PT_MOD)) == 0 &&
1987ae115bc7Smrj 		    (GET_PTE(ptep) & PT_MOD) != 0) {
1988ae115bc7Smrj 			do {
1989ae115bc7Smrj 				found = GET_PTE(ptep);
1990ae115bc7Smrj 				found =
1991ae115bc7Smrj 				    CAS_PTE(ptep, found, found | PT_WRITABLE);
1992ae115bc7Smrj 			} while ((found & PT_WRITABLE) == 0);
1993ae115bc7Smrj 		}
1994ae115bc7Smrj 	}
19957c478bd9Sstevel@tonic-gate 	x86pte_release_pagetable(ht);
1996ae115bc7Smrj 	return (found);
19977c478bd9Sstevel@tonic-gate }
19987c478bd9Sstevel@tonic-gate 
19997c478bd9Sstevel@tonic-gate /*
20007c478bd9Sstevel@tonic-gate  * Copy page tables - this is just a little more complicated than the
20017c478bd9Sstevel@tonic-gate  * previous routines. Note that it's also not atomic! It also is never
20027c478bd9Sstevel@tonic-gate  * used for VLP pagetables.
20037c478bd9Sstevel@tonic-gate  */
20047c478bd9Sstevel@tonic-gate void
20057c478bd9Sstevel@tonic-gate x86pte_copy(htable_t *src, htable_t *dest, uint_t entry, uint_t count)
20067c478bd9Sstevel@tonic-gate {
20077c478bd9Sstevel@tonic-gate 	caddr_t	src_va;
20087c478bd9Sstevel@tonic-gate 	caddr_t dst_va;
20097c478bd9Sstevel@tonic-gate 	size_t size;
2010ae115bc7Smrj 	x86pte_t *pteptr;
2011ae115bc7Smrj 	x86pte_t pte;
20127c478bd9Sstevel@tonic-gate 
20137c478bd9Sstevel@tonic-gate 	ASSERT(khat_running);
20147c478bd9Sstevel@tonic-gate 	ASSERT(!(dest->ht_flags & HTABLE_VLP));
20157c478bd9Sstevel@tonic-gate 	ASSERT(!(src->ht_flags & HTABLE_VLP));
20167c478bd9Sstevel@tonic-gate 	ASSERT(!(src->ht_flags & HTABLE_SHARED_PFN));
20177c478bd9Sstevel@tonic-gate 	ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN));
20187c478bd9Sstevel@tonic-gate 
20197c478bd9Sstevel@tonic-gate 	/*
2020ae115bc7Smrj 	 * Acquire access to the CPU pagetable windows for the dest and source.
20217c478bd9Sstevel@tonic-gate 	 */
2022ae115bc7Smrj 	dst_va = (caddr_t)x86pte_access_pagetable(dest, entry);
2023ae115bc7Smrj 	if (kpm_vbase) {
2024ae115bc7Smrj 		src_va = (caddr_t)
2025ae115bc7Smrj 		    PT_INDEX_PTR(hat_kpm_pfn2va(src->ht_pfn), entry);
20267c478bd9Sstevel@tonic-gate 	} else {
2027ae115bc7Smrj 		uint_t x = PWIN_SRC(CPU->cpu_id);
20287c478bd9Sstevel@tonic-gate 
20297c478bd9Sstevel@tonic-gate 		/*
20307c478bd9Sstevel@tonic-gate 		 * Finish defining the src pagetable mapping
20317c478bd9Sstevel@tonic-gate 		 */
2032ae115bc7Smrj 		src_va = (caddr_t)PT_INDEX_PTR(PWIN_VA(x), entry);
2033ae115bc7Smrj 		pte = MAKEPTE(src->ht_pfn, 0) | mmu.pt_global | mmu.pt_nx;
2034ae115bc7Smrj 		pteptr = (x86pte_t *)PWIN_PTE_VA(x);
2035ae115bc7Smrj 		if (mmu.pae_hat)
2036ae115bc7Smrj 			*pteptr = pte;
2037ae115bc7Smrj 		else
2038ae115bc7Smrj 			*(x86pte32_t *)pteptr = pte;
2039ae115bc7Smrj 		mmu_tlbflush_entry((caddr_t)(PWIN_VA(x)));
20407c478bd9Sstevel@tonic-gate 	}
20417c478bd9Sstevel@tonic-gate 
20427c478bd9Sstevel@tonic-gate 	/*
20437c478bd9Sstevel@tonic-gate 	 * now do the copy
20447c478bd9Sstevel@tonic-gate 	 */
20457c478bd9Sstevel@tonic-gate 	size = count << mmu.pte_size_shift;
20467c478bd9Sstevel@tonic-gate 	bcopy(src_va, dst_va, size);
20477c478bd9Sstevel@tonic-gate 
20487c478bd9Sstevel@tonic-gate 	x86pte_release_pagetable(dest);
20497c478bd9Sstevel@tonic-gate }
20507c478bd9Sstevel@tonic-gate 
20517c478bd9Sstevel@tonic-gate /*
20527c478bd9Sstevel@tonic-gate  * Zero page table entries - Note this doesn't use atomic stores!
20537c478bd9Sstevel@tonic-gate  */
2054ae115bc7Smrj static void
20557c478bd9Sstevel@tonic-gate x86pte_zero(htable_t *dest, uint_t entry, uint_t count)
20567c478bd9Sstevel@tonic-gate {
20577c478bd9Sstevel@tonic-gate 	caddr_t dst_va;
20587c478bd9Sstevel@tonic-gate 	size_t size;
20597c478bd9Sstevel@tonic-gate 
20607c478bd9Sstevel@tonic-gate 	/*
20617c478bd9Sstevel@tonic-gate 	 * Map in the page table to be zeroed.
20627c478bd9Sstevel@tonic-gate 	 */
20637c478bd9Sstevel@tonic-gate 	ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN));
20647c478bd9Sstevel@tonic-gate 	ASSERT(!(dest->ht_flags & HTABLE_VLP));
2065ae115bc7Smrj 
2066ae115bc7Smrj 	dst_va = (caddr_t)x86pte_access_pagetable(dest, entry);
2067ae115bc7Smrj 
20687c478bd9Sstevel@tonic-gate 	size = count << mmu.pte_size_shift;
2069ae115bc7Smrj 	ASSERT(size > BLOCKZEROALIGN);
2070ae115bc7Smrj #ifdef __i386
2071ae115bc7Smrj 	if ((x86_feature & X86_SSE2) == 0)
20727c478bd9Sstevel@tonic-gate 		bzero(dst_va, size);
2073ae115bc7Smrj 	else
2074ae115bc7Smrj #endif
2075ae115bc7Smrj 		block_zero_no_xmm(dst_va, size);
2076ae115bc7Smrj 
20777c478bd9Sstevel@tonic-gate 	x86pte_release_pagetable(dest);
20787c478bd9Sstevel@tonic-gate }
20797c478bd9Sstevel@tonic-gate 
20807c478bd9Sstevel@tonic-gate /*
20817c478bd9Sstevel@tonic-gate  * Called to ensure that all pagetables are in the system dump
20827c478bd9Sstevel@tonic-gate  */
20837c478bd9Sstevel@tonic-gate void
20847c478bd9Sstevel@tonic-gate hat_dump(void)
20857c478bd9Sstevel@tonic-gate {
20867c478bd9Sstevel@tonic-gate 	hat_t *hat;
20877c478bd9Sstevel@tonic-gate 	uint_t h;
20887c478bd9Sstevel@tonic-gate 	htable_t *ht;
20897c478bd9Sstevel@tonic-gate 
20907c478bd9Sstevel@tonic-gate 	/*
2091a85a6733Sjosephb 	 * Dump all page tables
20927c478bd9Sstevel@tonic-gate 	 */
2093a85a6733Sjosephb 	for (hat = kas.a_hat; hat != NULL; hat = hat->hat_next) {
20947c478bd9Sstevel@tonic-gate 		for (h = 0; h < hat->hat_num_hash; ++h) {
20957c478bd9Sstevel@tonic-gate 			for (ht = hat->hat_ht_hash[h]; ht; ht = ht->ht_next) {
2096a85a6733Sjosephb 				if ((ht->ht_flags & HTABLE_VLP) == 0)
20977c478bd9Sstevel@tonic-gate 					dump_page(ht->ht_pfn);
20987c478bd9Sstevel@tonic-gate 			}
20997c478bd9Sstevel@tonic-gate 		}
21007c478bd9Sstevel@tonic-gate 	}
21017c478bd9Sstevel@tonic-gate }
2102