xref: /titanic_53/usr/src/uts/i86pc/vm/hment.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
30*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
31*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
32*7c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
33*7c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
34*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>
35*7c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
36*7c478bd9Sstevel@tonic-gate #include <vm/hat.h>
37*7c478bd9Sstevel@tonic-gate #include <vm/vm_dep.h>
38*7c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h>
39*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
40*7c478bd9Sstevel@tonic-gate 
41*7c478bd9Sstevel@tonic-gate 
42*7c478bd9Sstevel@tonic-gate /*
43*7c478bd9Sstevel@tonic-gate  * When pages are shared by more than one mapping, a list of these
44*7c478bd9Sstevel@tonic-gate  * structs hangs off of the page_t connected by the hm_next and hm_prev
45*7c478bd9Sstevel@tonic-gate  * fields.  Every hment is also indexed by a system-wide hash table, using
46*7c478bd9Sstevel@tonic-gate  * hm_hashnext to connect it to the chain of hments in a single hash
47*7c478bd9Sstevel@tonic-gate  * bucket.
48*7c478bd9Sstevel@tonic-gate  */
49*7c478bd9Sstevel@tonic-gate struct hment {
50*7c478bd9Sstevel@tonic-gate 	struct hment	*hm_hashnext;	/* next mapping on hash chain */
51*7c478bd9Sstevel@tonic-gate 	struct hment	*hm_next;	/* next mapping of same page */
52*7c478bd9Sstevel@tonic-gate 	struct hment	*hm_prev;	/* previous mapping of same page */
53*7c478bd9Sstevel@tonic-gate 	htable_t	*hm_htable;	/* corresponding htable_t */
54*7c478bd9Sstevel@tonic-gate 	uint16_t	hm_entry;	/* index of pte in htable */
55*7c478bd9Sstevel@tonic-gate 	uint16_t	hm_pad;		/* explicitly expose compiler padding */
56*7c478bd9Sstevel@tonic-gate #ifdef __amd64
57*7c478bd9Sstevel@tonic-gate 	uint32_t	hm_pad2;	/* explicitly expose compiler padding */
58*7c478bd9Sstevel@tonic-gate #endif
59*7c478bd9Sstevel@tonic-gate };
60*7c478bd9Sstevel@tonic-gate 
61*7c478bd9Sstevel@tonic-gate /*
62*7c478bd9Sstevel@tonic-gate  * Value returned by hment_walk() when dealing with a single mapping
63*7c478bd9Sstevel@tonic-gate  * embedded in the page_t.
64*7c478bd9Sstevel@tonic-gate  */
65*7c478bd9Sstevel@tonic-gate #define	HMENT_EMBEDDED ((hment_t *)(uintptr_t)1)
66*7c478bd9Sstevel@tonic-gate 
67*7c478bd9Sstevel@tonic-gate kmem_cache_t *hment_cache;
68*7c478bd9Sstevel@tonic-gate 
69*7c478bd9Sstevel@tonic-gate /*
70*7c478bd9Sstevel@tonic-gate  * The hment reserve is similar to the htable reserve, with the following
71*7c478bd9Sstevel@tonic-gate  * exception. Hment's are never needed for HAT kmem allocs.
72*7c478bd9Sstevel@tonic-gate  *
73*7c478bd9Sstevel@tonic-gate  * The hment_reserve_amount variable is used, so that you can change it's
74*7c478bd9Sstevel@tonic-gate  * value to zero via a kernel debugger to force stealing to get tested.
75*7c478bd9Sstevel@tonic-gate  */
76*7c478bd9Sstevel@tonic-gate #define	HMENT_RESERVE_AMOUNT	(200)	/* currently a guess at right value. */
77*7c478bd9Sstevel@tonic-gate uint_t hment_reserve_amount = HMENT_RESERVE_AMOUNT;
78*7c478bd9Sstevel@tonic-gate kmutex_t hment_reserve_mutex;
79*7c478bd9Sstevel@tonic-gate uint_t	hment_reserve_count;
80*7c478bd9Sstevel@tonic-gate hment_t	*hment_reserve_pool;
81*7c478bd9Sstevel@tonic-gate extern  kthread_t *hat_reserves_thread;
82*7c478bd9Sstevel@tonic-gate 
83*7c478bd9Sstevel@tonic-gate /*
84*7c478bd9Sstevel@tonic-gate  * Possible performance RFE: we might need to make this dynamic, perhaps
85*7c478bd9Sstevel@tonic-gate  * based on the number of pages in the system.
86*7c478bd9Sstevel@tonic-gate  */
87*7c478bd9Sstevel@tonic-gate #define	HMENT_HASH_SIZE (64 * 1024)
88*7c478bd9Sstevel@tonic-gate static uint_t hment_hash_entries = HMENT_HASH_SIZE;
89*7c478bd9Sstevel@tonic-gate static hment_t **hment_hash;
90*7c478bd9Sstevel@tonic-gate 
/*
 * Map a (page table pfn, pte index) pair to a bucket index of hment_hash.
 *
 * Lots of highly shared pages will have the same value for "entry"
 * (consider the starting address of "xterm" or "sh").  So we distinguish
 * them by adding the pfn of the page table into both the high bits
 * (shifted left by 9) and the low bits (unshifted).  The shift by 9
 * corresponds to the range of values for entry (0..511).
 *
 * The result is masked with (hment_hash_entries - 1), which only yields a
 * uniform bucket index while hment_hash_entries is a power of two.
 *
 * Every argument and the expansion as a whole are parenthesized so that
 * neither an expression argument nor an operator next to the macro call
 * can regroup the arithmetic.  Note that pfn is still evaluated twice.
 */
#define	HMENT_HASH(pfn, entry)						\
	((uint32_t)((((pfn) << 9) + (entry) + (pfn)) &			\
	(hment_hash_entries - 1)))
99*7c478bd9Sstevel@tonic-gate 
100*7c478bd9Sstevel@tonic-gate /*
101*7c478bd9Sstevel@tonic-gate  * "mlist_lock" is a hashed mutex lock for protecting per-page mapping
102*7c478bd9Sstevel@tonic-gate  * lists and "hash_lock" is a similar lock protecting the hment hash
103*7c478bd9Sstevel@tonic-gate  * table.  The hashed approach is taken to avoid the spatial overhead of
104*7c478bd9Sstevel@tonic-gate  * maintaining a separate lock for each page, while still achieving better
105*7c478bd9Sstevel@tonic-gate  * scalability than a single lock would allow.
106*7c478bd9Sstevel@tonic-gate  */
107*7c478bd9Sstevel@tonic-gate #define	MLIST_NUM_LOCK	256		/* must be power of two */
108*7c478bd9Sstevel@tonic-gate static kmutex_t mlist_lock[MLIST_NUM_LOCK];
109*7c478bd9Sstevel@tonic-gate 
110*7c478bd9Sstevel@tonic-gate /*
111*7c478bd9Sstevel@tonic-gate  * the shift by 9 is so that all large pages don't use the same hash bucket
112*7c478bd9Sstevel@tonic-gate  */
113*7c478bd9Sstevel@tonic-gate #define	MLIST_MUTEX(pp) \
114*7c478bd9Sstevel@tonic-gate 	&mlist_lock[((pp)->p_pagenum + ((pp)->p_pagenum >> 9)) & \
115*7c478bd9Sstevel@tonic-gate 	(MLIST_NUM_LOCK - 1)]
116*7c478bd9Sstevel@tonic-gate 
117*7c478bd9Sstevel@tonic-gate #define	HASH_NUM_LOCK	256		/* must be power of two */
118*7c478bd9Sstevel@tonic-gate static kmutex_t hash_lock[HASH_NUM_LOCK];
119*7c478bd9Sstevel@tonic-gate 
120*7c478bd9Sstevel@tonic-gate #define	HASH_MUTEX(idx) &hash_lock[(idx) & (HASH_NUM_LOCK-1)]
121*7c478bd9Sstevel@tonic-gate 
122*7c478bd9Sstevel@tonic-gate static hment_t *hment_steal(void);
123*7c478bd9Sstevel@tonic-gate 
124*7c478bd9Sstevel@tonic-gate /*
125*7c478bd9Sstevel@tonic-gate  * put one hment onto the reserves list
126*7c478bd9Sstevel@tonic-gate  */
127*7c478bd9Sstevel@tonic-gate static void
128*7c478bd9Sstevel@tonic-gate hment_put_reserve(hment_t *hm)
129*7c478bd9Sstevel@tonic-gate {
130*7c478bd9Sstevel@tonic-gate 	HATSTAT_INC(hs_hm_put_reserve);
131*7c478bd9Sstevel@tonic-gate 	mutex_enter(&hment_reserve_mutex);
132*7c478bd9Sstevel@tonic-gate 	hm->hm_next = hment_reserve_pool;
133*7c478bd9Sstevel@tonic-gate 	hment_reserve_pool = hm;
134*7c478bd9Sstevel@tonic-gate 	++hment_reserve_count;
135*7c478bd9Sstevel@tonic-gate 	mutex_exit(&hment_reserve_mutex);
136*7c478bd9Sstevel@tonic-gate }
137*7c478bd9Sstevel@tonic-gate 
138*7c478bd9Sstevel@tonic-gate /*
139*7c478bd9Sstevel@tonic-gate  * Take one hment from the reserve.
140*7c478bd9Sstevel@tonic-gate  */
141*7c478bd9Sstevel@tonic-gate static hment_t *
142*7c478bd9Sstevel@tonic-gate hment_get_reserve(void)
143*7c478bd9Sstevel@tonic-gate {
144*7c478bd9Sstevel@tonic-gate 	hment_t *hm = NULL;
145*7c478bd9Sstevel@tonic-gate 
146*7c478bd9Sstevel@tonic-gate 	/*
147*7c478bd9Sstevel@tonic-gate 	 * We rely on a "donation system" to refill the hment reserve
148*7c478bd9Sstevel@tonic-gate 	 * list, which only takes place when we are allocating hments for
149*7c478bd9Sstevel@tonic-gate 	 * user mappings.  It is theoretically possible that an incredibly
150*7c478bd9Sstevel@tonic-gate 	 * long string of kernel hment_alloc()s with no intervening user
151*7c478bd9Sstevel@tonic-gate 	 * hment_alloc()s could exhaust that pool.
152*7c478bd9Sstevel@tonic-gate 	 */
153*7c478bd9Sstevel@tonic-gate 	HATSTAT_INC(hs_hm_get_reserve);
154*7c478bd9Sstevel@tonic-gate 	mutex_enter(&hment_reserve_mutex);
155*7c478bd9Sstevel@tonic-gate 	if (hment_reserve_count != 0) {
156*7c478bd9Sstevel@tonic-gate 		hm = hment_reserve_pool;
157*7c478bd9Sstevel@tonic-gate 		hment_reserve_pool = hm->hm_next;
158*7c478bd9Sstevel@tonic-gate 		--hment_reserve_count;
159*7c478bd9Sstevel@tonic-gate 	}
160*7c478bd9Sstevel@tonic-gate 	mutex_exit(&hment_reserve_mutex);
161*7c478bd9Sstevel@tonic-gate 	return (hm);
162*7c478bd9Sstevel@tonic-gate }
163*7c478bd9Sstevel@tonic-gate 
164*7c478bd9Sstevel@tonic-gate /*
165*7c478bd9Sstevel@tonic-gate  * Allocate an hment
166*7c478bd9Sstevel@tonic-gate  */
167*7c478bd9Sstevel@tonic-gate static hment_t *
168*7c478bd9Sstevel@tonic-gate hment_alloc()
169*7c478bd9Sstevel@tonic-gate {
170*7c478bd9Sstevel@tonic-gate 	int km_flag = can_steal_post_boot ? KM_NOSLEEP : KM_SLEEP;
171*7c478bd9Sstevel@tonic-gate 	hment_t	*hm = NULL;
172*7c478bd9Sstevel@tonic-gate 	int use_reserves = (use_boot_reserve ||
173*7c478bd9Sstevel@tonic-gate 	    curthread == hat_reserves_thread || panicstr != NULL);
174*7c478bd9Sstevel@tonic-gate 
175*7c478bd9Sstevel@tonic-gate 	/*
176*7c478bd9Sstevel@tonic-gate 	 * If we aren't using the reserves, try using kmem to get an hment.
177*7c478bd9Sstevel@tonic-gate 	 * Donate any successful allocations to reserves if low.
178*7c478bd9Sstevel@tonic-gate 	 *
179*7c478bd9Sstevel@tonic-gate 	 * If we're in panic, resort to using the reserves.
180*7c478bd9Sstevel@tonic-gate 	 */
181*7c478bd9Sstevel@tonic-gate 	HATSTAT_INC(hs_hm_alloc);
182*7c478bd9Sstevel@tonic-gate 	if (!use_reserves) {
183*7c478bd9Sstevel@tonic-gate 		for (;;) {
184*7c478bd9Sstevel@tonic-gate 			hm = kmem_cache_alloc(hment_cache, km_flag);
185*7c478bd9Sstevel@tonic-gate 			if (hment_reserve_count >= hment_reserve_amount ||
186*7c478bd9Sstevel@tonic-gate 			    hm == NULL || panicstr != NULL ||
187*7c478bd9Sstevel@tonic-gate 			    curthread == hat_reserves_thread)
188*7c478bd9Sstevel@tonic-gate 				break;
189*7c478bd9Sstevel@tonic-gate 			hment_put_reserve(hm);
190*7c478bd9Sstevel@tonic-gate 		}
191*7c478bd9Sstevel@tonic-gate 	}
192*7c478bd9Sstevel@tonic-gate 
193*7c478bd9Sstevel@tonic-gate 	/*
194*7c478bd9Sstevel@tonic-gate 	 * If allocation failed, we need to tap the reserves or steal
195*7c478bd9Sstevel@tonic-gate 	 */
196*7c478bd9Sstevel@tonic-gate 	if (hm == NULL) {
197*7c478bd9Sstevel@tonic-gate 		if (use_reserves)
198*7c478bd9Sstevel@tonic-gate 			hm = hment_get_reserve();
199*7c478bd9Sstevel@tonic-gate 
200*7c478bd9Sstevel@tonic-gate 		/*
201*7c478bd9Sstevel@tonic-gate 		 * If we still haven't gotten an hment, attempt to steal one by
202*7c478bd9Sstevel@tonic-gate 		 * victimizing a mapping in a user htable.
203*7c478bd9Sstevel@tonic-gate 		 */
204*7c478bd9Sstevel@tonic-gate 		if (hm == NULL && can_steal_post_boot)
205*7c478bd9Sstevel@tonic-gate 			hm = hment_steal();
206*7c478bd9Sstevel@tonic-gate 
207*7c478bd9Sstevel@tonic-gate 		/*
208*7c478bd9Sstevel@tonic-gate 		 * we're in dire straights, try the reserve
209*7c478bd9Sstevel@tonic-gate 		 */
210*7c478bd9Sstevel@tonic-gate 		if (hm == NULL)
211*7c478bd9Sstevel@tonic-gate 			hm = hment_get_reserve();
212*7c478bd9Sstevel@tonic-gate 
213*7c478bd9Sstevel@tonic-gate 		/*
214*7c478bd9Sstevel@tonic-gate 		 * still no hment is a serious problem.
215*7c478bd9Sstevel@tonic-gate 		 */
216*7c478bd9Sstevel@tonic-gate 		if (hm == NULL)
217*7c478bd9Sstevel@tonic-gate 			panic("hment_alloc(): no reserve, couldn't steal");
218*7c478bd9Sstevel@tonic-gate 	}
219*7c478bd9Sstevel@tonic-gate 
220*7c478bd9Sstevel@tonic-gate 
221*7c478bd9Sstevel@tonic-gate 	hm->hm_entry = 0;
222*7c478bd9Sstevel@tonic-gate 	hm->hm_htable = NULL;
223*7c478bd9Sstevel@tonic-gate 	hm->hm_hashnext = NULL;
224*7c478bd9Sstevel@tonic-gate 	hm->hm_next = NULL;
225*7c478bd9Sstevel@tonic-gate 	hm->hm_prev = NULL;
226*7c478bd9Sstevel@tonic-gate 	return (hm);
227*7c478bd9Sstevel@tonic-gate }
228*7c478bd9Sstevel@tonic-gate 
229*7c478bd9Sstevel@tonic-gate /*
230*7c478bd9Sstevel@tonic-gate  * Free an hment, possibly to the reserves list when called from the
231*7c478bd9Sstevel@tonic-gate  * thread using the reserves. For example, when freeing an hment during an
232*7c478bd9Sstevel@tonic-gate  * htable_steal(), we can't recurse into the kmem allocator, so we just
233*7c478bd9Sstevel@tonic-gate  * push the hment onto the reserve list.
234*7c478bd9Sstevel@tonic-gate  */
235*7c478bd9Sstevel@tonic-gate void
236*7c478bd9Sstevel@tonic-gate hment_free(hment_t *hm)
237*7c478bd9Sstevel@tonic-gate {
238*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
239*7c478bd9Sstevel@tonic-gate 	/*
240*7c478bd9Sstevel@tonic-gate 	 * zero out all fields to try and force any race conditions to segfault
241*7c478bd9Sstevel@tonic-gate 	 */
242*7c478bd9Sstevel@tonic-gate 	bzero(hm, sizeof (*hm));
243*7c478bd9Sstevel@tonic-gate #endif
244*7c478bd9Sstevel@tonic-gate 	HATSTAT_INC(hs_hm_free);
245*7c478bd9Sstevel@tonic-gate 	if (curthread == hat_reserves_thread ||
246*7c478bd9Sstevel@tonic-gate 	    hment_reserve_count < hment_reserve_amount)
247*7c478bd9Sstevel@tonic-gate 		hment_put_reserve(hm);
248*7c478bd9Sstevel@tonic-gate 	else
249*7c478bd9Sstevel@tonic-gate 		kmem_cache_free(hment_cache, hm);
250*7c478bd9Sstevel@tonic-gate }
251*7c478bd9Sstevel@tonic-gate 
252*7c478bd9Sstevel@tonic-gate int
253*7c478bd9Sstevel@tonic-gate x86_hm_held(page_t *pp)
254*7c478bd9Sstevel@tonic-gate {
255*7c478bd9Sstevel@tonic-gate 	ASSERT(pp != NULL);
256*7c478bd9Sstevel@tonic-gate 	return (MUTEX_HELD(MLIST_MUTEX(pp)));
257*7c478bd9Sstevel@tonic-gate }
258*7c478bd9Sstevel@tonic-gate 
259*7c478bd9Sstevel@tonic-gate void
260*7c478bd9Sstevel@tonic-gate x86_hm_enter(page_t *pp)
261*7c478bd9Sstevel@tonic-gate {
262*7c478bd9Sstevel@tonic-gate 	ASSERT(pp != NULL);
263*7c478bd9Sstevel@tonic-gate 	mutex_enter(MLIST_MUTEX(pp));
264*7c478bd9Sstevel@tonic-gate }
265*7c478bd9Sstevel@tonic-gate 
266*7c478bd9Sstevel@tonic-gate void
267*7c478bd9Sstevel@tonic-gate x86_hm_exit(page_t *pp)
268*7c478bd9Sstevel@tonic-gate {
269*7c478bd9Sstevel@tonic-gate 	ASSERT(pp != NULL);
270*7c478bd9Sstevel@tonic-gate 	mutex_exit(MLIST_MUTEX(pp));
271*7c478bd9Sstevel@tonic-gate }
272*7c478bd9Sstevel@tonic-gate 
273*7c478bd9Sstevel@tonic-gate /*
274*7c478bd9Sstevel@tonic-gate  * Internal routine to add a full hment to a page_t mapping list
275*7c478bd9Sstevel@tonic-gate  */
276*7c478bd9Sstevel@tonic-gate static void
277*7c478bd9Sstevel@tonic-gate hment_insert(hment_t *hm, page_t *pp)
278*7c478bd9Sstevel@tonic-gate {
279*7c478bd9Sstevel@tonic-gate 	uint_t		idx;
280*7c478bd9Sstevel@tonic-gate 
281*7c478bd9Sstevel@tonic-gate 	ASSERT(x86_hm_held(pp));
282*7c478bd9Sstevel@tonic-gate 	ASSERT(!pp->p_embed);
283*7c478bd9Sstevel@tonic-gate 
284*7c478bd9Sstevel@tonic-gate 	/*
285*7c478bd9Sstevel@tonic-gate 	 * Add the hment to the page's mapping list.
286*7c478bd9Sstevel@tonic-gate 	 */
287*7c478bd9Sstevel@tonic-gate 	++pp->p_share;
288*7c478bd9Sstevel@tonic-gate 	hm->hm_next = pp->p_mapping;
289*7c478bd9Sstevel@tonic-gate 	if (pp->p_mapping != NULL)
290*7c478bd9Sstevel@tonic-gate 		((hment_t *)pp->p_mapping)->hm_prev = hm;
291*7c478bd9Sstevel@tonic-gate 	pp->p_mapping = hm;
292*7c478bd9Sstevel@tonic-gate 
293*7c478bd9Sstevel@tonic-gate 	/*
294*7c478bd9Sstevel@tonic-gate 	 * Add the hment to the system-wide hash table.
295*7c478bd9Sstevel@tonic-gate 	 */
296*7c478bd9Sstevel@tonic-gate 	idx = HMENT_HASH(hm->hm_htable->ht_pfn, hm->hm_entry);
297*7c478bd9Sstevel@tonic-gate 
298*7c478bd9Sstevel@tonic-gate 	mutex_enter(HASH_MUTEX(idx));
299*7c478bd9Sstevel@tonic-gate 	hm->hm_hashnext = hment_hash[idx];
300*7c478bd9Sstevel@tonic-gate 	hment_hash[idx] = hm;
301*7c478bd9Sstevel@tonic-gate 	mutex_exit(HASH_MUTEX(idx));
302*7c478bd9Sstevel@tonic-gate }
303*7c478bd9Sstevel@tonic-gate 
304*7c478bd9Sstevel@tonic-gate /*
305*7c478bd9Sstevel@tonic-gate  * Prepare a mapping list entry to the given page.
306*7c478bd9Sstevel@tonic-gate  *
307*7c478bd9Sstevel@tonic-gate  * There are 4 different situations to deal with:
308*7c478bd9Sstevel@tonic-gate  *
309*7c478bd9Sstevel@tonic-gate  * - Adding the first mapping to a page_t as an embedded hment
310*7c478bd9Sstevel@tonic-gate  * - Refaulting on an existing embedded mapping
311*7c478bd9Sstevel@tonic-gate  * - Upgrading an embedded mapping when adding a 2nd mapping
312*7c478bd9Sstevel@tonic-gate  * - Adding another mapping to a page_t that already has multiple mappings
313*7c478bd9Sstevel@tonic-gate  *	 note we don't optimized for the refaulting case here.
314*7c478bd9Sstevel@tonic-gate  *
315*7c478bd9Sstevel@tonic-gate  * Due to competition with other threads that may be mapping/unmapping the
316*7c478bd9Sstevel@tonic-gate  * same page and the need to drop all locks while allocating hments, any or
317*7c478bd9Sstevel@tonic-gate  * all of the 3 situations can occur (and in almost any order) in any given
318*7c478bd9Sstevel@tonic-gate  * call. Isn't this fun!
319*7c478bd9Sstevel@tonic-gate  */
320*7c478bd9Sstevel@tonic-gate hment_t *
321*7c478bd9Sstevel@tonic-gate hment_prepare(htable_t *htable, uint_t entry, page_t *pp)
322*7c478bd9Sstevel@tonic-gate {
323*7c478bd9Sstevel@tonic-gate 	hment_t		*hm = NULL;
324*7c478bd9Sstevel@tonic-gate 
325*7c478bd9Sstevel@tonic-gate 	ASSERT(x86_hm_held(pp));
326*7c478bd9Sstevel@tonic-gate 
327*7c478bd9Sstevel@tonic-gate 	for (;;) {
328*7c478bd9Sstevel@tonic-gate 
329*7c478bd9Sstevel@tonic-gate 		/*
330*7c478bd9Sstevel@tonic-gate 		 * The most common case is establishing the first mapping to a
331*7c478bd9Sstevel@tonic-gate 		 * page, so check that first. This doesn't need any allocated
332*7c478bd9Sstevel@tonic-gate 		 * hment.
333*7c478bd9Sstevel@tonic-gate 		 */
334*7c478bd9Sstevel@tonic-gate 		if (pp->p_mapping == NULL) {
335*7c478bd9Sstevel@tonic-gate 			ASSERT(!pp->p_embed);
336*7c478bd9Sstevel@tonic-gate 			ASSERT(pp->p_share == 0);
337*7c478bd9Sstevel@tonic-gate 			if (hm == NULL)
338*7c478bd9Sstevel@tonic-gate 				break;
339*7c478bd9Sstevel@tonic-gate 
340*7c478bd9Sstevel@tonic-gate 			/*
341*7c478bd9Sstevel@tonic-gate 			 * we had an hment already, so free it and retry
342*7c478bd9Sstevel@tonic-gate 			 */
343*7c478bd9Sstevel@tonic-gate 			goto free_and_continue;
344*7c478bd9Sstevel@tonic-gate 		}
345*7c478bd9Sstevel@tonic-gate 
346*7c478bd9Sstevel@tonic-gate 		/*
347*7c478bd9Sstevel@tonic-gate 		 * If there is an embedded mapping, we may need to
348*7c478bd9Sstevel@tonic-gate 		 * convert it to an hment.
349*7c478bd9Sstevel@tonic-gate 		 */
350*7c478bd9Sstevel@tonic-gate 		if (pp->p_embed) {
351*7c478bd9Sstevel@tonic-gate 
352*7c478bd9Sstevel@tonic-gate 			/* should point to htable */
353*7c478bd9Sstevel@tonic-gate 			ASSERT(pp->p_mapping != NULL);
354*7c478bd9Sstevel@tonic-gate 
355*7c478bd9Sstevel@tonic-gate 			/*
356*7c478bd9Sstevel@tonic-gate 			 * If we are faulting on a pre-existing mapping
357*7c478bd9Sstevel@tonic-gate 			 * there is no need to promote/allocate a new hment.
358*7c478bd9Sstevel@tonic-gate 			 * This happens a lot due to segmap.
359*7c478bd9Sstevel@tonic-gate 			 */
360*7c478bd9Sstevel@tonic-gate 			if (pp->p_mapping == htable && pp->p_mlentry == entry) {
361*7c478bd9Sstevel@tonic-gate 				if (hm == NULL)
362*7c478bd9Sstevel@tonic-gate 					break;
363*7c478bd9Sstevel@tonic-gate 				goto free_and_continue;
364*7c478bd9Sstevel@tonic-gate 			}
365*7c478bd9Sstevel@tonic-gate 
366*7c478bd9Sstevel@tonic-gate 			/*
367*7c478bd9Sstevel@tonic-gate 			 * If we have an hment allocated, use it to promote the
368*7c478bd9Sstevel@tonic-gate 			 * existing embedded mapping.
369*7c478bd9Sstevel@tonic-gate 			 */
370*7c478bd9Sstevel@tonic-gate 			if (hm != NULL) {
371*7c478bd9Sstevel@tonic-gate 				hm->hm_htable = pp->p_mapping;
372*7c478bd9Sstevel@tonic-gate 				hm->hm_entry = pp->p_mlentry;
373*7c478bd9Sstevel@tonic-gate 				pp->p_mapping = NULL;
374*7c478bd9Sstevel@tonic-gate 				pp->p_share = 0;
375*7c478bd9Sstevel@tonic-gate 				pp->p_embed = 0;
376*7c478bd9Sstevel@tonic-gate 				hment_insert(hm, pp);
377*7c478bd9Sstevel@tonic-gate 			}
378*7c478bd9Sstevel@tonic-gate 
379*7c478bd9Sstevel@tonic-gate 			/*
380*7c478bd9Sstevel@tonic-gate 			 * We either didn't have an hment allocated or we just
381*7c478bd9Sstevel@tonic-gate 			 * used it for the embedded mapping. In either case,
382*7c478bd9Sstevel@tonic-gate 			 * allocate another hment and restart.
383*7c478bd9Sstevel@tonic-gate 			 */
384*7c478bd9Sstevel@tonic-gate 			goto allocate_and_continue;
385*7c478bd9Sstevel@tonic-gate 		}
386*7c478bd9Sstevel@tonic-gate 
387*7c478bd9Sstevel@tonic-gate 		/*
388*7c478bd9Sstevel@tonic-gate 		 * Last possibility is that we're adding an hment to a list
389*7c478bd9Sstevel@tonic-gate 		 * of hments.
390*7c478bd9Sstevel@tonic-gate 		 */
391*7c478bd9Sstevel@tonic-gate 		if (hm != NULL)
392*7c478bd9Sstevel@tonic-gate 			break;
393*7c478bd9Sstevel@tonic-gate allocate_and_continue:
394*7c478bd9Sstevel@tonic-gate 		x86_hm_exit(pp);
395*7c478bd9Sstevel@tonic-gate 		hm = hment_alloc();
396*7c478bd9Sstevel@tonic-gate 		x86_hm_enter(pp);
397*7c478bd9Sstevel@tonic-gate 		continue;
398*7c478bd9Sstevel@tonic-gate 
399*7c478bd9Sstevel@tonic-gate free_and_continue:
400*7c478bd9Sstevel@tonic-gate 		/*
401*7c478bd9Sstevel@tonic-gate 		 * we allocated an hment already, free it and retry
402*7c478bd9Sstevel@tonic-gate 		 */
403*7c478bd9Sstevel@tonic-gate 		x86_hm_exit(pp);
404*7c478bd9Sstevel@tonic-gate 		hment_free(hm);
405*7c478bd9Sstevel@tonic-gate 		hm = NULL;
406*7c478bd9Sstevel@tonic-gate 		x86_hm_enter(pp);
407*7c478bd9Sstevel@tonic-gate 	}
408*7c478bd9Sstevel@tonic-gate 	ASSERT(x86_hm_held(pp));
409*7c478bd9Sstevel@tonic-gate 	return (hm);
410*7c478bd9Sstevel@tonic-gate }
411*7c478bd9Sstevel@tonic-gate 
412*7c478bd9Sstevel@tonic-gate /*
413*7c478bd9Sstevel@tonic-gate  * Record a mapping list entry for the htable/entry to the given page.
414*7c478bd9Sstevel@tonic-gate  *
415*7c478bd9Sstevel@tonic-gate  * hment_prepare() should have properly set up the situation.
416*7c478bd9Sstevel@tonic-gate  */
417*7c478bd9Sstevel@tonic-gate void
418*7c478bd9Sstevel@tonic-gate hment_assign(htable_t *htable, uint_t entry, page_t *pp, hment_t *hm)
419*7c478bd9Sstevel@tonic-gate {
420*7c478bd9Sstevel@tonic-gate 	ASSERT(x86_hm_held(pp));
421*7c478bd9Sstevel@tonic-gate 
422*7c478bd9Sstevel@tonic-gate 	/*
423*7c478bd9Sstevel@tonic-gate 	 * The most common case is establishing the first mapping to a
424*7c478bd9Sstevel@tonic-gate 	 * page, so check that first. This doesn't need any allocated
425*7c478bd9Sstevel@tonic-gate 	 * hment.
426*7c478bd9Sstevel@tonic-gate 	 */
427*7c478bd9Sstevel@tonic-gate 	if (pp->p_mapping == NULL) {
428*7c478bd9Sstevel@tonic-gate 		ASSERT(hm == NULL);
429*7c478bd9Sstevel@tonic-gate 		ASSERT(!pp->p_embed);
430*7c478bd9Sstevel@tonic-gate 		ASSERT(pp->p_share == 0);
431*7c478bd9Sstevel@tonic-gate 		pp->p_embed = 1;
432*7c478bd9Sstevel@tonic-gate 		pp->p_mapping = htable;
433*7c478bd9Sstevel@tonic-gate 		pp->p_mlentry = entry;
434*7c478bd9Sstevel@tonic-gate 		return;
435*7c478bd9Sstevel@tonic-gate 	}
436*7c478bd9Sstevel@tonic-gate 
437*7c478bd9Sstevel@tonic-gate 	/*
438*7c478bd9Sstevel@tonic-gate 	 * We should never get here with a pre-existing embedded maping
439*7c478bd9Sstevel@tonic-gate 	 */
440*7c478bd9Sstevel@tonic-gate 	ASSERT(!pp->p_embed);
441*7c478bd9Sstevel@tonic-gate 
442*7c478bd9Sstevel@tonic-gate 	/*
443*7c478bd9Sstevel@tonic-gate 	 * add the new hment to the mapping list
444*7c478bd9Sstevel@tonic-gate 	 */
445*7c478bd9Sstevel@tonic-gate 	ASSERT(hm != NULL);
446*7c478bd9Sstevel@tonic-gate 	hm->hm_htable = htable;
447*7c478bd9Sstevel@tonic-gate 	hm->hm_entry = entry;
448*7c478bd9Sstevel@tonic-gate 	hment_insert(hm, pp);
449*7c478bd9Sstevel@tonic-gate }
450*7c478bd9Sstevel@tonic-gate 
451*7c478bd9Sstevel@tonic-gate /*
452*7c478bd9Sstevel@tonic-gate  * Walk through the mappings for a page.
453*7c478bd9Sstevel@tonic-gate  *
454*7c478bd9Sstevel@tonic-gate  * must already have done an x86_hm_enter()
455*7c478bd9Sstevel@tonic-gate  */
456*7c478bd9Sstevel@tonic-gate hment_t *
457*7c478bd9Sstevel@tonic-gate hment_walk(page_t *pp, htable_t **ht, uint_t *entry, hment_t *prev)
458*7c478bd9Sstevel@tonic-gate {
459*7c478bd9Sstevel@tonic-gate 	hment_t		*hm;
460*7c478bd9Sstevel@tonic-gate 
461*7c478bd9Sstevel@tonic-gate 	ASSERT(x86_hm_held(pp));
462*7c478bd9Sstevel@tonic-gate 
463*7c478bd9Sstevel@tonic-gate 	if (pp->p_embed) {
464*7c478bd9Sstevel@tonic-gate 		if (prev == NULL) {
465*7c478bd9Sstevel@tonic-gate 			*ht = (htable_t *)pp->p_mapping;
466*7c478bd9Sstevel@tonic-gate 			*entry = pp->p_mlentry;
467*7c478bd9Sstevel@tonic-gate 			hm = HMENT_EMBEDDED;
468*7c478bd9Sstevel@tonic-gate 		} else {
469*7c478bd9Sstevel@tonic-gate 			ASSERT(prev == HMENT_EMBEDDED);
470*7c478bd9Sstevel@tonic-gate 			hm = NULL;
471*7c478bd9Sstevel@tonic-gate 		}
472*7c478bd9Sstevel@tonic-gate 	} else {
473*7c478bd9Sstevel@tonic-gate 		if (prev == NULL) {
474*7c478bd9Sstevel@tonic-gate 			ASSERT(prev != HMENT_EMBEDDED);
475*7c478bd9Sstevel@tonic-gate 			hm = (hment_t *)pp->p_mapping;
476*7c478bd9Sstevel@tonic-gate 		} else {
477*7c478bd9Sstevel@tonic-gate 			hm = prev->hm_next;
478*7c478bd9Sstevel@tonic-gate 		}
479*7c478bd9Sstevel@tonic-gate 
480*7c478bd9Sstevel@tonic-gate 		if (hm != NULL) {
481*7c478bd9Sstevel@tonic-gate 			*ht = hm->hm_htable;
482*7c478bd9Sstevel@tonic-gate 			*entry = hm->hm_entry;
483*7c478bd9Sstevel@tonic-gate 		}
484*7c478bd9Sstevel@tonic-gate 	}
485*7c478bd9Sstevel@tonic-gate 	return (hm);
486*7c478bd9Sstevel@tonic-gate }
487*7c478bd9Sstevel@tonic-gate 
488*7c478bd9Sstevel@tonic-gate /*
489*7c478bd9Sstevel@tonic-gate  * Remove a mapping to a page from its mapping list. Must have
490*7c478bd9Sstevel@tonic-gate  * the corresponding mapping list locked.
491*7c478bd9Sstevel@tonic-gate  * Finds the mapping list entry with the given pte_t and
492*7c478bd9Sstevel@tonic-gate  * unlinks it from the mapping list.
493*7c478bd9Sstevel@tonic-gate  */
494*7c478bd9Sstevel@tonic-gate hment_t *
495*7c478bd9Sstevel@tonic-gate hment_remove(page_t *pp, htable_t *ht, uint_t entry)
496*7c478bd9Sstevel@tonic-gate {
497*7c478bd9Sstevel@tonic-gate 	hment_t		*prev = NULL;
498*7c478bd9Sstevel@tonic-gate 	hment_t		*hm;
499*7c478bd9Sstevel@tonic-gate 	uint_t		idx;
500*7c478bd9Sstevel@tonic-gate 
501*7c478bd9Sstevel@tonic-gate 	ASSERT(x86_hm_held(pp));
502*7c478bd9Sstevel@tonic-gate 
503*7c478bd9Sstevel@tonic-gate 	/*
504*7c478bd9Sstevel@tonic-gate 	 * Check if we have only one mapping embedded in the page_t.
505*7c478bd9Sstevel@tonic-gate 	 */
506*7c478bd9Sstevel@tonic-gate 	if (pp->p_embed) {
507*7c478bd9Sstevel@tonic-gate 		ASSERT(ht == (htable_t *)pp->p_mapping);
508*7c478bd9Sstevel@tonic-gate 		ASSERT(entry == pp->p_mlentry);
509*7c478bd9Sstevel@tonic-gate 		ASSERT(pp->p_share == 0);
510*7c478bd9Sstevel@tonic-gate 		pp->p_mapping = NULL;
511*7c478bd9Sstevel@tonic-gate 		pp->p_mlentry = 0;
512*7c478bd9Sstevel@tonic-gate 		pp->p_embed = 0;
513*7c478bd9Sstevel@tonic-gate 		return (NULL);
514*7c478bd9Sstevel@tonic-gate 	}
515*7c478bd9Sstevel@tonic-gate 
516*7c478bd9Sstevel@tonic-gate 	/*
517*7c478bd9Sstevel@tonic-gate 	 * Otherwise it must be in the list of hments.
518*7c478bd9Sstevel@tonic-gate 	 * Find the hment in the system-wide hash table and remove it.
519*7c478bd9Sstevel@tonic-gate 	 */
520*7c478bd9Sstevel@tonic-gate 	ASSERT(pp->p_share != 0);
521*7c478bd9Sstevel@tonic-gate 	idx = HMENT_HASH(ht->ht_pfn, entry);
522*7c478bd9Sstevel@tonic-gate 	mutex_enter(HASH_MUTEX(idx));
523*7c478bd9Sstevel@tonic-gate 	hm = hment_hash[idx];
524*7c478bd9Sstevel@tonic-gate 	while (hm && (hm->hm_htable != ht || hm->hm_entry != entry)) {
525*7c478bd9Sstevel@tonic-gate 		prev = hm;
526*7c478bd9Sstevel@tonic-gate 		hm = hm->hm_hashnext;
527*7c478bd9Sstevel@tonic-gate 	}
528*7c478bd9Sstevel@tonic-gate 	if (hm == NULL)
529*7c478bd9Sstevel@tonic-gate 		panic("hment_remove() mapping not found in hash table");
530*7c478bd9Sstevel@tonic-gate 
531*7c478bd9Sstevel@tonic-gate 	if (prev)
532*7c478bd9Sstevel@tonic-gate 		prev->hm_hashnext = hm->hm_hashnext;
533*7c478bd9Sstevel@tonic-gate 	else
534*7c478bd9Sstevel@tonic-gate 		hment_hash[idx] = hm->hm_hashnext;
535*7c478bd9Sstevel@tonic-gate 	mutex_exit(HASH_MUTEX(idx));
536*7c478bd9Sstevel@tonic-gate 
537*7c478bd9Sstevel@tonic-gate 	/*
538*7c478bd9Sstevel@tonic-gate 	 * Remove the hment from the page's mapping list
539*7c478bd9Sstevel@tonic-gate 	 */
540*7c478bd9Sstevel@tonic-gate 	if (hm->hm_next)
541*7c478bd9Sstevel@tonic-gate 		hm->hm_next->hm_prev = hm->hm_prev;
542*7c478bd9Sstevel@tonic-gate 	if (hm->hm_prev)
543*7c478bd9Sstevel@tonic-gate 		hm->hm_prev->hm_next = hm->hm_next;
544*7c478bd9Sstevel@tonic-gate 	else
545*7c478bd9Sstevel@tonic-gate 		pp->p_mapping = hm->hm_next;
546*7c478bd9Sstevel@tonic-gate 
547*7c478bd9Sstevel@tonic-gate 	--pp->p_share;
548*7c478bd9Sstevel@tonic-gate 	hm->hm_hashnext = NULL;
549*7c478bd9Sstevel@tonic-gate 	hm->hm_next = NULL;
550*7c478bd9Sstevel@tonic-gate 	hm->hm_prev = NULL;
551*7c478bd9Sstevel@tonic-gate 
552*7c478bd9Sstevel@tonic-gate 	return (hm);
553*7c478bd9Sstevel@tonic-gate }
554*7c478bd9Sstevel@tonic-gate 
555*7c478bd9Sstevel@tonic-gate /*
556*7c478bd9Sstevel@tonic-gate  * Put initial hment's in the reserve pool.
557*7c478bd9Sstevel@tonic-gate  */
558*7c478bd9Sstevel@tonic-gate void
559*7c478bd9Sstevel@tonic-gate hment_reserve(uint_t count)
560*7c478bd9Sstevel@tonic-gate {
561*7c478bd9Sstevel@tonic-gate 	hment_t	*hm;
562*7c478bd9Sstevel@tonic-gate 
563*7c478bd9Sstevel@tonic-gate 	count += hment_reserve_amount;
564*7c478bd9Sstevel@tonic-gate 
565*7c478bd9Sstevel@tonic-gate 	while (hment_reserve_count < count) {
566*7c478bd9Sstevel@tonic-gate 		hm = kmem_cache_alloc(hment_cache, KM_NOSLEEP);
567*7c478bd9Sstevel@tonic-gate 		if (hm == NULL)
568*7c478bd9Sstevel@tonic-gate 			return;
569*7c478bd9Sstevel@tonic-gate 		hment_put_reserve(hm);
570*7c478bd9Sstevel@tonic-gate 	}
571*7c478bd9Sstevel@tonic-gate }
572*7c478bd9Sstevel@tonic-gate 
573*7c478bd9Sstevel@tonic-gate /*
574*7c478bd9Sstevel@tonic-gate  * Readjust the hment reserves after they may have been used.
575*7c478bd9Sstevel@tonic-gate  */
576*7c478bd9Sstevel@tonic-gate void
577*7c478bd9Sstevel@tonic-gate hment_adjust_reserve()
578*7c478bd9Sstevel@tonic-gate {
579*7c478bd9Sstevel@tonic-gate 	hment_t	*hm;
580*7c478bd9Sstevel@tonic-gate 
581*7c478bd9Sstevel@tonic-gate 	/*
582*7c478bd9Sstevel@tonic-gate 	 * Free up any excess reserves
583*7c478bd9Sstevel@tonic-gate 	 */
584*7c478bd9Sstevel@tonic-gate 	while (hment_reserve_count > hment_reserve_amount) {
585*7c478bd9Sstevel@tonic-gate 		ASSERT(curthread != hat_reserves_thread);
586*7c478bd9Sstevel@tonic-gate 		hm = hment_get_reserve();
587*7c478bd9Sstevel@tonic-gate 		if (hm == NULL)
588*7c478bd9Sstevel@tonic-gate 			return;
589*7c478bd9Sstevel@tonic-gate 		hment_free(hm);
590*7c478bd9Sstevel@tonic-gate 	}
591*7c478bd9Sstevel@tonic-gate }
592*7c478bd9Sstevel@tonic-gate 
593*7c478bd9Sstevel@tonic-gate /*
594*7c478bd9Sstevel@tonic-gate  * initialize hment data structures
595*7c478bd9Sstevel@tonic-gate  */
596*7c478bd9Sstevel@tonic-gate void
597*7c478bd9Sstevel@tonic-gate hment_init(void)
598*7c478bd9Sstevel@tonic-gate {
599*7c478bd9Sstevel@tonic-gate 	int i;
600*7c478bd9Sstevel@tonic-gate 	int flags = KMC_NOHASH | KMC_NODEBUG;
601*7c478bd9Sstevel@tonic-gate 
602*7c478bd9Sstevel@tonic-gate 	/*
603*7c478bd9Sstevel@tonic-gate 	 * Initialize kmem caches. On 32 bit kernel's we shut off
604*7c478bd9Sstevel@tonic-gate 	 * debug information to save on precious kernel VA usage.
605*7c478bd9Sstevel@tonic-gate 	 */
606*7c478bd9Sstevel@tonic-gate 	hment_cache = kmem_cache_create("hment_t",
607*7c478bd9Sstevel@tonic-gate 	    sizeof (hment_t), 0, NULL, NULL, NULL,
608*7c478bd9Sstevel@tonic-gate 	    NULL, hat_memload_arena, flags);
609*7c478bd9Sstevel@tonic-gate 
610*7c478bd9Sstevel@tonic-gate 	hment_hash = kmem_zalloc(hment_hash_entries * sizeof (hment_t *),
611*7c478bd9Sstevel@tonic-gate 	    KM_SLEEP);
612*7c478bd9Sstevel@tonic-gate 
613*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < MLIST_NUM_LOCK; i++)
614*7c478bd9Sstevel@tonic-gate 		mutex_init(&mlist_lock[i], NULL, MUTEX_DEFAULT, NULL);
615*7c478bd9Sstevel@tonic-gate 
616*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < HASH_NUM_LOCK; i++)
617*7c478bd9Sstevel@tonic-gate 		mutex_init(&hash_lock[i], NULL, MUTEX_DEFAULT, NULL);
618*7c478bd9Sstevel@tonic-gate 
619*7c478bd9Sstevel@tonic-gate 
620*7c478bd9Sstevel@tonic-gate }
621*7c478bd9Sstevel@tonic-gate 
622*7c478bd9Sstevel@tonic-gate /*
623*7c478bd9Sstevel@tonic-gate  * return the number of mappings to a page
624*7c478bd9Sstevel@tonic-gate  *
625*7c478bd9Sstevel@tonic-gate  * Note there is no ASSERT() that the MUTEX is held for this.
626*7c478bd9Sstevel@tonic-gate  * Hence the return value might be inaccurate if this is called without
627*7c478bd9Sstevel@tonic-gate  * doing an x86_hm_enter().
628*7c478bd9Sstevel@tonic-gate  */
629*7c478bd9Sstevel@tonic-gate uint_t
630*7c478bd9Sstevel@tonic-gate hment_mapcnt(page_t *pp)
631*7c478bd9Sstevel@tonic-gate {
632*7c478bd9Sstevel@tonic-gate 	uint_t cnt;
633*7c478bd9Sstevel@tonic-gate 	uint_t szc;
634*7c478bd9Sstevel@tonic-gate 	page_t *larger;
635*7c478bd9Sstevel@tonic-gate 	hment_t	*hm;
636*7c478bd9Sstevel@tonic-gate 
637*7c478bd9Sstevel@tonic-gate 	x86_hm_enter(pp);
638*7c478bd9Sstevel@tonic-gate 	if (pp->p_mapping == NULL)
639*7c478bd9Sstevel@tonic-gate 		cnt = 0;
640*7c478bd9Sstevel@tonic-gate 	else if (pp->p_embed)
641*7c478bd9Sstevel@tonic-gate 		cnt = 1;
642*7c478bd9Sstevel@tonic-gate 	else
643*7c478bd9Sstevel@tonic-gate 		cnt = pp->p_share;
644*7c478bd9Sstevel@tonic-gate 	x86_hm_exit(pp);
645*7c478bd9Sstevel@tonic-gate 
646*7c478bd9Sstevel@tonic-gate 	/*
647*7c478bd9Sstevel@tonic-gate 	 * walk through all larger mapping sizes counting mappings
648*7c478bd9Sstevel@tonic-gate 	 */
649*7c478bd9Sstevel@tonic-gate 	for (szc = 1; szc <= pp->p_szc; ++szc) {
650*7c478bd9Sstevel@tonic-gate 		larger = PP_GROUPLEADER(pp, szc);
651*7c478bd9Sstevel@tonic-gate 		if (larger == pp)	/* don't double count large mappings */
652*7c478bd9Sstevel@tonic-gate 			continue;
653*7c478bd9Sstevel@tonic-gate 
654*7c478bd9Sstevel@tonic-gate 		x86_hm_enter(larger);
655*7c478bd9Sstevel@tonic-gate 		if (larger->p_mapping != NULL) {
656*7c478bd9Sstevel@tonic-gate 			if (larger->p_embed &&
657*7c478bd9Sstevel@tonic-gate 			    ((htable_t *)larger->p_mapping)->ht_level == szc) {
658*7c478bd9Sstevel@tonic-gate 				++cnt;
659*7c478bd9Sstevel@tonic-gate 			} else if (!larger->p_embed) {
660*7c478bd9Sstevel@tonic-gate 				for (hm = larger->p_mapping; hm;
661*7c478bd9Sstevel@tonic-gate 				    hm = hm->hm_next) {
662*7c478bd9Sstevel@tonic-gate 					if (hm->hm_htable->ht_level == szc)
663*7c478bd9Sstevel@tonic-gate 						++cnt;
664*7c478bd9Sstevel@tonic-gate 				}
665*7c478bd9Sstevel@tonic-gate 			}
666*7c478bd9Sstevel@tonic-gate 		}
667*7c478bd9Sstevel@tonic-gate 		x86_hm_exit(larger);
668*7c478bd9Sstevel@tonic-gate 	}
669*7c478bd9Sstevel@tonic-gate 	return (cnt);
670*7c478bd9Sstevel@tonic-gate }
671*7c478bd9Sstevel@tonic-gate 
672*7c478bd9Sstevel@tonic-gate /*
673*7c478bd9Sstevel@tonic-gate  * We need to steal an hment. Walk through all the page_t's until we
674*7c478bd9Sstevel@tonic-gate  * find one that has multiple mappings. Unload one of the mappings
675*7c478bd9Sstevel@tonic-gate  * and reclaim that hment. Note that we'll save/restart the starting
676*7c478bd9Sstevel@tonic-gate  * page to try and spread the pain.
677*7c478bd9Sstevel@tonic-gate  */
678*7c478bd9Sstevel@tonic-gate static page_t *last_page = NULL;
679*7c478bd9Sstevel@tonic-gate 
680*7c478bd9Sstevel@tonic-gate static hment_t *
681*7c478bd9Sstevel@tonic-gate hment_steal(void)
682*7c478bd9Sstevel@tonic-gate {
683*7c478bd9Sstevel@tonic-gate 	page_t *last = last_page;
684*7c478bd9Sstevel@tonic-gate 	page_t *pp = last;
685*7c478bd9Sstevel@tonic-gate 	hment_t *hm = NULL;
686*7c478bd9Sstevel@tonic-gate 	hment_t *hm2;
687*7c478bd9Sstevel@tonic-gate 	htable_t *ht;
688*7c478bd9Sstevel@tonic-gate 	uint_t found_one = 0;
689*7c478bd9Sstevel@tonic-gate 
690*7c478bd9Sstevel@tonic-gate 	HATSTAT_INC(hs_hm_steals);
691*7c478bd9Sstevel@tonic-gate 	if (pp == NULL)
692*7c478bd9Sstevel@tonic-gate 		last = pp = page_first();
693*7c478bd9Sstevel@tonic-gate 
694*7c478bd9Sstevel@tonic-gate 	while (!found_one) {
695*7c478bd9Sstevel@tonic-gate 		HATSTAT_INC(hs_hm_steal_exam);
696*7c478bd9Sstevel@tonic-gate 		pp = page_next(pp);
697*7c478bd9Sstevel@tonic-gate 		if (pp == NULL)
698*7c478bd9Sstevel@tonic-gate 			pp = page_first();
699*7c478bd9Sstevel@tonic-gate 
700*7c478bd9Sstevel@tonic-gate 		/*
701*7c478bd9Sstevel@tonic-gate 		 * The loop and function exit here if nothing found to steal.
702*7c478bd9Sstevel@tonic-gate 		 */
703*7c478bd9Sstevel@tonic-gate 		if (pp == last)
704*7c478bd9Sstevel@tonic-gate 			return (NULL);
705*7c478bd9Sstevel@tonic-gate 
706*7c478bd9Sstevel@tonic-gate 		/*
707*7c478bd9Sstevel@tonic-gate 		 * Only lock the page_t if it has hments.
708*7c478bd9Sstevel@tonic-gate 		 */
709*7c478bd9Sstevel@tonic-gate 		if (pp->p_mapping == NULL || pp->p_embed)
710*7c478bd9Sstevel@tonic-gate 			continue;
711*7c478bd9Sstevel@tonic-gate 
712*7c478bd9Sstevel@tonic-gate 		/*
713*7c478bd9Sstevel@tonic-gate 		 * Search the mapping list for a usable mapping.
714*7c478bd9Sstevel@tonic-gate 		 */
715*7c478bd9Sstevel@tonic-gate 		x86_hm_enter(pp);
716*7c478bd9Sstevel@tonic-gate 		if (!pp->p_embed) {
717*7c478bd9Sstevel@tonic-gate 			for (hm = pp->p_mapping; hm; hm = hm->hm_next) {
718*7c478bd9Sstevel@tonic-gate 				ht = hm->hm_htable;
719*7c478bd9Sstevel@tonic-gate 				if (ht->ht_hat != kas.a_hat &&
720*7c478bd9Sstevel@tonic-gate 				    ht->ht_busy == 0 &&
721*7c478bd9Sstevel@tonic-gate 				    ht->ht_lock_cnt == 0) {
722*7c478bd9Sstevel@tonic-gate 					found_one = 1;
723*7c478bd9Sstevel@tonic-gate 					break;
724*7c478bd9Sstevel@tonic-gate 				}
725*7c478bd9Sstevel@tonic-gate 			}
726*7c478bd9Sstevel@tonic-gate 		}
727*7c478bd9Sstevel@tonic-gate 		if (!found_one)
728*7c478bd9Sstevel@tonic-gate 			x86_hm_exit(pp);
729*7c478bd9Sstevel@tonic-gate 	}
730*7c478bd9Sstevel@tonic-gate 
731*7c478bd9Sstevel@tonic-gate 	/*
732*7c478bd9Sstevel@tonic-gate 	 * Steal the mapping we found.  Note that hati_page_unmap() will
733*7c478bd9Sstevel@tonic-gate 	 * do the x86_hm_exit().
734*7c478bd9Sstevel@tonic-gate 	 */
735*7c478bd9Sstevel@tonic-gate 	hm2 = hati_page_unmap(pp, ht, hm->hm_entry);
736*7c478bd9Sstevel@tonic-gate 	ASSERT(hm2 == hm);
737*7c478bd9Sstevel@tonic-gate 	last_page = pp;
738*7c478bd9Sstevel@tonic-gate 	return (hm);
739*7c478bd9Sstevel@tonic-gate }
740