xref: /illumos-gate/usr/src/uts/i86pc/vm/hment.c (revision 86ef0a63e1cfa5dc98606efef379365acca98063)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/sysmacros.h>
28 #include <sys/kmem.h>
29 #include <sys/atomic.h>
30 #include <sys/bitmap.h>
31 #include <sys/systm.h>
32 #include <vm/seg_kmem.h>
33 #include <vm/hat.h>
34 #include <vm/vm_dep.h>
35 #include <vm/hat_i86.h>
36 #include <sys/cmn_err.h>
37 #include <sys/avl.h>
38 
39 
40 /*
41  * When pages are shared by more than one mapping, a list of these
42  * structs hangs off of the page_t connected by the hm_next and hm_prev
43  * fields.  Every hment is also indexed by a system-wide hash table, using
44  * hm_hashlink to connect the hments within each hash bucket.
45  */
46 struct hment {
47 	avl_node_t	hm_hashlink;	/* links for hash table */
48 	struct hment	*hm_next;	/* next mapping of same page */
49 	struct hment	*hm_prev;	/* previous mapping of same page */
50 	htable_t	*hm_htable;	/* corresponding htable_t */
51 	pfn_t		hm_pfn;		/* mapping page frame number */
52 	uint16_t	hm_entry;	/* index of pte in htable */
53 	uint16_t	hm_pad;		/* explicitly expose compiler padding */
54 	uint32_t	hm_pad2;	/* explicitly expose compiler padding */
55 };
56 
57 /*
58  * Value returned by hment_walk() when dealing with a single mapping
59  * embedded in the page_t.
60  */
61 #define	HMENT_EMBEDDED ((hment_t *)(uintptr_t)1)
62 
63 kmem_cache_t *hment_cache;
64 
65 /*
66  * The hment reserve is similar to the htable reserve, with the following
67  * exception. Hment's are never needed for HAT kmem allocs.
68  *
69  * The hment_reserve_amount variable is used, so that you can change it's
70  * value to zero via a kernel debugger to force stealing to get tested.
71  */
72 #define	HMENT_RESERVE_AMOUNT	(200)	/* currently a guess at right value. */
73 uint_t hment_reserve_amount = HMENT_RESERVE_AMOUNT;
74 kmutex_t hment_reserve_mutex;
75 uint_t	hment_reserve_count;
76 hment_t	*hment_reserve_pool;
77 
78 /*
79  * All hments are stored in a system wide hash of AVL trees.
80  */
81 #define	HMENT_HASH_SIZE (64 * 1024)
82 static uint_t hment_hash_entries = HMENT_HASH_SIZE;
83 static avl_tree_t *hment_table;
84 
85 /*
86  * Lots of highly shared pages will have the same value for "entry" (consider
87  * the starting address of "xterm" or "sh"). So we'll distinguish them by
88  * adding the pfn of the page table into both the high bits.
89  * The shift by 9 corresponds to the range of values for entry (0..511).
90  */
91 #define	HMENT_HASH(pfn, entry) (uint32_t)	\
92 	((((pfn) << 9) + entry + pfn) & (hment_hash_entries - 1))
93 
94 /*
95  * "mlist_lock" is a hashed mutex lock for protecting per-page mapping
96  * lists and "hash_lock" is a similar lock protecting the hment hash
97  * table.  The hashed approach is taken to avoid the spatial overhead of
98  * maintaining a separate lock for each page, while still achieving better
99  * scalability than a single lock would allow.
100  */
101 #define	MLIST_NUM_LOCK	2048		/* must be power of two */
102 static kmutex_t *mlist_lock;
103 
104 /*
105  * the shift by 9 is so that all large pages don't use the same hash bucket
106  */
107 #define	MLIST_MUTEX(pp) \
108 	&mlist_lock[((pp)->p_pagenum + ((pp)->p_pagenum >> 9)) & \
109 	(MLIST_NUM_LOCK - 1)]
110 
111 #define	HASH_NUM_LOCK	2048		/* must be power of two */
112 static kmutex_t *hash_lock;
113 
114 #define	HASH_MUTEX(idx) &hash_lock[(idx) & (HASH_NUM_LOCK-1)]
115 
116 static avl_node_t null_avl_link;	/* always zero */
117 static hment_t *hment_steal(void);
118 
119 /*
120  * Utility to compare hment_t's for use in AVL tree. The ordering
121  * is entirely arbitrary and is just so that the AVL algorithm works.
122  */
123 static int
hment_compare(const void * hm1,const void * hm2)124 hment_compare(const void *hm1, const void *hm2)
125 {
126 	hment_t *h1 = (hment_t *)hm1;
127 	hment_t *h2 = (hment_t *)hm2;
128 	long diff;
129 
130 	diff = (uintptr_t)h1->hm_htable - (uintptr_t)h2->hm_htable;
131 	if (diff == 0) {
132 		diff = h1->hm_entry - h2->hm_entry;
133 		if (diff == 0)
134 			diff = h1->hm_pfn - h2->hm_pfn;
135 	}
136 	if (diff < 0)
137 		diff = -1;
138 	else if (diff > 0)
139 		diff = 1;
140 	return (diff);
141 }
142 
143 /*
144  * put one hment onto the reserves list
145  */
146 static void
hment_put_reserve(hment_t * hm)147 hment_put_reserve(hment_t *hm)
148 {
149 	HATSTAT_INC(hs_hm_put_reserve);
150 	mutex_enter(&hment_reserve_mutex);
151 	hm->hm_next = hment_reserve_pool;
152 	hment_reserve_pool = hm;
153 	++hment_reserve_count;
154 	mutex_exit(&hment_reserve_mutex);
155 }
156 
157 /*
158  * Take one hment from the reserve.
159  */
160 static hment_t *
hment_get_reserve(void)161 hment_get_reserve(void)
162 {
163 	hment_t *hm = NULL;
164 
165 	/*
166 	 * We rely on a "donation system" to refill the hment reserve
167 	 * list, which only takes place when we are allocating hments for
168 	 * user mappings.  It is theoretically possible that an incredibly
169 	 * long string of kernel hment_alloc()s with no intervening user
170 	 * hment_alloc()s could exhaust that pool.
171 	 */
172 	HATSTAT_INC(hs_hm_get_reserve);
173 	mutex_enter(&hment_reserve_mutex);
174 	if (hment_reserve_count != 0) {
175 		hm = hment_reserve_pool;
176 		hment_reserve_pool = hm->hm_next;
177 		--hment_reserve_count;
178 	}
179 	mutex_exit(&hment_reserve_mutex);
180 	return (hm);
181 }
182 
183 /*
184  * Allocate an hment
185  */
186 static hment_t *
hment_alloc()187 hment_alloc()
188 {
189 	int km_flag = can_steal_post_boot ? KM_NOSLEEP : KM_SLEEP;
190 	hment_t	*hm = NULL;
191 
192 	/*
193 	 * If we aren't using the reserves, try using kmem to get an hment.
194 	 * Donate any successful allocations to reserves if low.
195 	 *
196 	 * If we're in panic, resort to using the reserves.
197 	 */
198 	HATSTAT_INC(hs_hm_alloc);
199 	if (!USE_HAT_RESERVES()) {
200 		for (;;) {
201 			hm = kmem_cache_alloc(hment_cache, km_flag);
202 			if (hm == NULL ||
203 			    USE_HAT_RESERVES() ||
204 			    hment_reserve_count >= hment_reserve_amount)
205 				break;
206 			hment_put_reserve(hm);
207 		}
208 	}
209 
210 	/*
211 	 * If allocation failed, we need to tap the reserves or steal
212 	 */
213 	if (hm == NULL) {
214 		if (USE_HAT_RESERVES())
215 			hm = hment_get_reserve();
216 
217 		/*
218 		 * If we still haven't gotten an hment, attempt to steal one by
219 		 * victimizing a mapping in a user htable.
220 		 */
221 		if (hm == NULL && can_steal_post_boot)
222 			hm = hment_steal();
223 
224 		/*
225 		 * we're in dire straights, try the reserve
226 		 */
227 		if (hm == NULL)
228 			hm = hment_get_reserve();
229 
230 		/*
231 		 * still no hment is a serious problem.
232 		 */
233 		if (hm == NULL)
234 			panic("hment_alloc(): no reserve, couldn't steal");
235 	}
236 
237 
238 	hm->hm_entry = 0;
239 	hm->hm_htable = NULL;
240 	hm->hm_hashlink = null_avl_link;
241 	hm->hm_next = NULL;
242 	hm->hm_prev = NULL;
243 	hm->hm_pfn = PFN_INVALID;
244 	return (hm);
245 }
246 
247 /*
248  * Free an hment, possibly to the reserves list when called from the
249  * thread using the reserves. For example, when freeing an hment during an
250  * htable_steal(), we can't recurse into the kmem allocator, so we just
251  * push the hment onto the reserve list.
252  */
253 void
hment_free(hment_t * hm)254 hment_free(hment_t *hm)
255 {
256 #ifdef DEBUG
257 	/*
258 	 * zero out all fields to try and force any race conditions to segfault
259 	 */
260 	bzero(hm, sizeof (*hm));
261 #endif
262 	HATSTAT_INC(hs_hm_free);
263 	if (USE_HAT_RESERVES() ||
264 	    hment_reserve_count < hment_reserve_amount) {
265 		hment_put_reserve(hm);
266 	} else {
267 		kmem_cache_free(hment_cache, hm);
268 		hment_adjust_reserve();
269 	}
270 }
271 
272 /*
273  * These must test for mlist_lock not having been allocated yet.
274  * We just ignore locking in that case, as it means were in early
275  * single threaded startup.
276  */
277 int
x86_hm_held(page_t * pp)278 x86_hm_held(page_t *pp)
279 {
280 	ASSERT(pp != NULL);
281 	if (mlist_lock == NULL)
282 		return (1);
283 	return (MUTEX_HELD(MLIST_MUTEX(pp)));
284 }
285 
286 void
x86_hm_enter(page_t * pp)287 x86_hm_enter(page_t *pp)
288 {
289 	ASSERT(pp != NULL);
290 	if (mlist_lock != NULL)
291 		mutex_enter(MLIST_MUTEX(pp));
292 }
293 
294 void
x86_hm_exit(page_t * pp)295 x86_hm_exit(page_t *pp)
296 {
297 	ASSERT(pp != NULL);
298 	if (mlist_lock != NULL)
299 		mutex_exit(MLIST_MUTEX(pp));
300 }
301 
302 /*
303  * Internal routine to add a full hment to a page_t mapping list
304  */
305 static void
hment_insert(hment_t * hm,page_t * pp)306 hment_insert(hment_t *hm, page_t *pp)
307 {
308 	uint_t		idx;
309 
310 	ASSERT(x86_hm_held(pp));
311 	ASSERT(!pp->p_embed);
312 
313 	/*
314 	 * Add the hment to the page's mapping list.
315 	 */
316 	++pp->p_share;
317 	hm->hm_next = pp->p_mapping;
318 	if (pp->p_mapping != NULL)
319 		((hment_t *)pp->p_mapping)->hm_prev = hm;
320 	pp->p_mapping = hm;
321 
322 	/*
323 	 * Add the hment to the system-wide hash table.
324 	 */
325 	idx = HMENT_HASH(hm->hm_htable->ht_pfn, hm->hm_entry);
326 
327 	mutex_enter(HASH_MUTEX(idx));
328 	avl_add(&hment_table[idx], hm);
329 	mutex_exit(HASH_MUTEX(idx));
330 }
331 
332 /*
333  * Prepare a mapping list entry to the given page.
334  *
335  * There are 4 different situations to deal with:
336  *
337  * - Adding the first mapping to a page_t as an embedded hment
338  * - Refaulting on an existing embedded mapping
339  * - Upgrading an embedded mapping when adding a 2nd mapping
340  * - Adding another mapping to a page_t that already has multiple mappings
341  *	 note we don't optimized for the refaulting case here.
342  *
343  * Due to competition with other threads that may be mapping/unmapping the
344  * same page and the need to drop all locks while allocating hments, any or
345  * all of the 3 situations can occur (and in almost any order) in any given
346  * call. Isn't this fun!
347  */
348 hment_t *
hment_prepare(htable_t * htable,uint_t entry,page_t * pp)349 hment_prepare(htable_t *htable, uint_t entry, page_t *pp)
350 {
351 	hment_t		*hm = NULL;
352 
353 	ASSERT(x86_hm_held(pp));
354 
355 	for (;;) {
356 
357 		/*
358 		 * The most common case is establishing the first mapping to a
359 		 * page, so check that first. This doesn't need any allocated
360 		 * hment.
361 		 */
362 		if (pp->p_mapping == NULL) {
363 			ASSERT(!pp->p_embed);
364 			ASSERT(pp->p_share == 0);
365 			if (hm == NULL)
366 				break;
367 
368 			/*
369 			 * we had an hment already, so free it and retry
370 			 */
371 			goto free_and_continue;
372 		}
373 
374 		/*
375 		 * If there is an embedded mapping, we may need to
376 		 * convert it to an hment.
377 		 */
378 		if (pp->p_embed) {
379 
380 			/* should point to htable */
381 			ASSERT(pp->p_mapping != NULL);
382 
383 			/*
384 			 * If we are faulting on a pre-existing mapping
385 			 * there is no need to promote/allocate a new hment.
386 			 * This happens a lot due to segmap.
387 			 */
388 			if (pp->p_mapping == htable && pp->p_mlentry == entry) {
389 				if (hm == NULL)
390 					break;
391 				goto free_and_continue;
392 			}
393 
394 			/*
395 			 * If we have an hment allocated, use it to promote the
396 			 * existing embedded mapping.
397 			 */
398 			if (hm != NULL) {
399 				hm->hm_htable = pp->p_mapping;
400 				hm->hm_entry = pp->p_mlentry;
401 				hm->hm_pfn = pp->p_pagenum;
402 				pp->p_mapping = NULL;
403 				pp->p_share = 0;
404 				pp->p_embed = 0;
405 				hment_insert(hm, pp);
406 			}
407 
408 			/*
409 			 * We either didn't have an hment allocated or we just
410 			 * used it for the embedded mapping. In either case,
411 			 * allocate another hment and restart.
412 			 */
413 			goto allocate_and_continue;
414 		}
415 
416 		/*
417 		 * Last possibility is that we're adding an hment to a list
418 		 * of hments.
419 		 */
420 		if (hm != NULL)
421 			break;
422 allocate_and_continue:
423 		x86_hm_exit(pp);
424 		hm = hment_alloc();
425 		x86_hm_enter(pp);
426 		continue;
427 
428 free_and_continue:
429 		/*
430 		 * we allocated an hment already, free it and retry
431 		 */
432 		x86_hm_exit(pp);
433 		hment_free(hm);
434 		hm = NULL;
435 		x86_hm_enter(pp);
436 	}
437 	ASSERT(x86_hm_held(pp));
438 	return (hm);
439 }
440 
441 /*
442  * Record a mapping list entry for the htable/entry to the given page.
443  *
444  * hment_prepare() should have properly set up the situation.
445  */
446 void
hment_assign(htable_t * htable,uint_t entry,page_t * pp,hment_t * hm)447 hment_assign(htable_t *htable, uint_t entry, page_t *pp, hment_t *hm)
448 {
449 	ASSERT(x86_hm_held(pp));
450 
451 	/*
452 	 * The most common case is establishing the first mapping to a
453 	 * page, so check that first. This doesn't need any allocated
454 	 * hment.
455 	 */
456 	if (pp->p_mapping == NULL) {
457 		ASSERT(hm == NULL);
458 		ASSERT(!pp->p_embed);
459 		ASSERT(pp->p_share == 0);
460 		pp->p_embed = 1;
461 		pp->p_mapping = htable;
462 		pp->p_mlentry = entry;
463 		return;
464 	}
465 
466 	/*
467 	 * We should never get here with a pre-existing embedded maping
468 	 */
469 	ASSERT(!pp->p_embed);
470 
471 	/*
472 	 * add the new hment to the mapping list
473 	 */
474 	ASSERT(hm != NULL);
475 	hm->hm_htable = htable;
476 	hm->hm_entry = entry;
477 	hm->hm_pfn = pp->p_pagenum;
478 	hment_insert(hm, pp);
479 }
480 
481 /*
482  * Walk through the mappings for a page.
483  *
484  * must already have done an x86_hm_enter()
485  */
486 hment_t *
hment_walk(page_t * pp,htable_t ** ht,uint_t * entry,hment_t * prev)487 hment_walk(page_t *pp, htable_t **ht, uint_t *entry, hment_t *prev)
488 {
489 	hment_t		*hm;
490 
491 	ASSERT(x86_hm_held(pp));
492 
493 	if (pp->p_embed) {
494 		if (prev == NULL) {
495 			*ht = (htable_t *)pp->p_mapping;
496 			*entry = pp->p_mlentry;
497 			hm = HMENT_EMBEDDED;
498 		} else {
499 			ASSERT(prev == HMENT_EMBEDDED);
500 			hm = NULL;
501 		}
502 	} else {
503 		if (prev == NULL) {
504 			ASSERT(prev != HMENT_EMBEDDED);
505 			hm = (hment_t *)pp->p_mapping;
506 		} else {
507 			hm = prev->hm_next;
508 		}
509 
510 		if (hm != NULL) {
511 			*ht = hm->hm_htable;
512 			*entry = hm->hm_entry;
513 		}
514 	}
515 	return (hm);
516 }
517 
518 /*
519  * Remove a mapping to a page from its mapping list. Must have
520  * the corresponding mapping list locked.
521  * Finds the mapping list entry with the given pte_t and
522  * unlinks it from the mapping list.
523  */
524 hment_t *
hment_remove(page_t * pp,htable_t * ht,uint_t entry)525 hment_remove(page_t *pp, htable_t *ht, uint_t entry)
526 {
527 	hment_t		dummy;
528 	avl_index_t	where;
529 	hment_t		*hm;
530 	uint_t		idx;
531 
532 	ASSERT(x86_hm_held(pp));
533 
534 	/*
535 	 * Check if we have only one mapping embedded in the page_t.
536 	 */
537 	if (pp->p_embed) {
538 		ASSERT(ht == (htable_t *)pp->p_mapping);
539 		ASSERT(entry == pp->p_mlentry);
540 		ASSERT(pp->p_share == 0);
541 		pp->p_mapping = NULL;
542 		pp->p_mlentry = 0;
543 		pp->p_embed = 0;
544 		return (NULL);
545 	}
546 
547 	/*
548 	 * Otherwise it must be in the list of hments.
549 	 * Find the hment in the system-wide hash table and remove it.
550 	 */
551 	ASSERT(pp->p_share != 0);
552 	dummy.hm_htable = ht;
553 	dummy.hm_entry = entry;
554 	dummy.hm_pfn = pp->p_pagenum;
555 	idx = HMENT_HASH(ht->ht_pfn, entry);
556 	mutex_enter(HASH_MUTEX(idx));
557 	hm = avl_find(&hment_table[idx], &dummy, &where);
558 	if (hm == NULL)
559 		panic("hment_remove() missing in hash table pp=%lx, ht=%lx,"
560 		    "entry=0x%x hash index=0x%x", (uintptr_t)pp, (uintptr_t)ht,
561 		    entry, idx);
562 	avl_remove(&hment_table[idx], hm);
563 	mutex_exit(HASH_MUTEX(idx));
564 
565 	/*
566 	 * Remove the hment from the page's mapping list
567 	 */
568 	if (hm->hm_next)
569 		hm->hm_next->hm_prev = hm->hm_prev;
570 	if (hm->hm_prev)
571 		hm->hm_prev->hm_next = hm->hm_next;
572 	else
573 		pp->p_mapping = hm->hm_next;
574 
575 	--pp->p_share;
576 	hm->hm_hashlink = null_avl_link;
577 	hm->hm_next = NULL;
578 	hm->hm_prev = NULL;
579 
580 	return (hm);
581 }
582 
583 /*
584  * Put initial hment's in the reserve pool.
585  */
586 void
hment_reserve(uint_t count)587 hment_reserve(uint_t count)
588 {
589 	hment_t	*hm;
590 
591 	count += hment_reserve_amount;
592 
593 	while (hment_reserve_count < count) {
594 		hm = kmem_cache_alloc(hment_cache, KM_NOSLEEP);
595 		if (hm == NULL)
596 			return;
597 		hment_put_reserve(hm);
598 	}
599 }
600 
601 /*
602  * Readjust the hment reserves after they may have been used.
603  */
604 void
hment_adjust_reserve()605 hment_adjust_reserve()
606 {
607 	hment_t	*hm;
608 
609 	/*
610 	 * Free up any excess reserves
611 	 */
612 	while (hment_reserve_count > hment_reserve_amount &&
613 	    !USE_HAT_RESERVES()) {
614 		hm = hment_get_reserve();
615 		if (hm == NULL)
616 			return;
617 		kmem_cache_free(hment_cache, hm);
618 	}
619 }
620 
621 /*
622  * initialize hment data structures
623  */
624 void
hment_init(void)625 hment_init(void)
626 {
627 	int i;
628 	int flags = KMC_NOHASH | KMC_NODEBUG;
629 
630 	/*
631 	 * Initialize kmem caches. On 32 bit kernel's we shut off
632 	 * debug information to save on precious kernel VA usage.
633 	 */
634 	hment_cache = kmem_cache_create("hment_t",
635 	    sizeof (hment_t), 0, NULL, NULL, NULL,
636 	    NULL, hat_memload_arena, flags);
637 
638 	hment_table = kmem_zalloc(hment_hash_entries * sizeof (*hment_table),
639 	    KM_SLEEP);
640 
641 	mlist_lock = kmem_zalloc(MLIST_NUM_LOCK * sizeof (kmutex_t), KM_SLEEP);
642 
643 	hash_lock = kmem_zalloc(HASH_NUM_LOCK * sizeof (kmutex_t), KM_SLEEP);
644 
645 	for (i = 0; i < hment_hash_entries; ++i)
646 		avl_create(&hment_table[i], hment_compare, sizeof (hment_t),
647 		    offsetof(hment_t, hm_hashlink));
648 
649 	for (i = 0; i < MLIST_NUM_LOCK; i++)
650 		mutex_init(&mlist_lock[i], NULL, MUTEX_DEFAULT, NULL);
651 
652 	for (i = 0; i < HASH_NUM_LOCK; i++)
653 		mutex_init(&hash_lock[i], NULL, MUTEX_DEFAULT, NULL);
654 
655 
656 }
657 
658 /*
659  * return the number of mappings to a page
660  *
661  * Note there is no ASSERT() that the MUTEX is held for this.
662  * Hence the return value might be inaccurate if this is called without
663  * doing an x86_hm_enter().
664  */
665 uint_t
hment_mapcnt(page_t * pp)666 hment_mapcnt(page_t *pp)
667 {
668 	uint_t cnt;
669 	uint_t szc;
670 	page_t *larger;
671 	hment_t	*hm;
672 
673 	x86_hm_enter(pp);
674 	if (pp->p_mapping == NULL)
675 		cnt = 0;
676 	else if (pp->p_embed)
677 		cnt = 1;
678 	else
679 		cnt = pp->p_share;
680 	x86_hm_exit(pp);
681 
682 	/*
683 	 * walk through all larger mapping sizes counting mappings
684 	 */
685 	for (szc = 1; szc <= pp->p_szc; ++szc) {
686 		larger = PP_GROUPLEADER(pp, szc);
687 		if (larger == pp)	/* don't double count large mappings */
688 			continue;
689 
690 		x86_hm_enter(larger);
691 		if (larger->p_mapping != NULL) {
692 			if (larger->p_embed &&
693 			    ((htable_t *)larger->p_mapping)->ht_level == szc) {
694 				++cnt;
695 			} else if (!larger->p_embed) {
696 				for (hm = larger->p_mapping; hm;
697 				    hm = hm->hm_next) {
698 					if (hm->hm_htable->ht_level == szc)
699 						++cnt;
700 				}
701 			}
702 		}
703 		x86_hm_exit(larger);
704 	}
705 	return (cnt);
706 }
707 
708 /*
709  * We need to steal an hment. Walk through all the page_t's until we
710  * find one that has multiple mappings. Unload one of the mappings
711  * and reclaim that hment. Note that we'll save/restart the starting
712  * page to try and spread the pain.
713  */
714 static page_t *last_page = NULL;
715 
716 static hment_t *
hment_steal(void)717 hment_steal(void)
718 {
719 	page_t *last = last_page;
720 	page_t *pp = last;
721 	hment_t *hm = NULL;
722 	hment_t *hm2;
723 	htable_t *ht;
724 	uint_t found_one = 0;
725 
726 	HATSTAT_INC(hs_hm_steals);
727 	if (pp == NULL)
728 		last = pp = page_first();
729 
730 	while (!found_one) {
731 		HATSTAT_INC(hs_hm_steal_exam);
732 		pp = page_next(pp);
733 		if (pp == NULL)
734 			pp = page_first();
735 
736 		/*
737 		 * The loop and function exit here if nothing found to steal.
738 		 */
739 		if (pp == last)
740 			return (NULL);
741 
742 		/*
743 		 * Only lock the page_t if it has hments.
744 		 */
745 		if (pp->p_mapping == NULL || pp->p_embed)
746 			continue;
747 
748 		/*
749 		 * Search the mapping list for a usable mapping.
750 		 */
751 		x86_hm_enter(pp);
752 		if (!pp->p_embed) {
753 			for (hm = pp->p_mapping; hm; hm = hm->hm_next) {
754 				ht = hm->hm_htable;
755 				if (ht->ht_hat != kas.a_hat &&
756 				    ht->ht_busy == 0 &&
757 				    ht->ht_lock_cnt == 0) {
758 					found_one = 1;
759 					break;
760 				}
761 			}
762 		}
763 		if (!found_one)
764 			x86_hm_exit(pp);
765 	}
766 
767 	/*
768 	 * Steal the mapping we found.  Note that hati_page_unmap() will
769 	 * do the x86_hm_exit().
770 	 */
771 	hm2 = hati_page_unmap(pp, ht, hm->hm_entry);
772 	ASSERT(hm2 == hm);
773 	last_page = pp;
774 	return (hm);
775 }
776