xref: /titanic_50/usr/src/uts/i86pc/vm/hment.c (revision b0aab85ccbd3a1807f0b7b8fa5bd0b7526f6cc12)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/sysmacros.h>
30 #include <sys/kmem.h>
31 #include <sys/atomic.h>
32 #include <sys/bitmap.h>
33 #include <sys/systm.h>
34 #include <vm/seg_kmem.h>
35 #include <vm/hat.h>
36 #include <vm/vm_dep.h>
37 #include <vm/hat_i86.h>
38 #include <sys/cmn_err.h>
39 #include <sys/avl.h>
40 
41 
42 /*
43  * When pages are shared by more than one mapping, a list of these
44  * structs hangs off of the page_t connected by the hm_next and hm_prev
45  * fields.  Every hment is also indexed by a system-wide hash table, using
46  * hm_hashlink to connect the hments within each hash bucket.
47  */
48 struct hment {
49 	avl_node_t	hm_hashlink;	/* links for hash table */
50 	struct hment	*hm_next;	/* next mapping of same page */
51 	struct hment	*hm_prev;	/* previous mapping of same page */
52 	htable_t	*hm_htable;	/* corresponding htable_t */
53 	pfn_t		hm_pfn;		/* mapping page frame number */
54 	uint16_t	hm_entry;	/* index of pte in htable */
55 	uint16_t	hm_pad;		/* explicitly expose compiler padding */
56 #ifdef __amd64
57 	uint32_t	hm_pad2;	/* explicitly expose compiler padding */
58 #endif
59 };
60 
61 /*
62  * Value returned by hment_walk() when dealing with a single mapping
63  * embedded in the page_t.
64  */
65 #define	HMENT_EMBEDDED ((hment_t *)(uintptr_t)1)
66 
67 kmem_cache_t *hment_cache;
68 
69 /*
70  * The hment reserve is similar to the htable reserve, with the following
71  * exception. Hment's are never needed for HAT kmem allocs.
72  *
73  * The hment_reserve_amount variable is used, so that you can change it's
74  * value to zero via a kernel debugger to force stealing to get tested.
75  */
76 #define	HMENT_RESERVE_AMOUNT	(200)	/* currently a guess at right value. */
77 uint_t hment_reserve_amount = HMENT_RESERVE_AMOUNT;
78 kmutex_t hment_reserve_mutex;
79 uint_t	hment_reserve_count;
80 hment_t	*hment_reserve_pool;
81 
82 /*
83  * All hments are stored in a system wide hash of AVL trees.
84  */
85 #define	HMENT_HASH_SIZE (64 * 1024)
86 static uint_t hment_hash_entries = HMENT_HASH_SIZE;
87 static avl_tree_t *hment_table;
88 
89 /*
90  * Lots of highly shared pages will have the same value for "entry" (consider
91  * the starting address of "xterm" or "sh"). So we'll distinguish them by
92  * adding the pfn of the page table into both the high bits.
93  * The shift by 9 corresponds to the range of values for entry (0..511).
94  */
95 #define	HMENT_HASH(pfn, entry) (uint32_t) 	\
96 	((((pfn) << 9) + entry + pfn) & (hment_hash_entries - 1))
97 
98 /*
99  * "mlist_lock" is a hashed mutex lock for protecting per-page mapping
100  * lists and "hash_lock" is a similar lock protecting the hment hash
101  * table.  The hashed approach is taken to avoid the spatial overhead of
102  * maintaining a separate lock for each page, while still achieving better
103  * scalability than a single lock would allow.
104  */
105 #define	MLIST_NUM_LOCK	2048		/* must be power of two */
106 static kmutex_t *mlist_lock;
107 
108 /*
109  * the shift by 9 is so that all large pages don't use the same hash bucket
110  */
111 #define	MLIST_MUTEX(pp) \
112 	&mlist_lock[((pp)->p_pagenum + ((pp)->p_pagenum >> 9)) & \
113 	(MLIST_NUM_LOCK - 1)]
114 
115 #define	HASH_NUM_LOCK	2048		/* must be power of two */
116 static kmutex_t *hash_lock;
117 
118 #define	HASH_MUTEX(idx) &hash_lock[(idx) & (HASH_NUM_LOCK-1)]
119 
120 static avl_node_t null_avl_link;	/* always zero */
121 static hment_t *hment_steal(void);
122 
123 /*
124  * Utility to compare hment_t's for use in AVL tree. The ordering
125  * is entirely arbitrary and is just so that the AVL algorithm works.
126  */
127 static int
hment_compare(const void * hm1,const void * hm2)128 hment_compare(const void *hm1, const void *hm2)
129 {
130 	hment_t *h1 = (hment_t *)hm1;
131 	hment_t *h2 = (hment_t *)hm2;
132 	long diff;
133 
134 	diff = (uintptr_t)h1->hm_htable - (uintptr_t)h2->hm_htable;
135 	if (diff == 0) {
136 		diff = h1->hm_entry - h2->hm_entry;
137 		if (diff == 0)
138 			diff = h1->hm_pfn - h2->hm_pfn;
139 	}
140 	if (diff < 0)
141 		diff = -1;
142 	else if (diff > 0)
143 		diff = 1;
144 	return (diff);
145 }
146 
147 /*
148  * put one hment onto the reserves list
149  */
150 static void
hment_put_reserve(hment_t * hm)151 hment_put_reserve(hment_t *hm)
152 {
153 	HATSTAT_INC(hs_hm_put_reserve);
154 	mutex_enter(&hment_reserve_mutex);
155 	hm->hm_next = hment_reserve_pool;
156 	hment_reserve_pool = hm;
157 	++hment_reserve_count;
158 	mutex_exit(&hment_reserve_mutex);
159 }
160 
161 /*
162  * Take one hment from the reserve.
163  */
164 static hment_t *
hment_get_reserve(void)165 hment_get_reserve(void)
166 {
167 	hment_t *hm = NULL;
168 
169 	/*
170 	 * We rely on a "donation system" to refill the hment reserve
171 	 * list, which only takes place when we are allocating hments for
172 	 * user mappings.  It is theoretically possible that an incredibly
173 	 * long string of kernel hment_alloc()s with no intervening user
174 	 * hment_alloc()s could exhaust that pool.
175 	 */
176 	HATSTAT_INC(hs_hm_get_reserve);
177 	mutex_enter(&hment_reserve_mutex);
178 	if (hment_reserve_count != 0) {
179 		hm = hment_reserve_pool;
180 		hment_reserve_pool = hm->hm_next;
181 		--hment_reserve_count;
182 	}
183 	mutex_exit(&hment_reserve_mutex);
184 	return (hm);
185 }
186 
187 /*
188  * Allocate an hment
189  */
190 static hment_t *
hment_alloc()191 hment_alloc()
192 {
193 	int km_flag = can_steal_post_boot ? KM_NOSLEEP : KM_SLEEP;
194 	hment_t	*hm = NULL;
195 
196 	/*
197 	 * If we aren't using the reserves, try using kmem to get an hment.
198 	 * Donate any successful allocations to reserves if low.
199 	 *
200 	 * If we're in panic, resort to using the reserves.
201 	 */
202 	HATSTAT_INC(hs_hm_alloc);
203 	if (!USE_HAT_RESERVES()) {
204 		for (;;) {
205 			hm = kmem_cache_alloc(hment_cache, km_flag);
206 			if (hm == NULL ||
207 			    USE_HAT_RESERVES() ||
208 			    hment_reserve_count >= hment_reserve_amount)
209 				break;
210 			hment_put_reserve(hm);
211 		}
212 	}
213 
214 	/*
215 	 * If allocation failed, we need to tap the reserves or steal
216 	 */
217 	if (hm == NULL) {
218 		if (USE_HAT_RESERVES())
219 			hm = hment_get_reserve();
220 
221 		/*
222 		 * If we still haven't gotten an hment, attempt to steal one by
223 		 * victimizing a mapping in a user htable.
224 		 */
225 		if (hm == NULL && can_steal_post_boot)
226 			hm = hment_steal();
227 
228 		/*
229 		 * we're in dire straights, try the reserve
230 		 */
231 		if (hm == NULL)
232 			hm = hment_get_reserve();
233 
234 		/*
235 		 * still no hment is a serious problem.
236 		 */
237 		if (hm == NULL)
238 			panic("hment_alloc(): no reserve, couldn't steal");
239 	}
240 
241 
242 	hm->hm_entry = 0;
243 	hm->hm_htable = NULL;
244 	hm->hm_hashlink = null_avl_link;
245 	hm->hm_next = NULL;
246 	hm->hm_prev = NULL;
247 	hm->hm_pfn = PFN_INVALID;
248 	return (hm);
249 }
250 
251 /*
252  * Free an hment, possibly to the reserves list when called from the
253  * thread using the reserves. For example, when freeing an hment during an
254  * htable_steal(), we can't recurse into the kmem allocator, so we just
255  * push the hment onto the reserve list.
256  */
257 void
hment_free(hment_t * hm)258 hment_free(hment_t *hm)
259 {
260 #ifdef DEBUG
261 	/*
262 	 * zero out all fields to try and force any race conditions to segfault
263 	 */
264 	bzero(hm, sizeof (*hm));
265 #endif
266 	HATSTAT_INC(hs_hm_free);
267 	if (USE_HAT_RESERVES() ||
268 	    hment_reserve_count < hment_reserve_amount) {
269 		hment_put_reserve(hm);
270 	} else {
271 		kmem_cache_free(hment_cache, hm);
272 		hment_adjust_reserve();
273 	}
274 }
275 
276 /*
277  * These must test for mlist_lock not having been allocated yet.
278  * We just ignore locking in that case, as it means were in early
279  * single threaded startup.
280  */
281 int
x86_hm_held(page_t * pp)282 x86_hm_held(page_t *pp)
283 {
284 	ASSERT(pp != NULL);
285 	if (mlist_lock == NULL)
286 		return (1);
287 	return (MUTEX_HELD(MLIST_MUTEX(pp)));
288 }
289 
290 void
x86_hm_enter(page_t * pp)291 x86_hm_enter(page_t *pp)
292 {
293 	ASSERT(pp != NULL);
294 	if (mlist_lock != NULL)
295 		mutex_enter(MLIST_MUTEX(pp));
296 }
297 
298 void
x86_hm_exit(page_t * pp)299 x86_hm_exit(page_t *pp)
300 {
301 	ASSERT(pp != NULL);
302 	if (mlist_lock != NULL)
303 		mutex_exit(MLIST_MUTEX(pp));
304 }
305 
306 /*
307  * Internal routine to add a full hment to a page_t mapping list
308  */
309 static void
hment_insert(hment_t * hm,page_t * pp)310 hment_insert(hment_t *hm, page_t *pp)
311 {
312 	uint_t		idx;
313 
314 	ASSERT(x86_hm_held(pp));
315 	ASSERT(!pp->p_embed);
316 
317 	/*
318 	 * Add the hment to the page's mapping list.
319 	 */
320 	++pp->p_share;
321 	hm->hm_next = pp->p_mapping;
322 	if (pp->p_mapping != NULL)
323 		((hment_t *)pp->p_mapping)->hm_prev = hm;
324 	pp->p_mapping = hm;
325 
326 	/*
327 	 * Add the hment to the system-wide hash table.
328 	 */
329 	idx = HMENT_HASH(hm->hm_htable->ht_pfn, hm->hm_entry);
330 
331 	mutex_enter(HASH_MUTEX(idx));
332 	avl_add(&hment_table[idx], hm);
333 	mutex_exit(HASH_MUTEX(idx));
334 }
335 
336 /*
337  * Prepare a mapping list entry to the given page.
338  *
339  * There are 4 different situations to deal with:
340  *
341  * - Adding the first mapping to a page_t as an embedded hment
342  * - Refaulting on an existing embedded mapping
343  * - Upgrading an embedded mapping when adding a 2nd mapping
344  * - Adding another mapping to a page_t that already has multiple mappings
345  *	 note we don't optimized for the refaulting case here.
346  *
347  * Due to competition with other threads that may be mapping/unmapping the
348  * same page and the need to drop all locks while allocating hments, any or
349  * all of the 3 situations can occur (and in almost any order) in any given
350  * call. Isn't this fun!
351  */
352 hment_t *
hment_prepare(htable_t * htable,uint_t entry,page_t * pp)353 hment_prepare(htable_t *htable, uint_t entry, page_t *pp)
354 {
355 	hment_t		*hm = NULL;
356 
357 	ASSERT(x86_hm_held(pp));
358 
359 	for (;;) {
360 
361 		/*
362 		 * The most common case is establishing the first mapping to a
363 		 * page, so check that first. This doesn't need any allocated
364 		 * hment.
365 		 */
366 		if (pp->p_mapping == NULL) {
367 			ASSERT(!pp->p_embed);
368 			ASSERT(pp->p_share == 0);
369 			if (hm == NULL)
370 				break;
371 
372 			/*
373 			 * we had an hment already, so free it and retry
374 			 */
375 			goto free_and_continue;
376 		}
377 
378 		/*
379 		 * If there is an embedded mapping, we may need to
380 		 * convert it to an hment.
381 		 */
382 		if (pp->p_embed) {
383 
384 			/* should point to htable */
385 			ASSERT(pp->p_mapping != NULL);
386 
387 			/*
388 			 * If we are faulting on a pre-existing mapping
389 			 * there is no need to promote/allocate a new hment.
390 			 * This happens a lot due to segmap.
391 			 */
392 			if (pp->p_mapping == htable && pp->p_mlentry == entry) {
393 				if (hm == NULL)
394 					break;
395 				goto free_and_continue;
396 			}
397 
398 			/*
399 			 * If we have an hment allocated, use it to promote the
400 			 * existing embedded mapping.
401 			 */
402 			if (hm != NULL) {
403 				hm->hm_htable = pp->p_mapping;
404 				hm->hm_entry = pp->p_mlentry;
405 				hm->hm_pfn = pp->p_pagenum;
406 				pp->p_mapping = NULL;
407 				pp->p_share = 0;
408 				pp->p_embed = 0;
409 				hment_insert(hm, pp);
410 			}
411 
412 			/*
413 			 * We either didn't have an hment allocated or we just
414 			 * used it for the embedded mapping. In either case,
415 			 * allocate another hment and restart.
416 			 */
417 			goto allocate_and_continue;
418 		}
419 
420 		/*
421 		 * Last possibility is that we're adding an hment to a list
422 		 * of hments.
423 		 */
424 		if (hm != NULL)
425 			break;
426 allocate_and_continue:
427 		x86_hm_exit(pp);
428 		hm = hment_alloc();
429 		x86_hm_enter(pp);
430 		continue;
431 
432 free_and_continue:
433 		/*
434 		 * we allocated an hment already, free it and retry
435 		 */
436 		x86_hm_exit(pp);
437 		hment_free(hm);
438 		hm = NULL;
439 		x86_hm_enter(pp);
440 	}
441 	ASSERT(x86_hm_held(pp));
442 	return (hm);
443 }
444 
445 /*
446  * Record a mapping list entry for the htable/entry to the given page.
447  *
448  * hment_prepare() should have properly set up the situation.
449  */
450 void
hment_assign(htable_t * htable,uint_t entry,page_t * pp,hment_t * hm)451 hment_assign(htable_t *htable, uint_t entry, page_t *pp, hment_t *hm)
452 {
453 	ASSERT(x86_hm_held(pp));
454 
455 	/*
456 	 * The most common case is establishing the first mapping to a
457 	 * page, so check that first. This doesn't need any allocated
458 	 * hment.
459 	 */
460 	if (pp->p_mapping == NULL) {
461 		ASSERT(hm == NULL);
462 		ASSERT(!pp->p_embed);
463 		ASSERT(pp->p_share == 0);
464 		pp->p_embed = 1;
465 		pp->p_mapping = htable;
466 		pp->p_mlentry = entry;
467 		return;
468 	}
469 
470 	/*
471 	 * We should never get here with a pre-existing embedded maping
472 	 */
473 	ASSERT(!pp->p_embed);
474 
475 	/*
476 	 * add the new hment to the mapping list
477 	 */
478 	ASSERT(hm != NULL);
479 	hm->hm_htable = htable;
480 	hm->hm_entry = entry;
481 	hm->hm_pfn = pp->p_pagenum;
482 	hment_insert(hm, pp);
483 }
484 
485 /*
486  * Walk through the mappings for a page.
487  *
488  * must already have done an x86_hm_enter()
489  */
490 hment_t *
hment_walk(page_t * pp,htable_t ** ht,uint_t * entry,hment_t * prev)491 hment_walk(page_t *pp, htable_t **ht, uint_t *entry, hment_t *prev)
492 {
493 	hment_t		*hm;
494 
495 	ASSERT(x86_hm_held(pp));
496 
497 	if (pp->p_embed) {
498 		if (prev == NULL) {
499 			*ht = (htable_t *)pp->p_mapping;
500 			*entry = pp->p_mlentry;
501 			hm = HMENT_EMBEDDED;
502 		} else {
503 			ASSERT(prev == HMENT_EMBEDDED);
504 			hm = NULL;
505 		}
506 	} else {
507 		if (prev == NULL) {
508 			ASSERT(prev != HMENT_EMBEDDED);
509 			hm = (hment_t *)pp->p_mapping;
510 		} else {
511 			hm = prev->hm_next;
512 		}
513 
514 		if (hm != NULL) {
515 			*ht = hm->hm_htable;
516 			*entry = hm->hm_entry;
517 		}
518 	}
519 	return (hm);
520 }
521 
522 /*
523  * Remove a mapping to a page from its mapping list. Must have
524  * the corresponding mapping list locked.
525  * Finds the mapping list entry with the given pte_t and
526  * unlinks it from the mapping list.
527  */
528 hment_t *
hment_remove(page_t * pp,htable_t * ht,uint_t entry)529 hment_remove(page_t *pp, htable_t *ht, uint_t entry)
530 {
531 	hment_t		dummy;
532 	avl_index_t	where;
533 	hment_t		*hm;
534 	uint_t		idx;
535 
536 	ASSERT(x86_hm_held(pp));
537 
538 	/*
539 	 * Check if we have only one mapping embedded in the page_t.
540 	 */
541 	if (pp->p_embed) {
542 		ASSERT(ht == (htable_t *)pp->p_mapping);
543 		ASSERT(entry == pp->p_mlentry);
544 		ASSERT(pp->p_share == 0);
545 		pp->p_mapping = NULL;
546 		pp->p_mlentry = 0;
547 		pp->p_embed = 0;
548 		return (NULL);
549 	}
550 
551 	/*
552 	 * Otherwise it must be in the list of hments.
553 	 * Find the hment in the system-wide hash table and remove it.
554 	 */
555 	ASSERT(pp->p_share != 0);
556 	dummy.hm_htable = ht;
557 	dummy.hm_entry = entry;
558 	dummy.hm_pfn = pp->p_pagenum;
559 	idx = HMENT_HASH(ht->ht_pfn, entry);
560 	mutex_enter(HASH_MUTEX(idx));
561 	hm = avl_find(&hment_table[idx], &dummy, &where);
562 	if (hm == NULL)
563 		panic("hment_remove() missing in hash table pp=%lx, ht=%lx,"
564 		    "entry=0x%x hash index=0x%x", (uintptr_t)pp, (uintptr_t)ht,
565 		    entry, idx);
566 	avl_remove(&hment_table[idx], hm);
567 	mutex_exit(HASH_MUTEX(idx));
568 
569 	/*
570 	 * Remove the hment from the page's mapping list
571 	 */
572 	if (hm->hm_next)
573 		hm->hm_next->hm_prev = hm->hm_prev;
574 	if (hm->hm_prev)
575 		hm->hm_prev->hm_next = hm->hm_next;
576 	else
577 		pp->p_mapping = hm->hm_next;
578 
579 	--pp->p_share;
580 	hm->hm_hashlink = null_avl_link;
581 	hm->hm_next = NULL;
582 	hm->hm_prev = NULL;
583 
584 	return (hm);
585 }
586 
587 /*
588  * Put initial hment's in the reserve pool.
589  */
590 void
hment_reserve(uint_t count)591 hment_reserve(uint_t count)
592 {
593 	hment_t	*hm;
594 
595 	count += hment_reserve_amount;
596 
597 	while (hment_reserve_count < count) {
598 		hm = kmem_cache_alloc(hment_cache, KM_NOSLEEP);
599 		if (hm == NULL)
600 			return;
601 		hment_put_reserve(hm);
602 	}
603 }
604 
605 /*
606  * Readjust the hment reserves after they may have been used.
607  */
608 void
hment_adjust_reserve()609 hment_adjust_reserve()
610 {
611 	hment_t	*hm;
612 
613 	/*
614 	 * Free up any excess reserves
615 	 */
616 	while (hment_reserve_count > hment_reserve_amount &&
617 	    !USE_HAT_RESERVES()) {
618 		hm = hment_get_reserve();
619 		if (hm == NULL)
620 			return;
621 		kmem_cache_free(hment_cache, hm);
622 	}
623 }
624 
625 /*
626  * initialize hment data structures
627  */
628 void
hment_init(void)629 hment_init(void)
630 {
631 	int i;
632 	int flags = KMC_NOHASH | KMC_NODEBUG;
633 
634 	/*
635 	 * Initialize kmem caches. On 32 bit kernel's we shut off
636 	 * debug information to save on precious kernel VA usage.
637 	 */
638 	hment_cache = kmem_cache_create("hment_t",
639 	    sizeof (hment_t), 0, NULL, NULL, NULL,
640 	    NULL, hat_memload_arena, flags);
641 
642 	hment_table = kmem_zalloc(hment_hash_entries * sizeof (*hment_table),
643 	    KM_SLEEP);
644 
645 	mlist_lock = kmem_zalloc(MLIST_NUM_LOCK * sizeof (kmutex_t), KM_SLEEP);
646 
647 	hash_lock = kmem_zalloc(HASH_NUM_LOCK * sizeof (kmutex_t), KM_SLEEP);
648 
649 	for (i = 0; i < hment_hash_entries; ++i)
650 		avl_create(&hment_table[i], hment_compare, sizeof (hment_t),
651 		    offsetof(hment_t, hm_hashlink));
652 
653 	for (i = 0; i < MLIST_NUM_LOCK; i++)
654 		mutex_init(&mlist_lock[i], NULL, MUTEX_DEFAULT, NULL);
655 
656 	for (i = 0; i < HASH_NUM_LOCK; i++)
657 		mutex_init(&hash_lock[i], NULL, MUTEX_DEFAULT, NULL);
658 
659 
660 }
661 
662 /*
663  * return the number of mappings to a page
664  *
665  * Note there is no ASSERT() that the MUTEX is held for this.
666  * Hence the return value might be inaccurate if this is called without
667  * doing an x86_hm_enter().
668  */
669 uint_t
hment_mapcnt(page_t * pp)670 hment_mapcnt(page_t *pp)
671 {
672 	uint_t cnt;
673 	uint_t szc;
674 	page_t *larger;
675 	hment_t	*hm;
676 
677 	x86_hm_enter(pp);
678 	if (pp->p_mapping == NULL)
679 		cnt = 0;
680 	else if (pp->p_embed)
681 		cnt = 1;
682 	else
683 		cnt = pp->p_share;
684 	x86_hm_exit(pp);
685 
686 	/*
687 	 * walk through all larger mapping sizes counting mappings
688 	 */
689 	for (szc = 1; szc <= pp->p_szc; ++szc) {
690 		larger = PP_GROUPLEADER(pp, szc);
691 		if (larger == pp)	/* don't double count large mappings */
692 			continue;
693 
694 		x86_hm_enter(larger);
695 		if (larger->p_mapping != NULL) {
696 			if (larger->p_embed &&
697 			    ((htable_t *)larger->p_mapping)->ht_level == szc) {
698 				++cnt;
699 			} else if (!larger->p_embed) {
700 				for (hm = larger->p_mapping; hm;
701 				    hm = hm->hm_next) {
702 					if (hm->hm_htable->ht_level == szc)
703 						++cnt;
704 				}
705 			}
706 		}
707 		x86_hm_exit(larger);
708 	}
709 	return (cnt);
710 }
711 
712 /*
713  * We need to steal an hment. Walk through all the page_t's until we
714  * find one that has multiple mappings. Unload one of the mappings
715  * and reclaim that hment. Note that we'll save/restart the starting
716  * page to try and spread the pain.
717  */
718 static page_t *last_page = NULL;
719 
720 static hment_t *
hment_steal(void)721 hment_steal(void)
722 {
723 	page_t *last = last_page;
724 	page_t *pp = last;
725 	hment_t *hm = NULL;
726 	hment_t *hm2;
727 	htable_t *ht;
728 	uint_t found_one = 0;
729 
730 	HATSTAT_INC(hs_hm_steals);
731 	if (pp == NULL)
732 		last = pp = page_first();
733 
734 	while (!found_one) {
735 		HATSTAT_INC(hs_hm_steal_exam);
736 		pp = page_next(pp);
737 		if (pp == NULL)
738 			pp = page_first();
739 
740 		/*
741 		 * The loop and function exit here if nothing found to steal.
742 		 */
743 		if (pp == last)
744 			return (NULL);
745 
746 		/*
747 		 * Only lock the page_t if it has hments.
748 		 */
749 		if (pp->p_mapping == NULL || pp->p_embed)
750 			continue;
751 
752 		/*
753 		 * Search the mapping list for a usable mapping.
754 		 */
755 		x86_hm_enter(pp);
756 		if (!pp->p_embed) {
757 			for (hm = pp->p_mapping; hm; hm = hm->hm_next) {
758 				ht = hm->hm_htable;
759 				if (ht->ht_hat != kas.a_hat &&
760 				    ht->ht_busy == 0 &&
761 				    ht->ht_lock_cnt == 0) {
762 					found_one = 1;
763 					break;
764 				}
765 			}
766 		}
767 		if (!found_one)
768 			x86_hm_exit(pp);
769 	}
770 
771 	/*
772 	 * Steal the mapping we found.  Note that hati_page_unmap() will
773 	 * do the x86_hm_exit().
774 	 */
775 	hm2 = hati_page_unmap(pp, ht, hm->hm_entry);
776 	ASSERT(hm2 == hm);
777 	last_page = pp;
778 	return (hm);
779 }
780