/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident   "%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/atomic.h>
#include <sys/bitmap.h>
#include <sys/systm.h>
#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/vm_dep.h>
#include <vm/hat_i86.h>
#include <sys/cmn_err.h>

/*
 * When pages are shared by more than one mapping, a list of these
 * structs hangs off of the page_t connected by the hm_next and hm_prev
 * fields.  Every hment is also indexed by a system-wide hash table, using
 * hm_hashnext to connect it to the chain of hments in a single hash
 * bucket.
 */
struct hment {
        struct hment    *hm_hashnext;   /* next mapping on hash chain */
        struct hment    *hm_next;       /* next mapping of same page */
        struct hment    *hm_prev;       /* previous mapping of same page */
        htable_t        *hm_htable;     /* corresponding htable_t */
        uint16_t        hm_entry;       /* index of pte in htable */
        uint16_t        hm_pad;         /* explicitly expose compiler padding */
#ifdef __amd64
        uint32_t        hm_pad2;        /* explicitly expose compiler padding */
#endif
};

/*
 * Value returned by hment_walk() when dealing with a single mapping
 * embedded in the page_t.
 */
#define HMENT_EMBEDDED ((hment_t *)(uintptr_t)1)
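/*
 * Note (summary of the code below): a page with only a single mapping does
 * not use an hment at all.  The page_t stores the htable pointer directly
 * in p_mapping, the entry index in p_mlentry, and sets p_embed.  Only when
 * a second mapping arrives is the embedded form promoted to a list of
 * hments (see hment_prepare() and hment_assign()), and hment_walk()
 * reports the embedded case with the HMENT_EMBEDDED sentinel above.
 */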
kmem_cache_t *hment_cache;

/*
 * The hment reserve is similar to the htable reserve, with the following
 * exception: hments are never needed for HAT kmem allocs.
 *
 * The hment_reserve_amount variable is used so that you can change its
 * value to zero via a kernel debugger to force stealing to get tested.
 */
#define HMENT_RESERVE_AMOUNT    (200)   /* currently a guess at right value. */
uint_t hment_reserve_amount = HMENT_RESERVE_AMOUNT;
kmutex_t hment_reserve_mutex;
uint_t hment_reserve_count;
hment_t *hment_reserve_pool;
extern kthread_t *hat_reserves_thread;

/*
 * Possible performance RFE: we might need to make this dynamic, perhaps
 * based on the number of pages in the system.
 */
#define HMENT_HASH_SIZE (64 * 1024)
static uint_t hment_hash_entries = HMENT_HASH_SIZE;
static hment_t **hment_hash;

/*
 * Lots of highly shared pages will have the same value for "entry" (consider
 * the starting address of "xterm" or "sh").  So we'll distinguish them by
 * adding the pfn of the page table into both the high bits (shifted) and
 * the low bits (unshifted) of the hash.
 * The shift by 9 corresponds to the range of values for entry (0..511).
 */
#define HMENT_HASH(pfn, entry)  (uint32_t) \
        ((((pfn) << 9) + entry + pfn) & (hment_hash_entries - 1))
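/*
 * Worked example (hypothetical values, assuming the default
 * hment_hash_entries of 64 * 1024, i.e. a mask of 0xffff): a page table
 * pfn of 0x1234 and entry 5 hash to
 *      ((0x1234 << 9) + 5 + 0x1234) & 0xffff = 0x247a39 & 0xffff = 0x7a39
 * so hments that share an entry index but live in different page tables
 * land in different buckets.
 */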
/*
 * "mlist_lock" is a hashed mutex lock for protecting per-page mapping
 * lists and "hash_lock" is a similar lock protecting the hment hash
 * table.  The hashed approach is taken to avoid the spatial overhead of
 * maintaining a separate lock for each page, while still achieving better
 * scalability than a single lock would allow.
 */
#define MLIST_NUM_LOCK  256             /* must be power of two */
static kmutex_t mlist_lock[MLIST_NUM_LOCK];

/*
 * the shift by 9 is so that all large pages don't use the same hash bucket
 */
#define MLIST_MUTEX(pp) \
        &mlist_lock[((pp)->p_pagenum + ((pp)->p_pagenum >> 9)) & \
        (MLIST_NUM_LOCK - 1)]

#define HASH_NUM_LOCK   256             /* must be power of two */
static kmutex_t hash_lock[HASH_NUM_LOCK];

#define HASH_MUTEX(idx) &hash_lock[(idx) & (HASH_NUM_LOCK - 1)]

static hment_t *hment_steal(void);

/*
 * put one hment onto the reserves list
 */
static void
hment_put_reserve(hment_t *hm)
{
        HATSTAT_INC(hs_hm_put_reserve);
        mutex_enter(&hment_reserve_mutex);
        hm->hm_next = hment_reserve_pool;
        hment_reserve_pool = hm;
        ++hment_reserve_count;
        mutex_exit(&hment_reserve_mutex);
}

/*
 * Take one hment from the reserve.
 */
static hment_t *
hment_get_reserve(void)
{
        hment_t *hm = NULL;

        /*
         * We rely on a "donation system" to refill the hment reserve
         * list, which only takes place when we are allocating hments for
         * user mappings.  It is theoretically possible that an incredibly
         * long string of kernel hment_alloc()s with no intervening user
         * hment_alloc()s could exhaust that pool.
         */
        HATSTAT_INC(hs_hm_get_reserve);
        mutex_enter(&hment_reserve_mutex);
        if (hment_reserve_count != 0) {
                hm = hment_reserve_pool;
                hment_reserve_pool = hm->hm_next;
                --hment_reserve_count;
        }
        mutex_exit(&hment_reserve_mutex);
        return (hm);
}
/*
 * Allocate an hment
 */
static hment_t *
hment_alloc()
{
        int km_flag = can_steal_post_boot ? KM_NOSLEEP : KM_SLEEP;
        hment_t *hm = NULL;
        int use_reserves = (use_boot_reserve ||
            curthread == hat_reserves_thread || panicstr != NULL);

        /*
         * If we aren't using the reserves, try using kmem to get an hment.
         * Donate any successful allocations to reserves if low.
         *
         * If we're in panic, resort to using the reserves.
         */
        HATSTAT_INC(hs_hm_alloc);
        if (!use_reserves) {
                for (;;) {
                        hm = kmem_cache_alloc(hment_cache, km_flag);
                        if (hment_reserve_count >= hment_reserve_amount ||
                            hm == NULL || panicstr != NULL ||
                            curthread == hat_reserves_thread)
                                break;
                        hment_put_reserve(hm);
                }
        }

        /*
         * If allocation failed, we need to tap the reserves or steal
         */
        if (hm == NULL) {
                if (use_reserves)
                        hm = hment_get_reserve();

                /*
                 * If we still haven't gotten an hment, attempt to steal one by
                 * victimizing a mapping in a user htable.
                 */
                if (hm == NULL && can_steal_post_boot)
                        hm = hment_steal();

                /*
                 * we're in dire straits, try the reserve
                 */
                if (hm == NULL)
                        hm = hment_get_reserve();

                /*
                 * still no hment is a serious problem.
                 */
                if (hm == NULL)
                        panic("hment_alloc(): no reserve, couldn't steal");
        }

        hm->hm_entry = 0;
        hm->hm_htable = NULL;
        hm->hm_hashnext = NULL;
        hm->hm_next = NULL;
        hm->hm_prev = NULL;
        return (hm);
}
/*
 * Free an hment, possibly to the reserves list when called from the
 * thread using the reserves.  For example, when freeing an hment during an
 * htable_steal(), we can't recurse into the kmem allocator, so we just
 * push the hment onto the reserve list.
 */
void
hment_free(hment_t *hm)
{
#ifdef DEBUG
        /*
         * zero out all fields to try and force any race conditions to segfault
         */
        bzero(hm, sizeof (*hm));
#endif
        HATSTAT_INC(hs_hm_free);
        if (curthread == hat_reserves_thread ||
            hment_reserve_count < hment_reserve_amount)
                hment_put_reserve(hm);
        else
                kmem_cache_free(hment_cache, hm);
}

int
x86_hm_held(page_t *pp)
{
        ASSERT(pp != NULL);
        return (MUTEX_HELD(MLIST_MUTEX(pp)));
}

void
x86_hm_enter(page_t *pp)
{
        ASSERT(pp != NULL);
        mutex_enter(MLIST_MUTEX(pp));
}

void
x86_hm_exit(page_t *pp)
{
        ASSERT(pp != NULL);
        mutex_exit(MLIST_MUTEX(pp));
}
/*
 * Internal routine to add a full hment to a page_t mapping list
 */
static void
hment_insert(hment_t *hm, page_t *pp)
{
        uint_t idx;

        ASSERT(x86_hm_held(pp));
        ASSERT(!pp->p_embed);

        /*
         * Add the hment to the page's mapping list.
         */
        ++pp->p_share;
        hm->hm_next = pp->p_mapping;
        if (pp->p_mapping != NULL)
                ((hment_t *)pp->p_mapping)->hm_prev = hm;
        pp->p_mapping = hm;

        /*
         * Add the hment to the system-wide hash table.
         */
        idx = HMENT_HASH(hm->hm_htable->ht_pfn, hm->hm_entry);

        mutex_enter(HASH_MUTEX(idx));
        hm->hm_hashnext = hment_hash[idx];
        hment_hash[idx] = hm;
        mutex_exit(HASH_MUTEX(idx));
}
/*
 * Prepare a mapping list entry for the given page.
 *
 * There are 4 different situations to deal with:
 *
 * - Adding the first mapping to a page_t as an embedded hment
 * - Refaulting on an existing embedded mapping
 * - Upgrading an embedded mapping when adding a 2nd mapping
 * - Adding another mapping to a page_t that already has multiple mappings
 *   (note we don't optimize for the refaulting case here)
 *
 * Due to competition with other threads that may be mapping/unmapping the
 * same page and the need to drop all locks while allocating hments, any or
 * all of these situations can occur (and in almost any order) in any given
 * call.  Isn't this fun!
 */
hment_t *
hment_prepare(htable_t *htable, uint_t entry, page_t *pp)
{
        hment_t *hm = NULL;

        ASSERT(x86_hm_held(pp));

        for (;;) {

                /*
                 * The most common case is establishing the first mapping to a
                 * page, so check that first.  This doesn't need any allocated
                 * hment.
                 */
                if (pp->p_mapping == NULL) {
                        ASSERT(!pp->p_embed);
                        ASSERT(pp->p_share == 0);
                        if (hm == NULL)
                                break;

                        /*
                         * we had an hment already, so free it and retry
                         */
                        goto free_and_continue;
                }

                /*
                 * If there is an embedded mapping, we may need to
                 * convert it to an hment.
                 */
                if (pp->p_embed) {

                        /* should point to htable */
                        ASSERT(pp->p_mapping != NULL);

                        /*
                         * If we are faulting on a pre-existing mapping
                         * there is no need to promote/allocate a new hment.
                         * This happens a lot due to segmap.
                         */
                        if (pp->p_mapping == htable && pp->p_mlentry == entry) {
                                if (hm == NULL)
                                        break;
                                goto free_and_continue;
                        }

                        /*
                         * If we have an hment allocated, use it to promote the
                         * existing embedded mapping.
                         */
                        if (hm != NULL) {
                                hm->hm_htable = pp->p_mapping;
                                hm->hm_entry = pp->p_mlentry;
                                pp->p_mapping = NULL;
                                pp->p_share = 0;
                                pp->p_embed = 0;
                                hment_insert(hm, pp);
                        }

                        /*
                         * We either didn't have an hment allocated or we just
                         * used it for the embedded mapping.  In either case,
                         * allocate another hment and restart.
                         */
                        goto allocate_and_continue;
                }

                /*
                 * Last possibility is that we're adding an hment to a list
                 * of hments.
                 */
                if (hm != NULL)
                        break;
allocate_and_continue:
                x86_hm_exit(pp);
                hm = hment_alloc();
                x86_hm_enter(pp);
                continue;

free_and_continue:
                /*
                 * we allocated an hment already, free it and retry
                 */
                x86_hm_exit(pp);
                hment_free(hm);
                hm = NULL;
                x86_hm_enter(pp);
        }
        ASSERT(x86_hm_held(pp));
        return (hm);
}
/*
 * Record a mapping list entry for the htable/entry to the given page.
 *
 * hment_prepare() should have properly set up the situation.
 */
void
hment_assign(htable_t *htable, uint_t entry, page_t *pp, hment_t *hm)
{
        ASSERT(x86_hm_held(pp));

        /*
         * The most common case is establishing the first mapping to a
         * page, so check that first.  This doesn't need any allocated
         * hment.
         */
        if (pp->p_mapping == NULL) {
                ASSERT(hm == NULL);
                ASSERT(!pp->p_embed);
                ASSERT(pp->p_share == 0);
                pp->p_embed = 1;
                pp->p_mapping = htable;
                pp->p_mlentry = entry;
                return;
        }

        /*
         * We should never get here with a pre-existing embedded mapping
         */
        ASSERT(!pp->p_embed);

        /*
         * add the new hment to the mapping list
         */
        ASSERT(hm != NULL);
        hm->hm_htable = htable;
        hm->hm_entry = entry;
        hment_insert(hm, pp);
}
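/*
 * Illustrative sketch (not lifted from the HAT code) of how a caller is
 * expected to pair hment_prepare() and hment_assign(), based on the
 * assertions they make: the mapping list lock is held across both calls,
 * hment_prepare() may drop and re-take it internally while allocating,
 * and whatever it returns (possibly NULL) is handed back to
 * hment_assign().
 *
 *      x86_hm_enter(pp);
 *      hm = hment_prepare(ht, entry, pp);
 *      ... establish the PTE in the htable ...
 *      hment_assign(ht, entry, pp, hm);
 *      x86_hm_exit(pp);
 */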
/*
 * Walk through the mappings for a page.
 *
 * must already have done an x86_hm_enter()
 */
hment_t *
hment_walk(page_t *pp, htable_t **ht, uint_t *entry, hment_t *prev)
{
        hment_t *hm;

        ASSERT(x86_hm_held(pp));

        if (pp->p_embed) {
                if (prev == NULL) {
                        *ht = (htable_t *)pp->p_mapping;
                        *entry = pp->p_mlentry;
                        hm = HMENT_EMBEDDED;
                } else {
                        ASSERT(prev == HMENT_EMBEDDED);
                        hm = NULL;
                }
        } else {
                if (prev == NULL) {
                        ASSERT(prev != HMENT_EMBEDDED);
                        hm = (hment_t *)pp->p_mapping;
                } else {
                        hm = prev->hm_next;
                }

                if (hm != NULL) {
                        *ht = hm->hm_htable;
                        *entry = hm->hm_entry;
                }
        }
        return (hm);
}
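/*
 * Illustrative use of hment_walk(), with the mapping list lock held across
 * the walk: pass NULL to start and the previous return value to continue;
 * the embedded-mapping case is handled internally via the HMENT_EMBEDDED
 * sentinel, so the caller only ever looks at ht/entry.
 *
 *      x86_hm_enter(pp);
 *      for (hm = hment_walk(pp, &ht, &entry, NULL); hm != NULL;
 *          hm = hment_walk(pp, &ht, &entry, hm)) {
 *              ... use ht and entry ...
 *      }
 *      x86_hm_exit(pp);
 */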
/*
 * Remove a mapping to a page from its mapping list.  Must have
 * the corresponding mapping list locked.
 * Finds the mapping list entry for the given htable/entry and
 * unlinks it from the mapping list.
 */
hment_t *
hment_remove(page_t *pp, htable_t *ht, uint_t entry)
{
        hment_t *prev = NULL;
        hment_t *hm;
        uint_t idx;

        ASSERT(x86_hm_held(pp));

        /*
         * Check if we have only one mapping embedded in the page_t.
         */
        if (pp->p_embed) {
                ASSERT(ht == (htable_t *)pp->p_mapping);
                ASSERT(entry == pp->p_mlentry);
                ASSERT(pp->p_share == 0);
                pp->p_mapping = NULL;
                pp->p_mlentry = 0;
                pp->p_embed = 0;
                return (NULL);
        }

        /*
         * Otherwise it must be in the list of hments.
         * Find the hment in the system-wide hash table and remove it.
         */
        ASSERT(pp->p_share != 0);
        idx = HMENT_HASH(ht->ht_pfn, entry);
        mutex_enter(HASH_MUTEX(idx));
        hm = hment_hash[idx];
        while (hm && (hm->hm_htable != ht || hm->hm_entry != entry)) {
                prev = hm;
                hm = hm->hm_hashnext;
        }
        if (hm == NULL)
                panic("hment_remove() mapping not found in hash table");

        if (prev)
                prev->hm_hashnext = hm->hm_hashnext;
        else
                hment_hash[idx] = hm->hm_hashnext;
        mutex_exit(HASH_MUTEX(idx));

        /*
         * Remove the hment from the page's mapping list
         */
        if (hm->hm_next)
                hm->hm_next->hm_prev = hm->hm_prev;
        if (hm->hm_prev)
                hm->hm_prev->hm_next = hm->hm_next;
        else
                pp->p_mapping = hm->hm_next;

        --pp->p_share;
        hm->hm_hashnext = NULL;
        hm->hm_next = NULL;
        hm->hm_prev = NULL;

        return (hm);
}

/*
 * Put initial hments in the reserve pool.
 */
void
hment_reserve(uint_t count)
{
        hment_t *hm;

        count += hment_reserve_amount;

        while (hment_reserve_count < count) {
                hm = kmem_cache_alloc(hment_cache, KM_NOSLEEP);
                if (hm == NULL)
                        return;
                hment_put_reserve(hm);
        }
}
/*
 * Readjust the hment reserves after they may have been used.
 */
void
hment_adjust_reserve()
{
        hment_t *hm;

        /*
         * Free up any excess reserves
         */
        while (hment_reserve_count > hment_reserve_amount) {
                ASSERT(curthread != hat_reserves_thread);
                hm = hment_get_reserve();
                if (hm == NULL)
                        return;
                hment_free(hm);
        }
}

/*
 * initialize hment data structures
 */
void
hment_init(void)
{
        int i;
        int flags = KMC_NOHASH | KMC_NODEBUG;

        /*
         * Initialize the kmem cache.  On 32 bit kernels we shut off
         * debug information to save on precious kernel VA usage.
         */
        hment_cache = kmem_cache_create("hment_t",
            sizeof (hment_t), 0, NULL, NULL, NULL,
            NULL, hat_memload_arena, flags);

        hment_hash = kmem_zalloc(hment_hash_entries * sizeof (hment_t *),
            KM_SLEEP);

        for (i = 0; i < MLIST_NUM_LOCK; i++)
                mutex_init(&mlist_lock[i], NULL, MUTEX_DEFAULT, NULL);

        for (i = 0; i < HASH_NUM_LOCK; i++)
                mutex_init(&hash_lock[i], NULL, MUTEX_DEFAULT, NULL);
}
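/*
 * Footprint note: with the default hment_hash_entries of 64 * 1024, the
 * hash table allocated above is 64K * sizeof (hment_t *), i.e. 512K on a
 * 64-bit kernel and 256K on a 32-bit kernel; see the RFE comment at
 * HMENT_HASH_SIZE about possibly sizing it dynamically.
 */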
/*
 * return the number of mappings to a page
 *
 * Note there is no ASSERT() that the MUTEX is held for this.
 * Hence the return value might be inaccurate if this is called without
 * doing an x86_hm_enter().
 */
uint_t
hment_mapcnt(page_t *pp)
{
        uint_t cnt;
        uint_t szc;
        page_t *larger;
        hment_t *hm;

        x86_hm_enter(pp);
        if (pp->p_mapping == NULL)
                cnt = 0;
        else if (pp->p_embed)
                cnt = 1;
        else
                cnt = pp->p_share;
        x86_hm_exit(pp);

        /*
         * walk through all larger mapping sizes counting mappings
         */
        for (szc = 1; szc <= pp->p_szc; ++szc) {
                larger = PP_GROUPLEADER(pp, szc);
                if (larger == pp)       /* don't double count large mappings */
                        continue;

                x86_hm_enter(larger);
                if (larger->p_mapping != NULL) {
                        if (larger->p_embed &&
                            ((htable_t *)larger->p_mapping)->ht_level == szc) {
                                ++cnt;
                        } else if (!larger->p_embed) {
                                for (hm = larger->p_mapping; hm;
                                    hm = hm->hm_next) {
                                        if (hm->hm_htable->ht_level == szc)
                                                ++cnt;
                                }
                        }
                }
                x86_hm_exit(larger);
        }
        return (cnt);
}
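/*
 * Note on the loop above: for a page belonging to a large page, mappings
 * made at each larger size hang off the group leader page_t for that size,
 * and only those whose htable is at the matching level are counted.  So a
 * 4K constituent of a 2M page reports its own 4K mappings plus any 2M
 * mappings of the enclosing large page, without double counting.
 */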
/*
 * We need to steal an hment.  Walk through all the page_t's until we
 * find one that has multiple mappings.  Unload one of the mappings
 * and reclaim that hment.  Note that we'll save/restart the starting
 * page to try and spread the pain.
 */
static page_t *last_page = NULL;

static hment_t *
hment_steal(void)
{
        page_t *last = last_page;
        page_t *pp = last;
        hment_t *hm = NULL;
        hment_t *hm2;
        htable_t *ht;
        uint_t found_one = 0;

        HATSTAT_INC(hs_hm_steals);
        if (pp == NULL)
                last = pp = page_first();

        while (!found_one) {
                HATSTAT_INC(hs_hm_steal_exam);
                pp = page_next(pp);
                if (pp == NULL)
                        pp = page_first();

                /*
                 * The loop and function exit here if nothing found to steal.
                 */
                if (pp == last)
                        return (NULL);

                /*
                 * Only lock the page_t if it has hments.
                 */
                if (pp->p_mapping == NULL || pp->p_embed)
                        continue;

                /*
                 * Search the mapping list for a usable mapping.
                 */
                x86_hm_enter(pp);
                if (!pp->p_embed) {
                        for (hm = pp->p_mapping; hm; hm = hm->hm_next) {
                                ht = hm->hm_htable;
                                if (ht->ht_hat != kas.a_hat &&
                                    ht->ht_busy == 0 &&
                                    ht->ht_lock_cnt == 0) {
                                        found_one = 1;
                                        break;
                                }
                        }
                }
                if (!found_one)
                        x86_hm_exit(pp);
        }

        /*
         * Steal the mapping we found.  Note that hati_page_unmap() will
         * do the x86_hm_exit().
         */
        hm2 = hati_page_unmap(pp, ht, hm->hm_entry);
        ASSERT(hm2 == hm);
        last_page = pp;
        return (hm);
}