/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*7c478bd9Sstevel@tonic-gate 29*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 30*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 31*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 32*7c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 33*7c478bd9Sstevel@tonic-gate #include <sys/bitmap.h> 34*7c478bd9Sstevel@tonic-gate #include <sys/machparam.h> 35*7c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> 36*7c478bd9Sstevel@tonic-gate #include <sys/mman.h> 37*7c478bd9Sstevel@tonic-gate #include <sys/systm.h> 38*7c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 39*7c478bd9Sstevel@tonic-gate #include <sys/thread.h> 40*7c478bd9Sstevel@tonic-gate #include <sys/proc.h> 41*7c478bd9Sstevel@tonic-gate #include <sys/cpu.h> 42*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 43*7c478bd9Sstevel@tonic-gate #include <sys/disp.h> 44*7c478bd9Sstevel@tonic-gate #include <sys/vmem.h> 45*7c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h> 46*7c478bd9Sstevel@tonic-gate #include <sys/promif.h> 47*7c478bd9Sstevel@tonic-gate #include <sys/var.h> 48*7c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h> 49*7c478bd9Sstevel@tonic-gate #include <sys/bootconf.h> 50*7c478bd9Sstevel@tonic-gate #include <sys/dumphdr.h> 51*7c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 52*7c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h> 53*7c478bd9Sstevel@tonic-gate #include <vm/hat.h> 54*7c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h> 55*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 56*7c478bd9Sstevel@tonic-gate 57*7c478bd9Sstevel@tonic-gate kmem_cache_t *htable_cache; 58*7c478bd9Sstevel@tonic-gate extern cpuset_t khat_cpuset; 59*7c478bd9Sstevel@tonic-gate 60*7c478bd9Sstevel@tonic-gate /* 61*7c478bd9Sstevel@tonic-gate * The variable htable_reserve_amount, rather than HTABLE_RESERVE_AMOUNT, 62*7c478bd9Sstevel@tonic-gate * is used in order to facilitate testing of 
the htable_steal() code. 63*7c478bd9Sstevel@tonic-gate * By resetting htable_reserve_amount to a lower value, we can force 64*7c478bd9Sstevel@tonic-gate * stealing to occur. The reserve amount is a guess to get us through boot. 65*7c478bd9Sstevel@tonic-gate */ 66*7c478bd9Sstevel@tonic-gate #define HTABLE_RESERVE_AMOUNT (200) 67*7c478bd9Sstevel@tonic-gate uint_t htable_reserve_amount = HTABLE_RESERVE_AMOUNT; 68*7c478bd9Sstevel@tonic-gate kmutex_t htable_reserve_mutex; 69*7c478bd9Sstevel@tonic-gate uint_t htable_reserve_cnt; 70*7c478bd9Sstevel@tonic-gate htable_t *htable_reserve_pool; 71*7c478bd9Sstevel@tonic-gate 72*7c478bd9Sstevel@tonic-gate /* 73*7c478bd9Sstevel@tonic-gate * This variable is so that we can tune this via /etc/system 74*7c478bd9Sstevel@tonic-gate */ 75*7c478bd9Sstevel@tonic-gate uint_t htable_steal_passes = 10; 76*7c478bd9Sstevel@tonic-gate 77*7c478bd9Sstevel@tonic-gate /* 78*7c478bd9Sstevel@tonic-gate * mutex stuff for access to htable hash 79*7c478bd9Sstevel@tonic-gate */ 80*7c478bd9Sstevel@tonic-gate #define NUM_HTABLE_MUTEX 128 81*7c478bd9Sstevel@tonic-gate kmutex_t htable_mutex[NUM_HTABLE_MUTEX]; 82*7c478bd9Sstevel@tonic-gate #define HTABLE_MUTEX_HASH(h) ((h) & (NUM_HTABLE_MUTEX - 1)) 83*7c478bd9Sstevel@tonic-gate 84*7c478bd9Sstevel@tonic-gate #define HTABLE_ENTER(h) mutex_enter(&htable_mutex[HTABLE_MUTEX_HASH(h)]); 85*7c478bd9Sstevel@tonic-gate #define HTABLE_EXIT(h) mutex_exit(&htable_mutex[HTABLE_MUTEX_HASH(h)]); 86*7c478bd9Sstevel@tonic-gate 87*7c478bd9Sstevel@tonic-gate /* 88*7c478bd9Sstevel@tonic-gate * forward declarations 89*7c478bd9Sstevel@tonic-gate */ 90*7c478bd9Sstevel@tonic-gate static void link_ptp(htable_t *higher, htable_t *new, uintptr_t vaddr); 91*7c478bd9Sstevel@tonic-gate static void unlink_ptp(htable_t *higher, htable_t *old, uintptr_t vaddr); 92*7c478bd9Sstevel@tonic-gate static void htable_free(htable_t *ht); 93*7c478bd9Sstevel@tonic-gate static x86pte_t *x86pte_access_pagetable(htable_t *ht); 
94*7c478bd9Sstevel@tonic-gate static void x86pte_release_pagetable(htable_t *ht); 95*7c478bd9Sstevel@tonic-gate static x86pte_t x86pte_cas(htable_t *ht, uint_t entry, x86pte_t old, 96*7c478bd9Sstevel@tonic-gate x86pte_t new); 97*7c478bd9Sstevel@tonic-gate 98*7c478bd9Sstevel@tonic-gate /* 99*7c478bd9Sstevel@tonic-gate * Address used for kernel page tables. See ptable_alloc() below. 100*7c478bd9Sstevel@tonic-gate */ 101*7c478bd9Sstevel@tonic-gate uintptr_t ptable_va = 0; 102*7c478bd9Sstevel@tonic-gate size_t ptable_sz = 2 * MMU_PAGESIZE; 103*7c478bd9Sstevel@tonic-gate 104*7c478bd9Sstevel@tonic-gate /* 105*7c478bd9Sstevel@tonic-gate * A counter to track if we are stealing or reaping htables. When non-zero 106*7c478bd9Sstevel@tonic-gate * htable_free() will directly free htables (either to the reserve or kmem) 107*7c478bd9Sstevel@tonic-gate * instead of putting them in a hat's htable cache. 108*7c478bd9Sstevel@tonic-gate */ 109*7c478bd9Sstevel@tonic-gate uint32_t htable_dont_cache = 0; 110*7c478bd9Sstevel@tonic-gate 111*7c478bd9Sstevel@tonic-gate /* 112*7c478bd9Sstevel@tonic-gate * Track the number of active pagetables, so we can know how many to reap 113*7c478bd9Sstevel@tonic-gate */ 114*7c478bd9Sstevel@tonic-gate static uint32_t active_ptables = 0; 115*7c478bd9Sstevel@tonic-gate 116*7c478bd9Sstevel@tonic-gate /* 117*7c478bd9Sstevel@tonic-gate * Allocate a memory page for a hardware page table. 118*7c478bd9Sstevel@tonic-gate * 119*7c478bd9Sstevel@tonic-gate * The pages allocated for page tables are currently gotten in a hacked up 120*7c478bd9Sstevel@tonic-gate * way. It works for now, but really needs to be fixed up a bit. 121*7c478bd9Sstevel@tonic-gate * 122*7c478bd9Sstevel@tonic-gate * During boot: The boot loader controls physical memory allocation via 123*7c478bd9Sstevel@tonic-gate * boot_alloc(). To avoid conflict with vmem, we just do boot_alloc()s with 124*7c478bd9Sstevel@tonic-gate * addresses less than kernelbase. 
These addresses are ignored when we take 125*7c478bd9Sstevel@tonic-gate * over mappings from the boot loader. 126*7c478bd9Sstevel@tonic-gate * 127*7c478bd9Sstevel@tonic-gate * Post-boot: we currently use page_create_va() on the kvp with fake offsets, 128*7c478bd9Sstevel@tonic-gate * segments and virt address. This is pretty bogus, but was copied from the 129*7c478bd9Sstevel@tonic-gate * old hat_i86.c code. A better approach would be to have a custom 130*7c478bd9Sstevel@tonic-gate * page_get_physical() interface that can specify either mnode random or 131*7c478bd9Sstevel@tonic-gate * mnode local and takes a page from whatever color has the MOST available - 132*7c478bd9Sstevel@tonic-gate * this would have a minimal impact on page coloring. 133*7c478bd9Sstevel@tonic-gate * 134*7c478bd9Sstevel@tonic-gate * For now the htable pointer in ht is only used to compute a unique vnode 135*7c478bd9Sstevel@tonic-gate * offset for the page. 136*7c478bd9Sstevel@tonic-gate */ 137*7c478bd9Sstevel@tonic-gate static void 138*7c478bd9Sstevel@tonic-gate ptable_alloc(htable_t *ht) 139*7c478bd9Sstevel@tonic-gate { 140*7c478bd9Sstevel@tonic-gate pfn_t pfn; 141*7c478bd9Sstevel@tonic-gate page_t *pp; 142*7c478bd9Sstevel@tonic-gate u_offset_t offset; 143*7c478bd9Sstevel@tonic-gate static struct seg tmpseg; 144*7c478bd9Sstevel@tonic-gate static int first_time = 1; 145*7c478bd9Sstevel@tonic-gate 146*7c478bd9Sstevel@tonic-gate /* 147*7c478bd9Sstevel@tonic-gate * Allocating the associated hardware page table is very different 148*7c478bd9Sstevel@tonic-gate * before boot has finished. We get a physical page to from boot 149*7c478bd9Sstevel@tonic-gate * w/o eating up any kernel address space. 
150*7c478bd9Sstevel@tonic-gate */ 151*7c478bd9Sstevel@tonic-gate ht->ht_pfn = PFN_INVALID; 152*7c478bd9Sstevel@tonic-gate HATSTAT_INC(hs_ptable_allocs); 153*7c478bd9Sstevel@tonic-gate atomic_add_32(&active_ptables, 1); 154*7c478bd9Sstevel@tonic-gate 155*7c478bd9Sstevel@tonic-gate if (use_boot_reserve) { 156*7c478bd9Sstevel@tonic-gate ASSERT(ptable_va != 0); 157*7c478bd9Sstevel@tonic-gate 158*7c478bd9Sstevel@tonic-gate /* 159*7c478bd9Sstevel@tonic-gate * Allocate, then demap the ptable_va, so that we're 160*7c478bd9Sstevel@tonic-gate * sure there exist page table entries for the addresses 161*7c478bd9Sstevel@tonic-gate */ 162*7c478bd9Sstevel@tonic-gate if (first_time) { 163*7c478bd9Sstevel@tonic-gate first_time = 0; 164*7c478bd9Sstevel@tonic-gate if ((uintptr_t)BOP_ALLOC(bootops, (caddr_t)ptable_va, 165*7c478bd9Sstevel@tonic-gate ptable_sz, BO_NO_ALIGN) != ptable_va) 166*7c478bd9Sstevel@tonic-gate panic("BOP_ALLOC failed"); 167*7c478bd9Sstevel@tonic-gate 168*7c478bd9Sstevel@tonic-gate hat_boot_demap(ptable_va); 169*7c478bd9Sstevel@tonic-gate hat_boot_demap(ptable_va + MMU_PAGESIZE); 170*7c478bd9Sstevel@tonic-gate } 171*7c478bd9Sstevel@tonic-gate 172*7c478bd9Sstevel@tonic-gate pfn = ((uintptr_t)BOP_EALLOC(bootops, 0, MMU_PAGESIZE, 173*7c478bd9Sstevel@tonic-gate BO_NO_ALIGN, BOPF_X86_ALLOC_PHYS)) >> MMU_PAGESHIFT; 174*7c478bd9Sstevel@tonic-gate if (page_resv(1, KM_NOSLEEP) == 0) 175*7c478bd9Sstevel@tonic-gate panic("page_resv() failed in ptable alloc"); 176*7c478bd9Sstevel@tonic-gate 177*7c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 178*7c478bd9Sstevel@tonic-gate ASSERT(pp != NULL); 179*7c478bd9Sstevel@tonic-gate if (pp->p_szc != 0) 180*7c478bd9Sstevel@tonic-gate page_boot_demote(pp); 181*7c478bd9Sstevel@tonic-gate pp = page_numtopp(pfn, SE_EXCL); 182*7c478bd9Sstevel@tonic-gate ASSERT(pp != NULL); 183*7c478bd9Sstevel@tonic-gate 184*7c478bd9Sstevel@tonic-gate } else { 185*7c478bd9Sstevel@tonic-gate /* 186*7c478bd9Sstevel@tonic-gate * Post boot get a page 
for the table. 187*7c478bd9Sstevel@tonic-gate * 188*7c478bd9Sstevel@tonic-gate * The first check is to see if there is memory in 189*7c478bd9Sstevel@tonic-gate * the system. If we drop to throttlefree, then fail 190*7c478bd9Sstevel@tonic-gate * the ptable_alloc() and let the stealing code kick in. 191*7c478bd9Sstevel@tonic-gate * Note that we have to do this test here, since the test in 192*7c478bd9Sstevel@tonic-gate * page_create_throttle() would let the NOSLEEP allocation 193*7c478bd9Sstevel@tonic-gate * go through and deplete the page reserves. 194*7c478bd9Sstevel@tonic-gate */ 195*7c478bd9Sstevel@tonic-gate if (freemem <= throttlefree + 1) 196*7c478bd9Sstevel@tonic-gate return; 197*7c478bd9Sstevel@tonic-gate 198*7c478bd9Sstevel@tonic-gate /* 199*7c478bd9Sstevel@tonic-gate * This code is temporary, so don't review too critically. 200*7c478bd9Sstevel@tonic-gate * I'm awaiting a new phys page allocator from Kit -- Joe 201*7c478bd9Sstevel@tonic-gate * 202*7c478bd9Sstevel@tonic-gate * We need assign an offset for the page to call 203*7c478bd9Sstevel@tonic-gate * page_create_va. To avoid conflicts with other pages, 204*7c478bd9Sstevel@tonic-gate * we get creative with the offset. 205*7c478bd9Sstevel@tonic-gate * for 32 bits, we pic an offset > 4Gig 206*7c478bd9Sstevel@tonic-gate * for 64 bits, pic an offset somewhere in the VA hole. 
207*7c478bd9Sstevel@tonic-gate */ 208*7c478bd9Sstevel@tonic-gate offset = (uintptr_t)ht - kernelbase; 209*7c478bd9Sstevel@tonic-gate offset <<= MMU_PAGESHIFT; 210*7c478bd9Sstevel@tonic-gate #if defined(__amd64) 211*7c478bd9Sstevel@tonic-gate offset += mmu.hole_start; /* something in VA hole */ 212*7c478bd9Sstevel@tonic-gate #else 213*7c478bd9Sstevel@tonic-gate offset += 1ULL << 40; /* something > 4 Gig */ 214*7c478bd9Sstevel@tonic-gate #endif 215*7c478bd9Sstevel@tonic-gate 216*7c478bd9Sstevel@tonic-gate if (page_resv(1, KM_NOSLEEP) == 0) 217*7c478bd9Sstevel@tonic-gate return; 218*7c478bd9Sstevel@tonic-gate 219*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 220*7c478bd9Sstevel@tonic-gate pp = page_exists(&kvp, offset); 221*7c478bd9Sstevel@tonic-gate if (pp != NULL) 222*7c478bd9Sstevel@tonic-gate panic("ptable already exists %p", pp); 223*7c478bd9Sstevel@tonic-gate #endif 224*7c478bd9Sstevel@tonic-gate pp = page_create_va(&kvp, offset, MMU_PAGESIZE, 225*7c478bd9Sstevel@tonic-gate PG_EXCL | PG_NORELOC, &tmpseg, 226*7c478bd9Sstevel@tonic-gate (void *)((uintptr_t)ht << MMU_PAGESHIFT)); 227*7c478bd9Sstevel@tonic-gate if (pp == NULL) 228*7c478bd9Sstevel@tonic-gate return; 229*7c478bd9Sstevel@tonic-gate page_io_unlock(pp); 230*7c478bd9Sstevel@tonic-gate page_hashout(pp, NULL); 231*7c478bd9Sstevel@tonic-gate pfn = pp->p_pagenum; 232*7c478bd9Sstevel@tonic-gate } 233*7c478bd9Sstevel@tonic-gate page_downgrade(pp); 234*7c478bd9Sstevel@tonic-gate ASSERT(PAGE_SHARED(pp)); 235*7c478bd9Sstevel@tonic-gate 236*7c478bd9Sstevel@tonic-gate if (pfn == PFN_INVALID) 237*7c478bd9Sstevel@tonic-gate panic("ptable_alloc(): Invalid PFN!!"); 238*7c478bd9Sstevel@tonic-gate ht->ht_pfn = pfn; 239*7c478bd9Sstevel@tonic-gate } 240*7c478bd9Sstevel@tonic-gate 241*7c478bd9Sstevel@tonic-gate /* 242*7c478bd9Sstevel@tonic-gate * Free an htable's associated page table page. See the comments 243*7c478bd9Sstevel@tonic-gate * for ptable_alloc(). 
244*7c478bd9Sstevel@tonic-gate */ 245*7c478bd9Sstevel@tonic-gate static void 246*7c478bd9Sstevel@tonic-gate ptable_free(htable_t *ht) 247*7c478bd9Sstevel@tonic-gate { 248*7c478bd9Sstevel@tonic-gate pfn_t pfn = ht->ht_pfn; 249*7c478bd9Sstevel@tonic-gate page_t *pp; 250*7c478bd9Sstevel@tonic-gate 251*7c478bd9Sstevel@tonic-gate /* 252*7c478bd9Sstevel@tonic-gate * need to destroy the page used for the pagetable 253*7c478bd9Sstevel@tonic-gate */ 254*7c478bd9Sstevel@tonic-gate ASSERT(pfn != PFN_INVALID); 255*7c478bd9Sstevel@tonic-gate HATSTAT_INC(hs_ptable_frees); 256*7c478bd9Sstevel@tonic-gate atomic_add_32(&active_ptables, -1); 257*7c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 258*7c478bd9Sstevel@tonic-gate if (pp == NULL) 259*7c478bd9Sstevel@tonic-gate panic("ptable_free(): no page for pfn!"); 260*7c478bd9Sstevel@tonic-gate ASSERT(PAGE_SHARED(pp)); 261*7c478bd9Sstevel@tonic-gate ASSERT(pfn == pp->p_pagenum); 262*7c478bd9Sstevel@tonic-gate 263*7c478bd9Sstevel@tonic-gate /* 264*7c478bd9Sstevel@tonic-gate * Get an exclusive lock, might have to wait for a kmem reader. 265*7c478bd9Sstevel@tonic-gate */ 266*7c478bd9Sstevel@tonic-gate if (!page_tryupgrade(pp)) { 267*7c478bd9Sstevel@tonic-gate page_unlock(pp); 268*7c478bd9Sstevel@tonic-gate /* 269*7c478bd9Sstevel@tonic-gate * RFE: we could change this to not loop forever 270*7c478bd9Sstevel@tonic-gate * George Cameron had some idea on how to do that. 271*7c478bd9Sstevel@tonic-gate * For now looping works - it's just like sfmmu. 
272*7c478bd9Sstevel@tonic-gate */ 273*7c478bd9Sstevel@tonic-gate while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM)) 274*7c478bd9Sstevel@tonic-gate continue; 275*7c478bd9Sstevel@tonic-gate } 276*7c478bd9Sstevel@tonic-gate page_free(pp, 1); 277*7c478bd9Sstevel@tonic-gate page_unresv(1); 278*7c478bd9Sstevel@tonic-gate ht->ht_pfn = PFN_INVALID; 279*7c478bd9Sstevel@tonic-gate } 280*7c478bd9Sstevel@tonic-gate 281*7c478bd9Sstevel@tonic-gate /* 282*7c478bd9Sstevel@tonic-gate * Put one htable on the reserve list. 283*7c478bd9Sstevel@tonic-gate */ 284*7c478bd9Sstevel@tonic-gate static void 285*7c478bd9Sstevel@tonic-gate htable_put_reserve(htable_t *ht) 286*7c478bd9Sstevel@tonic-gate { 287*7c478bd9Sstevel@tonic-gate ht->ht_hat = NULL; /* no longer tied to a hat */ 288*7c478bd9Sstevel@tonic-gate ASSERT(ht->ht_pfn == PFN_INVALID); 289*7c478bd9Sstevel@tonic-gate HATSTAT_INC(hs_htable_rputs); 290*7c478bd9Sstevel@tonic-gate mutex_enter(&htable_reserve_mutex); 291*7c478bd9Sstevel@tonic-gate ht->ht_next = htable_reserve_pool; 292*7c478bd9Sstevel@tonic-gate htable_reserve_pool = ht; 293*7c478bd9Sstevel@tonic-gate ++htable_reserve_cnt; 294*7c478bd9Sstevel@tonic-gate mutex_exit(&htable_reserve_mutex); 295*7c478bd9Sstevel@tonic-gate } 296*7c478bd9Sstevel@tonic-gate 297*7c478bd9Sstevel@tonic-gate /* 298*7c478bd9Sstevel@tonic-gate * Take one htable from the reserve. 
299*7c478bd9Sstevel@tonic-gate */ 300*7c478bd9Sstevel@tonic-gate static htable_t * 301*7c478bd9Sstevel@tonic-gate htable_get_reserve(void) 302*7c478bd9Sstevel@tonic-gate { 303*7c478bd9Sstevel@tonic-gate htable_t *ht = NULL; 304*7c478bd9Sstevel@tonic-gate 305*7c478bd9Sstevel@tonic-gate mutex_enter(&htable_reserve_mutex); 306*7c478bd9Sstevel@tonic-gate if (htable_reserve_cnt != 0) { 307*7c478bd9Sstevel@tonic-gate ht = htable_reserve_pool; 308*7c478bd9Sstevel@tonic-gate ASSERT(ht != NULL); 309*7c478bd9Sstevel@tonic-gate ASSERT(ht->ht_pfn == PFN_INVALID); 310*7c478bd9Sstevel@tonic-gate htable_reserve_pool = ht->ht_next; 311*7c478bd9Sstevel@tonic-gate --htable_reserve_cnt; 312*7c478bd9Sstevel@tonic-gate HATSTAT_INC(hs_htable_rgets); 313*7c478bd9Sstevel@tonic-gate } 314*7c478bd9Sstevel@tonic-gate mutex_exit(&htable_reserve_mutex); 315*7c478bd9Sstevel@tonic-gate return (ht); 316*7c478bd9Sstevel@tonic-gate } 317*7c478bd9Sstevel@tonic-gate 318*7c478bd9Sstevel@tonic-gate /* 319*7c478bd9Sstevel@tonic-gate * Allocate initial htables with page tables and put them on the kernel hat's 320*7c478bd9Sstevel@tonic-gate * cache list. 
321*7c478bd9Sstevel@tonic-gate */ 322*7c478bd9Sstevel@tonic-gate void 323*7c478bd9Sstevel@tonic-gate htable_initial_reserve(uint_t count) 324*7c478bd9Sstevel@tonic-gate { 325*7c478bd9Sstevel@tonic-gate htable_t *ht; 326*7c478bd9Sstevel@tonic-gate hat_t *hat = kas.a_hat; 327*7c478bd9Sstevel@tonic-gate 328*7c478bd9Sstevel@tonic-gate count += HTABLE_RESERVE_AMOUNT; 329*7c478bd9Sstevel@tonic-gate while (count > 0) { 330*7c478bd9Sstevel@tonic-gate ht = kmem_cache_alloc(htable_cache, KM_NOSLEEP); 331*7c478bd9Sstevel@tonic-gate ASSERT(ht != NULL); 332*7c478bd9Sstevel@tonic-gate 333*7c478bd9Sstevel@tonic-gate ASSERT(use_boot_reserve); 334*7c478bd9Sstevel@tonic-gate ht->ht_hat = kas.a_hat; /* so htable_free() works */ 335*7c478bd9Sstevel@tonic-gate ht->ht_flags = 0; /* so x86pte_zero works */ 336*7c478bd9Sstevel@tonic-gate ptable_alloc(ht); 337*7c478bd9Sstevel@tonic-gate if (ht->ht_pfn == PFN_INVALID) 338*7c478bd9Sstevel@tonic-gate panic("ptable_alloc() failed"); 339*7c478bd9Sstevel@tonic-gate 340*7c478bd9Sstevel@tonic-gate x86pte_zero(ht, 0, mmu.ptes_per_table); 341*7c478bd9Sstevel@tonic-gate 342*7c478bd9Sstevel@tonic-gate ht->ht_next = hat->hat_ht_cached; 343*7c478bd9Sstevel@tonic-gate hat->hat_ht_cached = ht; 344*7c478bd9Sstevel@tonic-gate --count; 345*7c478bd9Sstevel@tonic-gate } 346*7c478bd9Sstevel@tonic-gate } 347*7c478bd9Sstevel@tonic-gate 348*7c478bd9Sstevel@tonic-gate /* 349*7c478bd9Sstevel@tonic-gate * Readjust the reserves after a thread finishes using them. 350*7c478bd9Sstevel@tonic-gate * 351*7c478bd9Sstevel@tonic-gate * The first time this is called post boot, we'll also clear out the 352*7c478bd9Sstevel@tonic-gate * extra boot htables that were put in the kernel hat's cache list. 
353*7c478bd9Sstevel@tonic-gate */ 354*7c478bd9Sstevel@tonic-gate void 355*7c478bd9Sstevel@tonic-gate htable_adjust_reserve() 356*7c478bd9Sstevel@tonic-gate { 357*7c478bd9Sstevel@tonic-gate static int first_time = 1; 358*7c478bd9Sstevel@tonic-gate htable_t *ht; 359*7c478bd9Sstevel@tonic-gate 360*7c478bd9Sstevel@tonic-gate ASSERT(curthread != hat_reserves_thread); 361*7c478bd9Sstevel@tonic-gate 362*7c478bd9Sstevel@tonic-gate /* 363*7c478bd9Sstevel@tonic-gate * The first time this is called after we can steal, we free up the 364*7c478bd9Sstevel@tonic-gate * the kernel's cache htable list. It has lots of extra htable/page 365*7c478bd9Sstevel@tonic-gate * tables that were allocated for boot up. 366*7c478bd9Sstevel@tonic-gate */ 367*7c478bd9Sstevel@tonic-gate if (first_time) { 368*7c478bd9Sstevel@tonic-gate first_time = 0; 369*7c478bd9Sstevel@tonic-gate while ((ht = kas.a_hat->hat_ht_cached) != NULL) { 370*7c478bd9Sstevel@tonic-gate kas.a_hat->hat_ht_cached = ht->ht_next; 371*7c478bd9Sstevel@tonic-gate ASSERT(ht->ht_hat == kas.a_hat); 372*7c478bd9Sstevel@tonic-gate ptable_free(ht); 373*7c478bd9Sstevel@tonic-gate htable_put_reserve(ht); 374*7c478bd9Sstevel@tonic-gate } 375*7c478bd9Sstevel@tonic-gate return; 376*7c478bd9Sstevel@tonic-gate } 377*7c478bd9Sstevel@tonic-gate 378*7c478bd9Sstevel@tonic-gate /* 379*7c478bd9Sstevel@tonic-gate * Free any excess htables in the reserve list 380*7c478bd9Sstevel@tonic-gate */ 381*7c478bd9Sstevel@tonic-gate while (htable_reserve_cnt > htable_reserve_amount) { 382*7c478bd9Sstevel@tonic-gate ht = htable_get_reserve(); 383*7c478bd9Sstevel@tonic-gate if (ht == NULL) 384*7c478bd9Sstevel@tonic-gate return; 385*7c478bd9Sstevel@tonic-gate ASSERT(ht->ht_pfn == PFN_INVALID); 386*7c478bd9Sstevel@tonic-gate kmem_cache_free(htable_cache, ht); 387*7c478bd9Sstevel@tonic-gate } 388*7c478bd9Sstevel@tonic-gate } 389*7c478bd9Sstevel@tonic-gate 390*7c478bd9Sstevel@tonic-gate 391*7c478bd9Sstevel@tonic-gate /* 392*7c478bd9Sstevel@tonic-gate * This routine 
steals htables from user processes for htable_alloc() or 393*7c478bd9Sstevel@tonic-gate * for htable_reap(). 394*7c478bd9Sstevel@tonic-gate */ 395*7c478bd9Sstevel@tonic-gate static htable_t * 396*7c478bd9Sstevel@tonic-gate htable_steal(uint_t cnt) 397*7c478bd9Sstevel@tonic-gate { 398*7c478bd9Sstevel@tonic-gate hat_t *hat = kas.a_hat; /* list starts with khat */ 399*7c478bd9Sstevel@tonic-gate htable_t *list = NULL; 400*7c478bd9Sstevel@tonic-gate htable_t *ht; 401*7c478bd9Sstevel@tonic-gate htable_t *higher; 402*7c478bd9Sstevel@tonic-gate uint_t h; 403*7c478bd9Sstevel@tonic-gate uint_t e; 404*7c478bd9Sstevel@tonic-gate uintptr_t va; 405*7c478bd9Sstevel@tonic-gate x86pte_t pte; 406*7c478bd9Sstevel@tonic-gate uint_t stolen = 0; 407*7c478bd9Sstevel@tonic-gate uint_t pass; 408*7c478bd9Sstevel@tonic-gate uint_t threshhold; 409*7c478bd9Sstevel@tonic-gate 410*7c478bd9Sstevel@tonic-gate /* 411*7c478bd9Sstevel@tonic-gate * Limit htable_steal_passes to something reasonable 412*7c478bd9Sstevel@tonic-gate */ 413*7c478bd9Sstevel@tonic-gate if (htable_steal_passes == 0) 414*7c478bd9Sstevel@tonic-gate htable_steal_passes = 1; 415*7c478bd9Sstevel@tonic-gate if (htable_steal_passes > mmu.ptes_per_table) 416*7c478bd9Sstevel@tonic-gate htable_steal_passes = mmu.ptes_per_table; 417*7c478bd9Sstevel@tonic-gate 418*7c478bd9Sstevel@tonic-gate /* 419*7c478bd9Sstevel@tonic-gate * Loop through all hats. The 1st pass takes cached htables that 420*7c478bd9Sstevel@tonic-gate * aren't in use. The later passes steal by removing mappings, too. 
421*7c478bd9Sstevel@tonic-gate */ 422*7c478bd9Sstevel@tonic-gate atomic_add_32(&htable_dont_cache, 1); 423*7c478bd9Sstevel@tonic-gate for (pass = 1; pass <= htable_steal_passes && stolen < cnt; ++pass) { 424*7c478bd9Sstevel@tonic-gate threshhold = pass / htable_steal_passes; 425*7c478bd9Sstevel@tonic-gate hat = kas.a_hat->hat_next; 426*7c478bd9Sstevel@tonic-gate for (;;) { 427*7c478bd9Sstevel@tonic-gate 428*7c478bd9Sstevel@tonic-gate /* 429*7c478bd9Sstevel@tonic-gate * move to next hat 430*7c478bd9Sstevel@tonic-gate */ 431*7c478bd9Sstevel@tonic-gate mutex_enter(&hat_list_lock); 432*7c478bd9Sstevel@tonic-gate hat->hat_flags &= ~HAT_VICTIM; 433*7c478bd9Sstevel@tonic-gate cv_broadcast(&hat_list_cv); 434*7c478bd9Sstevel@tonic-gate do { 435*7c478bd9Sstevel@tonic-gate hat = hat->hat_prev; 436*7c478bd9Sstevel@tonic-gate } while (hat->hat_flags & HAT_VICTIM); 437*7c478bd9Sstevel@tonic-gate if (stolen == cnt || hat == kas.a_hat->hat_next) { 438*7c478bd9Sstevel@tonic-gate mutex_exit(&hat_list_lock); 439*7c478bd9Sstevel@tonic-gate break; 440*7c478bd9Sstevel@tonic-gate } 441*7c478bd9Sstevel@tonic-gate hat->hat_flags |= HAT_VICTIM; 442*7c478bd9Sstevel@tonic-gate mutex_exit(&hat_list_lock); 443*7c478bd9Sstevel@tonic-gate 444*7c478bd9Sstevel@tonic-gate /* 445*7c478bd9Sstevel@tonic-gate * Take any htables from the hat's cached "free" list. 446*7c478bd9Sstevel@tonic-gate */ 447*7c478bd9Sstevel@tonic-gate hat_enter(hat); 448*7c478bd9Sstevel@tonic-gate while ((ht = hat->hat_ht_cached) != NULL && 449*7c478bd9Sstevel@tonic-gate stolen < cnt) { 450*7c478bd9Sstevel@tonic-gate hat->hat_ht_cached = ht->ht_next; 451*7c478bd9Sstevel@tonic-gate ht->ht_next = list; 452*7c478bd9Sstevel@tonic-gate list = ht; 453*7c478bd9Sstevel@tonic-gate ++stolen; 454*7c478bd9Sstevel@tonic-gate } 455*7c478bd9Sstevel@tonic-gate hat_exit(hat); 456*7c478bd9Sstevel@tonic-gate 457*7c478bd9Sstevel@tonic-gate /* 458*7c478bd9Sstevel@tonic-gate * Don't steal on first pass. 
459*7c478bd9Sstevel@tonic-gate */ 460*7c478bd9Sstevel@tonic-gate if (pass == 1 || stolen == cnt) 461*7c478bd9Sstevel@tonic-gate continue; 462*7c478bd9Sstevel@tonic-gate 463*7c478bd9Sstevel@tonic-gate /* 464*7c478bd9Sstevel@tonic-gate * search the active htables for one to steal 465*7c478bd9Sstevel@tonic-gate */ 466*7c478bd9Sstevel@tonic-gate for (h = 0; h < hat->hat_num_hash && stolen < cnt; 467*7c478bd9Sstevel@tonic-gate ++h) { 468*7c478bd9Sstevel@tonic-gate higher = NULL; 469*7c478bd9Sstevel@tonic-gate HTABLE_ENTER(h); 470*7c478bd9Sstevel@tonic-gate for (ht = hat->hat_ht_hash[h]; ht; 471*7c478bd9Sstevel@tonic-gate ht = ht->ht_next) { 472*7c478bd9Sstevel@tonic-gate 473*7c478bd9Sstevel@tonic-gate /* 474*7c478bd9Sstevel@tonic-gate * Can we rule out reaping? 475*7c478bd9Sstevel@tonic-gate */ 476*7c478bd9Sstevel@tonic-gate if (ht->ht_busy != 0 || 477*7c478bd9Sstevel@tonic-gate (ht->ht_flags & HTABLE_SHARED_PFN)|| 478*7c478bd9Sstevel@tonic-gate ht->ht_level == TOP_LEVEL(hat) || 479*7c478bd9Sstevel@tonic-gate (ht->ht_level >= 480*7c478bd9Sstevel@tonic-gate mmu.max_page_level && 481*7c478bd9Sstevel@tonic-gate ht->ht_valid_cnt > 0) || 482*7c478bd9Sstevel@tonic-gate ht->ht_valid_cnt < threshhold || 483*7c478bd9Sstevel@tonic-gate ht->ht_lock_cnt != 0) 484*7c478bd9Sstevel@tonic-gate continue; 485*7c478bd9Sstevel@tonic-gate 486*7c478bd9Sstevel@tonic-gate /* 487*7c478bd9Sstevel@tonic-gate * Increment busy so the htable can't 488*7c478bd9Sstevel@tonic-gate * disappear. We drop the htable mutex 489*7c478bd9Sstevel@tonic-gate * to avoid deadlocks with 490*7c478bd9Sstevel@tonic-gate * hat_pageunload() and the hment mutex 491*7c478bd9Sstevel@tonic-gate * while we call hat_pte_unmap() 492*7c478bd9Sstevel@tonic-gate */ 493*7c478bd9Sstevel@tonic-gate ++ht->ht_busy; 494*7c478bd9Sstevel@tonic-gate HTABLE_EXIT(h); 495*7c478bd9Sstevel@tonic-gate 496*7c478bd9Sstevel@tonic-gate /* 497*7c478bd9Sstevel@tonic-gate * Try stealing. 
498*7c478bd9Sstevel@tonic-gate * - unload and invalidate all PTEs 499*7c478bd9Sstevel@tonic-gate */ 500*7c478bd9Sstevel@tonic-gate for (e = 0, va = ht->ht_vaddr; 501*7c478bd9Sstevel@tonic-gate e < ht->ht_num_ptes && 502*7c478bd9Sstevel@tonic-gate ht->ht_valid_cnt > 0 && 503*7c478bd9Sstevel@tonic-gate ht->ht_busy == 1 && 504*7c478bd9Sstevel@tonic-gate ht->ht_lock_cnt == 0; 505*7c478bd9Sstevel@tonic-gate ++e, va += MMU_PAGESIZE) { 506*7c478bd9Sstevel@tonic-gate pte = x86pte_get(ht, e); 507*7c478bd9Sstevel@tonic-gate if (!PTE_ISVALID(pte)) 508*7c478bd9Sstevel@tonic-gate continue; 509*7c478bd9Sstevel@tonic-gate hat_pte_unmap(ht, e, 510*7c478bd9Sstevel@tonic-gate HAT_UNLOAD, pte, NULL); 511*7c478bd9Sstevel@tonic-gate } 512*7c478bd9Sstevel@tonic-gate 513*7c478bd9Sstevel@tonic-gate /* 514*7c478bd9Sstevel@tonic-gate * Reacquire htable lock. If we didn't 515*7c478bd9Sstevel@tonic-gate * remove all mappings in the table, 516*7c478bd9Sstevel@tonic-gate * or another thread added a new mapping 517*7c478bd9Sstevel@tonic-gate * behind us, give up on this table. 518*7c478bd9Sstevel@tonic-gate */ 519*7c478bd9Sstevel@tonic-gate HTABLE_ENTER(h); 520*7c478bd9Sstevel@tonic-gate if (ht->ht_busy != 1 || 521*7c478bd9Sstevel@tonic-gate ht->ht_valid_cnt != 0 || 522*7c478bd9Sstevel@tonic-gate ht->ht_lock_cnt != 0) { 523*7c478bd9Sstevel@tonic-gate --ht->ht_busy; 524*7c478bd9Sstevel@tonic-gate continue; 525*7c478bd9Sstevel@tonic-gate } 526*7c478bd9Sstevel@tonic-gate 527*7c478bd9Sstevel@tonic-gate /* 528*7c478bd9Sstevel@tonic-gate * Steal it and unlink the page table. 
529*7c478bd9Sstevel@tonic-gate */ 530*7c478bd9Sstevel@tonic-gate higher = ht->ht_parent; 531*7c478bd9Sstevel@tonic-gate unlink_ptp(higher, ht, ht->ht_vaddr); 532*7c478bd9Sstevel@tonic-gate 533*7c478bd9Sstevel@tonic-gate /* 534*7c478bd9Sstevel@tonic-gate * remove from the hash list 535*7c478bd9Sstevel@tonic-gate */ 536*7c478bd9Sstevel@tonic-gate if (ht->ht_next) 537*7c478bd9Sstevel@tonic-gate ht->ht_next->ht_prev = 538*7c478bd9Sstevel@tonic-gate ht->ht_prev; 539*7c478bd9Sstevel@tonic-gate 540*7c478bd9Sstevel@tonic-gate if (ht->ht_prev) { 541*7c478bd9Sstevel@tonic-gate ht->ht_prev->ht_next = 542*7c478bd9Sstevel@tonic-gate ht->ht_next; 543*7c478bd9Sstevel@tonic-gate } else { 544*7c478bd9Sstevel@tonic-gate ASSERT(hat->hat_ht_hash[h] == 545*7c478bd9Sstevel@tonic-gate ht); 546*7c478bd9Sstevel@tonic-gate hat->hat_ht_hash[h] = 547*7c478bd9Sstevel@tonic-gate ht->ht_next; 548*7c478bd9Sstevel@tonic-gate } 549*7c478bd9Sstevel@tonic-gate 550*7c478bd9Sstevel@tonic-gate /* 551*7c478bd9Sstevel@tonic-gate * Break to outer loop to release the 552*7c478bd9Sstevel@tonic-gate * higher (ht_parent) pagtable. This 553*7c478bd9Sstevel@tonic-gate * spreads out the pain caused by 554*7c478bd9Sstevel@tonic-gate * pagefaults. 555*7c478bd9Sstevel@tonic-gate */ 556*7c478bd9Sstevel@tonic-gate ht->ht_next = list; 557*7c478bd9Sstevel@tonic-gate list = ht; 558*7c478bd9Sstevel@tonic-gate ++stolen; 559*7c478bd9Sstevel@tonic-gate 560*7c478bd9Sstevel@tonic-gate /* 561*7c478bd9Sstevel@tonic-gate * If this is the last steal, then move 562*7c478bd9Sstevel@tonic-gate * the hat list head, so that we start 563*7c478bd9Sstevel@tonic-gate * here next time. 
564*7c478bd9Sstevel@tonic-gate */ 565*7c478bd9Sstevel@tonic-gate if (stolen == cnt) { 566*7c478bd9Sstevel@tonic-gate mutex_enter(&hat_list_lock); 567*7c478bd9Sstevel@tonic-gate kas.a_hat->hat_next = hat; 568*7c478bd9Sstevel@tonic-gate mutex_exit(&hat_list_lock); 569*7c478bd9Sstevel@tonic-gate } 570*7c478bd9Sstevel@tonic-gate break; 571*7c478bd9Sstevel@tonic-gate } 572*7c478bd9Sstevel@tonic-gate HTABLE_EXIT(h); 573*7c478bd9Sstevel@tonic-gate if (higher != NULL) 574*7c478bd9Sstevel@tonic-gate htable_release(higher); 575*7c478bd9Sstevel@tonic-gate } 576*7c478bd9Sstevel@tonic-gate } 577*7c478bd9Sstevel@tonic-gate } 578*7c478bd9Sstevel@tonic-gate atomic_add_32(&htable_dont_cache, -1); 579*7c478bd9Sstevel@tonic-gate return (list); 580*7c478bd9Sstevel@tonic-gate } 581*7c478bd9Sstevel@tonic-gate 582*7c478bd9Sstevel@tonic-gate 583*7c478bd9Sstevel@tonic-gate /* 584*7c478bd9Sstevel@tonic-gate * This is invoked from kmem when the system is low on memory. We try 585*7c478bd9Sstevel@tonic-gate * to free hments, htables, and ptables to improve the memory situation. 586*7c478bd9Sstevel@tonic-gate */ 587*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 588*7c478bd9Sstevel@tonic-gate static void 589*7c478bd9Sstevel@tonic-gate htable_reap(void *handle) 590*7c478bd9Sstevel@tonic-gate { 591*7c478bd9Sstevel@tonic-gate uint_t reap_cnt; 592*7c478bd9Sstevel@tonic-gate htable_t *list; 593*7c478bd9Sstevel@tonic-gate htable_t *ht; 594*7c478bd9Sstevel@tonic-gate 595*7c478bd9Sstevel@tonic-gate HATSTAT_INC(hs_reap_attempts); 596*7c478bd9Sstevel@tonic-gate if (!can_steal_post_boot) 597*7c478bd9Sstevel@tonic-gate return; 598*7c478bd9Sstevel@tonic-gate 599*7c478bd9Sstevel@tonic-gate /* 600*7c478bd9Sstevel@tonic-gate * Try to reap 5% of the page tables bounded by a maximum of 601*7c478bd9Sstevel@tonic-gate * 5% of physmem and a minimum of 10. 
602*7c478bd9Sstevel@tonic-gate */ 603*7c478bd9Sstevel@tonic-gate reap_cnt = MIN(MAX(physmem / 20, active_ptables / 20), 10); 604*7c478bd9Sstevel@tonic-gate 605*7c478bd9Sstevel@tonic-gate /* 606*7c478bd9Sstevel@tonic-gate * Let htable_steal() do the work, we just call htable_free() 607*7c478bd9Sstevel@tonic-gate */ 608*7c478bd9Sstevel@tonic-gate list = htable_steal(reap_cnt); 609*7c478bd9Sstevel@tonic-gate while ((ht = list) != NULL) { 610*7c478bd9Sstevel@tonic-gate list = ht->ht_next; 611*7c478bd9Sstevel@tonic-gate HATSTAT_INC(hs_reaped); 612*7c478bd9Sstevel@tonic-gate htable_free(ht); 613*7c478bd9Sstevel@tonic-gate } 614*7c478bd9Sstevel@tonic-gate 615*7c478bd9Sstevel@tonic-gate /* 616*7c478bd9Sstevel@tonic-gate * Free up excess reserves 617*7c478bd9Sstevel@tonic-gate */ 618*7c478bd9Sstevel@tonic-gate htable_adjust_reserve(); 619*7c478bd9Sstevel@tonic-gate hment_adjust_reserve(); 620*7c478bd9Sstevel@tonic-gate } 621*7c478bd9Sstevel@tonic-gate 622*7c478bd9Sstevel@tonic-gate /* 623*7c478bd9Sstevel@tonic-gate * allocate an htable, stealing one or using the reserve if necessary 624*7c478bd9Sstevel@tonic-gate */ 625*7c478bd9Sstevel@tonic-gate static htable_t * 626*7c478bd9Sstevel@tonic-gate htable_alloc( 627*7c478bd9Sstevel@tonic-gate hat_t *hat, 628*7c478bd9Sstevel@tonic-gate uintptr_t vaddr, 629*7c478bd9Sstevel@tonic-gate level_t level, 630*7c478bd9Sstevel@tonic-gate htable_t *shared) 631*7c478bd9Sstevel@tonic-gate { 632*7c478bd9Sstevel@tonic-gate htable_t *ht = NULL; 633*7c478bd9Sstevel@tonic-gate uint_t is_vlp; 634*7c478bd9Sstevel@tonic-gate uint_t is_bare = 0; 635*7c478bd9Sstevel@tonic-gate uint_t need_to_zero = 1; 636*7c478bd9Sstevel@tonic-gate int kmflags = (can_steal_post_boot ? 
KM_NOSLEEP : KM_SLEEP); 637*7c478bd9Sstevel@tonic-gate 638*7c478bd9Sstevel@tonic-gate if (level < 0 || level > TOP_LEVEL(hat)) 639*7c478bd9Sstevel@tonic-gate panic("htable_alloc(): level %d out of range\n", level); 640*7c478bd9Sstevel@tonic-gate 641*7c478bd9Sstevel@tonic-gate is_vlp = (hat->hat_flags & HAT_VLP) && level == VLP_LEVEL; 642*7c478bd9Sstevel@tonic-gate if (is_vlp || shared != NULL) 643*7c478bd9Sstevel@tonic-gate is_bare = 1; 644*7c478bd9Sstevel@tonic-gate 645*7c478bd9Sstevel@tonic-gate /* 646*7c478bd9Sstevel@tonic-gate * First reuse a cached htable from the hat_ht_cached field, this 647*7c478bd9Sstevel@tonic-gate * avoids unnecessary trips through kmem/page allocators. This is also 648*7c478bd9Sstevel@tonic-gate * what happens during use_boot_reserve. 649*7c478bd9Sstevel@tonic-gate */ 650*7c478bd9Sstevel@tonic-gate if (hat->hat_ht_cached != NULL && !is_bare) { 651*7c478bd9Sstevel@tonic-gate hat_enter(hat); 652*7c478bd9Sstevel@tonic-gate ht = hat->hat_ht_cached; 653*7c478bd9Sstevel@tonic-gate if (ht != NULL) { 654*7c478bd9Sstevel@tonic-gate hat->hat_ht_cached = ht->ht_next; 655*7c478bd9Sstevel@tonic-gate need_to_zero = 0; 656*7c478bd9Sstevel@tonic-gate /* XX64 ASSERT() they're all zero somehow */ 657*7c478bd9Sstevel@tonic-gate ASSERT(ht->ht_pfn != PFN_INVALID); 658*7c478bd9Sstevel@tonic-gate } 659*7c478bd9Sstevel@tonic-gate hat_exit(hat); 660*7c478bd9Sstevel@tonic-gate } 661*7c478bd9Sstevel@tonic-gate 662*7c478bd9Sstevel@tonic-gate if (ht == NULL) { 663*7c478bd9Sstevel@tonic-gate ASSERT(!use_boot_reserve); 664*7c478bd9Sstevel@tonic-gate /* 665*7c478bd9Sstevel@tonic-gate * When allocating for hat_memload_arena, we use the reserve. 666*7c478bd9Sstevel@tonic-gate * Also use reserves if we are in a panic(). 
667*7c478bd9Sstevel@tonic-gate */ 668*7c478bd9Sstevel@tonic-gate if (curthread == hat_reserves_thread || panicstr != NULL) { 669*7c478bd9Sstevel@tonic-gate ASSERT(panicstr != NULL || !is_bare); 670*7c478bd9Sstevel@tonic-gate ASSERT(panicstr != NULL || 671*7c478bd9Sstevel@tonic-gate curthread == hat_reserves_thread); 672*7c478bd9Sstevel@tonic-gate ht = htable_get_reserve(); 673*7c478bd9Sstevel@tonic-gate } else { 674*7c478bd9Sstevel@tonic-gate /* 675*7c478bd9Sstevel@tonic-gate * Donate successful htable allocations to the reserve. 676*7c478bd9Sstevel@tonic-gate */ 677*7c478bd9Sstevel@tonic-gate for (;;) { 678*7c478bd9Sstevel@tonic-gate ASSERT(curthread != hat_reserves_thread); 679*7c478bd9Sstevel@tonic-gate ht = kmem_cache_alloc(htable_cache, kmflags); 680*7c478bd9Sstevel@tonic-gate if (ht == NULL) 681*7c478bd9Sstevel@tonic-gate break; 682*7c478bd9Sstevel@tonic-gate ht->ht_pfn = PFN_INVALID; 683*7c478bd9Sstevel@tonic-gate if (curthread == hat_reserves_thread || 684*7c478bd9Sstevel@tonic-gate panicstr != NULL || 685*7c478bd9Sstevel@tonic-gate htable_reserve_cnt >= htable_reserve_amount) 686*7c478bd9Sstevel@tonic-gate break; 687*7c478bd9Sstevel@tonic-gate htable_put_reserve(ht); 688*7c478bd9Sstevel@tonic-gate } 689*7c478bd9Sstevel@tonic-gate } 690*7c478bd9Sstevel@tonic-gate 691*7c478bd9Sstevel@tonic-gate /* 692*7c478bd9Sstevel@tonic-gate * allocate a page for the hardware page table if needed 693*7c478bd9Sstevel@tonic-gate */ 694*7c478bd9Sstevel@tonic-gate if (ht != NULL && !is_bare) { 695*7c478bd9Sstevel@tonic-gate ptable_alloc(ht); 696*7c478bd9Sstevel@tonic-gate if (ht->ht_pfn == PFN_INVALID) { 697*7c478bd9Sstevel@tonic-gate kmem_cache_free(htable_cache, ht); 698*7c478bd9Sstevel@tonic-gate ht = NULL; 699*7c478bd9Sstevel@tonic-gate } 700*7c478bd9Sstevel@tonic-gate } 701*7c478bd9Sstevel@tonic-gate } 702*7c478bd9Sstevel@tonic-gate 703*7c478bd9Sstevel@tonic-gate /* 704*7c478bd9Sstevel@tonic-gate * if allocations failed resort to stealing 705*7c478bd9Sstevel@tonic-gate 
*/ 706*7c478bd9Sstevel@tonic-gate if (ht == NULL && can_steal_post_boot) { 707*7c478bd9Sstevel@tonic-gate ht = htable_steal(1); 708*7c478bd9Sstevel@tonic-gate HATSTAT_INC(hs_steals); 709*7c478bd9Sstevel@tonic-gate 710*7c478bd9Sstevel@tonic-gate /* 711*7c478bd9Sstevel@tonic-gate * if we had to steal for a bare htable, release the 712*7c478bd9Sstevel@tonic-gate * page for the pagetable 713*7c478bd9Sstevel@tonic-gate */ 714*7c478bd9Sstevel@tonic-gate if (ht != NULL && is_bare) 715*7c478bd9Sstevel@tonic-gate ptable_free(ht); 716*7c478bd9Sstevel@tonic-gate } 717*7c478bd9Sstevel@tonic-gate 718*7c478bd9Sstevel@tonic-gate /* 719*7c478bd9Sstevel@tonic-gate * All attempts to allocate or steal failed... 720*7c478bd9Sstevel@tonic-gate */ 721*7c478bd9Sstevel@tonic-gate if (ht == NULL) 722*7c478bd9Sstevel@tonic-gate panic("htable_alloc(): couldn't steal\n"); 723*7c478bd9Sstevel@tonic-gate 724*7c478bd9Sstevel@tonic-gate /* 725*7c478bd9Sstevel@tonic-gate * Shared page tables have all entries locked and entries may not 726*7c478bd9Sstevel@tonic-gate * be added or deleted. 
727*7c478bd9Sstevel@tonic-gate */ 728*7c478bd9Sstevel@tonic-gate ht->ht_flags = 0; 729*7c478bd9Sstevel@tonic-gate if (shared != NULL) { 730*7c478bd9Sstevel@tonic-gate ASSERT(level == 0); 731*7c478bd9Sstevel@tonic-gate ASSERT(shared->ht_valid_cnt > 0); 732*7c478bd9Sstevel@tonic-gate ht->ht_flags |= HTABLE_SHARED_PFN; 733*7c478bd9Sstevel@tonic-gate ht->ht_pfn = shared->ht_pfn; 734*7c478bd9Sstevel@tonic-gate ht->ht_lock_cnt = 0; 735*7c478bd9Sstevel@tonic-gate ht->ht_valid_cnt = 0; /* updated in hat_share() */ 736*7c478bd9Sstevel@tonic-gate ht->ht_shares = shared; 737*7c478bd9Sstevel@tonic-gate need_to_zero = 0; 738*7c478bd9Sstevel@tonic-gate } else { 739*7c478bd9Sstevel@tonic-gate ht->ht_shares = NULL; 740*7c478bd9Sstevel@tonic-gate ht->ht_lock_cnt = 0; 741*7c478bd9Sstevel@tonic-gate ht->ht_valid_cnt = 0; 742*7c478bd9Sstevel@tonic-gate } 743*7c478bd9Sstevel@tonic-gate 744*7c478bd9Sstevel@tonic-gate /* 745*7c478bd9Sstevel@tonic-gate * setup flags, etc. for VLP htables 746*7c478bd9Sstevel@tonic-gate */ 747*7c478bd9Sstevel@tonic-gate if (is_vlp) { 748*7c478bd9Sstevel@tonic-gate ht->ht_flags |= HTABLE_VLP; 749*7c478bd9Sstevel@tonic-gate ht->ht_num_ptes = VLP_NUM_PTES; 750*7c478bd9Sstevel@tonic-gate ASSERT(ht->ht_pfn == PFN_INVALID); 751*7c478bd9Sstevel@tonic-gate need_to_zero = 0; 752*7c478bd9Sstevel@tonic-gate } else if (level == mmu.max_level) { 753*7c478bd9Sstevel@tonic-gate ht->ht_num_ptes = mmu.top_level_count; 754*7c478bd9Sstevel@tonic-gate } else { 755*7c478bd9Sstevel@tonic-gate ht->ht_num_ptes = mmu.ptes_per_table; 756*7c478bd9Sstevel@tonic-gate } 757*7c478bd9Sstevel@tonic-gate 758*7c478bd9Sstevel@tonic-gate /* 759*7c478bd9Sstevel@tonic-gate * fill in the htable 760*7c478bd9Sstevel@tonic-gate */ 761*7c478bd9Sstevel@tonic-gate ht->ht_hat = hat; 762*7c478bd9Sstevel@tonic-gate ht->ht_parent = NULL; 763*7c478bd9Sstevel@tonic-gate ht->ht_vaddr = vaddr; 764*7c478bd9Sstevel@tonic-gate ht->ht_level = level; 765*7c478bd9Sstevel@tonic-gate ht->ht_busy = 1; 
766*7c478bd9Sstevel@tonic-gate ht->ht_next = NULL; 767*7c478bd9Sstevel@tonic-gate ht->ht_prev = NULL; 768*7c478bd9Sstevel@tonic-gate 769*7c478bd9Sstevel@tonic-gate /* 770*7c478bd9Sstevel@tonic-gate * Zero out any freshly allocated page table 771*7c478bd9Sstevel@tonic-gate */ 772*7c478bd9Sstevel@tonic-gate if (need_to_zero) 773*7c478bd9Sstevel@tonic-gate x86pte_zero(ht, 0, mmu.ptes_per_table); 774*7c478bd9Sstevel@tonic-gate return (ht); 775*7c478bd9Sstevel@tonic-gate } 776*7c478bd9Sstevel@tonic-gate 777*7c478bd9Sstevel@tonic-gate /* 778*7c478bd9Sstevel@tonic-gate * Free up an htable, either to a hat's cached list, the reserves or 779*7c478bd9Sstevel@tonic-gate * back to kmem. 780*7c478bd9Sstevel@tonic-gate */ 781*7c478bd9Sstevel@tonic-gate static void 782*7c478bd9Sstevel@tonic-gate htable_free(htable_t *ht) 783*7c478bd9Sstevel@tonic-gate { 784*7c478bd9Sstevel@tonic-gate hat_t *hat = ht->ht_hat; 785*7c478bd9Sstevel@tonic-gate 786*7c478bd9Sstevel@tonic-gate /* 787*7c478bd9Sstevel@tonic-gate * If the process isn't exiting, cache the free htable in the hat 788*7c478bd9Sstevel@tonic-gate * structure. We always do this for the boot reserve. We don't 789*7c478bd9Sstevel@tonic-gate * do this if the hat is exiting or we are stealing/reaping htables. 
790*7c478bd9Sstevel@tonic-gate */ 791*7c478bd9Sstevel@tonic-gate if (hat != NULL && 792*7c478bd9Sstevel@tonic-gate !(ht->ht_flags & HTABLE_SHARED_PFN) && 793*7c478bd9Sstevel@tonic-gate (use_boot_reserve || 794*7c478bd9Sstevel@tonic-gate (!(hat->hat_flags & HAT_FREEING) && !htable_dont_cache))) { 795*7c478bd9Sstevel@tonic-gate ASSERT((ht->ht_flags & HTABLE_VLP) == 0); 796*7c478bd9Sstevel@tonic-gate ASSERT(ht->ht_pfn != PFN_INVALID); 797*7c478bd9Sstevel@tonic-gate hat_enter(hat); 798*7c478bd9Sstevel@tonic-gate ht->ht_next = hat->hat_ht_cached; 799*7c478bd9Sstevel@tonic-gate hat->hat_ht_cached = ht; 800*7c478bd9Sstevel@tonic-gate hat_exit(hat); 801*7c478bd9Sstevel@tonic-gate return; 802*7c478bd9Sstevel@tonic-gate } 803*7c478bd9Sstevel@tonic-gate 804*7c478bd9Sstevel@tonic-gate /* 805*7c478bd9Sstevel@tonic-gate * If we have a hardware page table, free it. 806*7c478bd9Sstevel@tonic-gate * We don't free page tables that are accessed by sharing someone else. 807*7c478bd9Sstevel@tonic-gate */ 808*7c478bd9Sstevel@tonic-gate if (ht->ht_flags & HTABLE_SHARED_PFN) { 809*7c478bd9Sstevel@tonic-gate ASSERT(ht->ht_pfn != PFN_INVALID); 810*7c478bd9Sstevel@tonic-gate ht->ht_pfn = PFN_INVALID; 811*7c478bd9Sstevel@tonic-gate } else if (!(ht->ht_flags & HTABLE_VLP)) { 812*7c478bd9Sstevel@tonic-gate ptable_free(ht); 813*7c478bd9Sstevel@tonic-gate } 814*7c478bd9Sstevel@tonic-gate 815*7c478bd9Sstevel@tonic-gate /* 816*7c478bd9Sstevel@tonic-gate * If we are the thread using the reserves, put free htables 817*7c478bd9Sstevel@tonic-gate * into reserves. 
818*7c478bd9Sstevel@tonic-gate */ 819*7c478bd9Sstevel@tonic-gate if (curthread == hat_reserves_thread || 820*7c478bd9Sstevel@tonic-gate htable_reserve_cnt < htable_reserve_amount) 821*7c478bd9Sstevel@tonic-gate htable_put_reserve(ht); 822*7c478bd9Sstevel@tonic-gate else 823*7c478bd9Sstevel@tonic-gate kmem_cache_free(htable_cache, ht); 824*7c478bd9Sstevel@tonic-gate } 825*7c478bd9Sstevel@tonic-gate 826*7c478bd9Sstevel@tonic-gate 827*7c478bd9Sstevel@tonic-gate /* 828*7c478bd9Sstevel@tonic-gate * This is called when a hat is being destroyed or swapped out. We reap all 829*7c478bd9Sstevel@tonic-gate * the remaining htables in the hat cache. If destroying all left over 830*7c478bd9Sstevel@tonic-gate * htables are also destroyed. 831*7c478bd9Sstevel@tonic-gate * 832*7c478bd9Sstevel@tonic-gate * We also don't need to invalidate any of the PTPs nor do any demapping. 833*7c478bd9Sstevel@tonic-gate */ 834*7c478bd9Sstevel@tonic-gate void 835*7c478bd9Sstevel@tonic-gate htable_purge_hat(hat_t *hat) 836*7c478bd9Sstevel@tonic-gate { 837*7c478bd9Sstevel@tonic-gate htable_t *ht; 838*7c478bd9Sstevel@tonic-gate int h; 839*7c478bd9Sstevel@tonic-gate 840*7c478bd9Sstevel@tonic-gate /* 841*7c478bd9Sstevel@tonic-gate * Purge the htable cache if just reaping. 
842*7c478bd9Sstevel@tonic-gate */ 843*7c478bd9Sstevel@tonic-gate if (!(hat->hat_flags & HAT_FREEING)) { 844*7c478bd9Sstevel@tonic-gate atomic_add_32(&htable_dont_cache, 1); 845*7c478bd9Sstevel@tonic-gate for (;;) { 846*7c478bd9Sstevel@tonic-gate hat_enter(hat); 847*7c478bd9Sstevel@tonic-gate ht = hat->hat_ht_cached; 848*7c478bd9Sstevel@tonic-gate if (ht == NULL) { 849*7c478bd9Sstevel@tonic-gate hat_exit(hat); 850*7c478bd9Sstevel@tonic-gate break; 851*7c478bd9Sstevel@tonic-gate } 852*7c478bd9Sstevel@tonic-gate hat->hat_ht_cached = ht->ht_next; 853*7c478bd9Sstevel@tonic-gate hat_exit(hat); 854*7c478bd9Sstevel@tonic-gate htable_free(ht); 855*7c478bd9Sstevel@tonic-gate } 856*7c478bd9Sstevel@tonic-gate atomic_add_32(&htable_dont_cache, -1); 857*7c478bd9Sstevel@tonic-gate return; 858*7c478bd9Sstevel@tonic-gate } 859*7c478bd9Sstevel@tonic-gate 860*7c478bd9Sstevel@tonic-gate /* 861*7c478bd9Sstevel@tonic-gate * if freeing, no locking is needed 862*7c478bd9Sstevel@tonic-gate */ 863*7c478bd9Sstevel@tonic-gate while ((ht = hat->hat_ht_cached) != NULL) { 864*7c478bd9Sstevel@tonic-gate hat->hat_ht_cached = ht->ht_next; 865*7c478bd9Sstevel@tonic-gate htable_free(ht); 866*7c478bd9Sstevel@tonic-gate } 867*7c478bd9Sstevel@tonic-gate 868*7c478bd9Sstevel@tonic-gate /* 869*7c478bd9Sstevel@tonic-gate * walk thru the htable hash table and free all the htables in it. 
870*7c478bd9Sstevel@tonic-gate */ 871*7c478bd9Sstevel@tonic-gate for (h = 0; h < hat->hat_num_hash; ++h) { 872*7c478bd9Sstevel@tonic-gate while ((ht = hat->hat_ht_hash[h]) != NULL) { 873*7c478bd9Sstevel@tonic-gate if (ht->ht_next) 874*7c478bd9Sstevel@tonic-gate ht->ht_next->ht_prev = ht->ht_prev; 875*7c478bd9Sstevel@tonic-gate 876*7c478bd9Sstevel@tonic-gate if (ht->ht_prev) { 877*7c478bd9Sstevel@tonic-gate ht->ht_prev->ht_next = ht->ht_next; 878*7c478bd9Sstevel@tonic-gate } else { 879*7c478bd9Sstevel@tonic-gate ASSERT(hat->hat_ht_hash[h] == ht); 880*7c478bd9Sstevel@tonic-gate hat->hat_ht_hash[h] = ht->ht_next; 881*7c478bd9Sstevel@tonic-gate } 882*7c478bd9Sstevel@tonic-gate htable_free(ht); 883*7c478bd9Sstevel@tonic-gate } 884*7c478bd9Sstevel@tonic-gate } 885*7c478bd9Sstevel@tonic-gate } 886*7c478bd9Sstevel@tonic-gate 887*7c478bd9Sstevel@tonic-gate /* 888*7c478bd9Sstevel@tonic-gate * Unlink an entry for a table at vaddr and level out of the existing table 889*7c478bd9Sstevel@tonic-gate * one level higher. We are always holding the HASH_ENTER() when doing this. 
890*7c478bd9Sstevel@tonic-gate */ 891*7c478bd9Sstevel@tonic-gate static void 892*7c478bd9Sstevel@tonic-gate unlink_ptp(htable_t *higher, htable_t *old, uintptr_t vaddr) 893*7c478bd9Sstevel@tonic-gate { 894*7c478bd9Sstevel@tonic-gate uint_t entry = htable_va2entry(vaddr, higher); 895*7c478bd9Sstevel@tonic-gate x86pte_t expect = MAKEPTP(old->ht_pfn, old->ht_level); 896*7c478bd9Sstevel@tonic-gate x86pte_t found; 897*7c478bd9Sstevel@tonic-gate 898*7c478bd9Sstevel@tonic-gate ASSERT(higher->ht_busy > 0); 899*7c478bd9Sstevel@tonic-gate ASSERT(higher->ht_valid_cnt > 0); 900*7c478bd9Sstevel@tonic-gate ASSERT(old->ht_valid_cnt == 0); 901*7c478bd9Sstevel@tonic-gate found = x86pte_cas(higher, entry, expect, 0); 902*7c478bd9Sstevel@tonic-gate if (found != expect) 903*7c478bd9Sstevel@tonic-gate panic("Bad PTP found=" FMT_PTE ", expected=" FMT_PTE, 904*7c478bd9Sstevel@tonic-gate found, expect); 905*7c478bd9Sstevel@tonic-gate HTABLE_DEC(higher->ht_valid_cnt); 906*7c478bd9Sstevel@tonic-gate } 907*7c478bd9Sstevel@tonic-gate 908*7c478bd9Sstevel@tonic-gate /* 909*7c478bd9Sstevel@tonic-gate * Link an entry for a new table at vaddr and level into the existing table 910*7c478bd9Sstevel@tonic-gate * one level higher. We are always holding the HASH_ENTER() when doing this. 
911*7c478bd9Sstevel@tonic-gate */ 912*7c478bd9Sstevel@tonic-gate static void 913*7c478bd9Sstevel@tonic-gate link_ptp(htable_t *higher, htable_t *new, uintptr_t vaddr) 914*7c478bd9Sstevel@tonic-gate { 915*7c478bd9Sstevel@tonic-gate uint_t entry = htable_va2entry(vaddr, higher); 916*7c478bd9Sstevel@tonic-gate x86pte_t newptp = MAKEPTP(new->ht_pfn, new->ht_level); 917*7c478bd9Sstevel@tonic-gate x86pte_t found; 918*7c478bd9Sstevel@tonic-gate 919*7c478bd9Sstevel@tonic-gate ASSERT(higher->ht_busy > 0); 920*7c478bd9Sstevel@tonic-gate 921*7c478bd9Sstevel@tonic-gate ASSERT(new->ht_level != mmu.max_level); 922*7c478bd9Sstevel@tonic-gate 923*7c478bd9Sstevel@tonic-gate HTABLE_INC(higher->ht_valid_cnt); 924*7c478bd9Sstevel@tonic-gate 925*7c478bd9Sstevel@tonic-gate found = x86pte_cas(higher, entry, 0, newptp); 926*7c478bd9Sstevel@tonic-gate if (found != 0) 927*7c478bd9Sstevel@tonic-gate panic("HAT: ptp not 0, found=" FMT_PTE, found); 928*7c478bd9Sstevel@tonic-gate } 929*7c478bd9Sstevel@tonic-gate 930*7c478bd9Sstevel@tonic-gate /* 931*7c478bd9Sstevel@tonic-gate * Release of an htable. 932*7c478bd9Sstevel@tonic-gate * 933*7c478bd9Sstevel@tonic-gate * During process exit, some empty page tables are not unlinked - hat_free_end() 934*7c478bd9Sstevel@tonic-gate * cleans them up. Upper level pagetable (mmu.max_page_level and higher) are 935*7c478bd9Sstevel@tonic-gate * only released during hat_free_end() or by htable_steal(). We always 936*7c478bd9Sstevel@tonic-gate * release SHARED page tables. 
 */
void
htable_release(htable_t *ht)
{
	uint_t		hashval;
	htable_t	*shared;
	htable_t	*higher;
	hat_t		*hat;
	uintptr_t	va;
	level_t		level;

	/*
	 * Outer loop: one pass per htable whose hold must be dropped - the
	 * caller's ht first, then (if a shared htable was destroyed on the
	 * way) the htable it was sharing from.
	 *
	 * Inner loop: while the current htable is empty, otherwise unused
	 * and eligible, destroy it and move up to its parent.  Every "break"
	 * leaves the inner loop with the hash mutex for hashval still held;
	 * it is dropped after the busy count is decremented below.
	 */
	while (ht != NULL) {
		shared = NULL;
		for (;;) {
			hat = ht->ht_hat;
			va = ht->ht_vaddr;
			level = ht->ht_level;
			hashval = HTABLE_HASH(hat, va, level);

			/*
			 * The common case is that this isn't the last use of
			 * an htable so we don't want to free the htable.
			 */
			HTABLE_ENTER(hashval);
			ASSERT(ht->ht_lock_cnt == 0 || ht->ht_valid_cnt > 0);
			ASSERT(ht->ht_valid_cnt >= 0);
			ASSERT(ht->ht_busy > 0);
			if (ht->ht_valid_cnt > 0)
				break;
			if (ht->ht_busy > 1)
				break;

			/*
			 * we always release empty shared htables
			 */
			if (!(ht->ht_flags & HTABLE_SHARED_PFN)) {

				/*
				 * don't release if in address space tear down
				 */
				if (hat->hat_flags & HAT_FREEING)
					break;

				/*
				 * At and above max_page_level, only free if
				 * it belongs to the kernel hat and maps an
				 * address below kernelbase (a boot-time
				 * kernel mapping); keep everything else.
				 */
				if (level >= mmu.max_page_level &&
				    (hat != kas.a_hat || va >= kernelbase))
					break;
			}

			/*
			 * remember if we destroy an htable that shares its PFN
			 * from elsewhere
			 */
			if (ht->ht_flags & HTABLE_SHARED_PFN) {
				ASSERT(ht->ht_level == 0);
				ASSERT(shared == NULL);
				shared = ht->ht_shares;
				HATSTAT_INC(hs_htable_unshared);
			}

			/*
			 * Handle release of a table and freeing the htable_t.
			 * Unlink it from the table higher (ie. ht_parent).
			 */
			ASSERT(ht->ht_lock_cnt == 0);
			higher = ht->ht_parent;
			ASSERT(higher != NULL);

			/*
			 * Unlink the pagetable.
			 */
			unlink_ptp(higher, ht, va);

			/*
			 * When any top level VLP page table entry changes, we
			 * must issue a reload of cr3 on all processors.
			 */
			if ((hat->hat_flags & HAT_VLP) &&
			    level == VLP_LEVEL - 1)
				hat_demap(hat, DEMAP_ALL_ADDR);

			/*
			 * remove this htable from its hash list
			 */
			if (ht->ht_next)
				ht->ht_next->ht_prev = ht->ht_prev;

			if (ht->ht_prev) {
				ht->ht_prev->ht_next = ht->ht_next;
			} else {
				ASSERT(hat->hat_ht_hash[hashval] == ht);
				hat->hat_ht_hash[hashval] = ht->ht_next;
			}
			HTABLE_EXIT(hashval);
			htable_free(ht);

			/*
			 * The destroyed htable held a reference on its
			 * parent, so go around again to release the parent.
			 */
			ht = higher;
		}

		/*
		 * Still in use: just drop our hold.  The hash mutex taken at
		 * the top of the final inner iteration is released here.
		 */
		ASSERT(ht->ht_busy >= 1);
		--ht->ht_busy;
		HTABLE_EXIT(hashval);

		/*
		 * If we released a shared htable, do a release on the htable
		 * from which it shared
		 */
		ht = shared;
	}
}

/*
 * Find the htable for the pagetable at the given level for the given address.
1052*7c478bd9Sstevel@tonic-gate * If found acquires a hold that eventually needs to be htable_release()d 1053*7c478bd9Sstevel@tonic-gate */ 1054*7c478bd9Sstevel@tonic-gate htable_t * 1055*7c478bd9Sstevel@tonic-gate htable_lookup(hat_t *hat, uintptr_t vaddr, level_t level) 1056*7c478bd9Sstevel@tonic-gate { 1057*7c478bd9Sstevel@tonic-gate uintptr_t base; 1058*7c478bd9Sstevel@tonic-gate uint_t hashval; 1059*7c478bd9Sstevel@tonic-gate htable_t *ht = NULL; 1060*7c478bd9Sstevel@tonic-gate 1061*7c478bd9Sstevel@tonic-gate ASSERT(level >= 0); 1062*7c478bd9Sstevel@tonic-gate ASSERT(level <= TOP_LEVEL(hat)); 1063*7c478bd9Sstevel@tonic-gate 1064*7c478bd9Sstevel@tonic-gate if (level == TOP_LEVEL(hat)) 1065*7c478bd9Sstevel@tonic-gate base = 0; 1066*7c478bd9Sstevel@tonic-gate else 1067*7c478bd9Sstevel@tonic-gate base = vaddr & LEVEL_MASK(level + 1); 1068*7c478bd9Sstevel@tonic-gate 1069*7c478bd9Sstevel@tonic-gate hashval = HTABLE_HASH(hat, base, level); 1070*7c478bd9Sstevel@tonic-gate HTABLE_ENTER(hashval); 1071*7c478bd9Sstevel@tonic-gate for (ht = hat->hat_ht_hash[hashval]; ht; ht = ht->ht_next) { 1072*7c478bd9Sstevel@tonic-gate if (ht->ht_hat == hat && 1073*7c478bd9Sstevel@tonic-gate ht->ht_vaddr == base && 1074*7c478bd9Sstevel@tonic-gate ht->ht_level == level) 1075*7c478bd9Sstevel@tonic-gate break; 1076*7c478bd9Sstevel@tonic-gate } 1077*7c478bd9Sstevel@tonic-gate if (ht) 1078*7c478bd9Sstevel@tonic-gate ++ht->ht_busy; 1079*7c478bd9Sstevel@tonic-gate 1080*7c478bd9Sstevel@tonic-gate HTABLE_EXIT(hashval); 1081*7c478bd9Sstevel@tonic-gate return (ht); 1082*7c478bd9Sstevel@tonic-gate } 1083*7c478bd9Sstevel@tonic-gate 1084*7c478bd9Sstevel@tonic-gate /* 1085*7c478bd9Sstevel@tonic-gate * Acquires a hold on a known htable (from a locked hment entry). 
1086*7c478bd9Sstevel@tonic-gate */ 1087*7c478bd9Sstevel@tonic-gate void 1088*7c478bd9Sstevel@tonic-gate htable_acquire(htable_t *ht) 1089*7c478bd9Sstevel@tonic-gate { 1090*7c478bd9Sstevel@tonic-gate hat_t *hat = ht->ht_hat; 1091*7c478bd9Sstevel@tonic-gate level_t level = ht->ht_level; 1092*7c478bd9Sstevel@tonic-gate uintptr_t base = ht->ht_vaddr; 1093*7c478bd9Sstevel@tonic-gate uint_t hashval = HTABLE_HASH(hat, base, level); 1094*7c478bd9Sstevel@tonic-gate 1095*7c478bd9Sstevel@tonic-gate HTABLE_ENTER(hashval); 1096*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1097*7c478bd9Sstevel@tonic-gate /* 1098*7c478bd9Sstevel@tonic-gate * make sure the htable is there 1099*7c478bd9Sstevel@tonic-gate */ 1100*7c478bd9Sstevel@tonic-gate { 1101*7c478bd9Sstevel@tonic-gate htable_t *h; 1102*7c478bd9Sstevel@tonic-gate 1103*7c478bd9Sstevel@tonic-gate for (h = hat->hat_ht_hash[hashval]; 1104*7c478bd9Sstevel@tonic-gate h && h != ht; 1105*7c478bd9Sstevel@tonic-gate h = h->ht_next) 1106*7c478bd9Sstevel@tonic-gate ; 1107*7c478bd9Sstevel@tonic-gate ASSERT(h == ht); 1108*7c478bd9Sstevel@tonic-gate } 1109*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 1110*7c478bd9Sstevel@tonic-gate ++ht->ht_busy; 1111*7c478bd9Sstevel@tonic-gate HTABLE_EXIT(hashval); 1112*7c478bd9Sstevel@tonic-gate } 1113*7c478bd9Sstevel@tonic-gate 1114*7c478bd9Sstevel@tonic-gate /* 1115*7c478bd9Sstevel@tonic-gate * Find the htable for the pagetable at the given level for the given address. 1116*7c478bd9Sstevel@tonic-gate * If found acquires a hold that eventually needs to be htable_release()d 1117*7c478bd9Sstevel@tonic-gate * If not found the table is created. 1118*7c478bd9Sstevel@tonic-gate * 1119*7c478bd9Sstevel@tonic-gate * Since we can't hold a hash table mutex during allocation, we have to 1120*7c478bd9Sstevel@tonic-gate * drop it and redo the search on a create. Then we may have to free the newly 1121*7c478bd9Sstevel@tonic-gate * allocated htable if another thread raced in and created it ahead of us. 
 */
htable_t *
htable_create(
	hat_t		*hat,
	uintptr_t	vaddr,
	level_t		level,
	htable_t	*shared)
{
	uint_t		h;
	level_t		l;
	uintptr_t	base;
	htable_t	*ht;
	htable_t	*higher = NULL;	/* htable found/created one level up */
	htable_t	*new = NULL;	/* freshly allocated, not yet linked */

	if (level < 0 || level > TOP_LEVEL(hat))
		panic("htable_create(): level %d out of range\n", level);

	/*
	 * Create the page tables in top down order.
	 * Each iteration leaves "ht" (and "higher") pointing at the held
	 * htable for level l; the one for the requested level is returned.
	 */
	for (l = TOP_LEVEL(hat); l >= level; --l) {
		new = NULL;
		if (l == TOP_LEVEL(hat))
			base = 0;
		else
			base = vaddr & LEVEL_MASK(l + 1);

		h = HTABLE_HASH(hat, base, l);
try_again:
		/*
		 * look up the htable at this level
		 */
		HTABLE_ENTER(h);
		if (l == TOP_LEVEL(hat)) {
			ht = hat->hat_htable;
		} else {
			for (ht = hat->hat_ht_hash[h]; ht; ht = ht->ht_next) {
				ASSERT(ht->ht_hat == hat);
				if (ht->ht_vaddr == base &&
				    ht->ht_level == l)
					break;
			}
		}

		/*
		 * if we found the htable, increment its busy cnt
		 * and if we had allocated a new htable, free it.
		 */
		if (ht != NULL) {
			/*
			 * If we find a pre-existing shared table, it must
			 * share from the same place.
			 */
			if (l == level && shared && ht->ht_shares &&
			    ht->ht_shares != shared) {
				panic("htable shared from wrong place "
				    "found htable=%p shared=%p", ht, shared);
			}
			++ht->ht_busy;
			HTABLE_EXIT(h);
			if (new)
				htable_free(new);
			/*
			 * We now hold ht, so the hold taken on the level
			 * above during the previous iteration can go.
			 */
			if (higher != NULL)
				htable_release(higher);
			higher = ht;

		/*
		 * if we didn't find it on the first search
		 * allocate a new one and search again
		 * (the hash mutex is dropped across the allocation)
		 */
		} else if (new == NULL) {
			HTABLE_EXIT(h);
			new = htable_alloc(hat, base, l,
			    l == level ? shared : NULL);
			goto try_again;

		/*
		 * 2nd search and still not there, use "new" table
		 * Link new table into higher, when not at top level.
		 */
		} else {
			ht = new;
			if (higher != NULL) {
				link_ptp(higher, ht, base);
				ht->ht_parent = higher;

				/*
				 * When any top level VLP page table changes,
				 * we must reload cr3 on all processors.
				 *
				 * NOTE(review): htable_release() tests only
				 * HAT_VLP for the matching reload; confirm
				 * the __i386 mmu.pae_hat test is intended.
				 */
#ifdef __i386
				if (mmu.pae_hat &&
#else /* !__i386 */
				if ((hat->hat_flags & HAT_VLP) &&
#endif /* __i386 */
				    l == VLP_LEVEL - 1)
					hat_demap(hat, DEMAP_ALL_ADDR);
			}
			/* insert at the head of the hash chain */
			ht->ht_next = hat->hat_ht_hash[h];
			ASSERT(ht->ht_prev == NULL);
			if (hat->hat_ht_hash[h])
				hat->hat_ht_hash[h]->ht_prev = ht;
			hat->hat_ht_hash[h] = ht;
			HTABLE_EXIT(h);

			/*
			 * Note we don't do htable_release(higher).
			 * That happens recursively when "new" is removed by
			 * htable_release() or htable_steal().
			 */
			higher = ht;

			/*
			 * If we just created a new shared page table we
			 * increment the shared htable's busy count, so that
			 * it can't be the victim of a steal even if it's empty.
			 * The lookup is done only for that side effect.
			 */
			if (l == level && shared) {
				(void) htable_lookup(shared->ht_hat,
				    shared->ht_vaddr, shared->ht_level);
				HATSTAT_INC(hs_htable_shared);
			}
		}
	}

	return (ht);
}

/*
 * Walk through a given htable looking for the first valid entry.  This
 * routine takes both a starting and ending address.  The starting address
 * is required to be within the htable provided by the caller, but there is
 * no such restriction on the ending address.
 *
 * If the routine finds a valid entry in the htable (at or beyond the
 * starting address), the PTE (and its address) will be returned.
 * This PTE may correspond to either a page or a pagetable - it is the
 * caller's responsibility to determine which.  If no valid entry is
 * found, 0 (an invalid PTE) and the next unexamined address will be
 * returned.
1263*7c478bd9Sstevel@tonic-gate * 1264*7c478bd9Sstevel@tonic-gate * The loop has been carefully coded for optimization. 1265*7c478bd9Sstevel@tonic-gate */ 1266*7c478bd9Sstevel@tonic-gate static x86pte_t 1267*7c478bd9Sstevel@tonic-gate htable_scan(htable_t *ht, uintptr_t *vap, uintptr_t eaddr) 1268*7c478bd9Sstevel@tonic-gate { 1269*7c478bd9Sstevel@tonic-gate uint_t e; 1270*7c478bd9Sstevel@tonic-gate x86pte_t found_pte = (x86pte_t)0; 1271*7c478bd9Sstevel@tonic-gate char *pte_ptr; 1272*7c478bd9Sstevel@tonic-gate char *end_pte_ptr; 1273*7c478bd9Sstevel@tonic-gate int l = ht->ht_level; 1274*7c478bd9Sstevel@tonic-gate uintptr_t va = *vap & LEVEL_MASK(l); 1275*7c478bd9Sstevel@tonic-gate size_t pgsize = LEVEL_SIZE(l); 1276*7c478bd9Sstevel@tonic-gate 1277*7c478bd9Sstevel@tonic-gate ASSERT(va >= ht->ht_vaddr); 1278*7c478bd9Sstevel@tonic-gate ASSERT(va <= HTABLE_LAST_PAGE(ht)); 1279*7c478bd9Sstevel@tonic-gate 1280*7c478bd9Sstevel@tonic-gate /* 1281*7c478bd9Sstevel@tonic-gate * Compute the starting index and ending virtual address 1282*7c478bd9Sstevel@tonic-gate */ 1283*7c478bd9Sstevel@tonic-gate e = htable_va2entry(va, ht); 1284*7c478bd9Sstevel@tonic-gate 1285*7c478bd9Sstevel@tonic-gate /* 1286*7c478bd9Sstevel@tonic-gate * The following page table scan code knows that the valid 1287*7c478bd9Sstevel@tonic-gate * bit of a PTE is in the lowest byte AND that x86 is little endian!! 
1288*7c478bd9Sstevel@tonic-gate */ 1289*7c478bd9Sstevel@tonic-gate pte_ptr = (char *)x86pte_access_pagetable(ht); 1290*7c478bd9Sstevel@tonic-gate end_pte_ptr = pte_ptr + (ht->ht_num_ptes << mmu.pte_size_shift); 1291*7c478bd9Sstevel@tonic-gate pte_ptr += e << mmu.pte_size_shift; 1292*7c478bd9Sstevel@tonic-gate while (*pte_ptr == 0) { 1293*7c478bd9Sstevel@tonic-gate va += pgsize; 1294*7c478bd9Sstevel@tonic-gate if (va >= eaddr) 1295*7c478bd9Sstevel@tonic-gate break; 1296*7c478bd9Sstevel@tonic-gate pte_ptr += mmu.pte_size; 1297*7c478bd9Sstevel@tonic-gate ASSERT(pte_ptr <= end_pte_ptr); 1298*7c478bd9Sstevel@tonic-gate if (pte_ptr == end_pte_ptr) 1299*7c478bd9Sstevel@tonic-gate break; 1300*7c478bd9Sstevel@tonic-gate } 1301*7c478bd9Sstevel@tonic-gate 1302*7c478bd9Sstevel@tonic-gate /* 1303*7c478bd9Sstevel@tonic-gate * if we found a valid PTE, load the entire PTE 1304*7c478bd9Sstevel@tonic-gate */ 1305*7c478bd9Sstevel@tonic-gate if (va < eaddr && pte_ptr != end_pte_ptr) { 1306*7c478bd9Sstevel@tonic-gate if (mmu.pae_hat) { 1307*7c478bd9Sstevel@tonic-gate found_pte = *(x86pte_t *)pte_ptr; 1308*7c478bd9Sstevel@tonic-gate #if defined(__i386) 1309*7c478bd9Sstevel@tonic-gate /* 1310*7c478bd9Sstevel@tonic-gate * 64 bit reads on 32 bit x86 are not atomic 1311*7c478bd9Sstevel@tonic-gate */ 1312*7c478bd9Sstevel@tonic-gate while (found_pte != *(volatile x86pte_t *)pte_ptr) 1313*7c478bd9Sstevel@tonic-gate found_pte = *(volatile x86pte_t *)pte_ptr; 1314*7c478bd9Sstevel@tonic-gate #endif 1315*7c478bd9Sstevel@tonic-gate } else { 1316*7c478bd9Sstevel@tonic-gate found_pte = *(x86pte32_t *)pte_ptr; 1317*7c478bd9Sstevel@tonic-gate } 1318*7c478bd9Sstevel@tonic-gate } 1319*7c478bd9Sstevel@tonic-gate x86pte_release_pagetable(ht); 1320*7c478bd9Sstevel@tonic-gate 1321*7c478bd9Sstevel@tonic-gate #if defined(__amd64) 1322*7c478bd9Sstevel@tonic-gate /* 1323*7c478bd9Sstevel@tonic-gate * deal with VA hole on amd64 1324*7c478bd9Sstevel@tonic-gate */ 1325*7c478bd9Sstevel@tonic-gate if (l == 
mmu.max_level && va >= mmu.hole_start && va <= mmu.hole_end) 1326*7c478bd9Sstevel@tonic-gate va = mmu.hole_end + va - mmu.hole_start; 1327*7c478bd9Sstevel@tonic-gate #endif /* __amd64 */ 1328*7c478bd9Sstevel@tonic-gate 1329*7c478bd9Sstevel@tonic-gate *vap = va; 1330*7c478bd9Sstevel@tonic-gate return (found_pte); 1331*7c478bd9Sstevel@tonic-gate } 1332*7c478bd9Sstevel@tonic-gate 1333*7c478bd9Sstevel@tonic-gate /* 1334*7c478bd9Sstevel@tonic-gate * Find the address and htable for the first populated translation at or 1335*7c478bd9Sstevel@tonic-gate * above the given virtual address. The caller may also specify an upper 1336*7c478bd9Sstevel@tonic-gate * limit to the address range to search. Uses level information to quickly 1337*7c478bd9Sstevel@tonic-gate * skip unpopulated sections of virtual address spaces. 1338*7c478bd9Sstevel@tonic-gate * 1339*7c478bd9Sstevel@tonic-gate * If not found returns NULL. When found, returns the htable and virt addr 1340*7c478bd9Sstevel@tonic-gate * and has a hold on the htable. 1341*7c478bd9Sstevel@tonic-gate */ 1342*7c478bd9Sstevel@tonic-gate x86pte_t 1343*7c478bd9Sstevel@tonic-gate htable_walk( 1344*7c478bd9Sstevel@tonic-gate struct hat *hat, 1345*7c478bd9Sstevel@tonic-gate htable_t **htp, 1346*7c478bd9Sstevel@tonic-gate uintptr_t *vaddr, 1347*7c478bd9Sstevel@tonic-gate uintptr_t eaddr) 1348*7c478bd9Sstevel@tonic-gate { 1349*7c478bd9Sstevel@tonic-gate uintptr_t va = *vaddr; 1350*7c478bd9Sstevel@tonic-gate htable_t *ht; 1351*7c478bd9Sstevel@tonic-gate htable_t *prev = *htp; 1352*7c478bd9Sstevel@tonic-gate level_t l; 1353*7c478bd9Sstevel@tonic-gate level_t max_mapped_level; 1354*7c478bd9Sstevel@tonic-gate x86pte_t pte; 1355*7c478bd9Sstevel@tonic-gate 1356*7c478bd9Sstevel@tonic-gate ASSERT(eaddr > va); 1357*7c478bd9Sstevel@tonic-gate 1358*7c478bd9Sstevel@tonic-gate /* 1359*7c478bd9Sstevel@tonic-gate * If this is a user address, then we know we need not look beyond 1360*7c478bd9Sstevel@tonic-gate * kernelbase. 
1361*7c478bd9Sstevel@tonic-gate */ 1362*7c478bd9Sstevel@tonic-gate ASSERT(hat == kas.a_hat || eaddr <= kernelbase || 1363*7c478bd9Sstevel@tonic-gate eaddr == HTABLE_WALK_TO_END); 1364*7c478bd9Sstevel@tonic-gate if (hat != kas.a_hat && eaddr == HTABLE_WALK_TO_END) 1365*7c478bd9Sstevel@tonic-gate eaddr = kernelbase; 1366*7c478bd9Sstevel@tonic-gate 1367*7c478bd9Sstevel@tonic-gate /* 1368*7c478bd9Sstevel@tonic-gate * If we're coming in with a previous page table, search it first 1369*7c478bd9Sstevel@tonic-gate * without doing an htable_lookup(), this should be frequent. 1370*7c478bd9Sstevel@tonic-gate */ 1371*7c478bd9Sstevel@tonic-gate if (prev) { 1372*7c478bd9Sstevel@tonic-gate ASSERT(prev->ht_busy > 0); 1373*7c478bd9Sstevel@tonic-gate ASSERT(prev->ht_vaddr <= va); 1374*7c478bd9Sstevel@tonic-gate l = prev->ht_level; 1375*7c478bd9Sstevel@tonic-gate if (va <= HTABLE_LAST_PAGE(prev)) { 1376*7c478bd9Sstevel@tonic-gate pte = htable_scan(prev, &va, eaddr); 1377*7c478bd9Sstevel@tonic-gate 1378*7c478bd9Sstevel@tonic-gate if (PTE_ISPAGE(pte, l)) { 1379*7c478bd9Sstevel@tonic-gate *vaddr = va; 1380*7c478bd9Sstevel@tonic-gate *htp = prev; 1381*7c478bd9Sstevel@tonic-gate return (pte); 1382*7c478bd9Sstevel@tonic-gate } 1383*7c478bd9Sstevel@tonic-gate } 1384*7c478bd9Sstevel@tonic-gate 1385*7c478bd9Sstevel@tonic-gate /* 1386*7c478bd9Sstevel@tonic-gate * We found nothing in the htable provided by the caller, 1387*7c478bd9Sstevel@tonic-gate * so fall through and do the full search 1388*7c478bd9Sstevel@tonic-gate */ 1389*7c478bd9Sstevel@tonic-gate htable_release(prev); 1390*7c478bd9Sstevel@tonic-gate } 1391*7c478bd9Sstevel@tonic-gate 1392*7c478bd9Sstevel@tonic-gate /* 1393*7c478bd9Sstevel@tonic-gate * Find the level of the largest pagesize used by this HAT. 
1394*7c478bd9Sstevel@tonic-gate */ 1395*7c478bd9Sstevel@tonic-gate max_mapped_level = 0; 1396*7c478bd9Sstevel@tonic-gate for (l = 1; l <= mmu.max_page_level; ++l) 1397*7c478bd9Sstevel@tonic-gate if (hat->hat_pages_mapped[l] != 0) 1398*7c478bd9Sstevel@tonic-gate max_mapped_level = l; 1399*7c478bd9Sstevel@tonic-gate 1400*7c478bd9Sstevel@tonic-gate while (va < eaddr && va >= *vaddr) { 1401*7c478bd9Sstevel@tonic-gate ASSERT(!IN_VA_HOLE(va)); 1402*7c478bd9Sstevel@tonic-gate 1403*7c478bd9Sstevel@tonic-gate /* 1404*7c478bd9Sstevel@tonic-gate * Find lowest table with any entry for given address. 1405*7c478bd9Sstevel@tonic-gate */ 1406*7c478bd9Sstevel@tonic-gate for (l = 0; l <= TOP_LEVEL(hat); ++l) { 1407*7c478bd9Sstevel@tonic-gate ht = htable_lookup(hat, va, l); 1408*7c478bd9Sstevel@tonic-gate if (ht != NULL) { 1409*7c478bd9Sstevel@tonic-gate pte = htable_scan(ht, &va, eaddr); 1410*7c478bd9Sstevel@tonic-gate if (PTE_ISPAGE(pte, l)) { 1411*7c478bd9Sstevel@tonic-gate *vaddr = va; 1412*7c478bd9Sstevel@tonic-gate *htp = ht; 1413*7c478bd9Sstevel@tonic-gate return (pte); 1414*7c478bd9Sstevel@tonic-gate } 1415*7c478bd9Sstevel@tonic-gate htable_release(ht); 1416*7c478bd9Sstevel@tonic-gate break; 1417*7c478bd9Sstevel@tonic-gate } 1418*7c478bd9Sstevel@tonic-gate 1419*7c478bd9Sstevel@tonic-gate /* 1420*7c478bd9Sstevel@tonic-gate * The ht is never NULL at the top level since 1421*7c478bd9Sstevel@tonic-gate * the top level htable is created in hat_alloc(). 1422*7c478bd9Sstevel@tonic-gate */ 1423*7c478bd9Sstevel@tonic-gate ASSERT(l < TOP_LEVEL(hat)); 1424*7c478bd9Sstevel@tonic-gate 1425*7c478bd9Sstevel@tonic-gate /* 1426*7c478bd9Sstevel@tonic-gate * No htable covers the address. If there is no 1427*7c478bd9Sstevel@tonic-gate * larger page size that could cover it, we 1428*7c478bd9Sstevel@tonic-gate * skip to the start of the next page table. 
1429*7c478bd9Sstevel@tonic-gate */ 1430*7c478bd9Sstevel@tonic-gate if (l >= max_mapped_level) { 1431*7c478bd9Sstevel@tonic-gate va = NEXT_ENTRY_VA(va, l + 1); 1432*7c478bd9Sstevel@tonic-gate break; 1433*7c478bd9Sstevel@tonic-gate } 1434*7c478bd9Sstevel@tonic-gate } 1435*7c478bd9Sstevel@tonic-gate } 1436*7c478bd9Sstevel@tonic-gate 1437*7c478bd9Sstevel@tonic-gate *vaddr = 0; 1438*7c478bd9Sstevel@tonic-gate *htp = NULL; 1439*7c478bd9Sstevel@tonic-gate return (0); 1440*7c478bd9Sstevel@tonic-gate } 1441*7c478bd9Sstevel@tonic-gate 1442*7c478bd9Sstevel@tonic-gate /* 1443*7c478bd9Sstevel@tonic-gate * Find the htable and page table entry index of the given virtual address 1444*7c478bd9Sstevel@tonic-gate * with pagesize at or below given level. 1445*7c478bd9Sstevel@tonic-gate * If not found returns NULL. When found, returns the htable, sets 1446*7c478bd9Sstevel@tonic-gate * entry, and has a hold on the htable. 1447*7c478bd9Sstevel@tonic-gate */ 1448*7c478bd9Sstevel@tonic-gate htable_t * 1449*7c478bd9Sstevel@tonic-gate htable_getpte( 1450*7c478bd9Sstevel@tonic-gate struct hat *hat, 1451*7c478bd9Sstevel@tonic-gate uintptr_t vaddr, 1452*7c478bd9Sstevel@tonic-gate uint_t *entry, 1453*7c478bd9Sstevel@tonic-gate x86pte_t *pte, 1454*7c478bd9Sstevel@tonic-gate level_t level) 1455*7c478bd9Sstevel@tonic-gate { 1456*7c478bd9Sstevel@tonic-gate htable_t *ht; 1457*7c478bd9Sstevel@tonic-gate level_t l; 1458*7c478bd9Sstevel@tonic-gate uint_t e; 1459*7c478bd9Sstevel@tonic-gate 1460*7c478bd9Sstevel@tonic-gate ASSERT(level <= mmu.max_page_level); 1461*7c478bd9Sstevel@tonic-gate 1462*7c478bd9Sstevel@tonic-gate for (l = 0; l <= level; ++l) { 1463*7c478bd9Sstevel@tonic-gate ht = htable_lookup(hat, vaddr, l); 1464*7c478bd9Sstevel@tonic-gate if (ht == NULL) 1465*7c478bd9Sstevel@tonic-gate continue; 1466*7c478bd9Sstevel@tonic-gate e = htable_va2entry(vaddr, ht); 1467*7c478bd9Sstevel@tonic-gate if (entry != NULL) 1468*7c478bd9Sstevel@tonic-gate *entry = e; 1469*7c478bd9Sstevel@tonic-gate if (pte != 
NULL) 1470*7c478bd9Sstevel@tonic-gate *pte = x86pte_get(ht, e); 1471*7c478bd9Sstevel@tonic-gate return (ht); 1472*7c478bd9Sstevel@tonic-gate } 1473*7c478bd9Sstevel@tonic-gate return (NULL); 1474*7c478bd9Sstevel@tonic-gate } 1475*7c478bd9Sstevel@tonic-gate 1476*7c478bd9Sstevel@tonic-gate /* 1477*7c478bd9Sstevel@tonic-gate * Find the htable and page table entry index of the given virtual address. 1478*7c478bd9Sstevel@tonic-gate * There must be a valid page mapped at the given address. 1479*7c478bd9Sstevel@tonic-gate * If not found returns NULL. When found, returns the htable, sets 1480*7c478bd9Sstevel@tonic-gate * entry, and has a hold on the htable. 1481*7c478bd9Sstevel@tonic-gate */ 1482*7c478bd9Sstevel@tonic-gate htable_t * 1483*7c478bd9Sstevel@tonic-gate htable_getpage(struct hat *hat, uintptr_t vaddr, uint_t *entry) 1484*7c478bd9Sstevel@tonic-gate { 1485*7c478bd9Sstevel@tonic-gate htable_t *ht; 1486*7c478bd9Sstevel@tonic-gate uint_t e; 1487*7c478bd9Sstevel@tonic-gate x86pte_t pte; 1488*7c478bd9Sstevel@tonic-gate 1489*7c478bd9Sstevel@tonic-gate ht = htable_getpte(hat, vaddr, &e, &pte, mmu.max_page_level); 1490*7c478bd9Sstevel@tonic-gate if (ht == NULL) 1491*7c478bd9Sstevel@tonic-gate return (NULL); 1492*7c478bd9Sstevel@tonic-gate 1493*7c478bd9Sstevel@tonic-gate if (entry) 1494*7c478bd9Sstevel@tonic-gate *entry = e; 1495*7c478bd9Sstevel@tonic-gate 1496*7c478bd9Sstevel@tonic-gate if (PTE_ISPAGE(pte, ht->ht_level)) 1497*7c478bd9Sstevel@tonic-gate return (ht); 1498*7c478bd9Sstevel@tonic-gate htable_release(ht); 1499*7c478bd9Sstevel@tonic-gate return (NULL); 1500*7c478bd9Sstevel@tonic-gate } 1501*7c478bd9Sstevel@tonic-gate 1502*7c478bd9Sstevel@tonic-gate 1503*7c478bd9Sstevel@tonic-gate void 1504*7c478bd9Sstevel@tonic-gate htable_init() 1505*7c478bd9Sstevel@tonic-gate { 1506*7c478bd9Sstevel@tonic-gate /* 1507*7c478bd9Sstevel@tonic-gate * To save on kernel VA usage, we avoid debug information in 32 bit 1508*7c478bd9Sstevel@tonic-gate * kernels. 
1509*7c478bd9Sstevel@tonic-gate */ 1510*7c478bd9Sstevel@tonic-gate #if defined(__amd64) 1511*7c478bd9Sstevel@tonic-gate int kmem_flags = KMC_NOHASH; 1512*7c478bd9Sstevel@tonic-gate #elif defined(__i386) 1513*7c478bd9Sstevel@tonic-gate int kmem_flags = KMC_NOHASH | KMC_NODEBUG; 1514*7c478bd9Sstevel@tonic-gate #endif 1515*7c478bd9Sstevel@tonic-gate 1516*7c478bd9Sstevel@tonic-gate /* 1517*7c478bd9Sstevel@tonic-gate * initialize kmem caches 1518*7c478bd9Sstevel@tonic-gate */ 1519*7c478bd9Sstevel@tonic-gate htable_cache = kmem_cache_create("htable_t", 1520*7c478bd9Sstevel@tonic-gate sizeof (htable_t), 0, NULL, NULL, 1521*7c478bd9Sstevel@tonic-gate htable_reap, NULL, hat_memload_arena, kmem_flags); 1522*7c478bd9Sstevel@tonic-gate } 1523*7c478bd9Sstevel@tonic-gate 1524*7c478bd9Sstevel@tonic-gate /* 1525*7c478bd9Sstevel@tonic-gate * get the pte index for the virtual address in the given htable's pagetable 1526*7c478bd9Sstevel@tonic-gate */ 1527*7c478bd9Sstevel@tonic-gate uint_t 1528*7c478bd9Sstevel@tonic-gate htable_va2entry(uintptr_t va, htable_t *ht) 1529*7c478bd9Sstevel@tonic-gate { 1530*7c478bd9Sstevel@tonic-gate level_t l = ht->ht_level; 1531*7c478bd9Sstevel@tonic-gate 1532*7c478bd9Sstevel@tonic-gate ASSERT(va >= ht->ht_vaddr); 1533*7c478bd9Sstevel@tonic-gate ASSERT(va <= HTABLE_LAST_PAGE(ht)); 1534*7c478bd9Sstevel@tonic-gate return ((va >> LEVEL_SHIFT(l)) & (ht->ht_num_ptes - 1)); 1535*7c478bd9Sstevel@tonic-gate } 1536*7c478bd9Sstevel@tonic-gate 1537*7c478bd9Sstevel@tonic-gate /* 1538*7c478bd9Sstevel@tonic-gate * Given an htable and the index of a pte in it, return the virtual address 1539*7c478bd9Sstevel@tonic-gate * of the page. 
1540*7c478bd9Sstevel@tonic-gate */ 1541*7c478bd9Sstevel@tonic-gate uintptr_t 1542*7c478bd9Sstevel@tonic-gate htable_e2va(htable_t *ht, uint_t entry) 1543*7c478bd9Sstevel@tonic-gate { 1544*7c478bd9Sstevel@tonic-gate level_t l = ht->ht_level; 1545*7c478bd9Sstevel@tonic-gate uintptr_t va; 1546*7c478bd9Sstevel@tonic-gate 1547*7c478bd9Sstevel@tonic-gate ASSERT(entry < ht->ht_num_ptes); 1548*7c478bd9Sstevel@tonic-gate va = ht->ht_vaddr + ((uintptr_t)entry << LEVEL_SHIFT(l)); 1549*7c478bd9Sstevel@tonic-gate 1550*7c478bd9Sstevel@tonic-gate /* 1551*7c478bd9Sstevel@tonic-gate * Need to skip over any VA hole in top level table 1552*7c478bd9Sstevel@tonic-gate */ 1553*7c478bd9Sstevel@tonic-gate #if defined(__amd64) 1554*7c478bd9Sstevel@tonic-gate if (ht->ht_level == mmu.max_level && va >= mmu.hole_start) 1555*7c478bd9Sstevel@tonic-gate va += ((mmu.hole_end - mmu.hole_start) + 1); 1556*7c478bd9Sstevel@tonic-gate #endif 1557*7c478bd9Sstevel@tonic-gate 1558*7c478bd9Sstevel@tonic-gate return (va); 1559*7c478bd9Sstevel@tonic-gate } 1560*7c478bd9Sstevel@tonic-gate 1561*7c478bd9Sstevel@tonic-gate /* 1562*7c478bd9Sstevel@tonic-gate * The code uses compare and swap instructions to read/write PTE's to 1563*7c478bd9Sstevel@tonic-gate * avoid atomicity problems, since PTEs can be 8 bytes on 32 bit systems. 1564*7c478bd9Sstevel@tonic-gate * Again this can be optimized on 64 bit systems, since aligned load/store 1565*7c478bd9Sstevel@tonic-gate * will naturally be atomic. 1566*7c478bd9Sstevel@tonic-gate * 1567*7c478bd9Sstevel@tonic-gate * The combination of using kpreempt_disable()/_enable() and the hci_mutex 1568*7c478bd9Sstevel@tonic-gate * are used to ensure that an interrupt won't overwrite a temporary mapping 1569*7c478bd9Sstevel@tonic-gate * while it's in use. If an interrupt thread tries to access a PTE, it will 1570*7c478bd9Sstevel@tonic-gate * yield briefly back to the pinned thread which holds the cpu's hci_mutex. 
1571*7c478bd9Sstevel@tonic-gate */ 1572*7c478bd9Sstevel@tonic-gate 1573*7c478bd9Sstevel@tonic-gate static struct hat_cpu_info init_hci; /* used for cpu 0 */ 1574*7c478bd9Sstevel@tonic-gate 1575*7c478bd9Sstevel@tonic-gate /* 1576*7c478bd9Sstevel@tonic-gate * Initialize a CPU private window for mapping page tables. 1577*7c478bd9Sstevel@tonic-gate * There will be 3 total pages of addressing needed: 1578*7c478bd9Sstevel@tonic-gate * 1579*7c478bd9Sstevel@tonic-gate * 1 for r/w access to pagetables 1580*7c478bd9Sstevel@tonic-gate * 1 for r access when copying pagetables (hat_alloc) 1581*7c478bd9Sstevel@tonic-gate * 1 that will map the PTEs for the 1st 2, so we can access them quickly 1582*7c478bd9Sstevel@tonic-gate * 1583*7c478bd9Sstevel@tonic-gate * We use vmem_xalloc() to get a correct alignment so that only one 1584*7c478bd9Sstevel@tonic-gate * hat_mempte_setup() is needed. 1585*7c478bd9Sstevel@tonic-gate */ 1586*7c478bd9Sstevel@tonic-gate void 1587*7c478bd9Sstevel@tonic-gate x86pte_cpu_init(cpu_t *cpu, void *pages) 1588*7c478bd9Sstevel@tonic-gate { 1589*7c478bd9Sstevel@tonic-gate struct hat_cpu_info *hci; 1590*7c478bd9Sstevel@tonic-gate caddr_t va; 1591*7c478bd9Sstevel@tonic-gate 1592*7c478bd9Sstevel@tonic-gate /* 1593*7c478bd9Sstevel@tonic-gate * We can't use kmem_alloc/vmem_alloc for the 1st CPU, as this is 1594*7c478bd9Sstevel@tonic-gate * called before we've activated our own HAT 1595*7c478bd9Sstevel@tonic-gate */ 1596*7c478bd9Sstevel@tonic-gate if (pages != NULL) { 1597*7c478bd9Sstevel@tonic-gate hci = &init_hci; 1598*7c478bd9Sstevel@tonic-gate va = pages; 1599*7c478bd9Sstevel@tonic-gate } else { 1600*7c478bd9Sstevel@tonic-gate hci = kmem_alloc(sizeof (struct hat_cpu_info), KM_SLEEP); 1601*7c478bd9Sstevel@tonic-gate va = vmem_xalloc(heap_arena, 3 * MMU_PAGESIZE, MMU_PAGESIZE, 0, 1602*7c478bd9Sstevel@tonic-gate LEVEL_SIZE(1), NULL, NULL, VM_SLEEP); 1603*7c478bd9Sstevel@tonic-gate } 1604*7c478bd9Sstevel@tonic-gate mutex_init(&hci->hci_mutex, NULL, MUTEX_DEFAULT, 
NULL); 1605*7c478bd9Sstevel@tonic-gate 1606*7c478bd9Sstevel@tonic-gate /* 1607*7c478bd9Sstevel@tonic-gate * If we are using segkpm, then there is no need for any of the 1608*7c478bd9Sstevel@tonic-gate * mempte support. We can access the desired memory through a kpm 1609*7c478bd9Sstevel@tonic-gate * mapping rather than setting up a temporary mempte mapping. 1610*7c478bd9Sstevel@tonic-gate */ 1611*7c478bd9Sstevel@tonic-gate if (kpm_enable == 0) { 1612*7c478bd9Sstevel@tonic-gate hci->hci_mapped_pfn = PFN_INVALID; 1613*7c478bd9Sstevel@tonic-gate 1614*7c478bd9Sstevel@tonic-gate hci->hci_kernel_pte = 1615*7c478bd9Sstevel@tonic-gate hat_mempte_kern_setup(va, va + (2 * MMU_PAGESIZE)); 1616*7c478bd9Sstevel@tonic-gate hci->hci_pagetable_va = (void *)va; 1617*7c478bd9Sstevel@tonic-gate } 1618*7c478bd9Sstevel@tonic-gate 1619*7c478bd9Sstevel@tonic-gate cpu->cpu_hat_info = hci; 1620*7c478bd9Sstevel@tonic-gate } 1621*7c478bd9Sstevel@tonic-gate 1622*7c478bd9Sstevel@tonic-gate /* 1623*7c478bd9Sstevel@tonic-gate * Macro to establish temporary mappings for x86pte_XXX routines. 1624*7c478bd9Sstevel@tonic-gate */ 1625*7c478bd9Sstevel@tonic-gate #define X86PTE_REMAP(addr, pte, index, perm, pfn) { \ 1626*7c478bd9Sstevel@tonic-gate x86pte_t t; \ 1627*7c478bd9Sstevel@tonic-gate \ 1628*7c478bd9Sstevel@tonic-gate t = MAKEPTE((pfn), 0) | (perm) | mmu.pt_global | mmu.pt_nx;\ 1629*7c478bd9Sstevel@tonic-gate if (mmu.pae_hat) \ 1630*7c478bd9Sstevel@tonic-gate pte[index] = t; \ 1631*7c478bd9Sstevel@tonic-gate else \ 1632*7c478bd9Sstevel@tonic-gate ((x86pte32_t *)(pte))[index] = t; \ 1633*7c478bd9Sstevel@tonic-gate mmu_tlbflush_entry((caddr_t)(addr)); \ 1634*7c478bd9Sstevel@tonic-gate } 1635*7c478bd9Sstevel@tonic-gate 1636*7c478bd9Sstevel@tonic-gate /* 1637*7c478bd9Sstevel@tonic-gate * Disable preemption and establish a mapping to the pagetable with the 1638*7c478bd9Sstevel@tonic-gate * given pfn. 
This is optimized for there case where it's the same 1639*7c478bd9Sstevel@tonic-gate * pfn as we last used referenced from this CPU. 1640*7c478bd9Sstevel@tonic-gate */ 1641*7c478bd9Sstevel@tonic-gate static x86pte_t * 1642*7c478bd9Sstevel@tonic-gate x86pte_access_pagetable(htable_t *ht) 1643*7c478bd9Sstevel@tonic-gate { 1644*7c478bd9Sstevel@tonic-gate pfn_t pfn; 1645*7c478bd9Sstevel@tonic-gate struct hat_cpu_info *hci; 1646*7c478bd9Sstevel@tonic-gate 1647*7c478bd9Sstevel@tonic-gate /* 1648*7c478bd9Sstevel@tonic-gate * VLP pagetables are contained in the hat_t 1649*7c478bd9Sstevel@tonic-gate */ 1650*7c478bd9Sstevel@tonic-gate if (ht->ht_flags & HTABLE_VLP) 1651*7c478bd9Sstevel@tonic-gate return (ht->ht_hat->hat_vlp_ptes); 1652*7c478bd9Sstevel@tonic-gate 1653*7c478bd9Sstevel@tonic-gate /* 1654*7c478bd9Sstevel@tonic-gate * During early boot, use hat_boot_remap() of a page table adddress. 1655*7c478bd9Sstevel@tonic-gate */ 1656*7c478bd9Sstevel@tonic-gate pfn = ht->ht_pfn; 1657*7c478bd9Sstevel@tonic-gate ASSERT(pfn != PFN_INVALID); 1658*7c478bd9Sstevel@tonic-gate if (kpm_enable) 1659*7c478bd9Sstevel@tonic-gate return ((x86pte_t *)hat_kpm_pfn2va(pfn)); 1660*7c478bd9Sstevel@tonic-gate 1661*7c478bd9Sstevel@tonic-gate if (!khat_running) { 1662*7c478bd9Sstevel@tonic-gate (void) hat_boot_remap(ptable_va, pfn); 1663*7c478bd9Sstevel@tonic-gate return ((x86pte_t *)ptable_va); 1664*7c478bd9Sstevel@tonic-gate } 1665*7c478bd9Sstevel@tonic-gate 1666*7c478bd9Sstevel@tonic-gate /* 1667*7c478bd9Sstevel@tonic-gate * Normally, disable preemption and grab the CPU's hci_mutex 1668*7c478bd9Sstevel@tonic-gate */ 1669*7c478bd9Sstevel@tonic-gate kpreempt_disable(); 1670*7c478bd9Sstevel@tonic-gate hci = CPU->cpu_hat_info; 1671*7c478bd9Sstevel@tonic-gate ASSERT(hci != NULL); 1672*7c478bd9Sstevel@tonic-gate mutex_enter(&hci->hci_mutex); 1673*7c478bd9Sstevel@tonic-gate if (hci->hci_mapped_pfn != pfn) { 1674*7c478bd9Sstevel@tonic-gate /* 1675*7c478bd9Sstevel@tonic-gate * The current mapping doesn't 
already point to this page. 1676*7c478bd9Sstevel@tonic-gate * Update the CPU specific pagetable mapping to map the pfn. 1677*7c478bd9Sstevel@tonic-gate */ 1678*7c478bd9Sstevel@tonic-gate X86PTE_REMAP(hci->hci_pagetable_va, hci->hci_kernel_pte, 0, 1679*7c478bd9Sstevel@tonic-gate PT_WRITABLE, pfn); 1680*7c478bd9Sstevel@tonic-gate hci->hci_mapped_pfn = pfn; 1681*7c478bd9Sstevel@tonic-gate } 1682*7c478bd9Sstevel@tonic-gate return (hci->hci_pagetable_va); 1683*7c478bd9Sstevel@tonic-gate } 1684*7c478bd9Sstevel@tonic-gate 1685*7c478bd9Sstevel@tonic-gate /* 1686*7c478bd9Sstevel@tonic-gate * Release access to a page table. 1687*7c478bd9Sstevel@tonic-gate */ 1688*7c478bd9Sstevel@tonic-gate static void 1689*7c478bd9Sstevel@tonic-gate x86pte_release_pagetable(htable_t *ht) 1690*7c478bd9Sstevel@tonic-gate { 1691*7c478bd9Sstevel@tonic-gate struct hat_cpu_info *hci; 1692*7c478bd9Sstevel@tonic-gate 1693*7c478bd9Sstevel@tonic-gate if (kpm_enable) 1694*7c478bd9Sstevel@tonic-gate return; 1695*7c478bd9Sstevel@tonic-gate 1696*7c478bd9Sstevel@tonic-gate /* 1697*7c478bd9Sstevel@tonic-gate * nothing to do for VLP htables 1698*7c478bd9Sstevel@tonic-gate */ 1699*7c478bd9Sstevel@tonic-gate if (ht->ht_flags & HTABLE_VLP) 1700*7c478bd9Sstevel@tonic-gate return; 1701*7c478bd9Sstevel@tonic-gate 1702*7c478bd9Sstevel@tonic-gate /* 1703*7c478bd9Sstevel@tonic-gate * During boot-up hat_kern_setup(), erase the boot loader remapping. 
1704*7c478bd9Sstevel@tonic-gate */ 1705*7c478bd9Sstevel@tonic-gate if (!khat_running) { 1706*7c478bd9Sstevel@tonic-gate hat_boot_demap(ptable_va); 1707*7c478bd9Sstevel@tonic-gate return; 1708*7c478bd9Sstevel@tonic-gate } 1709*7c478bd9Sstevel@tonic-gate 1710*7c478bd9Sstevel@tonic-gate /* 1711*7c478bd9Sstevel@tonic-gate * Normal Operation: drop the CPU's hci_mutex and restore preemption 1712*7c478bd9Sstevel@tonic-gate */ 1713*7c478bd9Sstevel@tonic-gate hci = CPU->cpu_hat_info; 1714*7c478bd9Sstevel@tonic-gate ASSERT(hci != NULL); 1715*7c478bd9Sstevel@tonic-gate mutex_exit(&hci->hci_mutex); 1716*7c478bd9Sstevel@tonic-gate kpreempt_enable(); 1717*7c478bd9Sstevel@tonic-gate } 1718*7c478bd9Sstevel@tonic-gate 1719*7c478bd9Sstevel@tonic-gate /* 1720*7c478bd9Sstevel@tonic-gate * Atomic retrieval of a pagetable entry 1721*7c478bd9Sstevel@tonic-gate */ 1722*7c478bd9Sstevel@tonic-gate x86pte_t 1723*7c478bd9Sstevel@tonic-gate x86pte_get(htable_t *ht, uint_t entry) 1724*7c478bd9Sstevel@tonic-gate { 1725*7c478bd9Sstevel@tonic-gate x86pte_t pte; 1726*7c478bd9Sstevel@tonic-gate x86pte32_t *pte32p; 1727*7c478bd9Sstevel@tonic-gate volatile x86pte_t *ptep; 1728*7c478bd9Sstevel@tonic-gate 1729*7c478bd9Sstevel@tonic-gate /* 1730*7c478bd9Sstevel@tonic-gate * 32 bit (non-pae) is always atomic. 1731*7c478bd9Sstevel@tonic-gate * 64 bit is only atomic on 64 bit mode. 
1732*7c478bd9Sstevel@tonic-gate */ 1733*7c478bd9Sstevel@tonic-gate ptep = x86pte_access_pagetable(ht); 1734*7c478bd9Sstevel@tonic-gate if (mmu.pae_hat) { 1735*7c478bd9Sstevel@tonic-gate pte = ptep[entry]; 1736*7c478bd9Sstevel@tonic-gate #if defined(__i386) 1737*7c478bd9Sstevel@tonic-gate while (pte != ptep[entry]) 1738*7c478bd9Sstevel@tonic-gate pte = ptep[entry]; 1739*7c478bd9Sstevel@tonic-gate #endif /* __i386 */ 1740*7c478bd9Sstevel@tonic-gate } else { 1741*7c478bd9Sstevel@tonic-gate pte32p = (x86pte32_t *)ptep; 1742*7c478bd9Sstevel@tonic-gate pte = pte32p[entry]; 1743*7c478bd9Sstevel@tonic-gate } 1744*7c478bd9Sstevel@tonic-gate x86pte_release_pagetable(ht); 1745*7c478bd9Sstevel@tonic-gate return (pte); 1746*7c478bd9Sstevel@tonic-gate } 1747*7c478bd9Sstevel@tonic-gate 1748*7c478bd9Sstevel@tonic-gate 1749*7c478bd9Sstevel@tonic-gate /* 1750*7c478bd9Sstevel@tonic-gate * Atomic unconditional set of a page table entry, it returns the previous 1751*7c478bd9Sstevel@tonic-gate * value. 1752*7c478bd9Sstevel@tonic-gate */ 1753*7c478bd9Sstevel@tonic-gate x86pte_t 1754*7c478bd9Sstevel@tonic-gate x86pte_set(htable_t *ht, uint_t entry, x86pte_t new, void *ptr) 1755*7c478bd9Sstevel@tonic-gate { 1756*7c478bd9Sstevel@tonic-gate x86pte_t old; 1757*7c478bd9Sstevel@tonic-gate x86pte_t prev; 1758*7c478bd9Sstevel@tonic-gate x86pte_t *ptep; 1759*7c478bd9Sstevel@tonic-gate x86pte32_t *pte32p; 1760*7c478bd9Sstevel@tonic-gate x86pte32_t n32, p32; 1761*7c478bd9Sstevel@tonic-gate 1762*7c478bd9Sstevel@tonic-gate ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN)); 1763*7c478bd9Sstevel@tonic-gate if (ptr == NULL) { 1764*7c478bd9Sstevel@tonic-gate ptep = x86pte_access_pagetable(ht); 1765*7c478bd9Sstevel@tonic-gate ptep = (void *)((caddr_t)ptep + (entry << mmu.pte_size_shift)); 1766*7c478bd9Sstevel@tonic-gate } else { 1767*7c478bd9Sstevel@tonic-gate ptep = ptr; 1768*7c478bd9Sstevel@tonic-gate } 1769*7c478bd9Sstevel@tonic-gate 1770*7c478bd9Sstevel@tonic-gate if (mmu.pae_hat) { 
1771*7c478bd9Sstevel@tonic-gate for (;;) { 1772*7c478bd9Sstevel@tonic-gate prev = *ptep; 1773*7c478bd9Sstevel@tonic-gate if (prev == new) { 1774*7c478bd9Sstevel@tonic-gate old = new; 1775*7c478bd9Sstevel@tonic-gate break; 1776*7c478bd9Sstevel@tonic-gate } 1777*7c478bd9Sstevel@tonic-gate old = cas64(ptep, prev, new); 1778*7c478bd9Sstevel@tonic-gate if (old == prev) 1779*7c478bd9Sstevel@tonic-gate break; 1780*7c478bd9Sstevel@tonic-gate } 1781*7c478bd9Sstevel@tonic-gate } else { 1782*7c478bd9Sstevel@tonic-gate pte32p = (x86pte32_t *)ptep; 1783*7c478bd9Sstevel@tonic-gate n32 = new; 1784*7c478bd9Sstevel@tonic-gate for (;;) { 1785*7c478bd9Sstevel@tonic-gate p32 = *pte32p; 1786*7c478bd9Sstevel@tonic-gate if (p32 == n32) { 1787*7c478bd9Sstevel@tonic-gate old = new; 1788*7c478bd9Sstevel@tonic-gate break; 1789*7c478bd9Sstevel@tonic-gate } 1790*7c478bd9Sstevel@tonic-gate old = cas32(pte32p, p32, n32); 1791*7c478bd9Sstevel@tonic-gate if (old == p32) 1792*7c478bd9Sstevel@tonic-gate break; 1793*7c478bd9Sstevel@tonic-gate } 1794*7c478bd9Sstevel@tonic-gate } 1795*7c478bd9Sstevel@tonic-gate if (ptr == NULL) 1796*7c478bd9Sstevel@tonic-gate x86pte_release_pagetable(ht); 1797*7c478bd9Sstevel@tonic-gate return (old); 1798*7c478bd9Sstevel@tonic-gate } 1799*7c478bd9Sstevel@tonic-gate 1800*7c478bd9Sstevel@tonic-gate /* 1801*7c478bd9Sstevel@tonic-gate * Atomic compare and swap of a page table entry. 
1802*7c478bd9Sstevel@tonic-gate */ 1803*7c478bd9Sstevel@tonic-gate static x86pte_t 1804*7c478bd9Sstevel@tonic-gate x86pte_cas(htable_t *ht, uint_t entry, x86pte_t old, x86pte_t new) 1805*7c478bd9Sstevel@tonic-gate { 1806*7c478bd9Sstevel@tonic-gate x86pte_t pte; 1807*7c478bd9Sstevel@tonic-gate x86pte_t *ptep; 1808*7c478bd9Sstevel@tonic-gate x86pte32_t pte32, o32, n32; 1809*7c478bd9Sstevel@tonic-gate x86pte32_t *pte32p; 1810*7c478bd9Sstevel@tonic-gate 1811*7c478bd9Sstevel@tonic-gate ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN)); 1812*7c478bd9Sstevel@tonic-gate ptep = x86pte_access_pagetable(ht); 1813*7c478bd9Sstevel@tonic-gate if (mmu.pae_hat) { 1814*7c478bd9Sstevel@tonic-gate pte = cas64(&ptep[entry], old, new); 1815*7c478bd9Sstevel@tonic-gate } else { 1816*7c478bd9Sstevel@tonic-gate o32 = old; 1817*7c478bd9Sstevel@tonic-gate n32 = new; 1818*7c478bd9Sstevel@tonic-gate pte32p = (x86pte32_t *)ptep; 1819*7c478bd9Sstevel@tonic-gate pte32 = cas32(&pte32p[entry], o32, n32); 1820*7c478bd9Sstevel@tonic-gate pte = pte32; 1821*7c478bd9Sstevel@tonic-gate } 1822*7c478bd9Sstevel@tonic-gate x86pte_release_pagetable(ht); 1823*7c478bd9Sstevel@tonic-gate 1824*7c478bd9Sstevel@tonic-gate return (pte); 1825*7c478bd9Sstevel@tonic-gate } 1826*7c478bd9Sstevel@tonic-gate 1827*7c478bd9Sstevel@tonic-gate /* 1828*7c478bd9Sstevel@tonic-gate * data structure for cross call information 1829*7c478bd9Sstevel@tonic-gate */ 1830*7c478bd9Sstevel@tonic-gate typedef struct xcall_info { 1831*7c478bd9Sstevel@tonic-gate x86pte_t xi_pte; 1832*7c478bd9Sstevel@tonic-gate x86pte_t xi_old; 1833*7c478bd9Sstevel@tonic-gate x86pte_t *xi_pteptr; 1834*7c478bd9Sstevel@tonic-gate pfn_t xi_pfn; 1835*7c478bd9Sstevel@tonic-gate processorid_t xi_cpuid; 1836*7c478bd9Sstevel@tonic-gate level_t xi_level; 1837*7c478bd9Sstevel@tonic-gate xc_func_t xi_func; 1838*7c478bd9Sstevel@tonic-gate } xcall_info_t; 1839*7c478bd9Sstevel@tonic-gate 1840*7c478bd9Sstevel@tonic-gate /* 1841*7c478bd9Sstevel@tonic-gate * Cross call service 
function to atomically invalidate a PTE and flush TLBs 1842*7c478bd9Sstevel@tonic-gate */ 1843*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1844*7c478bd9Sstevel@tonic-gate static int 1845*7c478bd9Sstevel@tonic-gate x86pte_inval_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3) 1846*7c478bd9Sstevel@tonic-gate { 1847*7c478bd9Sstevel@tonic-gate xcall_info_t *xi = (xcall_info_t *)a1; 1848*7c478bd9Sstevel@tonic-gate caddr_t addr = (caddr_t)a2; 1849*7c478bd9Sstevel@tonic-gate 1850*7c478bd9Sstevel@tonic-gate /* 1851*7c478bd9Sstevel@tonic-gate * Only the initiating cpu invalidates the page table entry. 1852*7c478bd9Sstevel@tonic-gate * It returns the previous PTE value to the caller. 1853*7c478bd9Sstevel@tonic-gate */ 1854*7c478bd9Sstevel@tonic-gate if (CPU->cpu_id == xi->xi_cpuid) { 1855*7c478bd9Sstevel@tonic-gate x86pte_t *ptep = xi->xi_pteptr; 1856*7c478bd9Sstevel@tonic-gate pfn_t pfn = xi->xi_pfn; 1857*7c478bd9Sstevel@tonic-gate level_t level = xi->xi_level; 1858*7c478bd9Sstevel@tonic-gate x86pte_t old; 1859*7c478bd9Sstevel@tonic-gate x86pte_t prev; 1860*7c478bd9Sstevel@tonic-gate x86pte32_t *pte32p; 1861*7c478bd9Sstevel@tonic-gate x86pte32_t p32; 1862*7c478bd9Sstevel@tonic-gate 1863*7c478bd9Sstevel@tonic-gate if (mmu.pae_hat) { 1864*7c478bd9Sstevel@tonic-gate for (;;) { 1865*7c478bd9Sstevel@tonic-gate prev = *ptep; 1866*7c478bd9Sstevel@tonic-gate if (PTE2PFN(prev, level) != pfn) 1867*7c478bd9Sstevel@tonic-gate break; 1868*7c478bd9Sstevel@tonic-gate old = cas64(ptep, prev, 0); 1869*7c478bd9Sstevel@tonic-gate if (old == prev) 1870*7c478bd9Sstevel@tonic-gate break; 1871*7c478bd9Sstevel@tonic-gate } 1872*7c478bd9Sstevel@tonic-gate } else { 1873*7c478bd9Sstevel@tonic-gate pte32p = (x86pte32_t *)ptep; 1874*7c478bd9Sstevel@tonic-gate for (;;) { 1875*7c478bd9Sstevel@tonic-gate p32 = *pte32p; 1876*7c478bd9Sstevel@tonic-gate if (PTE2PFN(p32, level) != pfn) 1877*7c478bd9Sstevel@tonic-gate break; 1878*7c478bd9Sstevel@tonic-gate old = cas32(pte32p, p32, 0); 1879*7c478bd9Sstevel@tonic-gate if 
(old == p32) 1880*7c478bd9Sstevel@tonic-gate break; 1881*7c478bd9Sstevel@tonic-gate } 1882*7c478bd9Sstevel@tonic-gate prev = p32; 1883*7c478bd9Sstevel@tonic-gate } 1884*7c478bd9Sstevel@tonic-gate xi->xi_pte = prev; 1885*7c478bd9Sstevel@tonic-gate } 1886*7c478bd9Sstevel@tonic-gate 1887*7c478bd9Sstevel@tonic-gate /* 1888*7c478bd9Sstevel@tonic-gate * For a normal address, we just flush one page mapping 1889*7c478bd9Sstevel@tonic-gate * Otherwise reload cr3 to effect a complete TLB flush. 1890*7c478bd9Sstevel@tonic-gate * 1891*7c478bd9Sstevel@tonic-gate * Note we don't reload VLP pte's -- this assume we never have a 1892*7c478bd9Sstevel@tonic-gate * large page size at VLP_LEVEL for VLP processes. 1893*7c478bd9Sstevel@tonic-gate */ 1894*7c478bd9Sstevel@tonic-gate if ((uintptr_t)addr != DEMAP_ALL_ADDR) { 1895*7c478bd9Sstevel@tonic-gate mmu_tlbflush_entry(addr); 1896*7c478bd9Sstevel@tonic-gate } else { 1897*7c478bd9Sstevel@tonic-gate reload_cr3(); 1898*7c478bd9Sstevel@tonic-gate } 1899*7c478bd9Sstevel@tonic-gate return (0); 1900*7c478bd9Sstevel@tonic-gate } 1901*7c478bd9Sstevel@tonic-gate 1902*7c478bd9Sstevel@tonic-gate /* 1903*7c478bd9Sstevel@tonic-gate * Cross call service function to atomically change a PTE and flush TLBs 1904*7c478bd9Sstevel@tonic-gate */ 1905*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1906*7c478bd9Sstevel@tonic-gate static int 1907*7c478bd9Sstevel@tonic-gate x86pte_update_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3) 1908*7c478bd9Sstevel@tonic-gate { 1909*7c478bd9Sstevel@tonic-gate xcall_info_t *xi = (xcall_info_t *)a1; 1910*7c478bd9Sstevel@tonic-gate caddr_t addr = (caddr_t)a2; 1911*7c478bd9Sstevel@tonic-gate 1912*7c478bd9Sstevel@tonic-gate /* 1913*7c478bd9Sstevel@tonic-gate * Only the initiating cpu changes the page table entry. 1914*7c478bd9Sstevel@tonic-gate * It returns the previous PTE value to the caller. 
1915*7c478bd9Sstevel@tonic-gate */ 1916*7c478bd9Sstevel@tonic-gate if (CPU->cpu_id == xi->xi_cpuid) { 1917*7c478bd9Sstevel@tonic-gate x86pte_t *ptep = xi->xi_pteptr; 1918*7c478bd9Sstevel@tonic-gate x86pte_t new = xi->xi_pte; 1919*7c478bd9Sstevel@tonic-gate x86pte_t old = xi->xi_old; 1920*7c478bd9Sstevel@tonic-gate x86pte_t prev; 1921*7c478bd9Sstevel@tonic-gate 1922*7c478bd9Sstevel@tonic-gate if (mmu.pae_hat) { 1923*7c478bd9Sstevel@tonic-gate prev = cas64(ptep, old, new); 1924*7c478bd9Sstevel@tonic-gate } else { 1925*7c478bd9Sstevel@tonic-gate x86pte32_t o32 = old; 1926*7c478bd9Sstevel@tonic-gate x86pte32_t n32 = new; 1927*7c478bd9Sstevel@tonic-gate x86pte32_t *pte32p = (x86pte32_t *)ptep; 1928*7c478bd9Sstevel@tonic-gate prev = cas32(pte32p, o32, n32); 1929*7c478bd9Sstevel@tonic-gate } 1930*7c478bd9Sstevel@tonic-gate 1931*7c478bd9Sstevel@tonic-gate xi->xi_pte = prev; 1932*7c478bd9Sstevel@tonic-gate } 1933*7c478bd9Sstevel@tonic-gate 1934*7c478bd9Sstevel@tonic-gate /* 1935*7c478bd9Sstevel@tonic-gate * Flush the TLB entry 1936*7c478bd9Sstevel@tonic-gate */ 1937*7c478bd9Sstevel@tonic-gate if ((uintptr_t)addr != DEMAP_ALL_ADDR) 1938*7c478bd9Sstevel@tonic-gate mmu_tlbflush_entry(addr); 1939*7c478bd9Sstevel@tonic-gate else 1940*7c478bd9Sstevel@tonic-gate reload_cr3(); 1941*7c478bd9Sstevel@tonic-gate return (0); 1942*7c478bd9Sstevel@tonic-gate } 1943*7c478bd9Sstevel@tonic-gate 1944*7c478bd9Sstevel@tonic-gate /* 1945*7c478bd9Sstevel@tonic-gate * Use cross calls to change a page table entry and invalidate TLBs. 
1946*7c478bd9Sstevel@tonic-gate */ 1947*7c478bd9Sstevel@tonic-gate void 1948*7c478bd9Sstevel@tonic-gate x86pte_xcall(hat_t *hat, xcall_info_t *xi, uintptr_t addr) 1949*7c478bd9Sstevel@tonic-gate { 1950*7c478bd9Sstevel@tonic-gate cpuset_t cpus; 1951*7c478bd9Sstevel@tonic-gate 1952*7c478bd9Sstevel@tonic-gate /* 1953*7c478bd9Sstevel@tonic-gate * Given the current implementation of hat_share(), doing a 1954*7c478bd9Sstevel@tonic-gate * hat_pageunload() on a shared page table requries invalidating 1955*7c478bd9Sstevel@tonic-gate * all user TLB entries on all CPUs. 1956*7c478bd9Sstevel@tonic-gate */ 1957*7c478bd9Sstevel@tonic-gate if (hat->hat_flags & HAT_SHARED) { 1958*7c478bd9Sstevel@tonic-gate hat = kas.a_hat; 1959*7c478bd9Sstevel@tonic-gate addr = DEMAP_ALL_ADDR; 1960*7c478bd9Sstevel@tonic-gate } 1961*7c478bd9Sstevel@tonic-gate 1962*7c478bd9Sstevel@tonic-gate /* 1963*7c478bd9Sstevel@tonic-gate * Use a cross call to do the invalidations. 1964*7c478bd9Sstevel@tonic-gate * Note the current CPU always has to be in the cross call CPU set. 1965*7c478bd9Sstevel@tonic-gate */ 1966*7c478bd9Sstevel@tonic-gate kpreempt_disable(); 1967*7c478bd9Sstevel@tonic-gate xi->xi_cpuid = CPU->cpu_id; 1968*7c478bd9Sstevel@tonic-gate CPUSET_ZERO(cpus); 1969*7c478bd9Sstevel@tonic-gate if (hat == kas.a_hat) { 1970*7c478bd9Sstevel@tonic-gate CPUSET_OR(cpus, khat_cpuset); 1971*7c478bd9Sstevel@tonic-gate } else { 1972*7c478bd9Sstevel@tonic-gate mutex_enter(&hat->hat_switch_mutex); 1973*7c478bd9Sstevel@tonic-gate CPUSET_OR(cpus, hat->hat_cpus); 1974*7c478bd9Sstevel@tonic-gate CPUSET_ADD(cpus, CPU->cpu_id); 1975*7c478bd9Sstevel@tonic-gate } 1976*7c478bd9Sstevel@tonic-gate 1977*7c478bd9Sstevel@tonic-gate /* 1978*7c478bd9Sstevel@tonic-gate * Use a cross call to modify the page table entry and invalidate TLBs. 1979*7c478bd9Sstevel@tonic-gate * If we're panic'ing, don't bother with the cross call. 
1980*7c478bd9Sstevel@tonic-gate * Note the panicstr check isn't bullet proof and the panic system 1981*7c478bd9Sstevel@tonic-gate * ought to be made tighter. 1982*7c478bd9Sstevel@tonic-gate */ 1983*7c478bd9Sstevel@tonic-gate if (panicstr == NULL) 1984*7c478bd9Sstevel@tonic-gate xc_wait_sync((xc_arg_t)xi, addr, NULL, X_CALL_HIPRI, 1985*7c478bd9Sstevel@tonic-gate cpus, xi->xi_func); 1986*7c478bd9Sstevel@tonic-gate else 1987*7c478bd9Sstevel@tonic-gate (void) xi->xi_func((xc_arg_t)xi, (xc_arg_t)addr, NULL); 1988*7c478bd9Sstevel@tonic-gate if (hat != kas.a_hat) 1989*7c478bd9Sstevel@tonic-gate mutex_exit(&hat->hat_switch_mutex); 1990*7c478bd9Sstevel@tonic-gate kpreempt_enable(); 1991*7c478bd9Sstevel@tonic-gate } 1992*7c478bd9Sstevel@tonic-gate 1993*7c478bd9Sstevel@tonic-gate /* 1994*7c478bd9Sstevel@tonic-gate * Invalidate a page table entry if it currently maps the given pfn. 1995*7c478bd9Sstevel@tonic-gate * This returns the previous value of the PTE. 1996*7c478bd9Sstevel@tonic-gate */ 1997*7c478bd9Sstevel@tonic-gate x86pte_t 1998*7c478bd9Sstevel@tonic-gate x86pte_invalidate_pfn(htable_t *ht, uint_t entry, pfn_t pfn, void *pte_ptr) 1999*7c478bd9Sstevel@tonic-gate { 2000*7c478bd9Sstevel@tonic-gate xcall_info_t xi; 2001*7c478bd9Sstevel@tonic-gate x86pte_t *ptep; 2002*7c478bd9Sstevel@tonic-gate hat_t *hat; 2003*7c478bd9Sstevel@tonic-gate uintptr_t addr; 2004*7c478bd9Sstevel@tonic-gate 2005*7c478bd9Sstevel@tonic-gate ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN)); 2006*7c478bd9Sstevel@tonic-gate if (pte_ptr != NULL) { 2007*7c478bd9Sstevel@tonic-gate ptep = pte_ptr; 2008*7c478bd9Sstevel@tonic-gate } else { 2009*7c478bd9Sstevel@tonic-gate ptep = x86pte_access_pagetable(ht); 2010*7c478bd9Sstevel@tonic-gate ptep = (void *)((caddr_t)ptep + (entry << mmu.pte_size_shift)); 2011*7c478bd9Sstevel@tonic-gate } 2012*7c478bd9Sstevel@tonic-gate 2013*7c478bd9Sstevel@tonic-gate /* 2014*7c478bd9Sstevel@tonic-gate * Fill in the structure used by the cross call function to do the 
2015*7c478bd9Sstevel@tonic-gate * invalidation. 2016*7c478bd9Sstevel@tonic-gate */ 2017*7c478bd9Sstevel@tonic-gate xi.xi_pte = 0; 2018*7c478bd9Sstevel@tonic-gate xi.xi_pteptr = ptep; 2019*7c478bd9Sstevel@tonic-gate xi.xi_pfn = pfn; 2020*7c478bd9Sstevel@tonic-gate xi.xi_level = ht->ht_level; 2021*7c478bd9Sstevel@tonic-gate xi.xi_func = x86pte_inval_func; 2022*7c478bd9Sstevel@tonic-gate ASSERT(xi.xi_level != VLP_LEVEL); 2023*7c478bd9Sstevel@tonic-gate 2024*7c478bd9Sstevel@tonic-gate hat = ht->ht_hat; 2025*7c478bd9Sstevel@tonic-gate addr = htable_e2va(ht, entry); 2026*7c478bd9Sstevel@tonic-gate 2027*7c478bd9Sstevel@tonic-gate x86pte_xcall(hat, &xi, addr); 2028*7c478bd9Sstevel@tonic-gate 2029*7c478bd9Sstevel@tonic-gate if (pte_ptr == NULL) 2030*7c478bd9Sstevel@tonic-gate x86pte_release_pagetable(ht); 2031*7c478bd9Sstevel@tonic-gate return (xi.xi_pte); 2032*7c478bd9Sstevel@tonic-gate } 2033*7c478bd9Sstevel@tonic-gate 2034*7c478bd9Sstevel@tonic-gate /* 2035*7c478bd9Sstevel@tonic-gate * update a PTE and invalidate any stale TLB entries. 2036*7c478bd9Sstevel@tonic-gate */ 2037*7c478bd9Sstevel@tonic-gate x86pte_t 2038*7c478bd9Sstevel@tonic-gate x86pte_update(htable_t *ht, uint_t entry, x86pte_t expected, x86pte_t new) 2039*7c478bd9Sstevel@tonic-gate { 2040*7c478bd9Sstevel@tonic-gate xcall_info_t xi; 2041*7c478bd9Sstevel@tonic-gate x86pte_t *ptep; 2042*7c478bd9Sstevel@tonic-gate hat_t *hat; 2043*7c478bd9Sstevel@tonic-gate uintptr_t addr; 2044*7c478bd9Sstevel@tonic-gate 2045*7c478bd9Sstevel@tonic-gate ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN)); 2046*7c478bd9Sstevel@tonic-gate ptep = x86pte_access_pagetable(ht); 2047*7c478bd9Sstevel@tonic-gate ptep = (void *)((caddr_t)ptep + (entry << mmu.pte_size_shift)); 2048*7c478bd9Sstevel@tonic-gate 2049*7c478bd9Sstevel@tonic-gate /* 2050*7c478bd9Sstevel@tonic-gate * Fill in the structure used by the cross call function to do the 2051*7c478bd9Sstevel@tonic-gate * invalidation. 
2052*7c478bd9Sstevel@tonic-gate */ 2053*7c478bd9Sstevel@tonic-gate xi.xi_pte = new; 2054*7c478bd9Sstevel@tonic-gate xi.xi_old = expected; 2055*7c478bd9Sstevel@tonic-gate xi.xi_pteptr = ptep; 2056*7c478bd9Sstevel@tonic-gate xi.xi_func = x86pte_update_func; 2057*7c478bd9Sstevel@tonic-gate 2058*7c478bd9Sstevel@tonic-gate hat = ht->ht_hat; 2059*7c478bd9Sstevel@tonic-gate addr = htable_e2va(ht, entry); 2060*7c478bd9Sstevel@tonic-gate 2061*7c478bd9Sstevel@tonic-gate x86pte_xcall(hat, &xi, addr); 2062*7c478bd9Sstevel@tonic-gate 2063*7c478bd9Sstevel@tonic-gate x86pte_release_pagetable(ht); 2064*7c478bd9Sstevel@tonic-gate return (xi.xi_pte); 2065*7c478bd9Sstevel@tonic-gate } 2066*7c478bd9Sstevel@tonic-gate 2067*7c478bd9Sstevel@tonic-gate /* 2068*7c478bd9Sstevel@tonic-gate * Copy page tables - this is just a little more complicated than the 2069*7c478bd9Sstevel@tonic-gate * previous routines. Note that it's also not atomic! It also is never 2070*7c478bd9Sstevel@tonic-gate * used for VLP pagetables. 2071*7c478bd9Sstevel@tonic-gate */ 2072*7c478bd9Sstevel@tonic-gate void 2073*7c478bd9Sstevel@tonic-gate x86pte_copy(htable_t *src, htable_t *dest, uint_t entry, uint_t count) 2074*7c478bd9Sstevel@tonic-gate { 2075*7c478bd9Sstevel@tonic-gate struct hat_cpu_info *hci; 2076*7c478bd9Sstevel@tonic-gate caddr_t src_va; 2077*7c478bd9Sstevel@tonic-gate caddr_t dst_va; 2078*7c478bd9Sstevel@tonic-gate size_t size; 2079*7c478bd9Sstevel@tonic-gate 2080*7c478bd9Sstevel@tonic-gate ASSERT(khat_running); 2081*7c478bd9Sstevel@tonic-gate ASSERT(!(dest->ht_flags & HTABLE_VLP)); 2082*7c478bd9Sstevel@tonic-gate ASSERT(!(src->ht_flags & HTABLE_VLP)); 2083*7c478bd9Sstevel@tonic-gate ASSERT(!(src->ht_flags & HTABLE_SHARED_PFN)); 2084*7c478bd9Sstevel@tonic-gate ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN)); 2085*7c478bd9Sstevel@tonic-gate 2086*7c478bd9Sstevel@tonic-gate /* 2087*7c478bd9Sstevel@tonic-gate * Acquire access to the CPU pagetable window for the destination. 
2088*7c478bd9Sstevel@tonic-gate */ 2089*7c478bd9Sstevel@tonic-gate dst_va = (caddr_t)x86pte_access_pagetable(dest); 2090*7c478bd9Sstevel@tonic-gate if (kpm_enable) { 2091*7c478bd9Sstevel@tonic-gate src_va = (caddr_t)x86pte_access_pagetable(src); 2092*7c478bd9Sstevel@tonic-gate } else { 2093*7c478bd9Sstevel@tonic-gate hci = CPU->cpu_hat_info; 2094*7c478bd9Sstevel@tonic-gate 2095*7c478bd9Sstevel@tonic-gate /* 2096*7c478bd9Sstevel@tonic-gate * Finish defining the src pagetable mapping 2097*7c478bd9Sstevel@tonic-gate */ 2098*7c478bd9Sstevel@tonic-gate src_va = dst_va + MMU_PAGESIZE; 2099*7c478bd9Sstevel@tonic-gate X86PTE_REMAP(src_va, hci->hci_kernel_pte, 1, 0, src->ht_pfn); 2100*7c478bd9Sstevel@tonic-gate } 2101*7c478bd9Sstevel@tonic-gate 2102*7c478bd9Sstevel@tonic-gate /* 2103*7c478bd9Sstevel@tonic-gate * now do the copy 2104*7c478bd9Sstevel@tonic-gate */ 2105*7c478bd9Sstevel@tonic-gate 2106*7c478bd9Sstevel@tonic-gate dst_va += entry << mmu.pte_size_shift; 2107*7c478bd9Sstevel@tonic-gate src_va += entry << mmu.pte_size_shift; 2108*7c478bd9Sstevel@tonic-gate size = count << mmu.pte_size_shift; 2109*7c478bd9Sstevel@tonic-gate bcopy(src_va, dst_va, size); 2110*7c478bd9Sstevel@tonic-gate 2111*7c478bd9Sstevel@tonic-gate x86pte_release_pagetable(dest); 2112*7c478bd9Sstevel@tonic-gate } 2113*7c478bd9Sstevel@tonic-gate 2114*7c478bd9Sstevel@tonic-gate /* 2115*7c478bd9Sstevel@tonic-gate * Zero page table entries - Note this doesn't use atomic stores! 
2116*7c478bd9Sstevel@tonic-gate */ 2117*7c478bd9Sstevel@tonic-gate void 2118*7c478bd9Sstevel@tonic-gate x86pte_zero(htable_t *dest, uint_t entry, uint_t count) 2119*7c478bd9Sstevel@tonic-gate { 2120*7c478bd9Sstevel@tonic-gate caddr_t dst_va; 2121*7c478bd9Sstevel@tonic-gate x86pte_t *p; 2122*7c478bd9Sstevel@tonic-gate x86pte32_t *p32; 2123*7c478bd9Sstevel@tonic-gate size_t size; 2124*7c478bd9Sstevel@tonic-gate extern void hat_pte_zero(void *, size_t); 2125*7c478bd9Sstevel@tonic-gate 2126*7c478bd9Sstevel@tonic-gate /* 2127*7c478bd9Sstevel@tonic-gate * Map in the page table to be zeroed. 2128*7c478bd9Sstevel@tonic-gate */ 2129*7c478bd9Sstevel@tonic-gate ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN)); 2130*7c478bd9Sstevel@tonic-gate ASSERT(!(dest->ht_flags & HTABLE_VLP)); 2131*7c478bd9Sstevel@tonic-gate dst_va = (caddr_t)x86pte_access_pagetable(dest); 2132*7c478bd9Sstevel@tonic-gate dst_va += entry << mmu.pte_size_shift; 2133*7c478bd9Sstevel@tonic-gate size = count << mmu.pte_size_shift; 2134*7c478bd9Sstevel@tonic-gate if (x86_feature & X86_SSE2) { 2135*7c478bd9Sstevel@tonic-gate hat_pte_zero(dst_va, size); 2136*7c478bd9Sstevel@tonic-gate } else if (khat_running) { 2137*7c478bd9Sstevel@tonic-gate bzero(dst_va, size); 2138*7c478bd9Sstevel@tonic-gate } else { 2139*7c478bd9Sstevel@tonic-gate /* 2140*7c478bd9Sstevel@tonic-gate * Can't just use bzero during boot because it checks the 2141*7c478bd9Sstevel@tonic-gate * address against kernelbase. Instead just use a zero loop. 
2142*7c478bd9Sstevel@tonic-gate */ 2143*7c478bd9Sstevel@tonic-gate if (mmu.pae_hat) { 2144*7c478bd9Sstevel@tonic-gate p = (x86pte_t *)dst_va; 2145*7c478bd9Sstevel@tonic-gate while (count-- > 0) 2146*7c478bd9Sstevel@tonic-gate *p++ = 0; 2147*7c478bd9Sstevel@tonic-gate } else { 2148*7c478bd9Sstevel@tonic-gate p32 = (x86pte32_t *)dst_va; 2149*7c478bd9Sstevel@tonic-gate while (count-- > 0) 2150*7c478bd9Sstevel@tonic-gate *p32++ = 0; 2151*7c478bd9Sstevel@tonic-gate } 2152*7c478bd9Sstevel@tonic-gate } 2153*7c478bd9Sstevel@tonic-gate x86pte_release_pagetable(dest); 2154*7c478bd9Sstevel@tonic-gate } 2155*7c478bd9Sstevel@tonic-gate 2156*7c478bd9Sstevel@tonic-gate /* 2157*7c478bd9Sstevel@tonic-gate * Called to ensure that all pagetables are in the system dump 2158*7c478bd9Sstevel@tonic-gate */ 2159*7c478bd9Sstevel@tonic-gate void 2160*7c478bd9Sstevel@tonic-gate hat_dump(void) 2161*7c478bd9Sstevel@tonic-gate { 2162*7c478bd9Sstevel@tonic-gate hat_t *hat; 2163*7c478bd9Sstevel@tonic-gate uint_t h; 2164*7c478bd9Sstevel@tonic-gate htable_t *ht; 2165*7c478bd9Sstevel@tonic-gate int count; 2166*7c478bd9Sstevel@tonic-gate 2167*7c478bd9Sstevel@tonic-gate /* 2168*7c478bd9Sstevel@tonic-gate * kas.a_hat is the head of the circular list, but not an element of 2169*7c478bd9Sstevel@tonic-gate * the list. Once we pass kas.a_hat->hat_next a second time, we 2170*7c478bd9Sstevel@tonic-gate * know we've iterated through every hat structure. 
2171*7c478bd9Sstevel@tonic-gate */ 2172*7c478bd9Sstevel@tonic-gate for (hat = kas.a_hat, count = 0; hat != kas.a_hat->hat_next || 2173*7c478bd9Sstevel@tonic-gate count++ == 0; hat = hat->hat_next) { 2174*7c478bd9Sstevel@tonic-gate for (h = 0; h < hat->hat_num_hash; ++h) { 2175*7c478bd9Sstevel@tonic-gate for (ht = hat->hat_ht_hash[h]; ht; ht = ht->ht_next) { 2176*7c478bd9Sstevel@tonic-gate if ((ht->ht_flags & HTABLE_VLP) == 0) { 2177*7c478bd9Sstevel@tonic-gate dump_page(ht->ht_pfn); 2178*7c478bd9Sstevel@tonic-gate } 2179*7c478bd9Sstevel@tonic-gate } 2180*7c478bd9Sstevel@tonic-gate } 2181*7c478bd9Sstevel@tonic-gate } 2182*7c478bd9Sstevel@tonic-gate } 2183