1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _VM_HTABLE_H 28 #define _VM_HTABLE_H 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #ifdef __cplusplus 33 extern "C" { 34 #endif 35 36 #if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL) 37 #include <asm/htable.h> 38 #endif 39 40 extern void atomic_andb(uint8_t *addr, uint8_t value); 41 extern void atomic_orb(uint8_t *addr, uint8_t value); 42 extern void atomic_inc16(uint16_t *addr); 43 extern void atomic_dec16(uint16_t *addr); 44 extern void mmu_tlbflush_entry(caddr_t addr); 45 46 /* 47 * Each hardware page table has an htable_t describing it. 48 * 49 * We use a reference counter mechanism to detect when we can free an htable. 50 * In the implmentation the reference count is split into 2 separate counters: 51 * 52 * ht_busy is a traditional reference count of uses of the htable pointer 53 * 54 * ht_valid_cnt is a count of how references are implied by valid PTE/PTP 55 * entries in the pagetable 56 * 57 * ht_busy is only incremented by htable_lookup() or htable_create() 58 * while holding the appropriate hash_table mutex. While installing a new 59 * valid PTE or PTP, in order to increment ht_valid_cnt a thread must have 60 * done an htable_lookup() or htable_create() but not the htable_release yet. 61 * 62 * htable_release(), while holding the mutex, can know that if 63 * busy == 1 and valid_cnt == 0, the htable can be free'd. 64 * 65 * The fields have been ordered to make htable_lookup() fast. Hence, 66 * ht_hat, ht_vaddr, ht_level and ht_next need to be clustered together. 67 */ 68 struct htable { 69 struct htable *ht_next; /* forward link for hash table */ 70 struct hat *ht_hat; /* hat this mapping comes from */ 71 uintptr_t ht_vaddr; /* virt addr at start of this table */ 72 level_t ht_level; /* page table level: 0=4K, 1=2M, ... */ 73 uint16_t ht_flags; /* see below */ 74 int16_t ht_busy; /* implements locking protocol */ 75 uint16_t ht_num_ptes; /* # of PTEs in page table */ 76 int16_t ht_valid_cnt; /* # of valid entries in this table */ 77 uint32_t ht_lock_cnt; /* # of locked entries in this table */ 78 /* never used for kernel hat */ 79 pfn_t ht_pfn; /* pfn of page of the pagetable */ 80 struct htable *ht_prev; /* backward link for hash table */ 81 struct htable *ht_parent; /* htable that points to this htable */ 82 struct htable *ht_shares; /* for HTABLE_SHARED_PFN only */ 83 }; 84 typedef struct htable htable_t; 85 86 /* 87 * Flags values for htable ht_flags field: 88 * 89 * HTABLE_VLP - this is the top level htable of a VLP HAT. 90 * 91 * HTABLE_SHARED_PFN - this htable had it's PFN assigned from sharing another 92 * htable. Used by hat_share() for ISM. 93 */ 94 #define HTABLE_VLP (0x0001) 95 #define HTABLE_SHARED_PFN (0x0002) 96 97 /* 98 * The htable hash table hashing function. The 28 is so that high 99 * order bits are include in the hash index to skew the wrap 100 * around of addresses. Even though the hash buckets are stored per 101 * hat we include the value of hat pointer in the hash function so 102 * that the secondary hash for the htable mutex winds up begin different in 103 * every address space. 104 */ 105 #define HTABLE_HASH(hat, va, lvl) \ 106 ((((va) >> LEVEL_SHIFT(1)) + ((va) >> 28) + (lvl) + \ 107 ((uintptr_t)(hat) >> 4)) & ((hat)->hat_num_hash - 1)) 108 109 /* 110 * For 32 bit, access to page table entries is done via the page table's PFN and 111 * the index of the PTE. We use a CPU specific mapping (a la ppcopy) to map 112 * in page tables on an "as needed" basis. 113 * 114 * 64 bit kernels will use seg_kpm style mappings and avoid any overhead. 115 * 116 * The code uses compare and swap instructions to read/write PTE's to 117 * avoid atomicity problems, since PTEs can be 8 bytes on 32 bit systems. 118 * Again this can be optimized on 64 bit systems, since aligned load/store 119 * will naturally be atomic. 120 * 121 * Each CPU gets a unique hat_cpu_info structure in cpu_hat_info. 122 */ 123 struct hat_cpu_info { 124 pfn_t hci_mapped_pfn; /* pfn of currently mapped page table */ 125 x86pte_t *hci_pagetable_va; /* VA to use for mappings */ 126 x86pte_t *hci_kernel_pte; /* kernel PTE for cpu_pagetable_va */ 127 kmutex_t hci_mutex; /* mutex to ensure sequential usage */ 128 #if defined(__amd64) 129 pfn_t hci_vlp_pfn; /* pfn of hci_vlp_l3ptes */ 130 x86pte_t *hci_vlp_l3ptes; /* VLP Level==3 pagetable (top) */ 131 x86pte_t *hci_vlp_l2ptes; /* VLP Level==2 pagetable */ 132 #endif /* __amd64 */ 133 }; 134 135 136 /* 137 * Compute the last page aligned VA mapped by an htable. 138 * 139 * Given a va and a level, compute the virtual address of the start of the 140 * next page at that level. 141 * 142 * XX64 - The check for the VA hole needs to be better generalized. 143 */ 144 #if defined(__amd64) 145 146 #define HTABLE_LAST_PAGE(ht) \ 147 ((ht)->ht_level == mmu.max_level ? ((uintptr_t)0UL - MMU_PAGESIZE) :\ 148 ((ht)->ht_vaddr - MMU_PAGESIZE + \ 149 ((uintptr_t)((ht)->ht_num_ptes) << LEVEL_SHIFT((ht)->ht_level)))) 150 151 #define NEXT_ENTRY_VA(va, l) \ 152 ((va & LEVEL_MASK(l)) + LEVEL_SIZE(l) == mmu.hole_start ? \ 153 mmu.hole_end : (va & LEVEL_MASK(l)) + LEVEL_SIZE(l)) 154 155 #elif defined(__i386) 156 157 #define HTABLE_LAST_PAGE(ht) ((ht)->ht_vaddr - MMU_PAGESIZE + \ 158 ((uintptr_t)((ht)->ht_num_ptes) << LEVEL_SHIFT((ht)->ht_level))) 159 160 #define NEXT_ENTRY_VA(va, l) ((va & LEVEL_MASK(l)) + LEVEL_SIZE(l)) 161 162 #endif 163 164 #if defined(_KERNEL) 165 166 /* 167 * initialization function called from hat_init() 168 */ 169 extern void htable_init(void); 170 171 /* 172 * Functions to lookup, or "lookup and create", the htable corresponding 173 * to the virtual address "vaddr" in the "hat" at the given "level" of 174 * page tables. htable_lookup() may return NULL if no such entry exists. 175 * 176 * On return the given htable is marked busy (a shared lock) - this prevents 177 * the htable from being stolen or freed) until htable_release() is called. 178 * 179 * If kalloc_flag is set on an htable_create() we can't call kmem allocation 180 * routines for this htable, since it's for the kernel hat itself. 181 * 182 * htable_acquire() is used when an htable pointer has been extracted from 183 * an hment and we need to get a reference to the htable. 184 */ 185 extern htable_t *htable_lookup(struct hat *hat, uintptr_t vaddr, level_t level); 186 extern htable_t *htable_create(struct hat *hat, uintptr_t vaddr, level_t level, 187 htable_t *shared); 188 extern void htable_acquire(htable_t *); 189 190 extern void htable_release(htable_t *ht); 191 192 /* 193 * Code to free all remaining htables for a hat. Called after the hat is no 194 * longer in use by any thread. 195 */ 196 extern void htable_purge_hat(struct hat *hat); 197 198 /* 199 * Find the htable, page table entry index, and PTE of the given virtual 200 * address. If not found returns NULL. When found, returns the htable_t *, 201 * sets entry, and has a hold on the htable. 202 */ 203 extern htable_t *htable_getpte(struct hat *, uintptr_t, uint_t *, x86pte_t *, 204 level_t); 205 206 /* 207 * Similar to hat_getpte(), except that this only succeeds if a valid 208 * page mapping is present. 209 */ 210 extern htable_t *htable_getpage(struct hat *hat, uintptr_t va, uint_t *entry); 211 212 /* 213 * Called to allocate initial/additional htables for reserve. 214 */ 215 extern void htable_initial_reserve(uint_t); 216 extern void htable_reserve(uint_t); 217 218 /* 219 * Used to readjust the htable reserve after the reserve list has been used. 220 * Also called after boot to release left over boot reserves. 221 */ 222 extern void htable_adjust_reserve(void); 223 224 /* 225 * Routine to find the next populated htable at or above a given virtual 226 * address. Can specify an upper limit, or HTABLE_WALK_TO_END to indicate 227 * that it should search the entire address space. Similar to 228 * hat_getpte(), but used for walking through address ranges. It can be 229 * used like this: 230 * 231 * va = ... 232 * ht = NULL; 233 * while (va < end_va) { 234 * pte = htable_walk(hat, &ht, &va, end_va); 235 * if (!pte) 236 * break; 237 * 238 * ... code to operate on page at va ... 239 * 240 * va += LEVEL_SIZE(ht->ht_level); 241 * } 242 * if (ht) 243 * htable_release(ht); 244 * 245 */ 246 extern x86pte_t htable_walk(struct hat *hat, htable_t **ht, uintptr_t *va, 247 uintptr_t eaddr); 248 249 #define HTABLE_WALK_TO_END ((uintptr_t)-1) 250 251 /* 252 * Utilities convert between virtual addresses and page table entry indeces. 253 */ 254 extern uint_t htable_va2entry(uintptr_t va, htable_t *ht); 255 extern uintptr_t htable_e2va(htable_t *ht, uint_t entry); 256 257 /* 258 * Interfaces that provide access to page table entries via the htable. 259 * 260 * Note that all accesses except x86pte_copy() and x86pte_zero() are atomic. 261 */ 262 extern void x86pte_cpu_init(cpu_t *, void *); 263 264 extern x86pte_t x86pte_get(htable_t *, uint_t entry); 265 266 extern x86pte_t x86pte_set(htable_t *, uint_t entry, x86pte_t new, void *); 267 268 extern x86pte_t x86pte_invalidate_pfn(htable_t *ht, uint_t entry, pfn_t pfn, 269 void *pte_ptr); 270 271 extern x86pte_t x86pte_update(htable_t *ht, uint_t entry, 272 x86pte_t old, x86pte_t new); 273 274 extern void x86pte_copy(htable_t *src, htable_t *dest, uint_t entry, 275 uint_t cnt); 276 277 extern void x86pte_zero(htable_t *ht, uint_t entry, uint_t cnt); 278 279 280 /* 281 * these are actually inlines for "lock; incw", "lock; decw", etc. instructions. 282 */ 283 #define HTABLE_INC(x) atomic_inc16((uint16_t *)&x) 284 #define HTABLE_DEC(x) atomic_dec16((uint16_t *)&x) 285 #define HTABLE_LOCK_INC(ht) atomic_add_32(&(ht)->ht_lock_cnt, 1) 286 #define HTABLE_LOCK_DEC(ht) atomic_add_32(&(ht)->ht_lock_cnt, -1) 287 288 #endif /* _KERNEL */ 289 290 291 #ifdef __cplusplus 292 } 293 #endif 294 295 #endif /* _VM_HTABLE_H */ 296