1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #ifndef _VM_HTABLE_H 27 #define _VM_HTABLE_H 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #ifdef __cplusplus 32 extern "C" { 33 #endif 34 35 #if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL) 36 #include <asm/htable.h> 37 #endif 38 39 extern void atomic_andb(uint8_t *addr, uint8_t value); 40 extern void atomic_orb(uint8_t *addr, uint8_t value); 41 extern void atomic_inc16(uint16_t *addr); 42 extern void atomic_dec16(uint16_t *addr); 43 extern void mmu_tlbflush_entry(caddr_t addr); 44 45 /* 46 * Each hardware page table has an htable_t describing it. 47 * 48 * We use a reference counter mechanism to detect when we can free an htable. 49 * In the implmentation the reference count is split into 2 separate counters: 50 * 51 * ht_busy is a traditional reference count of uses of the htable pointer 52 * 53 * ht_valid_cnt is a count of how references are implied by valid PTE/PTP 54 * entries in the pagetable 55 * 56 * ht_busy is only incremented by htable_lookup() or htable_create() 57 * while holding the appropriate hash_table mutex. While installing a new 58 * valid PTE or PTP, in order to increment ht_valid_cnt a thread must have 59 * done an htable_lookup() or htable_create() but not the htable_release yet. 60 * 61 * htable_release(), while holding the mutex, can know that if 62 * busy == 1 and valid_cnt == 0, the htable can be free'd. 63 * 64 * The fields have been ordered to make htable_lookup() fast. Hence, 65 * ht_hat, ht_vaddr, ht_level and ht_next need to be clustered together. 66 */ 67 struct htable { 68 struct htable *ht_next; /* forward link for hash table */ 69 struct hat *ht_hat; /* hat this mapping comes from */ 70 uintptr_t ht_vaddr; /* virt addr at start of this table */ 71 int8_t ht_level; /* page table level: 0=4K, 1=2M, ... */ 72 uint8_t ht_flags; /* see below */ 73 int16_t ht_busy; /* implements locking protocol */ 74 int16_t ht_valid_cnt; /* # of valid entries in this table */ 75 uint32_t ht_lock_cnt; /* # of locked entries in this table */ 76 /* never used for kernel hat */ 77 pfn_t ht_pfn; /* pfn of page of the pagetable */ 78 struct htable *ht_prev; /* backward link for hash table */ 79 struct htable *ht_parent; /* htable that points to this htable */ 80 struct htable *ht_shares; /* for HTABLE_SHARED_PFN only */ 81 }; 82 typedef struct htable htable_t; 83 84 /* 85 * Flags values for htable ht_flags field: 86 * 87 * HTABLE_VLP - this is the top level htable of a VLP HAT. 88 * 89 * HTABLE_SHARED_PFN - this htable had its PFN assigned from sharing another 90 * htable. Used by hat_share() for ISM. 91 */ 92 #define HTABLE_VLP (0x01) 93 #define HTABLE_SHARED_PFN (0x02) 94 95 /* 96 * The htable hash table hashing function. The 28 is so that high 97 * order bits are include in the hash index to skew the wrap 98 * around of addresses. Even though the hash buckets are stored per 99 * hat we include the value of hat pointer in the hash function so 100 * that the secondary hash for the htable mutex winds up begin different in 101 * every address space. 102 */ 103 #define HTABLE_HASH(hat, va, lvl) \ 104 ((((va) >> LEVEL_SHIFT(1)) + ((va) >> 28) + (lvl) + \ 105 ((uintptr_t)(hat) >> 4)) & ((hat)->hat_num_hash - 1)) 106 107 /* 108 * Each CPU gets a unique hat_cpu_info structure in cpu_hat_info. 109 */ 110 struct hat_cpu_info { 111 kmutex_t hci_mutex; /* mutex to ensure sequential usage */ 112 #if defined(__amd64) 113 pfn_t hci_vlp_pfn; /* pfn of hci_vlp_l3ptes */ 114 x86pte_t *hci_vlp_l3ptes; /* VLP Level==3 pagetable (top) */ 115 x86pte_t *hci_vlp_l2ptes; /* VLP Level==2 pagetable */ 116 #endif /* __amd64 */ 117 }; 118 119 120 /* 121 * Compute the last page aligned VA mapped by an htable. 122 * 123 * Given a va and a level, compute the virtual address of the start of the 124 * next page at that level. 125 * 126 * XX64 - The check for the VA hole needs to be better generalized. 127 */ 128 #define HTABLE_NUM_PTES_PAE(ht) \ 129 (((ht)->ht_flags & HTABLE_VLP) ? 4 : 512) 130 #if defined(__amd64) 131 #define HTABLE_NUM_PTES(ht) HTABLE_NUM_PTES_PAE(ht) 132 133 #define HTABLE_LAST_PAGE(ht) \ 134 ((ht)->ht_level == mmu.max_level ? ((uintptr_t)0UL - MMU_PAGESIZE) :\ 135 ((ht)->ht_vaddr - MMU_PAGESIZE + \ 136 ((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level)))) 137 138 #define NEXT_ENTRY_VA(va, l) \ 139 ((va & LEVEL_MASK(l)) + LEVEL_SIZE(l) == mmu.hole_start ? \ 140 mmu.hole_end : (va & LEVEL_MASK(l)) + LEVEL_SIZE(l)) 141 142 #elif defined(__i386) 143 144 #define HTABLE_NUM_PTES(ht) (!mmu.pae_hat ? 1024 : HTABLE_NUM_PTES_PAE(ht)) 145 146 #define HTABLE_LAST_PAGE(ht) ((ht)->ht_vaddr - MMU_PAGESIZE + \ 147 ((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level))) 148 149 #define NEXT_ENTRY_VA(va, l) ((va & LEVEL_MASK(l)) + LEVEL_SIZE(l)) 150 151 #endif 152 153 #if defined(_KERNEL) 154 155 /* 156 * initialization function called from hat_init() 157 */ 158 extern void htable_init(void); 159 160 /* 161 * Functions to lookup, or "lookup and create", the htable corresponding 162 * to the virtual address "vaddr" in the "hat" at the given "level" of 163 * page tables. htable_lookup() may return NULL if no such entry exists. 164 * 165 * On return the given htable is marked busy (a shared lock) - this prevents 166 * the htable from being stolen or freed) until htable_release() is called. 167 * 168 * If kalloc_flag is set on an htable_create() we can't call kmem allocation 169 * routines for this htable, since it's for the kernel hat itself. 170 * 171 * htable_acquire() is used when an htable pointer has been extracted from 172 * an hment and we need to get a reference to the htable. 173 */ 174 extern htable_t *htable_lookup(struct hat *hat, uintptr_t vaddr, level_t level); 175 extern htable_t *htable_create(struct hat *hat, uintptr_t vaddr, level_t level, 176 htable_t *shared); 177 extern void htable_acquire(htable_t *); 178 179 extern void htable_release(htable_t *ht); 180 extern void htable_destroy(htable_t *ht); 181 182 /* 183 * Code to free all remaining htables for a hat. Called after the hat is no 184 * longer in use by any thread. 185 */ 186 extern void htable_purge_hat(struct hat *hat); 187 188 /* 189 * Find the htable, page table entry index, and PTE of the given virtual 190 * address. If not found returns NULL. When found, returns the htable_t *, 191 * sets entry, and has a hold on the htable. 192 */ 193 extern htable_t *htable_getpte(struct hat *, uintptr_t, uint_t *, x86pte_t *, 194 level_t); 195 196 /* 197 * Similar to hat_getpte(), except that this only succeeds if a valid 198 * page mapping is present. 199 */ 200 extern htable_t *htable_getpage(struct hat *hat, uintptr_t va, uint_t *entry); 201 202 /* 203 * Called to allocate initial/additional htables for reserve. 204 */ 205 extern void htable_initial_reserve(uint_t); 206 extern void htable_reserve(uint_t); 207 208 /* 209 * Used to readjust the htable reserve after the reserve list has been used. 210 * Also called after boot to release left over boot reserves. 211 */ 212 extern void htable_adjust_reserve(void); 213 214 /* 215 * Attach initial pagetables as htables 216 */ 217 extern void htable_attach(struct hat *, uintptr_t, level_t, struct htable *, 218 pfn_t); 219 220 /* 221 * return the number of pages mapped by a hat 222 */ 223 extern pgcnt_t htable_count_pages(struct hat *); 224 225 /* 226 * Routine to find the next populated htable at or above a given virtual 227 * address. Can specify an upper limit, or HTABLE_WALK_TO_END to indicate 228 * that it should search the entire address space. Similar to 229 * hat_getpte(), but used for walking through address ranges. It can be 230 * used like this: 231 * 232 * va = ... 233 * ht = NULL; 234 * while (va < end_va) { 235 * pte = htable_walk(hat, &ht, &va, end_va); 236 * if (!pte) 237 * break; 238 * 239 * ... code to operate on page at va ... 240 * 241 * va += LEVEL_SIZE(ht->ht_level); 242 * } 243 * if (ht) 244 * htable_release(ht); 245 * 246 */ 247 extern x86pte_t htable_walk(struct hat *hat, htable_t **ht, uintptr_t *va, 248 uintptr_t eaddr); 249 250 #define HTABLE_WALK_TO_END ((uintptr_t)-1) 251 252 /* 253 * Utilities convert between virtual addresses and page table entry indeces. 254 */ 255 extern uint_t htable_va2entry(uintptr_t va, htable_t *ht); 256 extern uintptr_t htable_e2va(htable_t *ht, uint_t entry); 257 258 /* 259 * Interfaces that provide access to page table entries via the htable. 260 * 261 * Note that all accesses except x86pte_copy() and x86pte_zero() are atomic. 262 */ 263 extern void x86pte_cpu_init(cpu_t *); 264 extern void x86pte_cpu_fini(cpu_t *); 265 266 extern x86pte_t x86pte_get(htable_t *, uint_t entry); 267 268 /* 269 * x86pte_set returns LPAGE_ERROR if it's asked to overwrite a page table 270 * link with a large page mapping. 271 */ 272 #define LPAGE_ERROR (-(x86pte_t)1) 273 extern x86pte_t x86pte_set(htable_t *, uint_t entry, x86pte_t new, void *); 274 275 extern x86pte_t x86pte_inval(htable_t *ht, uint_t entry, 276 x86pte_t old, x86pte_t *ptr); 277 278 extern x86pte_t x86pte_update(htable_t *ht, uint_t entry, 279 x86pte_t old, x86pte_t new); 280 281 extern void x86pte_copy(htable_t *src, htable_t *dest, uint_t entry, 282 uint_t cnt); 283 284 /* 285 * access to a pagetable knowing only the pfn 286 */ 287 extern x86pte_t *x86pte_mapin(pfn_t, uint_t, htable_t *); 288 extern void x86pte_mapout(void); 289 290 /* 291 * these are actually inlines for "lock; incw", "lock; decw", etc. instructions. 292 */ 293 #define HTABLE_INC(x) atomic_inc16((uint16_t *)&x) 294 #define HTABLE_DEC(x) atomic_dec16((uint16_t *)&x) 295 #define HTABLE_LOCK_INC(ht) atomic_add_32(&(ht)->ht_lock_cnt, 1) 296 #define HTABLE_LOCK_DEC(ht) atomic_add_32(&(ht)->ht_lock_cnt, -1) 297 298 #endif /* _KERNEL */ 299 300 301 #ifdef __cplusplus 302 } 303 #endif 304 305 #endif /* _VM_HTABLE_H */ 306