/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_VM_HAT_PTE_H
#define	_VM_HAT_PTE_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#ifdef	__cplusplus
extern "C" {
#endif


#include <sys/types.h>

/*
 * Defines for the bits in X86 and AMD64 Page Tables
 *
 * Notes:
 *
 * Largepages and PAT bits:
 *
 *	bit 7 at level 0 is the PAT bit
 *	bit 7 above level 0 is the Pagesize bit (set for large page)
 *	bit 12 (when a large page) is the PAT bit
 *
 * In Solaris the PAT/PWT/PCD values are set up so that:
 *
 *	PAT & PWT -> Write Protected
 *	PAT & PCD -> Write Combining
 *	PAT by itself (PWT == 0 && PCD == 0) yields uncacheable (same as
 *	PCD == 1)
 *
 *
 * Permission bits:
 *
 * - PT_USER must be set in all levels for user pages
 * - PT_WRITABLE must be set in all levels for user writable pages
 * - PT_NX applies if set at any level
 *
 * For these, we use the "allow" settings in all tables above level 0 and only
 * ever disable things in PTEs.
 *
 * The use of PT_GLOBAL and PT_NX depends on being enabled in processor
 * control registers. Hence, we use a variable to reference these bit
 * masks. During hat_kern_setup() if the feature isn't enabled we
 * clear out the variables.
 */
#define	PT_VALID	(0x001)	/* a valid translation is present */
#define	PT_WRITABLE	(0x002)	/* the page is writable */
#define	PT_USER		(0x004)	/* the page is accessible by user mode */
#define	PT_WRITETHRU	(0x008)	/* write back caching is disabled (non-PAT) */
#define	PT_NOCACHE	(0x010)	/* page is not cacheable (non-PAT) */
#define	PT_REF		(0x020)	/* page was referenced */
#define	PT_MOD		(0x040)	/* page was modified */
#define	PT_PAGESIZE	(0x080)	/* above level 0, indicates a large page */
#define	PT_PAT_4K	(0x080) /* at level 0, used for write combining */
#define	PT_GLOBAL	(0x100)	/* the mapping is global */
#define	PT_SOFTWARE	(0xe00)	/* bits 9-11: available for software use */

#define	PT_PAT_LARGE	(0x1000)	/* PAT bit for large pages */

/* bits always set in an interior (page table pointer) entry */
#define	PT_PTPBITS	(PT_VALID | PT_USER | PT_WRITABLE | PT_REF)
#define	PT_FLAGBITS	(0xfff)	/* for masking off flag bits */

/*
 * The software bits are used by the HAT to track attributes.
 * Both values below lie within the PT_SOFTWARE field (bits 9-11).
 *
 * PT_NOSYNC - The PT_REF/PT_MOD bits are not sync'd to page_t.
 *             The hat will install them as always set.
 *
 * PT_NOCONSIST - There is no entry for this hment for this mapping.
 */
#define	PT_NOSYNC	(0x200)	/* PTE was created with HAT_NOSYNC */
#define	PT_NOCONSIST	(0x400)	/* PTE was created with HAT_LOAD_NOCONSIST */

/*
 * macros to get/set/clear the PTE fields
 *
 * PTE_CLR casts the flag mask to x86pte_t before complementing so that
 * the ~ operates at full 64 bit width, not at the (possibly 32 bit)
 * width of the flag constant.
 */
#define	PTE_SET(p, f)	((p) |= (f))
#define	PTE_CLR(p, f)	((p) &= ~(x86pte_t)(f))
#define	PTE_GET(p, f)	((p) & (f))

/*
 * Handy macro to check if a pagetable entry or pointer is valid
 */
#define	PTE_ISVALID(p)		PTE_GET(p, PT_VALID)

/*
 * Does a PTE map a large page.  Bit 7 only means "large page" above
 * level 0; at level 0 it is the PAT bit, hence the level check.
 */
#define	PTE_IS_LGPG(p, l)	((l) > 0 && PTE_GET((p), PT_PAGESIZE))

/*
 * does this PTE represent a page (not a pointer to another page table)?
 */
#define	PTE_ISPAGE(p, l)	\
	(PTE_ISVALID(p) && ((l) == 0 || PTE_GET(p, PT_PAGESIZE)))

/*
 * Handy macro to check if 2 PTE's are the same - ignores REF/MOD bits
 * (both sides have REF|MOD forced on, so any difference there is masked).
 */
#define	PTE_EQUIV(a, b)	 (((a) | PT_REF | PT_MOD) == ((b) | PT_REF | PT_MOD))

/*
 * Shorthand for converting a PTE to it's pfn.  Large pages use a
 * different physical address mask, since bit 12 is the PAT bit there.
 */
#define	PTE2PFN(p, l)	\
	mmu_btop(PTE_GET((p), PTE_IS_LGPG((p), (l)) ? PT_PADDR_LGPG : PT_PADDR))

/*
 * The software extraction for a single Page Table Entry will always
 * be a 64 bit unsigned int. If running a non-PAE hat, the page table
 * access routines know to extend/shorten it to 32 bits.
 */
typedef uint64_t x86pte_t;
typedef uint32_t x86pte32_t;
#define	PT_NX		(0x8000000000000000ull)
#define	PT_PADDR	(0x00fffffffffff000ull)
#define	PT_PADDR_LGPG	(0x00ffffffffffe000ull)	/* phys addr for large pages */

/*
 * Macros to create a PTP or PTE from the pfn and level.
 * Note MAKEPTP indexes ptp_bits by (l) + 1: the bits stored for a PTP
 * are those of the table level it points INTO, one above the entry's own.
 */
#define	MAKEPTP(pfn, l)	\
	(((x86pte_t)(pfn) << MMU_PAGESHIFT) | mmu.ptp_bits[(l) + 1])
#define	MAKEPTE(pfn, l)	\
	(((x86pte_t)(pfn) << MMU_PAGESHIFT) | mmu.pte_bits[l])

/*
 * The idea of "level" refers to the level where the page table is used in
 * the hardware address translation steps. The level values correspond to
 * the following names of tables used in AMD/Intel architecture documents:
 *
 *	AMD/INTEL name		Level #
 *	----------------------	-------
 *	Page Map Level 4	   3
 *	Page Directory Pointer	   2
 *	Page Directory		   1
 *	Page Table		   0
 *
 * The numbering scheme is such that the values of 0 and 1 can correspond to
 * the pagesize codes used for MPSS support. For now the Maximum level at
 * which you can have a large page is a constant, that may change in
 * future processors.
 *
 * The type of "level_t" is signed so that it can be used like:
 *	level_t	l;
 *	...
 *	while (--l >= 0)
 *		...
 */
#define	MAX_NUM_LEVEL		4
#define	MAX_PAGE_LEVEL		1			/* for now.. sigh */
typedef int16_t level_t;
#define	LEVEL_SHIFT(l)	(mmu.level_shift[l])
#define	LEVEL_SIZE(l)	(mmu.level_size[l])
#define	LEVEL_OFFSET(l)	(mmu.level_offset[l])
#define	LEVEL_MASK(l)	(mmu.level_mask[l])

/*
 * Macros to:
 * Check for a PFN above 4Gig and 64Gig for 32 bit PAE support
 * (the constants are the pfn counts of 4G/64G of physical memory).
 */
#define	PFN_4G		(4ull * (1024 * 1024 * 1024 / MMU_PAGESIZE))
#define	PFN_64G		(64ull * (1024 * 1024 * 1024 / MMU_PAGESIZE))
#define	PFN_ABOVE4G(pfn) ((pfn) >= PFN_4G)
#define	PFN_ABOVE64G(pfn) ((pfn) >= PFN_64G)

/*
 * The CR3 register holds the physical address of the top level page table.
 */
#define	MAKECR3(pfn)	mmu_ptob(pfn)

/*
 * HAT/MMU parameters that depend on kernel mode and/or processor type
 */
struct htable;
struct hat_mmu_info {
	x86pte_t pt_nx;		/* either 0 or PT_NX */
	x86pte_t pt_global;	/* either 0 or PT_GLOBAL */

	pfn_t highest_pfn;

	uint_t num_level;	/* number of page table levels in use */
	uint_t max_level;	/* just num_level - 1 */
	uint_t max_page_level;	/* maximum level at which we can map a page */
	uint_t ptes_per_table;	/* # of entries in lower level page tables */
	uint_t top_level_count;	/* # of entries in top most level page table */

	uint_t hash_cnt;	/* cnt of entries in htable_hash_cache */
	uint_t vlp_hash_cnt;	/* cnt of entries in vlp htable_hash_cache */

	uint_t pae_hat;		/* either 0 or 1 */

	uintptr_t hole_start;	/* start of VA hole (or -1 if none) */
	uintptr_t hole_end;	/* end of VA hole (or 0 if none) */

	struct htable **kmap_htables; /* htables for segmap + 32 bit heap */
	x86pte_t *kmap_ptes;	/* mapping of pagetables that map kmap */
	uintptr_t kmap_addr;	/* start addr of kmap */
	uintptr_t kmap_eaddr;	/* end addr of kmap */

	uint_t pte_size;	/* either 4 or 8 */
	uint_t pte_size_shift;	/* either 2 or 3 */
	x86pte_t ptp_bits[MAX_NUM_LEVEL];	/* bits set for interior PTP */
	x86pte_t pte_bits[MAX_NUM_LEVEL];	/* bits set for leaf PTE */

	/*
	 * The following tables are equivalent to PAGEXXXXX at different levels
	 * in the page table hierarchy.
	 */
	uint_t level_shift[MAX_NUM_LEVEL];	/* PAGESHIFT for given level */
	uintptr_t level_size[MAX_NUM_LEVEL];	/* PAGESIZE for given level */
	uintptr_t level_offset[MAX_NUM_LEVEL];	/* PAGEOFFSET for given level */
	uintptr_t level_mask[MAX_NUM_LEVEL];	/* PAGEMASK for given level */

	uint_t tlb_entries[MAX_NUM_LEVEL];	/* tlb entries per pagesize */
};


#if defined(_KERNEL)
/*
 * The concept of a VA hole exists in AMD64. This might need to be made
 * model specific eventually.
 *
 * In the 64 bit kernel PTE loads are atomic, but need cas64 on 32 bit kernel.
 */
#if defined(__amd64)

#ifdef lint
#define	IN_VA_HOLE(va)	(__lintzero)
#else
#define	IN_VA_HOLE(va)	(mmu.hole_start <= (va) && (va) < mmu.hole_end)
#endif

#define	FMT_PTE "%lx"
#define	ATOMIC_LOAD64(ptr, pte) ((pte) = *(ptr))

#elif defined(__i386)

#ifdef lint
#define	IN_VA_HOLE(va)	(__lintzero)
#else
#define	IN_VA_HOLE(va)	(0)
#endif

#define	FMT_PTE "%llx"
/*
 * cas64 with old == new performs an atomic 64 bit read; a plain 64 bit
 * load is not atomic on 32 bit x86.
 */
#define	ATOMIC_LOAD64(ptr, pte) (((pte) = *(ptr)),	\
	((pte) = cas64(ptr, pte, pte)))

#endif	/* __i386 */


extern struct hat_mmu_info mmu;

#endif	/* _KERNEL */


#ifdef	__cplusplus
}
#endif

#endif	/* _VM_HAT_PTE_H */