1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/types.h> 33 #include <sys/errno.h> 34 #include <sys/systm.h> 35 #include <sys/malloc.h> 36 #include <sys/smp.h> 37 38 #include <vm/vm.h> 39 #include <vm/pmap.h> 40 41 #include <machine/param.h> 42 #include <machine/cpufunc.h> 43 #include <machine/pmap.h> 44 #include <machine/vmparam.h> 45 46 #include <machine/vmm.h> 47 #include "vmx_cpufunc.h" 48 #include "vmx_msr.h" 49 #include "vmx.h" 50 #include "ept.h" 51 52 #define EPT_PWL4(cap) ((cap) & (1UL << 6)) 53 #define EPT_MEMORY_TYPE_WB(cap) ((cap) & (1UL << 14)) 54 #define EPT_PDE_SUPERPAGE(cap) ((cap) & (1UL << 16)) /* 2MB pages */ 55 #define EPT_PDPTE_SUPERPAGE(cap) ((cap) & (1UL << 17)) /* 1GB pages */ 56 #define INVVPID_SUPPORTED(cap) ((cap) & (1UL << 32)) 57 #define INVEPT_SUPPORTED(cap) ((cap) & (1UL << 20)) 58 59 #define INVVPID_ALL_TYPES_MASK 0xF0000000000UL 60 #define INVVPID_ALL_TYPES_SUPPORTED(cap) \ 61 (((cap) & INVVPID_ALL_TYPES_MASK) == INVVPID_ALL_TYPES_MASK) 62 63 #define INVEPT_ALL_TYPES_MASK 0x6000000UL 64 #define INVEPT_ALL_TYPES_SUPPORTED(cap) \ 65 (((cap) & INVEPT_ALL_TYPES_MASK) == INVEPT_ALL_TYPES_MASK) 66 67 #define EPT_PG_RD (1 << 0) 68 #define EPT_PG_WR (1 << 1) 69 #define EPT_PG_EX (1 << 2) 70 #define EPT_PG_MEMORY_TYPE(x) ((x) << 3) 71 #define EPT_PG_IGNORE_PAT (1 << 6) 72 #define EPT_PG_SUPERPAGE (1 << 7) 73 74 #define EPT_ADDR_MASK ((uint64_t)-1 << 12) 75 76 MALLOC_DECLARE(M_VMX); 77 78 static uint64_t page_sizes_mask; 79 80 int 81 ept_init(void) 82 { 83 int page_shift; 84 uint64_t cap; 85 86 cap = rdmsr(MSR_VMX_EPT_VPID_CAP); 87 88 /* 89 * Verify that: 90 * - page walk length is 4 steps 91 * - extended page tables can be laid out in write-back memory 92 * - invvpid instruction with all possible types is supported 93 * - invept instruction with all possible types is supported 94 */ 95 if (!EPT_PWL4(cap) || 96 !EPT_MEMORY_TYPE_WB(cap) || 97 !INVVPID_SUPPORTED(cap) || 98 !INVVPID_ALL_TYPES_SUPPORTED(cap) || 99 !INVEPT_SUPPORTED(cap) || 100 !INVEPT_ALL_TYPES_SUPPORTED(cap)) 101 return (EINVAL); 102 103 /* Set bits in 'page_sizes_mask' for each valid page size */ 104 page_shift = PAGE_SHIFT; 105 page_sizes_mask = 1UL << page_shift; /* 4KB page */ 106 107 page_shift += 9; 108 if (EPT_PDE_SUPERPAGE(cap)) 109 page_sizes_mask |= 1UL << page_shift; /* 2MB superpage */ 110 111 page_shift += 9; 112 if (EPT_PDPTE_SUPERPAGE(cap)) 113 page_sizes_mask |= 1UL << page_shift; /* 1GB superpage */ 114 115 return (0); 116 } 117 118 #if 0 119 static void 120 ept_dump(uint64_t *ptp, int nlevels) 121 { 122 int i, t, tabs; 123 uint64_t *ptpnext, ptpval; 124 125 if (--nlevels < 0) 126 return; 127 128 tabs = 3 - nlevels; 129 for (t = 0; t < tabs; t++) 130 printf("\t"); 131 printf("PTP = %p\n", ptp); 132 133 for (i = 0; i < 512; i++) { 134 ptpval = ptp[i]; 135 136 if (ptpval == 0) 137 continue; 138 139 for (t = 0; t < tabs; t++) 140 printf("\t"); 141 printf("%3d 0x%016lx\n", i, ptpval); 142 143 if (nlevels != 0 && (ptpval & EPT_PG_SUPERPAGE) == 0) { 144 ptpnext = (uint64_t *) 145 PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK); 146 ept_dump(ptpnext, nlevels); 147 } 148 } 149 } 150 #endif 151 152 static size_t 153 ept_create_mapping(uint64_t *ptp, vm_paddr_t gpa, vm_paddr_t hpa, size_t length, 154 vm_memattr_t attr, vm_prot_t prot, boolean_t spok) 155 { 156 int spshift, ptpshift, ptpindex, nlevels; 157 158 /* 159 * Compute the size of the mapping that we can accomodate. 160 * 161 * This is based on three factors: 162 * - super page sizes supported by the processor 163 * - alignment of the region starting at 'gpa' and 'hpa' 164 * - length of the region 'len' 165 */ 166 spshift = PAGE_SHIFT; 167 if (spok) 168 spshift += (EPT_PWLEVELS - 1) * 9; 169 while (spshift >= PAGE_SHIFT) { 170 uint64_t spsize = 1UL << spshift; 171 if ((page_sizes_mask & spsize) != 0 && 172 (gpa & (spsize - 1)) == 0 && 173 (hpa & (spsize - 1)) == 0 && 174 length >= spsize) { 175 break; 176 } 177 spshift -= 9; 178 } 179 180 if (spshift < PAGE_SHIFT) { 181 panic("Invalid spshift for gpa 0x%016lx, hpa 0x%016lx, " 182 "length 0x%016lx, page_sizes_mask 0x%016lx", 183 gpa, hpa, length, page_sizes_mask); 184 } 185 186 nlevels = EPT_PWLEVELS; 187 while (--nlevels >= 0) { 188 ptpshift = PAGE_SHIFT + nlevels * 9; 189 ptpindex = (gpa >> ptpshift) & 0x1FF; 190 191 /* We have reached the leaf mapping */ 192 if (spshift >= ptpshift) 193 break; 194 195 /* 196 * We are working on a non-leaf page table page. 197 * 198 * Create the next level page table page if necessary and point 199 * to it from the current page table. 200 */ 201 if (ptp[ptpindex] == 0) { 202 void *nlp = malloc(PAGE_SIZE, M_VMX, M_WAITOK | M_ZERO); 203 ptp[ptpindex] = vtophys(nlp); 204 ptp[ptpindex] |= EPT_PG_RD | EPT_PG_WR | EPT_PG_EX; 205 } 206 207 /* Work our way down to the next level page table page */ 208 ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & EPT_ADDR_MASK); 209 } 210 211 if ((gpa & ((1UL << ptpshift) - 1)) != 0) { 212 panic("ept_create_mapping: gpa 0x%016lx and ptpshift %d " 213 "mismatch\n", gpa, ptpshift); 214 } 215 216 if (prot != VM_PROT_NONE) { 217 /* Do the mapping */ 218 ptp[ptpindex] = hpa; 219 220 /* Apply the access controls */ 221 if (prot & VM_PROT_READ) 222 ptp[ptpindex] |= EPT_PG_RD; 223 if (prot & VM_PROT_WRITE) 224 ptp[ptpindex] |= EPT_PG_WR; 225 if (prot & VM_PROT_EXECUTE) 226 ptp[ptpindex] |= EPT_PG_EX; 227 228 /* 229 * XXX should we enforce this memory type by setting the 230 * ignore PAT bit to 1. 231 */ 232 ptp[ptpindex] |= EPT_PG_MEMORY_TYPE(attr); 233 234 if (nlevels > 0) 235 ptp[ptpindex] |= EPT_PG_SUPERPAGE; 236 } else { 237 /* Remove the mapping */ 238 ptp[ptpindex] = 0; 239 } 240 241 return (1UL << ptpshift); 242 } 243 244 static vm_paddr_t 245 ept_lookup_mapping(uint64_t *ptp, vm_paddr_t gpa) 246 { 247 int nlevels, ptpshift, ptpindex; 248 uint64_t ptpval, hpabase, pgmask; 249 250 nlevels = EPT_PWLEVELS; 251 while (--nlevels >= 0) { 252 ptpshift = PAGE_SHIFT + nlevels * 9; 253 ptpindex = (gpa >> ptpshift) & 0x1FF; 254 255 ptpval = ptp[ptpindex]; 256 257 /* Cannot make progress beyond this point */ 258 if ((ptpval & (EPT_PG_RD | EPT_PG_WR | EPT_PG_EX)) == 0) 259 break; 260 261 if (nlevels == 0 || (ptpval & EPT_PG_SUPERPAGE)) { 262 pgmask = (1UL << ptpshift) - 1; 263 hpabase = ptpval & ~pgmask; 264 return (hpabase | (gpa & pgmask)); 265 } 266 267 /* Work our way down to the next level page table page */ 268 ptp = (uint64_t *)PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK); 269 } 270 271 return ((vm_paddr_t)-1); 272 } 273 274 static void 275 ept_free_pt_entry(pt_entry_t pte) 276 { 277 if (pte == 0) 278 return; 279 280 /* sanity check */ 281 if ((pte & EPT_PG_SUPERPAGE) != 0) 282 panic("ept_free_pt_entry: pte cannot have superpage bit"); 283 284 return; 285 } 286 287 static void 288 ept_free_pd_entry(pd_entry_t pde) 289 { 290 pt_entry_t *pt; 291 int i; 292 293 if (pde == 0) 294 return; 295 296 if ((pde & EPT_PG_SUPERPAGE) == 0) { 297 pt = (pt_entry_t *)PHYS_TO_DMAP(pde & EPT_ADDR_MASK); 298 for (i = 0; i < NPTEPG; i++) 299 ept_free_pt_entry(pt[i]); 300 free(pt, M_VMX); /* free the page table page */ 301 } 302 } 303 304 static void 305 ept_free_pdp_entry(pdp_entry_t pdpe) 306 { 307 pd_entry_t *pd; 308 int i; 309 310 if (pdpe == 0) 311 return; 312 313 if ((pdpe & EPT_PG_SUPERPAGE) == 0) { 314 pd = (pd_entry_t *)PHYS_TO_DMAP(pdpe & EPT_ADDR_MASK); 315 for (i = 0; i < NPDEPG; i++) 316 ept_free_pd_entry(pd[i]); 317 free(pd, M_VMX); /* free the page directory page */ 318 } 319 } 320 321 static void 322 ept_free_pml4_entry(pml4_entry_t pml4e) 323 { 324 pdp_entry_t *pdp; 325 int i; 326 327 if (pml4e == 0) 328 return; 329 330 if ((pml4e & EPT_PG_SUPERPAGE) == 0) { 331 pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4e & EPT_ADDR_MASK); 332 for (i = 0; i < NPDPEPG; i++) 333 ept_free_pdp_entry(pdp[i]); 334 free(pdp, M_VMX); /* free the page directory ptr page */ 335 } 336 } 337 338 void 339 ept_vmcleanup(struct vmx *vmx) 340 { 341 int i; 342 343 for (i = 0; i < NPML4EPG; i++) 344 ept_free_pml4_entry(vmx->pml4ept[i]); 345 } 346 347 int 348 ept_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t len, 349 vm_memattr_t attr, int prot, boolean_t spok) 350 { 351 size_t n; 352 struct vmx *vmx = arg; 353 354 while (len > 0) { 355 n = ept_create_mapping(vmx->pml4ept, gpa, hpa, len, attr, 356 prot, spok); 357 len -= n; 358 gpa += n; 359 hpa += n; 360 } 361 362 return (0); 363 } 364 365 vm_paddr_t 366 ept_vmmmap_get(void *arg, vm_paddr_t gpa) 367 { 368 vm_paddr_t hpa; 369 struct vmx *vmx; 370 371 vmx = arg; 372 hpa = ept_lookup_mapping(vmx->pml4ept, gpa); 373 return (hpa); 374 } 375 376 static void 377 invept_single_context(void *arg) 378 { 379 struct invept_desc desc = *(struct invept_desc *)arg; 380 381 invept(INVEPT_TYPE_SINGLE_CONTEXT, desc); 382 } 383 384 void 385 ept_invalidate_mappings(u_long pml4ept) 386 { 387 struct invept_desc invept_desc = { 0 }; 388 389 invept_desc.eptp = EPTP(pml4ept); 390 391 smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc); 392 } 393