xref: /freebsd/sys/amd64/vmm/intel/ept.c (revision e14ddd1f16e7e5788392c50de21ea7c927e0690c)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/types.h>
33 #include <sys/errno.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/smp.h>
37 
38 #include <vm/vm.h>
39 #include <vm/pmap.h>
40 
41 #include <machine/param.h>
42 #include <machine/cpufunc.h>
43 #include <machine/pmap.h>
44 #include <machine/vmparam.h>
45 
46 #include <machine/vmm.h>
47 #include "vmx_cpufunc.h"
48 #include "vmx_msr.h"
49 #include "vmx.h"
50 #include "ept.h"
51 
/*
 * Capability bits in the value read from MSR_VMX_EPT_VPID_CAP.  Each macro
 * evaluates to non-zero when the corresponding bit is set in 'cap'.
 */
#define	EPT_PWL4(cap)			((cap) & 0x40UL)	/* bit 6 */
#define	EPT_MEMORY_TYPE_WB(cap)		((cap) & 0x4000UL)	/* bit 14 */
#define	EPT_PDE_SUPERPAGE(cap)		((cap) & 0x10000UL)	/* bit 16: 2MB pages */
#define	EPT_PDPTE_SUPERPAGE(cap)	((cap) & 0x20000UL)	/* bit 17: 1GB pages */
#define	INVVPID_SUPPORTED(cap)		((cap) & 0x100000000UL)	/* bit 32 */
#define	INVEPT_SUPPORTED(cap)		((cap) & 0x100000UL)	/* bit 20 */

/* Bits 40-43: every invvpid type must be advertised. */
#define	INVVPID_ALL_TYPES_MASK		(0xFUL << 40)
#define	INVVPID_ALL_TYPES_SUPPORTED(cap)	\
	(((cap) & INVVPID_ALL_TYPES_MASK) == INVVPID_ALL_TYPES_MASK)

/* Bits 25-26: every invept type must be advertised. */
#define	INVEPT_ALL_TYPES_MASK		(0x3UL << 25)
#define	INVEPT_ALL_TYPES_SUPPORTED(cap)		\
	(((cap) & INVEPT_ALL_TYPES_MASK) == INVEPT_ALL_TYPES_MASK)

/* Bits stored in an EPT paging-structure entry. */
#define	EPT_PG_RD			0x01	/* read access */
#define	EPT_PG_WR			0x02	/* write access */
#define	EPT_PG_EX			0x04	/* execute access */
#define	EPT_PG_MEMORY_TYPE(x)		((x) << 3)
#define	EPT_PG_IGNORE_PAT		0x40
#define	EPT_PG_SUPERPAGE		0x80	/* entry is a leaf above the 4KB level */

/* Strips the low 12 bits of an entry, leaving the page-aligned address part. */
#define	EPT_ADDR_MASK			(~(uint64_t)0xFFF)
75 
/*
 * malloc(9) type used by this file for the EPT page table pages it
 * allocates and frees (declared here; presumably defined elsewhere in vmm).
 */
MALLOC_DECLARE(M_VMX);

/*
 * Bitmask of supported EPT page sizes: bit 'n' is set when a page of size
 * (1UL << n) can be mapped.  Populated by ept_init() and consulted by
 * ept_create_mapping() when choosing the size of a mapping.
 */
static uint64_t page_sizes_mask;
79 
80 int
81 ept_init(void)
82 {
83 	int page_shift;
84 	uint64_t cap;
85 
86 	cap = rdmsr(MSR_VMX_EPT_VPID_CAP);
87 
88 	/*
89 	 * Verify that:
90 	 * - page walk length is 4 steps
91 	 * - extended page tables can be laid out in write-back memory
92 	 * - invvpid instruction with all possible types is supported
93 	 * - invept instruction with all possible types is supported
94 	 */
95 	if (!EPT_PWL4(cap) ||
96 	    !EPT_MEMORY_TYPE_WB(cap) ||
97 	    !INVVPID_SUPPORTED(cap) ||
98 	    !INVVPID_ALL_TYPES_SUPPORTED(cap) ||
99 	    !INVEPT_SUPPORTED(cap) ||
100 	    !INVEPT_ALL_TYPES_SUPPORTED(cap))
101 		return (EINVAL);
102 
103 	/* Set bits in 'page_sizes_mask' for each valid page size */
104 	page_shift = PAGE_SHIFT;
105 	page_sizes_mask = 1UL << page_shift;		/* 4KB page */
106 
107 	page_shift += 9;
108 	if (EPT_PDE_SUPERPAGE(cap))
109 		page_sizes_mask |= 1UL << page_shift;	/* 2MB superpage */
110 
111 	page_shift += 9;
112 	if (EPT_PDPTE_SUPERPAGE(cap))
113 		page_sizes_mask |= 1UL << page_shift;	/* 1GB superpage */
114 
115 	return (0);
116 }
117 
#if 0
/*
 * Debugging aid (compiled out): recursively print every non-zero entry of
 * the page table page 'ptp' and of the tables below it.
 *
 * 'nlevels' is the number of levels still to be printed, counting 'ptp'
 * itself; output is indented by one tab per level already descended
 * (3 - remaining levels).
 */
static void
ept_dump(uint64_t *ptp, int nlevels)
{
	uint64_t entry;
	int depth, idx, k;

	nlevels--;
	if (nlevels < 0)
		return;

	depth = 3 - nlevels;
	for (k = depth; k > 0; k--)
		printf("\t");
	printf("PTP = %p\n", ptp);

	for (idx = 0; idx < 512; idx++) {
		entry = ptp[idx];
		if (entry != 0) {
			for (k = depth; k > 0; k--)
				printf("\t");
			printf("%3d 0x%016lx\n", idx, entry);

			/* Leaf entries have nothing below them to print. */
			if (nlevels == 0 || (entry & EPT_PG_SUPERPAGE) != 0)
				continue;

			ept_dump((uint64_t *)PHYS_TO_DMAP(entry & EPT_ADDR_MASK),
				 nlevels);
		}
	}
}
#endif
151 
152 static size_t
153 ept_create_mapping(uint64_t *ptp, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
154 		   vm_memattr_t attr, vm_prot_t prot, boolean_t spok)
155 {
156 	int spshift, ptpshift, ptpindex, nlevels;
157 
158 	/*
159 	 * Compute the size of the mapping that we can accomodate.
160 	 *
161 	 * This is based on three factors:
162 	 * - super page sizes supported by the processor
163 	 * - alignment of the region starting at 'gpa' and 'hpa'
164 	 * - length of the region 'len'
165 	 */
166 	spshift = PAGE_SHIFT;
167 	if (spok)
168 		spshift += (EPT_PWLEVELS - 1) * 9;
169 	while (spshift >= PAGE_SHIFT) {
170 		uint64_t spsize = 1UL << spshift;
171 		if ((page_sizes_mask & spsize) != 0 &&
172 		    (gpa & (spsize - 1)) == 0 &&
173 		    (hpa & (spsize - 1)) == 0 &&
174 		    length >= spsize) {
175 			break;
176 		}
177 		spshift -= 9;
178 	}
179 
180 	if (spshift < PAGE_SHIFT) {
181 		panic("Invalid spshift for gpa 0x%016lx, hpa 0x%016lx, "
182 		      "length 0x%016lx, page_sizes_mask 0x%016lx",
183 		      gpa, hpa, length, page_sizes_mask);
184 	}
185 
186 	nlevels = EPT_PWLEVELS;
187 	while (--nlevels >= 0) {
188 		ptpshift = PAGE_SHIFT + nlevels * 9;
189 		ptpindex = (gpa >> ptpshift) & 0x1FF;
190 
191 		/* We have reached the leaf mapping */
192 		if (spshift >= ptpshift)
193 			break;
194 
195 		/*
196 		 * We are working on a non-leaf page table page.
197 		 *
198 		 * Create the next level page table page if necessary and point
199 		 * to it from the current page table.
200 		 */
201 		if (ptp[ptpindex] == 0) {
202 			void *nlp = malloc(PAGE_SIZE, M_VMX, M_WAITOK | M_ZERO);
203 			ptp[ptpindex] = vtophys(nlp);
204 			ptp[ptpindex] |= EPT_PG_RD | EPT_PG_WR | EPT_PG_EX;
205 		}
206 
207 		/* Work our way down to the next level page table page */
208 		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & EPT_ADDR_MASK);
209 	}
210 
211 	if ((gpa & ((1UL << ptpshift) - 1)) != 0) {
212 		panic("ept_create_mapping: gpa 0x%016lx and ptpshift %d "
213 		      "mismatch\n", gpa, ptpshift);
214 	}
215 
216 	if (prot != VM_PROT_NONE) {
217 		/* Do the mapping */
218 		ptp[ptpindex] = hpa;
219 
220 		/* Apply the access controls */
221 		if (prot & VM_PROT_READ)
222 			ptp[ptpindex] |= EPT_PG_RD;
223 		if (prot & VM_PROT_WRITE)
224 			ptp[ptpindex] |= EPT_PG_WR;
225 		if (prot & VM_PROT_EXECUTE)
226 			ptp[ptpindex] |= EPT_PG_EX;
227 
228 		/*
229 		 * XXX should we enforce this memory type by setting the
230 		 * ignore PAT bit to 1.
231 		 */
232 		ptp[ptpindex] |= EPT_PG_MEMORY_TYPE(attr);
233 
234 		if (nlevels > 0)
235 			ptp[ptpindex] |= EPT_PG_SUPERPAGE;
236 	} else {
237 		/* Remove the mapping */
238 		ptp[ptpindex] = 0;
239 	}
240 
241 	return (1UL << ptpshift);
242 }
243 
244 static vm_paddr_t
245 ept_lookup_mapping(uint64_t *ptp, vm_paddr_t gpa)
246 {
247 	int nlevels, ptpshift, ptpindex;
248 	uint64_t ptpval, hpabase, pgmask;
249 
250 	nlevels = EPT_PWLEVELS;
251 	while (--nlevels >= 0) {
252 		ptpshift = PAGE_SHIFT + nlevels * 9;
253 		ptpindex = (gpa >> ptpshift) & 0x1FF;
254 
255 		ptpval = ptp[ptpindex];
256 
257 		/* Cannot make progress beyond this point */
258 		if ((ptpval & (EPT_PG_RD | EPT_PG_WR | EPT_PG_EX)) == 0)
259 			break;
260 
261 		if (nlevels == 0 || (ptpval & EPT_PG_SUPERPAGE)) {
262 			pgmask = (1UL << ptpshift) - 1;
263 			hpabase = ptpval & ~pgmask;
264 			return (hpabase | (gpa & pgmask));
265 		}
266 
267 		/* Work our way down to the next level page table page */
268 		ptp = (uint64_t *)PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK);
269 	}
270 
271 	return ((vm_paddr_t)-1);
272 }
273 
274 static void
275 ept_free_pt_entry(pt_entry_t pte)
276 {
277 	if (pte == 0)
278 		return;
279 
280 	/* sanity check */
281 	if ((pte & EPT_PG_SUPERPAGE) != 0)
282 		panic("ept_free_pt_entry: pte cannot have superpage bit");
283 
284 	return;
285 }
286 
287 static void
288 ept_free_pd_entry(pd_entry_t pde)
289 {
290 	pt_entry_t	*pt;
291 	int		i;
292 
293 	if (pde == 0)
294 		return;
295 
296 	if ((pde & EPT_PG_SUPERPAGE) == 0) {
297 		pt = (pt_entry_t *)PHYS_TO_DMAP(pde & EPT_ADDR_MASK);
298 		for (i = 0; i < NPTEPG; i++)
299 			ept_free_pt_entry(pt[i]);
300 		free(pt, M_VMX);	/* free the page table page */
301 	}
302 }
303 
304 static void
305 ept_free_pdp_entry(pdp_entry_t pdpe)
306 {
307 	pd_entry_t 	*pd;
308 	int		 i;
309 
310 	if (pdpe == 0)
311 		return;
312 
313 	if ((pdpe & EPT_PG_SUPERPAGE) == 0) {
314 		pd = (pd_entry_t *)PHYS_TO_DMAP(pdpe & EPT_ADDR_MASK);
315 		for (i = 0; i < NPDEPG; i++)
316 			ept_free_pd_entry(pd[i]);
317 		free(pd, M_VMX);	/* free the page directory page */
318 	}
319 }
320 
321 static void
322 ept_free_pml4_entry(pml4_entry_t pml4e)
323 {
324 	pdp_entry_t	*pdp;
325 	int		i;
326 
327 	if (pml4e == 0)
328 		return;
329 
330 	if ((pml4e & EPT_PG_SUPERPAGE) == 0) {
331 		pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4e & EPT_ADDR_MASK);
332 		for (i = 0; i < NPDPEPG; i++)
333 			ept_free_pdp_entry(pdp[i]);
334 		free(pdp, M_VMX);	/* free the page directory ptr page */
335 	}
336 }
337 
338 void
339 ept_vmcleanup(struct vmx *vmx)
340 {
341 	int 		 i;
342 
343 	for (i = 0; i < NPML4EPG; i++)
344 		ept_free_pml4_entry(vmx->pml4ept[i]);
345 }
346 
347 int
348 ept_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t len,
349 		vm_memattr_t attr, int prot, boolean_t spok)
350 {
351 	size_t n;
352 	struct vmx *vmx = arg;
353 
354 	while (len > 0) {
355 		n = ept_create_mapping(vmx->pml4ept, gpa, hpa, len, attr,
356 				       prot, spok);
357 		len -= n;
358 		gpa += n;
359 		hpa += n;
360 	}
361 
362 	return (0);
363 }
364 
365 vm_paddr_t
366 ept_vmmmap_get(void *arg, vm_paddr_t gpa)
367 {
368 	vm_paddr_t hpa;
369 	struct vmx *vmx;
370 
371 	vmx = arg;
372 	hpa = ept_lookup_mapping(vmx->pml4ept, gpa);
373 	return (hpa);
374 }
375 
376 static void
377 invept_single_context(void *arg)
378 {
379 	struct invept_desc desc = *(struct invept_desc *)arg;
380 
381 	invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
382 }
383 
384 void
385 ept_invalidate_mappings(u_long pml4ept)
386 {
387 	struct invept_desc invept_desc = { 0 };
388 
389 	invept_desc.eptp = EPTP(pml4ept);
390 
391 	smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc);
392 }
393