1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/smp.h>
34 #include <sys/sysctl.h>
35
36 #include <vm/vm.h>
37 #include <vm/pmap.h>
38 #include <vm/vm_extern.h>
39
40 #include <machine/vmm.h>
41
42 #include "vmx_cpufunc.h"
43 #include "ept.h"
44
/*
 * Feature bits of the capability word that ept_init() reads from
 * MSR_VMX_EPT_VPID_CAP.  Each macro evaluates to a non-zero value when the
 * corresponding bit is set in 'cap' and to zero otherwise.
 *
 * The masks are spelled with the ULL suffix so that they are at least 64 bits
 * wide, like the uint64_t capability word, no matter how wide 'long' is.
 * With a 32-bit 'long', an expression such as (1UL << 32) would be undefined.
 */
#define	EPT_SUPPORTS_EXEC_ONLY(cap)	((cap) & (1ULL << 0))
#define	EPT_PWL4(cap)			((cap) & (1ULL << 6))
#define	EPT_MEMORY_TYPE_WB(cap)		((cap) & (1ULL << 14))
#define	EPT_PDE_SUPERPAGE(cap)		((cap) & (1ULL << 16))	/* 2MB pages */
#define	EPT_PDPTE_SUPERPAGE(cap)	((cap) & (1ULL << 17))	/* 1GB pages */
#define	INVEPT_SUPPORTED(cap)		((cap) & (1ULL << 20))
#define	AD_BITS_SUPPORTED(cap)		((cap) & (1ULL << 21))
#define	INVVPID_SUPPORTED(cap)		((cap) & (1ULL << 32))

/* Bits 40-43: true only when every one of them is set in 'cap'. */
#define	INVVPID_ALL_TYPES_MASK		0xF0000000000ULL
#define	INVVPID_ALL_TYPES_SUPPORTED(cap)	\
	(((cap) & INVVPID_ALL_TYPES_MASK) == INVVPID_ALL_TYPES_MASK)

/* Bits 25-26: true only when both of them are set in 'cap'. */
#define	INVEPT_ALL_TYPES_MASK		0x6000000ULL
#define	INVEPT_ALL_TYPES_SUPPORTED(cap)	\
	(((cap) & INVEPT_ALL_TYPES_MASK) == INVEPT_ALL_TYPES_MASK)

#define	EPT_PWLEVELS		4		/* page walk levels */
#define	EPT_ENABLE_AD_BITS	(1 << 6)	/* ORed into the EPTP by eptp() */
64
65 SYSCTL_DECL(_hw_vmm);
66 SYSCTL_NODE(_hw_vmm, OID_AUTO, ept, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
67 NULL);
68
69 static int ept_enable_ad_bits;
70
71 static int ept_pmap_flags;
72 SYSCTL_INT(_hw_vmm_ept, OID_AUTO, pmap_flags, CTLFLAG_RD,
73 &ept_pmap_flags, 0, NULL);
74
75 int
ept_init(int ipinum)76 ept_init(int ipinum)
77 {
78 int use_hw_ad_bits, use_superpages, use_exec_only;
79 uint64_t cap;
80
81 cap = rdmsr(MSR_VMX_EPT_VPID_CAP);
82
83 /*
84 * Verify that:
85 * - page walk length is 4 steps
86 * - extended page tables can be laid out in write-back memory
87 * - invvpid instruction with all possible types is supported
88 * - invept instruction with all possible types is supported
89 */
90 if (!EPT_PWL4(cap) ||
91 !EPT_MEMORY_TYPE_WB(cap) ||
92 !INVVPID_SUPPORTED(cap) ||
93 !INVVPID_ALL_TYPES_SUPPORTED(cap) ||
94 !INVEPT_SUPPORTED(cap) ||
95 !INVEPT_ALL_TYPES_SUPPORTED(cap))
96 return (EINVAL);
97
98 ept_pmap_flags = ipinum & PMAP_NESTED_IPIMASK;
99
100 use_superpages = 1;
101 TUNABLE_INT_FETCH("hw.vmm.ept.use_superpages", &use_superpages);
102 if (use_superpages && EPT_PDE_SUPERPAGE(cap))
103 ept_pmap_flags |= PMAP_PDE_SUPERPAGE; /* 2MB superpage */
104
105 use_hw_ad_bits = 1;
106 TUNABLE_INT_FETCH("hw.vmm.ept.use_hw_ad_bits", &use_hw_ad_bits);
107 if (use_hw_ad_bits && AD_BITS_SUPPORTED(cap))
108 ept_enable_ad_bits = 1;
109 else
110 ept_pmap_flags |= PMAP_EMULATE_AD_BITS;
111
112 use_exec_only = 1;
113 TUNABLE_INT_FETCH("hw.vmm.ept.use_exec_only", &use_exec_only);
114 if (use_exec_only && EPT_SUPPORTS_EXEC_ONLY(cap))
115 ept_pmap_flags |= PMAP_SUPPORTS_EXEC_ONLY;
116
117 return (0);
118 }
119
#if 0
/*
 * Debugging aid, currently compiled out.
 *
 * Print the non-zero entries of the 512-entry table at 'ptp', indented by
 * one tab per level already descended, and recurse into the table referenced
 * by each entry.  'nlevels' is the number of levels left to print, counting
 * this one; entries that have EPT_PG_SUPERPAGE set are printed but not
 * followed.
 */
static void
ept_dump(uint64_t *ptp, int nlevels)
{
	uint64_t entry, *child;
	int depth, idx, k;

	nlevels--;
	if (nlevels < 0)
		return;

	depth = 3 - nlevels;
	for (k = 0; k < depth; k++)
		printf("\t");
	printf("PTP = %p\n", ptp);

	for (idx = 0; idx < 512; idx++) {
		entry = ptp[idx];
		if (entry != 0) {
			for (k = 0; k < depth; k++)
				printf("\t");
			printf("%3d 0x%016lx\n", idx, entry);

			/* Nothing below the last level or below a superpage. */
			if (nlevels == 0 || (entry & EPT_PG_SUPERPAGE) != 0)
				continue;

			child = (uint64_t *)
			    PHYS_TO_DMAP(entry & EPT_ADDR_MASK);
			ept_dump(child, nlevels);
		}
	}
}
#endif
153
154 static void
invept_single_context(void * arg)155 invept_single_context(void *arg)
156 {
157 struct invept_desc desc = *(struct invept_desc *)arg;
158
159 invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
160 }
161
162 void
ept_invalidate_mappings(u_long eptp)163 ept_invalidate_mappings(u_long eptp)
164 {
165 struct invept_desc invept_desc = { 0 };
166
167 invept_desc.eptp = eptp;
168
169 smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc);
170 }
171
172 static int
ept_pinit(pmap_t pmap)173 ept_pinit(pmap_t pmap)
174 {
175
176 return (pmap_pinit_type(pmap, PT_EPT, ept_pmap_flags));
177 }
178
179 struct vmspace *
ept_vmspace_alloc(vm_offset_t min,vm_offset_t max)180 ept_vmspace_alloc(vm_offset_t min, vm_offset_t max)
181 {
182
183 return (vmspace_alloc(min, max, ept_pinit));
184 }
185
/*
 * Counterpart of ept_vmspace_alloc(): hands 'vmspace' to vmspace_free().
 */
void
ept_vmspace_free(struct vmspace *vmspace)
{
	vmspace_free(vmspace);
}
192
193 uint64_t
eptp(uint64_t pml4)194 eptp(uint64_t pml4)
195 {
196 uint64_t eptp_val;
197
198 eptp_val = pml4 | (EPT_PWLEVELS - 1) << 3 | PAT_WRITE_BACK;
199 if (ept_enable_ad_bits)
200 eptp_val |= EPT_ENABLE_AD_BITS;
201
202 return (eptp_val);
203 }
204