xref: /linux/tools/testing/selftests/kvm/x86/nested_tdp_fault_test.c (revision 0de1020f7bbb3e1c9cd5b6f3eb4bdd661b1ff735)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2025, Google, Inc.
4  */
5 
6 #include "test_util.h"
7 #include "kvm_util.h"
8 #include "processor.h"
9 #include "svm_util.h"
10 #include "vmx.h"
11 
/* Number of unsigned long slots in the stack array L1 hands to L2. */
#define L2_GUEST_STACK_SIZE 64
13 
/*
 * Fault scenarios exercised by this test.  Each value selects which page is
 * tampered with in NPT/EPT and how.
 */
enum test_type {
	/* Final data page not present */
	TEST_FINAL_PAGE_UNMAPPED,

	/* Page table page not present */
	TEST_PT_PAGE_UNMAPPED,

	/* Final data page read-only */
	TEST_FINAL_PAGE_WRITE_PROTECTED,

	/* Page table page read-only */
	TEST_PT_PAGE_WRITE_PROTECTED,
};
20 
/*
 * Per-test L2 configuration.  run_test() sets both on the host side and
 * pushes them to the guest with sync_global_to_guest().
 */
/* Guest virtual address accessed by L2's string I/O instruction. */
static gva_t l2_test_page;
/* L2 entry point that L1 passes to prepare_vmcs() / generic_svm_setup(). */
static void (*l2_entry)(void);
23 
/*
 * I/O port named by L2's OUTSB/INSB.  run_test() re-enters the vCPU on
 * KVM_EXIT_IO "in" exits that target this port.
 */
#define TEST_IO_PORT 0x80
/*
 * Address argument passed to vm_alloc() when allocating the L2 test page,
 * one per test in enum test_type order.
 */
#define TEST1_VADDR 0x8000000ULL
#define TEST2_VADDR 0x10000000ULL
#define TEST3_VADDR 0x18000000ULL
#define TEST4_VADDR 0x20000000ULL
29 
30 /*
31  * L2 executes OUTS reading from l2_test_page, triggering a nested page
32  * fault on the read access.
33  */
34 static void l2_guest_code_outs(void)
35 {
36 	asm volatile("outsb" ::"S"(l2_test_page), "d"(TEST_IO_PORT) : "memory");
37 	GUEST_FAIL("L2 should not reach here");
38 }
39 
40 /*
41  * L2 executes INS writing to l2_test_page, triggering a nested page
42  * fault on the write access.
43  */
44 static void l2_guest_code_ins(void)
45 {
46 	asm volatile("insb" ::"D"(l2_test_page), "d"(TEST_IO_PORT) : "memory");
47 	GUEST_FAIL("L2 should not reach here");
48 }
49 
/*
 * Assert that the actual exit qualification @ac_eq equals the expected value
 * @ex_eq, printing both on failure.  Both values are parenthesized and cast
 * to u64 before being handed to the '%lx' conversions, matching
 * GUEST_ASSERT_NPF_EC, so the type of the expression passed in does not
 * matter.  Note that each argument is evaluated twice.
 */
#define GUEST_ASSERT_EXIT_QUAL(ac_eq, ex_eq)				\
	__GUEST_ASSERT((ac_eq) == (ex_eq),				\
		       "Wanted EXIT_QUAL '0x%lx', got '0x%lx'",		\
		       (u64)(ex_eq), (u64)(ac_eq))
53 
54 static void l1_vmx_code(struct vmx_pages *vmx, u64 expected_fault_gpa,
55 			u64 test_type)
56 {
57 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
58 	u64 exit_qual;
59 
60 	GUEST_ASSERT(vmx->vmcs_gpa);
61 	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
62 	GUEST_ASSERT(load_vmcs(vmx));
63 
64 	prepare_vmcs(vmx, l2_entry, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
65 
66 	GUEST_ASSERT(!vmlaunch());
67 
68 	/* Verify we got an EPT violation exit */
69 	__GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION,
70 		       "Expected EPT violation (0x%x), got 0x%lx",
71 		       EXIT_REASON_EPT_VIOLATION,
72 		       vmreadz(VM_EXIT_REASON));
73 
74 	__GUEST_ASSERT(vmreadz(GUEST_PHYSICAL_ADDRESS) == expected_fault_gpa,
75 		       "Expected guest_physical_address = 0x%lx, got 0x%lx",
76 		       expected_fault_gpa,
77 		       vmreadz(GUEST_PHYSICAL_ADDRESS));
78 
79 	exit_qual = vmreadz(EXIT_QUALIFICATION);
80 
81 	/*
82 	 * Note, EPT page table accesses are always read+write, e.g. so that
83 	 * the CPU can do A/D updates at-will.
84 	 */
85 	switch (test_type) {
86 	case TEST_FINAL_PAGE_UNMAPPED:
87 		GUEST_ASSERT_EXIT_QUAL(exit_qual, EPT_VIOLATION_ACC_READ |
88 						  EPT_VIOLATION_GVA_IS_VALID |
89 						  EPT_VIOLATION_GVA_TRANSLATED);
90 		break;
91 	case TEST_PT_PAGE_UNMAPPED:
92 		GUEST_ASSERT_EXIT_QUAL(exit_qual, EPT_VIOLATION_ACC_READ |
93 						  EPT_VIOLATION_ACC_WRITE |
94 						  EPT_VIOLATION_GVA_IS_VALID);
95 		break;
96 	case TEST_FINAL_PAGE_WRITE_PROTECTED:
97 		GUEST_ASSERT_EXIT_QUAL(exit_qual, EPT_VIOLATION_ACC_WRITE |
98 						  EPT_VIOLATION_PROT_READ |
99 						  EPT_VIOLATION_PROT_EXEC |
100 						  EPT_VIOLATION_GVA_IS_VALID |
101 						  EPT_VIOLATION_GVA_TRANSLATED);
102 		break;
103 	case TEST_PT_PAGE_WRITE_PROTECTED:
104 		GUEST_ASSERT_EXIT_QUAL(exit_qual, EPT_VIOLATION_ACC_READ |
105 						  EPT_VIOLATION_ACC_WRITE |
106 						  EPT_VIOLATION_PROT_READ |
107 						  EPT_VIOLATION_PROT_EXEC |
108 						  EPT_VIOLATION_GVA_IS_VALID);
109 		break;
110 	}
111 
112 	GUEST_DONE();
113 }
114 
/*
 * Assert that the actual nested page fault error code @ac_ec equals the
 * expected value @ex_ec, printing both on failure.  Both values are
 * parenthesized and cast to u64 before being handed to the '%lx'
 * conversions so the type of the expression passed in does not matter.
 * Note that each argument is evaluated twice.
 */
#define GUEST_ASSERT_NPF_EC(ac_ec, ex_ec)				\
	__GUEST_ASSERT((ac_ec) == (ex_ec),				\
		       "Wanted NPF error code '0x%lx', got '0x%lx'",	\
		       (u64)(ex_ec), (u64)(ac_ec))
118 
119 
120 static void l1_svm_code(struct svm_test_data *svm, u64 expected_fault_gpa,
121 			 u64 test_type)
122 {
123 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
124 	struct vmcb *vmcb = svm->vmcb;
125 	u64 exit_info_1;
126 
127 	generic_svm_setup(svm, l2_entry,
128 			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
129 
130 	run_guest(vmcb, svm->vmcb_gpa);
131 
132 	/* Verify we got an NPF exit */
133 	__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_NPF,
134 		       "Expected NPF exit (0x%x), got 0x%lx", SVM_EXIT_NPF,
135 		       vmcb->control.exit_code);
136 
137 	__GUEST_ASSERT(vmcb->control.exit_info_2 == expected_fault_gpa,
138 		       "Expected exit_info_2 = 0x%lx, got 0x%lx",
139 		       expected_fault_gpa,
140 		       vmcb->control.exit_info_2);
141 
142 	exit_info_1 = vmcb->control.exit_info_1;
143 
144 	/*
145 	 * Note, without GMET enabled, NPT walks are always user accesses.  And
146 	 * like EPT, page table accesses are always read+write.
147 	 */
148 	switch (test_type) {
149 	case TEST_FINAL_PAGE_UNMAPPED:
150 		GUEST_ASSERT_NPF_EC(exit_info_1, PFERR_USER_MASK |
151 						 PFERR_GUEST_FINAL_MASK);
152 		break;
153 	case TEST_PT_PAGE_UNMAPPED:
154 		GUEST_ASSERT_NPF_EC(exit_info_1, PFERR_WRITE_MASK |
155 						 PFERR_USER_MASK |
156 						 PFERR_GUEST_PAGE_MASK);
157 		break;
158 	case TEST_FINAL_PAGE_WRITE_PROTECTED:
159 		GUEST_ASSERT_NPF_EC(exit_info_1, PFERR_PRESENT_MASK |
160 						 PFERR_WRITE_MASK |
161 						 PFERR_USER_MASK |
162 						 PFERR_GUEST_FINAL_MASK);
163 		break;
164 	case TEST_PT_PAGE_WRITE_PROTECTED:
165 		GUEST_ASSERT_NPF_EC(exit_info_1, PFERR_PRESENT_MASK |
166 						 PFERR_WRITE_MASK |
167 						 PFERR_USER_MASK |
168 						 PFERR_GUEST_PAGE_MASK);
169 		break;
170 	}
171 
172 	GUEST_DONE();
173 }
174 
175 static void l1_guest_code(void *data, u64 expected_fault_gpa,
176 			  u64 test_type)
177 {
178 	if (this_cpu_has(X86_FEATURE_VMX))
179 		l1_vmx_code(data, expected_fault_gpa, test_type);
180 	else
181 		l1_svm_code(data, expected_fault_gpa, test_type);
182 }
183 
184 /* Returns the GPA of the PT page that maps @vaddr. */
185 static u64 get_pt_gpa_for_vaddr(struct kvm_vm *vm, u64 vaddr)
186 {
187 	u64 *pte;
188 
189 	pte = vm_get_pte(vm, vaddr);
190 	TEST_ASSERT(pte && (*pte & 0x1), "PTE not present for vaddr 0x%lx",
191 		    (unsigned long)vaddr);
192 
193 	return addr_hva2gpa(vm, (void *)((u64)pte & ~0xFFFULL));
194 }
195 
196 static void run_test(enum test_type type)
197 {
198 	gpa_t expected_fault_gpa;
199 	gva_t nested_gva;
200 
201 	struct kvm_vcpu *vcpu;
202 	struct kvm_vm *vm;
203 	struct ucall uc;
204 
205 	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
206 	vm_enable_tdp(vm);
207 
208 	if (kvm_cpu_has(X86_FEATURE_VMX))
209 		vcpu_alloc_vmx(vm, &nested_gva);
210 	else
211 		vcpu_alloc_svm(vm, &nested_gva);
212 
213 	switch (type) {
214 	case TEST_FINAL_PAGE_UNMAPPED:
215 		/*
216 		 * Unmap the final data page from NPT/EPT. The guest page
217 		 * table walk succeeds, but the final GPA->HPA translation
218 		 * fails. L2 reads from the page via OUTS.
219 		 */
220 		l2_entry = l2_guest_code_outs;
221 		l2_test_page = vm_alloc(vm, vm->page_size, TEST1_VADDR);
222 		expected_fault_gpa = addr_gva2gpa(vm, l2_test_page);
223 		break;
224 	case TEST_PT_PAGE_UNMAPPED:
225 		/*
226 		 * Unmap a page table page from NPT/EPT. The hardware page
227 		 * table walk fails when translating the PT page's GPA
228 		 * through NPT/EPT. L2 reads from the page via OUTS.
229 		 */
230 		l2_entry = l2_guest_code_outs;
231 		l2_test_page = vm_alloc(vm, vm->page_size, TEST2_VADDR);
232 		expected_fault_gpa = get_pt_gpa_for_vaddr(vm, l2_test_page);
233 		break;
234 	case TEST_FINAL_PAGE_WRITE_PROTECTED:
235 		/*
236 		 * Write-protect the final data page in NPT/EPT.  The page
237 		 * is present and readable, but not writable.  L2 writes to
238 		 * the page via INS, triggering a protection violation.
239 		 */
240 		l2_entry = l2_guest_code_ins;
241 		l2_test_page = vm_alloc(vm, vm->page_size, TEST3_VADDR);
242 		expected_fault_gpa = addr_gva2gpa(vm, l2_test_page);
243 		break;
244 	case TEST_PT_PAGE_WRITE_PROTECTED:
245 		/*
246 		 * Write-protect a page table page in NPT/EPT.  The page is
247 		 * present and readable, but not writable.  The guest page
248 		 * table walk needs write access to set A/D bits, so it
249 		 * triggers a protection violation on the PT page.
250 		 * L2 reads from the page via OUTS.
251 		 */
252 		l2_entry = l2_guest_code_outs;
253 		l2_test_page = vm_alloc(vm, vm->page_size, TEST4_VADDR);
254 		expected_fault_gpa = get_pt_gpa_for_vaddr(vm, l2_test_page);
255 		break;
256 	}
257 
258 	tdp_identity_map_default_memslots(vm);
259 
260 	if (type == TEST_FINAL_PAGE_WRITE_PROTECTED ||
261 	    type == TEST_PT_PAGE_WRITE_PROTECTED)
262 		*tdp_get_pte(vm, expected_fault_gpa) &= ~PTE_WRITABLE_MASK(&vm->stage2_mmu);
263 	else
264 		*tdp_get_pte(vm, expected_fault_gpa) &= ~(PTE_PRESENT_MASK(&vm->stage2_mmu) |
265 							   PTE_READABLE_MASK(&vm->stage2_mmu) |
266 							   PTE_WRITABLE_MASK(&vm->stage2_mmu) |
267 							   PTE_EXECUTABLE_MASK(&vm->stage2_mmu));
268 
269 	sync_global_to_guest(vm, l2_entry);
270 	sync_global_to_guest(vm, l2_test_page);
271 	vcpu_args_set(vcpu, 3, nested_gva, expected_fault_gpa, (u64)type);
272 
273 	/*
274 	 * For the INS-based write test, KVM emulates the instruction and
275 	 * first reads from the I/O port, which exits to userspace.
276 	 * Re-enter the guest so emulation can proceed to the memory
277 	 * write, where the nested page fault is triggered.
278 	 */
279 	for (;;) {
280 		vcpu_run(vcpu);
281 
282 		if (vcpu->run->exit_reason == KVM_EXIT_IO &&
283 		    vcpu->run->io.port == TEST_IO_PORT &&
284 		    vcpu->run->io.direction == KVM_EXIT_IO_IN) {
285 			continue;
286 		}
287 		break;
288 	}
289 
290 	switch (get_ucall(vcpu, &uc)) {
291 	case UCALL_DONE:
292 		break;
293 	case UCALL_ABORT:
294 		REPORT_GUEST_ASSERT(uc);
295 	default:
296 		TEST_FAIL("Unexpected exit reason: %d", vcpu->run->exit_reason);
297 	}
298 
299 	kvm_vm_free(vm);
300 }
301 
302 int main(int argc, char *argv[])
303 {
304 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM));
305 	TEST_REQUIRE(kvm_cpu_has_tdp());
306 
307 	run_test(TEST_FINAL_PAGE_UNMAPPED);
308 	run_test(TEST_PT_PAGE_UNMAPPED);
309 	run_test(TEST_FINAL_PAGE_WRITE_PROTECTED);
310 	run_test(TEST_PT_PAGE_WRITE_PROTECTED);
311 
312 	return 0;
313 }
314