xref: /freebsd/sys/dev/hyperv/vmbus/hyperv_mmu.c (revision ae8d58814089308028046ac80aeeb9cbb784bd0a)
1 /*-
2  * Copyright (c) 2009-2012,2016-2024 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/bus.h>
31 #include <sys/kernel.h>
32 #include <sys/linker.h>
33 #include <sys/lock.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/mutex.h>
37 #include <sys/sbuf.h>
38 #include <sys/smp.h>
39 #include <sys/sysctl.h>
40 #include <sys/systm.h>
41 #include <sys/proc.h>
42 #include <sys/sched.h>
43 #include <sys/kdb.h>
44 #include <vm/vm.h>
45 #include <vm/pmap.h>
46 
47 #include <machine/bus.h>
48 #include <dev/hyperv/vmbus/x86/hyperv_machdep.h>
49 #include <dev/hyperv/vmbus/x86/hyperv_reg.h>
50 #include <dev/hyperv/include/hyperv.h>
51 #include <dev/hyperv/vmbus/hyperv_var.h>
52 #include <dev/hyperv/vmbus/vmbus_reg.h>
53 #include <dev/hyperv/vmbus/vmbus_var.h>
54 #include <dev/hyperv/vmbus/hyperv_common_reg.h>
55 #include "hyperv_mmu.h"
56 
57 static inline int fill_gva_list(uint64_t gva_list[],
58     unsigned long start, unsigned long end)
59 {
60 	int gva_n = 0;
61 	unsigned long cur = start, diff;
62 
63 	do {
64 		diff = end > cur ? end - cur : 0;
65 
66 		gva_list[gva_n] = cur;
67 		/*
68 		 * Lower 12 bits encode the number of additional
69 		 * pages to flush (in addition to the 'cur' page).
70 		 */
71 		if (diff >= HV_TLB_FLUSH_UNIT) {
72 			gva_list[gva_n] |= PAGE_MASK;
73 			cur += HV_TLB_FLUSH_UNIT;
74 		}  else if (diff) {
75 			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
76 			cur = end;
77 		}
78 
79 		gva_n++;
80 
81 	} while (cur < end);
82 
83 	return gva_n;
84 }
85 
86 
87 inline int hv_cpumask_to_vpset(struct hv_vpset *vpset,
88     const cpuset_t *cpus, struct vmbus_softc * sc)
89 {
90 	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
91 	int max_vcpu_bank = hv_max_vp_index / HV_VCPUS_PER_SPARSE_BANK;
92 
93 	/*
94 	 * vpset.valid_bank_mask can represent up to
95 	 * HV_MAX_SPARSE_VCPU_BANKS banks
96 	 */
97 	if (max_vcpu_bank >= HV_MAX_SPARSE_VCPU_BANKS)
98 		return 0;
99 
100 	/*
101 	 * Clear all banks up to the maximum possible bank as hv_tlb_flush_ex
102 	 * structs are not cleared between calls, we risk flushing unneeded
103 	 * vCPUs otherwise.
104 	 */
105 	for (vcpu_bank = 0; vcpu_bank <= max_vcpu_bank; vcpu_bank++)
106 		vpset->bank_contents[vcpu_bank] = 0;
107 
108 	/*
109 	 * Some banks may end up being empty but this is acceptable.
110 	 */
111 	CPU_FOREACH_ISSET(cpu, cpus) {
112 		vcpu = VMBUS_PCPU_GET(sc, vcpuid, cpu);
113 		if (vcpu == -1)
114 			return -1;
115 		vcpu_bank = vcpu / HV_VCPUS_PER_SPARSE_BANK;
116 		vcpu_offset = vcpu % HV_VCPUS_PER_SPARSE_BANK;
117 		set_bit(vcpu_offset, (unsigned long *)
118 		    &vpset->bank_contents[vcpu_bank]);
119 		if (vcpu_bank >= nr_bank)
120 			nr_bank = vcpu_bank + 1;
121 	}
122 	vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
123 	return nr_bank;
124 }
125 
126 
127 
128 
129 void
130 hv_vm_tlb_flush(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2,
131     enum invl_op_codes op, struct vmbus_softc *sc, smp_invl_local_cb_t curcpu_cb)
132 {
133 	cpuset_t tmp_mask, mask;
134 	struct hyperv_tlb_flush *flush;
135 	int cpu, vcpu;
136 	int max_gvas, gva_n;
137 	uint64_t status = 0;
138 	uint64_t cr3;
139 
140 	/*
141 	 * Hyper-V doesn't handle the invalidating cache. Let system handle it.
142 	 */
143 	if (op == INVL_OP_CACHE)
144 		return smp_targeted_tlb_shootdown_native(pmap, addr1, addr2,
145 		    curcpu_cb, op);
146 
147 	flush = *VMBUS_PCPU_PTR(sc, cpu_mem, curcpu);
148 	if (flush == NULL)
149 		return smp_targeted_tlb_shootdown_native(pmap, addr1, addr2,
150 		    curcpu_cb, op);
151 	/*
152 	 * It is not necessary to signal other CPUs while booting or
153 	 * when in the debugger.
154 	 */
155 	if (__predict_false(kdb_active || KERNEL_PANICKED() || !smp_started))
156 		goto local_cb;
157 
158 	KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
159 
160 	/*
161 	 * Make a stable copy of the set of CPUs on which the pmap is active.
162 	 * See if we have to interrupt other CPUs.
163 	 */
164 	CPU_COPY(pmap_invalidate_cpu_mask(pmap), &tmp_mask);
165 	CPU_COPY(pmap_invalidate_cpu_mask(pmap), &mask);
166 	CPU_CLR(curcpu, &tmp_mask);
167 	if (CPU_EMPTY(&tmp_mask))
168 		goto local_cb;
169 
170 	/*
171 	 * Initiator must have interrupts enabled, which prevents
172 	 * non-invalidation IPIs that take smp_ipi_mtx spinlock,
173 	 * from deadlocking with us.  On the other hand, preemption
174 	 * must be disabled to pin initiator to the instance of the
175 	 * pcpu pc_smp_tlb data and scoreboard line.
176 	 */
177 	KASSERT((read_rflags() & PSL_I) != 0,
178 	    ("hv_tlb_flush: interrupts disabled"));
179 	critical_enter();
180 	flush->processor_mask = 0;
181 	cr3 = pmap->pm_cr3;
182 
183 	if (op == INVL_OP_TLB || op == INVL_OP_TLB_INVPCID ||
184 	    op == INVL_OP_TLB_INVPCID_PTI || op == INVL_OP_TLB_PCID) {
185 		flush->address_space = 0;
186 		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
187 	} else {
188 
189 		flush->address_space = cr3;
190 		flush->address_space &= ~CR3_PCID_MASK;
191 		flush->flags = 0;
192 	}
193 	if(CPU_CMP(&mask, &all_cpus) == 0) {
194 		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
195 	} else {
196 		if (CPU_FLS(&mask) < mp_ncpus && CPU_FLS(&mask) >= 64)
197 			goto do_ex_hypercall;
198 
199 		CPU_FOREACH_ISSET(cpu, &mask) {
200 			vcpu = VMBUS_PCPU_GET(sc, vcpuid, cpu);
201 			if (vcpu >= 64)
202 				goto do_ex_hypercall;
203 
204 			set_bit(vcpu, &flush->processor_mask);
205 		}
206 		if (!flush->processor_mask )
207 			goto native;
208 	}
209 	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
210 	if (addr2 == 0) {
211 		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
212 		status = hypercall_do_md(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
213 		    (uint64_t)flush, (uint64_t)NULL);
214 	} else if ((addr2 && (addr2 -addr1)/HV_TLB_FLUSH_UNIT) > max_gvas) {
215 		status = hypercall_do_md(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
216 		    (uint64_t)flush, (uint64_t)NULL);
217 	} else {
218 		gva_n = fill_gva_list(flush->gva_list, addr1, addr2);
219 
220 		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
221 		    gva_n, 0, (uint64_t)flush, (uint64_t)NULL);
222 
223 	}
224 	if(status)
225 		goto native;
226 	sched_unpin();
227 	critical_exit();
228 	return;
229 
230 local_cb:
231 	critical_enter();
232 	curcpu_cb(pmap, addr1, addr2);
233 	sched_unpin();
234 	critical_exit();
235 	return;
236 do_ex_hypercall:
237 	status = hv_flush_tlb_others_ex(pmap, addr1, addr2, mask, op, sc);
238 	if (status)
239 		goto native;
240 	sched_unpin();
241 	critical_exit();
242 	return;
243 native:
244 	sched_unpin();
245 	critical_exit();
246 	return smp_targeted_tlb_shootdown_native(pmap, addr1,
247 	    addr2, curcpu_cb, op);
248 }
249 
250 uint64_t
251 hv_flush_tlb_others_ex(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2,
252     const cpuset_t mask, enum invl_op_codes op, struct vmbus_softc *sc)
253 {
254 	int nr_bank = 0, max_gvas, gva_n;
255 	struct hv_tlb_flush_ex *flush;
256 	if(*VMBUS_PCPU_PTR(sc, cpu_mem, curcpu) == NULL)
257 		return EINVAL;
258 	flush = *VMBUS_PCPU_PTR(sc, cpu_mem, curcpu);
259 	uint64_t status = 0;
260 	uint64_t cr3;
261 
262 	if (!(hyperv_recommends & HYPERV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
263 	       return EINVAL;
264 
265 	cr3 = pmap->pm_cr3;
266 	if (op == INVL_OP_TLB) {
267 		flush->address_space = 0;
268 		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
269 	} else {
270 
271 		flush->address_space = cr3;
272 		flush->address_space &= ~CR3_PCID_MASK;
273 		flush->flags = 0;
274 	}
275 
276 	flush->hv_vp_set.valid_bank_mask = 0;
277 
278 	flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
279 	nr_bank = hv_cpumask_to_vpset(&flush->hv_vp_set, &mask, sc);
280 	if (nr_bank < 0)
281 		return EINVAL;
282 
283 	/*
284 	 * We can flush not more than max_gvas with one hypercall. Flush the
285 	 * whole address space if we were asked to do more.
286 	 */
287 	max_gvas = (PAGE_SIZE - sizeof(*flush) - nr_bank *
288 	    sizeof(flush->hv_vp_set.bank_contents[0])) /
289 	    sizeof(flush->hv_vp_set.bank_contents[0]);
290 
291 	if (addr2 == 0) {
292 		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
293 		status = hv_do_rep_hypercall(
294 				HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
295 				0, nr_bank, (uint64_t)flush, (uint64_t)NULL);
296 	} else if (addr2 &&
297 	    ((addr2 - addr1)/HV_TLB_FLUSH_UNIT) > max_gvas) {
298 		status = hv_do_rep_hypercall(
299 		    HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
300 		    0, nr_bank, (uint64_t)flush, (uint64_t)NULL);
301 	} else {
302 		gva_n = fill_gva_list(&flush->hv_vp_set.bank_contents[nr_bank],
303 		    addr1, addr2);
304 		status = hv_do_rep_hypercall(
305 		    HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
306 		    gva_n, nr_bank, (uint64_t)flush, (uint64_t)NULL);
307 	}
308 	return status;
309 }
310