/*-
 * Copyright (c) 2009-2012,2016-2024 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/kdb.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/bus.h>
#include <dev/hyperv/vmbus/x86/hyperv_machdep.h>
#include <dev/hyperv/vmbus/x86/hyperv_reg.h>
#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/vmbus/hyperv_var.h>
#include <dev/hyperv/vmbus/vmbus_reg.h>
#include <dev/hyperv/vmbus/vmbus_var.h>
#include <dev/hyperv/vmbus/hyperv_common_reg.h>
#include "hyperv_mmu.h"

/*
 * Encode the virtual address range [start, end) into the GVA list
 * format expected by HvFlushVirtualAddressList: one 64-bit entry per
 * HV_TLB_FLUSH_UNIT, page-aligned address in the upper bits.  Returns
 * the number of entries written.
 */
static inline int
fill_gva_list(uint64_t gva_list[], unsigned long start, unsigned long end)
{
        int gva_n = 0;
        unsigned long cur = start, diff;

        do {
                diff = end > cur ? end - cur : 0;

                gva_list[gva_n] = cur;
                /*
                 * The lower 12 bits encode the number of additional
                 * pages to flush (in addition to the 'cur' page).
                 */
                if (diff >= HV_TLB_FLUSH_UNIT) {
                        gva_list[gva_n] |= PAGE_MASK;
                        cur += HV_TLB_FLUSH_UNIT;
                } else if (diff) {
                        gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
                        cur = end;
                }

                gva_n++;
        } while (cur < end);

        return gva_n;
}
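
/*
 * Worked example for the encoding above (illustrative only; it assumes
 * HV_TLB_FLUSH_UNIT spans 4096 pages, which is what the 12 count bits
 * allow).  Flushing [0, 2 * HV_TLB_FLUSH_UNIT + 3 * PAGE_SIZE) yields:
 *
 *   gva_list[0] = 0                     | PAGE_MASK  (one full unit)
 *   gva_list[1] = HV_TLB_FLUSH_UNIT     | PAGE_MASK  (one full unit)
 *   gva_list[2] = 2 * HV_TLB_FLUSH_UNIT | 2          (page + 2 more)
 *
 * and fill_gva_list() returns 3.
 */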

/*
 * Convert a cpuset into the Hyper-V sparse VP set format: one 64-bit
 * bank per group of HV_VCPUS_PER_SPARSE_BANK vCPUs.  Returns the
 * number of banks used, 0 if the VP set cannot represent all vCPUs,
 * or -1 if a CPU has no vCPU id assigned.
 */
inline int
hv_cpumask_to_vpset(struct hv_vpset *vpset, const cpuset_t *cpus,
    struct vmbus_softc *sc)
{
        int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
        int max_vcpu_bank = hv_max_vp_index / HV_VCPUS_PER_SPARSE_BANK;

        /*
         * vpset.valid_bank_mask can represent up to
         * HV_MAX_SPARSE_VCPU_BANKS banks.
         */
        if (max_vcpu_bank >= HV_MAX_SPARSE_VCPU_BANKS)
                return 0;

        /*
         * Clear all banks up to the maximum possible bank, as the
         * hv_tlb_flush_ex structs are not cleared between calls; we
         * would risk flushing unneeded vCPUs otherwise.
         */
        for (vcpu_bank = 0; vcpu_bank <= max_vcpu_bank; vcpu_bank++)
                vpset->bank_contents[vcpu_bank] = 0;

        /*
         * Some banks may end up being empty, but this is acceptable.
         */
        CPU_FOREACH_ISSET(cpu, cpus) {
                vcpu = VMBUS_PCPU_GET(sc, vcpuid, cpu);
                if (vcpu == -1)
                        return -1;
                vcpu_bank = vcpu / HV_VCPUS_PER_SPARSE_BANK;
                vcpu_offset = vcpu % HV_VCPUS_PER_SPARSE_BANK;
                set_bit(vcpu_offset, (unsigned long *)
                    &vpset->bank_contents[vcpu_bank]);
                if (vcpu_bank >= nr_bank)
                        nr_bank = vcpu_bank + 1;
        }
        vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
        return nr_bank;
}
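
/*
 * Example (illustrative; assumes HV_VCPUS_PER_SPARSE_BANK == 64, the
 * sparse-4K set width): for vCPU ids {1, 70}, bit 1 is set in
 * bank_contents[0] and bit 6 in bank_contents[1], nr_bank becomes 2,
 * and valid_bank_mask = GENMASK_ULL(1, 0) = 0x3.
 */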

void
hv_vm_tlb_flush(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2,
    enum invl_op_codes op, struct vmbus_softc *sc,
    smp_invl_local_cb_t curcpu_cb)
{
        cpuset_t tmp_mask, mask;
        struct hyperv_tlb_flush *flush;
        int cpu, vcpu;
        int max_gvas, gva_n;
        uint64_t status = 0;
        uint64_t cr3;

        /*
         * Hyper-V doesn't handle cache invalidation; let the system
         * handle it natively.
         */
        if (op == INVL_OP_CACHE)
                return smp_targeted_tlb_shootdown_native(pmap, addr1, addr2,
                    curcpu_cb, op);

        flush = *VMBUS_PCPU_PTR(sc, cpu_mem, curcpu);
        if (flush == NULL)
                return smp_targeted_tlb_shootdown_native(pmap, addr1, addr2,
                    curcpu_cb, op);
        /*
         * It is not necessary to signal other CPUs while booting or
         * when in the debugger.
         */
        if (__predict_false(kdb_active || KERNEL_PANICKED() || !smp_started))
                goto local_cb;

        KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));

        /*
         * Make a stable copy of the set of CPUs on which the pmap is active.
         * See if we have to interrupt other CPUs.
         */
        CPU_COPY(pmap_invalidate_cpu_mask(pmap), &mask);
        CPU_COPY(&mask, &tmp_mask);
        CPU_CLR(curcpu, &tmp_mask);
        if (CPU_EMPTY(&tmp_mask))
                goto local_cb;

        /*
         * The initiator must have interrupts enabled, which prevents
         * non-invalidation IPIs, which take the smp_ipi_mtx spinlock,
         * from deadlocking with us.  On the other hand, preemption
         * must be disabled to pin the initiator to the instance of the
         * pcpu pc_smp_tlb data and scoreboard line.
         */
        KASSERT((read_rflags() & PSL_I) != 0,
            ("hv_tlb_flush: interrupts disabled"));
        critical_enter();
        flush->processor_mask = 0;
        cr3 = pmap->pm_cr3;

        if (op == INVL_OP_TLB || op == INVL_OP_TLB_INVPCID ||
            op == INVL_OP_TLB_INVPCID_PTI || op == INVL_OP_TLB_PCID) {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        } else {
                flush->address_space = cr3;
                flush->address_space &= ~CR3_PCID_MASK;
                flush->flags = 0;
        }
        if (CPU_CMP(&mask, &all_cpus) == 0) {
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        } else {
                /*
                 * The simple hypercall's processor_mask covers only the
                 * first 64 vCPUs; use the extended (sparse VP set)
                 * hypercall beyond that.
                 */
                if (CPU_FLS(&mask) < mp_ncpus && CPU_FLS(&mask) >= 64)
                        goto do_ex_hypercall;

                CPU_FOREACH_ISSET(cpu, &mask) {
                        vcpu = VMBUS_PCPU_GET(sc, vcpuid, cpu);
                        if (vcpu >= 64)
                                goto do_ex_hypercall;

                        set_bit(vcpu, &flush->processor_mask);
                }
                if (flush->processor_mask == 0)
                        goto native;
        }
        /*
         * We can flush at most max_gvas ranges with one hypercall;
         * the GVA list shares the per-cpu page with the fixed header.
         * Flush the whole address space if asked to do more.
         */
        max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
        if (addr2 == 0) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hypercall_do_md(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                    (uint64_t)flush, (uint64_t)NULL);
        } else if (addr2 != 0 &&
            ((addr2 - addr1) / HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hypercall_do_md(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                    (uint64_t)flush, (uint64_t)NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, addr1, addr2);

                status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
                    gva_n, 0, (uint64_t)flush, (uint64_t)NULL);
        }
        if (status != 0)
                goto native;
        sched_unpin();
        critical_exit();
        return;

local_cb:
        critical_enter();
        curcpu_cb(pmap, addr1, addr2);
        sched_unpin();
        critical_exit();
        return;
do_ex_hypercall:
        status = hv_flush_tlb_others_ex(pmap, addr1, addr2, mask, op, sc);
        if (status != 0)
                goto native;
        sched_unpin();
        critical_exit();
        return;
native:
        critical_exit();
        return smp_targeted_tlb_shootdown_native(pmap, addr1,
            addr2, curcpu_cb, op);
}
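
/*
 * Sizing note for max_gvas above (illustrative figures): with 4K pages
 * and, say, a 24-byte struct hyperv_tlb_flush header, the per-cpu
 * hypercall page holds (4096 - 24) / 8 = 509 GVA-list entries, so one
 * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST can cover up to
 * 509 * HV_TLB_FLUSH_UNIT of address space before we fall back to
 * flushing the entire address space.
 */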

/*
 * Extended flush using a sparse VP set; used when vCPU ids exceed the
 * 64 that the simple hypercall's processor_mask can address.
 */
uint64_t
hv_flush_tlb_others_ex(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2,
    const cpuset_t mask, enum invl_op_codes op, struct vmbus_softc *sc)
{
        int nr_bank = 0, max_gvas, gva_n;
        struct hv_tlb_flush_ex *flush;
        uint64_t status = 0;
        uint64_t cr3;

        if (*VMBUS_PCPU_PTR(sc, cpu_mem, curcpu) == NULL)
                return EINVAL;
        flush = *VMBUS_PCPU_PTR(sc, cpu_mem, curcpu);

        if (!(hyperv_recommends & HYPERV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                return EINVAL;

        cr3 = pmap->pm_cr3;
        if (op == INVL_OP_TLB) {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        } else {
                flush->address_space = cr3;
                flush->address_space &= ~CR3_PCID_MASK;
                flush->flags = 0;
        }

        flush->hv_vp_set.valid_bank_mask = 0;

        flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
        nr_bank = hv_cpumask_to_vpset(&flush->hv_vp_set, &mask, sc);
        if (nr_bank <= 0)
                return EINVAL;

        /*
         * We can flush at most max_gvas ranges with one hypercall.
         * Flush the whole address space if we were asked to do more.
         * The GVA list starts right after the variable-size VP set
         * banks.
         */
        max_gvas = (PAGE_SIZE - sizeof(*flush) - nr_bank *
            sizeof(flush->hv_vp_set.bank_contents[0])) /
            sizeof(flush->hv_vp_set.bank_contents[0]);

        if (addr2 == 0) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_rep_hypercall(
                    HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                    0, nr_bank, (uint64_t)flush, (uint64_t)NULL);
        } else if (addr2 != 0 &&
            ((addr2 - addr1) / HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_rep_hypercall(
                    HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                    0, nr_bank, (uint64_t)flush, (uint64_t)NULL);
        } else {
                gva_n = fill_gva_list(&flush->hv_vp_set.bank_contents[nr_bank],
                    addr1, addr2);
                status = hv_do_rep_hypercall(
                    HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
                    gva_n, nr_bank, (uint64_t)flush, (uint64_t)NULL);
        }
        return status;
}
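
/*
 * Layout sketch for the extended call (illustrative): the fixed
 * header, the nr_bank sparse-set banks, and the GVA list all share
 * the single per-cpu hypercall page.  With nr_bank == 2, for
 * instance, the GVA list begins at hv_vp_set.bank_contents[2], and
 * max_gvas = (PAGE_SIZE - sizeof(*flush) - 2 * 8) / 8 entries fit in
 * the remaining space.
 */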