#define pr_fmt(fmt) "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_flush_pcpu {
	u64 address_space;
	u64 flags;
	u64 processor_mask;
	u64 gva_list[];
};

/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
struct hv_flush_pcpu_ex {
	u64 address_space;
	u64 flags;
	struct {
		u64 format;
		u64 valid_bank_mask;
		u64 bank_contents[];
	} hv_vp_set;
	u64 gva_list[];
};

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)

static struct hv_flush_pcpu __percpu **pcpu_flush;

static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 *
 * Example (illustrative, assuming 4 KiB pages): for start = 0x10000 and
 * end = 0x13000 a single entry 0x10000 | 2 is written, i.e. the page at
 * 0x10000 plus two additional pages.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
				unsigned long start, unsigned long end)
{
	int gva_n = offset;
	unsigned long cur = start, diff;

	do {
		diff = end > cur ? end - cur : 0;

		gva_list[gva_n] = cur & PAGE_MASK;
		/*
		 * Lower 12 bits encode the number of additional
		 * pages to flush (in addition to the 'cur' page).
		 */
		if (diff >= HV_TLB_FLUSH_UNIT)
			gva_list[gva_n] |= ~PAGE_MASK;
		else if (diff)
			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;

		cur += HV_TLB_FLUSH_UNIT;
		gva_n++;

	} while (cur < end);

	return gva_n - offset;
}

/* Return the number of banks in the resulting vp_set */
static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
				    const struct cpumask *cpus)
{
	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;

	/* valid_bank_mask can represent up to 64 banks */
	if (hv_max_vp_index / 64 >= 64)
		return 0;

	/*
	 * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex
	 * structs are not cleared between calls; we risk flushing unneeded
	 * vCPUs otherwise.
	 */
	for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
		flush->hv_vp_set.bank_contents[vcpu_bank] = 0;

	/*
	 * Some banks may end up being empty but this is acceptable.
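	 *
	 * For instance (illustrative), if 'cpus' maps to VP numbers 1 and
	 * 130, bank_contents[0] gets bit 1 and bank_contents[2] gets bit 2
	 * set below, bank 1 stays empty, nr_bank ends up as 3 and
	 * valid_bank_mask becomes 0x7.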
	 */
	for_each_cpu(cpu, cpus) {
		vcpu = hv_cpu_number_to_vp_number(cpu);
		vcpu_bank = vcpu / 64;
		vcpu_offset = vcpu % 64;
		__set_bit(vcpu_offset, (unsigned long *)
			  &flush->hv_vp_set.bank_contents[vcpu_bank]);
		if (vcpu_bank >= nr_bank)
			nr_bank = vcpu_bank + 1;
	}
	flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);

	return nr_bank;
}

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
				    const struct flush_tlb_info *info)
{
	int cpu, vcpu, gva_n, max_gvas;
	struct hv_flush_pcpu **flush_pcpu;
	struct hv_flush_pcpu *flush;
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

	if (!pcpu_flush || !hv_hypercall_pg)
		goto do_native;

	if (cpumask_empty(cpus))
		return;

	local_irq_save(flags);

	flush_pcpu = this_cpu_ptr(pcpu_flush);

	if (unlikely(!*flush_pcpu))
		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));

	flush = *flush_pcpu;

	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->processor_mask = 0;
	if (cpumask_equal(cpus, cpu_present_mask)) {
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	} else {
		for_each_cpu(cpu, cpus) {
			vcpu = hv_cpu_number_to_vp_number(cpu);
			/*
			 * processor_mask only covers VPs 0..63; re-enable
			 * interrupts before falling back to the native path
			 * for anything bigger.
			 */
			if (vcpu >= 64) {
				local_irq_restore(flags);
				goto do_native;
			}

			__set_bit(vcpu, (unsigned long *)
				  &flush->processor_mask);
		}
	}

	/*
	 * We can flush no more than max_gvas GVA ranges with one hypercall.
	 * Flush the whole address space if we were asked to do more.
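	 *
	 * Illustrative sizing, assuming 4 KiB pages: sizeof(*flush) is
	 * 3 * 8 = 24 bytes, so max_gvas works out to (4096 - 24) / 8 = 509.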
	 */
	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, 0,
				      info->start, info->end);
		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
					     gva_n, 0, flush, NULL);
	}

	local_irq_restore(flags);

	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}

static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				       const struct flush_tlb_info *info)
{
	int nr_bank = 0, max_gvas, gva_n;
	struct hv_flush_pcpu_ex **flush_pcpu;
	struct hv_flush_pcpu_ex *flush;
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

	if (!pcpu_flush_ex || !hv_hypercall_pg)
		goto do_native;

	if (cpumask_empty(cpus))
		return;

	local_irq_save(flags);

	flush_pcpu = this_cpu_ptr(pcpu_flush_ex);

	if (unlikely(!*flush_pcpu))
		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));

	flush = *flush_pcpu;

	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->hv_vp_set.valid_bank_mask = 0;

	if (!cpumask_equal(cpus, cpu_present_mask)) {
		flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
		nr_bank = cpumask_to_vp_set(flush, cpus);
	}

	if (!nr_bank) {
		flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	}

	/*
	 * We can flush no more than max_gvas GVA ranges with one hypercall.
	 * Flush the whole address space if we were asked to do more.
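	 *
	 * Illustrative sizing, assuming 4 KiB pages: the fixed part of
	 * *flush is 4 * 8 = 32 bytes, so with a single bank max_gvas works
	 * out to (4096 - 32 - 8) / 8 = 507.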
	 */
	max_gvas =
		(PAGE_SIZE - sizeof(*flush) - nr_bank *
		 sizeof(flush->hv_vp_set.bank_contents[0])) /
		sizeof(flush->gva_list[0]);

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
				0, nr_bank, flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
				0, nr_bank, flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, nr_bank,
				      info->start, info->end);
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
				gva_n, nr_bank, flush, NULL);
	}

	local_irq_restore(flags);

	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}

void hyperv_setup_mmu_ops(void)
{
	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		return;

	setup_clear_cpu_cap(X86_FEATURE_PCID);

	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
		pr_info("Using hypercall for remote TLB flush\n");
		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
	} else {
		pr_info("Using ext hypercall for remote TLB flush\n");
		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
	}
}

void hyper_alloc_mmu(void)
{
	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		return;

	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
		pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
	else
		pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
}