// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/random.h>
#include <linux/clockchips.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"

/* The one and only */
struct hv_context hv_context;

/*
 * hv_init - Main initialization routine.
 *
 * This routine must be called before any other routines in here are called
 */
int hv_init(void)
{
	hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
	if (!hv_context.cpu_context)
		return -ENOMEM;
	return 0;
}

/*
 * Functions for allocating and freeing memory with size and
 * alignment HV_HYP_PAGE_SIZE. These functions are needed because
 * the guest page size may not be the same as the Hyper-V page
 * size. We depend upon kmalloc() aligning power-of-two size
 * allocations to the allocation size boundary, so that the
 * allocated memory appears to Hyper-V as a page of the size
 * it expects.
 */

void *hv_alloc_hyperv_page(void)
{
	BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);

	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
		return (void *)__get_free_page(GFP_KERNEL);
	else
		return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}

void *hv_alloc_hyperv_zeroed_page(void)
{
	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
		return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
	else
		return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}

void hv_free_hyperv_page(unsigned long addr)
{
	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
		free_page(addr);
	else
		kfree((void *)addr);
}
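
/*
 * For illustration only (not code used by the driver): a caller is expected
 * to pair the helpers above, treating the result as an opaque page of
 * HV_HYP_PAGE_SIZE bytes, along the lines of:
 *
 *	void *page = hv_alloc_hyperv_zeroed_page();
 *
 *	if (!page)
 *		return -ENOMEM;
 *	<hand the page to the hypervisor>
 *	hv_free_hyperv_page((unsigned long)page);
 */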

/*
 * hv_post_message - Post a message using the hypervisor message IPC.
 *
 * This involves a hypercall.
 */
int hv_post_message(union hv_connection_id connection_id,
		    enum hv_message_type message_type,
		    void *payload, size_t payload_size)
{
	struct hv_input_post_message *aligned_msg;
	struct hv_per_cpu_context *hv_cpu;
	u64 status;

	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return -EMSGSIZE;

	hv_cpu = get_cpu_ptr(hv_context.cpu_context);
	aligned_msg = hv_cpu->post_msg_page;
	aligned_msg->connectionid = connection_id;
	aligned_msg->reserved = 0;
	aligned_msg->message_type = message_type;
	aligned_msg->payload_size = payload_size;
	memcpy((void *)aligned_msg->payload, payload, payload_size);

	status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);

	/*
	 * Preemption must remain disabled until after the hypercall
	 * so some other thread can't get scheduled onto this cpu and
	 * corrupt the per-cpu post_msg_page
	 */
	put_cpu_ptr(hv_cpu);

	return hv_result(status);
}
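
/*
 * For illustration only: apart from the -EMSGSIZE early return, the function
 * returns the raw Hyper-V status code extracted by hv_result() rather than a
 * Linux errno, so a caller would typically do something like:
 *
 *	ret = hv_post_message(conn_id, msg_type, buffer, buflen);
 *	if (ret == HV_STATUS_SUCCESS)
 *		return 0;
 *	<otherwise map or retry the Hyper-V status code>
 *
 * where conn_id, msg_type, buffer and buflen stand in for whatever the
 * caller is sending.
 */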

int hv_synic_alloc(void)
{
	int cpu;
	struct hv_per_cpu_context *hv_cpu;

	/*
	 * First, zero all per-cpu memory areas so hv_synic_free() can
	 * detect what memory has been allocated and clean up properly
	 * after any failures.
	 */
	for_each_present_cpu(cpu) {
		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
		memset(hv_cpu, 0, sizeof(*hv_cpu));
	}

	hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
					 GFP_KERNEL);
	if (hv_context.hv_numa_map == NULL) {
		pr_err("Unable to allocate NUMA map\n");
		goto err;
	}

	for_each_present_cpu(cpu) {
		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);

		tasklet_init(&hv_cpu->msg_dpc,
			     vmbus_on_msg_dpc, (unsigned long) hv_cpu);

		hv_cpu->synic_message_page =
			(void *)get_zeroed_page(GFP_ATOMIC);
		if (hv_cpu->synic_message_page == NULL) {
			pr_err("Unable to allocate SYNIC message page\n");
			goto err;
		}

		hv_cpu->synic_event_page = (void *)get_zeroed_page(GFP_ATOMIC);
		if (hv_cpu->synic_event_page == NULL) {
			pr_err("Unable to allocate SYNIC event page\n");
			goto err;
		}

		hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
		if (hv_cpu->post_msg_page == NULL) {
			pr_err("Unable to allocate post msg page\n");
			goto err;
		}
	}

	return 0;
err:
	/*
	 * Any memory allocations that succeeded will be freed when
	 * the caller cleans up by calling hv_synic_free()
	 */
	return -ENOMEM;
}

void hv_synic_free(void)
{
	int cpu;

	for_each_present_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		free_page((unsigned long)hv_cpu->synic_event_page);
		free_page((unsigned long)hv_cpu->synic_message_page);
		free_page((unsigned long)hv_cpu->post_msg_page);
	}

	kfree(hv_context.hv_numa_map);
}

/*
 * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 *
 * If it is already initialized by another entity (i.e., the x2v shim), we
 * need to retrieve the initialized message and event pages. Otherwise, we
 * create and initialize the message and event pages.
 */
void hv_synic_enable_regs(unsigned int cpu)
{
	struct hv_per_cpu_context *hv_cpu
		= per_cpu_ptr(hv_context.cpu_context, cpu);
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sint shared_sint;
	union hv_synic_scontrol sctrl;

	/* Set up the Synic's message page */
	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
	simp.simp_enabled = 1;
	simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
		>> HV_HYP_PAGE_SHIFT;

	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

	/* Set up the Synic's event page */
	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
	siefp.siefp_enabled = 1;
	siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
		>> HV_HYP_PAGE_SHIFT;

	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

	/* Set up the shared SINT. */
	if (vmbus_irq != -1)
		enable_percpu_irq(vmbus_irq, 0);
	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
					VMBUS_MESSAGE_SINT);

	shared_sint.vector = vmbus_interrupt;
	shared_sint.masked = false;

	/*
	 * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
	 * it doesn't provide a recommendation flag and AEOI must be disabled.
	 */
#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
	shared_sint.auto_eoi =
			!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
#else
	shared_sint.auto_eoi = 0;
#endif
	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
				shared_sint.as_uint64);

	/* Enable the global synic bit */
	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
	sctrl.enable = 1;

	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
}

int hv_synic_init(unsigned int cpu)
{
	hv_synic_enable_regs(cpu);

	hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);

	return 0;
}
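
/*
 * For illustration only: hv_synic_init() and hv_synic_cleanup() are intended
 * to run as CPU hotplug online/offline callbacks; the VMBus driver registers
 * them roughly as:
 *
 *	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
 *			  hv_synic_init, hv_synic_cleanup);
 *
 * (the exact registration lives in vmbus_drv.c and may differ between
 * kernel versions).
 */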

/*
 * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 */
void hv_synic_disable_regs(unsigned int cpu)
{
	union hv_synic_sint shared_sint;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_scontrol sctrl;

	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
					VMBUS_MESSAGE_SINT);

	shared_sint.masked = 1;

	/* Need to correctly clean up in the case of SMP!!! */
	/* Disable the interrupt */
	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
				shared_sint.as_uint64);

	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
	simp.simp_enabled = 0;
	simp.base_simp_gpa = 0;

	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
	siefp.siefp_enabled = 0;
	siefp.base_siefp_gpa = 0;

	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

	/* Disable the global synic bit */
	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
	sctrl.enable = 0;
	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);

	if (vmbus_irq != -1)
		disable_percpu_irq(vmbus_irq);
}

#define HV_MAX_TRIES 3
/*
 * Scan the event flags page of 'this' CPU looking for any bit that is set. If we find one
 * bit set, then wait for a few milliseconds. Repeat these steps up to HV_MAX_TRIES times,
 * so the total wait is bounded at roughly 30-60 ms. Return 'true' if any bit is still set
 * after this operation; 'false' otherwise.
 *
 * If a bit is set, that means there is a pending channel interrupt. The expectation is
 * that the normal interrupt handling mechanism will find and process the channel interrupt
 * "very soon", and in the process clear the bit.
 */
static bool hv_synic_event_pending(void)
{
	struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
	union hv_synic_event_flags *event =
		(union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
	unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
	bool pending;
	u32 relid;
	int tries = 0;

retry:
	pending = false;
	for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
		/* Special case - VMBus channel protocol messages */
		if (relid == 0)
			continue;
		pending = true;
		break;
	}
	if (pending && tries++ < HV_MAX_TRIES) {
		usleep_range(10000, 20000);
		goto retry;
	}
	return pending;
}

int hv_synic_cleanup(unsigned int cpu)
{
	struct vmbus_channel *channel, *sc;
	bool channel_found = false;

	if (vmbus_connection.conn_state != CONNECTED)
		goto always_cleanup;

	/*
	 * Hyper-V does not provide a way to change the connect CPU once
	 * it is set; we must prevent the connect CPU from going offline
	 * while the VM is running normally. But in the panic or kexec()
	 * path where the vmbus is already disconnected, the CPU must be
	 * allowed to shut down.
	 */
	if (cpu == VMBUS_CONNECT_CPU)
		return -EBUSY;

	/*
	 * Search for channels which are bound to the CPU we're about to
	 * clean up. In case we find one and vmbus is still connected, we
	 * fail; this will effectively prevent CPU offlining.
	 *
	 * TODO: Re-bind the channels to different CPUs.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (channel->target_cpu == cpu) {
			channel_found = true;
			break;
		}
		list_for_each_entry(sc, &channel->sc_list, sc_list) {
			if (sc->target_cpu == cpu) {
				channel_found = true;
				break;
			}
		}
		if (channel_found)
			break;
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel_found)
		return -EBUSY;

	/*
	 * channel_found == false means that any channels that were previously
	 * assigned to the CPU have been reassigned elsewhere with a call of
	 * vmbus_send_modifychannel(). Scan the event flags page for set bits
	 * and wait, with a timeout, for vmbus_chan_sched() to process them.
	 * If bits are still set after this operation and VMBus is connected,
	 * fail the CPU offlining operation.
	 */
	if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
		return -EBUSY;

always_cleanup:
	hv_stimer_legacy_cleanup(cpu);

	hv_synic_disable_regs(cpu);

	return 0;
}