// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023, Microsoft Corporation.
 *
 * mshv_root module's main interrupt handler and associated functionality.
 *
 * Authors: Microsoft Linux virtualization team
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/random.h>
#include <asm/mshyperv.h>

#include "mshv_eventfd.h"
#include "mshv.h"

/*
 * Dequeue one port number from this CPU's SynIC event ring for the given
 * SINT index.
 *
 * Returns the port number, or 0 if no message is pending (0 is not a valid
 * queued message, so it doubles as the "empty" sentinel here).
 *
 * Before giving up on an apparently-empty ring, this performs the
 * ring-empty/unmask handshake with the hypervisor so that a message raced
 * in at the last moment is not lost; while messages remain queued the
 * signal is kept masked (signal_masked = 1) so the hypervisor does not
 * re-interrupt for entries we are already draining.
 */
static u32 synic_event_ring_get_queued_port(u32 sint_index)
{
	struct hv_synic_event_ring_page **event_ring_page;
	/* volatile: the hypervisor writes this ring concurrently */
	volatile struct hv_synic_event_ring *ring;
	struct hv_synic_pages *spages;
	u8 **synic_eventring_tail;
	u32 message;
	u8 tail;

	spages = this_cpu_ptr(mshv_root.synic_pages);
	event_ring_page = &spages->synic_event_ring_page;
	synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);

	if (unlikely(!*synic_eventring_tail)) {
		pr_debug("Missing synic event ring tail!\n");
		return 0;
	}
	/* Per-SINT consumer cursor, maintained by this function only. */
	tail = (*synic_eventring_tail)[sint_index];

	if (unlikely(!*event_ring_page)) {
		pr_debug("Missing synic event ring page!\n");
		return 0;
	}

	ring = &(*event_ring_page)->sint_event_ring[sint_index];

	/*
	 * Get the message.
	 */
	message = ring->data[tail];

	if (!message) {
		if (ring->ring_full) {
			/*
			 * Ring is marked full, but we would have consumed all
			 * the messages. Notify the hypervisor that ring is now
			 * empty and check again.
			 */
			ring->ring_full = 0;
			hv_call_notify_port_ring_empty(sint_index);
			message = ring->data[tail];
		}

		if (!message) {
			ring->signal_masked = 0;
			/*
			 * Unmask the signal and sync with hypervisor
			 * before one last check for any message.
			 */
			mb();
			message = ring->data[tail];

			/*
			 * Ok, lets bail out.
			 */
			if (!message)
				return 0;
		}

		/* A message did show up: keep signals masked while draining. */
		ring->signal_masked = 1;
	}

	/*
	 * Clear the message in the ring buffer.
	 */
	ring->data[tail] = 0;

	/* Advance the cursor, wrapping at the fixed ring size. */
	if (++tail == HV_SYNIC_EVENT_RING_MESSAGE_COUNT)
		tail = 0;

	(*synic_eventring_tail)[sint_index] = tail;

	return message;
}

/*
 * ISR for doorbell notifications (HVMSG_SYNIC_SINT_INTERCEPT on the
 * doorbell SINT).
 *
 * Drains all queued ports from the doorbell event ring and invokes the
 * callback registered for each port.  Ports with no registration, or whose
 * registration is not of doorbell type, are skipped with a debug message.
 *
 * Returns true if the message was a doorbell notification (and was
 * consumed), false so the caller can try the next ISR otherwise.
 */
static bool
mshv_doorbell_isr(struct hv_message *msg)
{
	struct hv_notification_message_payload *notification;
	u32 port;

	if (msg->header.message_type != HVMSG_SYNIC_SINT_INTERCEPT)
		return false;

	notification = (struct hv_notification_message_payload *)msg->u.payload;
	if (notification->sint_index != HV_SYNIC_DOORBELL_SINT_INDEX)
		return false;

	while ((port = synic_event_ring_get_queued_port(HV_SYNIC_DOORBELL_SINT_INDEX))) {
		struct port_table_info ptinfo = { 0 };

		if (mshv_portid_lookup(port, &ptinfo)) {
			pr_debug("Failed to get port info from port_table!\n");
			continue;
		}

		if (ptinfo.hv_port_type != HV_PORT_TYPE_DOORBELL) {
			pr_debug("Not a doorbell port!, port: %d, port_type: %d\n",
				 port, ptinfo.hv_port_type);
			continue;
		}

		/* Invoke the callback */
		ptinfo.hv_port_doorbell.doorbell_cb(port,
						    ptinfo.hv_port_doorbell.data);
	}

	return true;
}

/*
 * ISR for async hypercall completion messages.
 *
 * Records the completion status on the target partition and wakes whoever
 * is waiting on partition->async_hypercall.
 *
 * Returns true if the message was an async-completion message for a
 * partition we could find, false otherwise.
 */
static bool mshv_async_call_completion_isr(struct hv_message *msg)
{
	bool handled = false;
	struct hv_async_completion_message_payload *async_msg;
	struct mshv_partition *partition;
	u64 partition_id;

	if (msg->header.message_type != HVMSG_ASYNC_CALL_COMPLETION)
		goto out;

	async_msg =
		(struct hv_async_completion_message_payload *)msg->u.payload;

	partition_id = async_msg->partition_id;

	/*
	 * Hold this lock for the rest of the isr, because the partition could
	 * be released anytime.
	 * e.g. the MSHV_RUN_VP thread could wake on another cpu; it could
	 * release the partition unless we hold this!
	 */
	rcu_read_lock();

	partition = mshv_partition_find(partition_id);

	if (unlikely(!partition)) {
		pr_debug("failed to find partition %llu\n", partition_id);
		goto unlock_out;
	}

	partition->async_hypercall_status = async_msg->status;
	complete(&partition->async_hypercall);

	handled = true;

unlock_out:
	rcu_read_unlock();
out:
	return handled;
}

/*
 * Mark a VP as kicked by the hypervisor and wake any thread sleeping on
 * its suspend queue.  vp_signaled_count is bumped first for accounting.
 */
static void kick_vp(struct mshv_vp *vp)
{
	atomic64_inc(&vp->run.vp_signaled_count);
	vp->run.kicked_by_hv = 1;
	wake_up(&vp->run.vp_suspend_queue);
}

/*
 * Handle a scheduler message that identifies the VPs to signal via a
 * sparse-4K bitset (HV_GENERIC_SET_SPARSE_4K): one valid_bank_mask bit per
 * 64-VP bank, with one bank_contents word per set bank.
 *
 * Walks every set bit, translating (bank, bit) into a VP index, and kicks
 * the corresponding VP of the partition named in the message.  Bails out
 * early (with a debug message) on malformed input or missing VPs.
 */
static void
handle_bitset_message(const struct hv_vp_signal_bitset_scheduler_message *msg)
{
	int bank_idx, vps_signaled = 0, bank_mask_size;
	struct mshv_partition *partition;
	const struct hv_vpset *vpset;
	const u64 *bank_contents;
	u64 partition_id = msg->partition_id;

	if (msg->vp_bitset.bitset.format != HV_GENERIC_SET_SPARSE_4K) {
		pr_debug("scheduler message format is not HV_GENERIC_SET_SPARSE_4K");
		return;
	}

	if (msg->vp_count == 0) {
		pr_debug("scheduler message with no VP specified");
		return;
	}

	/* Pin the partition for the duration of the walk (see the
	 * async-completion ISR for why this is needed). */
	rcu_read_lock();

	partition = mshv_partition_find(partition_id);
	if (unlikely(!partition)) {
		pr_debug("failed to find partition %llu\n", partition_id);
		goto unlock_out;
	}

	vpset = &msg->vp_bitset.bitset;

	bank_idx = -1;
	bank_contents = vpset->bank_contents;
	bank_mask_size = sizeof(vpset->valid_bank_mask) * BITS_PER_BYTE;

	/* Outer loop: iterate set bits of the bank mask. */
	while (true) {
		int vp_bank_idx = -1;
		int vp_bank_size = sizeof(*bank_contents) * BITS_PER_BYTE;
		int vp_index;

		bank_idx = find_next_bit((unsigned long *)&vpset->valid_bank_mask,
					 bank_mask_size, bank_idx + 1);
		if (bank_idx == bank_mask_size)
			break;

		/* Inner loop: iterate set bits within this bank's word. */
		while (true) {
			struct mshv_vp *vp;

			vp_bank_idx = find_next_bit((unsigned long *)bank_contents,
						    vp_bank_size, vp_bank_idx + 1);
			if (vp_bank_idx == vp_bank_size)
				break;

			vp_index = (bank_idx * vp_bank_size) + vp_bank_idx;

			/* This shouldn't happen, but just in case. */
			if (unlikely(vp_index >= MSHV_MAX_VPS)) {
				pr_debug("VP index %u out of bounds\n",
					 vp_index);
				goto unlock_out;
			}

			vp = partition->pt_vp_array[vp_index];
			if (unlikely(!vp)) {
				pr_debug("failed to find VP %u\n", vp_index);
				goto unlock_out;
			}

			kick_vp(vp);
			vps_signaled++;
		}

		/* One bank_contents word is consumed per valid bank. */
		bank_contents++;
	}

unlock_out:
	rcu_read_unlock();

	if (vps_signaled != msg->vp_count)
		pr_debug("asked to signal %u VPs but only did %u\n",
			 msg->vp_count, vps_signaled);
}

/*
 * Handle a scheduler message that lists (partition_id, vp_index) pairs
 * explicitly.  Kicks each named VP; the partition lookup is cached across
 * consecutive entries that share the same partition id.  Stops at the
 * first entry that cannot be resolved.
 */
static void
handle_pair_message(const struct hv_vp_signal_pair_scheduler_message *msg)
{
	struct mshv_partition *partition = NULL;
	struct mshv_vp *vp;
	int idx;

	rcu_read_lock();

	for (idx = 0; idx < msg->vp_count; idx++) {
		u64 partition_id = msg->partition_ids[idx];
		u32 vp_index = msg->vp_indexes[idx];

		/* Re-lookup only when the partition changes between entries. */
		if (idx == 0 || partition->pt_id != partition_id) {
			partition = mshv_partition_find(partition_id);
			if (unlikely(!partition)) {
				pr_debug("failed to find partition %llu\n",
					 partition_id);
				break;
			}
		}

		/* This shouldn't happen, but just in case. */
		if (unlikely(vp_index >= MSHV_MAX_VPS)) {
			pr_debug("VP index %u out of bounds\n", vp_index);
			break;
		}

		vp = partition->pt_vp_array[vp_index];
		if (!vp) {
			pr_debug("failed to find VP %u\n", vp_index);
			break;
		}

		kick_vp(vp);
	}

	rcu_read_unlock();
}

/*
 * ISR for root-scheduler VP-signal messages; dispatches to the bitset or
 * pair handler based on message type.  Returns true iff the message was
 * one of the two scheduler signal types.
 */
static bool
mshv_scheduler_isr(struct hv_message *msg)
{
	if (msg->header.message_type != HVMSG_SCHEDULER_VP_SIGNAL_BITSET &&
	    msg->header.message_type != HVMSG_SCHEDULER_VP_SIGNAL_PAIR)
		return false;

	if (msg->header.message_type == HVMSG_SCHEDULER_VP_SIGNAL_BITSET)
		handle_bitset_message((struct hv_vp_signal_bitset_scheduler_message *)
				      msg->u.payload);
	else
		handle_pair_message((struct hv_vp_signal_pair_scheduler_message *)
				    msg->u.payload);

	return true;
}

/*
 * ISR for intercept messages.  The full intercept details live in the
 * mapped VP intercept message page; the SynIC message we see here is an
 * opaque stub naming the VP, which we wake so its run thread can process
 * the intercept.  APIC EOI messages may be consumed here directly when a
 * registered GSI ack-notifier handles them.
 *
 * Returns true if the message was consumed.
 */
static bool
mshv_intercept_isr(struct hv_message *msg)
{
	struct mshv_partition *partition;
	bool handled = false;
	struct mshv_vp *vp;
	u64 partition_id;
	u32 vp_index;

	partition_id = msg->header.sender;

	rcu_read_lock();

	partition = mshv_partition_find(partition_id);
	if (unlikely(!partition)) {
		pr_debug("failed to find partition %llu\n",
			 partition_id);
		goto unlock_out;
	}

	if (msg->header.message_type == HVMSG_X64_APIC_EOI) {
		/*
		 * Check if this gsi is registered in the
		 * ack_notifier list and invoke the callback
		 * if registered.
		 */

		/*
		 * If there is a notifier, the ack callback is supposed
		 * to handle the VMEXIT. So we need not pass this message
		 * to vcpu thread.
		 */
		struct hv_x64_apic_eoi_message *eoi_msg =
			(struct hv_x64_apic_eoi_message *)&msg->u.payload[0];

		if (mshv_notify_acked_gsi(partition, eoi_msg->interrupt_vector)) {
			handled = true;
			goto unlock_out;
		}
	}

	/*
	 * We should get an opaque intercept message here for all intercept
	 * messages, since we're using the mapped VP intercept message page.
	 *
	 * The intercept message will have been placed in intercept message
	 * page at this point.
	 *
	 * Make sure the message type matches our expectation.
	 */
	if (msg->header.message_type != HVMSG_OPAQUE_INTERCEPT) {
		pr_debug("wrong message type %d", msg->header.message_type);
		goto unlock_out;
	}

	/*
	 * Since we directly index the vp, and it has to exist for us to be here
	 * (because the vp is only deleted when the partition is), no additional
	 * locking is needed here
	 */
	vp_index =
		((struct hv_opaque_intercept_message *)msg->u.payload)->vp_index;
	vp = partition->pt_vp_array[vp_index];
	if (unlikely(!vp)) {
		pr_debug("failed to find VP %u\n", vp_index);
		goto unlock_out;
	}

	kick_vp(vp);

	handled = true;

unlock_out:
	rcu_read_unlock();

	return handled;
}

/*
 * Top-level SynIC interrupt handler for the root partition.
 *
 * Reads the pending message from this CPU's SynIC message page (on the
 * interception SINT) and tries each ISR in turn: doorbell, scheduler,
 * async-completion, intercept.  On success the message slot is cleared and,
 * if the hypervisor flagged another message pending, EOM is written so the
 * next one is delivered.
 */
void mshv_isr(void)
{
	struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages);
	struct hv_message_page **msg_page = &spages->synic_message_page;
	struct hv_message *msg;
	bool handled;

	if (unlikely(!(*msg_page))) {
		pr_debug("Missing synic page!\n");
		return;
	}

	msg = &((*msg_page)->sint_message[HV_SYNIC_INTERCEPTION_SINT_INDEX]);

	/*
	 * If the type isn't set, there isn't really a message;
	 * it may be some other hyperv interrupt
	 */
	if (msg->header.message_type == HVMSG_NONE)
		return;

	handled = mshv_doorbell_isr(msg);

	if (!handled)
		handled = mshv_scheduler_isr(msg);

	if (!handled)
		handled = mshv_async_call_completion_isr(msg);

	if (!handled)
		handled = mshv_intercept_isr(msg);

	if (handled) {
		/*
		 * Acknowledge message with hypervisor if another message is
		 * pending.
		 */
		msg->header.message_type = HVMSG_NONE;
		/*
		 * Ensure the write is complete so the hypervisor will deliver
		 * the next message if available.
		 */
		mb();
		if (msg->header.message_flags.msg_pending)
			hv_set_non_nested_msr(HV_MSR_EOM, 0);

#ifdef HYPERVISOR_CALLBACK_VECTOR
		add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR);
#endif
	} else {
		pr_warn_once("%s: unknown message type 0x%x\n", __func__,
			     msg->header.message_type);
	}
}

/*
 * Per-CPU SynIC setup (CPU hotplug "online" callback).
 *
 * Maps and enables the SynIC message, event-flags and event-ring pages,
 * configures the interception and doorbell SINTs (when a callback vector
 * exists), then sets the global SynIC enable bit.  Each page is mapped
 * before its enabling MSR is written so the hypervisor never sees an
 * enabled page we cannot access.  On failure, everything already enabled
 * is disabled and unmapped in reverse order.
 *
 * Returns 0 on success, -EFAULT if any page could not be mapped.
 */
int mshv_synic_init(unsigned int cpu)
{
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sirbp sirbp;
#ifdef HYPERVISOR_CALLBACK_VECTOR
	union hv_synic_sint sint;
#endif
	union hv_synic_scontrol sctrl;
	struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages);
	struct hv_message_page **msg_page = &spages->synic_message_page;
	struct hv_synic_event_flags_page **event_flags_page =
		&spages->synic_event_flags_page;
	struct hv_synic_event_ring_page **event_ring_page =
		&spages->synic_event_ring_page;

	/* Setup the Synic's message page */
	simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP);
	simp.simp_enabled = true;
	*msg_page = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
			     HV_HYP_PAGE_SIZE,
			     MEMREMAP_WB);

	if (!(*msg_page))
		return -EFAULT;

	hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);

	/* Setup the Synic's event flags page */
	siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP);
	siefp.siefp_enabled = true;
	*event_flags_page = memremap(siefp.base_siefp_gpa << PAGE_SHIFT,
				     PAGE_SIZE, MEMREMAP_WB);

	if (!(*event_flags_page))
		goto cleanup;

	hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);

	/* Setup the Synic's event ring page */
	sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP);
	sirbp.sirbp_enabled = true;
	*event_ring_page = memremap(sirbp.base_sirbp_gpa << PAGE_SHIFT,
				    PAGE_SIZE, MEMREMAP_WB);

	if (!(*event_ring_page))
		goto cleanup;

	hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);

#ifdef HYPERVISOR_CALLBACK_VECTOR
	/* Enable intercepts */
	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = false;
	sint.auto_eoi = hv_recommend_using_aeoi();
	hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
			      sint.as_uint64);

	/* Doorbell SINT */
	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = false;
	sint.as_intercept = 1;
	sint.auto_eoi = hv_recommend_using_aeoi();
	hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX,
			      sint.as_uint64);
#endif

	/* Enable global synic bit */
	sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL);
	sctrl.enable = 1;
	hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64);

	return 0;

cleanup:
	/* Unwind in reverse order; each guard is only true once the
	 * corresponding local MSR image was populated above. */
	if (*event_ring_page) {
		sirbp.sirbp_enabled = false;
		hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);
		memunmap(*event_ring_page);
	}
	if (*event_flags_page) {
		siefp.siefp_enabled = false;
		hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);
		memunmap(*event_flags_page);
	}
	if (*msg_page) {
		simp.simp_enabled = false;
		hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);
		memunmap(*msg_page);
	}

	return -EFAULT;
}

/*
 * Per-CPU SynIC teardown (CPU hotplug "offline" callback); mirrors
 * mshv_synic_init.  SINTs are masked first so no further interrupts
 * arrive, then each page is disabled and unmapped, and finally the global
 * SynIC enable bit is cleared.  Always returns 0.
 */
int mshv_synic_cleanup(unsigned int cpu)
{
	union hv_synic_sint sint;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sirbp sirbp;
	union hv_synic_scontrol sctrl;
	struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages);
	struct hv_message_page **msg_page = &spages->synic_message_page;
	struct hv_synic_event_flags_page **event_flags_page =
		&spages->synic_event_flags_page;
	struct hv_synic_event_ring_page **event_ring_page =
		&spages->synic_event_ring_page;

	/* Disable the interrupt */
	sint.as_uint64 = hv_get_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX);
	sint.masked = true;
	hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
			      sint.as_uint64);

	/* Disable Doorbell SINT */
	sint.as_uint64 = hv_get_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX);
	sint.masked = true;
	hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX,
			      sint.as_uint64);

	/* Disable Synic's event ring page */
	sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP);
	sirbp.sirbp_enabled = false;
	hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);
	memunmap(*event_ring_page);

	/* Disable Synic's event flags page */
	siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP);
	siefp.siefp_enabled = false;
	hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);
	memunmap(*event_flags_page);

	/* Disable Synic's message page */
	simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP);
	simp.simp_enabled = false;
	hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);
	memunmap(*msg_page);

	/* Disable global synic bit */
	sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL);
	sctrl.enable = 0;
	hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64);

	return 0;
}

/*
 * Register a doorbell for @partition_id: allocate a port-table entry
 * holding @doorbell_cb/@data, create a hypervisor port on the doorbell
 * SINT, and connect it to guest physical address @gpa with trigger value
 * @val and @flags.
 *
 * Returns the allocated port id (>= 0), which callers use as the doorbell
 * id, or a negative errno.  Each failure step rolls back the previous
 * ones (delete port, free port id / table entry).
 */
int
mshv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb, void *data,
		       u64 gpa, u64 val, u64 flags)
{
	struct hv_connection_info connection_info = { 0 };
	union hv_connection_id connection_id = { 0 };
	struct port_table_info *port_table_info;
	struct hv_port_info port_info = { 0 };
	union hv_port_id port_id = { 0 };
	int ret;

	port_table_info = kmalloc(sizeof(*port_table_info), GFP_KERNEL);
	if (!port_table_info)
		return -ENOMEM;

	port_table_info->hv_port_type = HV_PORT_TYPE_DOORBELL;
	port_table_info->hv_port_doorbell.doorbell_cb = doorbell_cb;
	port_table_info->hv_port_doorbell.data = data;
	/* On success, ret is the newly allocated port id. */
	ret = mshv_portid_alloc(port_table_info);
	if (ret < 0) {
		kfree(port_table_info);
		return ret;
	}

	port_id.u.id = ret;
	port_info.port_type = HV_PORT_TYPE_DOORBELL;
	port_info.doorbell_port_info.target_sint = HV_SYNIC_DOORBELL_SINT_INDEX;
	port_info.doorbell_port_info.target_vp = HV_ANY_VP;
	ret = hv_call_create_port(hv_current_partition_id, port_id, partition_id,
				  &port_info,
				  0, 0, NUMA_NO_NODE);

	if (ret < 0) {
		mshv_portid_free(port_id.u.id);
		return ret;
	}

	connection_id.u.id = port_id.u.id;
	connection_info.port_type = HV_PORT_TYPE_DOORBELL;
	connection_info.doorbell_connection_info.gpa = gpa;
	connection_info.doorbell_connection_info.trigger_value = val;
	connection_info.doorbell_connection_info.flags = flags;

	ret = hv_call_connect_port(hv_current_partition_id, port_id, partition_id,
				   connection_id, &connection_info, 0, NUMA_NO_NODE);
	if (ret < 0) {
		hv_call_delete_port(hv_current_partition_id, port_id);
		mshv_portid_free(port_id.u.id);
		return ret;
	}

	// lets use the port_id as the doorbell_id
	return port_id.u.id;
}

/*
 * Tear down a doorbell previously created by mshv_register_doorbell():
 * disconnect the connection (owned by @partition_id), delete the port
 * (owned by the root partition), and release the port id.
 */
void
mshv_unregister_doorbell(u64 partition_id, int doorbell_portid)
{
	union hv_port_id port_id = { 0 };
	union hv_connection_id connection_id = { 0 };

	connection_id.u.id = doorbell_portid;
	hv_call_disconnect_port(partition_id, connection_id);

	port_id.u.id = doorbell_portid;
	hv_call_delete_port(hv_current_partition_id, port_id);

	mshv_portid_free(doorbell_portid);
}