1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2009, Microsoft Corporation. 4 * 5 * Authors: 6 * Haiyang Zhang <haiyangz@microsoft.com> 7 * Hank Janssen <hjanssen@microsoft.com> 8 */ 9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 10 11 #include <linux/kernel.h> 12 #include <linux/interrupt.h> 13 #include <linux/sched.h> 14 #include <linux/wait.h> 15 #include <linux/mm.h> 16 #include <linux/slab.h> 17 #include <linux/list.h> 18 #include <linux/module.h> 19 #include <linux/completion.h> 20 #include <linux/delay.h> 21 #include <linux/cpu.h> 22 #include <linux/hyperv.h> 23 #include <linux/export.h> 24 #include <asm/mshyperv.h> 25 #include <linux/sched/isolation.h> 26 27 #include "hyperv_vmbus.h" 28 29 static void init_vp_index(struct vmbus_channel *channel); 30 31 const struct vmbus_device vmbus_devs[] = { 32 /* IDE */ 33 { .dev_type = HV_IDE, 34 HV_IDE_GUID, 35 .perf_device = true, 36 .allowed_in_isolated = false, 37 }, 38 39 /* SCSI */ 40 { .dev_type = HV_SCSI, 41 HV_SCSI_GUID, 42 .perf_device = true, 43 .allowed_in_isolated = true, 44 }, 45 46 /* Fibre Channel */ 47 { .dev_type = HV_FC, 48 HV_SYNTHFC_GUID, 49 .perf_device = true, 50 .allowed_in_isolated = false, 51 }, 52 53 /* Synthetic NIC */ 54 { .dev_type = HV_NIC, 55 HV_NIC_GUID, 56 .perf_device = true, 57 .allowed_in_isolated = true, 58 }, 59 60 /* Network Direct */ 61 { .dev_type = HV_ND, 62 HV_ND_GUID, 63 .perf_device = true, 64 .allowed_in_isolated = false, 65 }, 66 67 /* PCIE */ 68 { .dev_type = HV_PCIE, 69 HV_PCIE_GUID, 70 .perf_device = false, 71 .allowed_in_isolated = true, 72 }, 73 74 /* Synthetic Frame Buffer */ 75 { .dev_type = HV_FB, 76 HV_SYNTHVID_GUID, 77 .perf_device = false, 78 .allowed_in_isolated = false, 79 }, 80 81 /* Synthetic Keyboard */ 82 { .dev_type = HV_KBD, 83 HV_KBD_GUID, 84 .perf_device = false, 85 .allowed_in_isolated = false, 86 }, 87 88 /* Synthetic MOUSE */ 89 { .dev_type = HV_MOUSE, 90 HV_MOUSE_GUID, 91 .perf_device = false, 92 .allowed_in_isolated = false, 93 }, 94 95 /* KVP */ 96 { .dev_type = HV_KVP, 97 HV_KVP_GUID, 98 .perf_device = false, 99 .allowed_in_isolated = false, 100 }, 101 102 /* Time Synch */ 103 { .dev_type = HV_TS, 104 HV_TS_GUID, 105 .perf_device = false, 106 .allowed_in_isolated = true, 107 }, 108 109 /* Heartbeat */ 110 { .dev_type = HV_HB, 111 HV_HEART_BEAT_GUID, 112 .perf_device = false, 113 .allowed_in_isolated = true, 114 }, 115 116 /* Shutdown */ 117 { .dev_type = HV_SHUTDOWN, 118 HV_SHUTDOWN_GUID, 119 .perf_device = false, 120 .allowed_in_isolated = true, 121 }, 122 123 /* File copy */ 124 /* fcopy always uses 16KB ring buffer size and is working well for last many years */ 125 { .pref_ring_size = 0x4000, 126 .dev_type = HV_FCOPY, 127 HV_FCOPY_GUID, 128 .perf_device = false, 129 .allowed_in_isolated = false, 130 }, 131 132 /* Backup */ 133 { .dev_type = HV_BACKUP, 134 HV_VSS_GUID, 135 .perf_device = false, 136 .allowed_in_isolated = false, 137 }, 138 139 /* Dynamic Memory */ 140 { .dev_type = HV_DM, 141 HV_DM_GUID, 142 .perf_device = false, 143 .allowed_in_isolated = false, 144 }, 145 146 /* 147 * Unknown GUID 148 * 64 KB ring buffer + 4 KB header should be sufficient size for any Hyper-V device apart 149 * from HV_NIC and HV_SCSI. This case avoid the fallback for unknown devices to allocate 150 * much bigger (2 MB) of ring size. 151 */ 152 { .pref_ring_size = 0x11000, 153 .dev_type = HV_UNKNOWN, 154 .perf_device = false, 155 .allowed_in_isolated = false, 156 }, 157 }; 158 EXPORT_SYMBOL_GPL(vmbus_devs); 159 160 static const struct { 161 guid_t guid; 162 } vmbus_unsupported_devs[] = { 163 { HV_AVMA1_GUID }, 164 { HV_AVMA2_GUID }, 165 { HV_RDV_GUID }, 166 { HV_IMC_GUID }, 167 }; 168 169 /* 170 * The rescinded channel may be blocked waiting for a response from the host; 171 * take care of that. 172 */ 173 static void vmbus_rescind_cleanup(struct vmbus_channel *channel) 174 { 175 struct vmbus_channel_msginfo *msginfo; 176 unsigned long flags; 177 178 179 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 180 channel->rescind = true; 181 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 182 msglistentry) { 183 184 if (msginfo->waiting_channel == channel) { 185 complete(&msginfo->waitevent); 186 break; 187 } 188 } 189 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 190 } 191 192 static bool is_unsupported_vmbus_devs(const guid_t *guid) 193 { 194 int i; 195 196 for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++) 197 if (guid_equal(guid, &vmbus_unsupported_devs[i].guid)) 198 return true; 199 return false; 200 } 201 202 static u16 hv_get_dev_type(const struct vmbus_channel *channel) 203 { 204 const guid_t *guid = &channel->offermsg.offer.if_type; 205 u16 i; 206 207 if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid)) 208 return HV_UNKNOWN; 209 210 for (i = HV_IDE; i < HV_UNKNOWN; i++) { 211 if (guid_equal(guid, &vmbus_devs[i].guid)) 212 return i; 213 } 214 pr_info("Unknown GUID: %pUl\n", guid); 215 return i; 216 } 217 218 /** 219 * vmbus_prep_negotiate_resp() - Create default response for Negotiate message 220 * @icmsghdrp: Pointer to msg header structure 221 * @buf: Raw buffer channel data 222 * @buflen: Length of the raw buffer channel data. 223 * @fw_version: The framework versions we can support. 224 * @fw_vercnt: The size of @fw_version. 225 * @srv_version: The service versions we can support. 226 * @srv_vercnt: The size of @srv_version. 227 * @nego_fw_version: The selected framework version. 228 * @nego_srv_version: The selected service version. 229 * 230 * Note: Versions are given in decreasing order. 231 * 232 * Set up and fill in default negotiate response message. 233 * Mainly used by Hyper-V drivers. 234 */ 235 bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, 236 u32 buflen, const int *fw_version, int fw_vercnt, 237 const int *srv_version, int srv_vercnt, 238 int *nego_fw_version, int *nego_srv_version) 239 { 240 int icframe_major, icframe_minor; 241 int icmsg_major, icmsg_minor; 242 int fw_major, fw_minor; 243 int srv_major, srv_minor; 244 int i, j; 245 bool found_match = false; 246 struct icmsg_negotiate *negop; 247 248 /* Check that there's enough space for icframe_vercnt, icmsg_vercnt */ 249 if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) { 250 pr_err_ratelimited("Invalid icmsg negotiate\n"); 251 return false; 252 } 253 254 icmsghdrp->icmsgsize = 0x10; 255 negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR]; 256 257 icframe_major = negop->icframe_vercnt; 258 icframe_minor = 0; 259 260 icmsg_major = negop->icmsg_vercnt; 261 icmsg_minor = 0; 262 263 /* Validate negop packet */ 264 if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT || 265 icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT || 266 ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) { 267 pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n", 268 icframe_major, icmsg_major); 269 goto fw_error; 270 } 271 272 /* 273 * Select the framework version number we will 274 * support. 275 */ 276 277 for (i = 0; i < fw_vercnt; i++) { 278 fw_major = (fw_version[i] >> 16); 279 fw_minor = (fw_version[i] & 0xFFFF); 280 281 for (j = 0; j < negop->icframe_vercnt; j++) { 282 if ((negop->icversion_data[j].major == fw_major) && 283 (negop->icversion_data[j].minor == fw_minor)) { 284 icframe_major = negop->icversion_data[j].major; 285 icframe_minor = negop->icversion_data[j].minor; 286 found_match = true; 287 break; 288 } 289 } 290 291 if (found_match) 292 break; 293 } 294 295 if (!found_match) 296 goto fw_error; 297 298 found_match = false; 299 300 for (i = 0; i < srv_vercnt; i++) { 301 srv_major = (srv_version[i] >> 16); 302 srv_minor = (srv_version[i] & 0xFFFF); 303 304 for (j = negop->icframe_vercnt; 305 (j < negop->icframe_vercnt + negop->icmsg_vercnt); 306 j++) { 307 308 if ((negop->icversion_data[j].major == srv_major) && 309 (negop->icversion_data[j].minor == srv_minor)) { 310 311 icmsg_major = negop->icversion_data[j].major; 312 icmsg_minor = negop->icversion_data[j].minor; 313 found_match = true; 314 break; 315 } 316 } 317 318 if (found_match) 319 break; 320 } 321 322 /* 323 * Respond with the framework and service 324 * version numbers we can support. 325 */ 326 327 fw_error: 328 if (!found_match) { 329 negop->icframe_vercnt = 0; 330 negop->icmsg_vercnt = 0; 331 } else { 332 negop->icframe_vercnt = 1; 333 negop->icmsg_vercnt = 1; 334 } 335 336 if (nego_fw_version) 337 *nego_fw_version = (icframe_major << 16) | icframe_minor; 338 339 if (nego_srv_version) 340 *nego_srv_version = (icmsg_major << 16) | icmsg_minor; 341 342 negop->icversion_data[0].major = icframe_major; 343 negop->icversion_data[0].minor = icframe_minor; 344 negop->icversion_data[1].major = icmsg_major; 345 negop->icversion_data[1].minor = icmsg_minor; 346 return found_match; 347 } 348 EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp); 349 350 /* 351 * alloc_channel - Allocate and initialize a vmbus channel object 352 */ 353 static struct vmbus_channel *alloc_channel(void) 354 { 355 struct vmbus_channel *channel; 356 357 channel = kzalloc(sizeof(*channel), GFP_ATOMIC); 358 if (!channel) 359 return NULL; 360 361 spin_lock_init(&channel->sched_lock); 362 init_completion(&channel->rescind_event); 363 364 INIT_LIST_HEAD(&channel->sc_list); 365 366 tasklet_init(&channel->callback_event, 367 vmbus_on_event, (unsigned long)channel); 368 369 hv_ringbuffer_pre_init(channel); 370 371 return channel; 372 } 373 374 /* 375 * free_channel - Release the resources used by the vmbus channel object 376 */ 377 static void free_channel(struct vmbus_channel *channel) 378 { 379 tasklet_kill(&channel->callback_event); 380 vmbus_remove_channel_attr_group(channel); 381 382 kobject_put(&channel->kobj); 383 } 384 385 void vmbus_channel_map_relid(struct vmbus_channel *channel) 386 { 387 if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS)) 388 return; 389 /* 390 * The mapping of the channel's relid is visible from the CPUs that 391 * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will 392 * execute: 393 * 394 * (a) In the "normal (i.e., not resuming from hibernation)" path, 395 * the full barrier in virt_store_mb() guarantees that the store 396 * is propagated to all CPUs before the add_channel_work work 397 * is queued. In turn, add_channel_work is queued before the 398 * channel's ring buffer is allocated/initialized and the 399 * OPENCHANNEL message for the channel is sent in vmbus_open(). 400 * Hyper-V won't start sending the interrupts for the channel 401 * before the OPENCHANNEL message is acked. The memory barrier 402 * in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures 403 * that vmbus_chan_sched() must find the channel's relid in 404 * recv_int_page before retrieving the channel pointer from the 405 * array of channels. 406 * 407 * (b) In the "resuming from hibernation" path, the virt_store_mb() 408 * guarantees that the store is propagated to all CPUs before 409 * the VMBus connection is marked as ready for the resume event 410 * (cf. check_ready_for_resume_event()). The interrupt handler 411 * of the VMBus driver and vmbus_chan_sched() can not run before 412 * vmbus_bus_resume() has completed execution (cf. resume_noirq). 413 */ 414 virt_store_mb( 415 vmbus_connection.channels[channel->offermsg.child_relid], 416 channel); 417 } 418 419 void vmbus_channel_unmap_relid(struct vmbus_channel *channel) 420 { 421 if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS)) 422 return; 423 WRITE_ONCE( 424 vmbus_connection.channels[channel->offermsg.child_relid], 425 NULL); 426 } 427 428 static void vmbus_release_relid(u32 relid) 429 { 430 struct vmbus_channel_relid_released msg; 431 int ret; 432 433 memset(&msg, 0, sizeof(struct vmbus_channel_relid_released)); 434 msg.child_relid = relid; 435 msg.header.msgtype = CHANNELMSG_RELID_RELEASED; 436 ret = vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released), 437 true); 438 439 trace_vmbus_release_relid(&msg, ret); 440 } 441 442 void hv_process_channel_removal(struct vmbus_channel *channel) 443 { 444 lockdep_assert_held(&vmbus_connection.channel_mutex); 445 BUG_ON(!channel->rescind); 446 447 /* 448 * hv_process_channel_removal() could find INVALID_RELID only for 449 * hv_sock channels. See the inline comments in vmbus_onoffer(). 450 */ 451 WARN_ON(channel->offermsg.child_relid == INVALID_RELID && 452 !is_hvsock_channel(channel)); 453 454 /* 455 * Upon suspend, an in-use hv_sock channel is removed from the array of 456 * channels and the relid is invalidated. After hibernation, when the 457 * user-space application destroys the channel, it's unnecessary and 458 * unsafe to remove the channel from the array of channels. See also 459 * the inline comments before the call of vmbus_release_relid() below. 460 */ 461 if (channel->offermsg.child_relid != INVALID_RELID) 462 vmbus_channel_unmap_relid(channel); 463 464 if (channel->primary_channel == NULL) 465 list_del(&channel->listentry); 466 else 467 list_del(&channel->sc_list); 468 469 /* 470 * If this is a "perf" channel, updates the hv_numa_map[] masks so that 471 * init_vp_index() can (re-)use the CPU. 472 */ 473 if (hv_is_perf_channel(channel)) 474 hv_clear_allocated_cpu(channel->target_cpu); 475 476 /* 477 * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and 478 * the relid is invalidated; after hibernation, when the user-space app 479 * destroys the channel, the relid is INVALID_RELID, and in this case 480 * it's unnecessary and unsafe to release the old relid, since the same 481 * relid can refer to a completely different channel now. 482 */ 483 if (channel->offermsg.child_relid != INVALID_RELID) 484 vmbus_release_relid(channel->offermsg.child_relid); 485 486 free_channel(channel); 487 } 488 489 void vmbus_free_channels(void) 490 { 491 struct vmbus_channel *channel, *tmp; 492 493 list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list, 494 listentry) { 495 /* hv_process_channel_removal() needs this */ 496 channel->rescind = true; 497 498 vmbus_device_unregister(channel->device_obj); 499 } 500 } 501 502 /* Note: the function can run concurrently for primary/sub channels. */ 503 static void vmbus_add_channel_work(struct work_struct *work) 504 { 505 struct vmbus_channel *newchannel = 506 container_of(work, struct vmbus_channel, add_channel_work); 507 struct vmbus_channel *primary_channel = newchannel->primary_channel; 508 int ret; 509 510 /* 511 * This state is used to indicate a successful open 512 * so that when we do close the channel normally, we 513 * can cleanup properly. 514 */ 515 newchannel->state = CHANNEL_OPEN_STATE; 516 517 if (primary_channel != NULL) { 518 /* newchannel is a sub-channel. */ 519 struct hv_device *dev = primary_channel->device_obj; 520 521 if (vmbus_add_channel_kobj(dev, newchannel)) 522 goto err_deq_chan; 523 524 if (primary_channel->sc_creation_callback != NULL) 525 primary_channel->sc_creation_callback(newchannel); 526 527 newchannel->probe_done = true; 528 return; 529 } 530 531 /* 532 * Start the process of binding the primary channel to the driver 533 */ 534 newchannel->device_obj = vmbus_device_create( 535 &newchannel->offermsg.offer.if_type, 536 &newchannel->offermsg.offer.if_instance, 537 newchannel); 538 if (!newchannel->device_obj) 539 goto err_deq_chan; 540 541 newchannel->device_obj->device_id = newchannel->device_id; 542 /* 543 * Add the new device to the bus. This will kick off device-driver 544 * binding which eventually invokes the device driver's AddDevice() 545 * method. 546 * 547 * If vmbus_device_register() fails, the 'device_obj' is freed in 548 * vmbus_device_release() as called by device_unregister() in the 549 * error path of vmbus_device_register(). In the outside error 550 * path, there's no need to free it. 551 */ 552 ret = vmbus_device_register(newchannel->device_obj); 553 554 if (ret != 0) { 555 pr_err("unable to add child device object (relid %d)\n", 556 newchannel->offermsg.child_relid); 557 goto err_deq_chan; 558 } 559 560 newchannel->probe_done = true; 561 return; 562 563 err_deq_chan: 564 mutex_lock(&vmbus_connection.channel_mutex); 565 566 /* 567 * We need to set the flag, otherwise 568 * vmbus_onoffer_rescind() can be blocked. 569 */ 570 newchannel->probe_done = true; 571 572 if (primary_channel == NULL) 573 list_del(&newchannel->listentry); 574 else 575 list_del(&newchannel->sc_list); 576 577 /* vmbus_process_offer() has mapped the channel. */ 578 vmbus_channel_unmap_relid(newchannel); 579 580 mutex_unlock(&vmbus_connection.channel_mutex); 581 582 vmbus_release_relid(newchannel->offermsg.child_relid); 583 584 free_channel(newchannel); 585 } 586 587 /* 588 * vmbus_process_offer - Process the offer by creating a channel/device 589 * associated with this offer 590 */ 591 static void vmbus_process_offer(struct vmbus_channel *newchannel) 592 { 593 struct vmbus_channel *channel; 594 struct workqueue_struct *wq; 595 bool fnew = true; 596 597 /* 598 * Synchronize vmbus_process_offer() and CPU hotplugging: 599 * 600 * CPU1 CPU2 601 * 602 * [vmbus_process_offer()] [Hot removal of the CPU] 603 * 604 * CPU_READ_LOCK CPUS_WRITE_LOCK 605 * LOAD cpu_online_mask SEARCH chn_list 606 * STORE target_cpu LOAD target_cpu 607 * INSERT chn_list STORE cpu_online_mask 608 * CPUS_READ_UNLOCK CPUS_WRITE_UNLOCK 609 * 610 * Forbids: CPU1's LOAD from *not* seing CPU2's STORE && 611 * CPU2's SEARCH from *not* seeing CPU1's INSERT 612 * 613 * Forbids: CPU2's SEARCH from seeing CPU1's INSERT && 614 * CPU2's LOAD from *not* seing CPU1's STORE 615 */ 616 cpus_read_lock(); 617 618 /* 619 * Serializes the modifications of the chn_list list as well as 620 * the accesses to next_numa_node_id in init_vp_index(). 621 */ 622 mutex_lock(&vmbus_connection.channel_mutex); 623 624 list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 625 if (guid_equal(&channel->offermsg.offer.if_type, 626 &newchannel->offermsg.offer.if_type) && 627 guid_equal(&channel->offermsg.offer.if_instance, 628 &newchannel->offermsg.offer.if_instance)) { 629 fnew = false; 630 newchannel->primary_channel = channel; 631 break; 632 } 633 } 634 635 init_vp_index(newchannel); 636 637 /* Remember the channels that should be cleaned up upon suspend. */ 638 if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel)) 639 atomic_inc(&vmbus_connection.nr_chan_close_on_suspend); 640 641 /* 642 * Now that we have acquired the channel_mutex, 643 * we can release the potentially racing rescind thread. 644 */ 645 atomic_dec(&vmbus_connection.offer_in_progress); 646 647 if (fnew) { 648 list_add_tail(&newchannel->listentry, 649 &vmbus_connection.chn_list); 650 } else { 651 /* 652 * Check to see if this is a valid sub-channel. 653 */ 654 if (newchannel->offermsg.offer.sub_channel_index == 0) { 655 mutex_unlock(&vmbus_connection.channel_mutex); 656 cpus_read_unlock(); 657 /* 658 * Don't call free_channel(), because newchannel->kobj 659 * is not initialized yet. 660 */ 661 kfree(newchannel); 662 WARN_ON_ONCE(1); 663 return; 664 } 665 /* 666 * Process the sub-channel. 667 */ 668 list_add_tail(&newchannel->sc_list, &channel->sc_list); 669 } 670 671 vmbus_channel_map_relid(newchannel); 672 673 mutex_unlock(&vmbus_connection.channel_mutex); 674 cpus_read_unlock(); 675 676 /* 677 * vmbus_process_offer() mustn't call channel->sc_creation_callback() 678 * directly for sub-channels, because sc_creation_callback() -> 679 * vmbus_open() may never get the host's response to the 680 * OPEN_CHANNEL message (the host may rescind a channel at any time, 681 * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind() 682 * may not wake up the vmbus_open() as it's blocked due to a non-zero 683 * vmbus_connection.offer_in_progress, and finally we have a deadlock. 684 * 685 * The above is also true for primary channels, if the related device 686 * drivers use sync probing mode by default. 687 * 688 * And, usually the handling of primary channels and sub-channels can 689 * depend on each other, so we should offload them to different 690 * workqueues to avoid possible deadlock, e.g. in sync-probing mode, 691 * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() -> 692 * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock 693 * and waits for all the sub-channels to appear, but the latter 694 * can't get the rtnl_lock and this blocks the handling of 695 * sub-channels. 696 */ 697 INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work); 698 wq = fnew ? vmbus_connection.handle_primary_chan_wq : 699 vmbus_connection.handle_sub_chan_wq; 700 queue_work(wq, &newchannel->add_channel_work); 701 } 702 703 /* 704 * Check if CPUs used by other channels of the same device. 705 * It should only be called by init_vp_index(). 706 */ 707 static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn) 708 { 709 struct vmbus_channel *primary = chn->primary_channel; 710 struct vmbus_channel *sc; 711 712 lockdep_assert_held(&vmbus_connection.channel_mutex); 713 714 if (!primary) 715 return false; 716 717 if (primary->target_cpu == cpu) 718 return true; 719 720 list_for_each_entry(sc, &primary->sc_list, sc_list) 721 if (sc != chn && sc->target_cpu == cpu) 722 return true; 723 724 return false; 725 } 726 727 /* 728 * We use this state to statically distribute the channel interrupt load. 729 */ 730 static int next_numa_node_id; 731 732 /* 733 * We can statically distribute the incoming channel interrupt load 734 * by binding a channel to VCPU. 735 * 736 * For non-performance critical channels we assign the VMBUS_CONNECT_CPU. 737 * Performance critical channels will be distributed evenly among all 738 * the available NUMA nodes. Once the node is assigned, we will assign 739 * the CPU based on a simple round robin scheme. 740 */ 741 static void init_vp_index(struct vmbus_channel *channel) 742 { 743 bool perf_chn = hv_is_perf_channel(channel); 744 u32 i, ncpu = num_online_cpus(); 745 cpumask_var_t available_mask; 746 struct cpumask *allocated_mask; 747 const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ); 748 u32 target_cpu; 749 int numa_node; 750 751 if (!perf_chn || 752 !alloc_cpumask_var(&available_mask, GFP_KERNEL) || 753 cpumask_empty(hk_mask)) { 754 /* 755 * If the channel is not a performance critical 756 * channel, bind it to VMBUS_CONNECT_CPU. 757 * In case alloc_cpumask_var() fails, bind it to 758 * VMBUS_CONNECT_CPU. 759 * If all the cpus are isolated, bind it to 760 * VMBUS_CONNECT_CPU. 761 */ 762 channel->target_cpu = VMBUS_CONNECT_CPU; 763 if (perf_chn) 764 hv_set_allocated_cpu(VMBUS_CONNECT_CPU); 765 return; 766 } 767 768 for (i = 1; i <= ncpu + 1; i++) { 769 while (true) { 770 numa_node = next_numa_node_id++; 771 if (numa_node == nr_node_ids) { 772 next_numa_node_id = 0; 773 continue; 774 } 775 if (cpumask_empty(cpumask_of_node(numa_node))) 776 continue; 777 break; 778 } 779 allocated_mask = &hv_context.hv_numa_map[numa_node]; 780 781 retry: 782 cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node)); 783 cpumask_and(available_mask, available_mask, hk_mask); 784 785 if (cpumask_empty(available_mask)) { 786 /* 787 * We have cycled through all the CPUs in the node; 788 * reset the allocated map. 789 */ 790 cpumask_clear(allocated_mask); 791 goto retry; 792 } 793 794 target_cpu = cpumask_first(available_mask); 795 cpumask_set_cpu(target_cpu, allocated_mask); 796 797 if (channel->offermsg.offer.sub_channel_index >= ncpu || 798 i > ncpu || !hv_cpuself_used(target_cpu, channel)) 799 break; 800 } 801 802 channel->target_cpu = target_cpu; 803 804 free_cpumask_var(available_mask); 805 } 806 807 #define UNLOAD_DELAY_UNIT_MS 10 /* 10 milliseconds */ 808 #define UNLOAD_WAIT_MS (100*1000) /* 100 seconds */ 809 #define UNLOAD_WAIT_LOOPS (UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS) 810 #define UNLOAD_MSG_MS (5*1000) /* Every 5 seconds */ 811 #define UNLOAD_MSG_LOOPS (UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS) 812 813 static void vmbus_wait_for_unload(void) 814 { 815 int cpu; 816 void *page_addr; 817 struct hv_message *msg; 818 struct vmbus_channel_message_header *hdr; 819 u32 message_type, i; 820 821 /* 822 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was 823 * used for initial contact or to CPU0 depending on host version. When 824 * we're crashing on a different CPU let's hope that IRQ handler on 825 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still 826 * functional and vmbus_unload_response() will complete 827 * vmbus_connection.unload_event. If not, the last thing we can do is 828 * read message pages for all CPUs directly. 829 * 830 * Wait up to 100 seconds since an Azure host must writeback any dirty 831 * data in its disk cache before the VMbus UNLOAD request will 832 * complete. This flushing has been empirically observed to take up 833 * to 50 seconds in cases with a lot of dirty data, so allow additional 834 * leeway and for inaccuracies in mdelay(). But eventually time out so 835 * that the panic path can't get hung forever in case the response 836 * message isn't seen. 837 */ 838 for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) { 839 if (completion_done(&vmbus_connection.unload_event)) 840 goto completed; 841 842 for_each_present_cpu(cpu) { 843 struct hv_per_cpu_context *hv_cpu 844 = per_cpu_ptr(hv_context.cpu_context, cpu); 845 846 /* 847 * In a CoCo VM the hyp_synic_message_page is not allocated 848 * in hv_synic_alloc(). Instead it is set/cleared in 849 * hv_hyp_synic_enable_regs() and hv_hyp_synic_disable_regs() 850 * such that it is set only when the CPU is online. If 851 * not all present CPUs are online, the message page 852 * might be NULL, so skip such CPUs. 853 */ 854 page_addr = hv_cpu->hyp_synic_message_page; 855 if (!page_addr) 856 continue; 857 858 msg = (struct hv_message *)page_addr 859 + VMBUS_MESSAGE_SINT; 860 861 message_type = READ_ONCE(msg->header.message_type); 862 if (message_type == HVMSG_NONE) 863 continue; 864 865 hdr = (struct vmbus_channel_message_header *) 866 msg->u.payload; 867 868 if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE) 869 complete(&vmbus_connection.unload_event); 870 871 vmbus_signal_eom(msg, message_type); 872 } 873 874 /* 875 * Give a notice periodically so someone watching the 876 * serial output won't think it is completely hung. 877 */ 878 if (!(i % UNLOAD_MSG_LOOPS)) 879 pr_notice("Waiting for VMBus UNLOAD to complete\n"); 880 881 mdelay(UNLOAD_DELAY_UNIT_MS); 882 } 883 pr_err("Continuing even though VMBus UNLOAD did not complete\n"); 884 885 completed: 886 /* 887 * We're crashing and already got the UNLOAD_RESPONSE, cleanup all 888 * maybe-pending messages on all CPUs to be able to receive new 889 * messages after we reconnect. 890 */ 891 for_each_present_cpu(cpu) { 892 struct hv_per_cpu_context *hv_cpu 893 = per_cpu_ptr(hv_context.cpu_context, cpu); 894 895 page_addr = hv_cpu->hyp_synic_message_page; 896 if (!page_addr) 897 continue; 898 899 msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; 900 msg->header.message_type = HVMSG_NONE; 901 } 902 } 903 904 /* 905 * vmbus_unload_response - Handler for the unload response. 906 */ 907 static void vmbus_unload_response(struct vmbus_channel_message_header *hdr) 908 { 909 /* 910 * This is a global event; just wakeup the waiting thread. 911 * Once we successfully unload, we can cleanup the monitor state. 912 * 913 * NB. A malicious or compromised Hyper-V could send a spurious 914 * message of type CHANNELMSG_UNLOAD_RESPONSE, and trigger a call 915 * of the complete() below. Make sure that unload_event has been 916 * initialized by the time this complete() is executed. 917 */ 918 complete(&vmbus_connection.unload_event); 919 } 920 921 void vmbus_initiate_unload(bool crash) 922 { 923 struct vmbus_channel_message_header hdr; 924 925 if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED) 926 return; 927 928 /* Pre-Win2012R2 hosts don't support reconnect */ 929 if (vmbus_proto_version < VERSION_WIN8_1) 930 return; 931 932 reinit_completion(&vmbus_connection.unload_event); 933 memset(&hdr, 0, sizeof(struct vmbus_channel_message_header)); 934 hdr.msgtype = CHANNELMSG_UNLOAD; 935 vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header), 936 !crash); 937 938 /* 939 * vmbus_initiate_unload() is also called on crash and the crash can be 940 * happening in an interrupt context, where scheduling is impossible. 941 */ 942 if (!crash) 943 wait_for_completion(&vmbus_connection.unload_event); 944 else 945 vmbus_wait_for_unload(); 946 } 947 948 static void vmbus_setup_channel_state(struct vmbus_channel *channel, 949 struct vmbus_channel_offer_channel *offer) 950 { 951 /* 952 * Setup state for signalling the host. 953 */ 954 channel->sig_event = VMBUS_EVENT_CONNECTION_ID; 955 956 channel->is_dedicated_interrupt = 957 (offer->is_dedicated_interrupt != 0); 958 channel->sig_event = offer->connection_id; 959 960 memcpy(&channel->offermsg, offer, 961 sizeof(struct vmbus_channel_offer_channel)); 962 channel->monitor_grp = (u8)offer->monitorid / 32; 963 channel->monitor_bit = (u8)offer->monitorid % 32; 964 channel->device_id = hv_get_dev_type(channel); 965 } 966 967 /* 968 * find_primary_channel_by_offer - Get the channel object given the new offer. 969 * This is only used in the resume path of hibernation. 970 */ 971 static struct vmbus_channel * 972 find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer) 973 { 974 struct vmbus_channel *channel = NULL, *iter; 975 const guid_t *inst1, *inst2; 976 977 /* Ignore sub-channel offers. */ 978 if (offer->offer.sub_channel_index != 0) 979 return NULL; 980 981 mutex_lock(&vmbus_connection.channel_mutex); 982 983 list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) { 984 inst1 = &iter->offermsg.offer.if_instance; 985 inst2 = &offer->offer.if_instance; 986 987 if (guid_equal(inst1, inst2)) { 988 channel = iter; 989 break; 990 } 991 } 992 993 mutex_unlock(&vmbus_connection.channel_mutex); 994 995 return channel; 996 } 997 998 static bool vmbus_is_valid_offer(const struct vmbus_channel_offer_channel *offer) 999 { 1000 const guid_t *guid = &offer->offer.if_type; 1001 u16 i; 1002 1003 if (!hv_is_isolation_supported()) 1004 return true; 1005 1006 if (is_hvsock_offer(offer)) 1007 return true; 1008 1009 for (i = 0; i < ARRAY_SIZE(vmbus_devs); i++) { 1010 if (guid_equal(guid, &vmbus_devs[i].guid)) 1011 return vmbus_devs[i].allowed_in_isolated; 1012 } 1013 return false; 1014 } 1015 1016 /* 1017 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition. 1018 * 1019 */ 1020 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) 1021 { 1022 struct vmbus_channel_offer_channel *offer; 1023 struct vmbus_channel *oldchannel, *newchannel; 1024 size_t offer_sz; 1025 bool co_ring_buffer, co_external_memory; 1026 1027 offer = (struct vmbus_channel_offer_channel *)hdr; 1028 1029 trace_vmbus_onoffer(offer); 1030 1031 if (!vmbus_is_valid_offer(offer)) { 1032 pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n", 1033 offer->child_relid); 1034 atomic_dec(&vmbus_connection.offer_in_progress); 1035 return; 1036 } 1037 1038 co_ring_buffer = is_co_ring_buffer(offer); 1039 co_external_memory = is_co_external_memory(offer); 1040 if (!co_ring_buffer && co_external_memory) { 1041 pr_err("Invalid offer relid=%d: the ring buffer isn't encrypted\n", 1042 offer->child_relid); 1043 return; 1044 } 1045 if (co_ring_buffer || co_external_memory) { 1046 if (vmbus_proto_version < VERSION_WIN10_V6_0 || !vmbus_is_confidential()) { 1047 pr_err("Invalid offer relid=%d: no support for confidential VMBus\n", 1048 offer->child_relid); 1049 atomic_dec(&vmbus_connection.offer_in_progress); 1050 return; 1051 } 1052 } 1053 1054 oldchannel = find_primary_channel_by_offer(offer); 1055 1056 if (oldchannel != NULL) { 1057 /* 1058 * We're resuming from hibernation: all the sub-channel and 1059 * hv_sock channels we had before the hibernation should have 1060 * been cleaned up, and now we must be seeing a re-offered 1061 * primary channel that we had before the hibernation. 1062 */ 1063 1064 /* 1065 * { Initially: channel relid = INVALID_RELID, 1066 * channels[valid_relid] = NULL } 1067 * 1068 * CPU1 CPU2 1069 * 1070 * [vmbus_onoffer()] [vmbus_device_release()] 1071 * 1072 * LOCK channel_mutex LOCK channel_mutex 1073 * STORE channel relid = valid_relid LOAD r1 = channel relid 1074 * MAP_RELID channel if (r1 != INVALID_RELID) 1075 * UNLOCK channel_mutex UNMAP_RELID channel 1076 * UNLOCK channel_mutex 1077 * 1078 * Forbids: r1 == valid_relid && 1079 * channels[valid_relid] == channel 1080 * 1081 * Note. r1 can be INVALID_RELID only for an hv_sock channel. 1082 * None of the hv_sock channels which were present before the 1083 * suspend are re-offered upon the resume. See the WARN_ON() 1084 * in hv_process_channel_removal(). 1085 */ 1086 mutex_lock(&vmbus_connection.channel_mutex); 1087 1088 atomic_dec(&vmbus_connection.offer_in_progress); 1089 1090 WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID); 1091 /* Fix up the relid. */ 1092 oldchannel->offermsg.child_relid = offer->child_relid; 1093 1094 offer_sz = sizeof(*offer); 1095 if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) { 1096 /* 1097 * This is not an error, since the host can also change 1098 * the other field(s) of the offer, e.g. on WS RS5 1099 * (Build 17763), the offer->connection_id of the 1100 * Mellanox VF vmbus device can change when the host 1101 * reoffers the device upon resume. 1102 */ 1103 pr_debug("vmbus offer changed: relid=%d\n", 1104 offer->child_relid); 1105 1106 print_hex_dump_debug("Old vmbus offer: ", 1107 DUMP_PREFIX_OFFSET, 16, 4, 1108 &oldchannel->offermsg, offer_sz, 1109 false); 1110 print_hex_dump_debug("New vmbus offer: ", 1111 DUMP_PREFIX_OFFSET, 16, 4, 1112 offer, offer_sz, false); 1113 1114 /* Fix up the old channel. */ 1115 vmbus_setup_channel_state(oldchannel, offer); 1116 } 1117 1118 /* Add the channel back to the array of channels. */ 1119 vmbus_channel_map_relid(oldchannel); 1120 mutex_unlock(&vmbus_connection.channel_mutex); 1121 return; 1122 } 1123 1124 /* Allocate the channel object and save this offer. */ 1125 newchannel = alloc_channel(); 1126 if (!newchannel) { 1127 vmbus_release_relid(offer->child_relid); 1128 atomic_dec(&vmbus_connection.offer_in_progress); 1129 pr_err("Unable to allocate channel object\n"); 1130 return; 1131 } 1132 newchannel->co_ring_buffer = co_ring_buffer; 1133 newchannel->co_external_memory = co_external_memory; 1134 1135 vmbus_setup_channel_state(newchannel, offer); 1136 1137 vmbus_process_offer(newchannel); 1138 } 1139 1140 static void check_ready_for_suspend_event(void) 1141 { 1142 /* 1143 * If all the sub-channels or hv_sock channels have been cleaned up, 1144 * then it's safe to suspend. 1145 */ 1146 if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend)) 1147 complete(&vmbus_connection.ready_for_suspend_event); 1148 } 1149 1150 /* 1151 * vmbus_onoffer_rescind - Rescind offer handler. 1152 * 1153 * We queue a work item to process this offer synchronously 1154 */ 1155 static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) 1156 { 1157 struct vmbus_channel_rescind_offer *rescind; 1158 struct vmbus_channel *channel; 1159 struct device *dev; 1160 bool clean_up_chan_for_suspend; 1161 1162 rescind = (struct vmbus_channel_rescind_offer *)hdr; 1163 1164 trace_vmbus_onoffer_rescind(rescind); 1165 1166 /* 1167 * The offer msg and the corresponding rescind msg 1168 * from the host are guranteed to be ordered - 1169 * offer comes in first and then the rescind. 1170 * Since we process these events in work elements, 1171 * and with preemption, we may end up processing 1172 * the events out of order. We rely on the synchronization 1173 * provided by offer_in_progress and by channel_mutex for 1174 * ordering these events: 1175 * 1176 * { Initially: offer_in_progress = 1 } 1177 * 1178 * CPU1 CPU2 1179 * 1180 * [vmbus_onoffer()] [vmbus_onoffer_rescind()] 1181 * 1182 * LOCK channel_mutex WAIT_ON offer_in_progress == 0 1183 * DECREMENT offer_in_progress LOCK channel_mutex 1184 * STORE channels[] LOAD channels[] 1185 * UNLOCK channel_mutex UNLOCK channel_mutex 1186 * 1187 * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE 1188 */ 1189 1190 while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { 1191 /* 1192 * We wait here until any channel offer is currently 1193 * being processed. 1194 */ 1195 msleep(1); 1196 } 1197 1198 mutex_lock(&vmbus_connection.channel_mutex); 1199 channel = relid2channel(rescind->child_relid); 1200 if (channel != NULL) { 1201 /* 1202 * Guarantee that no other instance of vmbus_onoffer_rescind() 1203 * has got a reference to the channel object. Synchronize on 1204 * &vmbus_connection.channel_mutex. 1205 */ 1206 if (channel->rescind_ref) { 1207 mutex_unlock(&vmbus_connection.channel_mutex); 1208 return; 1209 } 1210 channel->rescind_ref = true; 1211 } 1212 mutex_unlock(&vmbus_connection.channel_mutex); 1213 1214 if (channel == NULL) { 1215 /* 1216 * We failed in processing the offer message; 1217 * we would have cleaned up the relid in that 1218 * failure path. 1219 */ 1220 return; 1221 } 1222 1223 clean_up_chan_for_suspend = is_hvsock_channel(channel) || 1224 is_sub_channel(channel); 1225 /* 1226 * Before setting channel->rescind in vmbus_rescind_cleanup(), we 1227 * should make sure the channel callback is not running any more. 1228 */ 1229 vmbus_reset_channel_cb(channel); 1230 1231 /* 1232 * Now wait for offer handling to complete. 1233 */ 1234 vmbus_rescind_cleanup(channel); 1235 while (READ_ONCE(channel->probe_done) == false) { 1236 /* 1237 * We wait here until any channel offer is currently 1238 * being processed. 1239 */ 1240 msleep(1); 1241 } 1242 1243 /* 1244 * At this point, the rescind handling can proceed safely. 1245 */ 1246 1247 if (channel->device_obj) { 1248 if (channel->chn_rescind_callback) { 1249 channel->chn_rescind_callback(channel); 1250 1251 if (clean_up_chan_for_suspend) 1252 check_ready_for_suspend_event(); 1253 1254 return; 1255 } 1256 /* 1257 * We will have to unregister this device from the 1258 * driver core. 1259 */ 1260 dev = get_device(&channel->device_obj->device); 1261 if (dev) { 1262 vmbus_device_unregister(channel->device_obj); 1263 put_device(dev); 1264 } 1265 } else if (channel->primary_channel != NULL) { 1266 /* 1267 * Sub-channel is being rescinded. Following is the channel 1268 * close sequence when initiated from the driveri (refer to 1269 * vmbus_close() for details): 1270 * 1. Close all sub-channels first 1271 * 2. Then close the primary channel. 1272 */ 1273 mutex_lock(&vmbus_connection.channel_mutex); 1274 if (channel->state == CHANNEL_OPEN_STATE) { 1275 /* 1276 * The channel is currently not open; 1277 * it is safe for us to cleanup the channel. 1278 */ 1279 hv_process_channel_removal(channel); 1280 } else { 1281 complete(&channel->rescind_event); 1282 } 1283 mutex_unlock(&vmbus_connection.channel_mutex); 1284 } 1285 1286 /* The "channel" may have been freed. Do not access it any longer. */ 1287 1288 if (clean_up_chan_for_suspend) 1289 check_ready_for_suspend_event(); 1290 } 1291 1292 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) 1293 { 1294 BUG_ON(!is_hvsock_channel(channel)); 1295 1296 /* We always get a rescind msg when a connection is closed. */ 1297 while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind)) 1298 msleep(1); 1299 1300 vmbus_device_unregister(channel->device_obj); 1301 } 1302 EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); 1303 1304 1305 /* 1306 * vmbus_onoffers_delivered - 1307 * The CHANNELMSG_ALLOFFERS_DELIVERED message arrives after all 1308 * boot-time offers are delivered. A boot-time offer is for the primary 1309 * channel for any virtual hardware configured in the VM at the time it boots. 1310 * Boot-time offers include offers for physical devices assigned to the VM 1311 * via Hyper-V's Discrete Device Assignment (DDA) functionality that are 1312 * handled as virtual PCI devices in Linux (e.g., NVMe devices and GPUs). 1313 * Boot-time offers do not include offers for VMBus sub-channels. Because 1314 * devices can be hot-added to the VM after it is booted, additional channel 1315 * offers that aren't boot-time offers can be received at any time after the 1316 * all-offers-delivered message. 1317 * 1318 * SR-IOV NIC Virtual Functions (VFs) assigned to a VM are not considered 1319 * to be assigned to the VM at boot-time, and offers for VFs may occur after 1320 * the all-offers-delivered message. VFs are optional accelerators to the 1321 * synthetic VMBus NIC and are effectively hot-added only after the VMBus 1322 * NIC channel is opened (once it knows the guest can support it, via the 1323 * sriov bit in the netvsc protocol). 1324 */ 1325 static void vmbus_onoffers_delivered( 1326 struct vmbus_channel_message_header *hdr) 1327 { 1328 complete(&vmbus_connection.all_offers_delivered_event); 1329 } 1330 1331 /* 1332 * vmbus_onopen_result - Open result handler. 1333 * 1334 * This is invoked when we received a response to our channel open request. 1335 * Find the matching request, copy the response and signal the requesting 1336 * thread. 1337 */ 1338 static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr) 1339 { 1340 struct vmbus_channel_open_result *result; 1341 struct vmbus_channel_msginfo *msginfo; 1342 struct vmbus_channel_message_header *requestheader; 1343 struct vmbus_channel_open_channel *openmsg; 1344 unsigned long flags; 1345 1346 result = (struct vmbus_channel_open_result *)hdr; 1347 1348 trace_vmbus_onopen_result(result); 1349 1350 /* 1351 * Find the open msg, copy the result and signal/unblock the wait event 1352 */ 1353 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1354 1355 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1356 msglistentry) { 1357 requestheader = 1358 (struct vmbus_channel_message_header *)msginfo->msg; 1359 1360 if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) { 1361 openmsg = 1362 (struct vmbus_channel_open_channel *)msginfo->msg; 1363 if (openmsg->child_relid == result->child_relid && 1364 openmsg->openid == result->openid) { 1365 memcpy(&msginfo->response.open_result, 1366 result, 1367 sizeof( 1368 struct vmbus_channel_open_result)); 1369 complete(&msginfo->waitevent); 1370 break; 1371 } 1372 } 1373 } 1374 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1375 } 1376 1377 /* 1378 * vmbus_ongpadl_created - GPADL created handler. 1379 * 1380 * This is invoked when we received a response to our gpadl create request. 1381 * Find the matching request, copy the response and signal the requesting 1382 * thread. 1383 */ 1384 static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr) 1385 { 1386 struct vmbus_channel_gpadl_created *gpadlcreated; 1387 struct vmbus_channel_msginfo *msginfo; 1388 struct vmbus_channel_message_header *requestheader; 1389 struct vmbus_channel_gpadl_header *gpadlheader; 1390 unsigned long flags; 1391 1392 gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr; 1393 1394 trace_vmbus_ongpadl_created(gpadlcreated); 1395 1396 /* 1397 * Find the establish msg, copy the result and signal/unblock the wait 1398 * event 1399 */ 1400 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1401 1402 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1403 msglistentry) { 1404 requestheader = 1405 (struct vmbus_channel_message_header *)msginfo->msg; 1406 1407 if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) { 1408 gpadlheader = 1409 (struct vmbus_channel_gpadl_header *)requestheader; 1410 1411 if ((gpadlcreated->child_relid == 1412 gpadlheader->child_relid) && 1413 (gpadlcreated->gpadl == gpadlheader->gpadl)) { 1414 memcpy(&msginfo->response.gpadl_created, 1415 gpadlcreated, 1416 sizeof( 1417 struct vmbus_channel_gpadl_created)); 1418 complete(&msginfo->waitevent); 1419 break; 1420 } 1421 } 1422 } 1423 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1424 } 1425 1426 /* 1427 * vmbus_onmodifychannel_response - Modify Channel response handler. 1428 * 1429 * This is invoked when we received a response to our channel modify request. 1430 * Find the matching request, copy the response and signal the requesting thread. 1431 */ 1432 static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr) 1433 { 1434 struct vmbus_channel_modifychannel_response *response; 1435 struct vmbus_channel_msginfo *msginfo; 1436 unsigned long flags; 1437 1438 response = (struct vmbus_channel_modifychannel_response *)hdr; 1439 1440 trace_vmbus_onmodifychannel_response(response); 1441 1442 /* 1443 * Find the modify msg, copy the response and signal/unblock the wait event. 1444 */ 1445 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1446 1447 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) { 1448 struct vmbus_channel_message_header *responseheader = 1449 (struct vmbus_channel_message_header *)msginfo->msg; 1450 1451 if (responseheader->msgtype == CHANNELMSG_MODIFYCHANNEL) { 1452 struct vmbus_channel_modifychannel *modifymsg; 1453 1454 modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg; 1455 if (modifymsg->child_relid == response->child_relid) { 1456 memcpy(&msginfo->response.modify_response, response, 1457 sizeof(*response)); 1458 complete(&msginfo->waitevent); 1459 break; 1460 } 1461 } 1462 } 1463 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1464 } 1465 1466 /* 1467 * vmbus_ongpadl_torndown - GPADL torndown handler. 1468 * 1469 * This is invoked when we received a response to our gpadl teardown request. 1470 * Find the matching request, copy the response and signal the requesting 1471 * thread. 1472 */ 1473 static void vmbus_ongpadl_torndown( 1474 struct vmbus_channel_message_header *hdr) 1475 { 1476 struct vmbus_channel_gpadl_torndown *gpadl_torndown; 1477 struct vmbus_channel_msginfo *msginfo; 1478 struct vmbus_channel_message_header *requestheader; 1479 struct vmbus_channel_gpadl_teardown *gpadl_teardown; 1480 unsigned long flags; 1481 1482 gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr; 1483 1484 trace_vmbus_ongpadl_torndown(gpadl_torndown); 1485 1486 /* 1487 * Find the open msg, copy the result and signal/unblock the wait event 1488 */ 1489 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1490 1491 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1492 msglistentry) { 1493 requestheader = 1494 (struct vmbus_channel_message_header *)msginfo->msg; 1495 1496 if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) { 1497 gpadl_teardown = 1498 (struct vmbus_channel_gpadl_teardown *)requestheader; 1499 1500 if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) { 1501 memcpy(&msginfo->response.gpadl_torndown, 1502 gpadl_torndown, 1503 sizeof( 1504 struct vmbus_channel_gpadl_torndown)); 1505 complete(&msginfo->waitevent); 1506 break; 1507 } 1508 } 1509 } 1510 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1511 } 1512 1513 /* 1514 * vmbus_onversion_response - Version response handler 1515 * 1516 * This is invoked when we received a response to our initiate contact request. 1517 * Find the matching request, copy the response and signal the requesting 1518 * thread. 1519 */ 1520 static void vmbus_onversion_response( 1521 struct vmbus_channel_message_header *hdr) 1522 { 1523 struct vmbus_channel_msginfo *msginfo; 1524 struct vmbus_channel_message_header *requestheader; 1525 struct vmbus_channel_version_response *version_response; 1526 unsigned long flags; 1527 1528 version_response = (struct vmbus_channel_version_response *)hdr; 1529 1530 trace_vmbus_onversion_response(version_response); 1531 1532 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1533 1534 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1535 msglistentry) { 1536 requestheader = 1537 (struct vmbus_channel_message_header *)msginfo->msg; 1538 1539 if (requestheader->msgtype == 1540 CHANNELMSG_INITIATE_CONTACT) { 1541 memcpy(&msginfo->response.version_response, 1542 version_response, 1543 sizeof(struct vmbus_channel_version_response)); 1544 complete(&msginfo->waitevent); 1545 } 1546 } 1547 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1548 } 1549 1550 /* Channel message dispatch table */ 1551 const struct vmbus_channel_message_table_entry 1552 channel_message_table[CHANNELMSG_COUNT] = { 1553 { CHANNELMSG_INVALID, 0, NULL, 0}, 1554 { CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer, 1555 sizeof(struct vmbus_channel_offer_channel)}, 1556 { CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind, 1557 sizeof(struct vmbus_channel_rescind_offer) }, 1558 { CHANNELMSG_REQUESTOFFERS, 0, NULL, 0}, 1559 { CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered, 0}, 1560 { CHANNELMSG_OPENCHANNEL, 0, NULL, 0}, 1561 { CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result, 1562 sizeof(struct vmbus_channel_open_result)}, 1563 { CHANNELMSG_CLOSECHANNEL, 0, NULL, 0}, 1564 { CHANNELMSG_GPADL_HEADER, 0, NULL, 0}, 1565 { CHANNELMSG_GPADL_BODY, 0, NULL, 0}, 1566 { CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created, 1567 sizeof(struct vmbus_channel_gpadl_created)}, 1568 { CHANNELMSG_GPADL_TEARDOWN, 0, NULL, 0}, 1569 { CHANNELMSG_GPADL_TORNDOWN, 1, vmbus_ongpadl_torndown, 1570 sizeof(struct vmbus_channel_gpadl_torndown) }, 1571 { CHANNELMSG_RELID_RELEASED, 0, NULL, 0}, 1572 { CHANNELMSG_INITIATE_CONTACT, 0, NULL, 0}, 1573 { CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response, 1574 sizeof(struct vmbus_channel_version_response)}, 1575 { CHANNELMSG_UNLOAD, 0, NULL, 0}, 1576 { CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response, 0}, 1577 { CHANNELMSG_18, 0, NULL, 0}, 1578 { CHANNELMSG_19, 0, NULL, 0}, 1579 { CHANNELMSG_20, 0, NULL, 0}, 1580 { CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL, 0}, 1581 { CHANNELMSG_MODIFYCHANNEL, 0, NULL, 0}, 1582 { CHANNELMSG_TL_CONNECT_RESULT, 0, NULL, 0}, 1583 { CHANNELMSG_MODIFYCHANNEL_RESPONSE, 1, vmbus_onmodifychannel_response, 1584 sizeof(struct vmbus_channel_modifychannel_response)}, 1585 }; 1586 1587 /* 1588 * vmbus_onmessage - Handler for channel protocol messages. 1589 * 1590 * This is invoked in the vmbus worker thread context. 1591 */ 1592 void vmbus_onmessage(struct vmbus_channel_message_header *hdr) 1593 { 1594 trace_vmbus_on_message(hdr); 1595 1596 /* 1597 * vmbus_on_msg_dpc() makes sure the hdr->msgtype here can not go 1598 * out of bound and the message_handler pointer can not be NULL. 1599 */ 1600 channel_message_table[hdr->msgtype].message_handler(hdr); 1601 } 1602 1603 /* 1604 * vmbus_request_offers - Send a request to get all our pending offers 1605 * and wait for all boot-time offers to arrive. 1606 */ 1607 int vmbus_request_offers(void) 1608 { 1609 struct vmbus_channel_message_header *msg; 1610 struct vmbus_channel_msginfo *msginfo; 1611 int ret; 1612 1613 msginfo = kzalloc(sizeof(*msginfo) + 1614 sizeof(struct vmbus_channel_message_header), 1615 GFP_KERNEL); 1616 if (!msginfo) 1617 return -ENOMEM; 1618 1619 msg = (struct vmbus_channel_message_header *)msginfo->msg; 1620 1621 msg->msgtype = CHANNELMSG_REQUESTOFFERS; 1622 1623 /* 1624 * This REQUESTOFFERS message will result in the host sending an all 1625 * offers delivered message after all the boot-time offers are sent. 1626 */ 1627 ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header), 1628 true); 1629 1630 trace_vmbus_request_offers(ret); 1631 1632 if (ret != 0) { 1633 pr_err("Unable to request offers - %d\n", ret); 1634 1635 goto cleanup; 1636 } 1637 1638 /* 1639 * Wait for the host to send all boot-time offers. 1640 * Keeping it as a best-effort mechanism, where a warning is 1641 * printed if a timeout occurs, and execution is resumed. 1642 */ 1643 if (!wait_for_completion_timeout(&vmbus_connection.all_offers_delivered_event, 1644 secs_to_jiffies(60))) { 1645 pr_warn("timed out waiting for all boot-time offers to be delivered.\n"); 1646 } 1647 1648 /* 1649 * Flush handling of offer messages (which may initiate work on 1650 * other work queues). 1651 */ 1652 flush_workqueue(vmbus_connection.work_queue); 1653 1654 /* 1655 * Flush workqueue for processing the incoming offers. Subchannel 1656 * offers and their processing can happen later, so there is no need to 1657 * flush that workqueue here. 1658 */ 1659 flush_workqueue(vmbus_connection.handle_primary_chan_wq); 1660 1661 cleanup: 1662 kfree(msginfo); 1663 1664 return ret; 1665 } 1666 1667 void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel, 1668 void (*sc_cr_cb)(struct vmbus_channel *new_sc)) 1669 { 1670 primary_channel->sc_creation_callback = sc_cr_cb; 1671 } 1672 EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback); 1673 1674 void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, 1675 void (*chn_rescind_cb)(struct vmbus_channel *)) 1676 { 1677 channel->chn_rescind_callback = chn_rescind_cb; 1678 } 1679 EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback); 1680