// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <linux/export.h>
#include <asm/mshyperv.h>
#include <linux/sched/isolation.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel);

/*
 * Table of the VMBus device classes known to this driver.
 *
 * hv_get_dev_type() walks the entries from HV_IDE up to (but excluding)
 * HV_UNKNOWN, indexes this array with the candidate type and returns the
 * matching index as the device type.  The position of an entry is therefore
 * significant: do not reorder the entries.
 *
 * .allowed_in_isolated is what vmbus_is_valid_offer() returns for a matching
 * offer when hv_is_isolation_supported() is true.
 */
const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE,
	  HV_IDE_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* SCSI */
	{ .dev_type = HV_SCSI,
	  HV_SCSI_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = true,
	},

	/* Fibre Channel */
	{ .dev_type = HV_FC,
	  HV_SYNTHFC_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* Synthetic NIC */
	{ .dev_type = HV_NIC,
	  HV_NIC_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = true,
	},

	/* Network Direct */
	{ .dev_type = HV_ND,
	  HV_ND_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* PCIE */
	{ .dev_type = HV_PCIE,
	  HV_PCIE_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB,
	  HV_SYNTHVID_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD,
	  HV_KBD_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Synthetic MOUSE */
	{ .dev_type = HV_MOUSE,
	  HV_MOUSE_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* KVP */
	{ .dev_type = HV_KVP,
	  HV_KVP_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Time Synch */
	{ .dev_type = HV_TS,
	  HV_TS_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Heartbeat */
	{ .dev_type = HV_HB,
	  HV_HEART_BEAT_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN,
	  HV_SHUTDOWN_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* File copy */
	/* fcopy has always used a 16 KB ring buffer and has worked well for many years */
	{ .pref_ring_size = 0x4000,
	  .dev_type = HV_FCOPY,
	  HV_FCOPY_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Backup */
	{ .dev_type = HV_BACKUP,
	  HV_VSS_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Dynamic Memory */
	{ .dev_type = HV_DM,
	  HV_DM_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/*
	 * Unknown GUID
	 * A 64 KB ring buffer + 4 KB header should be a sufficient size for any
	 * Hyper-V device apart from HV_NIC and HV_SCSI. This entry keeps unknown
	 * devices from falling back to a much bigger (2 MB) ring size.
	 */
	{ .pref_ring_size = 0x11000,
	  .dev_type = HV_UNKNOWN,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},
};
EXPORT_SYMBOL_GPL(vmbus_devs);

/*
 * Device GUIDs that is_unsupported_vmbus_devs() matches; hv_get_dev_type()
 * reports such channels as HV_UNKNOWN without logging an "Unknown GUID".
 */
static const struct {
	guid_t guid;
} vmbus_unsupported_devs[] = {
	{ HV_AVMA1_GUID },
	{ HV_AVMA2_GUID },
	{ HV_RDV_GUID },
	{ HV_IMC_GUID },
};

/*
 * The rescinded channel may be blocked waiting for a response from the host;
 * take care of that.
172 */ 173 static void vmbus_rescind_cleanup(struct vmbus_channel *channel) 174 { 175 struct vmbus_channel_msginfo *msginfo; 176 unsigned long flags; 177 178 179 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 180 channel->rescind = true; 181 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 182 msglistentry) { 183 184 if (msginfo->waiting_channel == channel) { 185 complete(&msginfo->waitevent); 186 break; 187 } 188 } 189 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 190 } 191 192 static bool is_unsupported_vmbus_devs(const guid_t *guid) 193 { 194 int i; 195 196 for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++) 197 if (guid_equal(guid, &vmbus_unsupported_devs[i].guid)) 198 return true; 199 return false; 200 } 201 202 static u16 hv_get_dev_type(const struct vmbus_channel *channel) 203 { 204 const guid_t *guid = &channel->offermsg.offer.if_type; 205 u16 i; 206 207 if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid)) 208 return HV_UNKNOWN; 209 210 for (i = HV_IDE; i < HV_UNKNOWN; i++) { 211 if (guid_equal(guid, &vmbus_devs[i].guid)) 212 return i; 213 } 214 pr_info("Unknown GUID: %pUl\n", guid); 215 return i; 216 } 217 218 /** 219 * vmbus_prep_negotiate_resp() - Create default response for Negotiate message 220 * @icmsghdrp: Pointer to msg header structure 221 * @buf: Raw buffer channel data 222 * @buflen: Length of the raw buffer channel data. 223 * @fw_version: The framework versions we can support. 224 * @fw_vercnt: The size of @fw_version. 225 * @srv_version: The service versions we can support. 226 * @srv_vercnt: The size of @srv_version. 227 * @nego_fw_version: The selected framework version. 228 * @nego_srv_version: The selected service version. 229 * 230 * Note: Versions are given in decreasing order. 231 * 232 * Set up and fill in default negotiate response message. 233 * Mainly used by Hyper-V drivers. 
*/
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
				u32 buflen, const int *fw_version, int fw_vercnt,
				const int *srv_version, int srv_vercnt,
				int *nego_fw_version, int *nego_srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i, j;
	bool found_match = false;
	struct icmsg_negotiate *negop;

	/* Check that there's enough space for icframe_vercnt, icmsg_vercnt */
	if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) {
		pr_err_ratelimited("Invalid icmsg negotiate\n");
		return false;
	}

	icmsghdrp->icmsgsize = 0x10;
	negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR];

	/*
	 * Until a match is found, the "major" variables hold the version
	 * counts announced in the packet and the "minor" variables hold 0.
	 */
	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/* Validate negop packet */
	if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
	    icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
	    ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) {
		pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n",
				   icframe_major, icmsg_major);
		goto fw_error;
	}

	/*
	 * Select the framework version number we will
	 * support.
	 */

	/* The first entry of @fw_version that the packet also lists wins. */
	for (i = 0; i < fw_vercnt; i++) {
		fw_major = (fw_version[i] >> 16);
		fw_minor = (fw_version[i] & 0xFFFF);

		for (j = 0; j < negop->icframe_vercnt; j++) {
			if ((negop->icversion_data[j].major == fw_major) &&
			    (negop->icversion_data[j].minor == fw_minor)) {
				icframe_major = negop->icversion_data[j].major;
				icframe_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	/*
	 * Same selection for the service version; the service entries follow
	 * the icframe_vercnt framework entries in icversion_data[].
	 */
	for (i = 0; i < srv_vercnt; i++) {
		srv_major = (srv_version[i] >> 16);
		srv_minor = (srv_version[i] & 0xFFFF);

		for (j = negop->icframe_vercnt;
			(j < negop->icframe_vercnt + negop->icmsg_vercnt);
			j++) {

			if ((negop->icversion_data[j].major == srv_major) &&
				(negop->icversion_data[j].minor == srv_minor)) {

				icmsg_major = negop->icversion_data[j].major;
				icmsg_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	/* Both counts are 1 on success and 0 on any failure. */
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	if (nego_fw_version)
		*nego_fw_version = (icframe_major << 16) | icframe_minor;

	if (nego_srv_version)
		*nego_srv_version = (icmsg_major << 16) | icmsg_minor;

	/* Slots 0 and 1 are written unconditionally, also on failure. */
	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 *
 * The object is zero-allocated with GFP_ATOMIC.  Returns NULL if the
 * allocation fails.
 */
static struct vmbus_channel *alloc_channel(void)
{
	struct vmbus_channel *channel;

	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
	if (!channel)
		return NULL;

	spin_lock_init(&channel->sched_lock);
	init_completion(&channel->rescind_event);

	INIT_LIST_HEAD(&channel->sc_list);

	tasklet_init(&channel->callback_event,
		     vmbus_on_event, (unsigned long)channel);

	hv_ringbuffer_pre_init(channel);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 *
 * Kills the channel's callback tasklet, removes its attribute group and
 * drops a reference on channel->kobj.
 */
static void free_channel(struct vmbus_channel *channel)
{
	tasklet_kill(&channel->callback_event);
	vmbus_remove_channel_attr_group(channel);

	kobject_put(&channel->kobj);
}

/*
 * vmbus_channel_map_relid - Publish @channel in vmbus_connection.channels[]
 * at the index given by the relid of its offer.  Warns and does nothing if
 * the relid is not below MAX_CHANNEL_RELIDS.
 */
void vmbus_channel_map_relid(struct vmbus_channel *channel)
{
	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
		return;
	/*
	 * The mapping of the channel's relid is visible from the CPUs that
	 * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
	 * execute:
	 *
	 *  (a) In the "normal (i.e., not resuming from hibernation)" path,
	 *      the full barrier in virt_store_mb() guarantees that the store
	 *      is propagated to all CPUs before the add_channel_work work
	 *      is queued.  In turn, add_channel_work is queued before the
	 *      channel's ring buffer is allocated/initialized and the
	 *      OPENCHANNEL message for the channel is sent in vmbus_open().
	 *      Hyper-V won't start sending the interrupts for the channel
	 *      before the OPENCHANNEL message is acked.  The memory barrier
	 *      in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
	 *      that vmbus_chan_sched() must find the channel's relid in
	 *      recv_int_page before retrieving the channel pointer from the
	 *      array of channels.
	 *
	 *  (b) In the "resuming from hibernation" path, the virt_store_mb()
	 *      guarantees that the store is propagated to all CPUs before
	 *      the VMBus connection is marked as ready for the resume event
	 *      (cf. check_ready_for_resume_event()).  The interrupt handler
	 *      of the VMBus driver and vmbus_chan_sched() can not run before
	 *      vmbus_bus_resume() has completed execution (cf. resume_noirq).
	 */
	virt_store_mb(
		vmbus_connection.channels[channel->offermsg.child_relid],
		channel);
}

/*
 * vmbus_channel_unmap_relid - Clear the vmbus_connection.channels[] slot of
 * @channel's relid.  Warns and does nothing if the relid is not below
 * MAX_CHANNEL_RELIDS.
 */
void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
{
	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
		return;
	WRITE_ONCE(
		vmbus_connection.channels[channel->offermsg.child_relid],
		NULL);
}

/*
 * vmbus_release_relid - Post a CHANNELMSG_RELID_RELEASED message for @relid.
 * The result of vmbus_post_msg() is only traced, not returned.
 */
static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;
	int ret;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	ret = vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
			     true);

	trace_vmbus_release_relid(&msg, ret);
}

/*
 * hv_process_channel_removal - Unlink and free a rescinded channel.
 *
 * The caller must hold vmbus_connection.channel_mutex and the channel must
 * already be marked as rescinded (BUG otherwise).
 */
void hv_process_channel_removal(struct vmbus_channel *channel)
{
	lockdep_assert_held(&vmbus_connection.channel_mutex);
	BUG_ON(!channel->rescind);

	/*
	 * hv_process_channel_removal() could find INVALID_RELID only for
	 * hv_sock channels.  See the inline comments in vmbus_onoffer().
	 */
	WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
		!is_hvsock_channel(channel));

	/*
	 * Upon suspend, an in-use hv_sock channel is removed from the array of
	 * channels and the relid is invalidated.  After hibernation, when the
	 * user-space application destroys the channel, it's unnecessary and
	 * unsafe to remove the channel from the array of channels.  See also
	 * the inline comments before the call of vmbus_release_relid() below.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_channel_unmap_relid(channel);

	/* Primary channels sit on chn_list, sub-channels on their primary's sc_list. */
	if (channel->primary_channel == NULL)
		list_del(&channel->listentry);
	else
		list_del(&channel->sc_list);

	/*
	 * If this is a "perf" channel, updates the hv_numa_map[] masks so that
	 * init_vp_index() can (re-)use the CPU.
	 */
	if (hv_is_perf_channel(channel))
		hv_clear_allocated_cpu(channel->target_cpu);

	/*
	 * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
	 * the relid is invalidated; after hibernation, when the user-space app
	 * destroys the channel, the relid is INVALID_RELID, and in this case
	 * it's unnecessary and unsafe to release the old relid, since the same
	 * relid can refer to a completely different channel now.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_release_relid(channel->offermsg.child_relid);

	free_channel(channel);
}

/*
 * vmbus_free_channels - Mark every channel on chn_list as rescinded and
 * unregister its device object.
 */
void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
		listentry) {
		/* hv_process_channel_removal() needs this */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
}

/*
 * Work item queued by vmbus_process_offer(): adds the kobject and runs the
 * creation callback for a sub-channel, or creates and registers the device
 * for a primary channel.  On failure the channel is unlinked, unmapped, its
 * relid is released and the channel is freed.
 *
 * Note: the function can run concurrently for primary/sub channels.
 */
static void vmbus_add_channel_work(struct work_struct *work)
{
	struct vmbus_channel *newchannel =
		container_of(work, struct vmbus_channel, add_channel_work);
	struct vmbus_channel *primary_channel = newchannel->primary_channel;
	int ret;

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly.
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (primary_channel != NULL) {
		/* newchannel is a sub-channel. */
		struct hv_device *dev = primary_channel->device_obj;

		if (vmbus_add_channel_kobj(dev, newchannel))
			goto err_deq_chan;

		if (primary_channel->sc_creation_callback != NULL)
			primary_channel->sc_creation_callback(newchannel);

		newchannel->probe_done = true;
		return;
	}

	/*
	 * Start the process of binding the primary channel to the driver
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = newchannel->device_id;
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 *
	 * If vmbus_device_register() fails, the 'device_obj' is freed in
	 * vmbus_device_release() as called by device_unregister() in the
	 * error path of vmbus_device_register(). In the outside error
	 * path, there's no need to free it.
	 */
	ret = vmbus_device_register(newchannel->device_obj);

	if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		goto err_deq_chan;
	}

	newchannel->probe_done = true;
	return;

err_deq_chan:
	mutex_lock(&vmbus_connection.channel_mutex);

	/*
	 * We need to set the flag, otherwise
	 * vmbus_onoffer_rescind() can be blocked.
	 */
	newchannel->probe_done = true;

	if (primary_channel == NULL)
		list_del(&newchannel->listentry);
	else
		list_del(&newchannel->sc_list);

	/* vmbus_process_offer() has mapped the channel. */
	vmbus_channel_unmap_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);

	vmbus_release_relid(newchannel->offermsg.child_relid);

	free_channel(newchannel);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	struct workqueue_struct *wq;
	bool fnew = true;

	/*
	 * Synchronize vmbus_process_offer() and CPU hotplugging:
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_process_offer()]	[Hot removal of the CPU]
	 *
	 * CPUS_READ_LOCK		CPUS_WRITE_LOCK
	 * LOAD cpu_online_mask		SEARCH chn_list
	 * STORE target_cpu		LOAD target_cpu
	 * INSERT chn_list		STORE cpu_online_mask
	 * CPUS_READ_UNLOCK		CPUS_WRITE_UNLOCK
	 *
	 * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
	 *              CPU2's SEARCH from *not* seeing CPU1's INSERT
	 *
	 * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
	 *              CPU2's LOAD from *not* seeing CPU1's STORE
	 */
	cpus_read_lock();

	/*
	 * Serializes the modifications of the chn_list list as well as
	 * the accesses to next_numa_node_id in init_vp_index().
	 */
	mutex_lock(&vmbus_connection.channel_mutex);

	/*
	 * An existing primary channel with the same type and instance GUIDs
	 * means that this offer is for a sub-channel of that primary.
	 */
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (guid_equal(&channel->offermsg.offer.if_type,
			       &newchannel->offermsg.offer.if_type) &&
		    guid_equal(&channel->offermsg.offer.if_instance,
			       &newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			newchannel->primary_channel = channel;
			break;
		}
	}

	init_vp_index(newchannel);

	/* Remember the channels that should be cleaned up upon suspend. */
	if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
		atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);

	/*
	 * Now that we have acquired the channel_mutex,
	 * we can release the potentially racing rescind thread.
	 */
	atomic_dec(&vmbus_connection.offer_in_progress);

	if (fnew) {
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);
	} else {
		/*
		 * Check to see if this is a valid sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index == 0) {
			mutex_unlock(&vmbus_connection.channel_mutex);
			cpus_read_unlock();
			/*
			 * Don't call free_channel(), because newchannel->kobj
			 * is not initialized yet.
			 */
			kfree(newchannel);
			WARN_ON_ONCE(1);
			return;
		}
		/*
		 * Process the sub-channel.
		 */
		list_add_tail(&newchannel->sc_list, &channel->sc_list);
	}

	vmbus_channel_map_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);
	cpus_read_unlock();

	/*
	 * vmbus_process_offer() mustn't call channel->sc_creation_callback()
	 * directly for sub-channels, because sc_creation_callback() ->
	 * vmbus_open() may never get the host's response to the
	 * OPEN_CHANNEL message (the host may rescind a channel at any time,
	 * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
	 * may not wake up the vmbus_open() as it's blocked due to a non-zero
	 * vmbus_connection.offer_in_progress, and finally we have a deadlock.
	 *
	 * The above is also true for primary channels, if the related device
	 * drivers use sync probing mode by default.
	 *
	 * And, usually the handling of primary channels and sub-channels can
	 * depend on each other, so we should offload them to different
	 * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
	 * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
	 * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
	 * and waits for all the sub-channels to appear, but the latter
	 * can't get the rtnl_lock and this blocks the handling of
	 * sub-channels.
	 */
	INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
	wq = fnew ? vmbus_connection.handle_primary_chan_wq :
		    vmbus_connection.handle_sub_chan_wq;
	queue_work(wq, &newchannel->add_channel_work);
}

/*
 * Check whether @cpu is already the target CPU of the primary channel or of
 * another sub-channel of the device that @chn belongs to.  Always false for
 * a channel that has no primary channel.
 * It should only be called by init_vp_index().
 */
static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn)
{
	struct vmbus_channel *primary = chn->primary_channel;
	struct vmbus_channel *sc;

	lockdep_assert_held(&vmbus_connection.channel_mutex);

	if (!primary)
		return false;

	if (primary->target_cpu == cpu)
		return true;

	list_for_each_entry(sc, &primary->sc_list, sc_list)
		if (sc != chn && sc->target_cpu == cpu)
			return true;

	return false;
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * We can statically distribute the incoming channel interrupt load
 * by binding a channel to VCPU.
 *
 * For non-performance critical channels we assign the VMBUS_CONNECT_CPU.
 * Performance critical channels will be distributed evenly among all
 * the available NUMA nodes.  Once the node is assigned, we will assign
 * the CPU based on a simple round robin scheme.
740 */ 741 static void init_vp_index(struct vmbus_channel *channel) 742 { 743 bool perf_chn = hv_is_perf_channel(channel); 744 u32 i, ncpu = num_online_cpus(); 745 cpumask_var_t available_mask; 746 struct cpumask *allocated_mask; 747 const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ); 748 u32 target_cpu; 749 int numa_node; 750 751 if (!perf_chn || 752 !alloc_cpumask_var(&available_mask, GFP_KERNEL) || 753 cpumask_empty(hk_mask)) { 754 /* 755 * If the channel is not a performance critical 756 * channel, bind it to VMBUS_CONNECT_CPU. 757 * In case alloc_cpumask_var() fails, bind it to 758 * VMBUS_CONNECT_CPU. 759 * If all the cpus are isolated, bind it to 760 * VMBUS_CONNECT_CPU. 761 */ 762 channel->target_cpu = VMBUS_CONNECT_CPU; 763 if (perf_chn) 764 hv_set_allocated_cpu(VMBUS_CONNECT_CPU); 765 return; 766 } 767 768 for (i = 1; i <= ncpu + 1; i++) { 769 while (true) { 770 numa_node = next_numa_node_id++; 771 if (numa_node == nr_node_ids) { 772 next_numa_node_id = 0; 773 continue; 774 } 775 if (cpumask_empty(cpumask_of_node(numa_node))) 776 continue; 777 break; 778 } 779 allocated_mask = &hv_context.hv_numa_map[numa_node]; 780 781 retry: 782 cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node)); 783 cpumask_and(available_mask, available_mask, hk_mask); 784 785 if (cpumask_empty(available_mask)) { 786 /* 787 * We have cycled through all the CPUs in the node; 788 * reset the allocated map. 
789 */ 790 cpumask_clear(allocated_mask); 791 goto retry; 792 } 793 794 target_cpu = cpumask_first(available_mask); 795 cpumask_set_cpu(target_cpu, allocated_mask); 796 797 if (channel->offermsg.offer.sub_channel_index >= ncpu || 798 i > ncpu || !hv_cpuself_used(target_cpu, channel)) 799 break; 800 } 801 802 channel->target_cpu = target_cpu; 803 804 free_cpumask_var(available_mask); 805 } 806 807 #define UNLOAD_DELAY_UNIT_MS 10 /* 10 milliseconds */ 808 #define UNLOAD_WAIT_MS (100*1000) /* 100 seconds */ 809 #define UNLOAD_WAIT_LOOPS (UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS) 810 #define UNLOAD_MSG_MS (5*1000) /* Every 5 seconds */ 811 #define UNLOAD_MSG_LOOPS (UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS) 812 813 static void vmbus_wait_for_unload(void) 814 { 815 int cpu; 816 void *page_addr; 817 struct hv_message *msg; 818 struct vmbus_channel_message_header *hdr; 819 u32 message_type, i; 820 821 /* 822 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was 823 * used for initial contact or to CPU0 depending on host version. When 824 * we're crashing on a different CPU let's hope that IRQ handler on 825 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still 826 * functional and vmbus_unload_response() will complete 827 * vmbus_connection.unload_event. If not, the last thing we can do is 828 * read message pages for all CPUs directly. 829 * 830 * Wait up to 100 seconds since an Azure host must writeback any dirty 831 * data in its disk cache before the VMbus UNLOAD request will 832 * complete. This flushing has been empirically observed to take up 833 * to 50 seconds in cases with a lot of dirty data, so allow additional 834 * leeway and for inaccuracies in mdelay(). But eventually time out so 835 * that the panic path can't get hung forever in case the response 836 * message isn't seen. 
837 */ 838 for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) { 839 if (completion_done(&vmbus_connection.unload_event)) 840 goto completed; 841 842 for_each_present_cpu(cpu) { 843 struct hv_per_cpu_context *hv_cpu 844 = per_cpu_ptr(hv_context.cpu_context, cpu); 845 846 /* 847 * In a CoCo VM the synic_message_page is not allocated 848 * in hv_synic_alloc(). Instead it is set/cleared in 849 * hv_synic_enable_regs() and hv_synic_disable_regs() 850 * such that it is set only when the CPU is online. If 851 * not all present CPUs are online, the message page 852 * might be NULL, so skip such CPUs. 853 */ 854 page_addr = hv_cpu->synic_message_page; 855 if (!page_addr) 856 continue; 857 858 msg = (struct hv_message *)page_addr 859 + VMBUS_MESSAGE_SINT; 860 861 message_type = READ_ONCE(msg->header.message_type); 862 if (message_type == HVMSG_NONE) 863 continue; 864 865 hdr = (struct vmbus_channel_message_header *) 866 msg->u.payload; 867 868 if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE) 869 complete(&vmbus_connection.unload_event); 870 871 vmbus_signal_eom(msg, message_type); 872 } 873 874 /* 875 * Give a notice periodically so someone watching the 876 * serial output won't think it is completely hung. 877 */ 878 if (!(i % UNLOAD_MSG_LOOPS)) 879 pr_notice("Waiting for VMBus UNLOAD to complete\n"); 880 881 mdelay(UNLOAD_DELAY_UNIT_MS); 882 } 883 pr_err("Continuing even though VMBus UNLOAD did not complete\n"); 884 885 completed: 886 /* 887 * We're crashing and already got the UNLOAD_RESPONSE, cleanup all 888 * maybe-pending messages on all CPUs to be able to receive new 889 * messages after we reconnect. 
890 */ 891 for_each_present_cpu(cpu) { 892 struct hv_per_cpu_context *hv_cpu 893 = per_cpu_ptr(hv_context.cpu_context, cpu); 894 895 page_addr = hv_cpu->synic_message_page; 896 if (!page_addr) 897 continue; 898 899 msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; 900 msg->header.message_type = HVMSG_NONE; 901 } 902 } 903 904 /* 905 * vmbus_unload_response - Handler for the unload response. 906 */ 907 static void vmbus_unload_response(struct vmbus_channel_message_header *hdr) 908 { 909 /* 910 * This is a global event; just wakeup the waiting thread. 911 * Once we successfully unload, we can cleanup the monitor state. 912 * 913 * NB. A malicious or compromised Hyper-V could send a spurious 914 * message of type CHANNELMSG_UNLOAD_RESPONSE, and trigger a call 915 * of the complete() below. Make sure that unload_event has been 916 * initialized by the time this complete() is executed. 917 */ 918 complete(&vmbus_connection.unload_event); 919 } 920 921 void vmbus_initiate_unload(bool crash) 922 { 923 struct vmbus_channel_message_header hdr; 924 925 if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED) 926 return; 927 928 /* Pre-Win2012R2 hosts don't support reconnect */ 929 if (vmbus_proto_version < VERSION_WIN8_1) 930 return; 931 932 reinit_completion(&vmbus_connection.unload_event); 933 memset(&hdr, 0, sizeof(struct vmbus_channel_message_header)); 934 hdr.msgtype = CHANNELMSG_UNLOAD; 935 vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header), 936 !crash); 937 938 /* 939 * vmbus_initiate_unload() is also called on crash and the crash can be 940 * happening in an interrupt context, where scheduling is impossible. 941 */ 942 if (!crash) 943 wait_for_completion(&vmbus_connection.unload_event); 944 else 945 vmbus_wait_for_unload(); 946 } 947 948 static void vmbus_setup_channel_state(struct vmbus_channel *channel, 949 struct vmbus_channel_offer_channel *offer) 950 { 951 /* 952 * Setup state for signalling the host. 
953 */ 954 channel->sig_event = VMBUS_EVENT_CONNECTION_ID; 955 956 channel->is_dedicated_interrupt = 957 (offer->is_dedicated_interrupt != 0); 958 channel->sig_event = offer->connection_id; 959 960 memcpy(&channel->offermsg, offer, 961 sizeof(struct vmbus_channel_offer_channel)); 962 channel->monitor_grp = (u8)offer->monitorid / 32; 963 channel->monitor_bit = (u8)offer->monitorid % 32; 964 channel->device_id = hv_get_dev_type(channel); 965 } 966 967 /* 968 * find_primary_channel_by_offer - Get the channel object given the new offer. 969 * This is only used in the resume path of hibernation. 970 */ 971 static struct vmbus_channel * 972 find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer) 973 { 974 struct vmbus_channel *channel = NULL, *iter; 975 const guid_t *inst1, *inst2; 976 977 /* Ignore sub-channel offers. */ 978 if (offer->offer.sub_channel_index != 0) 979 return NULL; 980 981 mutex_lock(&vmbus_connection.channel_mutex); 982 983 list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) { 984 inst1 = &iter->offermsg.offer.if_instance; 985 inst2 = &offer->offer.if_instance; 986 987 if (guid_equal(inst1, inst2)) { 988 channel = iter; 989 break; 990 } 991 } 992 993 mutex_unlock(&vmbus_connection.channel_mutex); 994 995 return channel; 996 } 997 998 static bool vmbus_is_valid_offer(const struct vmbus_channel_offer_channel *offer) 999 { 1000 const guid_t *guid = &offer->offer.if_type; 1001 u16 i; 1002 1003 if (!hv_is_isolation_supported()) 1004 return true; 1005 1006 if (is_hvsock_offer(offer)) 1007 return true; 1008 1009 for (i = 0; i < ARRAY_SIZE(vmbus_devs); i++) { 1010 if (guid_equal(guid, &vmbus_devs[i].guid)) 1011 return vmbus_devs[i].allowed_in_isolated; 1012 } 1013 return false; 1014 } 1015 1016 /* 1017 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition. 
*/
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *oldchannel, *newchannel;
	size_t offer_sz;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	trace_vmbus_onoffer(offer);

	/* A rejected offer still has to drop its offer_in_progress count. */
	if (!vmbus_is_valid_offer(offer)) {
		pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n",
				   offer->child_relid);
		atomic_dec(&vmbus_connection.offer_in_progress);
		return;
	}

	oldchannel = find_primary_channel_by_offer(offer);

	if (oldchannel != NULL) {
		/*
		 * We're resuming from hibernation: all the sub-channel and
		 * hv_sock channels we had before the hibernation should have
		 * been cleaned up, and now we must be seeing a re-offered
		 * primary channel that we had before the hibernation.
		 */

		/*
		 * { Initially: channel relid = INVALID_RELID,
		 *		channels[valid_relid] = NULL }
		 *
		 * CPU1					CPU2
		 *
		 * [vmbus_onoffer()]			[vmbus_device_release()]
		 *
		 * LOCK channel_mutex			LOCK channel_mutex
		 * STORE channel relid = valid_relid	LOAD r1 = channel relid
		 * MAP_RELID channel			if (r1 != INVALID_RELID)
		 * UNLOCK channel_mutex			  UNMAP_RELID channel
		 *					UNLOCK channel_mutex
		 *
		 * Forbids: r1 == valid_relid &&
		 *              channels[valid_relid] == channel
		 *
		 * Note.  r1 can be INVALID_RELID only for an hv_sock channel.
		 * None of the hv_sock channels which were present before the
		 * suspend are re-offered upon the resume.  See the WARN_ON()
		 * in hv_process_channel_removal().
		 */
		mutex_lock(&vmbus_connection.channel_mutex);

		atomic_dec(&vmbus_connection.offer_in_progress);

		WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
		/* Fix up the relid. */
		oldchannel->offermsg.child_relid = offer->child_relid;

		/*
		 * With the relid fixed up, any remaining difference means
		 * that the host changed some other field of the offer.
		 */
		offer_sz = sizeof(*offer);
		if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
			/*
			 * This is not an error, since the host can also change
			 * the other field(s) of the offer, e.g. on WS RS5
			 * (Build 17763), the offer->connection_id of the
			 * Mellanox VF vmbus device can change when the host
			 * reoffers the device upon resume.
			 */
			pr_debug("vmbus offer changed: relid=%d\n",
				 offer->child_relid);

			print_hex_dump_debug("Old vmbus offer: ",
					     DUMP_PREFIX_OFFSET, 16, 4,
					     &oldchannel->offermsg, offer_sz,
					     false);
			print_hex_dump_debug("New vmbus offer: ",
					     DUMP_PREFIX_OFFSET, 16, 4,
					     offer, offer_sz, false);

			/* Fix up the old channel. */
			vmbus_setup_channel_state(oldchannel, offer);
		}

		/* Add the channel back to the array of channels. */
		vmbus_channel_map_relid(oldchannel);
		mutex_unlock(&vmbus_connection.channel_mutex);
		return;
	}

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		/* Release the offered relid and drop the in-progress count. */
		vmbus_release_relid(offer->child_relid);
		atomic_dec(&vmbus_connection.offer_in_progress);
		pr_err("Unable to allocate channel object\n");
		return;
	}

	vmbus_setup_channel_state(newchannel, offer);

	/* vmbus_process_offer() decrements offer_in_progress on this path. */
	vmbus_process_offer(newchannel);
}

/*
 * Drop one count from nr_chan_close_on_suspend and signal
 * ready_for_suspend_event when the count reaches zero.
 */
static void check_ready_for_suspend_event(void)
{
	/*
	 * If all the sub-channels or hv_sock channels have been cleaned up,
	 * then it's safe to suspend.
	 */
	if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
		complete(&vmbus_connection.ready_for_suspend_event);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
*
 * We queue a work item to process this offer synchronously
 * NOTE(review): no work item is queued inside this function itself; the
 * sentence above presumably describes how the caller dispatches it - confirm.
 *
 * @hdr: the received message; it is reinterpreted as a
 *       struct vmbus_channel_rescind_offer.
 *
 * Waits until no offer is in progress, looks the channel up by relid and
 * claims it through channel->rescind_ref, so that only one instance of this
 * handler works on a given channel. It then resets the channel callback,
 * runs vmbus_rescind_cleanup(), waits for channel->probe_done and finally
 * either invokes the channel's rescind callback, unregisters the device, or
 * (for a channel that has a primary channel) removes the channel or
 * completes its rescind_event.
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	struct device *dev;
	bool clean_up_chan_for_suspend;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;

	trace_vmbus_onoffer_rescind(rescind);

	/*
	 * The offer msg and the corresponding rescind msg
	 * from the host are guaranteed to be ordered -
	 * offer comes in first and then the rescind.
	 * Since we process these events in work elements,
	 * and with preemption, we may end up processing
	 * the events out of order. We rely on the synchronization
	 * provided by offer_in_progress and by channel_mutex for
	 * ordering these events:
	 *
	 * { Initially: offer_in_progress = 1 }
	 *
	 * CPU1                                 CPU2
	 *
	 * [vmbus_onoffer()]                    [vmbus_onoffer_rescind()]
	 *
	 * LOCK channel_mutex                   WAIT_ON offer_in_progress == 0
	 * DECREMENT offer_in_progress          LOCK channel_mutex
	 * STORE channels[]                     LOAD channels[]
	 * UNLOCK channel_mutex                 UNLOCK channel_mutex
	 *
	 * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
	 */

	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
		/*
		 * We wait here for as long as any channel offer is still
		 * being processed.
		 */
		msleep(1);
	}

	mutex_lock(&vmbus_connection.channel_mutex);
	channel = relid2channel(rescind->child_relid);
	if (channel != NULL) {
		/*
		 * Guarantee that no other instance of vmbus_onoffer_rescind()
		 * has got a reference to the channel object. Synchronize on
		 * &vmbus_connection.channel_mutex.
		 */
		if (channel->rescind_ref) {
			mutex_unlock(&vmbus_connection.channel_mutex);
			return;
		}
		channel->rescind_ref = true;
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel == NULL) {
		/*
		 * We failed in processing the offer message;
		 * we would have cleaned up the relid in that
		 * failure path.
		 */
		return;
	}

	/*
	 * Computed up front: "channel" may already have been freed by the
	 * time this flag is consulted at the end of the function.
	 */
	clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
				    is_sub_channel(channel);
	/*
	 * Before setting channel->rescind in vmbus_rescind_cleanup(), we
	 * should make sure the channel callback is not running any more.
	 */
	vmbus_reset_channel_cb(channel);

	/*
	 * Now wait for offer handling to complete.
	 */
	vmbus_rescind_cleanup(channel);
	while (READ_ONCE(channel->probe_done) == false) {
		/*
		 * We wait here until channel->probe_done has been set for
		 * this channel.
		 */
		msleep(1);
	}

	/*
	 * At this point, the rescind handling can proceed safely.
	 */

	if (channel->device_obj) {
		if (channel->chn_rescind_callback) {
			/* The channel's own rescind callback takes over from here. */
			channel->chn_rescind_callback(channel);

			if (clean_up_chan_for_suspend)
				check_ready_for_suspend_event();

			return;
		}
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		/* Hold a device reference across the unregister call. */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else if (channel->primary_channel != NULL) {
		/*
		 * Sub-channel is being rescinded. Following is the channel
		 * close sequence when initiated from the driver (refer to
		 * vmbus_close() for details):
		 * 1. Close all sub-channels first
		 * 2. Then close the primary channel.
		 */
		mutex_lock(&vmbus_connection.channel_mutex);
		if (channel->state == CHANNEL_OPEN_STATE) {
			/*
			 * The channel is currently not open;
			 * it is safe for us to cleanup the channel.
			 * NOTE(review): CHANNEL_OPEN_STATE presumably denotes
			 * a channel that is offered but not yet opened -
			 * confirm against the enum definition.
			 */
			hv_process_channel_removal(channel);
		} else {
			/* Otherwise signal whoever waits on rescind_event. */
			complete(&channel->rescind_event);
		}
		mutex_unlock(&vmbus_connection.channel_mutex);
	}

	/* The "channel" may have been freed. Do not access it any longer. */

	if (clean_up_chan_for_suspend)
		check_ready_for_suspend_event();
}

/*
 * vmbus_hvsock_device_unregister - Unregister the device of an hv_sock
 * channel.
 *
 * Sleeps in 1 ms steps until both channel->probe_done and channel->rescind
 * are set, then unregisters channel->device_obj. Calling this on a channel
 * that is not an hv_sock channel triggers BUG_ON().
 */
void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
	BUG_ON(!is_hvsock_channel(channel));

	/* We always get a rescind msg when a connection is closed. */
	while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
		msleep(1);

	vmbus_device_unregister(channel->device_obj);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);


/*
 * vmbus_onoffers_delivered -
 * The CHANNELMSG_ALLOFFERS_DELIVERED message arrives after all
 * boot-time offers are delivered. A boot-time offer is for the primary
 * channel for any virtual hardware configured in the VM at the time it boots.
 * Boot-time offers include offers for physical devices assigned to the VM
 * via Hyper-V's Discrete Device Assignment (DDA) functionality that are
 * handled as virtual PCI devices in Linux (e.g., NVMe devices and GPUs).
 * Boot-time offers do not include offers for VMBus sub-channels. Because
 * devices can be hot-added to the VM after it is booted, additional channel
 * offers that aren't boot-time offers can be received at any time after the
 * all-offers-delivered message.
 *
 * SR-IOV NIC Virtual Functions (VFs) assigned to a VM are not considered
 * to be assigned to the VM at boot-time, and offers for VFs may occur after
 * the all-offers-delivered message.
VFs are optional accelerators to the 1302 * synthetic VMBus NIC and are effectively hot-added only after the VMBus 1303 * NIC channel is opened (once it knows the guest can support it, via the 1304 * sriov bit in the netvsc protocol). 1305 */ 1306 static void vmbus_onoffers_delivered( 1307 struct vmbus_channel_message_header *hdr) 1308 { 1309 complete(&vmbus_connection.all_offers_delivered_event); 1310 } 1311 1312 /* 1313 * vmbus_onopen_result - Open result handler. 1314 * 1315 * This is invoked when we received a response to our channel open request. 1316 * Find the matching request, copy the response and signal the requesting 1317 * thread. 1318 */ 1319 static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr) 1320 { 1321 struct vmbus_channel_open_result *result; 1322 struct vmbus_channel_msginfo *msginfo; 1323 struct vmbus_channel_message_header *requestheader; 1324 struct vmbus_channel_open_channel *openmsg; 1325 unsigned long flags; 1326 1327 result = (struct vmbus_channel_open_result *)hdr; 1328 1329 trace_vmbus_onopen_result(result); 1330 1331 /* 1332 * Find the open msg, copy the result and signal/unblock the wait event 1333 */ 1334 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1335 1336 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1337 msglistentry) { 1338 requestheader = 1339 (struct vmbus_channel_message_header *)msginfo->msg; 1340 1341 if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) { 1342 openmsg = 1343 (struct vmbus_channel_open_channel *)msginfo->msg; 1344 if (openmsg->child_relid == result->child_relid && 1345 openmsg->openid == result->openid) { 1346 memcpy(&msginfo->response.open_result, 1347 result, 1348 sizeof( 1349 struct vmbus_channel_open_result)); 1350 complete(&msginfo->waitevent); 1351 break; 1352 } 1353 } 1354 } 1355 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1356 } 1357 1358 /* 1359 * vmbus_ongpadl_created - GPADL created handler. 
1360 * 1361 * This is invoked when we received a response to our gpadl create request. 1362 * Find the matching request, copy the response and signal the requesting 1363 * thread. 1364 */ 1365 static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr) 1366 { 1367 struct vmbus_channel_gpadl_created *gpadlcreated; 1368 struct vmbus_channel_msginfo *msginfo; 1369 struct vmbus_channel_message_header *requestheader; 1370 struct vmbus_channel_gpadl_header *gpadlheader; 1371 unsigned long flags; 1372 1373 gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr; 1374 1375 trace_vmbus_ongpadl_created(gpadlcreated); 1376 1377 /* 1378 * Find the establish msg, copy the result and signal/unblock the wait 1379 * event 1380 */ 1381 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1382 1383 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1384 msglistentry) { 1385 requestheader = 1386 (struct vmbus_channel_message_header *)msginfo->msg; 1387 1388 if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) { 1389 gpadlheader = 1390 (struct vmbus_channel_gpadl_header *)requestheader; 1391 1392 if ((gpadlcreated->child_relid == 1393 gpadlheader->child_relid) && 1394 (gpadlcreated->gpadl == gpadlheader->gpadl)) { 1395 memcpy(&msginfo->response.gpadl_created, 1396 gpadlcreated, 1397 sizeof( 1398 struct vmbus_channel_gpadl_created)); 1399 complete(&msginfo->waitevent); 1400 break; 1401 } 1402 } 1403 } 1404 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1405 } 1406 1407 /* 1408 * vmbus_onmodifychannel_response - Modify Channel response handler. 1409 * 1410 * This is invoked when we received a response to our channel modify request. 1411 * Find the matching request, copy the response and signal the requesting thread. 
1412 */ 1413 static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr) 1414 { 1415 struct vmbus_channel_modifychannel_response *response; 1416 struct vmbus_channel_msginfo *msginfo; 1417 unsigned long flags; 1418 1419 response = (struct vmbus_channel_modifychannel_response *)hdr; 1420 1421 trace_vmbus_onmodifychannel_response(response); 1422 1423 /* 1424 * Find the modify msg, copy the response and signal/unblock the wait event. 1425 */ 1426 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1427 1428 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) { 1429 struct vmbus_channel_message_header *responseheader = 1430 (struct vmbus_channel_message_header *)msginfo->msg; 1431 1432 if (responseheader->msgtype == CHANNELMSG_MODIFYCHANNEL) { 1433 struct vmbus_channel_modifychannel *modifymsg; 1434 1435 modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg; 1436 if (modifymsg->child_relid == response->child_relid) { 1437 memcpy(&msginfo->response.modify_response, response, 1438 sizeof(*response)); 1439 complete(&msginfo->waitevent); 1440 break; 1441 } 1442 } 1443 } 1444 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1445 } 1446 1447 /* 1448 * vmbus_ongpadl_torndown - GPADL torndown handler. 1449 * 1450 * This is invoked when we received a response to our gpadl teardown request. 1451 * Find the matching request, copy the response and signal the requesting 1452 * thread. 
1453 */ 1454 static void vmbus_ongpadl_torndown( 1455 struct vmbus_channel_message_header *hdr) 1456 { 1457 struct vmbus_channel_gpadl_torndown *gpadl_torndown; 1458 struct vmbus_channel_msginfo *msginfo; 1459 struct vmbus_channel_message_header *requestheader; 1460 struct vmbus_channel_gpadl_teardown *gpadl_teardown; 1461 unsigned long flags; 1462 1463 gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr; 1464 1465 trace_vmbus_ongpadl_torndown(gpadl_torndown); 1466 1467 /* 1468 * Find the open msg, copy the result and signal/unblock the wait event 1469 */ 1470 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1471 1472 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1473 msglistentry) { 1474 requestheader = 1475 (struct vmbus_channel_message_header *)msginfo->msg; 1476 1477 if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) { 1478 gpadl_teardown = 1479 (struct vmbus_channel_gpadl_teardown *)requestheader; 1480 1481 if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) { 1482 memcpy(&msginfo->response.gpadl_torndown, 1483 gpadl_torndown, 1484 sizeof( 1485 struct vmbus_channel_gpadl_torndown)); 1486 complete(&msginfo->waitevent); 1487 break; 1488 } 1489 } 1490 } 1491 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1492 } 1493 1494 /* 1495 * vmbus_onversion_response - Version response handler 1496 * 1497 * This is invoked when we received a response to our initiate contact request. 1498 * Find the matching request, copy the response and signal the requesting 1499 * thread. 
1500 */ 1501 static void vmbus_onversion_response( 1502 struct vmbus_channel_message_header *hdr) 1503 { 1504 struct vmbus_channel_msginfo *msginfo; 1505 struct vmbus_channel_message_header *requestheader; 1506 struct vmbus_channel_version_response *version_response; 1507 unsigned long flags; 1508 1509 version_response = (struct vmbus_channel_version_response *)hdr; 1510 1511 trace_vmbus_onversion_response(version_response); 1512 1513 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1514 1515 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1516 msglistentry) { 1517 requestheader = 1518 (struct vmbus_channel_message_header *)msginfo->msg; 1519 1520 if (requestheader->msgtype == 1521 CHANNELMSG_INITIATE_CONTACT) { 1522 memcpy(&msginfo->response.version_response, 1523 version_response, 1524 sizeof(struct vmbus_channel_version_response)); 1525 complete(&msginfo->waitevent); 1526 } 1527 } 1528 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1529 } 1530 1531 /* Channel message dispatch table */ 1532 const struct vmbus_channel_message_table_entry 1533 channel_message_table[CHANNELMSG_COUNT] = { 1534 { CHANNELMSG_INVALID, 0, NULL, 0}, 1535 { CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer, 1536 sizeof(struct vmbus_channel_offer_channel)}, 1537 { CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind, 1538 sizeof(struct vmbus_channel_rescind_offer) }, 1539 { CHANNELMSG_REQUESTOFFERS, 0, NULL, 0}, 1540 { CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered, 0}, 1541 { CHANNELMSG_OPENCHANNEL, 0, NULL, 0}, 1542 { CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result, 1543 sizeof(struct vmbus_channel_open_result)}, 1544 { CHANNELMSG_CLOSECHANNEL, 0, NULL, 0}, 1545 { CHANNELMSG_GPADL_HEADER, 0, NULL, 0}, 1546 { CHANNELMSG_GPADL_BODY, 0, NULL, 0}, 1547 { CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created, 1548 sizeof(struct vmbus_channel_gpadl_created)}, 1549 { CHANNELMSG_GPADL_TEARDOWN, 0, NULL, 0}, 1550 { CHANNELMSG_GPADL_TORNDOWN, 
1, vmbus_ongpadl_torndown, 1551 sizeof(struct vmbus_channel_gpadl_torndown) }, 1552 { CHANNELMSG_RELID_RELEASED, 0, NULL, 0}, 1553 { CHANNELMSG_INITIATE_CONTACT, 0, NULL, 0}, 1554 { CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response, 1555 sizeof(struct vmbus_channel_version_response)}, 1556 { CHANNELMSG_UNLOAD, 0, NULL, 0}, 1557 { CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response, 0}, 1558 { CHANNELMSG_18, 0, NULL, 0}, 1559 { CHANNELMSG_19, 0, NULL, 0}, 1560 { CHANNELMSG_20, 0, NULL, 0}, 1561 { CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL, 0}, 1562 { CHANNELMSG_MODIFYCHANNEL, 0, NULL, 0}, 1563 { CHANNELMSG_TL_CONNECT_RESULT, 0, NULL, 0}, 1564 { CHANNELMSG_MODIFYCHANNEL_RESPONSE, 1, vmbus_onmodifychannel_response, 1565 sizeof(struct vmbus_channel_modifychannel_response)}, 1566 }; 1567 1568 /* 1569 * vmbus_onmessage - Handler for channel protocol messages. 1570 * 1571 * This is invoked in the vmbus worker thread context. 1572 */ 1573 void vmbus_onmessage(struct vmbus_channel_message_header *hdr) 1574 { 1575 trace_vmbus_on_message(hdr); 1576 1577 /* 1578 * vmbus_on_msg_dpc() makes sure the hdr->msgtype here can not go 1579 * out of bound and the message_handler pointer can not be NULL. 1580 */ 1581 channel_message_table[hdr->msgtype].message_handler(hdr); 1582 } 1583 1584 /* 1585 * vmbus_request_offers - Send a request to get all our pending offers 1586 * and wait for all boot-time offers to arrive. 
1587 */ 1588 int vmbus_request_offers(void) 1589 { 1590 struct vmbus_channel_message_header *msg; 1591 struct vmbus_channel_msginfo *msginfo; 1592 int ret; 1593 1594 msginfo = kzalloc(sizeof(*msginfo) + 1595 sizeof(struct vmbus_channel_message_header), 1596 GFP_KERNEL); 1597 if (!msginfo) 1598 return -ENOMEM; 1599 1600 msg = (struct vmbus_channel_message_header *)msginfo->msg; 1601 1602 msg->msgtype = CHANNELMSG_REQUESTOFFERS; 1603 1604 /* 1605 * This REQUESTOFFERS message will result in the host sending an all 1606 * offers delivered message after all the boot-time offers are sent. 1607 */ 1608 ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header), 1609 true); 1610 1611 trace_vmbus_request_offers(ret); 1612 1613 if (ret != 0) { 1614 pr_err("Unable to request offers - %d\n", ret); 1615 1616 goto cleanup; 1617 } 1618 1619 /* 1620 * Wait for the host to send all boot-time offers. 1621 * Keeping it as a best-effort mechanism, where a warning is 1622 * printed if a timeout occurs, and execution is resumed. 1623 */ 1624 if (!wait_for_completion_timeout(&vmbus_connection.all_offers_delivered_event, 1625 secs_to_jiffies(60))) { 1626 pr_warn("timed out waiting for all boot-time offers to be delivered.\n"); 1627 } 1628 1629 /* 1630 * Flush handling of offer messages (which may initiate work on 1631 * other work queues). 1632 */ 1633 flush_workqueue(vmbus_connection.work_queue); 1634 1635 /* 1636 * Flush workqueue for processing the incoming offers. Subchannel 1637 * offers and their processing can happen later, so there is no need to 1638 * flush that workqueue here. 
1639 */ 1640 flush_workqueue(vmbus_connection.handle_primary_chan_wq); 1641 1642 cleanup: 1643 kfree(msginfo); 1644 1645 return ret; 1646 } 1647 1648 void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel, 1649 void (*sc_cr_cb)(struct vmbus_channel *new_sc)) 1650 { 1651 primary_channel->sc_creation_callback = sc_cr_cb; 1652 } 1653 EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback); 1654 1655 void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, 1656 void (*chn_rescind_cb)(struct vmbus_channel *)) 1657 { 1658 channel->chn_rescind_callback = chn_rescind_cb; 1659 } 1660 EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback); 1661