// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <linux/export.h>
#include <asm/mshyperv.h>
#include <linux/sched/isolation.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel);

const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE,
	  HV_IDE_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* SCSI */
	{ .dev_type = HV_SCSI,
	  HV_SCSI_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = true,
	},

	/* Fibre Channel */
	{ .dev_type = HV_FC,
	  HV_SYNTHFC_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* Synthetic NIC */
	{ .dev_type = HV_NIC,
	  HV_NIC_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = true,
	},

	/* Network Direct */
	{ .dev_type = HV_ND,
	  HV_ND_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* PCIE */
	{ .dev_type = HV_PCIE,
	  HV_PCIE_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB,
	  HV_SYNTHVID_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD,
	  HV_KBD_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Synthetic MOUSE */
	{ .dev_type = HV_MOUSE,
	  HV_MOUSE_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* KVP */
	{ .dev_type = HV_KVP,
	  HV_KVP_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Time Synch */
	{ .dev_type = HV_TS,
	  HV_TS_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Heartbeat */
	{ .dev_type = HV_HB,
	  HV_HEART_BEAT_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN,
	  HV_SHUTDOWN_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* File copy */
	/*
	 * fcopy has always used a 16KB ring buffer size, which has worked
	 * well for many years.
	 */
	{ .pref_ring_size = 0x4000,
	  .dev_type = HV_FCOPY,
	  HV_FCOPY_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Backup */
	{ .dev_type = HV_BACKUP,
	  HV_VSS_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Dynamic Memory */
	{ .dev_type = HV_DM,
	  HV_DM_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/*
	 * Unknown GUID.
	 * A 64 KB ring buffer plus a 4 KB header should be sufficient for
	 * any Hyper-V device apart from HV_NIC and HV_SCSI. This entry
	 * avoids the fallback for unknown devices, which would otherwise
	 * allocate a much bigger (2 MB) ring size.
	 */
	{ .pref_ring_size = 0x11000,
	  .dev_type = HV_UNKNOWN,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},
};
EXPORT_SYMBOL_GPL(vmbus_devs);

static const struct {
	guid_t guid;
} vmbus_unsupported_devs[] = {
	{ HV_AVMA1_GUID },
	{ HV_AVMA2_GUID },
	{ HV_RDV_GUID },
	{ HV_IMC_GUID },
};

/*
 * The rescinded channel may be blocked waiting for a response from the host;
 * take care of that.
 */
static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
{
	struct vmbus_channel_msginfo *msginfo;
	unsigned long flags;


	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	channel->rescind = true;
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {

		if (msginfo->waiting_channel == channel) {
			complete(&msginfo->waitevent);
			break;
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

static bool is_unsupported_vmbus_devs(const guid_t *guid)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
		if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
			return true;
	return false;
}

static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
	const guid_t *guid = &channel->offermsg.offer.if_type;
	u16 i;

	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
		return HV_UNKNOWN;

	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
		if (guid_equal(guid, &vmbus_devs[i].guid))
			return i;
	}
	pr_info("Unknown GUID: %pUl\n", guid);
	return i;
}
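
/*
 * Note: the lookup loop in hv_get_dev_type() above relies on vmbus_devs[]
 * being indexed in exactly the same order as the HV_IDE .. HV_UNKNOWN
 * device-type constants; keep the array and that enumeration in sync when
 * adding entries.
 */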

/**
 * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @buf: Raw buffer channel data
 * @buflen: Length of the raw buffer channel data.
 * @fw_version: The framework versions we can support.
 * @fw_vercnt: The number of entries in @fw_version.
 * @srv_version: The service versions we can support.
 * @srv_vercnt: The number of entries in @srv_version.
 * @nego_fw_version: The selected framework version.
 * @nego_srv_version: The selected service version.
 *
 * Note: Versions are given in decreasing order.
 *
 * Set up and fill in default negotiate response message.
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
			       u32 buflen, const int *fw_version, int fw_vercnt,
			       const int *srv_version, int srv_vercnt,
			       int *nego_fw_version, int *nego_srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i, j;
	bool found_match = false;
	struct icmsg_negotiate *negop;

	/* Check that there's enough space for icframe_vercnt, icmsg_vercnt */
	if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) {
		pr_err_ratelimited("Invalid icmsg negotiate\n");
		return false;
	}

	icmsghdrp->icmsgsize = 0x10;
	negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/* Validate negop packet */
	if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
	    icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
	    ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) {
		pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n",
				   icframe_major, icmsg_major);
		goto fw_error;
	}

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < fw_vercnt; i++) {
		fw_major = (fw_version[i] >> 16);
		fw_minor = (fw_version[i] & 0xFFFF);

		for (j = 0; j < negop->icframe_vercnt; j++) {
			if ((negop->icversion_data[j].major == fw_major) &&
			    (negop->icversion_data[j].minor == fw_minor)) {
				icframe_major = negop->icversion_data[j].major;
				icframe_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	for (i = 0; i < srv_vercnt; i++) {
		srv_major = (srv_version[i] >> 16);
		srv_minor = (srv_version[i] & 0xFFFF);

		for (j = negop->icframe_vercnt;
		     (j < negop->icframe_vercnt + negop->icmsg_vercnt);
		     j++) {

			if ((negop->icversion_data[j].major == srv_major) &&
			    (negop->icversion_data[j].minor == srv_minor)) {

				icmsg_major = negop->icversion_data[j].major;
				icmsg_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	if (nego_fw_version)
		*nego_fw_version = (icframe_major << 16) | icframe_minor;

	if (nego_srv_version)
		*nego_srv_version = (icmsg_major << 16) | icmsg_minor;

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
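
/*
 * Typical caller pattern (sketch only; the buffer and version-array names
 * below are illustrative, not defined in this file). An IC driver's channel
 * callback hands the received negotiate packet straight to this helper and
 * then sends the filled-in buffer back to the host on the same channel:
 *
 *	if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
 *		if (vmbus_prep_negotiate_resp(icmsghdrp, buf, buflen,
 *					      fw_versions, FW_VER_COUNT,
 *					      srv_versions, SRV_VER_COUNT,
 *					      NULL, &srv_version))
 *			pr_info("negotiated service version %d.%d\n",
 *				srv_version >> 16, srv_version & 0xFFFF);
 *	}
 */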

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
	struct vmbus_channel *channel;

	channel = kzalloc_obj(*channel, GFP_ATOMIC);
	if (!channel)
		return NULL;

	spin_lock_init(&channel->sched_lock);
	init_completion(&channel->rescind_event);

	INIT_LIST_HEAD(&channel->sc_list);

	tasklet_init(&channel->callback_event,
		     vmbus_on_event, (unsigned long)channel);

	hv_ringbuffer_pre_init(channel);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
	tasklet_kill(&channel->callback_event);
	vmbus_remove_channel_attr_group(channel);

	kobject_put(&channel->kobj);
}

void vmbus_channel_map_relid(struct vmbus_channel *channel)
{
	u32 new_relid = channel->offermsg.child_relid;

	if (WARN_ON(new_relid >= MAX_CHANNEL_RELIDS))
		return;

	/*
	 * This function is always called in the tasklet for the connect CPU.
	 * So updating the relid hiwater mark does not need to be atomic.
	 */
	if (new_relid > READ_ONCE(vmbus_connection.relid_hiwater))
		WRITE_ONCE(vmbus_connection.relid_hiwater, new_relid);

	/*
	 * The mapping of the channel's relid is visible from the CPUs that
	 * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
	 * execute:
	 *
	 * (a) In the "normal (i.e., not resuming from hibernation)" path,
	 *     the full barrier in virt_store_mb() guarantees that the store
	 *     is propagated to all CPUs before the add_channel_work work
	 *     is queued.  In turn, add_channel_work is queued before the
	 *     channel's ring buffer is allocated/initialized and the
	 *     OPENCHANNEL message for the channel is sent in vmbus_open().
	 *     Hyper-V won't start sending the interrupts for the channel
	 *     before the OPENCHANNEL message is acked.  The memory barrier
	 *     in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
	 *     that vmbus_chan_sched() must find the channel's relid in
	 *     recv_int_page before retrieving the channel pointer from the
	 *     array of channels.
	 *
	 * (b) In the "resuming from hibernation" path, the virt_store_mb()
	 *     guarantees that the store is propagated to all CPUs before
	 *     the VMBus connection is marked as ready for the resume event
	 *     (cf. check_ready_for_resume_event()).  The interrupt handler
	 *     of the VMBus driver and vmbus_chan_sched() can not run before
	 *     vmbus_bus_resume() has completed execution (cf. resume_noirq).
	 */
	virt_store_mb(vmbus_connection.channels[new_relid], channel);
}

void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
{
	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
		return;
	WRITE_ONCE(
		vmbus_connection.channels[channel->offermsg.child_relid],
		NULL);
}

static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;
	int ret;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	ret = vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
			     true);

	trace_vmbus_release_relid(&msg, ret);
}

void hv_process_channel_removal(struct vmbus_channel *channel)
{
	lockdep_assert_held(&vmbus_connection.channel_mutex);
	BUG_ON(!channel->rescind);

	/*
	 * hv_process_channel_removal() could find INVALID_RELID only for
	 * hv_sock channels.  See the inline comments in vmbus_onoffer().
	 */
	WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
		!is_hvsock_channel(channel));

	/*
	 * Upon suspend, an in-use hv_sock channel is removed from the array of
	 * channels and the relid is invalidated.  After hibernation, when the
	 * user-space application destroys the channel, it's unnecessary and
	 * unsafe to remove the channel from the array of channels.  See also
	 * the inline comments before the call of vmbus_release_relid() below.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_channel_unmap_relid(channel);

	if (channel->primary_channel == NULL)
		list_del(&channel->listentry);
	else
		list_del(&channel->sc_list);

	/*
	 * If this is a "perf" channel, update the hv_numa_map[] masks so that
	 * init_vp_index() can (re-)use the CPU.
	 */
	if (hv_is_perf_channel(channel))
		hv_clear_allocated_cpu(channel->target_cpu);

	/*
	 * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
	 * the relid is invalidated; after hibernation, when the user-space app
	 * destroys the channel, the relid is INVALID_RELID, and in this case
	 * it's unnecessary and unsafe to release the old relid, since the same
	 * relid can refer to a completely different channel now.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_release_relid(channel->offermsg.child_relid);

	free_channel(channel);
}

void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
				 listentry) {
		/* hv_process_channel_removal() needs this */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
}

/* Note: the function can run concurrently for primary/sub channels. */
static void vmbus_add_channel_work(struct work_struct *work)
{
	struct vmbus_channel *newchannel =
		container_of(work, struct vmbus_channel, add_channel_work);
	struct vmbus_channel *primary_channel = newchannel->primary_channel;
	int ret;

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly.
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (primary_channel != NULL) {
		/* newchannel is a sub-channel. */
		struct hv_device *dev = primary_channel->device_obj;

		if (vmbus_add_channel_kobj(dev, newchannel))
			goto err_deq_chan;

		if (primary_channel->sc_creation_callback != NULL)
			primary_channel->sc_creation_callback(newchannel);

		newchannel->probe_done = true;
		return;
	}

	/*
	 * Start the process of binding the primary channel to the driver
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = newchannel->device_id;
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 *
	 * If vmbus_device_register() fails, the 'device_obj' is freed in
	 * vmbus_device_release() as called by device_unregister() in the
	 * error path of vmbus_device_register(). In the outside error
	 * path, there's no need to free it.
	 */
	ret = vmbus_device_register(newchannel->device_obj);

	if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
		       newchannel->offermsg.child_relid);
		goto err_deq_chan;
	}

	newchannel->probe_done = true;
	return;

err_deq_chan:
	mutex_lock(&vmbus_connection.channel_mutex);

	/*
	 * We need to set the flag, otherwise
	 * vmbus_onoffer_rescind() can be blocked.
	 */
	newchannel->probe_done = true;

	if (primary_channel == NULL)
		list_del(&newchannel->listentry);
	else
		list_del(&newchannel->sc_list);

	/* vmbus_process_offer() has mapped the channel. */
	vmbus_channel_unmap_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);

	vmbus_release_relid(newchannel->offermsg.child_relid);

	free_channel(newchannel);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	struct workqueue_struct *wq;
	bool fnew = true;

	/*
	 * Synchronize vmbus_process_offer() and CPU hotplugging:
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_process_offer()]	[Hot removal of the CPU]
	 *
	 * CPUS_READ_LOCK		CPUS_WRITE_LOCK
	 * LOAD cpu_online_mask		SEARCH chn_list
	 * STORE target_cpu		LOAD target_cpu
	 * INSERT chn_list		STORE cpu_online_mask
	 * CPUS_READ_UNLOCK		CPUS_WRITE_UNLOCK
	 *
	 * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
	 *	    CPU2's SEARCH from *not* seeing CPU1's INSERT
	 *
	 * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
	 *	    CPU2's LOAD from *not* seeing CPU1's STORE
	 */
	cpus_read_lock();

	/*
	 * Serializes the modifications of the chn_list list as well as
	 * the accesses to next_numa_node_id in init_vp_index().
	 */
	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (guid_equal(&channel->offermsg.offer.if_type,
			       &newchannel->offermsg.offer.if_type) &&
		    guid_equal(&channel->offermsg.offer.if_instance,
			       &newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			newchannel->primary_channel = channel;
			break;
		}
	}

	init_vp_index(newchannel);

	/* Remember the channels that should be cleaned up upon suspend. */
	if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
		atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);

	/*
	 * Now that we have acquired the channel_mutex,
	 * we can release the potentially racing rescind thread.
	 */
	atomic_dec(&vmbus_connection.offer_in_progress);

	if (fnew) {
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);
	} else {
		/*
		 * Check to see if this is a valid sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index == 0) {
			mutex_unlock(&vmbus_connection.channel_mutex);
			cpus_read_unlock();
			/*
			 * Don't call free_channel(), because newchannel->kobj
			 * is not initialized yet.
			 */
			kfree(newchannel);
			WARN_ON_ONCE(1);
			return;
		}
		/*
		 * Process the sub-channel.
		 */
		list_add_tail(&newchannel->sc_list, &channel->sc_list);
	}

	vmbus_channel_map_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);
	cpus_read_unlock();

	/*
	 * vmbus_process_offer() mustn't call channel->sc_creation_callback()
	 * directly for sub-channels, because sc_creation_callback() ->
	 * vmbus_open() may never get the host's response to the
	 * OPEN_CHANNEL message (the host may rescind a channel at any time,
	 * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
	 * may not wake up the vmbus_open() as it's blocked due to a non-zero
	 * vmbus_connection.offer_in_progress, and finally we have a deadlock.
	 *
	 * The above is also true for primary channels, if the related device
	 * drivers use sync probing mode by default.
	 *
	 * And, usually the handling of primary channels and sub-channels can
	 * depend on each other, so we should offload them to different
	 * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
	 * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
	 * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
	 * and waits for all the sub-channels to appear, but the latter
	 * can't get the rtnl_lock and this blocks the handling of
	 * sub-channels.
	 */
	INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
	wq = fnew ? vmbus_connection.handle_primary_chan_wq :
		    vmbus_connection.handle_sub_chan_wq;
	queue_work(wq, &newchannel->add_channel_work);
}

/*
 * Check if the given CPU is used by other channels of the same device.
 * This should only be called by init_vp_index().
 */
static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn)
{
	struct vmbus_channel *primary = chn->primary_channel;
	struct vmbus_channel *sc;

	lockdep_assert_held(&vmbus_connection.channel_mutex);

	if (!primary)
		return false;

	if (primary->target_cpu == cpu)
		return true;

	list_for_each_entry(sc, &primary->sc_list, sc_list)
		if (sc != chn && sc->target_cpu == cpu)
			return true;

	return false;
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * We can statically distribute the incoming channel interrupt load
 * by binding a channel to VCPU.
 *
 * For non-performance critical channels we assign the VMBUS_CONNECT_CPU.
 * Performance critical channels will be distributed evenly among all
 * the available NUMA nodes.  Once the node is assigned, we will assign
 * the CPU based on a simple round robin scheme.
 */
static void init_vp_index(struct vmbus_channel *channel)
{
	bool perf_chn = hv_is_perf_channel(channel);
	u32 i, ncpu = num_online_cpus();
	cpumask_var_t available_mask;
	struct cpumask *allocated_mask;
	const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
	u32 target_cpu;
	int numa_node;

	if (!perf_chn ||
	    !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
	    cpumask_empty(hk_mask)) {
		/*
		 * Bind the channel to VMBUS_CONNECT_CPU if:
		 * - the channel is not performance critical, or
		 * - alloc_cpumask_var() failed, or
		 * - all the CPUs are isolated (hk_mask is empty).
		 */
		channel->target_cpu = VMBUS_CONNECT_CPU;
		if (perf_chn)
			hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
		return;
	}

	for (i = 1; i <= ncpu + 1; i++) {
		while (true) {
			numa_node = next_numa_node_id++;
			if (numa_node == nr_node_ids) {
				next_numa_node_id = 0;
				continue;
			}
			if (cpumask_empty(cpumask_of_node(numa_node)))
				continue;
			break;
		}
		allocated_mask = &hv_context.hv_numa_map[numa_node];

retry:
		cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node));
		cpumask_and(available_mask, available_mask, hk_mask);

		if (cpumask_empty(available_mask)) {
			/*
			 * We have cycled through all the CPUs in the node;
			 * reset the allocated map.
			 */
			cpumask_clear(allocated_mask);
			goto retry;
		}

		target_cpu = cpumask_first(available_mask);
		cpumask_set_cpu(target_cpu, allocated_mask);

		if (channel->offermsg.offer.sub_channel_index >= ncpu ||
		    i > ncpu || !hv_cpuself_used(target_cpu, channel))
			break;
	}

	channel->target_cpu = target_cpu;

	free_cpumask_var(available_mask);
}
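
/*
 * Worked example for init_vp_index() (hypothetical topology): on a VM with
 * two NUMA nodes of 4 CPUs each and no isolated CPUs, next_numa_node_id
 * sends successive perf channels to node 0, node 1, node 0, ...; within a
 * node, cpumask_first() of the not-yet-allocated housekeeping CPUs is
 * chosen, and once every CPU in the node has been handed out, the
 * allocated_mask is cleared and the round robin starts over on that node.
 */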

#define UNLOAD_DELAY_UNIT_MS	10		/* 10 milliseconds */
#define UNLOAD_WAIT_MS		(100*1000)	/* 100 seconds */
#define UNLOAD_WAIT_LOOPS	(UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS)
#define UNLOAD_MSG_MS		(5*1000)	/* Every 5 seconds */
#define UNLOAD_MSG_LOOPS	(UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS)

static void vmbus_wait_for_unload(void)
{
	int cpu;
	void *page_addr;
	struct hv_message *msg;
	struct vmbus_channel_message_header *hdr;
	u32 message_type, i;

	/*
	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
	 * used for initial contact or to CPU0 depending on host version. When
	 * we're crashing on a different CPU let's hope that IRQ handler on
	 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
	 * functional and vmbus_unload_response() will complete
	 * vmbus_connection.unload_event. If not, the last thing we can do is
	 * read message pages for all CPUs directly.
	 *
	 * Wait up to 100 seconds since an Azure host must write back any dirty
	 * data in its disk cache before the VMbus UNLOAD request will
	 * complete. This flushing has been empirically observed to take up
	 * to 50 seconds in cases with a lot of dirty data, so allow additional
	 * leeway and for inaccuracies in mdelay(). But eventually time out so
	 * that the panic path can't get hung forever in case the response
	 * message isn't seen.
	 */
	for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) {
		if (completion_done(&vmbus_connection.unload_event))
			goto completed;

		for_each_present_cpu(cpu) {
			struct hv_per_cpu_context *hv_cpu
				= per_cpu_ptr(hv_context.cpu_context, cpu);

			/*
			 * In a CoCo VM the hyp_synic_message_page is not allocated
			 * in hv_synic_alloc(). Instead it is set/cleared in
			 * hv_hyp_synic_enable_regs() and hv_hyp_synic_disable_regs()
			 * such that it is set only when the CPU is online. If
			 * not all present CPUs are online, the message page
			 * might be NULL, so skip such CPUs.
			 */
			page_addr = hv_cpu->hyp_synic_message_page;
			if (!page_addr)
				continue;

			msg = (struct hv_message *)page_addr
				+ VMBUS_MESSAGE_SINT;

			message_type = READ_ONCE(msg->header.message_type);
			if (message_type == HVMSG_NONE)
				continue;

			hdr = (struct vmbus_channel_message_header *)
				msg->u.payload;

			if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
				complete(&vmbus_connection.unload_event);

			vmbus_signal_eom(msg, message_type);
		}

		/*
		 * Give a notice periodically so someone watching the
		 * serial output won't think it is completely hung.
		 */
		if (!(i % UNLOAD_MSG_LOOPS))
			pr_notice("Waiting for VMBus UNLOAD to complete\n");

		mdelay(UNLOAD_DELAY_UNIT_MS);
	}
	pr_err("Continuing even though VMBus UNLOAD did not complete\n");

completed:
	/*
	 * We're crashing and already got the UNLOAD_RESPONSE, clean up any
	 * pending messages on all CPUs to be able to receive new
	 * messages after we reconnect.
	 */
	for_each_present_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		page_addr = hv_cpu->hyp_synic_message_page;
		if (!page_addr)
			continue;

		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
		msg->header.message_type = HVMSG_NONE;
	}
}
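
/*
 * For reference, the timeout arithmetic above: UNLOAD_WAIT_LOOPS is
 * 100000 / 10 = 10000 iterations of mdelay(10), i.e. a 100 second worst
 * case, with a progress notice every UNLOAD_MSG_LOOPS = 500 iterations
 * (i.e. every 5 seconds).
 */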

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wakeup the waiting thread.
	 * Once we successfully unload, we can cleanup the monitor state.
	 *
	 * NB.  A malicious or compromised Hyper-V could send a spurious
	 * message of type CHANNELMSG_UNLOAD_RESPONSE, and trigger a call
	 * of the complete() below.  Make sure that unload_event has been
	 * initialized by the time this complete() is executed.
	 */
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(bool crash)
{
	struct vmbus_channel_message_header hdr;

	if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED)
		return;

	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

	reinit_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
		       !crash);

	/*
	 * vmbus_initiate_unload() is also called on crash and the crash can be
	 * happening in an interrupt context, where scheduling is impossible.
	 */
	if (!crash)
		wait_for_completion(&vmbus_connection.unload_event);
	else
		vmbus_wait_for_unload();
}

static void vmbus_setup_channel_state(struct vmbus_channel *channel,
				      struct vmbus_channel_offer_channel *offer)
{
	/*
	 * Setup state for signalling the host.
	 */
	channel->sig_event = VMBUS_EVENT_CONNECTION_ID;

	channel->is_dedicated_interrupt =
		(offer->is_dedicated_interrupt != 0);
	channel->sig_event = offer->connection_id;

	memcpy(&channel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	channel->monitor_grp = (u8)offer->monitorid / 32;
	channel->monitor_bit = (u8)offer->monitorid % 32;
	channel->device_id = hv_get_dev_type(channel);
}
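
/*
 * For reference, the monitor fields set up above split the 8-bit monitorid
 * into a trigger group and a bit within that group: e.g. a (hypothetical)
 * monitorid of 37 yields monitor_grp = 37 / 32 = 1 and
 * monitor_bit = 37 % 32 = 5.
 */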

/*
 * find_primary_channel_by_offer - Get the channel object given the new offer.
 * This is only used in the resume path of hibernation.
 */
static struct vmbus_channel *
find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
{
	struct vmbus_channel *channel = NULL, *iter;
	const guid_t *inst1, *inst2;

	/* Ignore sub-channel offers. */
	if (offer->offer.sub_channel_index != 0)
		return NULL;

	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
		inst1 = &iter->offermsg.offer.if_instance;
		inst2 = &offer->offer.if_instance;

		if (guid_equal(inst1, inst2)) {
			channel = iter;
			break;
		}
	}

	mutex_unlock(&vmbus_connection.channel_mutex);

	return channel;
}

static bool vmbus_is_valid_offer(const struct vmbus_channel_offer_channel *offer)
{
	const guid_t *guid = &offer->offer.if_type;
	u16 i;

	if (!hv_is_isolation_supported())
		return true;

	if (is_hvsock_offer(offer))
		return true;

	for (i = 0; i < ARRAY_SIZE(vmbus_devs); i++) {
		if (guid_equal(guid, &vmbus_devs[i].guid))
			return vmbus_devs[i].allowed_in_isolated;
	}
	return false;
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *oldchannel, *newchannel;
	size_t offer_sz;
	bool co_ring_buffer, co_external_memory;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	trace_vmbus_onoffer(offer);

	if (!vmbus_is_valid_offer(offer)) {
		pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n",
				   offer->child_relid);
		atomic_dec(&vmbus_connection.offer_in_progress);
		return;
	}

	co_ring_buffer = is_co_ring_buffer(offer);
	co_external_memory = is_co_external_memory(offer);
	if (!co_ring_buffer && co_external_memory) {
		pr_err("Invalid offer relid=%d: the ring buffer isn't encrypted\n",
		       offer->child_relid);
		return;
	}
	if (co_ring_buffer || co_external_memory) {
		if (vmbus_proto_version < VERSION_WIN10_V6_0 || !vmbus_is_confidential()) {
			pr_err("Invalid offer relid=%d: no support for confidential VMBus\n",
			       offer->child_relid);
			atomic_dec(&vmbus_connection.offer_in_progress);
			return;
		}
	}

	oldchannel = find_primary_channel_by_offer(offer);

	if (oldchannel != NULL) {
		/*
		 * We're resuming from hibernation: all the sub-channel and
		 * hv_sock channels we had before the hibernation should have
		 * been cleaned up, and now we must be seeing a re-offered
		 * primary channel that we had before the hibernation.
		 */

		/*
		 * { Initially: channel relid = INVALID_RELID,
		 *		channels[valid_relid] = NULL }
		 *
		 * CPU1					CPU2
		 *
		 * [vmbus_onoffer()]			[vmbus_device_release()]
		 *
		 * LOCK channel_mutex			LOCK channel_mutex
		 * STORE channel relid = valid_relid	LOAD r1 = channel relid
		 * MAP_RELID channel			if (r1 != INVALID_RELID)
		 * UNLOCK channel_mutex			  UNMAP_RELID channel
		 *					UNLOCK channel_mutex
		 *
		 * Forbids: r1 == valid_relid &&
		 *	    channels[valid_relid] == channel
		 *
		 * Note.  r1 can be INVALID_RELID only for an hv_sock channel.
		 * None of the hv_sock channels which were present before the
		 * suspend are re-offered upon the resume.  See the WARN_ON()
		 * in hv_process_channel_removal().
		 */
		mutex_lock(&vmbus_connection.channel_mutex);

		atomic_dec(&vmbus_connection.offer_in_progress);

		WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
		/* Fix up the relid. */
		oldchannel->offermsg.child_relid = offer->child_relid;

		offer_sz = sizeof(*offer);
		if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
			/*
			 * This is not an error, since the host can also change
			 * the other field(s) of the offer, e.g. on WS RS5
			 * (Build 17763), the offer->connection_id of the
			 * Mellanox VF vmbus device can change when the host
			 * reoffers the device upon resume.
			 */
			pr_debug("vmbus offer changed: relid=%d\n",
				 offer->child_relid);

			print_hex_dump_debug("Old vmbus offer: ",
					     DUMP_PREFIX_OFFSET, 16, 4,
					     &oldchannel->offermsg, offer_sz,
					     false);
			print_hex_dump_debug("New vmbus offer: ",
					     DUMP_PREFIX_OFFSET, 16, 4,
					     offer, offer_sz, false);

			/* Fix up the old channel. */
			vmbus_setup_channel_state(oldchannel, offer);
		}

		/* Add the channel back to the array of channels. */
		vmbus_channel_map_relid(oldchannel);
		mutex_unlock(&vmbus_connection.channel_mutex);
		return;
	}

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		vmbus_release_relid(offer->child_relid);
		atomic_dec(&vmbus_connection.offer_in_progress);
		pr_err("Unable to allocate channel object\n");
		return;
	}
	newchannel->co_ring_buffer = co_ring_buffer;
	newchannel->co_external_memory = co_external_memory;

	vmbus_setup_channel_state(newchannel, offer);

	vmbus_process_offer(newchannel);
}

static void check_ready_for_suspend_event(void)
{
	/*
	 * If all the sub-channels or hv_sock channels have been cleaned up,
	 * then it's safe to suspend.
	 */
	if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
		complete(&vmbus_connection.ready_for_suspend_event);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	struct device *dev;
	bool clean_up_chan_for_suspend;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;

	trace_vmbus_onoffer_rescind(rescind);

	/*
	 * The offer msg and the corresponding rescind msg
	 * from the host are guaranteed to be ordered -
	 * offer comes in first and then the rescind.
	 * Since we process these events in work elements,
	 * and with preemption, we may end up processing
	 * the events out of order.  We rely on the synchronization
	 * provided by offer_in_progress and by channel_mutex for
	 * ordering these events:
	 *
	 * { Initially: offer_in_progress = 1 }
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_onoffer()]		[vmbus_onoffer_rescind()]
	 *
	 * LOCK channel_mutex		WAIT_ON offer_in_progress == 0
	 * DECREMENT offer_in_progress	LOCK channel_mutex
	 * STORE channels[]		LOAD channels[]
	 * UNLOCK channel_mutex		UNLOCK channel_mutex
	 *
	 * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
	 */

	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
		/*
		 * Wait until no channel offer is currently being
		 * processed.
		 */
		msleep(1);
	}

	mutex_lock(&vmbus_connection.channel_mutex);
	channel = relid2channel(rescind->child_relid);
	if (channel != NULL) {
		/*
		 * Guarantee that no other instance of vmbus_onoffer_rescind()
		 * has got a reference to the channel object.  Synchronize on
		 * &vmbus_connection.channel_mutex.
		 */
		if (channel->rescind_ref) {
			mutex_unlock(&vmbus_connection.channel_mutex);
			return;
		}
		channel->rescind_ref = true;
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel == NULL) {
		/*
		 * We failed in processing the offer message;
		 * we would have cleaned up the relid in that
		 * failure path.
		 */
		return;
	}

	clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
				    is_sub_channel(channel);
	/*
	 * Before setting channel->rescind in vmbus_rescind_cleanup(), we
	 * should make sure the channel callback is not running any more.
	 */
	vmbus_reset_channel_cb(channel);

	/*
	 * Now wait for offer handling to complete.
	 */
	vmbus_rescind_cleanup(channel);
	while (READ_ONCE(channel->probe_done) == false) {
		/*
		 * Wait until the channel's offer handling (probing)
		 * has completed.
		 */
		msleep(1);
	}

	/*
	 * At this point, the rescind handling can proceed safely.
	 */

	if (channel->device_obj) {
		if (channel->chn_rescind_callback) {
			channel->chn_rescind_callback(channel);

			if (clean_up_chan_for_suspend)
				check_ready_for_suspend_event();

			return;
		}
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else if (channel->primary_channel != NULL) {
		/*
		 * Sub-channel is being rescinded.  Following is the channel
		 * close sequence when initiated from the driver (refer to
		 * vmbus_close() for details):
		 * 1. Close all sub-channels first
		 * 2. Then close the primary channel.
		 */
		mutex_lock(&vmbus_connection.channel_mutex);
		if (channel->state == CHANNEL_OPEN_STATE) {
			/*
			 * The channel is currently not open;
			 * it is safe for us to cleanup the channel.
			 */
			hv_process_channel_removal(channel);
		} else {
			complete(&channel->rescind_event);
		}
		mutex_unlock(&vmbus_connection.channel_mutex);
	}

	/* The "channel" may have been freed. Do not access it any longer. */

	if (clean_up_chan_for_suspend)
		check_ready_for_suspend_event();
}

void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
	BUG_ON(!is_hvsock_channel(channel));

	/* We always get a rescind msg when a connection is closed. */
	while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
		msleep(1);

	vmbus_device_unregister(channel->device_obj);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);


/*
 * vmbus_onoffers_delivered -
 * The CHANNELMSG_ALLOFFERS_DELIVERED message arrives after all
 * boot-time offers are delivered. A boot-time offer is for the primary
 * channel for any virtual hardware configured in the VM at the time it boots.
 * Boot-time offers include offers for physical devices assigned to the VM
 * via Hyper-V's Discrete Device Assignment (DDA) functionality that are
 * handled as virtual PCI devices in Linux (e.g., NVMe devices and GPUs).
 * Boot-time offers do not include offers for VMBus sub-channels. Because
 * devices can be hot-added to the VM after it is booted, additional channel
 * offers that aren't boot-time offers can be received at any time after the
 * all-offers-delivered message.
 *
 * SR-IOV NIC Virtual Functions (VFs) assigned to a VM are not considered
 * to be assigned to the VM at boot-time, and offers for VFs may occur after
 * the all-offers-delivered message. VFs are optional accelerators to the
 * synthetic VMBus NIC and are effectively hot-added only after the VMBus
 * NIC channel is opened (once it knows the guest can support it, via the
 * sriov bit in the netvsc protocol).
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
	complete(&vmbus_connection.all_offers_delivered_event);
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we receive a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_open_result *result;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
	unsigned long flags;

	result = (struct vmbus_channel_open_result *)hdr;

	trace_vmbus_onopen_result(result);

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
			openmsg =
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
				       result,
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we receive a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
	unsigned long flags;

	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

	trace_vmbus_ongpadl_created(gpadlcreated);

	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
				       gpadlcreated,
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onmodifychannel_response - Modify Channel response handler.
 *
 * This is invoked when we receive a response to our channel modify request.
 * Find the matching request, copy the response and signal the requesting thread.
 */
static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_modifychannel_response *response;
	struct vmbus_channel_msginfo *msginfo;
	unsigned long flags;

	response = (struct vmbus_channel_modifychannel_response *)hdr;

	trace_vmbus_onmodifychannel_response(response);

	/*
	 * Find the modify msg, copy the response and signal/unblock the wait event.
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) {
		struct vmbus_channel_message_header *responseheader =
				(struct vmbus_channel_message_header *)msginfo->msg;

		if (responseheader->msgtype == CHANNELMSG_MODIFYCHANNEL) {
			struct vmbus_channel_modifychannel *modifymsg;

			modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg;
			if (modifymsg->child_relid == response->child_relid) {
				memcpy(&msginfo->response.modify_response, response,
				       sizeof(*response));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we receive a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
			struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
	unsigned long flags;

	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

	trace_vmbus_ongpadl_torndown(gpadl_torndown);

	/*
	 * Find the teardown msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;

			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
				       gpadl_torndown,
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler
 *
 * This is invoked when we receive a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
		struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
	unsigned long flags;

	version_response = (struct vmbus_channel_version_response *)hdr;

	trace_vmbus_onversion_response(version_response);

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
			       version_response,
			       sizeof(struct vmbus_channel_version_response));
			complete(&msginfo->waitevent);
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/* Channel message dispatch table */
const struct vmbus_channel_message_table_entry
channel_message_table[CHANNELMSG_COUNT] = {
	{ CHANNELMSG_INVALID,			0, NULL, 0},
	{ CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer,
		sizeof(struct vmbus_channel_offer_channel)},
	{ CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind,
		sizeof(struct vmbus_channel_rescind_offer) },
	{ CHANNELMSG_REQUESTOFFERS,		0, NULL, 0},
	{ CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered, 0},
	{ CHANNELMSG_OPENCHANNEL,		0, NULL, 0},
	{ CHANNELMSG_OPENCHANNEL_RESULT,	1, vmbus_onopen_result,
		sizeof(struct vmbus_channel_open_result)},
	{ CHANNELMSG_CLOSECHANNEL,		0, NULL, 0},
	{ CHANNELMSG_GPADL_HEADER,		0, NULL, 0},
	{ CHANNELMSG_GPADL_BODY,		0, NULL, 0},
	{ CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created,
		sizeof(struct vmbus_channel_gpadl_created)},
	{ CHANNELMSG_GPADL_TEARDOWN,		0, NULL, 0},
	{ CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown,
		sizeof(struct vmbus_channel_gpadl_torndown) },
	{ CHANNELMSG_RELID_RELEASED,		0, NULL, 0},
	{ CHANNELMSG_INITIATE_CONTACT,		0, NULL, 0},
	{ CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response,
		sizeof(struct vmbus_channel_version_response)},
	{ CHANNELMSG_UNLOAD,			0, NULL, 0},
	{ CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response, 0},
	{ CHANNELMSG_18,			0, NULL, 0},
	{ CHANNELMSG_19,			0, NULL, 0},
	{ CHANNELMSG_20,			0, NULL, 0},
	{ CHANNELMSG_TL_CONNECT_REQUEST,	0, NULL, 0},
	{ CHANNELMSG_MODIFYCHANNEL,		0, NULL, 0},
	{ CHANNELMSG_TL_CONNECT_RESULT,		0, NULL, 0},
	{ CHANNELMSG_MODIFYCHANNEL_RESPONSE,	1, vmbus_onmodifychannel_response,
		sizeof(struct vmbus_channel_modifychannel_response)},
};
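
/*
 * In each entry above, the second field selects how vmbus_on_msg_dpc()
 * dispatches the handler (entries marked 1 are completion-style handlers
 * that can run directly from the DPC context), and the final field is the
 * minimum payload length that vmbus_on_msg_dpc() validates before the
 * handler is invoked.
 */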

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
{
	trace_vmbus_on_message(hdr);

	/*
	 * vmbus_on_msg_dpc() makes sure the hdr->msgtype here can not go
	 * out of bounds and the message_handler pointer can not be NULL.
	 */
	channel_message_table[hdr->msgtype].message_handler(hdr);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers
 * and wait for all boot-time offers to arrive.
 */
int vmbus_request_offers(void)
{
	struct vmbus_channel_message_header *msg;
	struct vmbus_channel_msginfo *msginfo;
	int ret;

	msginfo = kzalloc(sizeof(*msginfo) +
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
	if (!msginfo)
		return -ENOMEM;

	msg = (struct vmbus_channel_message_header *)msginfo->msg;

	msg->msgtype = CHANNELMSG_REQUESTOFFERS;

	/*
	 * This REQUESTOFFERS message will result in the host sending an all
	 * offers delivered message after all the boot-time offers are sent.
	 */
	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
			     true);

	trace_vmbus_request_offers(ret);

	if (ret != 0) {
		pr_err("Unable to request offers - %d\n", ret);

		goto cleanup;
	}

	/*
	 * Wait for the host to send all boot-time offers.  This is a
	 * best-effort mechanism: if the wait times out, print a warning
	 * and resume execution anyway.
	 */
	if (!wait_for_completion_timeout(&vmbus_connection.all_offers_delivered_event,
					 secs_to_jiffies(60))) {
		pr_warn("timed out waiting for all boot-time offers to be delivered.\n");
	}

	/*
	 * Flush handling of offer messages (which may initiate work on
	 * other work queues).
	 */
	flush_workqueue(vmbus_connection.work_queue);

	/*
	 * Flush the workqueue that processes the incoming offers.  Subchannel
	 * offers and their processing can happen later, so there is no need to
	 * flush that workqueue here.
	 */
	flush_workqueue(vmbus_connection.handle_primary_chan_wq);

cleanup:
	kfree(msginfo);

	return ret;
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				  void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);

void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
				    void (*chn_rescind_cb)(struct vmbus_channel *))
{
	channel->chn_rescind_callback = chn_rescind_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);