// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <linux/export.h>
#include <asm/mshyperv.h>
#include <linux/sched/isolation.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel);

/*
 * Table of the VMBus device classes known to this driver.
 *
 * hv_get_dev_type() walks this table from index HV_IDE up to (but not
 * including) HV_UNKNOWN and returns the matching *index* as the device
 * type, so an entry's position must equal its .dev_type value.
 * NOTE(review): this relies on the HV_* device type values being
 * consecutive and in the order used below - confirm against the enum.
 *
 * vmbus_is_valid_offer() scans the whole table and uses
 * .allowed_in_isolated to decide whether an offer is accepted when
 * hv_is_isolation_supported() is true.
 *
 * The *_GUID macro in each entry is a positional initializer that follows
 * .dev_type; presumably it fills the .guid member read by the lookups above.
 */
const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE,
	  HV_IDE_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* SCSI */
	{ .dev_type = HV_SCSI,
	  HV_SCSI_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = true,
	},

	/* Fibre Channel */
	{ .dev_type = HV_FC,
	  HV_SYNTHFC_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* Synthetic NIC */
	{ .dev_type = HV_NIC,
	  HV_NIC_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = true,
	},

	/* Network Direct */
	{ .dev_type = HV_ND,
	  HV_ND_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* PCIE */
	{ .dev_type = HV_PCIE,
	  HV_PCIE_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB,
	  HV_SYNTHVID_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD,
	  HV_KBD_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Synthetic Mouse */
	{ .dev_type = HV_MOUSE,
	  HV_MOUSE_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* KVP */
	{ .dev_type = HV_KVP,
	  HV_KVP_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Time Sync */
	{ .dev_type = HV_TS,
	  HV_TS_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Heartbeat */
	{ .dev_type = HV_HB,
	  HV_HEART_BEAT_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN,
	  HV_SHUTDOWN_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* File copy */
	/* fcopy has always used a 16 KB ring buffer and has worked well with it for many years */
	{ .pref_ring_size = 0x4000,
	  .dev_type = HV_FCOPY,
	  HV_FCOPY_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Backup */
	{ .dev_type = HV_BACKUP,
	  HV_VSS_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Dynamic Memory */
	{ .dev_type = HV_DM,
	  HV_DM_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/*
	 * Unknown GUID
	 * A 64 KB ring buffer + 4 KB header should be a sufficient size for any
	 * Hyper-V device apart from HV_NIC and HV_SCSI. This entry keeps unknown
	 * devices from falling back to a much bigger (2 MB) ring size.
	 * No GUID is listed here: this is the catch-all entry.
	 */
	{ .pref_ring_size = 0x11000,
	  .dev_type = HV_UNKNOWN,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},
};
EXPORT_SYMBOL_GPL(vmbus_devs);

/*
 * Device GUIDs for which hv_get_dev_type() returns HV_UNKNOWN right away,
 * without searching vmbus_devs[] and without logging "Unknown GUID".
 */
static const struct {
	guid_t guid;
} vmbus_unsupported_devs[] = {
	{ HV_AVMA1_GUID },
	{ HV_AVMA2_GUID },
	{ HV_RDV_GUID },
	{ HV_IMC_GUID },
};

/*
 * The rescinded channel may be blocked waiting for a response from the host;
 * take care of that.
172 */ 173 static void vmbus_rescind_cleanup(struct vmbus_channel *channel) 174 { 175 struct vmbus_channel_msginfo *msginfo; 176 unsigned long flags; 177 178 179 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 180 channel->rescind = true; 181 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 182 msglistentry) { 183 184 if (msginfo->waiting_channel == channel) { 185 complete(&msginfo->waitevent); 186 break; 187 } 188 } 189 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 190 } 191 192 static bool is_unsupported_vmbus_devs(const guid_t *guid) 193 { 194 int i; 195 196 for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++) 197 if (guid_equal(guid, &vmbus_unsupported_devs[i].guid)) 198 return true; 199 return false; 200 } 201 202 static u16 hv_get_dev_type(const struct vmbus_channel *channel) 203 { 204 const guid_t *guid = &channel->offermsg.offer.if_type; 205 u16 i; 206 207 if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid)) 208 return HV_UNKNOWN; 209 210 for (i = HV_IDE; i < HV_UNKNOWN; i++) { 211 if (guid_equal(guid, &vmbus_devs[i].guid)) 212 return i; 213 } 214 pr_info("Unknown GUID: %pUl\n", guid); 215 return i; 216 } 217 218 /** 219 * vmbus_prep_negotiate_resp() - Create default response for Negotiate message 220 * @icmsghdrp: Pointer to msg header structure 221 * @buf: Raw buffer channel data 222 * @buflen: Length of the raw buffer channel data. 223 * @fw_version: The framework versions we can support. 224 * @fw_vercnt: The size of @fw_version. 225 * @srv_version: The service versions we can support. 226 * @srv_vercnt: The size of @srv_version. 227 * @nego_fw_version: The selected framework version. 228 * @nego_srv_version: The selected service version. 229 * 230 * Note: Versions are given in decreasing order. 231 * 232 * Set up and fill in default negotiate response message. 233 * Mainly used by Hyper-V drivers. 
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
				u32 buflen, const int *fw_version, int fw_vercnt,
				const int *srv_version, int srv_vercnt,
				int *nego_fw_version, int *nego_srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i, j;
	bool found_match = false;
	struct icmsg_negotiate *negop;

	/*
	 * Returns true only when both a framework version and a service
	 * version offered in the packet match one of ours; false otherwise.
	 * Versions in @fw_version/@srv_version carry the major number in the
	 * upper 16 bits and the minor number in the lower 16 bits.
	 */

	/* Check that there's enough space for icframe_vercnt, icmsg_vercnt */
	if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) {
		pr_err_ratelimited("Invalid icmsg negotiate\n");
		return false;
	}

	icmsghdrp->icmsgsize = 0x10;
	negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR];

	/*
	 * Until a match is found, the *_major variables temporarily hold the
	 * version *counts* from the packet; they are what gets validated and
	 * logged below, and what is reported back if negotiation fails.
	 */
	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/* Validate negop packet */
	if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
	    icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
	    ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) {
		pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n",
				   icframe_major, icmsg_major);
		goto fw_error;
	}

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < fw_vercnt; i++) {
		fw_major = (fw_version[i] >> 16);
		fw_minor = (fw_version[i] & 0xFFFF);

		/* Framework versions occupy the first icframe_vercnt slots. */
		for (j = 0; j < negop->icframe_vercnt; j++) {
			if ((negop->icversion_data[j].major == fw_major) &&
			    (negop->icversion_data[j].minor == fw_minor)) {
				icframe_major = negop->icversion_data[j].major;
				icframe_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	if (!found_match)
		goto fw_error;

	/* Re-used for the service version search below. */
	found_match = false;

	for (i = 0; i < srv_vercnt; i++) {
		srv_major = (srv_version[i] >> 16);
		srv_minor = (srv_version[i] & 0xFFFF);

		/* Service versions follow the framework versions in the array. */
		for (j = negop->icframe_vercnt;
			(j < negop->icframe_vercnt + negop->icmsg_vercnt);
			j++) {

			if ((negop->icversion_data[j].major == srv_major) &&
				(negop->icversion_data[j].minor == srv_minor)) {

				icmsg_major = negop->icversion_data[j].major;
				icmsg_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	/* The response carries either one version of each kind, or none. */
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	/* Both output pointers are optional. */
	if (nego_fw_version)
		*nego_fw_version = (icframe_major << 16) | icframe_minor;

	if (nego_srv_version)
		*nego_srv_version = (icmsg_major << 16) | icmsg_minor;

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 *
 * The allocation uses GFP_ATOMIC.  Returns NULL if it fails.  The kobject
 * embedded in the channel is not set up here (see the kfree() path in
 * vmbus_process_offer()).
 */
static struct vmbus_channel *alloc_channel(void)
{
	struct vmbus_channel *channel;

	channel = kzalloc_obj(*channel, GFP_ATOMIC);
	if (!channel)
		return NULL;

	spin_lock_init(&channel->sched_lock);
	init_completion(&channel->rescind_event);

	INIT_LIST_HEAD(&channel->sc_list);

	tasklet_init(&channel->callback_event,
		     vmbus_on_event, (unsigned long)channel);

	hv_ringbuffer_pre_init(channel);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 *
 * Stops the callback tasklet, removes the channel's attribute group and
 * drops a reference on the channel's kobject.
 */
static void free_channel(struct vmbus_channel *channel)
{
	tasklet_kill(&channel->callback_event);
	vmbus_remove_channel_attr_group(channel);

	kobject_put(&channel->kobj);
}

/*
 * vmbus_channel_map_relid - Publish @channel in vmbus_connection.channels[]
 *
 * The slot index is the child_relid of the channel's offer.  A relid that is
 * not below MAX_CHANNEL_RELIDS triggers a WARN and nothing is stored.
 * vmbus_connection.relid_hiwater is raised when the new relid exceeds it.
 */
void vmbus_channel_map_relid(struct vmbus_channel *channel)
{
	u32 new_relid = channel->offermsg.child_relid;

	if (WARN_ON(new_relid >= MAX_CHANNEL_RELIDS))
		return;

	/*
	 * This function is always called in the tasklet for the connect CPU.
	 * So updating the relid hiwater mark does not need to be atomic.
	 */
	if (new_relid > READ_ONCE(vmbus_connection.relid_hiwater))
		WRITE_ONCE(vmbus_connection.relid_hiwater, new_relid);

	/*
	 * The mapping of the channel's relid is visible from the CPUs that
	 * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
	 * execute:
	 *
	 *  (a) In the "normal (i.e., not resuming from hibernation)" path,
	 *      the full barrier in virt_store_mb() guarantees that the store
	 *      is propagated to all CPUs before the add_channel_work work
	 *      is queued.  In turn, add_channel_work is queued before the
	 *      channel's ring buffer is allocated/initialized and the
	 *      OPENCHANNEL message for the channel is sent in vmbus_open().
	 *      Hyper-V won't start sending the interrupts for the channel
	 *      before the OPENCHANNEL message is acked.  The memory barrier
	 *      in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
	 *      that vmbus_chan_sched() must find the channel's relid in
	 *      recv_int_page before retrieving the channel pointer from the
	 *      array of channels.
	 *
	 *  (b) In the "resuming from hibernation" path, the virt_store_mb()
	 *      guarantees that the store is propagated to all CPUs before
	 *      the VMBus connection is marked as ready for the resume event
	 *      (cf. check_ready_for_resume_event()).  The interrupt handler
	 *      of the VMBus driver and vmbus_chan_sched() can not run before
	 *      vmbus_bus_resume() has completed execution (cf. resume_noirq).
	 */
	virt_store_mb(vmbus_connection.channels[new_relid], channel);
}

/*
 * vmbus_channel_unmap_relid - Clear @channel's slot in the channels[] array
 *
 * A relid that is not below MAX_CHANNEL_RELIDS triggers a WARN and nothing
 * is written.
 */
void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
{
	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
		return;
	WRITE_ONCE(
		vmbus_connection.channels[channel->offermsg.child_relid],
		NULL);
}

/*
 * vmbus_release_relid - Post a CHANNELMSG_RELID_RELEASED message for @relid
 *
 * The result of vmbus_post_msg() is only traced; it is not reported to the
 * caller.
 */
static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;
	int ret;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	ret = vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
			     true);

	trace_vmbus_release_relid(&msg, ret);
}

/*
 * hv_process_channel_removal - Tear down a rescinded channel
 *
 * Unmaps the channel's relid, unlinks the channel from the list it is on
 * (chn_list for a primary channel, the primary's sc_list for a sub-channel),
 * gives back its CPU if it is a "perf" channel, releases the relid and frees
 * the channel.
 *
 * The caller must hold vmbus_connection.channel_mutex and must have set
 * channel->rescind; the latter is enforced with BUG_ON().
 */
void hv_process_channel_removal(struct vmbus_channel *channel)
{
	lockdep_assert_held(&vmbus_connection.channel_mutex);
	BUG_ON(!channel->rescind);

	/*
	 * hv_process_channel_removal() could find INVALID_RELID only for
	 * hv_sock channels.  See the inline comments in vmbus_onoffer().
	 */
	WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
		!is_hvsock_channel(channel));

	/*
	 * Upon suspend, an in-use hv_sock channel is removed from the array of
	 * channels and the relid is invalidated.  After hibernation, when the
	 * user-space application destroys the channel, it's unnecessary and
	 * unsafe to remove the channel from the array of channels.  See also
	 * the inline comments before the call of vmbus_release_relid() below.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_channel_unmap_relid(channel);

	if (channel->primary_channel == NULL)
		list_del(&channel->listentry);
	else
		list_del(&channel->sc_list);

	/*
	 * If this is a "perf" channel, updates the hv_numa_map[] masks so that
	 * init_vp_index() can (re-)use the CPU.
	 */
	if (hv_is_perf_channel(channel))
		hv_clear_allocated_cpu(channel->target_cpu);

	/*
	 * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
	 * the relid is invalidated; after hibernation, when the user-space app
	 * destroys the channel, the relid is INVALID_RELID, and in this case
	 * it's unnecessary and unsafe to release the old relid, since the same
	 * relid can refer to a completely different channel now.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_release_relid(channel->offermsg.child_relid);

	free_channel(channel);
}

/*
 * vmbus_free_channels - Unregister the device of every channel on chn_list
 *
 * Each channel is flagged as rescinded first.  The _safe iterator is used,
 * so the current entry may be unlinked while the list is being walked.
 */
void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
		listentry) {
		/* hv_process_channel_removal() needs this */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
}

/*
 * vmbus_add_channel_work - Work item that finishes adding an offered channel
 *
 * For a sub-channel: adds the channel kobject under the primary channel's
 * device and invokes the primary's sc_creation_callback, if one is set.
 * For a primary channel: creates and registers the hv_device.
 * On failure the channel is unlinked, unmapped, its relid is released and
 * the channel is freed.
 *
 * Note: the function can run concurrently for primary/sub channels.
 */
static void vmbus_add_channel_work(struct work_struct *work)
{
	struct vmbus_channel *newchannel =
		container_of(work, struct vmbus_channel, add_channel_work);
	struct vmbus_channel *primary_channel = newchannel->primary_channel;
	int ret;

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly.
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (primary_channel != NULL) {
		/* newchannel is a sub-channel. */
		struct hv_device *dev = primary_channel->device_obj;

		if (vmbus_add_channel_kobj(dev, newchannel))
			goto err_deq_chan;

		if (primary_channel->sc_creation_callback != NULL)
			primary_channel->sc_creation_callback(newchannel);

		newchannel->probe_done = true;
		return;
	}

	/*
	 * Start the process of binding the primary channel to the driver
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = newchannel->device_id;
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 *
	 * If vmbus_device_register() fails, the 'device_obj' is freed in
	 * vmbus_device_release() as called by device_unregister() in the
	 * error path of vmbus_device_register(). In the outside error
	 * path, there's no need to free it.
	 */
	ret = vmbus_device_register(newchannel->device_obj);

	if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		goto err_deq_chan;
	}

	newchannel->probe_done = true;
	return;

err_deq_chan:
	mutex_lock(&vmbus_connection.channel_mutex);

	/*
	 * We need to set the flag, otherwise
	 * vmbus_onoffer_rescind() can be blocked.
	 */
	newchannel->probe_done = true;

	if (primary_channel == NULL)
		list_del(&newchannel->listentry);
	else
		list_del(&newchannel->sc_list);

	/* vmbus_process_offer() has mapped the channel. */
	vmbus_channel_unmap_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);

	vmbus_release_relid(newchannel->offermsg.child_relid);

	free_channel(newchannel);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 *
 * Picks the target CPU, links the channel into chn_list (primary) or into
 * the matching primary's sc_list (sub-channel), maps its relid and queues
 * vmbus_add_channel_work() on the primary or sub-channel workqueue.
 * Drops vmbus_connection.offer_in_progress once channel_mutex is held.
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	struct workqueue_struct *wq;
	bool fnew = true;

	/*
	 * Synchronize vmbus_process_offer() and CPU hotplugging:
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_process_offer()]	[Hot removal of the CPU]
	 *
	 * CPU_READ_LOCK		CPUS_WRITE_LOCK
	 * LOAD cpu_online_mask		SEARCH chn_list
	 * STORE target_cpu		LOAD target_cpu
	 * INSERT chn_list		STORE cpu_online_mask
	 * CPUS_READ_UNLOCK		CPUS_WRITE_UNLOCK
	 *
	 * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
	 *              CPU2's SEARCH from *not* seeing CPU1's INSERT
	 *
	 * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
	 *              CPU2's LOAD from *not* seeing CPU1's STORE
	 */
	cpus_read_lock();

	/*
	 * Serializes the modifications of the chn_list list as well as
	 * the accesses to next_numa_node_id in init_vp_index().
	 */
	mutex_lock(&vmbus_connection.channel_mutex);

	/*
	 * An existing channel with the same interface type and instance
	 * GUIDs is the primary channel of this offer; in that case the new
	 * channel is treated as a sub-channel.
	 */
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (guid_equal(&channel->offermsg.offer.if_type,
			       &newchannel->offermsg.offer.if_type) &&
		    guid_equal(&channel->offermsg.offer.if_instance,
			       &newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			newchannel->primary_channel = channel;
			break;
		}
	}

	init_vp_index(newchannel);

	/* Remember the channels that should be cleaned up upon suspend. */
	if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
		atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);

	/*
	 * Now that we have acquired the channel_mutex,
	 * we can release the potentially racing rescind thread.
	 */
	atomic_dec(&vmbus_connection.offer_in_progress);

	if (fnew) {
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);
	} else {
		/*
		 * Check to see if this is a valid sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index == 0) {
			mutex_unlock(&vmbus_connection.channel_mutex);
			cpus_read_unlock();
			/*
			 * Don't call free_channel(), because newchannel->kobj
			 * is not initialized yet.
			 */
			kfree(newchannel);
			WARN_ON_ONCE(1);
			return;
		}
		/*
		 * Process the sub-channel.
		 */
		list_add_tail(&newchannel->sc_list, &channel->sc_list);
	}

	vmbus_channel_map_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);
	cpus_read_unlock();

	/*
	 * vmbus_process_offer() mustn't call channel->sc_creation_callback()
	 * directly for sub-channels, because sc_creation_callback() ->
	 * vmbus_open() may never get the host's response to the
	 * OPEN_CHANNEL message (the host may rescind a channel at any time,
	 * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
	 * may not wake up the vmbus_open() as it's blocked due to a non-zero
	 * vmbus_connection.offer_in_progress, and finally we have a deadlock.
	 *
	 * The above is also true for primary channels, if the related device
	 * drivers use sync probing mode by default.
	 *
	 * And, usually the handling of primary channels and sub-channels can
	 * depend on each other, so we should offload them to different
	 * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
	 * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
	 * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
	 * and waits for all the sub-channels to appear, but the latter
	 * can't get the rtnl_lock and this blocks the handling of
	 * sub-channels.
	 */
	INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
	wq = fnew ? vmbus_connection.handle_primary_chan_wq :
		    vmbus_connection.handle_sub_chan_wq;
	queue_work(wq, &newchannel->add_channel_work);
}

/*
 * Check whether @cpu is already the target CPU of another channel of the
 * same device, i.e. of @chn's primary channel or of one of that primary's
 * sub-channels other than @chn itself.  Always false when @chn has no
 * primary channel.
 * It should only be called by init_vp_index().
 */
static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn)
{
	struct vmbus_channel *primary = chn->primary_channel;
	struct vmbus_channel *sc;

	lockdep_assert_held(&vmbus_connection.channel_mutex);

	if (!primary)
		return false;

	if (primary->target_cpu == cpu)
		return true;

	list_for_each_entry(sc, &primary->sc_list, sc_list)
		if (sc != chn && sc->target_cpu == cpu)
			return true;

	return false;
}

/*
 * We use this state to statically distribute the channel interrupt load.
 * It is the next NUMA node id init_vp_index() will try.
 */
static int next_numa_node_id;

/*
 * We can statically distribute the incoming channel interrupt load
 * by binding a channel to VCPU.
 *
 * For non-performance critical channels we assign the VMBUS_CONNECT_CPU.
 * Performance critical channels will be distributed evenly among all
 * the available NUMA nodes.  Once the node is assigned, we will assign
 * the CPU based on a simple round robin scheme.
748 */ 749 static void init_vp_index(struct vmbus_channel *channel) 750 { 751 bool perf_chn = hv_is_perf_channel(channel); 752 u32 i, ncpu = num_online_cpus(); 753 cpumask_var_t available_mask; 754 struct cpumask *allocated_mask; 755 const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ); 756 u32 target_cpu; 757 int numa_node; 758 759 if (!perf_chn || 760 !alloc_cpumask_var(&available_mask, GFP_KERNEL) || 761 cpumask_empty(hk_mask)) { 762 /* 763 * If the channel is not a performance critical 764 * channel, bind it to VMBUS_CONNECT_CPU. 765 * In case alloc_cpumask_var() fails, bind it to 766 * VMBUS_CONNECT_CPU. 767 * If all the cpus are isolated, bind it to 768 * VMBUS_CONNECT_CPU. 769 */ 770 channel->target_cpu = VMBUS_CONNECT_CPU; 771 if (perf_chn) 772 hv_set_allocated_cpu(VMBUS_CONNECT_CPU); 773 return; 774 } 775 776 for (i = 1; i <= ncpu + 1; i++) { 777 while (true) { 778 numa_node = next_numa_node_id++; 779 if (numa_node == nr_node_ids) { 780 next_numa_node_id = 0; 781 continue; 782 } 783 if (cpumask_empty(cpumask_of_node(numa_node))) 784 continue; 785 break; 786 } 787 allocated_mask = &hv_context.hv_numa_map[numa_node]; 788 789 retry: 790 cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node)); 791 cpumask_and(available_mask, available_mask, hk_mask); 792 793 if (cpumask_empty(available_mask)) { 794 /* 795 * We have cycled through all the CPUs in the node; 796 * reset the allocated map. 
797 */ 798 cpumask_clear(allocated_mask); 799 goto retry; 800 } 801 802 target_cpu = cpumask_first(available_mask); 803 cpumask_set_cpu(target_cpu, allocated_mask); 804 805 if (channel->offermsg.offer.sub_channel_index >= ncpu || 806 i > ncpu || !hv_cpuself_used(target_cpu, channel)) 807 break; 808 } 809 810 channel->target_cpu = target_cpu; 811 812 free_cpumask_var(available_mask); 813 } 814 815 #define UNLOAD_DELAY_UNIT_MS 10 /* 10 milliseconds */ 816 #define UNLOAD_WAIT_MS (100*1000) /* 100 seconds */ 817 #define UNLOAD_WAIT_LOOPS (UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS) 818 #define UNLOAD_MSG_MS (5*1000) /* Every 5 seconds */ 819 #define UNLOAD_MSG_LOOPS (UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS) 820 821 static void vmbus_wait_for_unload(void) 822 { 823 int cpu; 824 void *page_addr; 825 struct hv_message *msg; 826 struct vmbus_channel_message_header *hdr; 827 u32 message_type, i; 828 829 /* 830 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was 831 * used for initial contact or to CPU0 depending on host version. When 832 * we're crashing on a different CPU let's hope that IRQ handler on 833 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still 834 * functional and vmbus_unload_response() will complete 835 * vmbus_connection.unload_event. If not, the last thing we can do is 836 * read message pages for all CPUs directly. 837 * 838 * Wait up to 100 seconds since an Azure host must writeback any dirty 839 * data in its disk cache before the VMbus UNLOAD request will 840 * complete. This flushing has been empirically observed to take up 841 * to 50 seconds in cases with a lot of dirty data, so allow additional 842 * leeway and for inaccuracies in mdelay(). But eventually time out so 843 * that the panic path can't get hung forever in case the response 844 * message isn't seen. 
845 */ 846 for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) { 847 if (completion_done(&vmbus_connection.unload_event)) 848 goto completed; 849 850 for_each_present_cpu(cpu) { 851 struct hv_per_cpu_context *hv_cpu 852 = per_cpu_ptr(hv_context.cpu_context, cpu); 853 854 /* 855 * In a CoCo VM the hyp_synic_message_page is not allocated 856 * in hv_synic_alloc(). Instead it is set/cleared in 857 * hv_hyp_synic_enable_regs() and hv_hyp_synic_disable_regs() 858 * such that it is set only when the CPU is online. If 859 * not all present CPUs are online, the message page 860 * might be NULL, so skip such CPUs. 861 */ 862 page_addr = hv_cpu->hyp_synic_message_page; 863 if (!page_addr) 864 continue; 865 866 msg = (struct hv_message *)page_addr 867 + VMBUS_MESSAGE_SINT; 868 869 message_type = READ_ONCE(msg->header.message_type); 870 if (message_type == HVMSG_NONE) 871 continue; 872 873 hdr = (struct vmbus_channel_message_header *) 874 msg->u.payload; 875 876 if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE) 877 complete(&vmbus_connection.unload_event); 878 879 vmbus_signal_eom(msg, message_type); 880 } 881 882 /* 883 * Give a notice periodically so someone watching the 884 * serial output won't think it is completely hung. 885 */ 886 if (!(i % UNLOAD_MSG_LOOPS)) 887 pr_notice("Waiting for VMBus UNLOAD to complete\n"); 888 889 mdelay(UNLOAD_DELAY_UNIT_MS); 890 } 891 pr_err("Continuing even though VMBus UNLOAD did not complete\n"); 892 893 completed: 894 /* 895 * We're crashing and already got the UNLOAD_RESPONSE, cleanup all 896 * maybe-pending messages on all CPUs to be able to receive new 897 * messages after we reconnect. 
898 */ 899 for_each_present_cpu(cpu) { 900 struct hv_per_cpu_context *hv_cpu 901 = per_cpu_ptr(hv_context.cpu_context, cpu); 902 903 page_addr = hv_cpu->hyp_synic_message_page; 904 if (!page_addr) 905 continue; 906 907 msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; 908 msg->header.message_type = HVMSG_NONE; 909 } 910 } 911 912 /* 913 * vmbus_unload_response - Handler for the unload response. 914 */ 915 static void vmbus_unload_response(struct vmbus_channel_message_header *hdr) 916 { 917 /* 918 * This is a global event; just wakeup the waiting thread. 919 * Once we successfully unload, we can cleanup the monitor state. 920 * 921 * NB. A malicious or compromised Hyper-V could send a spurious 922 * message of type CHANNELMSG_UNLOAD_RESPONSE, and trigger a call 923 * of the complete() below. Make sure that unload_event has been 924 * initialized by the time this complete() is executed. 925 */ 926 complete(&vmbus_connection.unload_event); 927 } 928 929 void vmbus_initiate_unload(bool crash) 930 { 931 struct vmbus_channel_message_header hdr; 932 933 if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED) 934 return; 935 936 /* Pre-Win2012R2 hosts don't support reconnect */ 937 if (vmbus_proto_version < VERSION_WIN8_1) 938 return; 939 940 reinit_completion(&vmbus_connection.unload_event); 941 memset(&hdr, 0, sizeof(struct vmbus_channel_message_header)); 942 hdr.msgtype = CHANNELMSG_UNLOAD; 943 vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header), 944 !crash); 945 946 /* 947 * vmbus_initiate_unload() is also called on crash and the crash can be 948 * happening in an interrupt context, where scheduling is impossible. 
949 */ 950 if (!crash) 951 wait_for_completion(&vmbus_connection.unload_event); 952 else 953 vmbus_wait_for_unload(); 954 } 955 EXPORT_SYMBOL_GPL(vmbus_initiate_unload); 956 957 static void vmbus_setup_channel_state(struct vmbus_channel *channel, 958 struct vmbus_channel_offer_channel *offer) 959 { 960 /* 961 * Setup state for signalling the host. 962 */ 963 channel->sig_event = VMBUS_EVENT_CONNECTION_ID; 964 965 channel->is_dedicated_interrupt = 966 (offer->is_dedicated_interrupt != 0); 967 channel->sig_event = offer->connection_id; 968 969 memcpy(&channel->offermsg, offer, 970 sizeof(struct vmbus_channel_offer_channel)); 971 channel->monitor_grp = (u8)offer->monitorid / 32; 972 channel->monitor_bit = (u8)offer->monitorid % 32; 973 channel->device_id = hv_get_dev_type(channel); 974 } 975 976 /* 977 * find_primary_channel_by_offer - Get the channel object given the new offer. 978 * This is only used in the resume path of hibernation. 979 */ 980 static struct vmbus_channel * 981 find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer) 982 { 983 struct vmbus_channel *channel = NULL, *iter; 984 const guid_t *inst1, *inst2; 985 986 /* Ignore sub-channel offers. 
*/ 987 if (offer->offer.sub_channel_index != 0) 988 return NULL; 989 990 mutex_lock(&vmbus_connection.channel_mutex); 991 992 list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) { 993 inst1 = &iter->offermsg.offer.if_instance; 994 inst2 = &offer->offer.if_instance; 995 996 if (guid_equal(inst1, inst2)) { 997 channel = iter; 998 break; 999 } 1000 } 1001 1002 mutex_unlock(&vmbus_connection.channel_mutex); 1003 1004 return channel; 1005 } 1006 1007 static bool vmbus_is_valid_offer(const struct vmbus_channel_offer_channel *offer) 1008 { 1009 const guid_t *guid = &offer->offer.if_type; 1010 u16 i; 1011 1012 if (!hv_is_isolation_supported()) 1013 return true; 1014 1015 if (is_hvsock_offer(offer)) 1016 return true; 1017 1018 for (i = 0; i < ARRAY_SIZE(vmbus_devs); i++) { 1019 if (guid_equal(guid, &vmbus_devs[i].guid)) 1020 return vmbus_devs[i].allowed_in_isolated; 1021 } 1022 return false; 1023 } 1024 1025 /* 1026 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition. 
1027 * 1028 */ 1029 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) 1030 { 1031 struct vmbus_channel_offer_channel *offer; 1032 struct vmbus_channel *oldchannel, *newchannel; 1033 size_t offer_sz; 1034 bool co_ring_buffer, co_external_memory; 1035 1036 offer = (struct vmbus_channel_offer_channel *)hdr; 1037 1038 trace_vmbus_onoffer(offer); 1039 1040 if (!vmbus_is_valid_offer(offer)) { 1041 pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n", 1042 offer->child_relid); 1043 atomic_dec(&vmbus_connection.offer_in_progress); 1044 return; 1045 } 1046 1047 co_ring_buffer = is_co_ring_buffer(offer); 1048 co_external_memory = is_co_external_memory(offer); 1049 if (!co_ring_buffer && co_external_memory) { 1050 pr_err("Invalid offer relid=%d: the ring buffer isn't encrypted\n", 1051 offer->child_relid); 1052 return; 1053 } 1054 if (co_ring_buffer || co_external_memory) { 1055 if (vmbus_proto_version < VERSION_WIN10_V6_0 || !vmbus_is_confidential()) { 1056 pr_err("Invalid offer relid=%d: no support for confidential VMBus\n", 1057 offer->child_relid); 1058 atomic_dec(&vmbus_connection.offer_in_progress); 1059 return; 1060 } 1061 } 1062 1063 oldchannel = find_primary_channel_by_offer(offer); 1064 1065 if (oldchannel != NULL) { 1066 /* 1067 * We're resuming from hibernation: all the sub-channel and 1068 * hv_sock channels we had before the hibernation should have 1069 * been cleaned up, and now we must be seeing a re-offered 1070 * primary channel that we had before the hibernation. 
1071 */ 1072 1073 /* 1074 * { Initially: channel relid = INVALID_RELID, 1075 * channels[valid_relid] = NULL } 1076 * 1077 * CPU1 CPU2 1078 * 1079 * [vmbus_onoffer()] [vmbus_device_release()] 1080 * 1081 * LOCK channel_mutex LOCK channel_mutex 1082 * STORE channel relid = valid_relid LOAD r1 = channel relid 1083 * MAP_RELID channel if (r1 != INVALID_RELID) 1084 * UNLOCK channel_mutex UNMAP_RELID channel 1085 * UNLOCK channel_mutex 1086 * 1087 * Forbids: r1 == valid_relid && 1088 * channels[valid_relid] == channel 1089 * 1090 * Note. r1 can be INVALID_RELID only for an hv_sock channel. 1091 * None of the hv_sock channels which were present before the 1092 * suspend are re-offered upon the resume. See the WARN_ON() 1093 * in hv_process_channel_removal(). 1094 */ 1095 mutex_lock(&vmbus_connection.channel_mutex); 1096 1097 atomic_dec(&vmbus_connection.offer_in_progress); 1098 1099 WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID); 1100 /* Fix up the relid. */ 1101 oldchannel->offermsg.child_relid = offer->child_relid; 1102 1103 offer_sz = sizeof(*offer); 1104 if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) { 1105 /* 1106 * This is not an error, since the host can also change 1107 * the other field(s) of the offer, e.g. on WS RS5 1108 * (Build 17763), the offer->connection_id of the 1109 * Mellanox VF vmbus device can change when the host 1110 * reoffers the device upon resume. 1111 */ 1112 pr_debug("vmbus offer changed: relid=%d\n", 1113 offer->child_relid); 1114 1115 print_hex_dump_debug("Old vmbus offer: ", 1116 DUMP_PREFIX_OFFSET, 16, 4, 1117 &oldchannel->offermsg, offer_sz, 1118 false); 1119 print_hex_dump_debug("New vmbus offer: ", 1120 DUMP_PREFIX_OFFSET, 16, 4, 1121 offer, offer_sz, false); 1122 1123 /* Fix up the old channel. */ 1124 vmbus_setup_channel_state(oldchannel, offer); 1125 } 1126 1127 /* Add the channel back to the array of channels. 
*/ 1128 vmbus_channel_map_relid(oldchannel); 1129 mutex_unlock(&vmbus_connection.channel_mutex); 1130 return; 1131 } 1132 1133 /* Allocate the channel object and save this offer. */ 1134 newchannel = alloc_channel(); 1135 if (!newchannel) { 1136 vmbus_release_relid(offer->child_relid); 1137 atomic_dec(&vmbus_connection.offer_in_progress); 1138 pr_err("Unable to allocate channel object\n"); 1139 return; 1140 } 1141 newchannel->co_ring_buffer = co_ring_buffer; 1142 newchannel->co_external_memory = co_external_memory; 1143 1144 vmbus_setup_channel_state(newchannel, offer); 1145 1146 vmbus_process_offer(newchannel); 1147 } 1148 1149 static void check_ready_for_suspend_event(void) 1150 { 1151 /* 1152 * If all the sub-channels or hv_sock channels have been cleaned up, 1153 * then it's safe to suspend. 1154 */ 1155 if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend)) 1156 complete(&vmbus_connection.ready_for_suspend_event); 1157 } 1158 1159 /* 1160 * vmbus_onoffer_rescind - Rescind offer handler. 1161 * 1162 * We queue a work item to process this offer synchronously 1163 */ 1164 static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) 1165 { 1166 struct vmbus_channel_rescind_offer *rescind; 1167 struct vmbus_channel *channel; 1168 struct device *dev; 1169 bool clean_up_chan_for_suspend; 1170 1171 rescind = (struct vmbus_channel_rescind_offer *)hdr; 1172 1173 trace_vmbus_onoffer_rescind(rescind); 1174 1175 /* 1176 * The offer msg and the corresponding rescind msg 1177 * from the host are guranteed to be ordered - 1178 * offer comes in first and then the rescind. 1179 * Since we process these events in work elements, 1180 * and with preemption, we may end up processing 1181 * the events out of order. 
We rely on the synchronization 1182 * provided by offer_in_progress and by channel_mutex for 1183 * ordering these events: 1184 * 1185 * { Initially: offer_in_progress = 1 } 1186 * 1187 * CPU1 CPU2 1188 * 1189 * [vmbus_onoffer()] [vmbus_onoffer_rescind()] 1190 * 1191 * LOCK channel_mutex WAIT_ON offer_in_progress == 0 1192 * DECREMENT offer_in_progress LOCK channel_mutex 1193 * STORE channels[] LOAD channels[] 1194 * UNLOCK channel_mutex UNLOCK channel_mutex 1195 * 1196 * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE 1197 */ 1198 1199 while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { 1200 /* 1201 * We wait here until any channel offer is currently 1202 * being processed. 1203 */ 1204 msleep(1); 1205 } 1206 1207 mutex_lock(&vmbus_connection.channel_mutex); 1208 channel = relid2channel(rescind->child_relid); 1209 if (channel != NULL) { 1210 /* 1211 * Guarantee that no other instance of vmbus_onoffer_rescind() 1212 * has got a reference to the channel object. Synchronize on 1213 * &vmbus_connection.channel_mutex. 1214 */ 1215 if (channel->rescind_ref) { 1216 mutex_unlock(&vmbus_connection.channel_mutex); 1217 return; 1218 } 1219 channel->rescind_ref = true; 1220 } 1221 mutex_unlock(&vmbus_connection.channel_mutex); 1222 1223 if (channel == NULL) { 1224 /* 1225 * We failed in processing the offer message; 1226 * we would have cleaned up the relid in that 1227 * failure path. 1228 */ 1229 return; 1230 } 1231 1232 clean_up_chan_for_suspend = is_hvsock_channel(channel) || 1233 is_sub_channel(channel); 1234 /* 1235 * Before setting channel->rescind in vmbus_rescind_cleanup(), we 1236 * should make sure the channel callback is not running any more. 1237 */ 1238 vmbus_reset_channel_cb(channel); 1239 1240 /* 1241 * Now wait for offer handling to complete. 1242 */ 1243 vmbus_rescind_cleanup(channel); 1244 while (READ_ONCE(channel->probe_done) == false) { 1245 /* 1246 * We wait here until any channel offer is currently 1247 * being processed. 
1248 */ 1249 msleep(1); 1250 } 1251 1252 /* 1253 * At this point, the rescind handling can proceed safely. 1254 */ 1255 1256 if (channel->device_obj) { 1257 if (channel->chn_rescind_callback) { 1258 channel->chn_rescind_callback(channel); 1259 1260 if (clean_up_chan_for_suspend) 1261 check_ready_for_suspend_event(); 1262 1263 return; 1264 } 1265 /* 1266 * We will have to unregister this device from the 1267 * driver core. 1268 */ 1269 dev = get_device(&channel->device_obj->device); 1270 if (dev) { 1271 vmbus_device_unregister(channel->device_obj); 1272 put_device(dev); 1273 } 1274 } else if (channel->primary_channel != NULL) { 1275 /* 1276 * Sub-channel is being rescinded. Following is the channel 1277 * close sequence when initiated from the driveri (refer to 1278 * vmbus_close() for details): 1279 * 1. Close all sub-channels first 1280 * 2. Then close the primary channel. 1281 */ 1282 mutex_lock(&vmbus_connection.channel_mutex); 1283 if (channel->state == CHANNEL_OPEN_STATE) { 1284 /* 1285 * The channel is currently not open; 1286 * it is safe for us to cleanup the channel. 1287 */ 1288 hv_process_channel_removal(channel); 1289 } else { 1290 complete(&channel->rescind_event); 1291 } 1292 mutex_unlock(&vmbus_connection.channel_mutex); 1293 } 1294 1295 /* The "channel" may have been freed. Do not access it any longer. */ 1296 1297 if (clean_up_chan_for_suspend) 1298 check_ready_for_suspend_event(); 1299 } 1300 1301 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) 1302 { 1303 BUG_ON(!is_hvsock_channel(channel)); 1304 1305 /* We always get a rescind msg when a connection is closed. */ 1306 while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind)) 1307 msleep(1); 1308 1309 vmbus_device_unregister(channel->device_obj); 1310 } 1311 EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); 1312 1313 1314 /* 1315 * vmbus_onoffers_delivered - 1316 * The CHANNELMSG_ALLOFFERS_DELIVERED message arrives after all 1317 * boot-time offers are delivered. 
A boot-time offer is for the primary 1318 * channel for any virtual hardware configured in the VM at the time it boots. 1319 * Boot-time offers include offers for physical devices assigned to the VM 1320 * via Hyper-V's Discrete Device Assignment (DDA) functionality that are 1321 * handled as virtual PCI devices in Linux (e.g., NVMe devices and GPUs). 1322 * Boot-time offers do not include offers for VMBus sub-channels. Because 1323 * devices can be hot-added to the VM after it is booted, additional channel 1324 * offers that aren't boot-time offers can be received at any time after the 1325 * all-offers-delivered message. 1326 * 1327 * SR-IOV NIC Virtual Functions (VFs) assigned to a VM are not considered 1328 * to be assigned to the VM at boot-time, and offers for VFs may occur after 1329 * the all-offers-delivered message. VFs are optional accelerators to the 1330 * synthetic VMBus NIC and are effectively hot-added only after the VMBus 1331 * NIC channel is opened (once it knows the guest can support it, via the 1332 * sriov bit in the netvsc protocol). 1333 */ 1334 static void vmbus_onoffers_delivered( 1335 struct vmbus_channel_message_header *hdr) 1336 { 1337 complete(&vmbus_connection.all_offers_delivered_event); 1338 } 1339 1340 /* 1341 * vmbus_onopen_result - Open result handler. 1342 * 1343 * This is invoked when we received a response to our channel open request. 1344 * Find the matching request, copy the response and signal the requesting 1345 * thread. 
1346 */ 1347 static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr) 1348 { 1349 struct vmbus_channel_open_result *result; 1350 struct vmbus_channel_msginfo *msginfo; 1351 struct vmbus_channel_message_header *requestheader; 1352 struct vmbus_channel_open_channel *openmsg; 1353 unsigned long flags; 1354 1355 result = (struct vmbus_channel_open_result *)hdr; 1356 1357 trace_vmbus_onopen_result(result); 1358 1359 /* 1360 * Find the open msg, copy the result and signal/unblock the wait event 1361 */ 1362 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1363 1364 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1365 msglistentry) { 1366 requestheader = 1367 (struct vmbus_channel_message_header *)msginfo->msg; 1368 1369 if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) { 1370 openmsg = 1371 (struct vmbus_channel_open_channel *)msginfo->msg; 1372 if (openmsg->child_relid == result->child_relid && 1373 openmsg->openid == result->openid) { 1374 memcpy(&msginfo->response.open_result, 1375 result, 1376 sizeof( 1377 struct vmbus_channel_open_result)); 1378 complete(&msginfo->waitevent); 1379 break; 1380 } 1381 } 1382 } 1383 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1384 } 1385 1386 /* 1387 * vmbus_ongpadl_created - GPADL created handler. 1388 * 1389 * This is invoked when we received a response to our gpadl create request. 1390 * Find the matching request, copy the response and signal the requesting 1391 * thread. 
1392 */ 1393 static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr) 1394 { 1395 struct vmbus_channel_gpadl_created *gpadlcreated; 1396 struct vmbus_channel_msginfo *msginfo; 1397 struct vmbus_channel_message_header *requestheader; 1398 struct vmbus_channel_gpadl_header *gpadlheader; 1399 unsigned long flags; 1400 1401 gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr; 1402 1403 trace_vmbus_ongpadl_created(gpadlcreated); 1404 1405 /* 1406 * Find the establish msg, copy the result and signal/unblock the wait 1407 * event 1408 */ 1409 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1410 1411 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1412 msglistentry) { 1413 requestheader = 1414 (struct vmbus_channel_message_header *)msginfo->msg; 1415 1416 if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) { 1417 gpadlheader = 1418 (struct vmbus_channel_gpadl_header *)requestheader; 1419 1420 if ((gpadlcreated->child_relid == 1421 gpadlheader->child_relid) && 1422 (gpadlcreated->gpadl == gpadlheader->gpadl)) { 1423 memcpy(&msginfo->response.gpadl_created, 1424 gpadlcreated, 1425 sizeof( 1426 struct vmbus_channel_gpadl_created)); 1427 complete(&msginfo->waitevent); 1428 break; 1429 } 1430 } 1431 } 1432 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1433 } 1434 1435 /* 1436 * vmbus_onmodifychannel_response - Modify Channel response handler. 1437 * 1438 * This is invoked when we received a response to our channel modify request. 1439 * Find the matching request, copy the response and signal the requesting thread. 
1440 */ 1441 static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr) 1442 { 1443 struct vmbus_channel_modifychannel_response *response; 1444 struct vmbus_channel_msginfo *msginfo; 1445 unsigned long flags; 1446 1447 response = (struct vmbus_channel_modifychannel_response *)hdr; 1448 1449 trace_vmbus_onmodifychannel_response(response); 1450 1451 /* 1452 * Find the modify msg, copy the response and signal/unblock the wait event. 1453 */ 1454 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1455 1456 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) { 1457 struct vmbus_channel_message_header *responseheader = 1458 (struct vmbus_channel_message_header *)msginfo->msg; 1459 1460 if (responseheader->msgtype == CHANNELMSG_MODIFYCHANNEL) { 1461 struct vmbus_channel_modifychannel *modifymsg; 1462 1463 modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg; 1464 if (modifymsg->child_relid == response->child_relid) { 1465 memcpy(&msginfo->response.modify_response, response, 1466 sizeof(*response)); 1467 complete(&msginfo->waitevent); 1468 break; 1469 } 1470 } 1471 } 1472 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1473 } 1474 1475 /* 1476 * vmbus_ongpadl_torndown - GPADL torndown handler. 1477 * 1478 * This is invoked when we received a response to our gpadl teardown request. 1479 * Find the matching request, copy the response and signal the requesting 1480 * thread. 
1481 */ 1482 static void vmbus_ongpadl_torndown( 1483 struct vmbus_channel_message_header *hdr) 1484 { 1485 struct vmbus_channel_gpadl_torndown *gpadl_torndown; 1486 struct vmbus_channel_msginfo *msginfo; 1487 struct vmbus_channel_message_header *requestheader; 1488 struct vmbus_channel_gpadl_teardown *gpadl_teardown; 1489 unsigned long flags; 1490 1491 gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr; 1492 1493 trace_vmbus_ongpadl_torndown(gpadl_torndown); 1494 1495 /* 1496 * Find the open msg, copy the result and signal/unblock the wait event 1497 */ 1498 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1499 1500 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1501 msglistentry) { 1502 requestheader = 1503 (struct vmbus_channel_message_header *)msginfo->msg; 1504 1505 if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) { 1506 gpadl_teardown = 1507 (struct vmbus_channel_gpadl_teardown *)requestheader; 1508 1509 if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) { 1510 memcpy(&msginfo->response.gpadl_torndown, 1511 gpadl_torndown, 1512 sizeof( 1513 struct vmbus_channel_gpadl_torndown)); 1514 complete(&msginfo->waitevent); 1515 break; 1516 } 1517 } 1518 } 1519 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1520 } 1521 1522 /* 1523 * vmbus_onversion_response - Version response handler 1524 * 1525 * This is invoked when we received a response to our initiate contact request. 1526 * Find the matching request, copy the response and signal the requesting 1527 * thread. 
1528 */ 1529 static void vmbus_onversion_response( 1530 struct vmbus_channel_message_header *hdr) 1531 { 1532 struct vmbus_channel_msginfo *msginfo; 1533 struct vmbus_channel_message_header *requestheader; 1534 struct vmbus_channel_version_response *version_response; 1535 unsigned long flags; 1536 1537 version_response = (struct vmbus_channel_version_response *)hdr; 1538 1539 trace_vmbus_onversion_response(version_response); 1540 1541 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1542 1543 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1544 msglistentry) { 1545 requestheader = 1546 (struct vmbus_channel_message_header *)msginfo->msg; 1547 1548 if (requestheader->msgtype == 1549 CHANNELMSG_INITIATE_CONTACT) { 1550 memcpy(&msginfo->response.version_response, 1551 version_response, 1552 sizeof(struct vmbus_channel_version_response)); 1553 complete(&msginfo->waitevent); 1554 } 1555 } 1556 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1557 } 1558 1559 /* Channel message dispatch table */ 1560 const struct vmbus_channel_message_table_entry 1561 channel_message_table[CHANNELMSG_COUNT] = { 1562 { CHANNELMSG_INVALID, 0, NULL, 0}, 1563 { CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer, 1564 sizeof(struct vmbus_channel_offer_channel)}, 1565 { CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind, 1566 sizeof(struct vmbus_channel_rescind_offer) }, 1567 { CHANNELMSG_REQUESTOFFERS, 0, NULL, 0}, 1568 { CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered, 0}, 1569 { CHANNELMSG_OPENCHANNEL, 0, NULL, 0}, 1570 { CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result, 1571 sizeof(struct vmbus_channel_open_result)}, 1572 { CHANNELMSG_CLOSECHANNEL, 0, NULL, 0}, 1573 { CHANNELMSG_GPADL_HEADER, 0, NULL, 0}, 1574 { CHANNELMSG_GPADL_BODY, 0, NULL, 0}, 1575 { CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created, 1576 sizeof(struct vmbus_channel_gpadl_created)}, 1577 { CHANNELMSG_GPADL_TEARDOWN, 0, NULL, 0}, 1578 { CHANNELMSG_GPADL_TORNDOWN, 
1, vmbus_ongpadl_torndown, 1579 sizeof(struct vmbus_channel_gpadl_torndown) }, 1580 { CHANNELMSG_RELID_RELEASED, 0, NULL, 0}, 1581 { CHANNELMSG_INITIATE_CONTACT, 0, NULL, 0}, 1582 { CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response, 1583 sizeof(struct vmbus_channel_version_response)}, 1584 { CHANNELMSG_UNLOAD, 0, NULL, 0}, 1585 { CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response, 0}, 1586 { CHANNELMSG_18, 0, NULL, 0}, 1587 { CHANNELMSG_19, 0, NULL, 0}, 1588 { CHANNELMSG_20, 0, NULL, 0}, 1589 { CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL, 0}, 1590 { CHANNELMSG_MODIFYCHANNEL, 0, NULL, 0}, 1591 { CHANNELMSG_TL_CONNECT_RESULT, 0, NULL, 0}, 1592 { CHANNELMSG_MODIFYCHANNEL_RESPONSE, 1, vmbus_onmodifychannel_response, 1593 sizeof(struct vmbus_channel_modifychannel_response)}, 1594 }; 1595 1596 /* 1597 * vmbus_onmessage - Handler for channel protocol messages. 1598 * 1599 * This is invoked in the vmbus worker thread context. 1600 */ 1601 void vmbus_onmessage(struct vmbus_channel_message_header *hdr) 1602 { 1603 trace_vmbus_on_message(hdr); 1604 1605 /* 1606 * vmbus_on_msg_dpc() makes sure the hdr->msgtype here can not go 1607 * out of bound and the message_handler pointer can not be NULL. 1608 */ 1609 channel_message_table[hdr->msgtype].message_handler(hdr); 1610 } 1611 1612 /* 1613 * vmbus_request_offers - Send a request to get all our pending offers 1614 * and wait for all boot-time offers to arrive. 
1615 */ 1616 int vmbus_request_offers(void) 1617 { 1618 struct vmbus_channel_message_header *msg; 1619 struct vmbus_channel_msginfo *msginfo; 1620 int ret; 1621 1622 msginfo = kzalloc(sizeof(*msginfo) + 1623 sizeof(struct vmbus_channel_message_header), 1624 GFP_KERNEL); 1625 if (!msginfo) 1626 return -ENOMEM; 1627 1628 msg = (struct vmbus_channel_message_header *)msginfo->msg; 1629 1630 msg->msgtype = CHANNELMSG_REQUESTOFFERS; 1631 1632 /* 1633 * This REQUESTOFFERS message will result in the host sending an all 1634 * offers delivered message after all the boot-time offers are sent. 1635 */ 1636 ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header), 1637 true); 1638 1639 trace_vmbus_request_offers(ret); 1640 1641 if (ret != 0) { 1642 pr_err("Unable to request offers - %d\n", ret); 1643 1644 goto cleanup; 1645 } 1646 1647 /* 1648 * Wait for the host to send all boot-time offers. 1649 * Keeping it as a best-effort mechanism, where a warning is 1650 * printed if a timeout occurs, and execution is resumed. 1651 */ 1652 if (!wait_for_completion_timeout(&vmbus_connection.all_offers_delivered_event, 1653 secs_to_jiffies(60))) { 1654 pr_warn("timed out waiting for all boot-time offers to be delivered.\n"); 1655 } 1656 1657 /* 1658 * Flush handling of offer messages (which may initiate work on 1659 * other work queues). 1660 */ 1661 flush_workqueue(vmbus_connection.work_queue); 1662 1663 /* 1664 * Flush workqueue for processing the incoming offers. Subchannel 1665 * offers and their processing can happen later, so there is no need to 1666 * flush that workqueue here. 
1667 */ 1668 flush_workqueue(vmbus_connection.handle_primary_chan_wq); 1669 1670 cleanup: 1671 kfree(msginfo); 1672 1673 return ret; 1674 } 1675 1676 void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel, 1677 void (*sc_cr_cb)(struct vmbus_channel *new_sc)) 1678 { 1679 primary_channel->sc_creation_callback = sc_cr_cb; 1680 } 1681 EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback); 1682 1683 void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, 1684 void (*chn_rescind_cb)(struct vmbus_channel *)) 1685 { 1686 channel->chn_rescind_callback = chn_rescind_cb; 1687 } 1688 EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback); 1689