// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/completion.h>
#include <linux/hyperv.h>
#include <linux/kernel_stat.h>
#include <linux/of_address.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
#include <linux/sched/isolation.h>
#include <linux/sched/task_stack.h>
#include <linux/smpboot.h>

#include <linux/delay.h>
#include <linux/panic_notifier.h>
#include <linux/ptrace.h>
#include <linux/sysfb.h>
#include <linux/efi.h>
#include <linux/kernel.h>
#include <linux/syscore_ops.h>
#include <linux/dma-map-ops.h>
#include <linux/pci.h>
#include <linux/export.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"

struct vmbus_dynid {
	struct list_head node;
	struct hv_vmbus_device_id id;
};

/* VMBus Root Device */
static struct device *vmbus_root_device;

static int hyperv_cpuhp_online;

static DEFINE_PER_CPU(long, vmbus_evt);

/* Values parsed from ACPI DSDT */
int vmbus_irq;
int vmbus_interrupt;

/*
 * If the Confidential VMBus is used, the data on the "wire" is not
 * visible to either the host or the hypervisor.
 */
static bool is_confidential;

bool vmbus_is_confidential(void)
{
	return is_confidential;
}
EXPORT_SYMBOL_GPL(vmbus_is_confidential);

/*
 * The panic notifier below is responsible solely for unloading the
 * vmbus connection, which is necessary in a panic event.
 *
 * Notice that an intricate relation exists between this notifier and
 * the Hyper-V framebuffer panic notifier - we need the vmbus connection
 * alive there in order to succeed, so the two must be ordered with
 * respect to each other [see hvfb_on_panic()] - this is done using
 * notifiers' priorities.
 */
static int hv_panic_vmbus_unload(struct notifier_block *nb, unsigned long val,
				 void *args)
{
	vmbus_initiate_unload(true);
	return NOTIFY_DONE;
}
static struct notifier_block hyperv_panic_vmbus_unload_block = {
	.notifier_call	= hv_panic_vmbus_unload,
	.priority	= INT_MIN + 1, /* almost the latest one to execute */
};

static const char *fb_mmio_name = "fb_range";
static struct resource *fb_mmio;
static struct resource *hyperv_mmio;
static DEFINE_MUTEX(hyperv_mmio_lock);

struct device *hv_get_vmbus_root_device(void)
{
	return vmbus_root_device;
}
EXPORT_SYMBOL_GPL(hv_get_vmbus_root_device);

bool hv_vmbus_exists(void)
{
	return vmbus_root_device != NULL;
}
EXPORT_SYMBOL_GPL(hv_vmbus_exists);

/*
 * A monitor ID selects a trigger group (monitorid / 32) and a slot
 * within that group (monitorid % 32) in the monitor page arrays.
 */
static u8 channel_monitor_group(const struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid / 32;
}

static u8 channel_monitor_offset(const struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid % 32;
}

static u32 channel_pending(const struct vmbus_channel *channel,
			   const struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);

	return monitor_page->trigger_group[monitor_group].pending;
}

static u32 channel_latency(const struct vmbus_channel *channel,
			   const struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);
	u8 monitor_offset = channel_monitor_offset(channel);

	return monitor_page->latency[monitor_group][monitor_offset];
}

static u32 channel_conn_id(struct vmbus_channel *channel,
			   struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);
	u8 monitor_offset = channel_monitor_offset(channel);

	return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id;
}

static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
		       char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);

static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
			  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sysfs_emit(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);

static ssize_t monitor_id_show(struct device *dev,
			       struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);

static ssize_t class_id_show(struct device *dev,
			     struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sysfs_emit(buf, "{%pUl}\n",
			  &hv_dev->channel->offermsg.offer.if_type);
}
static DEVICE_ATTR_RO(class_id);

static ssize_t device_id_show(struct device *dev,
			      struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sysfs_emit(buf, "{%pUl}\n",
			  &hv_dev->channel->offermsg.offer.if_instance);
}
static DEVICE_ATTR_RO(device_id);
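/*
 * Illustrative only (paths, GUIDs, and values below are examples, not
 * taken from this file): the attributes defined here appear under
 * /sys/bus/vmbus/devices/<device GUID>/, e.g.:
 *
 *	$ cat /sys/bus/vmbus/devices/<device GUID>/class_id
 *	{f8615163-df3e-46c5-913f-f2d2f965ed0e}
 *	$ cat /sys/bus/vmbus/devices/<device GUID>/id
 *	15
 */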
static ssize_t modalias_show(struct device *dev,
			     struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	return sysfs_emit(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
}
static DEVICE_ATTR_RO(modalias);

#ifdef CONFIG_NUMA
static ssize_t numa_node_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;

	return sysfs_emit(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
}
static DEVICE_ATTR_RO(numa_node);
#endif

static ssize_t server_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
			  vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_pending);

static ssize_t client_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
			  vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);

static ssize_t server_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
			  vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);

static ssize_t client_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
			  vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);

static ssize_t server_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
			  vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);

static ssize_t client_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
			  vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);
static ssize_t out_intr_mask_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;

	return sysfs_emit(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);

static ssize_t out_read_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;
	return sysfs_emit(buf, "%u\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);

static ssize_t out_write_index_show(struct device *dev,
				    struct device_attribute *dev_attr,
				    char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;
	return sysfs_emit(buf, "%u\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);

static ssize_t out_read_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;
	return sysfs_emit(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);

static ssize_t out_write_bytes_avail_show(struct device *dev,
					  struct device_attribute *dev_attr,
					  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;
	return sysfs_emit(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);

static ssize_t in_intr_mask_show(struct device *dev,
				 struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sysfs_emit(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);

static ssize_t in_read_index_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sysfs_emit(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);
static ssize_t in_write_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sysfs_emit(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);

static ssize_t in_read_bytes_avail_show(struct device *dev,
					struct device_attribute *dev_attr,
					char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sysfs_emit(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);

static ssize_t in_write_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sysfs_emit(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);

static ssize_t channel_vp_mapping_show(struct device *dev,
				       struct device_attribute *dev_attr,
				       char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
	int n_written;
	struct list_head *cur;

	if (!channel)
		return -ENODEV;

	mutex_lock(&vmbus_connection.channel_mutex);

	n_written = sysfs_emit(buf, "%u:%u\n",
			       channel->offermsg.child_relid,
			       channel->target_cpu);

	list_for_each(cur, &channel->sc_list) {
		cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
		n_written += sysfs_emit_at(buf, n_written, "%u:%u\n",
					   cur_sc->offermsg.child_relid,
					   cur_sc->target_cpu);
	}

	mutex_unlock(&vmbus_connection.channel_mutex);

	return n_written;
}
static DEVICE_ATTR_RO(channel_vp_mapping);

static ssize_t vendor_show(struct device *dev,
			   struct device_attribute *dev_attr,
			   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	return sysfs_emit(buf, "0x%x\n", hv_dev->vendor_id);
}
static DEVICE_ATTR_RO(vendor);

static ssize_t device_show(struct device *dev,
			   struct device_attribute *dev_attr,
			   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	return sysfs_emit(buf, "0x%x\n", hv_dev->device_id);
}
static DEVICE_ATTR_RO(device);

static ssize_t driver_override_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t count)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	int ret;

	ret = driver_set_override(dev, &hv_dev->driver_override, buf, count);
	if (ret)
		return ret;

	return count;
}

static ssize_t driver_override_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	ssize_t len;

	device_lock(dev);
	len = sysfs_emit(buf, "%s\n", hv_dev->driver_override);
	device_unlock(dev);

	return len;
}
static DEVICE_ATTR_RW(driver_override);
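/*
 * A sketch of the usual driver_override flow (device names below are
 * examples): writing a driver name restricts matching to that driver,
 * and writing an empty string clears the override:
 *
 *	# echo uio_hv_generic > /sys/bus/vmbus/devices/<dev>/driver_override
 *	# echo <dev> > /sys/bus/vmbus/drivers/hv_netvsc/unbind
 *	# echo <dev> > /sys/bus/vmbus/drivers/uio_hv_generic/bind
 */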
/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
static struct attribute *vmbus_dev_attrs[] = {
	&dev_attr_id.attr,
	&dev_attr_state.attr,
	&dev_attr_monitor_id.attr,
	&dev_attr_class_id.attr,
	&dev_attr_device_id.attr,
	&dev_attr_modalias.attr,
#ifdef CONFIG_NUMA
	&dev_attr_numa_node.attr,
#endif
	&dev_attr_server_monitor_pending.attr,
	&dev_attr_client_monitor_pending.attr,
	&dev_attr_server_monitor_latency.attr,
	&dev_attr_client_monitor_latency.attr,
	&dev_attr_server_monitor_conn_id.attr,
	&dev_attr_client_monitor_conn_id.attr,
	&dev_attr_out_intr_mask.attr,
	&dev_attr_out_read_index.attr,
	&dev_attr_out_write_index.attr,
	&dev_attr_out_read_bytes_avail.attr,
	&dev_attr_out_write_bytes_avail.attr,
	&dev_attr_in_intr_mask.attr,
	&dev_attr_in_read_index.attr,
	&dev_attr_in_write_index.attr,
	&dev_attr_in_read_bytes_avail.attr,
	&dev_attr_in_write_bytes_avail.attr,
	&dev_attr_channel_vp_mapping.attr,
	&dev_attr_vendor.attr,
	&dev_attr_device.attr,
	&dev_attr_driver_override.attr,
	NULL,
};

/*
 * Device-level attribute_group callback function. Returns the permission for
 * each attribute, and returns 0 if an attribute is not visible.
 */
static umode_t vmbus_dev_attr_is_visible(struct kobject *kobj,
					 struct attribute *attr, int idx)
{
	struct device *dev = kobj_to_dev(kobj);
	const struct hv_device *hv_dev = device_to_hv_device(dev);

	/* Hide the monitor attributes if the monitor mechanism is not used. */
	if (!hv_dev->channel->offermsg.monitor_allocated &&
	    (attr == &dev_attr_monitor_id.attr ||
	     attr == &dev_attr_server_monitor_pending.attr ||
	     attr == &dev_attr_client_monitor_pending.attr ||
	     attr == &dev_attr_server_monitor_latency.attr ||
	     attr == &dev_attr_client_monitor_latency.attr ||
	     attr == &dev_attr_server_monitor_conn_id.attr ||
	     attr == &dev_attr_client_monitor_conn_id.attr))
		return 0;

	return attr->mode;
}

static const struct attribute_group vmbus_dev_group = {
	.attrs = vmbus_dev_attrs,
	.is_visible = vmbus_dev_attr_is_visible
};
__ATTRIBUTE_GROUPS(vmbus_dev);

/* Set up the attribute for /sys/bus/vmbus/hibernation */
static ssize_t hibernation_show(const struct bus_type *bus, char *buf)
{
	return sprintf(buf, "%d\n", !!hv_is_hibernation_supported());
}

static BUS_ATTR_RO(hibernation);

static struct attribute *vmbus_bus_attrs[] = {
	&bus_attr_hibernation.attr,
	NULL,
};
static const struct attribute_group vmbus_bus_group = {
	.attrs = vmbus_bus_attrs,
};
__ATTRIBUTE_GROUPS(vmbus_bus);

/*
 * vmbus_uevent - add uevent for our device
 *
 * This routine is invoked when a device is added or removed on the vmbus to
 * generate a uevent to udev in the userspace. udev will then look at its
 * rules and the uevent generated here to load the appropriate driver.
 *
 * The alias string will be of the form vmbus:guid where guid is the string
 * representation of the device guid (each byte of the guid is represented
 * with two hex characters).
 */
static int vmbus_uevent(const struct device *device, struct kobj_uevent_env *env)
{
	const struct hv_device *dev = device_to_hv_device(device);
	const char *format = "MODALIAS=vmbus:%*phN";

	return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type);
}
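/*
 * For illustration (assuming the synthetic NIC class GUID
 * f8615163-df3e-46c5-913f-f2d2f965ed0e): the emitted alias is the 16
 * GUID bytes in their in-memory order, two hex digits per byte,
 *
 *	MODALIAS=vmbus:635161f83edfc546913ff2d2f965ed0e
 *
 * which is what MODULE_DEVICE_TABLE(vmbus, ...) aliases match against.
 */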
static const struct hv_vmbus_device_id *
hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid)
{
	if (id == NULL)
		return NULL; /* empty device table */

	for (; !guid_is_null(&id->guid); id++)
		if (guid_equal(&id->guid, guid))
			return id;

	return NULL;
}

static const struct hv_vmbus_device_id *
hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid)
{
	const struct hv_vmbus_device_id *id = NULL;
	struct vmbus_dynid *dynid;

	spin_lock(&drv->dynids.lock);
	list_for_each_entry(dynid, &drv->dynids.list, node) {
		if (guid_equal(&dynid->id.guid, guid)) {
			id = &dynid->id;
			break;
		}
	}
	spin_unlock(&drv->dynids.lock);

	return id;
}

static const struct hv_vmbus_device_id vmbus_device_null;

/*
 * Return a matching hv_vmbus_device_id pointer.
 * If there is no match, return NULL.
 */
static const struct hv_vmbus_device_id *hv_vmbus_get_id(const struct hv_driver *drv,
							struct hv_device *dev)
{
	const guid_t *guid = &dev->dev_type;
	const struct hv_vmbus_device_id *id;

	/* When driver_override is set, only bind to the matching driver */
	if (dev->driver_override && strcmp(dev->driver_override, drv->name))
		return NULL;

	/* Look at the dynamic ids first, before the static ones */
	id = hv_vmbus_dynid_match((struct hv_driver *)drv, guid);
	if (!id)
		id = hv_vmbus_dev_match(drv->id_table, guid);

	/* driver_override will always match, send a dummy id */
	if (!id && dev->driver_override)
		id = &vmbus_device_null;

	return id;
}
/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices
 *
 * This function can race with vmbus_device_register(). This function is
 * typically running on a user thread in response to writing to the "new_id"
 * sysfs entry for a driver. vmbus_device_register() is running on a
 * workqueue thread in response to the Hyper-V host offering a device to the
 * guest. This function calls driver_attach(), which looks for an existing
 * device matching the new id, and attaches the driver to which the new id
 * has been assigned. vmbus_device_register() calls device_register(), which
 * looks for a driver that matches the device being registered. If both
 * operations are running simultaneously, the device driver probe function runs
 * on whichever thread establishes the linkage between the driver and device.
 *
 * In most cases, it doesn't matter which thread runs the driver probe
 * function. But if vmbus_device_register() does not find a matching driver,
 * it proceeds to create the "channels" subdirectory and numbered per-channel
 * subdirectory in sysfs. While that multi-step creation is in progress, this
 * function could run the driver probe function. If the probe function checks
 * for, or operates on, entries in the "channels" subdirectory, including by
 * calling hv_create_ring_sysfs(), the operation may or may not succeed
 * depending on the race. The race can't create a kernel failure in VMBus
 * or device subsystem code, but probe functions in VMBus drivers doing such
 * operations must be prepared for the failure case.
 */
static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid)
{
	struct vmbus_dynid *dynid;

	dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
	if (!dynid)
		return -ENOMEM;

	dynid->id.guid = *guid;

	spin_lock(&drv->dynids.lock);
	list_add_tail(&dynid->node, &drv->dynids.list);
	spin_unlock(&drv->dynids.lock);

	return driver_attach(&drv->driver);
}

static void vmbus_free_dynids(struct hv_driver *drv)
{
	struct vmbus_dynid *dynid, *n;

	spin_lock(&drv->dynids.lock);
	list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
		list_del(&dynid->node);
		kfree(dynid);
	}
	spin_unlock(&drv->dynids.lock);
}

/*
 * new_id_store - sysfs frontend to vmbus_add_dynid()
 *
 * Allow GUIDs to be added to an existing driver via sysfs.
 */
static ssize_t new_id_store(struct device_driver *driver, const char *buf,
			    size_t count)
{
	struct hv_driver *drv = drv_to_hv_drv(driver);
	guid_t guid;
	ssize_t retval;

	retval = guid_parse(buf, &guid);
	if (retval)
		return retval;

	if (hv_vmbus_dynid_match(drv, &guid))
		return -EEXIST;

	retval = vmbus_add_dynid(drv, &guid);
	if (retval)
		return retval;
	return count;
}
static DRIVER_ATTR_WO(new_id);
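/*
 * Typical use of the dynamic ID interface, as documented for
 * uio_hv_generic (the GUID below is the synthetic NIC class ID, shown
 * only as an example):
 *
 *	# echo "f8615163-df3e-46c5-913f-f2d2f965ed0e" \
 *		> /sys/bus/vmbus/drivers/uio_hv_generic/new_id
 */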
/*
 * remove_id_store - remove a device ID from this driver
 *
 * Removes a dynamic VMBus device ID from this driver.
 */
static ssize_t remove_id_store(struct device_driver *driver, const char *buf,
			       size_t count)
{
	struct hv_driver *drv = drv_to_hv_drv(driver);
	struct vmbus_dynid *dynid, *n;
	guid_t guid;
	ssize_t retval;

	retval = guid_parse(buf, &guid);
	if (retval)
		return retval;

	retval = -ENODEV;
	spin_lock(&drv->dynids.lock);
	list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
		struct hv_vmbus_device_id *id = &dynid->id;

		if (guid_equal(&id->guid, &guid)) {
			list_del(&dynid->node);
			kfree(dynid);
			retval = count;
			break;
		}
	}
	spin_unlock(&drv->dynids.lock);

	return retval;
}
static DRIVER_ATTR_WO(remove_id);

static struct attribute *vmbus_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(vmbus_drv);

/*
 * vmbus_match - Attempt to match the specified device to the specified driver
 */
static int vmbus_match(struct device *device, const struct device_driver *driver)
{
	const struct hv_driver *drv = drv_to_hv_drv(driver);
	struct hv_device *hv_dev = device_to_hv_device(device);

	/* The hv_sock driver handles all hv_sock offers. */
	if (is_hvsock_channel(hv_dev->channel))
		return drv->hvsock;

	if (hv_vmbus_get_id(drv, hv_dev))
		return 1;

	return 0;
}

/*
 * vmbus_probe - Add the new vmbus child device
 */
static int vmbus_probe(struct device *child_device)
{
	int ret = 0;
	struct hv_driver *drv =
			drv_to_hv_drv(child_device->driver);
	struct hv_device *dev = device_to_hv_device(child_device);
	const struct hv_vmbus_device_id *dev_id;

	dev_id = hv_vmbus_get_id(drv, dev);
	if (drv->probe) {
		ret = drv->probe(dev, dev_id);
		if (ret != 0)
			pr_err("probe failed for device %s (%d)\n",
			       dev_name(child_device), ret);
	} else {
		pr_err("probe not set for driver %s\n",
		       dev_name(child_device));
		ret = -ENODEV;
	}
	return ret;
}

/*
 * vmbus_dma_configure -- Configure DMA coherence for VMbus device
 */
static int vmbus_dma_configure(struct device *child_device)
{
	/*
	 * On ARM64, propagate the DMA coherence setting from the top level
	 * VMbus ACPI device to the child VMbus device being added here.
	 * On x86/x64 coherence is assumed and these calls have no effect.
	 */
	hv_setup_dma_ops(child_device,
			 device_get_dma_attr(vmbus_root_device) == DEV_DMA_COHERENT);
	return 0;
}

/*
 * vmbus_remove - Remove a vmbus device
 */
static void vmbus_remove(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	if (child_device->driver) {
		drv = drv_to_hv_drv(child_device->driver);
		if (drv->remove)
			drv->remove(dev);
	}
}

/*
 * vmbus_shutdown - Shutdown a vmbus device
 */
static void vmbus_shutdown(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	/* The device may not be attached yet */
	if (!child_device->driver)
		return;

	drv = drv_to_hv_drv(child_device->driver);

	if (drv->shutdown)
		drv->shutdown(dev);
}

#ifdef CONFIG_PM_SLEEP
/*
 * vmbus_suspend - Suspend a vmbus device
 */
static int vmbus_suspend(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	/* The device may not be attached yet */
	if (!child_device->driver)
		return 0;

	drv = drv_to_hv_drv(child_device->driver);
	if (!drv->suspend)
		return -EOPNOTSUPP;

	return drv->suspend(dev);
}

/*
 * vmbus_resume - Resume a vmbus device
 */
static int vmbus_resume(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	/* The device may not be attached yet */
	if (!child_device->driver)
		return 0;

	drv = drv_to_hv_drv(child_device->driver);
	if (!drv->resume)
		return -EOPNOTSUPP;

	return drv->resume(dev);
}
#else
#define vmbus_suspend NULL
#define vmbus_resume NULL
#endif /* CONFIG_PM_SLEEP */
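/*
 * A minimal sketch (not part of this file) of a VMBus driver wired into
 * the bus callbacks above; the example_* names are placeholders and
 * HV_NIC_GUID comes from <linux/hyperv.h>:
 *
 *	static const struct hv_vmbus_device_id example_id_table[] = {
 *		{ HV_NIC_GUID, },
 *		{ },
 *	};
 *	MODULE_DEVICE_TABLE(vmbus, example_id_table);
 *
 *	static struct hv_driver example_drv = {
 *		.name		= "example",
 *		.id_table	= example_id_table,
 *		.probe		= example_probe,
 *		.remove		= example_remove,
 *		.suspend	= example_suspend,
 *		.resume		= example_resume,
 *	};
 */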
/*
 * vmbus_device_release - Final callback release of the vmbus child device
 */
static void vmbus_device_release(struct device *device)
{
	struct hv_device *hv_dev = device_to_hv_device(device);
	struct vmbus_channel *channel = hv_dev->channel;

	hv_debug_rm_dev_dir(hv_dev);

	mutex_lock(&vmbus_connection.channel_mutex);
	hv_process_channel_removal(channel);
	mutex_unlock(&vmbus_connection.channel_mutex);
	kfree(hv_dev);
}

/*
 * Note: we must use the "noirq" ops: see the comment before vmbus_bus_pm.
 *
 * suspend_noirq/resume_noirq are set to NULL to support Suspend-to-Idle: we
 * shouldn't suspend the vmbus devices upon Suspend-to-Idle, otherwise there
 * is no way to wake up a Generation-2 VM.
 *
 * The other 4 ops are for hibernation.
 */
static const struct dev_pm_ops vmbus_pm = {
	.suspend_noirq	= NULL,
	.resume_noirq	= NULL,
	.freeze_noirq	= vmbus_suspend,
	.thaw_noirq	= vmbus_resume,
	.poweroff_noirq	= vmbus_suspend,
	.restore_noirq	= vmbus_resume,
};

/* The one and only one */
static const struct bus_type hv_bus = {
	.name =			"vmbus",
	.match =		vmbus_match,
	.shutdown =		vmbus_shutdown,
	.remove =		vmbus_remove,
	.probe =		vmbus_probe,
	.uevent =		vmbus_uevent,
	.dma_configure =	vmbus_dma_configure,
	.dev_groups =		vmbus_dev_groups,
	.drv_groups =		vmbus_drv_groups,
	.bus_groups =		vmbus_bus_groups,
	.pm =			&vmbus_pm,
};

struct onmessage_work_context {
	struct work_struct work;
	struct {
		struct hv_message_header header;
		u8 payload[];
	} msg;
};

static void vmbus_onmessage_work(struct work_struct *work)
{
	struct onmessage_work_context *ctx;

	/* Do not process messages if we're in DISCONNECTED state */
	if (vmbus_connection.conn_state == DISCONNECTED)
		return;

	ctx = container_of(work, struct onmessage_work_context,
			   work);
	vmbus_onmessage((struct vmbus_channel_message_header *)
			&ctx->msg.payload);
	kfree(ctx);
}
static void __vmbus_on_msg_dpc(void *message_page_addr)
{
	struct hv_message msg_copy, *msg;
	struct vmbus_channel_message_header *hdr;
	enum vmbus_channel_message_type msgtype;
	const struct vmbus_channel_message_table_entry *entry;
	struct onmessage_work_context *ctx;
	__u8 payload_size;
	u32 message_type;

	if (!message_page_addr)
		return;
	msg = (struct hv_message *)message_page_addr + VMBUS_MESSAGE_SINT;

	/*
	 * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as
	 * it is being used in 'struct vmbus_channel_message_header' definition
	 * which is supposed to match hypervisor ABI.
	 */
	BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32));

	/*
	 * Since the message is in memory shared with the host, an erroneous or
	 * malicious Hyper-V could modify the message while vmbus_on_msg_dpc()
	 * or individual message handlers are executing; to prevent this, copy
	 * the message into private memory.
	 */
	memcpy(&msg_copy, msg, sizeof(struct hv_message));

	message_type = msg_copy.header.message_type;
	if (message_type == HVMSG_NONE)
		/* no msg */
		return;

	hdr = (struct vmbus_channel_message_header *)msg_copy.u.payload;
	msgtype = hdr->msgtype;

	trace_vmbus_on_msg_dpc(hdr);

	if (msgtype >= CHANNELMSG_COUNT) {
		WARN_ONCE(1, "unknown msgtype=%d\n", msgtype);
		goto msg_handled;
	}

	payload_size = msg_copy.header.payload_size;
	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) {
		WARN_ONCE(1, "payload size is too large (%d)\n", payload_size);
		goto msg_handled;
	}

	entry = &channel_message_table[msgtype];

	if (!entry->message_handler)
		goto msg_handled;

	if (payload_size < entry->min_payload_len) {
		WARN_ONCE(1, "message too short: msgtype=%d len=%d\n", msgtype, payload_size);
		goto msg_handled;
	}

	if (entry->handler_type == VMHT_BLOCKING) {
		ctx = kmalloc(struct_size(ctx, msg.payload, payload_size),
			      GFP_ATOMIC);
		if (ctx == NULL)
			return;

		INIT_WORK(&ctx->work, vmbus_onmessage_work);
		ctx->msg.header = msg_copy.header;
		memcpy(&ctx->msg.payload, msg_copy.u.payload, payload_size);

		/*
		 * The host can generate a rescind message while we
		 * may still be handling the original offer. We deal with
		 * this condition by relying on the synchronization provided
		 * by offer_in_progress and by channel_mutex. See also the
		 * inline comments in vmbus_onoffer_rescind().
		 */
		switch (msgtype) {
		case CHANNELMSG_RESCIND_CHANNELOFFER:
			/*
			 * If we are handling the rescind message;
			 * schedule the work on the global work queue.
			 *
			 * The OFFER message and the RESCIND message should
			 * not be handled by the same serialized work queue,
			 * because the OFFER handler may call vmbus_open(),
			 * which tries to open the channel by sending an
			 * OPEN_CHANNEL message to the host and waits for
			 * the host's response; however, if the host has
			 * rescinded the channel before it receives the
			 * OPEN_CHANNEL message, the host just silently
			 * ignores the OPEN_CHANNEL message; as a result,
			 * the guest's OFFER handler hangs forever, if we
			 * handle the RESCIND message in the same serialized
			 * work queue: the RESCIND handler can not start to
			 * run before the OFFER handler finishes.
			 */
			if (vmbus_connection.ignore_any_offer_msg)
				break;
			queue_work(vmbus_connection.rescind_work_queue, &ctx->work);
			break;

		case CHANNELMSG_OFFERCHANNEL:
			/*
			 * The host sends the offer message of a given channel
			 * before sending the rescind message of the same
			 * channel. These messages are sent to the guest's
			 * connect CPU; the guest then starts processing them
			 * in the tasklet handler on this CPU:
			 *
			 * VMBUS_CONNECT_CPU
			 *
			 * [vmbus_on_msg_dpc()]
			 * atomic_inc()  // CHANNELMSG_OFFERCHANNEL
			 * queue_work()
			 * ...
			 * [vmbus_on_msg_dpc()]
			 * schedule_work()  // CHANNELMSG_RESCIND_CHANNELOFFER
			 *
			 * We rely on the memory-ordering properties of the
			 * queue_work() and schedule_work() primitives, which
			 * guarantee that the atomic increment will be visible
			 * to the CPUs which will execute the offer & rescind
			 * works by the time these works will start execution.
			 */
			if (vmbus_connection.ignore_any_offer_msg)
				break;
			atomic_inc(&vmbus_connection.offer_in_progress);
			fallthrough;

		default:
			queue_work(vmbus_connection.work_queue, &ctx->work);
		}
	} else
		entry->message_handler(hdr);

msg_handled:
	vmbus_signal_eom(msg, message_type);
}
void vmbus_on_msg_dpc(unsigned long data)
{
	struct hv_per_cpu_context *hv_cpu = (void *)data;

	__vmbus_on_msg_dpc(hv_cpu->hyp_synic_message_page);
	__vmbus_on_msg_dpc(hv_cpu->para_synic_message_page);
}

#ifdef CONFIG_PM_SLEEP
/*
 * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
 * hibernation, because hv_sock connections cannot persist across hibernation.
 */
static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
{
	struct onmessage_work_context *ctx;
	struct vmbus_channel_rescind_offer *rescind;

	WARN_ON(!is_hvsock_channel(channel));

	/*
	 * Allocation size is small and the allocation should really not fail,
	 * otherwise the state of the hv_sock connections ends up in limbo.
	 */
	ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind),
		      GFP_KERNEL | __GFP_NOFAIL);

	/*
	 * So far, these are not really used by Linux. Just set them to the
	 * reasonable values conforming to the definitions of the fields.
	 */
	ctx->msg.header.message_type = 1;
	ctx->msg.header.payload_size = sizeof(*rescind);

	/* These values are actually used by Linux. */
	rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload;
	rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
	rescind->child_relid = channel->offermsg.child_relid;

	INIT_WORK(&ctx->work, vmbus_onmessage_work);

	queue_work(vmbus_connection.work_queue, &ctx->work);
}
#endif /* CONFIG_PM_SLEEP */

/*
 * Schedule all channels with events pending.
 * The event page can be directly checked to get the id of
 * the channel that has the interrupt pending.
 */
static void vmbus_chan_sched(void *event_page_addr)
{
	unsigned long *recv_int_page;
	u32 maxbits, relid;
	union hv_synic_event_flags *event;

	if (!event_page_addr)
		return;
	event = (union hv_synic_event_flags *)event_page_addr + VMBUS_MESSAGE_SINT;

	maxbits = READ_ONCE(vmbus_connection.relid_hiwater) + 1;
	recv_int_page = event->flags;

	if (unlikely(!recv_int_page))
		return;

	for_each_set_bit(relid, recv_int_page, maxbits) {
		void (*callback_fn)(void *context);
		struct vmbus_channel *channel;

		if (!sync_test_and_clear_bit(relid, recv_int_page))
			continue;

		/* Special case - vmbus channel protocol msg */
		if (relid == 0)
			continue;

		/*
		 * Pairs with the kfree_rcu() in vmbus_chan_release().
		 * Guarantees that the channel data structure doesn't
		 * get freed while the channel pointer below is being
		 * dereferenced.
		 */
		rcu_read_lock();

		/* Find channel based on relid */
		channel = relid2channel(relid);
		if (channel == NULL)
			goto sched_unlock_rcu;

		if (channel->rescind)
			goto sched_unlock_rcu;

		/*
		 * Make sure that the ring buffer data structure doesn't get
		 * freed while we dereference the ring buffer pointer. Test
		 * for the channel's onchannel_callback being NULL within a
		 * sched_lock critical section. See also the inline comments
		 * in vmbus_reset_channel_cb().
		 */
		spin_lock(&channel->sched_lock);

		callback_fn = channel->onchannel_callback;
		if (unlikely(callback_fn == NULL))
			goto sched_unlock;

		trace_vmbus_chan_sched(channel);

		++channel->interrupts;

		switch (channel->callback_mode) {
		case HV_CALL_ISR:
			(*callback_fn)(channel->channel_callback_context);
			break;

		case HV_CALL_BATCHED:
			hv_begin_read(&channel->inbound);
			fallthrough;
		case HV_CALL_DIRECT:
			tasklet_schedule(&channel->callback_event);
		}

sched_unlock:
		spin_unlock(&channel->sched_lock);
sched_unlock_rcu:
		rcu_read_unlock();
	}
}
static void vmbus_message_sched(struct hv_per_cpu_context *hv_cpu, void *message_page_addr)
{
	struct hv_message *msg;

	if (!message_page_addr)
		return;
	msg = (struct hv_message *)message_page_addr + VMBUS_MESSAGE_SINT;

	/* Check if there are actual msgs to be processed */
	if (msg->header.message_type != HVMSG_NONE) {
		if (msg->header.message_type == HVMSG_TIMER_EXPIRED) {
			hv_stimer0_isr();
			vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
		} else {
			tasklet_schedule(&hv_cpu->msg_dpc);
		}
	}
}

static void __vmbus_isr(void)
{
	struct hv_per_cpu_context *hv_cpu
		= this_cpu_ptr(hv_context.cpu_context);

	vmbus_chan_sched(hv_cpu->hyp_synic_event_page);
	vmbus_chan_sched(hv_cpu->para_synic_event_page);

	vmbus_message_sched(hv_cpu, hv_cpu->hyp_synic_message_page);
	vmbus_message_sched(hv_cpu, hv_cpu->para_synic_message_page);
}

static DEFINE_PER_CPU(bool, vmbus_irq_pending);
static DEFINE_PER_CPU(struct task_struct *, vmbus_irqd);

static void vmbus_irqd_wake(void)
{
	struct task_struct *tsk = __this_cpu_read(vmbus_irqd);

	__this_cpu_write(vmbus_irq_pending, true);
	wake_up_process(tsk);
}

static void vmbus_irqd_setup(unsigned int cpu)
{
	sched_set_fifo(current);
}

static int vmbus_irqd_should_run(unsigned int cpu)
{
	return __this_cpu_read(vmbus_irq_pending);
}

static void run_vmbus_irqd(unsigned int cpu)
{
	__this_cpu_write(vmbus_irq_pending, false);
	__vmbus_isr();
}

static bool vmbus_irq_initialized;

static struct smp_hotplug_thread vmbus_irq_threads = {
	.store			= &vmbus_irqd,
	.setup			= vmbus_irqd_setup,
	.thread_should_run	= vmbus_irqd_should_run,
	.thread_fn		= run_vmbus_irqd,
	.thread_comm		= "vmbus_irq/%u",
};

void vmbus_isr(void)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
		vmbus_irqd_wake();
	} else {
		lockdep_hardirq_threaded();
		__vmbus_isr();
	}
}
EXPORT_SYMBOL_FOR_MODULES(vmbus_isr, "mshv_vtl");

static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
{
	vmbus_isr();
	return IRQ_HANDLED;
}

static void vmbus_percpu_work(struct work_struct *work)
{
	unsigned int cpu = smp_processor_id();

	hv_synic_init(cpu);
}

static int vmbus_alloc_synic_and_connect(void)
{
	int ret, cpu;
	struct work_struct __percpu *works;

	ret = hv_synic_alloc();
	if (ret < 0)
		goto err_alloc;

	works = alloc_percpu(struct work_struct);
	if (!works) {
		ret = -ENOMEM;
		goto err_alloc;
	}

	/*
	 * Initialize the per-cpu interrupt state and stimer state.
	 * Then connect to the host.
	 */
	cpus_read_lock();
	for_each_online_cpu(cpu) {
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, vmbus_percpu_work);
		schedule_work_on(cpu, work);
	}

	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(works, cpu));

	/* Register the callbacks for possible CPU online/offline'ing */
	ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
						   hv_synic_init, hv_synic_cleanup);
	cpus_read_unlock();
	free_percpu(works);
	if (ret < 0)
		goto err_alloc;
	hyperv_cpuhp_online = ret;

	ret = vmbus_connect();
	if (ret)
		goto err_connect;
	return 0;

err_connect:
	cpuhp_remove_state(hyperv_cpuhp_online);
	return -ENODEV;
err_alloc:
	hv_synic_free();
	return -ENOMEM;
}
/*
 * vmbus_bus_init - Main vmbus driver initialization routine.
 *
 * Here, we
 * - initialize the vmbus driver context
 * - invoke the vmbus hv main init routine
 * - retrieve the channel offers
 */
static int vmbus_bus_init(void)
{
	int ret;

	ret = hv_init();
	if (ret != 0) {
		pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
		return ret;
	}

	ret = bus_register(&hv_bus);
	if (ret)
		return ret;

	/*
	 * VMbus interrupts are best modeled as per-cpu interrupts. If
	 * on an architecture with support for per-cpu IRQs (e.g. ARM64),
	 * allocate a per-cpu IRQ using standard Linux kernel functionality.
	 * If not on such an architecture (e.g., x86/x64), then rely on
	 * code in the arch-specific portion of the code tree to connect
	 * the VMbus interrupt handler.
	 */

	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !vmbus_irq_initialized) {
		ret = smpboot_register_percpu_thread(&vmbus_irq_threads);
		if (ret)
			goto err_kthread;
		vmbus_irq_initialized = true;
	}

	if (vmbus_irq == -1) {
		hv_setup_vmbus_handler(vmbus_isr);
	} else {
		ret = request_percpu_irq(vmbus_irq, vmbus_percpu_isr,
					 "Hyper-V VMbus", &vmbus_evt);
		if (ret) {
			pr_err("Can't request Hyper-V VMbus IRQ %d, Err %d\n",
			       vmbus_irq, ret);
			goto err_setup;
		}
	}

	/*
	 * Cache the value as getting it involves a VM exit on x86(_64), and
	 * doing that on each VP while initializing SynICs wastes time.
	 */
	is_confidential = ms_hyperv.confidential_vmbus_available;
	if (is_confidential)
		pr_info("Establishing connection to the confidential VMBus\n");
	hv_para_set_sint_proxy(!is_confidential);
	ret = vmbus_alloc_synic_and_connect();
	if (ret)
		goto err_connect;

	/*
	 * Always register the vmbus unload panic notifier because we
	 * need to shut the VMbus channel connection on panic.
	 */
	atomic_notifier_chain_register(&panic_notifier_list,
				       &hyperv_panic_vmbus_unload_block);

	vmbus_request_offers();

	return 0;

err_connect:
	if (vmbus_irq == -1)
		hv_remove_vmbus_handler();
	else
		free_percpu_irq(vmbus_irq, &vmbus_evt);
err_setup:
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
		smpboot_unregister_percpu_thread(&vmbus_irq_threads);
		vmbus_irq_initialized = false;
	}
err_kthread:
	bus_unregister(&hv_bus);
	return ret;
}
/**
 * __vmbus_driver_register() - Register a vmbus driver
 * @hv_driver: Pointer to driver structure you want to register
 * @owner: owner module of the driver
 * @mod_name: module name string
 *
 * Registers the given driver with Linux through the 'driver_register()'
 * call and sets up the hyper-v vmbus handling for this driver.
 * It will return the state of the 'driver_register()' call.
 */
int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name)
{
	int ret;

	if (!hv_vmbus_exists())
		return -ENODEV;

	pr_info("registering driver %s\n", hv_driver->name);

	hv_driver->driver.name = hv_driver->name;
	hv_driver->driver.owner = owner;
	hv_driver->driver.mod_name = mod_name;
	hv_driver->driver.bus = &hv_bus;

	spin_lock_init(&hv_driver->dynids.lock);
	INIT_LIST_HEAD(&hv_driver->dynids.list);

	ret = driver_register(&hv_driver->driver);

	return ret;
}
EXPORT_SYMBOL_GPL(__vmbus_driver_register);
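/*
 * Drivers normally call this through the vmbus_driver_register() macro
 * in <linux/hyperv.h>, which supplies THIS_MODULE and KBUILD_MODNAME,
 * e.g. (sketch, reusing the hypothetical example_drv from above):
 *
 *	ret = vmbus_driver_register(&example_drv);
 *	...
 *	vmbus_driver_unregister(&example_drv);
 */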
/**
 * vmbus_driver_unregister() - Unregister a vmbus driver
 * @hv_driver: Pointer to driver structure you want to
 *             un-register
 *
 * Un-register the given driver that was previously registered with a call
 * to vmbus_driver_register()
 */
void vmbus_driver_unregister(struct hv_driver *hv_driver)
{
	if (hv_vmbus_exists()) {
		pr_info("unregistering driver %s\n", hv_driver->name);
		driver_unregister(&hv_driver->driver);
		vmbus_free_dynids(hv_driver);
	}
}
EXPORT_SYMBOL_GPL(vmbus_driver_unregister);


/*
 * Called when the last reference to the channel is gone.
 */
static void vmbus_chan_release(struct kobject *kobj)
{
	struct vmbus_channel *channel
		= container_of(kobj, struct vmbus_channel, kobj);

	kfree_rcu(channel, rcu);
}

struct vmbus_chan_attribute {
	struct attribute attr;
	ssize_t (*show)(struct vmbus_channel *chan, char *buf);
	ssize_t (*store)(struct vmbus_channel *chan,
			 const char *buf, size_t count);
};
#define VMBUS_CHAN_ATTR(_name, _mode, _show, _store) \
	struct vmbus_chan_attribute chan_attr_##_name \
		= __ATTR(_name, _mode, _show, _store)
#define VMBUS_CHAN_ATTR_RW(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RW(_name)
#define VMBUS_CHAN_ATTR_RO(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RO(_name)
#define VMBUS_CHAN_ATTR_WO(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_WO(_name)

static ssize_t vmbus_chan_attr_show(struct kobject *kobj,
				    struct attribute *attr, char *buf)
{
	const struct vmbus_chan_attribute *attribute
		= container_of(attr, struct vmbus_chan_attribute, attr);
	struct vmbus_channel *chan
		= container_of(kobj, struct vmbus_channel, kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(chan, buf);
}

static ssize_t vmbus_chan_attr_store(struct kobject *kobj,
				     struct attribute *attr, const char *buf,
				     size_t count)
{
	const struct vmbus_chan_attribute *attribute
		= container_of(attr, struct vmbus_chan_attribute, attr);
	struct vmbus_channel *chan
		= container_of(kobj, struct vmbus_channel, kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(chan, buf, count);
}

static const struct sysfs_ops vmbus_chan_sysfs_ops = {
	.show = vmbus_chan_attr_show,
	.store = vmbus_chan_attr_store,
};

static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->outbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(out_mask);

static ssize_t in_mask_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(in_mask);

static ssize_t read_avail_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi));
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(read_avail);
static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->outbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(write_avail);

static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%u\n", channel->target_cpu);
}

int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu)
{
	u32 origin_cpu;
	int ret = 0;

	lockdep_assert_cpus_held();
	lockdep_assert_held(&vmbus_connection.channel_mutex);

	if (vmbus_proto_version < VERSION_WIN10_V4_1)
		return -EIO;

	/* Validate target_cpu for the cpumask_test_cpu() operation below. */
	if (target_cpu >= nr_cpumask_bits)
		return -EINVAL;

	if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
		return -EINVAL;

	if (!cpu_online(target_cpu))
		return -EINVAL;

	/*
	 * Synchronizes vmbus_channel_set_cpu() and channel closure:
	 *
	 * { Initially: state = CHANNEL_OPENED }
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_channel_set_cpu()]	[vmbus_disconnect_ring()]
	 *
	 * LOCK channel_mutex		LOCK channel_mutex
	 * LOAD r1 = state		LOAD r2 = state
	 * IF (r1 == CHANNEL_OPENED)	IF (r2 == CHANNEL_OPENED)
	 *   SEND MODIFYCHANNEL		  STORE state = CHANNEL_OPEN
	 *   [...]			  SEND CLOSECHANNEL
	 * UNLOCK channel_mutex		UNLOCK channel_mutex
	 *
	 * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes
	 *		CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND
	 *
	 * Note.  The host processes the channel messages "sequentially", in
	 * the order in which they are received on a per-partition basis.
	 */

	/*
	 * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
	 * avoid sending the message and fail here for such channels.
	 */
	if (channel->state != CHANNEL_OPENED_STATE) {
		ret = -EIO;
		goto end;
	}

	origin_cpu = channel->target_cpu;
	if (target_cpu == origin_cpu)
		goto end;

	if (vmbus_send_modifychannel(channel,
				     hv_cpu_number_to_vp_number(target_cpu))) {
		ret = -EIO;
		goto end;
	}

	/*
	 * For versions before VERSION_WIN10_V5_3, the following warning holds:
	 *
	 * Warning.  At this point, there is *no* guarantee that the host will
	 * have successfully processed the vmbus_send_modifychannel() request.
	 * See the header comment of vmbus_send_modifychannel() for more info.
	 *
	 * Lags in the processing of the above vmbus_send_modifychannel() can
	 * result in missed interrupts if the "old" target CPU is taken offline
	 * before Hyper-V starts sending interrupts to the "new" target CPU.
	 * But apart from this offlining scenario, the code tolerates such
	 * lags.  It will function correctly even if a channel interrupt comes
	 * in on a CPU that is different from the channel target_cpu value.
	 */

	channel->target_cpu = target_cpu;

	/* See init_vp_index(). */
	if (hv_is_perf_channel(channel))
		hv_update_allocated_cpus(origin_cpu, target_cpu);

	/* Currently set only for storvsc channels. */
	if (channel->change_target_cpu_callback) {
		(*channel->change_target_cpu_callback)(channel,
				origin_cpu, target_cpu);
	}

end:
	return ret;
}

static ssize_t target_cpu_store(struct vmbus_channel *channel,
				const char *buf, size_t count)
{
	u32 target_cpu;
	ssize_t ret;

	if (sscanf(buf, "%u", &target_cpu) != 1)
		return -EIO;

	cpus_read_lock();
	mutex_lock(&vmbus_connection.channel_mutex);
	ret = vmbus_channel_set_cpu(channel, target_cpu);
	mutex_unlock(&vmbus_connection.channel_mutex);
	cpus_read_unlock();

	return ret ?: count;
}
static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);
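/*
 * Illustrative use of the "cpu" attribute defined above, e.g. to move a
 * channel's interrupts to CPU 2 (the device directory and relid are
 * examples):
 *
 *	# echo 2 > /sys/bus/vmbus/devices/<dev>/channels/<relid>/cpu
 */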
static ssize_t channel_pending_show(struct vmbus_channel *channel,
				    char *buf)
{
	return sprintf(buf, "%d\n",
		       channel_pending(channel,
				       vmbus_connection.monitor_pages[1]));
}
static VMBUS_CHAN_ATTR(pending, 0444, channel_pending_show, NULL);

static ssize_t channel_latency_show(struct vmbus_channel *channel,
				    char *buf)
{
	return sprintf(buf, "%d\n",
		       channel_latency(channel,
				       vmbus_connection.monitor_pages[1]));
}
static VMBUS_CHAN_ATTR(latency, 0444, channel_latency_show, NULL);

static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%llu\n", channel->interrupts);
}
static VMBUS_CHAN_ATTR(interrupts, 0444, channel_interrupts_show, NULL);

static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%llu\n", channel->sig_events);
}
static VMBUS_CHAN_ATTR(events, 0444, channel_events_show, NULL);

static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel,
					 char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->intr_in_full);
}
static VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL);

static ssize_t channel_intr_out_empty_show(struct vmbus_channel *channel,
					   char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->intr_out_empty);
}
static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL);

static ssize_t channel_out_full_first_show(struct vmbus_channel *channel,
					   char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->out_full_first);
}
static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL);

static ssize_t channel_out_full_total_show(struct vmbus_channel *channel,
					   char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->out_full_total);
}
static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL);

static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel,
					  char *buf)
{
	return sprintf(buf, "%u\n", channel->offermsg.monitorid);
}
static VMBUS_CHAN_ATTR(monitor_id, 0444, subchannel_monitor_id_show, NULL);

static ssize_t subchannel_id_show(struct vmbus_channel *channel,
				  char *buf)
{
	return sprintf(buf, "%u\n",
		       channel->offermsg.offer.sub_channel_index);
}
static VMBUS_CHAN_ATTR_RO(subchannel_id);
static int hv_mmap_ring_buffer_wrapper(struct file *filp, struct kobject *kobj,
                                       const struct bin_attribute *attr,
                                       struct vm_area_struct *vma)
{
        struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj);
        struct vm_area_desc desc;
        int err;

        /*
         * hv_(create|remove)_ring_sysfs implementation ensures that
         * mmap_prepare_ring_buffer is not NULL.
         */
        compat_set_desc_from_vma(&desc, filp, vma);
        err = channel->mmap_prepare_ring_buffer(channel, &desc);
        if (err)
                return err;

        return __compat_vma_mmap(&desc, vma);
}

static struct bin_attribute chan_attr_ring_buffer = {
        .attr = {
                .name = "ring",
                .mode = 0600,
        },
        .mmap = hv_mmap_ring_buffer_wrapper,
};
static struct attribute *vmbus_chan_attrs[] = {
        &chan_attr_out_mask.attr,
        &chan_attr_in_mask.attr,
        &chan_attr_read_avail.attr,
        &chan_attr_write_avail.attr,
        &chan_attr_cpu.attr,
        &chan_attr_pending.attr,
        &chan_attr_latency.attr,
        &chan_attr_interrupts.attr,
        &chan_attr_events.attr,
        &chan_attr_intr_in_full.attr,
        &chan_attr_intr_out_empty.attr,
        &chan_attr_out_full_first.attr,
        &chan_attr_out_full_total.attr,
        &chan_attr_monitor_id.attr,
        &chan_attr_subchannel_id.attr,
        NULL
};

static const struct bin_attribute *vmbus_chan_bin_attrs[] = {
        &chan_attr_ring_buffer,
        NULL
};

/*
 * Channel-level attribute_group callback function. Returns the permission for
 * each attribute, and returns 0 if an attribute is not visible.
 */
static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj,
                                          struct attribute *attr, int idx)
{
        const struct vmbus_channel *channel =
                container_of(kobj, struct vmbus_channel, kobj);

        /* Hide the monitor attributes if the monitor mechanism is not used. */
        if (!channel->offermsg.monitor_allocated &&
            (attr == &chan_attr_pending.attr ||
             attr == &chan_attr_latency.attr ||
             attr == &chan_attr_monitor_id.attr))
                return 0;

        return attr->mode;
}

static umode_t vmbus_chan_bin_attr_is_visible(struct kobject *kobj,
                                              const struct bin_attribute *attr, int idx)
{
        const struct vmbus_channel *channel =
                container_of(kobj, struct vmbus_channel, kobj);

        /* Hide the "ring" attribute if the channel's ring_sysfs_visible is false. */
        if (attr == &chan_attr_ring_buffer && !channel->ring_sysfs_visible)
                return 0;

        return attr->attr.mode;
}

static size_t vmbus_chan_bin_size(struct kobject *kobj,
                                  const struct bin_attribute *bin_attr, int a)
{
        const struct vmbus_channel *channel =
                container_of(kobj, struct vmbus_channel, kobj);

        return channel->ringbuffer_pagecount << PAGE_SHIFT;
}

static const struct attribute_group vmbus_chan_group = {
        .attrs = vmbus_chan_attrs,
        .bin_attrs = vmbus_chan_bin_attrs,
        .is_visible = vmbus_chan_attr_is_visible,
        .is_bin_visible = vmbus_chan_bin_attr_is_visible,
        .bin_size = vmbus_chan_bin_size,
};

static const struct kobj_type vmbus_chan_ktype = {
        .sysfs_ops = &vmbus_chan_sysfs_ops,
        .release = vmbus_chan_release,
};
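/*
 * Worked example for vmbus_chan_bin_size() above, with illustrative numbers:
 * with 4 KiB pages (PAGE_SHIFT == 12) and ringbuffer_pagecount == 16, the
 * "ring" binary attribute spans 16 << 12 == 64 KiB of ring-buffer pages.
 */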
/**
 * hv_create_ring_sysfs() - create "ring" sysfs entry corresponding to ring buffers for a channel.
 * @channel: Pointer to vmbus_channel structure
 * @hv_mmap_prepare_ring_buffer: function pointer invoked when userspace mmaps
 *                               the channel's "ring" sysfs node, which
 *                               represents the ring buffer of that channel.
 *                               The pointer has the following type:
 *                               int (*hv_mmap_prepare_ring_buffer)(struct vmbus_channel *channel,
 *                                                                  struct vm_area_desc *desc)
 *                               It takes a pointer to the channel and a
 *                               pointer to the vm_area_desc used for
 *                               mmap_prepare as arguments.
 *
 * The sysfs node for a channel's ring buffer is created along with the other
 * channel attributes, but its visibility is disabled by default; it must be
 * enabled explicitly while the use-case is running.
 * For example, an HV_NIC device is used either by uio_hv_generic or hv_netvsc
 * at any given point in time, and the "ring" sysfs node is needed only while
 * uio_hv_generic is bound to that device. To avoid exposing the ring buffer by
 * default, this function enables the visibility of the ring for userspace.
 * Note: Races with userspace are possible, and creating new use-cases for this
 * interface is discouraged. It was added to maintain backward compatibility
 * while solving one of the race conditions in uio_hv_generic around sysfs
 * creation. See the comments with vmbus_add_dynid() and
 * vmbus_device_register().
 *
 * Returns 0 on success or error code on failure.
 */
int hv_create_ring_sysfs(struct vmbus_channel *channel,
                         int (*hv_mmap_prepare_ring_buffer)(struct vmbus_channel *channel,
                                                            struct vm_area_desc *desc))
{
        struct kobject *kobj = &channel->kobj;

        channel->mmap_prepare_ring_buffer = hv_mmap_prepare_ring_buffer;
        channel->ring_sysfs_visible = true;

        return sysfs_update_group(kobj, &vmbus_chan_group);
}
EXPORT_SYMBOL_GPL(hv_create_ring_sysfs);
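/*
 * Illustrative use of hv_create_ring_sysfs() (a sketch with hypothetical
 * names, not code from an in-tree driver):
 *
 *      static int foo_mmap_prepare(struct vmbus_channel *channel,
 *                                  struct vm_area_desc *desc)
 *      {
 *              (validate the requested range and populate desc here)
 *              return 0;
 *      }
 *
 *      ret = hv_create_ring_sysfs(channel, foo_mmap_prepare);
 *
 * The matching hv_remove_ring_sysfs() below hides the node again, e.g. when
 * the consuming driver is unbound from the device.
 */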
/**
 * hv_remove_ring_sysfs() - remove the "ring" sysfs entry corresponding to the ring buffers of a channel.
 * @channel: Pointer to vmbus_channel structure
 *
 * Hide the "ring" sysfs node for a channel by clearing its visibility state
 * and updating the sysfs group.
 *
 * Returns 0 on success or error code on failure.
 */
int hv_remove_ring_sysfs(struct vmbus_channel *channel)
{
        struct kobject *kobj = &channel->kobj;
        int ret;

        channel->ring_sysfs_visible = false;
        ret = sysfs_update_group(kobj, &vmbus_chan_group);
        channel->mmap_prepare_ring_buffer = NULL;
        return ret;
}
EXPORT_SYMBOL_GPL(hv_remove_ring_sysfs);

/*
 * vmbus_add_channel_kobj - setup a sub-directory under device/channels
 */
int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel)
{
        const struct device *device = &dev->device;
        struct kobject *kobj = &channel->kobj;
        u32 relid = channel->offermsg.child_relid;
        int ret;

        kobj->kset = dev->channels_kset;
        ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL,
                                   "%u", relid);
        if (ret) {
                kobject_put(kobj);
                return ret;
        }

        ret = sysfs_create_group(kobj, &vmbus_chan_group);

        if (ret) {
                /*
                 * The calling functions' error handling paths will clean up
                 * the empty channel directory.
                 */
                kobject_put(kobj);
                dev_err(device, "Unable to set up channel sysfs files\n");
                return ret;
        }

        kobject_uevent(kobj, KOBJ_ADD);

        return 0;
}

/*
 * vmbus_remove_channel_attr_group - remove the channel's attribute group
 */
void vmbus_remove_channel_attr_group(struct vmbus_channel *channel)
{
        sysfs_remove_group(&channel->kobj, &vmbus_chan_group);
}

/*
 * vmbus_device_create - Creates a new child device object for the vmbus;
 * the caller registers it separately via vmbus_device_register().
 */
struct hv_device *vmbus_device_create(const guid_t *type,
                                      const guid_t *instance,
                                      struct vmbus_channel *channel)
{
        struct hv_device *child_device_obj;

        child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL);
        if (!child_device_obj) {
                pr_err("Unable to allocate device object for child device\n");
                return NULL;
        }

        child_device_obj->channel = channel;
        guid_copy(&child_device_obj->dev_type, type);
        guid_copy(&child_device_obj->dev_instance, instance);
        child_device_obj->vendor_id = PCI_VENDOR_ID_MICROSOFT;

        return child_device_obj;
}
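/*
 * Typical pairing (illustrative; mirrors how the offer-processing code is
 * expected to use these helpers):
 *
 *      dev = vmbus_device_create(&newchannel->offermsg.offer.if_type,
 *                                &newchannel->offermsg.offer.if_instance,
 *                                newchannel);
 *      if (!dev)
 *              (bail out)
 *      if (vmbus_device_register(dev) != 0)
 *              (free the object and bail out)
 */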
/*
 * vmbus_device_register - Register the child device
 */
int vmbus_device_register(struct hv_device *child_device_obj)
{
        struct kobject *kobj = &child_device_obj->device.kobj;
        int ret;

        dev_set_name(&child_device_obj->device, "%pUl",
                     &child_device_obj->channel->offermsg.offer.if_instance);

        child_device_obj->device.bus = &hv_bus;
        child_device_obj->device.parent = vmbus_root_device;
        child_device_obj->device.release = vmbus_device_release;

        child_device_obj->device.dma_parms = &child_device_obj->dma_parms;
        child_device_obj->device.dma_mask = &child_device_obj->dma_mask;
        dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64));

        /*
         * Register with the LDM. This will kick off the driver/device
         * binding...which will eventually call vmbus_match() and vmbus_probe()
         */
        ret = device_register(&child_device_obj->device);
        if (ret) {
                pr_err("Unable to register child device\n");
                put_device(&child_device_obj->device);
                return ret;
        }

        /*
         * If device_register() found a driver to assign to the device, the
         * driver's probe function has already run at this point. If that
         * probe function accesses or operates on the "channels" subdirectory
         * in sysfs, those operations will have failed because the "channels"
         * subdirectory doesn't exist until the code below runs. Or if the
         * probe function creates a /dev entry, a user space program could
         * find and open the /dev entry, and then create a race by accessing
         * the "channels" subdirectory while the creation steps are in progress
         * here. The race can't result in a kernel failure, but the user space
         * program may get an error in accessing "channels" or its
         * subdirectories. See also comments with vmbus_add_dynid() about a
         * related race condition.
         */
        child_device_obj->channels_kset = kset_create_and_add("channels",
                                                              NULL, kobj);
        if (!child_device_obj->channels_kset) {
                ret = -ENOMEM;
                goto err_dev_unregister;
        }

        ret = vmbus_add_channel_kobj(child_device_obj,
                                     child_device_obj->channel);
        if (ret) {
                pr_err("Unable to register primary channel\n");
                goto err_kset_unregister;
        }
        hv_debug_add_dev_dir(child_device_obj);

        return 0;

err_kset_unregister:
        kset_unregister(child_device_obj->channels_kset);

err_dev_unregister:
        device_unregister(&child_device_obj->device);
        return ret;
}

/*
 * vmbus_device_unregister - Remove the specified child device
 * from the vmbus.
 */
void vmbus_device_unregister(struct hv_device *device_obj)
{
        pr_debug("child device %s unregistered\n",
                 dev_name(&device_obj->device));

        kset_unregister(device_obj->channels_kset);

        /*
         * Kick off the process of unregistering the device.
         * This will call vmbus_remove() and eventually vmbus_device_release()
         */
        device_unregister(&device_obj->device);
}
EXPORT_SYMBOL_GPL(vmbus_device_unregister);
#ifdef CONFIG_ACPI
/*
 * VMBUS is an acpi enumerated device. Get the information we
 * need from DSDT.
 */
static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
{
        resource_size_t start = 0;
        resource_size_t end = 0;
        struct resource *new_res;
        struct resource **old_res = &hyperv_mmio;
        struct resource **prev_res = NULL;
        struct resource r;

        switch (res->type) {

        /*
         * "Address" descriptors are for bus windows. Ignore
         * "memory" descriptors, which are for registers on
         * devices.
         */
        case ACPI_RESOURCE_TYPE_ADDRESS32:
                start = res->data.address32.address.minimum;
                end = res->data.address32.address.maximum;
                break;

        case ACPI_RESOURCE_TYPE_ADDRESS64:
                start = res->data.address64.address.minimum;
                end = res->data.address64.address.maximum;
                break;

        /*
         * The IRQ information is needed only on ARM64, which Hyper-V
         * sets up in the extended format.  IRQ information is present
         * on x86/x64 in the non-extended format but it is not used by
         * Linux. So don't bother checking for the non-extended format.
         */
        case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
                if (!acpi_dev_resource_interrupt(res, 0, &r)) {
                        pr_err("Unable to parse Hyper-V ACPI interrupt\n");
                        return AE_ERROR;
                }
                /* ARM64 INTID for VMbus */
                vmbus_interrupt = res->data.extended_irq.interrupts[0];
                /* Linux IRQ number */
                vmbus_irq = r.start;
                return AE_OK;

        default:
                /* Unused resource type */
                return AE_OK;

        }
        /*
         * Ignore ranges that are below 1MB, as they're not
         * necessary or useful here.
         */
        if (end < 0x100000)
                return AE_OK;

        new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC);
        if (!new_res)
                return AE_NO_MEMORY;

        /* If this range overlaps the virtual TPM, truncate it. */
        if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS)
                end = VTPM_BASE_ADDRESS;

        new_res->name = "hyperv mmio";
        new_res->flags = IORESOURCE_MEM;
        new_res->start = start;
        new_res->end = end;

        /*
         * If two ranges are adjacent, merge them.
         */
        do {
                if (!*old_res) {
                        *old_res = new_res;
                        break;
                }

                if (((*old_res)->end + 1) == new_res->start) {
                        (*old_res)->end = new_res->end;
                        kfree(new_res);
                        break;
                }

                if ((*old_res)->start == new_res->end + 1) {
                        (*old_res)->start = new_res->start;
                        kfree(new_res);
                        break;
                }

                if ((*old_res)->start > new_res->end) {
                        new_res->sibling = *old_res;
                        if (prev_res)
                                (*prev_res)->sibling = new_res;
                        *old_res = new_res;
                        break;
                }

                prev_res = old_res;
                old_res = &(*old_res)->sibling;

        } while (1);

        return AE_OK;
}
#endif

static void vmbus_mmio_remove(void)
{
        struct resource *cur_res;
        struct resource *next_res;

        if (hyperv_mmio) {
                if (fb_mmio) {
                        __release_region(hyperv_mmio, fb_mmio->start,
                                         resource_size(fb_mmio));
                        fb_mmio = NULL;
                }

                for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) {
                        next_res = cur_res->sibling;
                        kfree(cur_res);
                }
        }
}

static void __maybe_unused vmbus_reserve_fb(void)
{
        resource_size_t start = 0, size;
        struct pci_dev *pdev;

        if (efi_enabled(EFI_BOOT)) {
                /* Gen2 VM: get FB base from EFI framebuffer */
                if (IS_ENABLED(CONFIG_SYSFB)) {
                        start = sysfb_primary_display.screen.lfb_base;
                        size = max_t(__u32, sysfb_primary_display.screen.lfb_size, 0x800000);
                }
        } else {
                /* Gen1 VM: get FB base from PCI */
                pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT,
                                      PCI_DEVICE_ID_HYPERV_VIDEO, NULL);
                if (!pdev)
                        return;

                if (pdev->resource[0].flags & IORESOURCE_MEM) {
                        start = pci_resource_start(pdev, 0);
                        size = pci_resource_len(pdev, 0);
                }

                /*
                 * Release the PCI device so hyperv_drm driver can grab it
                 * later.
                 */
                pci_dev_put(pdev);
        }

        if (!start)
                return;

        /*
         * Make a claim for the frame buffer in the resource tree under the
         * first node, which will be the one below 4GB.  The length seems to
         * be underreported, particularly in a Generation 1 VM.  So start out
         * reserving a larger area and make it smaller until it succeeds.
         */
        for (; !fb_mmio && (size >= 0x100000); size >>= 1)
                fb_mmio = __request_region(hyperv_mmio, start, size, fb_mmio_name, 0);
}
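/*
 * Worked example for the loop in vmbus_reserve_fb() above (illustrative
 * numbers): starting from size = 0x800000, __request_region() is attempted
 * for 8 MiB, 4 MiB, 2 MiB, and finally 1 MiB; the first attempt that does
 * not conflict with an existing reservation claims the region and ends the
 * loop.
 */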
/**
 * vmbus_allocate_mmio() - Pick a memory-mapped I/O range.
 * @new:                If successful, supplies a pointer to the
 *                      allocated MMIO space.
 * @device_obj:         Identifies the caller
 * @min:                Minimum guest physical address of the
 *                      allocation
 * @max:                Maximum guest physical address
 * @size:               Size of the range to be allocated
 * @align:              Alignment of the range to be allocated
 * @fb_overlap_ok:      Whether this allocation can be allowed
 *                      to overlap the video frame buffer.
 *
 * This function walks the resources granted to VMBus by the
 * _CRS object in the ACPI namespace underneath the parent
 * "bridge" whether that's a root PCI bus in the Generation 1
 * case or a Module Device in the Generation 2 case.  It then
 * attempts to allocate from the global MMIO pool in a way that
 * matches the constraints supplied in these parameters and by
 * that _CRS.
 *
 * Return: 0 on success, -errno on failure
 */
int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
                        resource_size_t min, resource_size_t max,
                        resource_size_t size, resource_size_t align,
                        bool fb_overlap_ok)
{
        struct resource *iter, *shadow;
        resource_size_t range_min, range_max, start, end;
        const char *dev_n = dev_name(&device_obj->device);
        int retval;

        retval = -ENXIO;
        mutex_lock(&hyperv_mmio_lock);

        /*
         * If overlaps with frame buffers are allowed, then first attempt to
         * make the allocation from within the reserved region.  Because it
         * is already reserved, no shadow allocation is necessary.
         */
        if (fb_overlap_ok && fb_mmio && !(min > fb_mmio->end) &&
            !(max < fb_mmio->start)) {

                range_min = fb_mmio->start;
                range_max = fb_mmio->end;
                start = (range_min + align - 1) & ~(align - 1);
                for (; start + size - 1 <= range_max; start += align) {
                        *new = request_mem_region_exclusive(start, size, dev_n);
                        if (*new) {
                                retval = 0;
                                goto exit;
                        }
                }
        }

        for (iter = hyperv_mmio; iter; iter = iter->sibling) {
                if ((iter->start >= max) || (iter->end <= min))
                        continue;

                range_min = iter->start;
                range_max = iter->end;
                start = (range_min + align - 1) & ~(align - 1);
                for (; start + size - 1 <= range_max; start += align) {
                        end = start + size - 1;

                        /* Skip the whole fb_mmio region if not fb_overlap_ok */
                        if (!fb_overlap_ok && fb_mmio &&
                            (((start >= fb_mmio->start) && (start <= fb_mmio->end)) ||
                             ((end >= fb_mmio->start) && (end <= fb_mmio->end))))
                                continue;

                        shadow = __request_region(iter, start, size, NULL,
                                                  IORESOURCE_BUSY);
                        if (!shadow)
                                continue;

                        *new = request_mem_region_exclusive(start, size, dev_n);
                        if (*new) {
                                shadow->name = (char *)*new;
                                retval = 0;
                                goto exit;
                        }

                        __release_region(iter, start, size);
                }
        }

exit:
        mutex_unlock(&hyperv_mmio_lock);
        return retval;
}
EXPORT_SYMBOL_GPL(vmbus_allocate_mmio);
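/*
 * Illustrative call (a sketch of how a framebuffer-style consumer might
 * allocate from the VMBus MMIO pool; the variable names are hypothetical):
 *
 *      struct resource *fb_res;
 *
 *      ret = vmbus_allocate_mmio(&fb_res, hdev, 0, -1, fb_size,
 *                                0x100000, true);
 *      if (ret)
 *              (handle the error)
 *      ...
 *      vmbus_free_mmio(fb_res->start, fb_size);
 */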
/**
 * vmbus_free_mmio() - Free a memory-mapped I/O range.
 * @start:              Base address of region to release.
 * @size:               Size of the range to be released.
 *
 * This function releases anything requested by
 * vmbus_allocate_mmio().
 */
void vmbus_free_mmio(resource_size_t start, resource_size_t size)
{
        struct resource *iter;

        mutex_lock(&hyperv_mmio_lock);

        /*
         * If all bytes of the MMIO range to be released are within the
         * special case fb_mmio shadow region, skip releasing the shadow
         * region since no corresponding __request_region() was done
         * in vmbus_allocate_mmio().
         */
        if (fb_mmio && start >= fb_mmio->start &&
            (start + size - 1 <= fb_mmio->end))
                goto skip_shadow_release;

        for (iter = hyperv_mmio; iter; iter = iter->sibling) {
                if ((iter->start >= start + size) || (iter->end <= start))
                        continue;

                __release_region(iter, start, size);
        }

skip_shadow_release:
        release_mem_region(start, size);
        mutex_unlock(&hyperv_mmio_lock);
}
EXPORT_SYMBOL_GPL(vmbus_free_mmio);

#ifdef CONFIG_ACPI
static int vmbus_acpi_add(struct platform_device *pdev)
{
        acpi_status result;
        int ret_val = -ENODEV;
        struct acpi_device *ancestor;
        struct acpi_device *device = ACPI_COMPANION(&pdev->dev);

        vmbus_root_device = &device->dev;

        /*
         * Older versions of Hyper-V for ARM64 fail to include the _CCA
         * method on the top level VMbus device in the DSDT. But devices
         * are hardware coherent in all current Hyper-V use cases, so fix
         * up the ACPI device to behave as if _CCA is present and indicates
         * hardware coherence.
         */
        ACPI_COMPANION_SET(&device->dev, device);
        if (IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) &&
            device_get_dma_attr(&device->dev) == DEV_DMA_NOT_SUPPORTED) {
                pr_info("No ACPI _CCA found; assuming coherent device I/O\n");
                device->flags.cca_seen = true;
                device->flags.coherent_dma = true;
        }

        result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
                                     vmbus_walk_resources, NULL);

        if (ACPI_FAILURE(result))
                goto acpi_walk_err;
        /*
         * Some ancestor of the vmbus acpi device (Gen1 or Gen2
         * firmware) is the VMOD that has the mmio ranges. Get that.
         */
        for (ancestor = acpi_dev_parent(device);
             ancestor && ancestor->handle != ACPI_ROOT_OBJECT;
             ancestor = acpi_dev_parent(ancestor)) {
                result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS,
                                             vmbus_walk_resources, NULL);

                if (ACPI_FAILURE(result))
                        continue;
                if (hyperv_mmio) {
                        vmbus_reserve_fb();
                        break;
                }
        }
        ret_val = 0;

acpi_walk_err:
        if (ret_val)
                vmbus_mmio_remove();
        return ret_val;
}
#else
static int vmbus_acpi_add(struct platform_device *pdev)
{
        return 0;
}
#endif
#ifndef HYPERVISOR_CALLBACK_VECTOR
static int vmbus_set_irq(struct platform_device *pdev)
{
        struct irq_data *data;
        int irq;
        irq_hw_number_t hwirq;

        irq = platform_get_irq(pdev, 0);
        /* platform_get_irq() never returns 0; a negative value means error. */
        if (irq < 0)
                return irq;

        data = irq_get_irq_data(irq);
        if (!data) {
                pr_err("No interrupt data for VMBus virq %d\n", irq);
                return -ENODEV;
        }
        hwirq = irqd_to_hwirq(data);

        vmbus_irq = irq;
        vmbus_interrupt = hwirq;
        pr_debug("VMBus virq %d, hwirq %d\n", vmbus_irq, vmbus_interrupt);

        return 0;
}
#endif

static int vmbus_device_add(struct platform_device *pdev)
{
        struct resource **cur_res = &hyperv_mmio;
        struct of_range range;
        struct of_range_parser parser;
        struct device_node *np = pdev->dev.of_node;
        int ret;

        vmbus_root_device = &pdev->dev;

        ret = of_range_parser_init(&parser, np);
        if (ret)
                return ret;

#ifndef HYPERVISOR_CALLBACK_VECTOR
        ret = vmbus_set_irq(pdev);
        if (ret)
                return ret;
#endif
        for_each_of_range(&parser, &range) {
                struct resource *res;

                res = kzalloc(sizeof(*res), GFP_KERNEL);
                if (!res) {
                        vmbus_mmio_remove();
                        return -ENOMEM;
                }

                res->name = "hyperv mmio";
                res->flags = range.flags;
                res->start = range.cpu_addr;
                res->end = range.cpu_addr + range.size;

                *cur_res = res;
                cur_res = &res->sibling;
        }

        return ret;
}

static int vmbus_platform_driver_probe(struct platform_device *pdev)
{
        if (acpi_disabled)
                return vmbus_device_add(pdev);
        else
                return vmbus_acpi_add(pdev);
}

static void vmbus_platform_driver_remove(struct platform_device *pdev)
{
        vmbus_mmio_remove();
}
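/*
 * Illustrative devicetree node consumed by vmbus_device_add() above on
 * non-ACPI systems (the property values are hypothetical; the match table
 * is vmbus_of_match further below):
 *
 *      soc {
 *              #address-cells = <2>;
 *              #size-cells = <1>;
 *              bus {
 *                      compatible = "microsoft,vmbus";
 *                      #address-cells = <2>;
 *                      #size-cells = <1>;
 *                      ranges = <0x0f 0xf0000000 0x0f 0xf0000000 0x10000000>;
 *              };
 *      };
 */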
#ifdef CONFIG_PM_SLEEP
static int vmbus_bus_suspend(struct device *dev)
{
        struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(
                        hv_context.cpu_context, VMBUS_CONNECT_CPU);
        struct vmbus_channel *channel, *sc;

        tasklet_disable(&hv_cpu->msg_dpc);
        vmbus_connection.ignore_any_offer_msg = true;
        /* The tasklet_enable() takes care of providing a memory barrier */
        tasklet_enable(&hv_cpu->msg_dpc);

        /* Drain all the workqueues as we are in suspend */
        drain_workqueue(vmbus_connection.rescind_work_queue);
        drain_workqueue(vmbus_connection.work_queue);
        drain_workqueue(vmbus_connection.handle_primary_chan_wq);
        drain_workqueue(vmbus_connection.handle_sub_chan_wq);

        mutex_lock(&vmbus_connection.channel_mutex);
        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
                if (!is_hvsock_channel(channel))
                        continue;

                vmbus_force_channel_rescinded(channel);
        }
        mutex_unlock(&vmbus_connection.channel_mutex);

        /*
         * Wait until all the sub-channels and hv_sock channels have been
         * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
         * they would conflict with the new sub-channels that will be created
         * in the resume path. hv_sock channels should also be destroyed, but
         * a hv_sock channel of an established hv_sock connection cannot really
         * be destroyed since it may still be referenced by the userspace
         * application, so we just force the hv_sock channel to be rescinded
         * by vmbus_force_channel_rescinded(), and the userspace application
         * will thoroughly destroy the channel after hibernation.
         *
         * Note: the counter nr_chan_close_on_suspend may never go above 0 if
         * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM.
         */
        if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
                wait_for_completion(&vmbus_connection.ready_for_suspend_event);

        mutex_lock(&vmbus_connection.channel_mutex);

        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
                /*
                 * Remove the channel from the array of channels and invalidate
                 * the channel's relid.  Upon resume, vmbus_onoffer() will fix
                 * up the relid (and other fields, if necessary) and add the
                 * channel back to the array.
                 */
                vmbus_channel_unmap_relid(channel);
                channel->offermsg.child_relid = INVALID_RELID;

                if (is_hvsock_channel(channel)) {
                        if (!channel->rescind) {
                                pr_err("hv_sock channel not rescinded!\n");
                                WARN_ON_ONCE(1);
                        }
                        continue;
                }

                list_for_each_entry(sc, &channel->sc_list, sc_list) {
                        pr_err("Sub-channel not deleted!\n");
                        WARN_ON_ONCE(1);
                }
        }

        mutex_unlock(&vmbus_connection.channel_mutex);

        vmbus_initiate_unload(false);

        return 0;
}

static int vmbus_bus_resume(struct device *dev)
{
        struct vmbus_channel *channel;
        struct vmbus_channel_msginfo *msginfo;
        size_t msgsize;
        int ret;

        vmbus_connection.ignore_any_offer_msg = false;

        /*
         * Only the 'vmbus_proto_version' that was in use before hibernation
         * is used to re-negotiate with the host.
         */
        if (!vmbus_proto_version) {
                pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
                return -EINVAL;
        }

        msgsize = sizeof(*msginfo) +
                  sizeof(struct vmbus_channel_initiate_contact);

        msginfo = kzalloc(msgsize, GFP_KERNEL);

        if (msginfo == NULL)
                return -ENOMEM;

        ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);

        kfree(msginfo);

        if (ret != 0)
                return ret;

        vmbus_request_offers();

        mutex_lock(&vmbus_connection.channel_mutex);
        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
                if (channel->offermsg.child_relid != INVALID_RELID)
                        continue;

                /* hvsock channels are not expected to be present. */
                if (is_hvsock_channel(channel))
                        continue;

                pr_err("channel %pUl/%pUl not present after resume.\n",
                       &channel->offermsg.offer.if_type,
                       &channel->offermsg.offer.if_instance);
                /* ToDo: Cleanup these channels here */
        }
        mutex_unlock(&vmbus_connection.channel_mutex);

        /* Reset the event for the next suspend. */
        reinit_completion(&vmbus_connection.ready_for_suspend_event);

        return 0;
}
#else
#define vmbus_bus_suspend NULL
#define vmbus_bus_resume NULL
#endif /* CONFIG_PM_SLEEP */

static const __maybe_unused struct of_device_id vmbus_of_match[] = {
        {
                .compatible = "microsoft,vmbus",
        },
        {
                /* sentinel */
        },
};
MODULE_DEVICE_TABLE(of, vmbus_of_match);

static const __maybe_unused struct acpi_device_id vmbus_acpi_device_ids[] = {
        {"VMBUS", 0},
        {"VMBus", 0},
        {"", 0},
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);

/*
 * Note: we must use the "noirq" ops, otherwise hibernation cannot work with
 * PCI device assignment, because "pci_dev_pm_ops" uses the "noirq" ops: in
 * the resume path, the pci "noirq" restore op runs before the "non-noirq"
 * op (see resume_target_kernel() -> dpm_resume_start(), and
 * hibernation_restore() -> dpm_resume_end()).  This means vmbus_bus_resume()
 * and the pci-hyperv's resume callback must also run via the "noirq" ops.
 *
 * Set suspend_noirq/resume_noirq to NULL for Suspend-to-Idle: see the comment
 * earlier in this file before vmbus_pm.
 */
static const struct dev_pm_ops vmbus_bus_pm = {
        .suspend_noirq  = NULL,
        .resume_noirq   = NULL,
        .freeze_noirq   = vmbus_bus_suspend,
        .thaw_noirq     = vmbus_bus_resume,
        .poweroff_noirq = vmbus_bus_suspend,
        .restore_noirq  = vmbus_bus_resume
};

static struct platform_driver vmbus_platform_driver = {
        .probe = vmbus_platform_driver_probe,
        .remove = vmbus_platform_driver_remove,
        .driver = {
                .name = "vmbus",
                .acpi_match_table = ACPI_PTR(vmbus_acpi_device_ids),
                .of_match_table = of_match_ptr(vmbus_of_match),
                .pm = &vmbus_bus_pm,
                .probe_type = PROBE_FORCE_SYNCHRONOUS,
        }
};

static void hv_kexec_handler(void)
{
        vmbus_initiate_unload(false);
        /* Make sure conn_state is set as hv_synic_cleanup checks for it */
        mb();
        cpuhp_remove_state(hyperv_cpuhp_online);
}

static void hv_crash_handler(struct pt_regs *regs)
{
        int cpu;

        vmbus_initiate_unload(true);
        /*
         * In crash handler we can't schedule synic cleanup for all CPUs,
         * doing the cleanup for current CPU only. This should be sufficient
         * for kdump.
         */
        cpu = smp_processor_id();
        hv_stimer_cleanup(cpu);
        hv_hyp_synic_disable_regs(cpu);
}
static int hv_synic_suspend(void *data)
{
        /*
         * When we reach here, all the non-boot CPUs have been offlined.
         * If we're in a legacy configuration where stimer Direct Mode is
         * not enabled, the stimers on the non-boot CPUs have been unbound
         * in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() ->
         * hv_stimer_cleanup() -> clockevents_unbind_device().
         *
         * hv_synic_suspend() only runs on CPU0 with interrupts disabled.
         * Here we do not call hv_stimer_legacy_cleanup() on CPU0 because:
         * 1) it's unnecessary as interrupts remain disabled between
         *    syscore_suspend() and syscore_resume(): see create_image() and
         *    resume_target_kernel()
         * 2) the stimer on CPU0 is automatically disabled later by
         *    syscore_suspend() -> timekeeping_suspend() -> tick_suspend()
         *    -> ... -> clockevents_shutdown() -> ... -> hv_ce_shutdown()
         * 3) a warning would be triggered if we call
         *    clockevents_unbind_device(), which may sleep, in an
         *    interrupts-disabled context.
         */

        hv_hyp_synic_disable_regs(0);

        return 0;
}

static void hv_synic_resume(void *data)
{
        hv_hyp_synic_enable_regs(0);

        /*
         * Note: we don't need to call hv_stimer_init(0), because the timer
         * on CPU0 is not unbound in hv_synic_suspend(), and the timer is
         * automatically re-enabled in timekeeping_resume().
         */
}

/* The callbacks run only on CPU0, with irqs_disabled. */
static const struct syscore_ops hv_synic_syscore_ops = {
        .suspend = hv_synic_suspend,
        .resume = hv_synic_resume,
};

static struct syscore hv_synic_syscore = {
        .ops = &hv_synic_syscore_ops,
};

static int __init hv_acpi_init(void)
{
        int ret;

        if (!hv_is_hyperv_initialized())
                return -ENODEV;

        if (hv_root_partition() && !hv_nested)
                return 0;

        /*
         * Get ACPI resources first.
         */
        ret = platform_driver_register(&vmbus_platform_driver);
        if (ret)
                return ret;

        if (!vmbus_root_device) {
                ret = -ENODEV;
                goto cleanup;
        }

        /*
         * If we're on an architecture with a hardcoded hypervisor
         * vector (i.e. x86/x64), override the VMbus interrupt found
         * in the ACPI tables.  Ensure vmbus_irq is not set since the
         * normal Linux IRQ mechanism is not used in this case.
         */
#ifdef HYPERVISOR_CALLBACK_VECTOR
        vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR;
        vmbus_irq = -1;
#endif

        hv_debug_init();

        ret = vmbus_bus_init();
        if (ret)
                goto cleanup;

        hv_setup_kexec_handler(hv_kexec_handler);
        hv_setup_crash_handler(hv_crash_handler);

        register_syscore(&hv_synic_syscore);

        return 0;

cleanup:
        platform_driver_unregister(&vmbus_platform_driver);
        vmbus_root_device = NULL;
        return ret;
}

static void __exit vmbus_exit(void)
{
        int cpu;

        unregister_syscore(&hv_synic_syscore);

        hv_remove_kexec_handler();
        hv_remove_crash_handler();
        vmbus_connection.conn_state = DISCONNECTED;
        hv_stimer_global_cleanup();
        vmbus_disconnect();
        if (vmbus_irq == -1)
                hv_remove_vmbus_handler();
        else
                free_percpu_irq(vmbus_irq, &vmbus_evt);
        if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
                smpboot_unregister_percpu_thread(&vmbus_irq_threads);
                vmbus_irq_initialized = false;
        }
        for_each_online_cpu(cpu) {
                struct hv_per_cpu_context *hv_cpu
                        = per_cpu_ptr(hv_context.cpu_context, cpu);

                tasklet_kill(&hv_cpu->msg_dpc);
        }
        hv_debug_rm_all_dir();

        vmbus_free_channels();
        kfree(vmbus_connection.channels);

        /*
         * The vmbus panic notifier is always registered, hence unconditionally
         * unregister it here as well.
         */
        atomic_notifier_chain_unregister(&panic_notifier_list,
                                         &hyperv_panic_vmbus_unload_block);

        bus_unregister(&hv_bus);

        cpuhp_remove_state(hyperv_cpuhp_online);
        hv_synic_free();
        platform_driver_unregister(&vmbus_platform_driver);
}


MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver");

subsys_initcall(hv_acpi_init);
module_exit(vmbus_exit);