1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (c) 2009, Microsoft Corporation. 5 * 6 * Authors: 7 * Haiyang Zhang <haiyangz@microsoft.com> 8 * Hank Janssen <hjanssen@microsoft.com> 9 */ 10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 11 12 #include <linux/kernel.h> 13 #include <linux/sched.h> 14 #include <linux/wait.h> 15 #include <linux/delay.h> 16 #include <linux/mm.h> 17 #include <linux/module.h> 18 #include <linux/slab.h> 19 #include <linux/vmalloc.h> 20 #include <linux/hyperv.h> 21 #include <linux/export.h> 22 #include <linux/io.h> 23 #include <linux/set_memory.h> 24 #include <asm/mshyperv.h> 25 26 #include "hyperv_vmbus.h" 27 28 29 struct vmbus_connection vmbus_connection = { 30 .conn_state = DISCONNECTED, 31 .unload_event = COMPLETION_INITIALIZER( 32 vmbus_connection.unload_event), 33 .next_gpadl_handle = ATOMIC_INIT(0xE1E10), 34 35 .ready_for_suspend_event = COMPLETION_INITIALIZER( 36 vmbus_connection.ready_for_suspend_event), 37 .all_offers_delivered_event = COMPLETION_INITIALIZER( 38 vmbus_connection.all_offers_delivered_event), 39 }; 40 EXPORT_SYMBOL_GPL(vmbus_connection); 41 42 /* 43 * Negotiated protocol version with the host. 44 */ 45 __u32 vmbus_proto_version; 46 EXPORT_SYMBOL_GPL(vmbus_proto_version); 47 48 /* 49 * Table of VMBus versions listed from newest to oldest. 50 * VERSION_WIN7 and VERSION_WS2008 are no longer supported in 51 * Linux guests and are not listed. 52 */ 53 static __u32 vmbus_versions[] = { 54 VERSION_WIN10_V6_0, 55 VERSION_WIN10_V5_3, 56 VERSION_WIN10_V5_2, 57 VERSION_WIN10_V5_1, 58 VERSION_WIN10_V5, 59 VERSION_WIN10_V4_1, 60 VERSION_WIN10, 61 VERSION_WIN8_1, 62 VERSION_WIN8 63 }; 64 65 /* 66 * Maximal VMBus protocol version guests can negotiate. Useful to cap the 67 * VMBus version for testing and debugging purpose. 68 */ 69 static uint max_version = VERSION_WIN10_V6_0; 70 71 module_param(max_version, uint, S_IRUGO); 72 MODULE_PARM_DESC(max_version, 73 "Maximal VMBus protocol version which can be negotiated"); 74 75 int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version) 76 { 77 int ret = 0; 78 struct vmbus_channel_initiate_contact *msg; 79 unsigned long flags; 80 81 init_completion(&msginfo->waitevent); 82 83 msg = (struct vmbus_channel_initiate_contact *)msginfo->msg; 84 85 memset(msg, 0, sizeof(*msg)); 86 msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT; 87 msg->vmbus_version_requested = version; 88 89 /* 90 * VMBus protocol 5.0 (VERSION_WIN10_V5) and higher require that we must 91 * use VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message, 92 * and for subsequent messages, we must use the Message Connection ID 93 * field in the host-returned Version Response Message. And, with 94 * VERSION_WIN10_V5 and higher, we don't use msg->interrupt_page, but we 95 * tell the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for 96 * compatibility. 97 * 98 * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1). 99 */ 100 if (version >= VERSION_WIN10_V5) { 101 msg->msg_sint = VMBUS_MESSAGE_SINT; 102 msg->msg_vtl = ms_hyperv.vtl; 103 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4; 104 } else { 105 msg->interrupt_page = virt_to_phys(vmbus_connection.int_page); 106 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID; 107 } 108 109 if (vmbus_is_confidential() && version >= VERSION_WIN10_V6_0) 110 msg->feature_flags = VMBUS_FEATURE_FLAG_CONFIDENTIAL_CHANNELS; 111 112 /* 113 * shared_gpa_boundary is zero in non-SNP VMs, so it's safe to always 114 * bitwise OR it 115 */ 116 msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]) | 117 ms_hyperv.shared_gpa_boundary; 118 msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]) | 119 ms_hyperv.shared_gpa_boundary; 120 121 msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU); 122 123 /* 124 * Add to list before we send the request since we may 125 * receive the response before returning from this routine 126 */ 127 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 128 list_add_tail(&msginfo->msglistentry, 129 &vmbus_connection.chn_msg_list); 130 131 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 132 133 ret = vmbus_post_msg(msg, 134 sizeof(struct vmbus_channel_initiate_contact), 135 true); 136 137 trace_vmbus_negotiate_version(msg, ret); 138 139 if (ret != 0) { 140 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 141 list_del(&msginfo->msglistentry); 142 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, 143 flags); 144 return ret; 145 } 146 147 /* Wait for the connection response */ 148 wait_for_completion(&msginfo->waitevent); 149 150 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 151 list_del(&msginfo->msglistentry); 152 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 153 154 /* Check if successful */ 155 if (msginfo->response.version_response.version_supported) { 156 vmbus_connection.conn_state = CONNECTED; 157 158 if (version >= VERSION_WIN10_V5) 159 vmbus_connection.msg_conn_id = 160 msginfo->response.version_response.msg_conn_id; 161 } else { 162 return -ECONNREFUSED; 163 } 164 165 return ret; 166 } 167 168 /* 169 * vmbus_connect - Sends a connect request on the partition service connection 170 */ 171 int vmbus_connect(void) 172 { 173 struct vmbus_channel_msginfo *msginfo = NULL; 174 int i, ret = 0; 175 __u32 version; 176 177 /* Initialize the vmbus connection */ 178 vmbus_connection.conn_state = CONNECTING; 179 vmbus_connection.work_queue = create_workqueue("hv_vmbus_con"); 180 if (!vmbus_connection.work_queue) { 181 ret = -ENOMEM; 182 goto cleanup; 183 } 184 185 vmbus_connection.rescind_work_queue = 186 create_workqueue("hv_vmbus_rescind"); 187 if (!vmbus_connection.rescind_work_queue) { 188 ret = -ENOMEM; 189 goto cleanup; 190 } 191 vmbus_connection.ignore_any_offer_msg = false; 192 193 vmbus_connection.handle_primary_chan_wq = 194 create_workqueue("hv_pri_chan"); 195 if (!vmbus_connection.handle_primary_chan_wq) { 196 ret = -ENOMEM; 197 goto cleanup; 198 } 199 200 vmbus_connection.handle_sub_chan_wq = 201 create_workqueue("hv_sub_chan"); 202 if (!vmbus_connection.handle_sub_chan_wq) { 203 ret = -ENOMEM; 204 goto cleanup; 205 } 206 207 INIT_LIST_HEAD(&vmbus_connection.chn_msg_list); 208 spin_lock_init(&vmbus_connection.channelmsg_lock); 209 210 INIT_LIST_HEAD(&vmbus_connection.chn_list); 211 mutex_init(&vmbus_connection.channel_mutex); 212 213 /* 214 * The following Hyper-V interrupt and monitor pages can be used by 215 * UIO for mapping to user-space, so they should always be allocated on 216 * system page boundaries. The system page size must be >= the Hyper-V 217 * page size. 218 */ 219 BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); 220 221 /* 222 * Setup the vmbus event connection for channel interrupt 223 * abstraction stuff 224 */ 225 vmbus_connection.int_page = 226 (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); 227 if (vmbus_connection.int_page == NULL) { 228 ret = -ENOMEM; 229 goto cleanup; 230 } 231 232 vmbus_connection.recv_int_page = vmbus_connection.int_page; 233 vmbus_connection.send_int_page = 234 (void *)((unsigned long)vmbus_connection.int_page + 235 (HV_HYP_PAGE_SIZE >> 1)); 236 237 /* 238 * Setup the monitor notification facility. The 1st page for 239 * parent->child and the 2nd page for child->parent 240 */ 241 vmbus_connection.monitor_pages[0] = (void *)__get_free_page(GFP_KERNEL); 242 vmbus_connection.monitor_pages[1] = (void *)__get_free_page(GFP_KERNEL); 243 if ((vmbus_connection.monitor_pages[0] == NULL) || 244 (vmbus_connection.monitor_pages[1] == NULL)) { 245 ret = -ENOMEM; 246 goto cleanup; 247 } 248 249 ret = set_memory_decrypted((unsigned long) 250 vmbus_connection.monitor_pages[0], 1); 251 ret |= set_memory_decrypted((unsigned long) 252 vmbus_connection.monitor_pages[1], 1); 253 if (ret) { 254 /* 255 * If set_memory_decrypted() fails, the encryption state 256 * of the memory is unknown. So leak the memory instead 257 * of risking returning decrypted memory to the free list. 258 * For simplicity, always handle both pages the same. 259 */ 260 vmbus_connection.monitor_pages[0] = NULL; 261 vmbus_connection.monitor_pages[1] = NULL; 262 goto cleanup; 263 } 264 265 /* 266 * Set_memory_decrypted() will change the memory contents if 267 * decryption occurs, so zero monitor pages here. 268 */ 269 memset(vmbus_connection.monitor_pages[0], 0x00, HV_HYP_PAGE_SIZE); 270 memset(vmbus_connection.monitor_pages[1], 0x00, HV_HYP_PAGE_SIZE); 271 272 msginfo = kzalloc(sizeof(*msginfo) + 273 sizeof(struct vmbus_channel_initiate_contact), 274 GFP_KERNEL); 275 if (msginfo == NULL) { 276 ret = -ENOMEM; 277 goto cleanup; 278 } 279 280 /* 281 * Negotiate a compatible VMBUS version number with the 282 * host. We start with the highest number we can support 283 * and work our way down until we negotiate a compatible 284 * version. 285 */ 286 287 for (i = 0; ; i++) { 288 if (i == ARRAY_SIZE(vmbus_versions)) { 289 ret = -EDOM; 290 goto cleanup; 291 } 292 293 version = vmbus_versions[i]; 294 if (version > max_version) 295 continue; 296 297 ret = vmbus_negotiate_version(msginfo, version); 298 if (ret == -ETIMEDOUT) 299 goto cleanup; 300 301 if (vmbus_connection.conn_state == CONNECTED) 302 break; 303 } 304 305 if (hv_is_isolation_supported() && version < VERSION_WIN10_V5_2) { 306 pr_err("Invalid VMBus version %d.%d (expected >= %d.%d) from the host supporting isolation\n", 307 version >> 16, version & 0xFFFF, VERSION_WIN10_V5_2 >> 16, VERSION_WIN10_V5_2 & 0xFFFF); 308 ret = -EINVAL; 309 goto cleanup; 310 } 311 312 vmbus_proto_version = version; 313 pr_info("Vmbus version:%d.%d\n", 314 version >> 16, version & 0xFFFF); 315 316 vmbus_connection.channels = kcalloc(MAX_CHANNEL_RELIDS, 317 sizeof(struct vmbus_channel *), 318 GFP_KERNEL); 319 if (vmbus_connection.channels == NULL) { 320 ret = -ENOMEM; 321 goto cleanup; 322 } 323 324 kfree(msginfo); 325 return 0; 326 327 cleanup: 328 pr_err("Unable to connect to host\n"); 329 330 vmbus_connection.conn_state = DISCONNECTED; 331 vmbus_disconnect(); 332 333 kfree(msginfo); 334 335 return ret; 336 } 337 338 void vmbus_disconnect(void) 339 { 340 /* 341 * First send the unload request to the host. 342 */ 343 vmbus_initiate_unload(false); 344 345 if (vmbus_connection.handle_sub_chan_wq) 346 destroy_workqueue(vmbus_connection.handle_sub_chan_wq); 347 348 if (vmbus_connection.handle_primary_chan_wq) 349 destroy_workqueue(vmbus_connection.handle_primary_chan_wq); 350 351 if (vmbus_connection.rescind_work_queue) 352 destroy_workqueue(vmbus_connection.rescind_work_queue); 353 354 if (vmbus_connection.work_queue) 355 destroy_workqueue(vmbus_connection.work_queue); 356 357 if (vmbus_connection.int_page) { 358 free_page((unsigned long)vmbus_connection.int_page); 359 vmbus_connection.int_page = NULL; 360 } 361 362 if (vmbus_connection.monitor_pages[0]) { 363 if (!set_memory_encrypted( 364 (unsigned long)vmbus_connection.monitor_pages[0], 1)) 365 free_page((unsigned long) 366 vmbus_connection.monitor_pages[0]); 367 vmbus_connection.monitor_pages[0] = NULL; 368 } 369 370 if (vmbus_connection.monitor_pages[1]) { 371 if (!set_memory_encrypted( 372 (unsigned long)vmbus_connection.monitor_pages[1], 1)) 373 free_page((unsigned long) 374 vmbus_connection.monitor_pages[1]); 375 vmbus_connection.monitor_pages[1] = NULL; 376 } 377 } 378 379 /* 380 * relid2channel - Get the channel object given its 381 * child relative id (ie channel id) 382 */ 383 struct vmbus_channel *relid2channel(u32 relid) 384 { 385 if (vmbus_connection.channels == NULL) { 386 pr_warn_once("relid2channel: relid=%d: No channels mapped!\n", relid); 387 return NULL; 388 } 389 if (WARN_ON(relid >= MAX_CHANNEL_RELIDS)) 390 return NULL; 391 return READ_ONCE(vmbus_connection.channels[relid]); 392 } 393 394 /* 395 * vmbus_on_event - Process a channel event notification 396 * 397 * For batched channels (default) optimize host to guest signaling 398 * by ensuring: 399 * 1. While reading the channel, we disable interrupts from host. 400 * 2. Ensure that we process all posted messages from the host 401 * before returning from this callback. 402 * 3. Once we return, enable signaling from the host. Once this 403 * state is set we check to see if additional packets are 404 * available to read. In this case we repeat the process. 405 * If this tasklet has been running for a long time 406 * then reschedule ourselves. 407 */ 408 void vmbus_on_event(unsigned long data) 409 { 410 struct vmbus_channel *channel = (void *) data; 411 void (*callback_fn)(void *context); 412 413 trace_vmbus_on_event(channel); 414 415 hv_debug_delay_test(channel, INTERRUPT_DELAY); 416 417 /* A channel once created is persistent even when 418 * there is no driver handling the device. An 419 * unloading driver sets the onchannel_callback to NULL. 420 */ 421 callback_fn = READ_ONCE(channel->onchannel_callback); 422 if (unlikely(!callback_fn)) 423 return; 424 425 (*callback_fn)(channel->channel_callback_context); 426 427 if (channel->callback_mode != HV_CALL_BATCHED) 428 return; 429 430 if (likely(hv_end_read(&channel->inbound) == 0)) 431 return; 432 433 hv_begin_read(&channel->inbound); 434 tasklet_schedule(&channel->callback_event); 435 } 436 437 /* 438 * vmbus_post_msg - Send a msg on the vmbus's message connection 439 */ 440 int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep) 441 { 442 struct vmbus_channel_message_header *hdr; 443 union hv_connection_id conn_id; 444 int ret = 0; 445 int retries = 0; 446 u32 usec = 1; 447 448 conn_id.asu32 = 0; 449 conn_id.u.id = vmbus_connection.msg_conn_id; 450 451 /* 452 * hv_post_message() can have transient failures because of 453 * insufficient resources. Retry the operation a couple of 454 * times before giving up. 455 */ 456 while (retries < 100) { 457 ret = hv_post_message(conn_id, 1, buffer, buflen); 458 459 switch (ret) { 460 case HV_STATUS_INVALID_CONNECTION_ID: 461 /* 462 * See vmbus_negotiate_version(): VMBus protocol 5.0 463 * and higher require that we must use 464 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate 465 * Contact message, but on old hosts that only 466 * support VMBus protocol 4.0 or lower, here we get 467 * HV_STATUS_INVALID_CONNECTION_ID and we should 468 * return an error immediately without retrying. 469 */ 470 hdr = buffer; 471 if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT) 472 return -EINVAL; 473 /* 474 * We could get this if we send messages too 475 * frequently. 476 */ 477 ret = -EAGAIN; 478 break; 479 case HV_STATUS_INSUFFICIENT_MEMORY: 480 case HV_STATUS_INSUFFICIENT_BUFFERS: 481 ret = -ENOBUFS; 482 break; 483 case HV_STATUS_SUCCESS: 484 return ret; 485 default: 486 pr_err("hv_post_msg() failed; error code:%d\n", ret); 487 return -EINVAL; 488 } 489 490 retries++; 491 if (can_sleep && usec > 1000) 492 msleep(usec / 1000); 493 else if (usec < MAX_UDELAY_MS * 1000) 494 udelay(usec); 495 else 496 mdelay(usec / 1000); 497 498 if (retries < 22) 499 usec *= 2; 500 } 501 return ret; 502 } 503 504 /* 505 * vmbus_set_event - Send an event notification to the parent 506 */ 507 void vmbus_set_event(struct vmbus_channel *channel) 508 { 509 u32 child_relid = channel->offermsg.child_relid; 510 511 if (!channel->is_dedicated_interrupt) 512 vmbus_send_interrupt(child_relid); 513 514 ++channel->sig_events; 515 516 if (ms_hyperv.paravisor_present) { 517 if (hv_isolation_type_snp()) 518 hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event, 519 NULL, sizeof(channel->sig_event)); 520 else if (hv_isolation_type_tdx()) 521 hv_tdx_hypercall(HVCALL_SIGNAL_EVENT | HV_HYPERCALL_FAST_BIT, 522 channel->sig_event, 0); 523 else 524 WARN_ON_ONCE(1); 525 } else { 526 u64 control = HVCALL_SIGNAL_EVENT; 527 528 control |= hv_nested ? HV_HYPERCALL_NESTED : 0; 529 hv_do_fast_hypercall8(control, channel->sig_event); 530 } 531 } 532 EXPORT_SYMBOL_GPL(vmbus_set_event); 533