1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (c) 2009, Microsoft Corporation. 5 * 6 * Authors: 7 * Haiyang Zhang <haiyangz@microsoft.com> 8 * Hank Janssen <hjanssen@microsoft.com> 9 */ 10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 11 12 #include <linux/kernel.h> 13 #include <linux/sched.h> 14 #include <linux/wait.h> 15 #include <linux/delay.h> 16 #include <linux/mm.h> 17 #include <linux/module.h> 18 #include <linux/slab.h> 19 #include <linux/vmalloc.h> 20 #include <linux/hyperv.h> 21 #include <linux/export.h> 22 #include <linux/io.h> 23 #include <linux/set_memory.h> 24 #include <asm/mshyperv.h> 25 26 #include "hyperv_vmbus.h" 27 28 29 struct vmbus_connection vmbus_connection = { 30 .conn_state = DISCONNECTED, 31 .unload_event = COMPLETION_INITIALIZER( 32 vmbus_connection.unload_event), 33 .next_gpadl_handle = ATOMIC_INIT(0xE1E10), 34 35 .ready_for_suspend_event = COMPLETION_INITIALIZER( 36 vmbus_connection.ready_for_suspend_event), 37 .all_offers_delivered_event = COMPLETION_INITIALIZER( 38 vmbus_connection.all_offers_delivered_event), 39 }; 40 EXPORT_SYMBOL_GPL(vmbus_connection); 41 42 /* 43 * Negotiated protocol version with the host. 44 */ 45 __u32 vmbus_proto_version; 46 EXPORT_SYMBOL_GPL(vmbus_proto_version); 47 48 /* 49 * Table of VMBus versions listed from newest to oldest. 50 * VERSION_WIN7 and VERSION_WS2008 are no longer supported in 51 * Linux guests and are not listed. 52 */ 53 static __u32 vmbus_versions[] = { 54 VERSION_WIN10_V6_0, 55 VERSION_WIN10_V5_3, 56 VERSION_WIN10_V5_2, 57 VERSION_WIN10_V5_1, 58 VERSION_WIN10_V5, 59 VERSION_WIN10_V4_1, 60 VERSION_WIN10, 61 VERSION_WIN8_1, 62 VERSION_WIN8 63 }; 64 65 /* 66 * Maximal VMBus protocol version guests can negotiate. Useful to cap the 67 * VMBus version for testing and debugging purpose. 68 */ 69 static uint max_version = VERSION_WIN10_V6_0; 70 71 module_param(max_version, uint, S_IRUGO); 72 MODULE_PARM_DESC(max_version, 73 "Maximal VMBus protocol version which can be negotiated"); 74 75 int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version) 76 { 77 int ret = 0; 78 struct vmbus_channel_initiate_contact *msg; 79 unsigned long flags; 80 81 init_completion(&msginfo->waitevent); 82 83 msg = (struct vmbus_channel_initiate_contact *)msginfo->msg; 84 85 memset(msg, 0, sizeof(*msg)); 86 msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT; 87 msg->vmbus_version_requested = version; 88 89 /* 90 * VMBus protocol 5.0 (VERSION_WIN10_V5) and higher require that we must 91 * use VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message, 92 * and for subsequent messages, we must use the Message Connection ID 93 * field in the host-returned Version Response Message. And, with 94 * VERSION_WIN10_V5 and higher, we don't use msg->interrupt_page, but we 95 * tell the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for 96 * compatibility. 97 * 98 * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1). 99 */ 100 if (version >= VERSION_WIN10_V5) { 101 msg->msg_sint = VMBUS_MESSAGE_SINT; 102 msg->msg_vtl = ms_hyperv.vtl; 103 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4; 104 } else { 105 msg->interrupt_page = virt_to_phys(vmbus_connection.int_page); 106 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID; 107 } 108 109 if (vmbus_is_confidential() && version >= VERSION_WIN10_V6_0) 110 msg->feature_flags = VMBUS_FEATURE_FLAG_CONFIDENTIAL_CHANNELS; 111 112 /* 113 * shared_gpa_boundary is zero in non-SNP VMs, so it's safe to always 114 * bitwise OR it 115 */ 116 msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]) | 117 ms_hyperv.shared_gpa_boundary; 118 msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]) | 119 ms_hyperv.shared_gpa_boundary; 120 121 msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU); 122 123 /* 124 * Add to list before we send the request since we may 125 * receive the response before returning from this routine 126 */ 127 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 128 list_add_tail(&msginfo->msglistentry, 129 &vmbus_connection.chn_msg_list); 130 131 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 132 133 ret = vmbus_post_msg(msg, 134 sizeof(struct vmbus_channel_initiate_contact), 135 true); 136 137 trace_vmbus_negotiate_version(msg, ret); 138 139 if (ret != 0) { 140 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 141 list_del(&msginfo->msglistentry); 142 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, 143 flags); 144 return ret; 145 } 146 147 /* Wait for the connection response */ 148 wait_for_completion(&msginfo->waitevent); 149 150 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 151 list_del(&msginfo->msglistentry); 152 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 153 154 /* Check if successful */ 155 if (msginfo->response.version_response.version_supported) { 156 vmbus_connection.conn_state = CONNECTED; 157 158 if (version >= VERSION_WIN10_V5) 159 vmbus_connection.msg_conn_id = 160 msginfo->response.version_response.msg_conn_id; 161 } else { 162 return -ECONNREFUSED; 163 } 164 165 return ret; 166 } 167 168 /* 169 * vmbus_connect - Sends a connect request on the partition service connection 170 */ 171 int vmbus_connect(void) 172 { 173 struct vmbus_channel_msginfo *msginfo = NULL; 174 int i, ret = 0; 175 __u32 version; 176 177 /* Initialize the vmbus connection */ 178 vmbus_connection.conn_state = CONNECTING; 179 vmbus_connection.work_queue = create_workqueue("hv_vmbus_con"); 180 if (!vmbus_connection.work_queue) { 181 ret = -ENOMEM; 182 goto cleanup; 183 } 184 185 vmbus_connection.rescind_work_queue = 186 create_workqueue("hv_vmbus_rescind"); 187 if (!vmbus_connection.rescind_work_queue) { 188 ret = -ENOMEM; 189 goto cleanup; 190 } 191 vmbus_connection.ignore_any_offer_msg = false; 192 193 vmbus_connection.handle_primary_chan_wq = 194 create_workqueue("hv_pri_chan"); 195 if (!vmbus_connection.handle_primary_chan_wq) { 196 ret = -ENOMEM; 197 goto cleanup; 198 } 199 200 vmbus_connection.handle_sub_chan_wq = 201 create_workqueue("hv_sub_chan"); 202 if (!vmbus_connection.handle_sub_chan_wq) { 203 ret = -ENOMEM; 204 goto cleanup; 205 } 206 207 INIT_LIST_HEAD(&vmbus_connection.chn_msg_list); 208 spin_lock_init(&vmbus_connection.channelmsg_lock); 209 210 INIT_LIST_HEAD(&vmbus_connection.chn_list); 211 mutex_init(&vmbus_connection.channel_mutex); 212 213 /* 214 * The following Hyper-V interrupt and monitor pages can be used by 215 * UIO for mapping to user-space, so they should always be allocated on 216 * system page boundaries. The system page size must be >= the Hyper-V 217 * page size. 218 */ 219 BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); 220 221 /* 222 * Setup the vmbus event connection for channel interrupt 223 * abstraction stuff 224 */ 225 vmbus_connection.int_page = 226 (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); 227 if (vmbus_connection.int_page == NULL) { 228 ret = -ENOMEM; 229 goto cleanup; 230 } 231 232 vmbus_connection.recv_int_page = vmbus_connection.int_page; 233 vmbus_connection.send_int_page = 234 (void *)((unsigned long)vmbus_connection.int_page + 235 (HV_HYP_PAGE_SIZE >> 1)); 236 237 /* 238 * Setup the monitor notification facility. The 1st page for 239 * parent->child and the 2nd page for child->parent 240 */ 241 vmbus_connection.monitor_pages[0] = (void *)__get_free_page(GFP_KERNEL); 242 vmbus_connection.monitor_pages[1] = (void *)__get_free_page(GFP_KERNEL); 243 if ((vmbus_connection.monitor_pages[0] == NULL) || 244 (vmbus_connection.monitor_pages[1] == NULL)) { 245 ret = -ENOMEM; 246 goto cleanup; 247 } 248 249 ret = set_memory_decrypted((unsigned long) 250 vmbus_connection.monitor_pages[0], 1); 251 ret |= set_memory_decrypted((unsigned long) 252 vmbus_connection.monitor_pages[1], 1); 253 if (ret) { 254 /* 255 * If set_memory_decrypted() fails, the encryption state 256 * of the memory is unknown. So leak the memory instead 257 * of risking returning decrypted memory to the free list. 258 * For simplicity, always handle both pages the same. 259 */ 260 vmbus_connection.monitor_pages[0] = NULL; 261 vmbus_connection.monitor_pages[1] = NULL; 262 goto cleanup; 263 } 264 265 /* 266 * Set_memory_decrypted() will change the memory contents if 267 * decryption occurs, so zero monitor pages here. 268 */ 269 memset(vmbus_connection.monitor_pages[0], 0x00, HV_HYP_PAGE_SIZE); 270 memset(vmbus_connection.monitor_pages[1], 0x00, HV_HYP_PAGE_SIZE); 271 272 msginfo = kzalloc(sizeof(*msginfo) + 273 sizeof(struct vmbus_channel_initiate_contact), 274 GFP_KERNEL); 275 if (msginfo == NULL) { 276 ret = -ENOMEM; 277 goto cleanup; 278 } 279 280 /* 281 * Negotiate a compatible VMBUS version number with the 282 * host. We start with the highest number we can support 283 * and work our way down until we negotiate a compatible 284 * version. 285 */ 286 287 for (i = 0; ; i++) { 288 if (i == ARRAY_SIZE(vmbus_versions)) { 289 ret = -EDOM; 290 goto cleanup; 291 } 292 293 version = vmbus_versions[i]; 294 if (version > max_version) 295 continue; 296 297 ret = vmbus_negotiate_version(msginfo, version); 298 if (ret == -ETIMEDOUT) 299 goto cleanup; 300 301 if (vmbus_connection.conn_state == CONNECTED) 302 break; 303 } 304 305 if (hv_is_isolation_supported() && version < VERSION_WIN10_V5_2) { 306 pr_err("Invalid VMBus version %d.%d (expected >= %d.%d) from the host supporting isolation\n", 307 version >> 16, version & 0xFFFF, VERSION_WIN10_V5_2 >> 16, VERSION_WIN10_V5_2 & 0xFFFF); 308 ret = -EINVAL; 309 goto cleanup; 310 } 311 312 vmbus_proto_version = version; 313 pr_info("Vmbus version:%d.%d\n", 314 version >> 16, version & 0xFFFF); 315 316 vmbus_connection.channels = kzalloc_objs(struct vmbus_channel *, 317 MAX_CHANNEL_RELIDS, GFP_KERNEL); 318 if (vmbus_connection.channels == NULL) { 319 ret = -ENOMEM; 320 goto cleanup; 321 } 322 323 kfree(msginfo); 324 return 0; 325 326 cleanup: 327 pr_err("Unable to connect to host\n"); 328 329 vmbus_connection.conn_state = DISCONNECTED; 330 vmbus_disconnect(); 331 332 kfree(msginfo); 333 334 return ret; 335 } 336 337 void vmbus_disconnect(void) 338 { 339 /* 340 * First send the unload request to the host. 341 */ 342 vmbus_initiate_unload(false); 343 344 if (vmbus_connection.handle_sub_chan_wq) 345 destroy_workqueue(vmbus_connection.handle_sub_chan_wq); 346 347 if (vmbus_connection.handle_primary_chan_wq) 348 destroy_workqueue(vmbus_connection.handle_primary_chan_wq); 349 350 if (vmbus_connection.rescind_work_queue) 351 destroy_workqueue(vmbus_connection.rescind_work_queue); 352 353 if (vmbus_connection.work_queue) 354 destroy_workqueue(vmbus_connection.work_queue); 355 356 if (vmbus_connection.int_page) { 357 free_page((unsigned long)vmbus_connection.int_page); 358 vmbus_connection.int_page = NULL; 359 } 360 361 if (vmbus_connection.monitor_pages[0]) { 362 if (!set_memory_encrypted( 363 (unsigned long)vmbus_connection.monitor_pages[0], 1)) 364 free_page((unsigned long) 365 vmbus_connection.monitor_pages[0]); 366 vmbus_connection.monitor_pages[0] = NULL; 367 } 368 369 if (vmbus_connection.monitor_pages[1]) { 370 if (!set_memory_encrypted( 371 (unsigned long)vmbus_connection.monitor_pages[1], 1)) 372 free_page((unsigned long) 373 vmbus_connection.monitor_pages[1]); 374 vmbus_connection.monitor_pages[1] = NULL; 375 } 376 } 377 378 /* 379 * relid2channel - Get the channel object given its 380 * child relative id (ie channel id) 381 */ 382 struct vmbus_channel *relid2channel(u32 relid) 383 { 384 if (vmbus_connection.channels == NULL) { 385 pr_warn_once("relid2channel: relid=%d: No channels mapped!\n", relid); 386 return NULL; 387 } 388 if (WARN_ON(relid >= MAX_CHANNEL_RELIDS)) 389 return NULL; 390 return READ_ONCE(vmbus_connection.channels[relid]); 391 } 392 393 /* 394 * vmbus_on_event - Process a channel event notification 395 * 396 * For batched channels (default) optimize host to guest signaling 397 * by ensuring: 398 * 1. While reading the channel, we disable interrupts from host. 399 * 2. Ensure that we process all posted messages from the host 400 * before returning from this callback. 401 * 3. Once we return, enable signaling from the host. Once this 402 * state is set we check to see if additional packets are 403 * available to read. In this case we repeat the process. 404 * If this tasklet has been running for a long time 405 * then reschedule ourselves. 406 */ 407 void vmbus_on_event(unsigned long data) 408 { 409 struct vmbus_channel *channel = (void *) data; 410 void (*callback_fn)(void *context); 411 412 trace_vmbus_on_event(channel); 413 414 hv_debug_delay_test(channel, INTERRUPT_DELAY); 415 416 /* A channel once created is persistent even when 417 * there is no driver handling the device. An 418 * unloading driver sets the onchannel_callback to NULL. 419 */ 420 callback_fn = READ_ONCE(channel->onchannel_callback); 421 if (unlikely(!callback_fn)) 422 return; 423 424 (*callback_fn)(channel->channel_callback_context); 425 426 if (channel->callback_mode != HV_CALL_BATCHED) 427 return; 428 429 if (likely(hv_end_read(&channel->inbound) == 0)) 430 return; 431 432 hv_begin_read(&channel->inbound); 433 tasklet_schedule(&channel->callback_event); 434 } 435 436 /* 437 * vmbus_post_msg - Send a msg on the vmbus's message connection 438 */ 439 int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep) 440 { 441 struct vmbus_channel_message_header *hdr; 442 union hv_connection_id conn_id; 443 int ret = 0; 444 int retries = 0; 445 u32 usec = 1; 446 447 conn_id.asu32 = 0; 448 conn_id.u.id = vmbus_connection.msg_conn_id; 449 450 /* 451 * hv_post_message() can have transient failures because of 452 * insufficient resources. Retry the operation a couple of 453 * times before giving up. 454 */ 455 while (retries < 100) { 456 ret = hv_post_message(conn_id, 1, buffer, buflen); 457 458 switch (ret) { 459 case HV_STATUS_INVALID_CONNECTION_ID: 460 /* 461 * See vmbus_negotiate_version(): VMBus protocol 5.0 462 * and higher require that we must use 463 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate 464 * Contact message, but on old hosts that only 465 * support VMBus protocol 4.0 or lower, here we get 466 * HV_STATUS_INVALID_CONNECTION_ID and we should 467 * return an error immediately without retrying. 468 */ 469 hdr = buffer; 470 if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT) 471 return -EINVAL; 472 /* 473 * We could get this if we send messages too 474 * frequently. 475 */ 476 ret = -EAGAIN; 477 break; 478 case HV_STATUS_INSUFFICIENT_MEMORY: 479 case HV_STATUS_INSUFFICIENT_BUFFERS: 480 ret = -ENOBUFS; 481 break; 482 case HV_STATUS_SUCCESS: 483 return ret; 484 default: 485 pr_err("hv_post_msg() failed; error code:%d\n", ret); 486 return -EINVAL; 487 } 488 489 retries++; 490 if (can_sleep && usec > 1000) 491 msleep(usec / 1000); 492 else if (usec < MAX_UDELAY_MS * 1000) 493 udelay(usec); 494 else 495 mdelay(usec / 1000); 496 497 if (retries < 22) 498 usec *= 2; 499 } 500 return ret; 501 } 502 503 /* 504 * vmbus_set_event - Send an event notification to the parent 505 */ 506 void vmbus_set_event(struct vmbus_channel *channel) 507 { 508 u32 child_relid = channel->offermsg.child_relid; 509 510 if (!channel->is_dedicated_interrupt) 511 vmbus_send_interrupt(child_relid); 512 513 ++channel->sig_events; 514 515 if (ms_hyperv.paravisor_present) { 516 if (hv_isolation_type_snp()) 517 hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event, 518 NULL, sizeof(channel->sig_event)); 519 else if (hv_isolation_type_tdx()) 520 hv_tdx_hypercall(HVCALL_SIGNAL_EVENT | HV_HYPERCALL_FAST_BIT, 521 channel->sig_event, 0); 522 else 523 WARN_ON_ONCE(1); 524 } else { 525 u64 control = HVCALL_SIGNAL_EVENT; 526 527 control |= hv_nested ? HV_HYPERCALL_NESTED : 0; 528 hv_do_fast_hypercall8(control, channel->sig_event); 529 } 530 } 531 EXPORT_SYMBOL_GPL(vmbus_set_event); 532